[tint] Add robustness for subgroup matrix builtins

They need to be predicated instead of clamped, because we cannot
guarantee that the array is large enough for a single matrix.

The stride is clamped to be at least as large as the number of rows or
columns (depending on the `col_major` parameter).

If the stride, offset, and array size are all constants and the access is
in bounds, we can skip predication.

Fixed: 403609264
Change-Id: Ic822a1e3bbeb8d28d0443433a034981bcd1185a8
Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/242556
Commit-Queue: James Price <jrprice@google.com>
Reviewed-by: dan sinclair <dsinclair@chromium.org>
diff --git a/src/tint/lang/core/ir/transform/robustness.cc b/src/tint/lang/core/ir/transform/robustness.cc
index ac63bb6..73c41ec 100644
--- a/src/tint/lang/core/ir/transform/robustness.cc
+++ b/src/tint/lang/core/ir/transform/robustness.cc
@@ -63,6 +63,7 @@
         Vector<ir::Access*, 64> accesses;
         Vector<ir::LoadVectorElement*, 64> vector_loads;
         Vector<ir::StoreVectorElement*, 64> vector_stores;
+        Vector<ir::CoreBuiltinCall*, 64> subgroup_matrix_calls;
         Vector<ir::CoreBuiltinCall*, 64> texture_calls;
         for (auto* inst : ir.Instructions()) {
             tint::Switch(
@@ -99,6 +100,11 @@
                             texture_calls.Push(call);
                         }
                     }
+                    // Collect subgroup matrix loads and stores so that they can be predicated.
+                    if (call->Func() == core::BuiltinFn::kSubgroupMatrixLoad ||
+                        call->Func() == core::BuiltinFn::kSubgroupMatrixStore) {
+                        subgroup_matrix_calls.Push(call);
+                    }
                 });
         }
 
@@ -133,6 +139,13 @@
                 ClampTextureCallArgs(call);
             });
         }
+
+        // Predicate subgroup matrix loads and stores based on their offset and stride.
+        for (auto* call : subgroup_matrix_calls) {
+            b.InsertBefore(call, [&] {  //
+                PredicateSubgroupMatrixCall(call);
+            });
+        }
     }
 
     /// Check if clamping should be applied to a particular value.
@@ -331,6 +344,127 @@
         }
     }
 
+    /// Clamp the stride of a subgroup matrix load or store call and, if configured, predicate the
+    /// call so that it only executes when the accessed memory is within the bounds of the array.
+    /// @param call the subgroup matrix builtin call instruction
+    void PredicateSubgroupMatrixCall(ir::CoreBuiltinCall* call) {
+        const auto& args = call->Args();
+
+        // Extract the arguments from the call.
+        auto* arr = args[0];
+        auto* offset = args[1];
+        Value* col_major = nullptr;
+        Value* stride = nullptr;
+        uint32_t stride_index = 0;
+        const type::SubgroupMatrix* matrix_ty = nullptr;
+        if (call->Func() == BuiltinFn::kSubgroupMatrixLoad) {
+            col_major = args[2];
+            stride = args[3];
+            stride_index = 3;
+            matrix_ty = call->Result()->Type()->As<type::SubgroupMatrix>();
+        } else if (call->Func() == BuiltinFn::kSubgroupMatrixStore) {
+            matrix_ty = args[2]->Type()->As<type::SubgroupMatrix>();
+            col_major = args[3];
+            stride = args[4];
+            stride_index = 4;
+        } else {
+            TINT_UNREACHABLE();
+        }
+
+        // Determine the minimum valid stride, and the value that we will multiply the stride by to
+        // determine the number of elements in memory that will be accessed.
+        uint32_t min_stride = 0;
+        uint32_t major_dim = 0;
+        if (col_major->As<Constant>()->Value()->ValueAs<bool>()) {
+            min_stride = matrix_ty->Rows();
+            major_dim = matrix_ty->Columns();
+        } else {
+            min_stride = matrix_ty->Columns();
+            major_dim = matrix_ty->Rows();
+        }
+
+        // Increase the stride so that it is at least `min_stride` if necessary.
+        if (auto* const_stride = stride->As<Constant>()) {
+            if (const_stride->Value()->ValueAs<uint32_t>() < min_stride) {
+                stride = b.Constant(u32(min_stride));
+            }
+        } else {
+            stride = b.Call(ty.u32(), core::BuiltinFn::kMax, stride, u32(min_stride))->Result();
+        }
+        call->SetArg(stride_index, stride);
+
+        // If we are not predicating, then clamping the stride is all we need to do.
+        if (!config.predicate_subgroup_matrix) {
+            return;
+        }
+
+        // Some matrix components types are packed together into a single array element.
+        // Take that into account here by scaling the array length to number of components.
+        // TODO(crbug.com/403609083): I8 and U8 will be 4 components per element.
+        TINT_ASSERT((matrix_ty->Type()->IsAnyOf<type::F16, type::F32, type::I32, type::U32>()));
+        uint32_t components_per_element = 1;
+
+        // Get the length of the array (in terms of matrix elements).
+        auto* arr_ty = arr->Type()->UnwrapPtr()->As<core::type::Array>();
+        TINT_ASSERT(arr_ty);
+        Value* array_length = nullptr;
+        if (arr_ty->ConstantCount()) {
+            array_length =
+                b.Constant(u32(arr_ty->ConstantCount().value() * components_per_element));
+        } else {
+            TINT_ASSERT(arr_ty->Count()->Is<type::RuntimeArrayCount>());
+            array_length = b.Call(ty.u32(), core::BuiltinFn::kArrayLength, arr)->Result(0);
+            if (components_per_element > 1) {
+                array_length = b.Multiply<u32>(array_length, u32(components_per_element))->Result();
+            }
+        }
+
+        // If the array length, offset, and stride are all constants, then we can determine if the
+        // call is in bounds now and skip any predication if so (using 64-bit math to avoid wrap).
+        if (array_length->Is<Constant>() && stride->Is<Constant>() && offset->Is<Constant>()) {
+            uint64_t const_length = array_length->As<Constant>()->Value()->ValueAs<uint32_t>();
+            uint64_t const_stride = stride->As<Constant>()->Value()->ValueAs<uint32_t>();
+            uint64_t const_offset = offset->As<Constant>()->Value()->ValueAs<uint32_t>();
+            uint64_t const_end = const_offset + (const_stride * (major_dim - 1)) + min_stride;
+            if (const_end <= const_length) {
+                return;
+            }
+        }
+
+        // Predicate the builtin call depending on whether it is in bounds.
+        auto insertion_point = call->next;
+        call->Remove();
+        b.InsertBefore(insertion_point, [&] {
+            // The last row/column begins at `offset + (major_dim-1)*stride`, and ends `min_stride`
+            // elements later. NOTE(review): this u32 arithmetic can wrap for very large operands.
+            auto* last_slice = b.Add<u32>(offset, b.Multiply<u32>(stride, u32(major_dim - 1)));
+            auto* end = b.Add<u32>(last_slice, u32(min_stride));
+            auto* in_bounds = b.LessThanEqual<bool>(end, array_length);
+            if (call->Func() == BuiltinFn::kSubgroupMatrixLoad) {
+                // Declare a variable to hold the result of the load, or a zero-initialized matrix.
+                auto* result = b.Var(ty.ptr<function>(matrix_ty));
+                auto* load_result = b.InstructionResult(matrix_ty);
+                call->Result()->ReplaceAllUsesWith(load_result);
+
+                auto* if_ = b.If(in_bounds);
+                b.Append(if_->True(), [&] {  //
+                    if_->True()->Append(call);
+                    b.Store(result, call->Result());
+                    b.ExitIf(if_);
+                });
+                b.LoadWithResult(load_result, result);
+            } else if (call->Func() == BuiltinFn::kSubgroupMatrixStore) {
+                auto* if_ = b.If(in_bounds);
+                b.Append(if_->True(), [&] {  //
+                    if_->True()->Append(call);
+                    b.ExitIf(if_);
+                });
+            } else {
+                TINT_UNREACHABLE();
+            }
+        });
+    }
+
     // Returns the root Var for `value` by walking up the chain of instructions,
     // or nullptr if none is found.
     Var* RootVarFor(Value* value) {
diff --git a/src/tint/lang/core/ir/transform/robustness.h b/src/tint/lang/core/ir/transform/robustness.h
index d9dc790..2cf3d79 100644
--- a/src/tint/lang/core/ir/transform/robustness.h
+++ b/src/tint/lang/core/ir/transform/robustness.h
@@ -62,6 +62,10 @@
     /// Should accesses to pointers with the 'workgroup' address space be clamped?
     bool clamp_workgroup = true;
 
+    /// Should subgroup matrix builtins be predicated?
+    /// Note that the stride parameter will still be clamped if predication is disabled.
+    bool predicate_subgroup_matrix = true;
+
     /// Bindings that should always be ignored.
     std::unordered_set<tint::BindingPoint> bindings_ignored;
 
diff --git a/src/tint/lang/core/ir/transform/robustness_test.cc b/src/tint/lang/core/ir/transform/robustness_test.cc
index 65b423b..120bc54 100644
--- a/src/tint/lang/core/ir/transform/robustness_test.cc
+++ b/src/tint/lang/core/ir/transform/robustness_test.cc
@@ -3478,6 +3478,699 @@
     EXPECT_EQ(src, str());
 }
 
+TEST_P(IR_RobustnessTest, SubgroupMatrixLoad_StorageRuntimeArray_ConstStride_ColMajor) {
+    auto* arr = b.Var("arr", ty.ptr(storage, ty.array<f32>()));
+    arr->SetBindingPoint(0, 0);
+    mod.root_block->Append(arr);
+
+    auto* mat = ty.subgroup_matrix_result(ty.f32(), 8u, 4u);
+
+    auto* func = b.Function("foo", mat);
+    b.Append(func->Block(), [&] {
+        // Constant stride of 1 should be clamped to 4 even when predication is disabled.
+        auto* load =
+            b.CallExplicit(mat, BuiltinFn::kSubgroupMatrixLoad, Vector{mat}, arr, 0_u, true, 1_u);
+        b.Return(func, load);
+    });
+
+    auto* src = R"(
+$B1: {  # root
+  %arr:ptr<storage, array<f32>, read_write> = var undef @binding_point(0, 0)
+}
+
+%foo = func():subgroup_matrix_result<f32, 8, 4> {
+  $B2: {
+    %3:subgroup_matrix_result<f32, 8, 4> = subgroupMatrixLoad<subgroup_matrix_result<f32, 8, 4>> %arr, 0u, true, 1u
+    ret %3
+  }
+}
+)";
+    EXPECT_EQ(src, str());
+
+    auto* expect_with_predication = R"(
+$B1: {  # root
+  %arr:ptr<storage, array<f32>, read_write> = var undef @binding_point(0, 0)
+}
+
+%foo = func():subgroup_matrix_result<f32, 8, 4> {
+  $B2: {
+    %3:u32 = arrayLength %arr
+    %4:u32 = mul 4u, 7u
+    %5:u32 = add 0u, %4
+    %6:u32 = add %5, 4u
+    %7:bool = lte %6, %3
+    %8:ptr<function, subgroup_matrix_result<f32, 8, 4>, read_write> = var undef
+    if %7 [t: $B3] {  # if_1
+      $B3: {  # true
+        %9:subgroup_matrix_result<f32, 8, 4> = subgroupMatrixLoad<subgroup_matrix_result<f32, 8, 4>> %arr, 0u, true, 4u
+        store %8, %9
+        exit_if  # if_1
+      }
+    }
+    %10:subgroup_matrix_result<f32, 8, 4> = load %8
+    ret %10
+  }
+}
+)";
+
+    auto* expect_without_predication = R"(
+$B1: {  # root
+  %arr:ptr<storage, array<f32>, read_write> = var undef @binding_point(0, 0)
+}
+
+%foo = func():subgroup_matrix_result<f32, 8, 4> {
+  $B2: {
+    %3:subgroup_matrix_result<f32, 8, 4> = subgroupMatrixLoad<subgroup_matrix_result<f32, 8, 4>> %arr, 0u, true, 4u
+    ret %3
+  }
+}
+)";
+
+    RobustnessConfig cfg;
+    cfg.predicate_subgroup_matrix = GetParam();
+    Run(Robustness, cfg);
+
+    EXPECT_EQ(GetParam() ? expect_with_predication : expect_without_predication, str());
+}
+
+TEST_P(IR_RobustnessTest, SubgroupMatrixLoad_StorageRuntimeArray_DynamicStride_ColMajor) {
+    auto* arr = b.Var("arr", ty.ptr(storage, ty.array<f32>()));
+    arr->SetBindingPoint(0, 0);
+    mod.root_block->Append(arr);
+
+    auto* mat = ty.subgroup_matrix_result(ty.f32(), 8u, 4u);
+
+    auto* func = b.Function("foo", mat);
+    auto* stride = b.FunctionParam<u32>("stride");
+    func->AppendParam(stride);
+    b.Append(func->Block(), [&] {
+        // Dynamic stride should be clamped with `max` even when predication is disabled.
+        auto* load = b.CallExplicit(mat, BuiltinFn::kSubgroupMatrixLoad, Vector{mat}, arr, 0_u,
+                                    true, stride);
+        b.Return(func, load);
+    });
+
+    auto* src = R"(
+$B1: {  # root
+  %arr:ptr<storage, array<f32>, read_write> = var undef @binding_point(0, 0)
+}
+
+%foo = func(%stride:u32):subgroup_matrix_result<f32, 8, 4> {
+  $B2: {
+    %4:subgroup_matrix_result<f32, 8, 4> = subgroupMatrixLoad<subgroup_matrix_result<f32, 8, 4>> %arr, 0u, true, %stride
+    ret %4
+  }
+}
+)";
+    EXPECT_EQ(src, str());
+
+    auto* expect_with_predication = R"(
+$B1: {  # root
+  %arr:ptr<storage, array<f32>, read_write> = var undef @binding_point(0, 0)
+}
+
+%foo = func(%stride:u32):subgroup_matrix_result<f32, 8, 4> {
+  $B2: {
+    %4:u32 = max %stride, 4u
+    %5:u32 = arrayLength %arr
+    %6:u32 = mul %4, 7u
+    %7:u32 = add 0u, %6
+    %8:u32 = add %7, 4u
+    %9:bool = lte %8, %5
+    %10:ptr<function, subgroup_matrix_result<f32, 8, 4>, read_write> = var undef
+    if %9 [t: $B3] {  # if_1
+      $B3: {  # true
+        %11:subgroup_matrix_result<f32, 8, 4> = subgroupMatrixLoad<subgroup_matrix_result<f32, 8, 4>> %arr, 0u, true, %4
+        store %10, %11
+        exit_if  # if_1
+      }
+    }
+    %12:subgroup_matrix_result<f32, 8, 4> = load %10
+    ret %12
+  }
+}
+)";
+
+    auto* expect_without_predication = R"(
+$B1: {  # root
+  %arr:ptr<storage, array<f32>, read_write> = var undef @binding_point(0, 0)
+}
+
+%foo = func(%stride:u32):subgroup_matrix_result<f32, 8, 4> {
+  $B2: {
+    %4:u32 = max %stride, 4u
+    %5:subgroup_matrix_result<f32, 8, 4> = subgroupMatrixLoad<subgroup_matrix_result<f32, 8, 4>> %arr, 0u, true, %4
+    ret %5
+  }
+}
+)";
+
+    RobustnessConfig cfg;
+    cfg.predicate_subgroup_matrix = GetParam();
+    Run(Robustness, cfg);
+
+    EXPECT_EQ(GetParam() ? expect_with_predication : expect_without_predication, str());
+}
+
+TEST_P(IR_RobustnessTest, SubgroupMatrixLoad_StorageRuntimeArray_DynamicStride_RowMajor) {
+    auto* arr = b.Var("arr", ty.ptr(storage, ty.array<f32>()));
+    arr->SetBindingPoint(0, 0);
+    mod.root_block->Append(arr);
+
+    auto* mat = ty.subgroup_matrix_result(ty.f32(), 8u, 4u);
+
+    auto* func = b.Function("foo", mat);
+    auto* stride = b.FunctionParam<u32>("stride");
+    func->AppendParam(stride);
+    b.Append(func->Block(), [&] {
+        // Dynamic stride should be clamped with `max` even when predication is disabled.
+        auto* load = b.CallExplicit(mat, BuiltinFn::kSubgroupMatrixLoad, Vector{mat}, arr, 0_u,
+                                    false, stride);
+        b.Return(func, load);
+    });
+
+    auto* src = R"(
+$B1: {  # root
+  %arr:ptr<storage, array<f32>, read_write> = var undef @binding_point(0, 0)
+}
+
+%foo = func(%stride:u32):subgroup_matrix_result<f32, 8, 4> {
+  $B2: {
+    %4:subgroup_matrix_result<f32, 8, 4> = subgroupMatrixLoad<subgroup_matrix_result<f32, 8, 4>> %arr, 0u, false, %stride
+    ret %4
+  }
+}
+)";
+    EXPECT_EQ(src, str());
+
+    auto* expect_with_predication = R"(
+$B1: {  # root
+  %arr:ptr<storage, array<f32>, read_write> = var undef @binding_point(0, 0)
+}
+
+%foo = func(%stride:u32):subgroup_matrix_result<f32, 8, 4> {
+  $B2: {
+    %4:u32 = max %stride, 8u
+    %5:u32 = arrayLength %arr
+    %6:u32 = mul %4, 3u
+    %7:u32 = add 0u, %6
+    %8:u32 = add %7, 8u
+    %9:bool = lte %8, %5
+    %10:ptr<function, subgroup_matrix_result<f32, 8, 4>, read_write> = var undef
+    if %9 [t: $B3] {  # if_1
+      $B3: {  # true
+        %11:subgroup_matrix_result<f32, 8, 4> = subgroupMatrixLoad<subgroup_matrix_result<f32, 8, 4>> %arr, 0u, false, %4
+        store %10, %11
+        exit_if  # if_1
+      }
+    }
+    %12:subgroup_matrix_result<f32, 8, 4> = load %10
+    ret %12
+  }
+}
+)";
+
+    auto* expect_without_predication = R"(
+$B1: {  # root
+  %arr:ptr<storage, array<f32>, read_write> = var undef @binding_point(0, 0)
+}
+
+%foo = func(%stride:u32):subgroup_matrix_result<f32, 8, 4> {
+  $B2: {
+    %4:u32 = max %stride, 8u
+    %5:subgroup_matrix_result<f32, 8, 4> = subgroupMatrixLoad<subgroup_matrix_result<f32, 8, 4>> %arr, 0u, false, %4
+    ret %5
+  }
+}
+)";
+
+    RobustnessConfig cfg;
+    cfg.predicate_subgroup_matrix = GetParam();
+    Run(Robustness, cfg);
+
+    EXPECT_EQ(GetParam() ? expect_with_predication : expect_without_predication, str());
+}
+
+TEST_P(IR_RobustnessTest, SubgroupMatrixLoad_WorkgroupFixedArray_DynamicStride_ColMajor) {
+    auto* arr = b.Var("arr", ty.ptr(workgroup, ty.array<f32, 1024>()));
+    mod.root_block->Append(arr);
+
+    auto* mat = ty.subgroup_matrix_result(ty.f32(), 8u, 4u);
+
+    auto* func = b.Function("foo", mat);
+    auto* stride = b.FunctionParam<u32>("stride");
+    func->AppendParam(stride);
+    b.Append(func->Block(), [&] {
+        // Dynamic stride should be clamped with `max` even when predication is disabled.
+        auto* load = b.CallExplicit(mat, BuiltinFn::kSubgroupMatrixLoad, Vector{mat}, arr, 0_u,
+                                    true, stride);
+        b.Return(func, load);
+    });
+
+    auto* src = R"(
+$B1: {  # root
+  %arr:ptr<workgroup, array<f32, 1024>, read_write> = var undef
+}
+
+%foo = func(%stride:u32):subgroup_matrix_result<f32, 8, 4> {
+  $B2: {
+    %4:subgroup_matrix_result<f32, 8, 4> = subgroupMatrixLoad<subgroup_matrix_result<f32, 8, 4>> %arr, 0u, true, %stride
+    ret %4
+  }
+}
+)";
+    EXPECT_EQ(src, str());
+
+    auto* expect_with_predication = R"(
+$B1: {  # root
+  %arr:ptr<workgroup, array<f32, 1024>, read_write> = var undef
+}
+
+%foo = func(%stride:u32):subgroup_matrix_result<f32, 8, 4> {
+  $B2: {
+    %4:u32 = max %stride, 4u
+    %5:u32 = mul %4, 7u
+    %6:u32 = add 0u, %5
+    %7:u32 = add %6, 4u
+    %8:bool = lte %7, 1024u
+    %9:ptr<function, subgroup_matrix_result<f32, 8, 4>, read_write> = var undef
+    if %8 [t: $B3] {  # if_1
+      $B3: {  # true
+        %10:subgroup_matrix_result<f32, 8, 4> = subgroupMatrixLoad<subgroup_matrix_result<f32, 8, 4>> %arr, 0u, true, %4
+        store %9, %10
+        exit_if  # if_1
+      }
+    }
+    %11:subgroup_matrix_result<f32, 8, 4> = load %9
+    ret %11
+  }
+}
+)";
+
+    auto* expect_without_predication = R"(
+$B1: {  # root
+  %arr:ptr<workgroup, array<f32, 1024>, read_write> = var undef
+}
+
+%foo = func(%stride:u32):subgroup_matrix_result<f32, 8, 4> {
+  $B2: {
+    %4:u32 = max %stride, 4u
+    %5:subgroup_matrix_result<f32, 8, 4> = subgroupMatrixLoad<subgroup_matrix_result<f32, 8, 4>> %arr, 0u, true, %4
+    ret %5
+  }
+}
+)";
+
+    RobustnessConfig cfg;
+    cfg.predicate_subgroup_matrix = GetParam();
+    Run(Robustness, cfg);
+
+    EXPECT_EQ(GetParam() ? expect_with_predication : expect_without_predication, str());
+}
+
+// Test that we avoid any predication and clamping for fixed size arrays when all parameters are
+// constant and in-bounds.
+TEST_P(IR_RobustnessTest, SubgroupMatrixLoad_WorkgroupFixedArray_ConstStrideAndOffset) {
+    auto* arr = b.Var("arr", ty.ptr(workgroup, ty.array<f32, 1024>()));
+    mod.root_block->Append(arr);
+
+    auto* mat = ty.subgroup_matrix_result(ty.f32(), 8u, 4u);
+
+    auto* func = b.Function("foo", mat);
+    b.Append(func->Block(), [&] {
+        // The final row will start at 1016. Another full stride will take it past the 1024 limit,
+        // but the transform should understand that only 8 elements are accessed on that row.
+        auto* load = b.CallExplicit(mat, BuiltinFn::kSubgroupMatrixLoad, Vector{mat}, arr, 920_u,
+                                    false, 32_u);
+        b.Return(func, load);
+    });
+
+    auto* src = R"(
+$B1: {  # root
+  %arr:ptr<workgroup, array<f32, 1024>, read_write> = var undef
+}
+
+%foo = func():subgroup_matrix_result<f32, 8, 4> {
+  $B2: {
+    %3:subgroup_matrix_result<f32, 8, 4> = subgroupMatrixLoad<subgroup_matrix_result<f32, 8, 4>> %arr, 920u, false, 32u
+    ret %3
+  }
+}
+)";
+    EXPECT_EQ(src, str());
+
+    auto* expect = src;
+
+    RobustnessConfig cfg;
+    cfg.predicate_subgroup_matrix = GetParam();
+    Run(Robustness, cfg);
+
+    EXPECT_EQ(expect, str());
+}
+
+TEST_P(IR_RobustnessTest, SubgroupMatrixStore_StorageRuntimeArray_ConstStride_ColMajor) {
+    auto* arr = b.Var("arr", ty.ptr(storage, ty.array<f32>()));
+    arr->SetBindingPoint(0, 0);
+    mod.root_block->Append(arr);
+
+    auto* mat = ty.subgroup_matrix_result(ty.f32(), 8u, 4u);
+
+    auto* func = b.Function("foo", ty.void_());
+    auto* value = b.FunctionParam("value", mat);
+    func->AppendParam(value);
+    b.Append(func->Block(), [&] {
+        // Constant stride of 1 should be clamped to 4 even when predication is disabled.
+        b.Call(ty.void_(), BuiltinFn::kSubgroupMatrixStore, arr, 0_u, value, true, 1_u);
+        b.Return(func);
+    });
+
+    auto* src = R"(
+$B1: {  # root
+  %arr:ptr<storage, array<f32>, read_write> = var undef @binding_point(0, 0)
+}
+
+%foo = func(%value:subgroup_matrix_result<f32, 8, 4>):void {
+  $B2: {
+    %4:void = subgroupMatrixStore %arr, 0u, %value, true, 1u
+    ret
+  }
+}
+)";
+    EXPECT_EQ(src, str());
+
+    auto* expect_with_predication = R"(
+$B1: {  # root
+  %arr:ptr<storage, array<f32>, read_write> = var undef @binding_point(0, 0)
+}
+
+%foo = func(%value:subgroup_matrix_result<f32, 8, 4>):void {
+  $B2: {
+    %4:u32 = arrayLength %arr
+    %5:u32 = mul 4u, 7u
+    %6:u32 = add 0u, %5
+    %7:u32 = add %6, 4u
+    %8:bool = lte %7, %4
+    if %8 [t: $B3] {  # if_1
+      $B3: {  # true
+        %9:void = subgroupMatrixStore %arr, 0u, %value, true, 4u
+        exit_if  # if_1
+      }
+    }
+    ret
+  }
+}
+)";
+
+    auto* expect_without_predication = R"(
+$B1: {  # root
+  %arr:ptr<storage, array<f32>, read_write> = var undef @binding_point(0, 0)
+}
+
+%foo = func(%value:subgroup_matrix_result<f32, 8, 4>):void {
+  $B2: {
+    %4:void = subgroupMatrixStore %arr, 0u, %value, true, 4u
+    ret
+  }
+}
+)";
+
+    RobustnessConfig cfg;
+    cfg.predicate_subgroup_matrix = GetParam();
+    Run(Robustness, cfg);
+
+    EXPECT_EQ(GetParam() ? expect_with_predication : expect_without_predication, str());
+}
+
+TEST_P(IR_RobustnessTest, SubgroupMatrixStore_StorageRuntimeArray_DynamicStride_ColMajor) {
+    auto* arr = b.Var("arr", ty.ptr(storage, ty.array<f32>()));
+    arr->SetBindingPoint(0, 0);
+    mod.root_block->Append(arr);
+
+    auto* mat = ty.subgroup_matrix_result(ty.f32(), 8u, 4u);
+
+    auto* func = b.Function("foo", ty.void_());
+    auto* value = b.FunctionParam("value", mat);
+    auto* stride = b.FunctionParam<u32>("stride");
+    func->AppendParam(value);
+    func->AppendParam(stride);
+    b.Append(func->Block(), [&] {
+        // Dynamic stride should be clamped with `max` even when predication is disabled.
+        b.Call(ty.void_(), BuiltinFn::kSubgroupMatrixStore, arr, 0_u, value, true, stride);
+        b.Return(func);
+    });
+
+    auto* src = R"(
+$B1: {  # root
+  %arr:ptr<storage, array<f32>, read_write> = var undef @binding_point(0, 0)
+}
+
+%foo = func(%value:subgroup_matrix_result<f32, 8, 4>, %stride:u32):void {
+  $B2: {
+    %5:void = subgroupMatrixStore %arr, 0u, %value, true, %stride
+    ret
+  }
+}
+)";
+    EXPECT_EQ(src, str());
+
+    auto* expect_with_predication = R"(
+$B1: {  # root
+  %arr:ptr<storage, array<f32>, read_write> = var undef @binding_point(0, 0)
+}
+
+%foo = func(%value:subgroup_matrix_result<f32, 8, 4>, %stride:u32):void {
+  $B2: {
+    %5:u32 = max %stride, 4u
+    %6:u32 = arrayLength %arr
+    %7:u32 = mul %5, 7u
+    %8:u32 = add 0u, %7
+    %9:u32 = add %8, 4u
+    %10:bool = lte %9, %6
+    if %10 [t: $B3] {  # if_1
+      $B3: {  # true
+        %11:void = subgroupMatrixStore %arr, 0u, %value, true, %5
+        exit_if  # if_1
+      }
+    }
+    ret
+  }
+}
+)";
+
+    auto* expect_without_predication = R"(
+$B1: {  # root
+  %arr:ptr<storage, array<f32>, read_write> = var undef @binding_point(0, 0)
+}
+
+%foo = func(%value:subgroup_matrix_result<f32, 8, 4>, %stride:u32):void {
+  $B2: {
+    %5:u32 = max %stride, 4u
+    %6:void = subgroupMatrixStore %arr, 0u, %value, true, %5
+    ret
+  }
+}
+)";
+
+    RobustnessConfig cfg;
+    cfg.predicate_subgroup_matrix = GetParam();
+    Run(Robustness, cfg);
+
+    EXPECT_EQ(GetParam() ? expect_with_predication : expect_without_predication, str());
+}
+
+TEST_P(IR_RobustnessTest, SubgroupMatrixStore_StorageRuntimeArray_DynamicStride_RowMajor) {
+    auto* arr = b.Var("arr", ty.ptr(storage, ty.array<f32>()));
+    arr->SetBindingPoint(0, 0);
+    mod.root_block->Append(arr);
+
+    auto* mat = ty.subgroup_matrix_result(ty.f32(), 8u, 4u);
+
+    auto* func = b.Function("foo", ty.void_());
+    auto* value = b.FunctionParam("value", mat);
+    auto* stride = b.FunctionParam<u32>("stride");
+    func->AppendParam(value);
+    func->AppendParam(stride);
+    b.Append(func->Block(), [&] {
+        // Dynamic stride should be clamped with `max` even when predication is disabled.
+        b.Call(ty.void_(), BuiltinFn::kSubgroupMatrixStore, arr, 0_u, value, false, stride);
+        b.Return(func);
+    });
+
+    auto* src = R"(
+$B1: {  # root
+  %arr:ptr<storage, array<f32>, read_write> = var undef @binding_point(0, 0)
+}
+
+%foo = func(%value:subgroup_matrix_result<f32, 8, 4>, %stride:u32):void {
+  $B2: {
+    %5:void = subgroupMatrixStore %arr, 0u, %value, false, %stride
+    ret
+  }
+}
+)";
+    EXPECT_EQ(src, str());
+
+    auto* expect_with_predication = R"(
+$B1: {  # root
+  %arr:ptr<storage, array<f32>, read_write> = var undef @binding_point(0, 0)
+}
+
+%foo = func(%value:subgroup_matrix_result<f32, 8, 4>, %stride:u32):void {
+  $B2: {
+    %5:u32 = max %stride, 8u
+    %6:u32 = arrayLength %arr
+    %7:u32 = mul %5, 3u
+    %8:u32 = add 0u, %7
+    %9:u32 = add %8, 8u
+    %10:bool = lte %9, %6
+    if %10 [t: $B3] {  # if_1
+      $B3: {  # true
+        %11:void = subgroupMatrixStore %arr, 0u, %value, false, %5
+        exit_if  # if_1
+      }
+    }
+    ret
+  }
+}
+)";
+
+    auto* expect_without_predication = R"(
+$B1: {  # root
+  %arr:ptr<storage, array<f32>, read_write> = var undef @binding_point(0, 0)
+}
+
+%foo = func(%value:subgroup_matrix_result<f32, 8, 4>, %stride:u32):void {
+  $B2: {
+    %5:u32 = max %stride, 8u
+    %6:void = subgroupMatrixStore %arr, 0u, %value, false, %5
+    ret
+  }
+}
+)";
+
+    RobustnessConfig cfg;
+    cfg.predicate_subgroup_matrix = GetParam();
+    Run(Robustness, cfg);
+
+    EXPECT_EQ(GetParam() ? expect_with_predication : expect_without_predication, str());
+}
+
+TEST_P(IR_RobustnessTest, SubgroupMatrixStore_WorkgroupFixedArray_DynamicStride_ColMajor) {
+    auto* arr = b.Var("arr", ty.ptr(workgroup, ty.array<f32, 1024>()));
+    mod.root_block->Append(arr);
+
+    auto* mat = ty.subgroup_matrix_result(ty.f32(), 8u, 4u);
+
+    auto* func = b.Function("foo", ty.void_());
+    auto* value = b.FunctionParam("value", mat);
+    auto* stride = b.FunctionParam<u32>("stride");
+    func->AppendParam(value);
+    func->AppendParam(stride);
+    b.Append(func->Block(), [&] {
+        // Dynamic stride should be clamped with `max` even when predication is disabled.
+        b.Call(ty.void_(), BuiltinFn::kSubgroupMatrixStore, arr, 0_u, value, true, stride);
+        b.Return(func);
+    });
+
+    auto* src = R"(
+$B1: {  # root
+  %arr:ptr<workgroup, array<f32, 1024>, read_write> = var undef
+}
+
+%foo = func(%value:subgroup_matrix_result<f32, 8, 4>, %stride:u32):void {
+  $B2: {
+    %5:void = subgroupMatrixStore %arr, 0u, %value, true, %stride
+    ret
+  }
+}
+)";
+    EXPECT_EQ(src, str());
+
+    auto* expect_with_predication = R"(
+$B1: {  # root
+  %arr:ptr<workgroup, array<f32, 1024>, read_write> = var undef
+}
+
+%foo = func(%value:subgroup_matrix_result<f32, 8, 4>, %stride:u32):void {
+  $B2: {
+    %5:u32 = max %stride, 4u
+    %6:u32 = mul %5, 7u
+    %7:u32 = add 0u, %6
+    %8:u32 = add %7, 4u
+    %9:bool = lte %8, 1024u
+    if %9 [t: $B3] {  # if_1
+      $B3: {  # true
+        %10:void = subgroupMatrixStore %arr, 0u, %value, true, %5
+        exit_if  # if_1
+      }
+    }
+    ret
+  }
+}
+)";
+
+    auto* expect_without_predication = R"(
+$B1: {  # root
+  %arr:ptr<workgroup, array<f32, 1024>, read_write> = var undef
+}
+
+%foo = func(%value:subgroup_matrix_result<f32, 8, 4>, %stride:u32):void {
+  $B2: {
+    %5:u32 = max %stride, 4u
+    %6:void = subgroupMatrixStore %arr, 0u, %value, true, %5
+    ret
+  }
+}
+)";
+
+    RobustnessConfig cfg;
+    cfg.predicate_subgroup_matrix = GetParam();
+    Run(Robustness, cfg);
+
+    EXPECT_EQ(GetParam() ? expect_with_predication : expect_without_predication, str());
+}
+
+// Test that we avoid any predication and clamping for fixed size arrays when all parameters are
+// constant and in-bounds.
+TEST_P(IR_RobustnessTest, SubgroupMatrixStore_WorkgroupFixedArray_ConstStrideAndOffset) {
+    auto* arr = b.Var("arr", ty.ptr(workgroup, ty.array<f32, 1024>()));
+    mod.root_block->Append(arr);
+
+    auto* mat = ty.subgroup_matrix_result(ty.f32(), 8u, 4u);
+
+    auto* func = b.Function("foo", ty.void_());
+    auto* value = b.FunctionParam("value", mat);
+    func->AppendParam(value);
+    b.Append(func->Block(), [&] {
+        // The final row will start at 1016. Another full stride will take it past the 1024 limit,
+        // but the transform should understand that only 8 elements are accessed on that row.
+        b.Call(ty.void_(), BuiltinFn::kSubgroupMatrixStore, arr, 920_u, value, false, 32_u);
+        b.Return(func);
+    });
+
+    auto* src = R"(
+$B1: {  # root
+  %arr:ptr<workgroup, array<f32, 1024>, read_write> = var undef
+}
+
+%foo = func(%value:subgroup_matrix_result<f32, 8, 4>):void {
+  $B2: {
+    %4:void = subgroupMatrixStore %arr, 920u, %value, false, 32u
+    ret
+  }
+}
+)";
+    EXPECT_EQ(src, str());
+
+    auto* expect = src;
+
+    RobustnessConfig cfg;
+    cfg.predicate_subgroup_matrix = GetParam();
+    Run(Robustness, cfg);
+
+    EXPECT_EQ(expect, str());
+}
+
 INSTANTIATE_TEST_SUITE_P(, IR_RobustnessTest, testing::Values(false, true));
 
 INSTANTIATE_TEST_SUITE_P(,
diff --git a/test/tint/builtins/gen/literal/subgroupMatrixLoad/0cc7b0.wgsl.expected.msl b/test/tint/builtins/gen/literal/subgroupMatrixLoad/0cc7b0.wgsl.expected.msl
index d4ee9ae..55bb3bf 100644
--- a/test/tint/builtins/gen/literal/subgroupMatrixLoad/0cc7b0.wgsl.expected.msl
+++ b/test/tint/builtins/gen/literal/subgroupMatrixLoad/0cc7b0.wgsl.expected.msl
@@ -20,16 +20,21 @@
 struct tint_module_vars_struct {
   device tint_array<half, 1024>* prevent_dce;
   device SB_RW* sb_rw;
+  const constant tint_array<uint4, 1>* tint_storage_buffer_sizes;
 };
 
 simdgroup_half8x8 subgroupMatrixLoad_0cc7b0(tint_module_vars_struct tint_module_vars) {
   simdgroup_half8x8 v = make_filled_simdgroup_matrix<half, 8, 8>(0.0h);
-  simdgroup_load(v, (&(*tint_module_vars.sb_rw).arg_0[1u]), ulong(8u), ulong2(0ul), true);
+  if ((((1u + (8u * 7u)) + 8u) <= (((*tint_module_vars.tint_storage_buffer_sizes)[0u].x - 0u) / 2u))) {
+    simdgroup_half8x8 v_1 = make_filled_simdgroup_matrix<half, 8, 8>(0.0h);
+    simdgroup_load(v_1, (&(*tint_module_vars.sb_rw).arg_0[1u]), ulong(8u), ulong2(0ul), true);
+    v = v_1;
+  }
   simdgroup_half8x8 res = v;
   return res;
 }
 
-kernel void compute_main(device tint_array<half, 1024>* prevent_dce [[buffer(0)]], device SB_RW* sb_rw [[buffer(1)]]) {
-  tint_module_vars_struct const tint_module_vars = tint_module_vars_struct{.prevent_dce=prevent_dce, .sb_rw=sb_rw};
+kernel void compute_main(device tint_array<half, 1024>* prevent_dce [[buffer(0)]], device SB_RW* sb_rw [[buffer(1)]], const constant tint_array<uint4, 1>* tint_storage_buffer_sizes [[buffer(30)]]) {
+  tint_module_vars_struct const tint_module_vars = tint_module_vars_struct{.prevent_dce=prevent_dce, .sb_rw=sb_rw, .tint_storage_buffer_sizes=tint_storage_buffer_sizes};
   simdgroup_store(subgroupMatrixLoad_0cc7b0(tint_module_vars), (&(*tint_module_vars.prevent_dce)[0u]), ulong(64u), ulong2(0ul), false);
 }
diff --git a/test/tint/builtins/gen/literal/subgroupMatrixLoad/0cc7b0.wgsl.expected.spvasm b/test/tint/builtins/gen/literal/subgroupMatrixLoad/0cc7b0.wgsl.expected.spvasm
index 263a6d1..8c202de 100644
--- a/test/tint/builtins/gen/literal/subgroupMatrixLoad/0cc7b0.wgsl.expected.spvasm
+++ b/test/tint/builtins/gen/literal/subgroupMatrixLoad/0cc7b0.wgsl.expected.spvasm
@@ -1,7 +1,7 @@
 ; SPIR-V
 ; Version: 1.3
 ; Generator: Google Tint Compiler; 1
-; Bound: 38
+; Bound: 51
 ; Schema: 0
                OpCapability Shader
                OpCapability VulkanMemoryModel
@@ -50,28 +50,45 @@
          %13 = OpTypeCooperativeMatrixKHR %half %uint_3 %uint_8 %uint_8 %uint_0
          %17 = OpTypeFunction %13
 %_ptr_StorageBuffer__runtimearr_half = OpTypePointer StorageBuffer %_runtimearr_half
-%_ptr_StorageBuffer_half = OpTypePointer StorageBuffer %half
+     %uint_7 = OpConstant %uint 7
      %uint_1 = OpConstant %uint 1
+       %bool = OpTypeBool
 %_ptr_Function_13 = OpTypePointer Function %13
+%half_0x0p_0 = OpConstant %half 0x0p+0
+         %31 = OpConstantComposite %13 %half_0x0p_0
+%_ptr_StorageBuffer_half = OpTypePointer StorageBuffer %half
        %void = OpTypeVoid
-         %30 = OpTypeFunction %void
+         %43 = OpTypeFunction %void
 %_ptr_StorageBuffer__arr_half_uint_1024 = OpTypePointer StorageBuffer %_arr_half_uint_1024
     %uint_64 = OpConstant %uint 64
 %subgroupMatrixLoad_0cc7b0 = OpFunction %13 None %17
          %18 = OpLabel
+         %29 = OpVariable %_ptr_Function_13 Function %31
         %res = OpVariable %_ptr_Function_13 Function
          %19 = OpAccessChain %_ptr_StorageBuffer__runtimearr_half %sb_rw %uint_0
-         %21 = OpAccessChain %_ptr_StorageBuffer_half %19 %uint_1
-         %24 = OpCooperativeMatrixLoadKHR %13 %21 %uint_1 %uint_8 NonPrivatePointer
-               OpStore %res %24
-         %27 = OpLoad %13 %res None
-               OpReturnValue %27
+         %21 = OpArrayLength %uint %sb_rw 0
+         %22 = OpIMul %uint %uint_8 %uint_7
+         %24 = OpIAdd %uint %uint_1 %22
+         %26 = OpIAdd %uint %24 %uint_8
+         %27 = OpULessThanEqual %bool %26 %21
+               OpSelectionMerge %33 None
+               OpBranchConditional %27 %34 %33
+         %34 = OpLabel
+         %38 = OpAccessChain %_ptr_StorageBuffer_half %19 %uint_1
+         %40 = OpCooperativeMatrixLoadKHR %13 %38 %uint_1 %uint_8 NonPrivatePointer
+               OpStore %29 %40 None
+               OpBranch %33
+         %33 = OpLabel
+         %35 = OpLoad %13 %29 None
+               OpStore %res %35
+         %37 = OpLoad %13 %res None
+               OpReturnValue %37
                OpFunctionEnd
-%compute_main = OpFunction %void None %30
-         %31 = OpLabel
-         %32 = OpFunctionCall %13 %subgroupMatrixLoad_0cc7b0
-         %33 = OpAccessChain %_ptr_StorageBuffer__arr_half_uint_1024 %1 %uint_0
-         %35 = OpAccessChain %_ptr_StorageBuffer_half %33 %uint_0
-               OpCooperativeMatrixStoreKHR %35 %32 %uint_0 %uint_64 NonPrivatePointer
+%compute_main = OpFunction %void None %43
+         %44 = OpLabel
+         %45 = OpFunctionCall %13 %subgroupMatrixLoad_0cc7b0
+         %46 = OpAccessChain %_ptr_StorageBuffer__arr_half_uint_1024 %1 %uint_0
+         %48 = OpAccessChain %_ptr_StorageBuffer_half %46 %uint_0
+               OpCooperativeMatrixStoreKHR %48 %45 %uint_0 %uint_64 NonPrivatePointer
                OpReturn
                OpFunctionEnd
diff --git a/test/tint/builtins/gen/literal/subgroupMatrixLoad/128bf4.wgsl.expected.msl b/test/tint/builtins/gen/literal/subgroupMatrixLoad/128bf4.wgsl.expected.msl
index c866146..a0a903c 100644
--- a/test/tint/builtins/gen/literal/subgroupMatrixLoad/128bf4.wgsl.expected.msl
+++ b/test/tint/builtins/gen/literal/subgroupMatrixLoad/128bf4.wgsl.expected.msl
@@ -20,16 +20,21 @@
 struct tint_module_vars_struct {
   device tint_array<half, 1024>* prevent_dce;
   device SB_RW* sb_rw;
+  const constant tint_array<uint4, 1>* tint_storage_buffer_sizes;
 };
 
 simdgroup_half8x8 subgroupMatrixLoad_128bf4(tint_module_vars_struct tint_module_vars) {
   simdgroup_half8x8 v = make_filled_simdgroup_matrix<half, 8, 8>(0.0h);
-  simdgroup_load(v, (&(*tint_module_vars.sb_rw).arg_0[1u]), ulong(8u), ulong2(0ul), true);
+  if ((((1u + (8u * 7u)) + 8u) <= (((*tint_module_vars.tint_storage_buffer_sizes)[0u].x - 0u) / 2u))) {
+    simdgroup_half8x8 v_1 = make_filled_simdgroup_matrix<half, 8, 8>(0.0h);
+    simdgroup_load(v_1, (&(*tint_module_vars.sb_rw).arg_0[1u]), ulong(8u), ulong2(0ul), true);
+    v = v_1;
+  }
   simdgroup_half8x8 res = v;
   return res;
 }
 
-kernel void compute_main(device tint_array<half, 1024>* prevent_dce [[buffer(0)]], device SB_RW* sb_rw [[buffer(1)]]) {
-  tint_module_vars_struct const tint_module_vars = tint_module_vars_struct{.prevent_dce=prevent_dce, .sb_rw=sb_rw};
+kernel void compute_main(device tint_array<half, 1024>* prevent_dce [[buffer(0)]], device SB_RW* sb_rw [[buffer(1)]], const constant tint_array<uint4, 1>* tint_storage_buffer_sizes [[buffer(30)]]) {
+  tint_module_vars_struct const tint_module_vars = tint_module_vars_struct{.prevent_dce=prevent_dce, .sb_rw=sb_rw, .tint_storage_buffer_sizes=tint_storage_buffer_sizes};
   simdgroup_store(subgroupMatrixLoad_128bf4(tint_module_vars), (&(*tint_module_vars.prevent_dce)[0u]), ulong(64u), ulong2(0ul), false);
 }
diff --git a/test/tint/builtins/gen/literal/subgroupMatrixLoad/128bf4.wgsl.expected.spvasm b/test/tint/builtins/gen/literal/subgroupMatrixLoad/128bf4.wgsl.expected.spvasm
index f73a304..6e4d14b 100644
--- a/test/tint/builtins/gen/literal/subgroupMatrixLoad/128bf4.wgsl.expected.spvasm
+++ b/test/tint/builtins/gen/literal/subgroupMatrixLoad/128bf4.wgsl.expected.spvasm
@@ -1,7 +1,7 @@
 ; SPIR-V
 ; Version: 1.3
 ; Generator: Google Tint Compiler; 1
-; Bound: 39
+; Bound: 52
 ; Schema: 0
                OpCapability Shader
                OpCapability VulkanMemoryModel
@@ -51,28 +51,45 @@
          %17 = OpTypeFunction %13
 %_ptr_StorageBuffer__runtimearr_half = OpTypePointer StorageBuffer %_runtimearr_half
      %uint_0 = OpConstant %uint 0
-%_ptr_StorageBuffer_half = OpTypePointer StorageBuffer %half
+     %uint_7 = OpConstant %uint 7
      %uint_1 = OpConstant %uint 1
+       %bool = OpTypeBool
 %_ptr_Function_13 = OpTypePointer Function %13
+%half_0x0p_0 = OpConstant %half 0x0p+0
+         %32 = OpConstantComposite %13 %half_0x0p_0
+%_ptr_StorageBuffer_half = OpTypePointer StorageBuffer %half
        %void = OpTypeVoid
-         %31 = OpTypeFunction %void
+         %44 = OpTypeFunction %void
 %_ptr_StorageBuffer__arr_half_uint_1024 = OpTypePointer StorageBuffer %_arr_half_uint_1024
     %uint_64 = OpConstant %uint 64
 %subgroupMatrixLoad_128bf4 = OpFunction %13 None %17
          %18 = OpLabel
+         %30 = OpVariable %_ptr_Function_13 Function %32
         %res = OpVariable %_ptr_Function_13 Function
          %19 = OpAccessChain %_ptr_StorageBuffer__runtimearr_half %sb_rw %uint_0
-         %22 = OpAccessChain %_ptr_StorageBuffer_half %19 %uint_1
-         %25 = OpCooperativeMatrixLoadKHR %13 %22 %uint_1 %uint_8 NonPrivatePointer
-               OpStore %res %25
-         %28 = OpLoad %13 %res None
-               OpReturnValue %28
+         %22 = OpArrayLength %uint %sb_rw 0
+         %23 = OpIMul %uint %uint_8 %uint_7
+         %25 = OpIAdd %uint %uint_1 %23
+         %27 = OpIAdd %uint %25 %uint_8
+         %28 = OpULessThanEqual %bool %27 %22
+               OpSelectionMerge %34 None
+               OpBranchConditional %28 %35 %34
+         %35 = OpLabel
+         %39 = OpAccessChain %_ptr_StorageBuffer_half %19 %uint_1
+         %41 = OpCooperativeMatrixLoadKHR %13 %39 %uint_1 %uint_8 NonPrivatePointer
+               OpStore %30 %41 None
+               OpBranch %34
+         %34 = OpLabel
+         %36 = OpLoad %13 %30 None
+               OpStore %res %36
+         %38 = OpLoad %13 %res None
+               OpReturnValue %38
                OpFunctionEnd
-%compute_main = OpFunction %void None %31
-         %32 = OpLabel
-         %33 = OpFunctionCall %13 %subgroupMatrixLoad_128bf4
-         %34 = OpAccessChain %_ptr_StorageBuffer__arr_half_uint_1024 %1 %uint_0
-         %36 = OpAccessChain %_ptr_StorageBuffer_half %34 %uint_0
-               OpCooperativeMatrixStoreKHR %36 %33 %uint_0 %uint_64 NonPrivatePointer
+%compute_main = OpFunction %void None %44
+         %45 = OpLabel
+         %46 = OpFunctionCall %13 %subgroupMatrixLoad_128bf4
+         %47 = OpAccessChain %_ptr_StorageBuffer__arr_half_uint_1024 %1 %uint_0
+         %49 = OpAccessChain %_ptr_StorageBuffer_half %47 %uint_0
+               OpCooperativeMatrixStoreKHR %49 %46 %uint_0 %uint_64 NonPrivatePointer
                OpReturn
                OpFunctionEnd
diff --git a/test/tint/builtins/gen/literal/subgroupMatrixLoad/12ca82.wgsl.expected.spvasm b/test/tint/builtins/gen/literal/subgroupMatrixLoad/12ca82.wgsl.expected.spvasm
index 66947dd..7e23fe5 100644
--- a/test/tint/builtins/gen/literal/subgroupMatrixLoad/12ca82.wgsl.expected.spvasm
+++ b/test/tint/builtins/gen/literal/subgroupMatrixLoad/12ca82.wgsl.expected.spvasm
@@ -1,7 +1,7 @@
 ; SPIR-V
 ; Version: 1.3
 ; Generator: Google Tint Compiler; 1
-; Bound: 38
+; Bound: 50
 ; Schema: 0
                OpCapability Shader
                OpCapability VulkanMemoryModel
@@ -47,28 +47,44 @@
          %16 = OpTypeFunction %12
 %_ptr_StorageBuffer__runtimearr_uint = OpTypePointer StorageBuffer %_runtimearr_uint
      %uint_0 = OpConstant %uint 0
-%_ptr_StorageBuffer_uint = OpTypePointer StorageBuffer %uint
+     %uint_7 = OpConstant %uint 7
      %uint_1 = OpConstant %uint 1
+       %bool = OpTypeBool
 %_ptr_Function_12 = OpTypePointer Function %12
+         %31 = OpConstantComposite %12 %uint_0
+%_ptr_StorageBuffer_uint = OpTypePointer StorageBuffer %uint
        %void = OpTypeVoid
-         %30 = OpTypeFunction %void
+         %42 = OpTypeFunction %void
 %_ptr_StorageBuffer__arr_uint_uint_1024 = OpTypePointer StorageBuffer %_arr_uint_uint_1024
     %uint_64 = OpConstant %uint 64
 %subgroupMatrixLoad_12ca82 = OpFunction %12 None %16
          %17 = OpLabel
+         %29 = OpVariable %_ptr_Function_12 Function %31
         %res = OpVariable %_ptr_Function_12 Function
          %18 = OpAccessChain %_ptr_StorageBuffer__runtimearr_uint %sb_rw %uint_0
-         %21 = OpAccessChain %_ptr_StorageBuffer_uint %18 %uint_1
-         %24 = OpCooperativeMatrixLoadKHR %12 %21 %uint_1 %uint_8 NonPrivatePointer
-               OpStore %res %24
-         %27 = OpLoad %12 %res None
-               OpReturnValue %27
+         %21 = OpArrayLength %uint %sb_rw 0
+         %22 = OpIMul %uint %uint_8 %uint_7
+         %24 = OpIAdd %uint %uint_1 %22
+         %26 = OpIAdd %uint %24 %uint_8
+         %27 = OpULessThanEqual %bool %26 %21
+               OpSelectionMerge %32 None
+               OpBranchConditional %27 %33 %32
+         %33 = OpLabel
+         %37 = OpAccessChain %_ptr_StorageBuffer_uint %18 %uint_1
+         %39 = OpCooperativeMatrixLoadKHR %12 %37 %uint_1 %uint_8 NonPrivatePointer
+               OpStore %29 %39 None
+               OpBranch %32
+         %32 = OpLabel
+         %34 = OpLoad %12 %29 None
+               OpStore %res %34
+         %36 = OpLoad %12 %res None
+               OpReturnValue %36
                OpFunctionEnd
-%compute_main = OpFunction %void None %30
-         %31 = OpLabel
-         %32 = OpFunctionCall %12 %subgroupMatrixLoad_12ca82
-         %33 = OpAccessChain %_ptr_StorageBuffer__arr_uint_uint_1024 %1 %uint_0
-         %35 = OpAccessChain %_ptr_StorageBuffer_uint %33 %uint_0
-               OpCooperativeMatrixStoreKHR %35 %32 %uint_0 %uint_64 NonPrivatePointer
+%compute_main = OpFunction %void None %42
+         %43 = OpLabel
+         %44 = OpFunctionCall %12 %subgroupMatrixLoad_12ca82
+         %45 = OpAccessChain %_ptr_StorageBuffer__arr_uint_uint_1024 %1 %uint_0
+         %47 = OpAccessChain %_ptr_StorageBuffer_uint %45 %uint_0
+               OpCooperativeMatrixStoreKHR %47 %44 %uint_0 %uint_64 NonPrivatePointer
                OpReturn
                OpFunctionEnd
diff --git a/test/tint/builtins/gen/literal/subgroupMatrixLoad/15689c.wgsl.expected.msl b/test/tint/builtins/gen/literal/subgroupMatrixLoad/15689c.wgsl.expected.msl
index f5bd69b..9ba5a41 100644
--- a/test/tint/builtins/gen/literal/subgroupMatrixLoad/15689c.wgsl.expected.msl
+++ b/test/tint/builtins/gen/literal/subgroupMatrixLoad/15689c.wgsl.expected.msl
@@ -20,16 +20,21 @@
 struct tint_module_vars_struct {
   device tint_array<float, 1024>* prevent_dce;
   device SB_RW* sb_rw;
+  const constant tint_array<uint4, 1>* tint_storage_buffer_sizes;
 };
 
 simdgroup_float8x8 subgroupMatrixLoad_15689c(tint_module_vars_struct tint_module_vars) {
   simdgroup_float8x8 v = make_filled_simdgroup_matrix<float, 8, 8>(0.0f);
-  simdgroup_load(v, (&(*tint_module_vars.sb_rw).arg_0[1u]), ulong(8u), ulong2(0ul), true);
+  if ((((1u + (8u * 7u)) + 8u) <= (((*tint_module_vars.tint_storage_buffer_sizes)[0u].x - 0u) / 4u))) {
+    simdgroup_float8x8 v_1 = make_filled_simdgroup_matrix<float, 8, 8>(0.0f);
+    simdgroup_load(v_1, (&(*tint_module_vars.sb_rw).arg_0[1u]), ulong(8u), ulong2(0ul), true);
+    v = v_1;
+  }
   simdgroup_float8x8 res = v;
   return res;
 }
 
-kernel void compute_main(device tint_array<float, 1024>* prevent_dce [[buffer(0)]], device SB_RW* sb_rw [[buffer(1)]]) {
-  tint_module_vars_struct const tint_module_vars = tint_module_vars_struct{.prevent_dce=prevent_dce, .sb_rw=sb_rw};
+kernel void compute_main(device tint_array<float, 1024>* prevent_dce [[buffer(0)]], device SB_RW* sb_rw [[buffer(1)]], const constant tint_array<uint4, 1>* tint_storage_buffer_sizes [[buffer(30)]]) {
+  tint_module_vars_struct const tint_module_vars = tint_module_vars_struct{.prevent_dce=prevent_dce, .sb_rw=sb_rw, .tint_storage_buffer_sizes=tint_storage_buffer_sizes};
   simdgroup_store(subgroupMatrixLoad_15689c(tint_module_vars), (&(*tint_module_vars.prevent_dce)[0u]), ulong(64u), ulong2(0ul), false);
 }
diff --git a/test/tint/builtins/gen/literal/subgroupMatrixLoad/15689c.wgsl.expected.spvasm b/test/tint/builtins/gen/literal/subgroupMatrixLoad/15689c.wgsl.expected.spvasm
index b2f5ee6..2d1b4ba 100644
--- a/test/tint/builtins/gen/literal/subgroupMatrixLoad/15689c.wgsl.expected.spvasm
+++ b/test/tint/builtins/gen/literal/subgroupMatrixLoad/15689c.wgsl.expected.spvasm
@@ -1,7 +1,7 @@
 ; SPIR-V
 ; Version: 1.3
 ; Generator: Google Tint Compiler; 1
-; Bound: 38
+; Bound: 51
 ; Schema: 0
                OpCapability Shader
                OpCapability VulkanMemoryModel
@@ -48,27 +48,44 @@
          %17 = OpTypeFunction %13
 %_ptr_StorageBuffer__runtimearr_float = OpTypePointer StorageBuffer %_runtimearr_float
      %uint_0 = OpConstant %uint 0
-%_ptr_StorageBuffer_float = OpTypePointer StorageBuffer %float
+     %uint_7 = OpConstant %uint 7
+       %bool = OpTypeBool
 %_ptr_Function_13 = OpTypePointer Function %13
+    %float_0 = OpConstant %float 0
+         %31 = OpConstantComposite %13 %float_0
+%_ptr_StorageBuffer_float = OpTypePointer StorageBuffer %float
        %void = OpTypeVoid
-         %30 = OpTypeFunction %void
+         %43 = OpTypeFunction %void
 %_ptr_StorageBuffer__arr_float_uint_1024 = OpTypePointer StorageBuffer %_arr_float_uint_1024
     %uint_64 = OpConstant %uint 64
 %subgroupMatrixLoad_15689c = OpFunction %13 None %17
          %18 = OpLabel
+         %29 = OpVariable %_ptr_Function_13 Function %31
         %res = OpVariable %_ptr_Function_13 Function
          %19 = OpAccessChain %_ptr_StorageBuffer__runtimearr_float %sb_rw %uint_0
-         %22 = OpAccessChain %_ptr_StorageBuffer_float %19 %uint_1
-         %24 = OpCooperativeMatrixLoadKHR %13 %22 %uint_1 %uint_8 NonPrivatePointer
-               OpStore %res %24
-         %27 = OpLoad %13 %res None
-               OpReturnValue %27
+         %22 = OpArrayLength %uint %sb_rw 0
+         %23 = OpIMul %uint %uint_8 %uint_7
+         %25 = OpIAdd %uint %uint_1 %23
+         %26 = OpIAdd %uint %25 %uint_8
+         %27 = OpULessThanEqual %bool %26 %22
+               OpSelectionMerge %33 None
+               OpBranchConditional %27 %34 %33
+         %34 = OpLabel
+         %38 = OpAccessChain %_ptr_StorageBuffer_float %19 %uint_1
+         %40 = OpCooperativeMatrixLoadKHR %13 %38 %uint_1 %uint_8 NonPrivatePointer
+               OpStore %29 %40 None
+               OpBranch %33
+         %33 = OpLabel
+         %35 = OpLoad %13 %29 None
+               OpStore %res %35
+         %37 = OpLoad %13 %res None
+               OpReturnValue %37
                OpFunctionEnd
-%compute_main = OpFunction %void None %30
-         %31 = OpLabel
-         %32 = OpFunctionCall %13 %subgroupMatrixLoad_15689c
-         %33 = OpAccessChain %_ptr_StorageBuffer__arr_float_uint_1024 %1 %uint_0
-         %35 = OpAccessChain %_ptr_StorageBuffer_float %33 %uint_0
-               OpCooperativeMatrixStoreKHR %35 %32 %uint_0 %uint_64 NonPrivatePointer
+%compute_main = OpFunction %void None %43
+         %44 = OpLabel
+         %45 = OpFunctionCall %13 %subgroupMatrixLoad_15689c
+         %46 = OpAccessChain %_ptr_StorageBuffer__arr_float_uint_1024 %1 %uint_0
+         %48 = OpAccessChain %_ptr_StorageBuffer_float %46 %uint_0
+               OpCooperativeMatrixStoreKHR %48 %45 %uint_0 %uint_64 NonPrivatePointer
                OpReturn
                OpFunctionEnd
diff --git a/test/tint/builtins/gen/literal/subgroupMatrixLoad/21f2c7.wgsl.expected.msl b/test/tint/builtins/gen/literal/subgroupMatrixLoad/21f2c7.wgsl.expected.msl
index 697f988..dc7cd36 100644
--- a/test/tint/builtins/gen/literal/subgroupMatrixLoad/21f2c7.wgsl.expected.msl
+++ b/test/tint/builtins/gen/literal/subgroupMatrixLoad/21f2c7.wgsl.expected.msl
@@ -20,16 +20,21 @@
 struct tint_module_vars_struct {
   device tint_array<half, 1024>* prevent_dce;
   const device SB_RO* sb_ro;
+  const constant tint_array<uint4, 1>* tint_storage_buffer_sizes;
 };
 
 simdgroup_half8x8 subgroupMatrixLoad_21f2c7(tint_module_vars_struct tint_module_vars) {
   simdgroup_half8x8 v = make_filled_simdgroup_matrix<half, 8, 8>(0.0h);
-  simdgroup_load(v, (&(*tint_module_vars.sb_ro).arg_0[1u]), ulong(8u), ulong2(0ul), true);
+  if ((((1u + (8u * 7u)) + 8u) <= (((*tint_module_vars.tint_storage_buffer_sizes)[0u].x - 0u) / 2u))) {
+    simdgroup_half8x8 v_1 = make_filled_simdgroup_matrix<half, 8, 8>(0.0h);
+    simdgroup_load(v_1, (&(*tint_module_vars.sb_ro).arg_0[1u]), ulong(8u), ulong2(0ul), true);
+    v = v_1;
+  }
   simdgroup_half8x8 res = v;
   return res;
 }
 
-kernel void compute_main(device tint_array<half, 1024>* prevent_dce [[buffer(0)]], const device SB_RO* sb_ro [[buffer(1)]]) {
-  tint_module_vars_struct const tint_module_vars = tint_module_vars_struct{.prevent_dce=prevent_dce, .sb_ro=sb_ro};
+kernel void compute_main(device tint_array<half, 1024>* prevent_dce [[buffer(0)]], const device SB_RO* sb_ro [[buffer(1)]], const constant tint_array<uint4, 1>* tint_storage_buffer_sizes [[buffer(30)]]) {
+  tint_module_vars_struct const tint_module_vars = tint_module_vars_struct{.prevent_dce=prevent_dce, .sb_ro=sb_ro, .tint_storage_buffer_sizes=tint_storage_buffer_sizes};
   simdgroup_store(subgroupMatrixLoad_21f2c7(tint_module_vars), (&(*tint_module_vars.prevent_dce)[0u]), ulong(64u), ulong2(0ul), false);
 }
diff --git a/test/tint/builtins/gen/literal/subgroupMatrixLoad/21f2c7.wgsl.expected.spvasm b/test/tint/builtins/gen/literal/subgroupMatrixLoad/21f2c7.wgsl.expected.spvasm
index dded813..0485ba4 100644
--- a/test/tint/builtins/gen/literal/subgroupMatrixLoad/21f2c7.wgsl.expected.spvasm
+++ b/test/tint/builtins/gen/literal/subgroupMatrixLoad/21f2c7.wgsl.expected.spvasm
@@ -1,7 +1,7 @@
 ; SPIR-V
 ; Version: 1.3
 ; Generator: Google Tint Compiler; 1
-; Bound: 40
+; Bound: 53
 ; Schema: 0
                OpCapability Shader
                OpCapability VulkanMemoryModel
@@ -52,29 +52,46 @@
          %17 = OpTypeFunction %13
 %_ptr_StorageBuffer__runtimearr_half = OpTypePointer StorageBuffer %_runtimearr_half
      %uint_0 = OpConstant %uint 0
-%_ptr_StorageBuffer_half = OpTypePointer StorageBuffer %half
+     %uint_7 = OpConstant %uint 7
      %uint_1 = OpConstant %uint 1
+       %bool = OpTypeBool
 %_ptr_Function_13 = OpTypePointer Function %13
+%half_0x0p_0 = OpConstant %half 0x0p+0
+         %32 = OpConstantComposite %13 %half_0x0p_0
+%_ptr_StorageBuffer_half = OpTypePointer StorageBuffer %half
        %void = OpTypeVoid
-         %31 = OpTypeFunction %void
+         %44 = OpTypeFunction %void
 %_ptr_StorageBuffer__arr_half_uint_1024 = OpTypePointer StorageBuffer %_arr_half_uint_1024
 %_ptr_StorageBuffer_half_0 = OpTypePointer StorageBuffer %half
     %uint_64 = OpConstant %uint 64
 %subgroupMatrixLoad_21f2c7 = OpFunction %13 None %17
          %18 = OpLabel
+         %30 = OpVariable %_ptr_Function_13 Function %32
         %res = OpVariable %_ptr_Function_13 Function
          %19 = OpAccessChain %_ptr_StorageBuffer__runtimearr_half %sb_ro %uint_0
-         %22 = OpAccessChain %_ptr_StorageBuffer_half %19 %uint_1
-         %25 = OpCooperativeMatrixLoadKHR %13 %22 %uint_1 %uint_8 NonPrivatePointer
-               OpStore %res %25
-         %28 = OpLoad %13 %res None
-               OpReturnValue %28
+         %22 = OpArrayLength %uint %sb_ro 0
+         %23 = OpIMul %uint %uint_8 %uint_7
+         %25 = OpIAdd %uint %uint_1 %23
+         %27 = OpIAdd %uint %25 %uint_8
+         %28 = OpULessThanEqual %bool %27 %22
+               OpSelectionMerge %34 None
+               OpBranchConditional %28 %35 %34
+         %35 = OpLabel
+         %39 = OpAccessChain %_ptr_StorageBuffer_half %19 %uint_1
+         %41 = OpCooperativeMatrixLoadKHR %13 %39 %uint_1 %uint_8 NonPrivatePointer
+               OpStore %30 %41 None
+               OpBranch %34
+         %34 = OpLabel
+         %36 = OpLoad %13 %30 None
+               OpStore %res %36
+         %38 = OpLoad %13 %res None
+               OpReturnValue %38
                OpFunctionEnd
-%compute_main = OpFunction %void None %31
-         %32 = OpLabel
-         %33 = OpFunctionCall %13 %subgroupMatrixLoad_21f2c7
-         %34 = OpAccessChain %_ptr_StorageBuffer__arr_half_uint_1024 %1 %uint_0
-         %36 = OpAccessChain %_ptr_StorageBuffer_half_0 %34 %uint_0
-               OpCooperativeMatrixStoreKHR %36 %33 %uint_0 %uint_64 NonPrivatePointer
+%compute_main = OpFunction %void None %44
+         %45 = OpLabel
+         %46 = OpFunctionCall %13 %subgroupMatrixLoad_21f2c7
+         %47 = OpAccessChain %_ptr_StorageBuffer__arr_half_uint_1024 %1 %uint_0
+         %49 = OpAccessChain %_ptr_StorageBuffer_half_0 %47 %uint_0
+               OpCooperativeMatrixStoreKHR %49 %46 %uint_0 %uint_64 NonPrivatePointer
                OpReturn
                OpFunctionEnd
diff --git a/test/tint/builtins/gen/literal/subgroupMatrixLoad/30634b.wgsl.expected.spvasm b/test/tint/builtins/gen/literal/subgroupMatrixLoad/30634b.wgsl.expected.spvasm
index 7b32c7c..ec8096c 100644
--- a/test/tint/builtins/gen/literal/subgroupMatrixLoad/30634b.wgsl.expected.spvasm
+++ b/test/tint/builtins/gen/literal/subgroupMatrixLoad/30634b.wgsl.expected.spvasm
@@ -1,7 +1,7 @@
 ; SPIR-V
 ; Version: 1.3
 ; Generator: Google Tint Compiler; 1
-; Bound: 38
+; Bound: 51
 ; Schema: 0
                OpCapability Shader
                OpCapability VulkanMemoryModel
@@ -47,28 +47,45 @@
          %13 = OpTypeCooperativeMatrixKHR %int %uint_3 %uint_8 %uint_8 %uint_0
          %17 = OpTypeFunction %13
 %_ptr_StorageBuffer__runtimearr_int = OpTypePointer StorageBuffer %_runtimearr_int
-%_ptr_StorageBuffer_int = OpTypePointer StorageBuffer %int
+     %uint_7 = OpConstant %uint 7
      %uint_1 = OpConstant %uint 1
+       %bool = OpTypeBool
 %_ptr_Function_13 = OpTypePointer Function %13
+      %int_0 = OpConstant %int 0
+         %31 = OpConstantComposite %13 %int_0
+%_ptr_StorageBuffer_int = OpTypePointer StorageBuffer %int
        %void = OpTypeVoid
-         %30 = OpTypeFunction %void
+         %43 = OpTypeFunction %void
 %_ptr_StorageBuffer__arr_int_uint_1024 = OpTypePointer StorageBuffer %_arr_int_uint_1024
     %uint_64 = OpConstant %uint 64
 %subgroupMatrixLoad_30634b = OpFunction %13 None %17
          %18 = OpLabel
+         %29 = OpVariable %_ptr_Function_13 Function %31
         %res = OpVariable %_ptr_Function_13 Function
          %19 = OpAccessChain %_ptr_StorageBuffer__runtimearr_int %sb_rw %uint_0
-         %21 = OpAccessChain %_ptr_StorageBuffer_int %19 %uint_1
-         %24 = OpCooperativeMatrixLoadKHR %13 %21 %uint_1 %uint_8 NonPrivatePointer
-               OpStore %res %24
-         %27 = OpLoad %13 %res None
-               OpReturnValue %27
+         %21 = OpArrayLength %uint %sb_rw 0
+         %22 = OpIMul %uint %uint_8 %uint_7
+         %24 = OpIAdd %uint %uint_1 %22
+         %26 = OpIAdd %uint %24 %uint_8
+         %27 = OpULessThanEqual %bool %26 %21
+               OpSelectionMerge %33 None
+               OpBranchConditional %27 %34 %33
+         %34 = OpLabel
+         %38 = OpAccessChain %_ptr_StorageBuffer_int %19 %uint_1
+         %40 = OpCooperativeMatrixLoadKHR %13 %38 %uint_1 %uint_8 NonPrivatePointer
+               OpStore %29 %40 None
+               OpBranch %33
+         %33 = OpLabel
+         %35 = OpLoad %13 %29 None
+               OpStore %res %35
+         %37 = OpLoad %13 %res None
+               OpReturnValue %37
                OpFunctionEnd
-%compute_main = OpFunction %void None %30
-         %31 = OpLabel
-         %32 = OpFunctionCall %13 %subgroupMatrixLoad_30634b
-         %33 = OpAccessChain %_ptr_StorageBuffer__arr_int_uint_1024 %1 %uint_0
-         %35 = OpAccessChain %_ptr_StorageBuffer_int %33 %uint_0
-               OpCooperativeMatrixStoreKHR %35 %32 %uint_0 %uint_64 NonPrivatePointer
+%compute_main = OpFunction %void None %43
+         %44 = OpLabel
+         %45 = OpFunctionCall %13 %subgroupMatrixLoad_30634b
+         %46 = OpAccessChain %_ptr_StorageBuffer__arr_int_uint_1024 %1 %uint_0
+         %48 = OpAccessChain %_ptr_StorageBuffer_int %46 %uint_0
+               OpCooperativeMatrixStoreKHR %48 %45 %uint_0 %uint_64 NonPrivatePointer
                OpReturn
                OpFunctionEnd
diff --git a/test/tint/builtins/gen/literal/subgroupMatrixLoad/457d20.wgsl.expected.spvasm b/test/tint/builtins/gen/literal/subgroupMatrixLoad/457d20.wgsl.expected.spvasm
index 9deaab8..8c31657 100644
--- a/test/tint/builtins/gen/literal/subgroupMatrixLoad/457d20.wgsl.expected.spvasm
+++ b/test/tint/builtins/gen/literal/subgroupMatrixLoad/457d20.wgsl.expected.spvasm
@@ -1,7 +1,7 @@
 ; SPIR-V
 ; Version: 1.3
 ; Generator: Google Tint Compiler; 1
-; Bound: 38
+; Bound: 50
 ; Schema: 0
                OpCapability Shader
                OpCapability VulkanMemoryModel
@@ -47,29 +47,45 @@
          %12 = OpTypeCooperativeMatrixKHR %uint %uint_3 %uint_8 %uint_8 %uint_0
          %16 = OpTypeFunction %12
 %_ptr_StorageBuffer__runtimearr_uint = OpTypePointer StorageBuffer %_runtimearr_uint
-%_ptr_StorageBuffer_uint = OpTypePointer StorageBuffer %uint
+     %uint_7 = OpConstant %uint 7
      %uint_1 = OpConstant %uint 1
+       %bool = OpTypeBool
 %_ptr_Function_12 = OpTypePointer Function %12
+         %30 = OpConstantComposite %12 %uint_0
+%_ptr_StorageBuffer_uint = OpTypePointer StorageBuffer %uint
        %void = OpTypeVoid
-         %29 = OpTypeFunction %void
+         %41 = OpTypeFunction %void
 %_ptr_StorageBuffer__arr_uint_uint_1024 = OpTypePointer StorageBuffer %_arr_uint_uint_1024
 %_ptr_StorageBuffer_uint_0 = OpTypePointer StorageBuffer %uint
     %uint_64 = OpConstant %uint 64
 %subgroupMatrixLoad_457d20 = OpFunction %12 None %16
          %17 = OpLabel
+         %28 = OpVariable %_ptr_Function_12 Function %30
         %res = OpVariable %_ptr_Function_12 Function
          %18 = OpAccessChain %_ptr_StorageBuffer__runtimearr_uint %sb_ro %uint_0
-         %20 = OpAccessChain %_ptr_StorageBuffer_uint %18 %uint_1
-         %23 = OpCooperativeMatrixLoadKHR %12 %20 %uint_1 %uint_8 NonPrivatePointer
-               OpStore %res %23
-         %26 = OpLoad %12 %res None
-               OpReturnValue %26
+         %20 = OpArrayLength %uint %sb_ro 0
+         %21 = OpIMul %uint %uint_8 %uint_7
+         %23 = OpIAdd %uint %uint_1 %21
+         %25 = OpIAdd %uint %23 %uint_8
+         %26 = OpULessThanEqual %bool %25 %20
+               OpSelectionMerge %31 None
+               OpBranchConditional %26 %32 %31
+         %32 = OpLabel
+         %36 = OpAccessChain %_ptr_StorageBuffer_uint %18 %uint_1
+         %38 = OpCooperativeMatrixLoadKHR %12 %36 %uint_1 %uint_8 NonPrivatePointer
+               OpStore %28 %38 None
+               OpBranch %31
+         %31 = OpLabel
+         %33 = OpLoad %12 %28 None
+               OpStore %res %33
+         %35 = OpLoad %12 %res None
+               OpReturnValue %35
                OpFunctionEnd
-%compute_main = OpFunction %void None %29
-         %30 = OpLabel
-         %31 = OpFunctionCall %12 %subgroupMatrixLoad_457d20
-         %32 = OpAccessChain %_ptr_StorageBuffer__arr_uint_uint_1024 %1 %uint_0
-         %34 = OpAccessChain %_ptr_StorageBuffer_uint_0 %32 %uint_0
-               OpCooperativeMatrixStoreKHR %34 %31 %uint_0 %uint_64 NonPrivatePointer
+%compute_main = OpFunction %void None %41
+         %42 = OpLabel
+         %43 = OpFunctionCall %12 %subgroupMatrixLoad_457d20
+         %44 = OpAccessChain %_ptr_StorageBuffer__arr_uint_uint_1024 %1 %uint_0
+         %46 = OpAccessChain %_ptr_StorageBuffer_uint_0 %44 %uint_0
+               OpCooperativeMatrixStoreKHR %46 %43 %uint_0 %uint_64 NonPrivatePointer
                OpReturn
                OpFunctionEnd
diff --git a/test/tint/builtins/gen/literal/subgroupMatrixLoad/5155ed.wgsl.expected.spvasm b/test/tint/builtins/gen/literal/subgroupMatrixLoad/5155ed.wgsl.expected.spvasm
index ab144b6..1c600f0 100644
--- a/test/tint/builtins/gen/literal/subgroupMatrixLoad/5155ed.wgsl.expected.spvasm
+++ b/test/tint/builtins/gen/literal/subgroupMatrixLoad/5155ed.wgsl.expected.spvasm
@@ -1,7 +1,7 @@
 ; SPIR-V
 ; Version: 1.3
 ; Generator: Google Tint Compiler; 1
-; Bound: 37
+; Bound: 49
 ; Schema: 0
                OpCapability Shader
                OpCapability VulkanMemoryModel
@@ -46,28 +46,44 @@
          %12 = OpTypeCooperativeMatrixKHR %uint %uint_3 %uint_8 %uint_8 %uint_0
          %16 = OpTypeFunction %12
 %_ptr_StorageBuffer__runtimearr_uint = OpTypePointer StorageBuffer %_runtimearr_uint
-%_ptr_StorageBuffer_uint = OpTypePointer StorageBuffer %uint
+     %uint_7 = OpConstant %uint 7
      %uint_1 = OpConstant %uint 1
+       %bool = OpTypeBool
 %_ptr_Function_12 = OpTypePointer Function %12
+         %30 = OpConstantComposite %12 %uint_0
+%_ptr_StorageBuffer_uint = OpTypePointer StorageBuffer %uint
        %void = OpTypeVoid
-         %29 = OpTypeFunction %void
+         %41 = OpTypeFunction %void
 %_ptr_StorageBuffer__arr_uint_uint_1024 = OpTypePointer StorageBuffer %_arr_uint_uint_1024
     %uint_64 = OpConstant %uint 64
 %subgroupMatrixLoad_5155ed = OpFunction %12 None %16
          %17 = OpLabel
+         %28 = OpVariable %_ptr_Function_12 Function %30
         %res = OpVariable %_ptr_Function_12 Function
          %18 = OpAccessChain %_ptr_StorageBuffer__runtimearr_uint %sb_rw %uint_0
-         %20 = OpAccessChain %_ptr_StorageBuffer_uint %18 %uint_1
-         %23 = OpCooperativeMatrixLoadKHR %12 %20 %uint_1 %uint_8 NonPrivatePointer
-               OpStore %res %23
-         %26 = OpLoad %12 %res None
-               OpReturnValue %26
+         %20 = OpArrayLength %uint %sb_rw 0
+         %21 = OpIMul %uint %uint_8 %uint_7
+         %23 = OpIAdd %uint %uint_1 %21
+         %25 = OpIAdd %uint %23 %uint_8
+         %26 = OpULessThanEqual %bool %25 %20
+               OpSelectionMerge %31 None
+               OpBranchConditional %26 %32 %31
+         %32 = OpLabel
+         %36 = OpAccessChain %_ptr_StorageBuffer_uint %18 %uint_1
+         %38 = OpCooperativeMatrixLoadKHR %12 %36 %uint_1 %uint_8 NonPrivatePointer
+               OpStore %28 %38 None
+               OpBranch %31
+         %31 = OpLabel
+         %33 = OpLoad %12 %28 None
+               OpStore %res %33
+         %35 = OpLoad %12 %res None
+               OpReturnValue %35
                OpFunctionEnd
-%compute_main = OpFunction %void None %29
-         %30 = OpLabel
-         %31 = OpFunctionCall %12 %subgroupMatrixLoad_5155ed
-         %32 = OpAccessChain %_ptr_StorageBuffer__arr_uint_uint_1024 %1 %uint_0
-         %34 = OpAccessChain %_ptr_StorageBuffer_uint %32 %uint_0
-               OpCooperativeMatrixStoreKHR %34 %31 %uint_0 %uint_64 NonPrivatePointer
+%compute_main = OpFunction %void None %41
+         %42 = OpLabel
+         %43 = OpFunctionCall %12 %subgroupMatrixLoad_5155ed
+         %44 = OpAccessChain %_ptr_StorageBuffer__arr_uint_uint_1024 %1 %uint_0
+         %46 = OpAccessChain %_ptr_StorageBuffer_uint %44 %uint_0
+               OpCooperativeMatrixStoreKHR %46 %43 %uint_0 %uint_64 NonPrivatePointer
                OpReturn
                OpFunctionEnd
diff --git a/test/tint/builtins/gen/literal/subgroupMatrixLoad/5de410.wgsl.expected.msl b/test/tint/builtins/gen/literal/subgroupMatrixLoad/5de410.wgsl.expected.msl
index 002f2c1..3a84bb3 100644
--- a/test/tint/builtins/gen/literal/subgroupMatrixLoad/5de410.wgsl.expected.msl
+++ b/test/tint/builtins/gen/literal/subgroupMatrixLoad/5de410.wgsl.expected.msl
@@ -20,16 +20,21 @@
 struct tint_module_vars_struct {
   device tint_array<half, 1024>* prevent_dce;
   const device SB_RO* sb_ro;
+  const constant tint_array<uint4, 1>* tint_storage_buffer_sizes;
 };
 
 simdgroup_half8x8 subgroupMatrixLoad_5de410(tint_module_vars_struct tint_module_vars) {
   simdgroup_half8x8 v = make_filled_simdgroup_matrix<half, 8, 8>(0.0h);
-  simdgroup_load(v, (&(*tint_module_vars.sb_ro).arg_0[1u]), ulong(8u), ulong2(0ul), true);
+  if ((((1u + (8u * 7u)) + 8u) <= (((*tint_module_vars.tint_storage_buffer_sizes)[0u].x - 0u) / 2u))) {
+    simdgroup_half8x8 v_1 = make_filled_simdgroup_matrix<half, 8, 8>(0.0h);
+    simdgroup_load(v_1, (&(*tint_module_vars.sb_ro).arg_0[1u]), ulong(8u), ulong2(0ul), true);
+    v = v_1;
+  }
   simdgroup_half8x8 res = v;
   return res;
 }
 
-kernel void compute_main(device tint_array<half, 1024>* prevent_dce [[buffer(0)]], const device SB_RO* sb_ro [[buffer(1)]]) {
-  tint_module_vars_struct const tint_module_vars = tint_module_vars_struct{.prevent_dce=prevent_dce, .sb_ro=sb_ro};
+kernel void compute_main(device tint_array<half, 1024>* prevent_dce [[buffer(0)]], const device SB_RO* sb_ro [[buffer(1)]], const constant tint_array<uint4, 1>* tint_storage_buffer_sizes [[buffer(30)]]) {
+  tint_module_vars_struct const tint_module_vars = tint_module_vars_struct{.prevent_dce=prevent_dce, .sb_ro=sb_ro, .tint_storage_buffer_sizes=tint_storage_buffer_sizes};
   simdgroup_store(subgroupMatrixLoad_5de410(tint_module_vars), (&(*tint_module_vars.prevent_dce)[0u]), ulong(64u), ulong2(0ul), false);
 }
diff --git a/test/tint/builtins/gen/literal/subgroupMatrixLoad/5de410.wgsl.expected.spvasm b/test/tint/builtins/gen/literal/subgroupMatrixLoad/5de410.wgsl.expected.spvasm
index 0c7a26b..b42ba4f 100644
--- a/test/tint/builtins/gen/literal/subgroupMatrixLoad/5de410.wgsl.expected.spvasm
+++ b/test/tint/builtins/gen/literal/subgroupMatrixLoad/5de410.wgsl.expected.spvasm
@@ -1,7 +1,7 @@
 ; SPIR-V
 ; Version: 1.3
 ; Generator: Google Tint Compiler; 1
-; Bound: 39
+; Bound: 52
 ; Schema: 0
                OpCapability Shader
                OpCapability VulkanMemoryModel
@@ -51,29 +51,46 @@
          %13 = OpTypeCooperativeMatrixKHR %half %uint_3 %uint_8 %uint_8 %uint_0
          %17 = OpTypeFunction %13
 %_ptr_StorageBuffer__runtimearr_half = OpTypePointer StorageBuffer %_runtimearr_half
-%_ptr_StorageBuffer_half = OpTypePointer StorageBuffer %half
+     %uint_7 = OpConstant %uint 7
      %uint_1 = OpConstant %uint 1
+       %bool = OpTypeBool
 %_ptr_Function_13 = OpTypePointer Function %13
+%half_0x0p_0 = OpConstant %half 0x0p+0
+         %31 = OpConstantComposite %13 %half_0x0p_0
+%_ptr_StorageBuffer_half = OpTypePointer StorageBuffer %half
        %void = OpTypeVoid
-         %30 = OpTypeFunction %void
+         %43 = OpTypeFunction %void
 %_ptr_StorageBuffer__arr_half_uint_1024 = OpTypePointer StorageBuffer %_arr_half_uint_1024
 %_ptr_StorageBuffer_half_0 = OpTypePointer StorageBuffer %half
     %uint_64 = OpConstant %uint 64
 %subgroupMatrixLoad_5de410 = OpFunction %13 None %17
          %18 = OpLabel
+         %29 = OpVariable %_ptr_Function_13 Function %31
         %res = OpVariable %_ptr_Function_13 Function
          %19 = OpAccessChain %_ptr_StorageBuffer__runtimearr_half %sb_ro %uint_0
-         %21 = OpAccessChain %_ptr_StorageBuffer_half %19 %uint_1
-         %24 = OpCooperativeMatrixLoadKHR %13 %21 %uint_1 %uint_8 NonPrivatePointer
-               OpStore %res %24
-         %27 = OpLoad %13 %res None
-               OpReturnValue %27
+         %21 = OpArrayLength %uint %sb_ro 0
+         %22 = OpIMul %uint %uint_8 %uint_7
+         %24 = OpIAdd %uint %uint_1 %22
+         %26 = OpIAdd %uint %24 %uint_8
+         %27 = OpULessThanEqual %bool %26 %21
+               OpSelectionMerge %33 None
+               OpBranchConditional %27 %34 %33
+         %34 = OpLabel
+         %38 = OpAccessChain %_ptr_StorageBuffer_half %19 %uint_1
+         %40 = OpCooperativeMatrixLoadKHR %13 %38 %uint_1 %uint_8 NonPrivatePointer
+               OpStore %29 %40 None
+               OpBranch %33
+         %33 = OpLabel
+         %35 = OpLoad %13 %29 None
+               OpStore %res %35
+         %37 = OpLoad %13 %res None
+               OpReturnValue %37
                OpFunctionEnd
-%compute_main = OpFunction %void None %30
-         %31 = OpLabel
-         %32 = OpFunctionCall %13 %subgroupMatrixLoad_5de410
-         %33 = OpAccessChain %_ptr_StorageBuffer__arr_half_uint_1024 %1 %uint_0
-         %35 = OpAccessChain %_ptr_StorageBuffer_half_0 %33 %uint_0
-               OpCooperativeMatrixStoreKHR %35 %32 %uint_0 %uint_64 NonPrivatePointer
+%compute_main = OpFunction %void None %43
+         %44 = OpLabel
+         %45 = OpFunctionCall %13 %subgroupMatrixLoad_5de410
+         %46 = OpAccessChain %_ptr_StorageBuffer__arr_half_uint_1024 %1 %uint_0
+         %48 = OpAccessChain %_ptr_StorageBuffer_half_0 %46 %uint_0
+               OpCooperativeMatrixStoreKHR %48 %45 %uint_0 %uint_64 NonPrivatePointer
                OpReturn
                OpFunctionEnd
diff --git a/test/tint/builtins/gen/literal/subgroupMatrixLoad/6c1e00.wgsl.expected.msl b/test/tint/builtins/gen/literal/subgroupMatrixLoad/6c1e00.wgsl.expected.msl
index 788c64c..51ecaf5 100644
--- a/test/tint/builtins/gen/literal/subgroupMatrixLoad/6c1e00.wgsl.expected.msl
+++ b/test/tint/builtins/gen/literal/subgroupMatrixLoad/6c1e00.wgsl.expected.msl
@@ -20,16 +20,21 @@
 struct tint_module_vars_struct {
   device tint_array<float, 1024>* prevent_dce;
   const device SB_RO* sb_ro;
+  const constant tint_array<uint4, 1>* tint_storage_buffer_sizes;
 };
 
 simdgroup_float8x8 subgroupMatrixLoad_6c1e00(tint_module_vars_struct tint_module_vars) {
   simdgroup_float8x8 v = make_filled_simdgroup_matrix<float, 8, 8>(0.0f);
-  simdgroup_load(v, (&(*tint_module_vars.sb_ro).arg_0[1u]), ulong(8u), ulong2(0ul), true);
+  if ((((1u + (8u * 7u)) + 8u) <= (((*tint_module_vars.tint_storage_buffer_sizes)[0u].x - 0u) / 4u))) {
+    simdgroup_float8x8 v_1 = make_filled_simdgroup_matrix<float, 8, 8>(0.0f);
+    simdgroup_load(v_1, (&(*tint_module_vars.sb_ro).arg_0[1u]), ulong(8u), ulong2(0ul), true);
+    v = v_1;
+  }
   simdgroup_float8x8 res = v;
   return res;
 }
 
-kernel void compute_main(device tint_array<float, 1024>* prevent_dce [[buffer(0)]], const device SB_RO* sb_ro [[buffer(1)]]) {
-  tint_module_vars_struct const tint_module_vars = tint_module_vars_struct{.prevent_dce=prevent_dce, .sb_ro=sb_ro};
+kernel void compute_main(device tint_array<float, 1024>* prevent_dce [[buffer(0)]], const device SB_RO* sb_ro [[buffer(1)]], const constant tint_array<uint4, 1>* tint_storage_buffer_sizes [[buffer(30)]]) {
+  tint_module_vars_struct const tint_module_vars = tint_module_vars_struct{.prevent_dce=prevent_dce, .sb_ro=sb_ro, .tint_storage_buffer_sizes=tint_storage_buffer_sizes};
   simdgroup_store(subgroupMatrixLoad_6c1e00(tint_module_vars), (&(*tint_module_vars.prevent_dce)[0u]), ulong(64u), ulong2(0ul), false);
 }
diff --git a/test/tint/builtins/gen/literal/subgroupMatrixLoad/6c1e00.wgsl.expected.spvasm b/test/tint/builtins/gen/literal/subgroupMatrixLoad/6c1e00.wgsl.expected.spvasm
index 6de182e..9785c02 100644
--- a/test/tint/builtins/gen/literal/subgroupMatrixLoad/6c1e00.wgsl.expected.spvasm
+++ b/test/tint/builtins/gen/literal/subgroupMatrixLoad/6c1e00.wgsl.expected.spvasm
@@ -1,7 +1,7 @@
 ; SPIR-V
 ; Version: 1.3
 ; Generator: Google Tint Compiler; 1
-; Bound: 39
+; Bound: 52
 ; Schema: 0
                OpCapability Shader
                OpCapability VulkanMemoryModel
@@ -49,28 +49,45 @@
          %17 = OpTypeFunction %13
 %_ptr_StorageBuffer__runtimearr_float = OpTypePointer StorageBuffer %_runtimearr_float
      %uint_0 = OpConstant %uint 0
-%_ptr_StorageBuffer_float = OpTypePointer StorageBuffer %float
+     %uint_7 = OpConstant %uint 7
+       %bool = OpTypeBool
 %_ptr_Function_13 = OpTypePointer Function %13
+    %float_0 = OpConstant %float 0
+         %31 = OpConstantComposite %13 %float_0
+%_ptr_StorageBuffer_float = OpTypePointer StorageBuffer %float
        %void = OpTypeVoid
-         %30 = OpTypeFunction %void
+         %43 = OpTypeFunction %void
 %_ptr_StorageBuffer__arr_float_uint_1024 = OpTypePointer StorageBuffer %_arr_float_uint_1024
 %_ptr_StorageBuffer_float_0 = OpTypePointer StorageBuffer %float
     %uint_64 = OpConstant %uint 64
 %subgroupMatrixLoad_6c1e00 = OpFunction %13 None %17
          %18 = OpLabel
+         %29 = OpVariable %_ptr_Function_13 Function %31
         %res = OpVariable %_ptr_Function_13 Function
          %19 = OpAccessChain %_ptr_StorageBuffer__runtimearr_float %sb_ro %uint_0
-         %22 = OpAccessChain %_ptr_StorageBuffer_float %19 %uint_1
-         %24 = OpCooperativeMatrixLoadKHR %13 %22 %uint_1 %uint_8 NonPrivatePointer
-               OpStore %res %24
-         %27 = OpLoad %13 %res None
-               OpReturnValue %27
+         %22 = OpArrayLength %uint %sb_ro 0
+         %23 = OpIMul %uint %uint_8 %uint_7
+         %25 = OpIAdd %uint %uint_1 %23
+         %26 = OpIAdd %uint %25 %uint_8
+         %27 = OpULessThanEqual %bool %26 %22
+               OpSelectionMerge %33 None
+               OpBranchConditional %27 %34 %33
+         %34 = OpLabel
+         %38 = OpAccessChain %_ptr_StorageBuffer_float %19 %uint_1
+         %40 = OpCooperativeMatrixLoadKHR %13 %38 %uint_1 %uint_8 NonPrivatePointer
+               OpStore %29 %40 None
+               OpBranch %33
+         %33 = OpLabel
+         %35 = OpLoad %13 %29 None
+               OpStore %res %35
+         %37 = OpLoad %13 %res None
+               OpReturnValue %37
                OpFunctionEnd
-%compute_main = OpFunction %void None %30
-         %31 = OpLabel
-         %32 = OpFunctionCall %13 %subgroupMatrixLoad_6c1e00
-         %33 = OpAccessChain %_ptr_StorageBuffer__arr_float_uint_1024 %1 %uint_0
-         %35 = OpAccessChain %_ptr_StorageBuffer_float_0 %33 %uint_0
-               OpCooperativeMatrixStoreKHR %35 %32 %uint_0 %uint_64 NonPrivatePointer
+%compute_main = OpFunction %void None %43
+         %44 = OpLabel
+         %45 = OpFunctionCall %13 %subgroupMatrixLoad_6c1e00
+         %46 = OpAccessChain %_ptr_StorageBuffer__arr_float_uint_1024 %1 %uint_0
+         %48 = OpAccessChain %_ptr_StorageBuffer_float_0 %46 %uint_0
+               OpCooperativeMatrixStoreKHR %48 %45 %uint_0 %uint_64 NonPrivatePointer
                OpReturn
                OpFunctionEnd
diff --git a/test/tint/builtins/gen/literal/subgroupMatrixLoad/6e2773.wgsl.expected.spvasm b/test/tint/builtins/gen/literal/subgroupMatrixLoad/6e2773.wgsl.expected.spvasm
index e5ebb4b..6adbec3 100644
--- a/test/tint/builtins/gen/literal/subgroupMatrixLoad/6e2773.wgsl.expected.spvasm
+++ b/test/tint/builtins/gen/literal/subgroupMatrixLoad/6e2773.wgsl.expected.spvasm
@@ -1,7 +1,7 @@
 ; SPIR-V
 ; Version: 1.3
 ; Generator: Google Tint Compiler; 1
-; Bound: 39
+; Bound: 52
 ; Schema: 0
                OpCapability Shader
                OpCapability VulkanMemoryModel
@@ -48,28 +48,45 @@
          %17 = OpTypeFunction %13
 %_ptr_StorageBuffer__runtimearr_int = OpTypePointer StorageBuffer %_runtimearr_int
      %uint_0 = OpConstant %uint 0
-%_ptr_StorageBuffer_int = OpTypePointer StorageBuffer %int
+     %uint_7 = OpConstant %uint 7
      %uint_1 = OpConstant %uint 1
+       %bool = OpTypeBool
 %_ptr_Function_13 = OpTypePointer Function %13
+      %int_0 = OpConstant %int 0
+         %32 = OpConstantComposite %13 %int_0
+%_ptr_StorageBuffer_int = OpTypePointer StorageBuffer %int
        %void = OpTypeVoid
-         %31 = OpTypeFunction %void
+         %44 = OpTypeFunction %void
 %_ptr_StorageBuffer__arr_int_uint_1024 = OpTypePointer StorageBuffer %_arr_int_uint_1024
     %uint_64 = OpConstant %uint 64
 %subgroupMatrixLoad_6e2773 = OpFunction %13 None %17
          %18 = OpLabel
+         %30 = OpVariable %_ptr_Function_13 Function %32
         %res = OpVariable %_ptr_Function_13 Function
          %19 = OpAccessChain %_ptr_StorageBuffer__runtimearr_int %sb_rw %uint_0
-         %22 = OpAccessChain %_ptr_StorageBuffer_int %19 %uint_1
-         %25 = OpCooperativeMatrixLoadKHR %13 %22 %uint_1 %uint_8 NonPrivatePointer
-               OpStore %res %25
-         %28 = OpLoad %13 %res None
-               OpReturnValue %28
+         %22 = OpArrayLength %uint %sb_rw 0
+         %23 = OpIMul %uint %uint_8 %uint_7
+         %25 = OpIAdd %uint %uint_1 %23
+         %27 = OpIAdd %uint %25 %uint_8
+         %28 = OpULessThanEqual %bool %27 %22
+               OpSelectionMerge %34 None
+               OpBranchConditional %28 %35 %34
+         %35 = OpLabel
+         %39 = OpAccessChain %_ptr_StorageBuffer_int %19 %uint_1
+         %41 = OpCooperativeMatrixLoadKHR %13 %39 %uint_1 %uint_8 NonPrivatePointer
+               OpStore %30 %41 None
+               OpBranch %34
+         %34 = OpLabel
+         %36 = OpLoad %13 %30 None
+               OpStore %res %36
+         %38 = OpLoad %13 %res None
+               OpReturnValue %38
                OpFunctionEnd
-%compute_main = OpFunction %void None %31
-         %32 = OpLabel
-         %33 = OpFunctionCall %13 %subgroupMatrixLoad_6e2773
-         %34 = OpAccessChain %_ptr_StorageBuffer__arr_int_uint_1024 %1 %uint_0
-         %36 = OpAccessChain %_ptr_StorageBuffer_int %34 %uint_0
-               OpCooperativeMatrixStoreKHR %36 %33 %uint_0 %uint_64 NonPrivatePointer
+%compute_main = OpFunction %void None %44
+         %45 = OpLabel
+         %46 = OpFunctionCall %13 %subgroupMatrixLoad_6e2773
+         %47 = OpAccessChain %_ptr_StorageBuffer__arr_int_uint_1024 %1 %uint_0
+         %49 = OpAccessChain %_ptr_StorageBuffer_int %47 %uint_0
+               OpCooperativeMatrixStoreKHR %49 %46 %uint_0 %uint_64 NonPrivatePointer
                OpReturn
                OpFunctionEnd
diff --git a/test/tint/builtins/gen/literal/subgroupMatrixLoad/80b778.wgsl.expected.spvasm b/test/tint/builtins/gen/literal/subgroupMatrixLoad/80b778.wgsl.expected.spvasm
index ae2f442..796ecdb 100644
--- a/test/tint/builtins/gen/literal/subgroupMatrixLoad/80b778.wgsl.expected.spvasm
+++ b/test/tint/builtins/gen/literal/subgroupMatrixLoad/80b778.wgsl.expected.spvasm
@@ -1,7 +1,7 @@
 ; SPIR-V
 ; Version: 1.3
 ; Generator: Google Tint Compiler; 1
-; Bound: 37
+; Bound: 49
 ; Schema: 0
                OpCapability Shader
                OpCapability VulkanMemoryModel
@@ -47,27 +47,43 @@
          %16 = OpTypeFunction %12
 %_ptr_StorageBuffer__runtimearr_uint = OpTypePointer StorageBuffer %_runtimearr_uint
      %uint_0 = OpConstant %uint 0
-%_ptr_StorageBuffer_uint = OpTypePointer StorageBuffer %uint
+     %uint_7 = OpConstant %uint 7
+       %bool = OpTypeBool
 %_ptr_Function_12 = OpTypePointer Function %12
+         %30 = OpConstantComposite %12 %uint_0
+%_ptr_StorageBuffer_uint = OpTypePointer StorageBuffer %uint
        %void = OpTypeVoid
-         %29 = OpTypeFunction %void
+         %41 = OpTypeFunction %void
 %_ptr_StorageBuffer__arr_uint_uint_1024 = OpTypePointer StorageBuffer %_arr_uint_uint_1024
     %uint_64 = OpConstant %uint 64
 %subgroupMatrixLoad_80b778 = OpFunction %12 None %16
          %17 = OpLabel
+         %28 = OpVariable %_ptr_Function_12 Function %30
         %res = OpVariable %_ptr_Function_12 Function
          %18 = OpAccessChain %_ptr_StorageBuffer__runtimearr_uint %sb_rw %uint_0
-         %21 = OpAccessChain %_ptr_StorageBuffer_uint %18 %uint_1
-         %23 = OpCooperativeMatrixLoadKHR %12 %21 %uint_1 %uint_8 NonPrivatePointer
-               OpStore %res %23
-         %26 = OpLoad %12 %res None
-               OpReturnValue %26
+         %21 = OpArrayLength %uint %sb_rw 0
+         %22 = OpIMul %uint %uint_8 %uint_7
+         %24 = OpIAdd %uint %uint_1 %22
+         %25 = OpIAdd %uint %24 %uint_8
+         %26 = OpULessThanEqual %bool %25 %21
+               OpSelectionMerge %31 None
+               OpBranchConditional %26 %32 %31
+         %32 = OpLabel
+         %36 = OpAccessChain %_ptr_StorageBuffer_uint %18 %uint_1
+         %38 = OpCooperativeMatrixLoadKHR %12 %36 %uint_1 %uint_8 NonPrivatePointer
+               OpStore %28 %38 None
+               OpBranch %31
+         %31 = OpLabel
+         %33 = OpLoad %12 %28 None
+               OpStore %res %33
+         %35 = OpLoad %12 %res None
+               OpReturnValue %35
                OpFunctionEnd
-%compute_main = OpFunction %void None %29
-         %30 = OpLabel
-         %31 = OpFunctionCall %12 %subgroupMatrixLoad_80b778
-         %32 = OpAccessChain %_ptr_StorageBuffer__arr_uint_uint_1024 %1 %uint_0
-         %34 = OpAccessChain %_ptr_StorageBuffer_uint %32 %uint_0
-               OpCooperativeMatrixStoreKHR %34 %31 %uint_0 %uint_64 NonPrivatePointer
+%compute_main = OpFunction %void None %41
+         %42 = OpLabel
+         %43 = OpFunctionCall %12 %subgroupMatrixLoad_80b778
+         %44 = OpAccessChain %_ptr_StorageBuffer__arr_uint_uint_1024 %1 %uint_0
+         %46 = OpAccessChain %_ptr_StorageBuffer_uint %44 %uint_0
+               OpCooperativeMatrixStoreKHR %46 %43 %uint_0 %uint_64 NonPrivatePointer
                OpReturn
                OpFunctionEnd
diff --git a/test/tint/builtins/gen/literal/subgroupMatrixLoad/a798ae.wgsl.expected.spvasm b/test/tint/builtins/gen/literal/subgroupMatrixLoad/a798ae.wgsl.expected.spvasm
index 6b0845f..5a4503e 100644
--- a/test/tint/builtins/gen/literal/subgroupMatrixLoad/a798ae.wgsl.expected.spvasm
+++ b/test/tint/builtins/gen/literal/subgroupMatrixLoad/a798ae.wgsl.expected.spvasm
@@ -1,7 +1,7 @@
 ; SPIR-V
 ; Version: 1.3
 ; Generator: Google Tint Compiler; 1
-; Bound: 40
+; Bound: 53
 ; Schema: 0
                OpCapability Shader
                OpCapability VulkanMemoryModel
@@ -49,29 +49,46 @@
          %17 = OpTypeFunction %13
 %_ptr_StorageBuffer__runtimearr_int = OpTypePointer StorageBuffer %_runtimearr_int
      %uint_0 = OpConstant %uint 0
-%_ptr_StorageBuffer_int = OpTypePointer StorageBuffer %int
+     %uint_7 = OpConstant %uint 7
      %uint_1 = OpConstant %uint 1
+       %bool = OpTypeBool
 %_ptr_Function_13 = OpTypePointer Function %13
+      %int_0 = OpConstant %int 0
+         %32 = OpConstantComposite %13 %int_0
+%_ptr_StorageBuffer_int = OpTypePointer StorageBuffer %int
        %void = OpTypeVoid
-         %31 = OpTypeFunction %void
+         %44 = OpTypeFunction %void
 %_ptr_StorageBuffer__arr_int_uint_1024 = OpTypePointer StorageBuffer %_arr_int_uint_1024
 %_ptr_StorageBuffer_int_0 = OpTypePointer StorageBuffer %int
     %uint_64 = OpConstant %uint 64
 %subgroupMatrixLoad_a798ae = OpFunction %13 None %17
          %18 = OpLabel
+         %30 = OpVariable %_ptr_Function_13 Function %32
         %res = OpVariable %_ptr_Function_13 Function
          %19 = OpAccessChain %_ptr_StorageBuffer__runtimearr_int %sb_ro %uint_0
-         %22 = OpAccessChain %_ptr_StorageBuffer_int %19 %uint_1
-         %25 = OpCooperativeMatrixLoadKHR %13 %22 %uint_1 %uint_8 NonPrivatePointer
-               OpStore %res %25
-         %28 = OpLoad %13 %res None
-               OpReturnValue %28
+         %22 = OpArrayLength %uint %sb_ro 0
+         %23 = OpIMul %uint %uint_8 %uint_7
+         %25 = OpIAdd %uint %uint_1 %23
+         %27 = OpIAdd %uint %25 %uint_8
+         %28 = OpULessThanEqual %bool %27 %22
+               OpSelectionMerge %34 None
+               OpBranchConditional %28 %35 %34
+         %35 = OpLabel
+         %39 = OpAccessChain %_ptr_StorageBuffer_int %19 %uint_1
+         %41 = OpCooperativeMatrixLoadKHR %13 %39 %uint_1 %uint_8 NonPrivatePointer
+               OpStore %30 %41 None
+               OpBranch %34
+         %34 = OpLabel
+         %36 = OpLoad %13 %30 None
+               OpStore %res %36
+         %38 = OpLoad %13 %res None
+               OpReturnValue %38
                OpFunctionEnd
-%compute_main = OpFunction %void None %31
-         %32 = OpLabel
-         %33 = OpFunctionCall %13 %subgroupMatrixLoad_a798ae
-         %34 = OpAccessChain %_ptr_StorageBuffer__arr_int_uint_1024 %1 %uint_0
-         %36 = OpAccessChain %_ptr_StorageBuffer_int_0 %34 %uint_0
-               OpCooperativeMatrixStoreKHR %36 %33 %uint_0 %uint_64 NonPrivatePointer
+%compute_main = OpFunction %void None %44
+         %45 = OpLabel
+         %46 = OpFunctionCall %13 %subgroupMatrixLoad_a798ae
+         %47 = OpAccessChain %_ptr_StorageBuffer__arr_int_uint_1024 %1 %uint_0
+         %49 = OpAccessChain %_ptr_StorageBuffer_int_0 %47 %uint_0
+               OpCooperativeMatrixStoreKHR %49 %46 %uint_0 %uint_64 NonPrivatePointer
                OpReturn
                OpFunctionEnd
diff --git a/test/tint/builtins/gen/literal/subgroupMatrixLoad/a95397.wgsl.expected.msl b/test/tint/builtins/gen/literal/subgroupMatrixLoad/a95397.wgsl.expected.msl
index 299b7a3..8092b9d 100644
--- a/test/tint/builtins/gen/literal/subgroupMatrixLoad/a95397.wgsl.expected.msl
+++ b/test/tint/builtins/gen/literal/subgroupMatrixLoad/a95397.wgsl.expected.msl
@@ -20,16 +20,21 @@
 struct tint_module_vars_struct {
   device tint_array<half, 1024>* prevent_dce;
   const device SB_RO* sb_ro;
+  const constant tint_array<uint4, 1>* tint_storage_buffer_sizes;
 };
 
 simdgroup_half8x8 subgroupMatrixLoad_a95397(tint_module_vars_struct tint_module_vars) {
   simdgroup_half8x8 v = make_filled_simdgroup_matrix<half, 8, 8>(0.0h);
-  simdgroup_load(v, (&(*tint_module_vars.sb_ro).arg_0[1u]), ulong(8u), ulong2(0ul), true);
+  if ((((1u + (8u * 7u)) + 8u) <= (((*tint_module_vars.tint_storage_buffer_sizes)[0u].x - 0u) / 2u))) {
+    simdgroup_half8x8 v_1 = make_filled_simdgroup_matrix<half, 8, 8>(0.0h);
+    simdgroup_load(v_1, (&(*tint_module_vars.sb_ro).arg_0[1u]), ulong(8u), ulong2(0ul), true);
+    v = v_1;
+  }
   simdgroup_half8x8 res = v;
   return res;
 }
 
-kernel void compute_main(device tint_array<half, 1024>* prevent_dce [[buffer(0)]], const device SB_RO* sb_ro [[buffer(1)]]) {
-  tint_module_vars_struct const tint_module_vars = tint_module_vars_struct{.prevent_dce=prevent_dce, .sb_ro=sb_ro};
+kernel void compute_main(device tint_array<half, 1024>* prevent_dce [[buffer(0)]], const device SB_RO* sb_ro [[buffer(1)]], const constant tint_array<uint4, 1>* tint_storage_buffer_sizes [[buffer(30)]]) {
+  tint_module_vars_struct const tint_module_vars = tint_module_vars_struct{.prevent_dce=prevent_dce, .sb_ro=sb_ro, .tint_storage_buffer_sizes=tint_storage_buffer_sizes};
   simdgroup_store(subgroupMatrixLoad_a95397(tint_module_vars), (&(*tint_module_vars.prevent_dce)[0u]), ulong(64u), ulong2(0ul), false);
 }
diff --git a/test/tint/builtins/gen/literal/subgroupMatrixLoad/a95397.wgsl.expected.spvasm b/test/tint/builtins/gen/literal/subgroupMatrixLoad/a95397.wgsl.expected.spvasm
index 47e5d6d..f13a727 100644
--- a/test/tint/builtins/gen/literal/subgroupMatrixLoad/a95397.wgsl.expected.spvasm
+++ b/test/tint/builtins/gen/literal/subgroupMatrixLoad/a95397.wgsl.expected.spvasm
@@ -1,7 +1,7 @@
 ; SPIR-V
 ; Version: 1.3
 ; Generator: Google Tint Compiler; 1
-; Bound: 39
+; Bound: 52
 ; Schema: 0
                OpCapability Shader
                OpCapability VulkanMemoryModel
@@ -52,28 +52,45 @@
          %17 = OpTypeFunction %13
 %_ptr_StorageBuffer__runtimearr_half = OpTypePointer StorageBuffer %_runtimearr_half
      %uint_0 = OpConstant %uint 0
-%_ptr_StorageBuffer_half = OpTypePointer StorageBuffer %half
+     %uint_7 = OpConstant %uint 7
+       %bool = OpTypeBool
 %_ptr_Function_13 = OpTypePointer Function %13
+%half_0x0p_0 = OpConstant %half 0x0p+0
+         %31 = OpConstantComposite %13 %half_0x0p_0
+%_ptr_StorageBuffer_half = OpTypePointer StorageBuffer %half
        %void = OpTypeVoid
-         %30 = OpTypeFunction %void
+         %43 = OpTypeFunction %void
 %_ptr_StorageBuffer__arr_half_uint_1024 = OpTypePointer StorageBuffer %_arr_half_uint_1024
 %_ptr_StorageBuffer_half_0 = OpTypePointer StorageBuffer %half
     %uint_64 = OpConstant %uint 64
 %subgroupMatrixLoad_a95397 = OpFunction %13 None %17
          %18 = OpLabel
+         %29 = OpVariable %_ptr_Function_13 Function %31
         %res = OpVariable %_ptr_Function_13 Function
          %19 = OpAccessChain %_ptr_StorageBuffer__runtimearr_half %sb_ro %uint_0
-         %22 = OpAccessChain %_ptr_StorageBuffer_half %19 %uint_1
-         %24 = OpCooperativeMatrixLoadKHR %13 %22 %uint_1 %uint_8 NonPrivatePointer
-               OpStore %res %24
-         %27 = OpLoad %13 %res None
-               OpReturnValue %27
+         %22 = OpArrayLength %uint %sb_ro 0
+         %23 = OpIMul %uint %uint_8 %uint_7
+         %25 = OpIAdd %uint %uint_1 %23
+         %26 = OpIAdd %uint %25 %uint_8
+         %27 = OpULessThanEqual %bool %26 %22
+               OpSelectionMerge %33 None
+               OpBranchConditional %27 %34 %33
+         %34 = OpLabel
+         %38 = OpAccessChain %_ptr_StorageBuffer_half %19 %uint_1
+         %40 = OpCooperativeMatrixLoadKHR %13 %38 %uint_1 %uint_8 NonPrivatePointer
+               OpStore %29 %40 None
+               OpBranch %33
+         %33 = OpLabel
+         %35 = OpLoad %13 %29 None
+               OpStore %res %35
+         %37 = OpLoad %13 %res None
+               OpReturnValue %37
                OpFunctionEnd
-%compute_main = OpFunction %void None %30
-         %31 = OpLabel
-         %32 = OpFunctionCall %13 %subgroupMatrixLoad_a95397
-         %33 = OpAccessChain %_ptr_StorageBuffer__arr_half_uint_1024 %1 %uint_0
-         %35 = OpAccessChain %_ptr_StorageBuffer_half_0 %33 %uint_0
-               OpCooperativeMatrixStoreKHR %35 %32 %uint_0 %uint_64 NonPrivatePointer
+%compute_main = OpFunction %void None %43
+         %44 = OpLabel
+         %45 = OpFunctionCall %13 %subgroupMatrixLoad_a95397
+         %46 = OpAccessChain %_ptr_StorageBuffer__arr_half_uint_1024 %1 %uint_0
+         %48 = OpAccessChain %_ptr_StorageBuffer_half_0 %46 %uint_0
+               OpCooperativeMatrixStoreKHR %48 %45 %uint_0 %uint_64 NonPrivatePointer
                OpReturn
                OpFunctionEnd
diff --git a/test/tint/builtins/gen/literal/subgroupMatrixLoad/aadc6f.wgsl.expected.spvasm b/test/tint/builtins/gen/literal/subgroupMatrixLoad/aadc6f.wgsl.expected.spvasm
index 43f8d77..9b949bc 100644
--- a/test/tint/builtins/gen/literal/subgroupMatrixLoad/aadc6f.wgsl.expected.spvasm
+++ b/test/tint/builtins/gen/literal/subgroupMatrixLoad/aadc6f.wgsl.expected.spvasm
@@ -1,7 +1,7 @@
 ; SPIR-V
 ; Version: 1.3
 ; Generator: Google Tint Compiler; 1
-; Bound: 38
+; Bound: 51
 ; Schema: 0
                OpCapability Shader
                OpCapability VulkanMemoryModel
@@ -48,27 +48,44 @@
          %17 = OpTypeFunction %13
 %_ptr_StorageBuffer__runtimearr_int = OpTypePointer StorageBuffer %_runtimearr_int
      %uint_0 = OpConstant %uint 0
-%_ptr_StorageBuffer_int = OpTypePointer StorageBuffer %int
+     %uint_7 = OpConstant %uint 7
+       %bool = OpTypeBool
 %_ptr_Function_13 = OpTypePointer Function %13
+      %int_0 = OpConstant %int 0
+         %31 = OpConstantComposite %13 %int_0
+%_ptr_StorageBuffer_int = OpTypePointer StorageBuffer %int
        %void = OpTypeVoid
-         %30 = OpTypeFunction %void
+         %43 = OpTypeFunction %void
 %_ptr_StorageBuffer__arr_int_uint_1024 = OpTypePointer StorageBuffer %_arr_int_uint_1024
     %uint_64 = OpConstant %uint 64
 %subgroupMatrixLoad_aadc6f = OpFunction %13 None %17
          %18 = OpLabel
+         %29 = OpVariable %_ptr_Function_13 Function %31
         %res = OpVariable %_ptr_Function_13 Function
          %19 = OpAccessChain %_ptr_StorageBuffer__runtimearr_int %sb_rw %uint_0
-         %22 = OpAccessChain %_ptr_StorageBuffer_int %19 %uint_1
-         %24 = OpCooperativeMatrixLoadKHR %13 %22 %uint_1 %uint_8 NonPrivatePointer
-               OpStore %res %24
-         %27 = OpLoad %13 %res None
-               OpReturnValue %27
+         %22 = OpArrayLength %uint %sb_rw 0
+         %23 = OpIMul %uint %uint_8 %uint_7
+         %25 = OpIAdd %uint %uint_1 %23
+         %26 = OpIAdd %uint %25 %uint_8
+         %27 = OpULessThanEqual %bool %26 %22
+               OpSelectionMerge %33 None
+               OpBranchConditional %27 %34 %33
+         %34 = OpLabel
+         %38 = OpAccessChain %_ptr_StorageBuffer_int %19 %uint_1
+         %40 = OpCooperativeMatrixLoadKHR %13 %38 %uint_1 %uint_8 NonPrivatePointer
+               OpStore %29 %40 None
+               OpBranch %33
+         %33 = OpLabel
+         %35 = OpLoad %13 %29 None
+               OpStore %res %35
+         %37 = OpLoad %13 %res None
+               OpReturnValue %37
                OpFunctionEnd
-%compute_main = OpFunction %void None %30
-         %31 = OpLabel
-         %32 = OpFunctionCall %13 %subgroupMatrixLoad_aadc6f
-         %33 = OpAccessChain %_ptr_StorageBuffer__arr_int_uint_1024 %1 %uint_0
-         %35 = OpAccessChain %_ptr_StorageBuffer_int %33 %uint_0
-               OpCooperativeMatrixStoreKHR %35 %32 %uint_0 %uint_64 NonPrivatePointer
+%compute_main = OpFunction %void None %43
+         %44 = OpLabel
+         %45 = OpFunctionCall %13 %subgroupMatrixLoad_aadc6f
+         %46 = OpAccessChain %_ptr_StorageBuffer__arr_int_uint_1024 %1 %uint_0
+         %48 = OpAccessChain %_ptr_StorageBuffer_int %46 %uint_0
+               OpCooperativeMatrixStoreKHR %48 %45 %uint_0 %uint_64 NonPrivatePointer
                OpReturn
                OpFunctionEnd
diff --git a/test/tint/builtins/gen/literal/subgroupMatrixLoad/c71ce4.wgsl.expected.msl b/test/tint/builtins/gen/literal/subgroupMatrixLoad/c71ce4.wgsl.expected.msl
index 238565a..7ecebcc 100644
--- a/test/tint/builtins/gen/literal/subgroupMatrixLoad/c71ce4.wgsl.expected.msl
+++ b/test/tint/builtins/gen/literal/subgroupMatrixLoad/c71ce4.wgsl.expected.msl
@@ -20,16 +20,21 @@
 struct tint_module_vars_struct {
   device tint_array<half, 1024>* prevent_dce;
   device SB_RW* sb_rw;
+  const constant tint_array<uint4, 1>* tint_storage_buffer_sizes;
 };
 
 simdgroup_half8x8 subgroupMatrixLoad_c71ce4(tint_module_vars_struct tint_module_vars) {
   simdgroup_half8x8 v = make_filled_simdgroup_matrix<half, 8, 8>(0.0h);
-  simdgroup_load(v, (&(*tint_module_vars.sb_rw).arg_0[1u]), ulong(8u), ulong2(0ul), true);
+  if ((((1u + (8u * 7u)) + 8u) <= (((*tint_module_vars.tint_storage_buffer_sizes)[0u].x - 0u) / 2u))) {
+    simdgroup_half8x8 v_1 = make_filled_simdgroup_matrix<half, 8, 8>(0.0h);
+    simdgroup_load(v_1, (&(*tint_module_vars.sb_rw).arg_0[1u]), ulong(8u), ulong2(0ul), true);
+    v = v_1;
+  }
   simdgroup_half8x8 res = v;
   return res;
 }
 
-kernel void compute_main(device tint_array<half, 1024>* prevent_dce [[buffer(0)]], device SB_RW* sb_rw [[buffer(1)]]) {
-  tint_module_vars_struct const tint_module_vars = tint_module_vars_struct{.prevent_dce=prevent_dce, .sb_rw=sb_rw};
+kernel void compute_main(device tint_array<half, 1024>* prevent_dce [[buffer(0)]], device SB_RW* sb_rw [[buffer(1)]], const constant tint_array<uint4, 1>* tint_storage_buffer_sizes [[buffer(30)]]) {
+  tint_module_vars_struct const tint_module_vars = tint_module_vars_struct{.prevent_dce=prevent_dce, .sb_rw=sb_rw, .tint_storage_buffer_sizes=tint_storage_buffer_sizes};
   simdgroup_store(subgroupMatrixLoad_c71ce4(tint_module_vars), (&(*tint_module_vars.prevent_dce)[0u]), ulong(64u), ulong2(0ul), false);
 }
diff --git a/test/tint/builtins/gen/literal/subgroupMatrixLoad/c71ce4.wgsl.expected.spvasm b/test/tint/builtins/gen/literal/subgroupMatrixLoad/c71ce4.wgsl.expected.spvasm
index 89810df..e2a8698 100644
--- a/test/tint/builtins/gen/literal/subgroupMatrixLoad/c71ce4.wgsl.expected.spvasm
+++ b/test/tint/builtins/gen/literal/subgroupMatrixLoad/c71ce4.wgsl.expected.spvasm
@@ -1,7 +1,7 @@
 ; SPIR-V
 ; Version: 1.3
 ; Generator: Google Tint Compiler; 1
-; Bound: 38
+; Bound: 51
 ; Schema: 0
                OpCapability Shader
                OpCapability VulkanMemoryModel
@@ -51,27 +51,44 @@
          %17 = OpTypeFunction %13
 %_ptr_StorageBuffer__runtimearr_half = OpTypePointer StorageBuffer %_runtimearr_half
      %uint_0 = OpConstant %uint 0
-%_ptr_StorageBuffer_half = OpTypePointer StorageBuffer %half
+     %uint_7 = OpConstant %uint 7
+       %bool = OpTypeBool
 %_ptr_Function_13 = OpTypePointer Function %13
+%half_0x0p_0 = OpConstant %half 0x0p+0
+         %31 = OpConstantComposite %13 %half_0x0p_0
+%_ptr_StorageBuffer_half = OpTypePointer StorageBuffer %half
        %void = OpTypeVoid
-         %30 = OpTypeFunction %void
+         %43 = OpTypeFunction %void
 %_ptr_StorageBuffer__arr_half_uint_1024 = OpTypePointer StorageBuffer %_arr_half_uint_1024
     %uint_64 = OpConstant %uint 64
 %subgroupMatrixLoad_c71ce4 = OpFunction %13 None %17
          %18 = OpLabel
+         %29 = OpVariable %_ptr_Function_13 Function %31
         %res = OpVariable %_ptr_Function_13 Function
          %19 = OpAccessChain %_ptr_StorageBuffer__runtimearr_half %sb_rw %uint_0
-         %22 = OpAccessChain %_ptr_StorageBuffer_half %19 %uint_1
-         %24 = OpCooperativeMatrixLoadKHR %13 %22 %uint_1 %uint_8 NonPrivatePointer
-               OpStore %res %24
-         %27 = OpLoad %13 %res None
-               OpReturnValue %27
+         %22 = OpArrayLength %uint %sb_rw 0
+         %23 = OpIMul %uint %uint_8 %uint_7
+         %25 = OpIAdd %uint %uint_1 %23
+         %26 = OpIAdd %uint %25 %uint_8
+         %27 = OpULessThanEqual %bool %26 %22
+               OpSelectionMerge %33 None
+               OpBranchConditional %27 %34 %33
+         %34 = OpLabel
+         %38 = OpAccessChain %_ptr_StorageBuffer_half %19 %uint_1
+         %40 = OpCooperativeMatrixLoadKHR %13 %38 %uint_1 %uint_8 NonPrivatePointer
+               OpStore %29 %40 None
+               OpBranch %33
+         %33 = OpLabel
+         %35 = OpLoad %13 %29 None
+               OpStore %res %35
+         %37 = OpLoad %13 %res None
+               OpReturnValue %37
                OpFunctionEnd
-%compute_main = OpFunction %void None %30
-         %31 = OpLabel
-         %32 = OpFunctionCall %13 %subgroupMatrixLoad_c71ce4
-         %33 = OpAccessChain %_ptr_StorageBuffer__arr_half_uint_1024 %1 %uint_0
-         %35 = OpAccessChain %_ptr_StorageBuffer_half %33 %uint_0
-               OpCooperativeMatrixStoreKHR %35 %32 %uint_0 %uint_64 NonPrivatePointer
+%compute_main = OpFunction %void None %43
+         %44 = OpLabel
+         %45 = OpFunctionCall %13 %subgroupMatrixLoad_c71ce4
+         %46 = OpAccessChain %_ptr_StorageBuffer__arr_half_uint_1024 %1 %uint_0
+         %48 = OpAccessChain %_ptr_StorageBuffer_half %46 %uint_0
+               OpCooperativeMatrixStoreKHR %48 %45 %uint_0 %uint_64 NonPrivatePointer
                OpReturn
                OpFunctionEnd
diff --git a/test/tint/builtins/gen/literal/subgroupMatrixLoad/ca4539.wgsl.expected.msl b/test/tint/builtins/gen/literal/subgroupMatrixLoad/ca4539.wgsl.expected.msl
index 33125c5..8f11484 100644
--- a/test/tint/builtins/gen/literal/subgroupMatrixLoad/ca4539.wgsl.expected.msl
+++ b/test/tint/builtins/gen/literal/subgroupMatrixLoad/ca4539.wgsl.expected.msl
@@ -20,16 +20,21 @@
 struct tint_module_vars_struct {
   device tint_array<float, 1024>* prevent_dce;
   const device SB_RO* sb_ro;
+  const constant tint_array<uint4, 1>* tint_storage_buffer_sizes;
 };
 
 simdgroup_float8x8 subgroupMatrixLoad_ca4539(tint_module_vars_struct tint_module_vars) {
   simdgroup_float8x8 v = make_filled_simdgroup_matrix<float, 8, 8>(0.0f);
-  simdgroup_load(v, (&(*tint_module_vars.sb_ro).arg_0[1u]), ulong(8u), ulong2(0ul), true);
+  if ((((1u + (8u * 7u)) + 8u) <= (((*tint_module_vars.tint_storage_buffer_sizes)[0u].x - 0u) / 4u))) {
+    simdgroup_float8x8 v_1 = make_filled_simdgroup_matrix<float, 8, 8>(0.0f);
+    simdgroup_load(v_1, (&(*tint_module_vars.sb_ro).arg_0[1u]), ulong(8u), ulong2(0ul), true);
+    v = v_1;
+  }
   simdgroup_float8x8 res = v;
   return res;
 }
 
-kernel void compute_main(device tint_array<float, 1024>* prevent_dce [[buffer(0)]], const device SB_RO* sb_ro [[buffer(1)]]) {
-  tint_module_vars_struct const tint_module_vars = tint_module_vars_struct{.prevent_dce=prevent_dce, .sb_ro=sb_ro};
+kernel void compute_main(device tint_array<float, 1024>* prevent_dce [[buffer(0)]], const device SB_RO* sb_ro [[buffer(1)]], const constant tint_array<uint4, 1>* tint_storage_buffer_sizes [[buffer(30)]]) {
+  tint_module_vars_struct const tint_module_vars = tint_module_vars_struct{.prevent_dce=prevent_dce, .sb_ro=sb_ro, .tint_storage_buffer_sizes=tint_storage_buffer_sizes};
   simdgroup_store(subgroupMatrixLoad_ca4539(tint_module_vars), (&(*tint_module_vars.prevent_dce)[0u]), ulong(64u), ulong2(0ul), false);
 }
diff --git a/test/tint/builtins/gen/literal/subgroupMatrixLoad/ca4539.wgsl.expected.spvasm b/test/tint/builtins/gen/literal/subgroupMatrixLoad/ca4539.wgsl.expected.spvasm
index c1d07aa..6f24ac3 100644
--- a/test/tint/builtins/gen/literal/subgroupMatrixLoad/ca4539.wgsl.expected.spvasm
+++ b/test/tint/builtins/gen/literal/subgroupMatrixLoad/ca4539.wgsl.expected.spvasm
@@ -1,7 +1,7 @@
 ; SPIR-V
 ; Version: 1.3
 ; Generator: Google Tint Compiler; 1
-; Bound: 40
+; Bound: 53
 ; Schema: 0
                OpCapability Shader
                OpCapability VulkanMemoryModel
@@ -49,29 +49,46 @@
          %17 = OpTypeFunction %13
 %_ptr_StorageBuffer__runtimearr_float = OpTypePointer StorageBuffer %_runtimearr_float
      %uint_0 = OpConstant %uint 0
-%_ptr_StorageBuffer_float = OpTypePointer StorageBuffer %float
+     %uint_7 = OpConstant %uint 7
      %uint_1 = OpConstant %uint 1
+       %bool = OpTypeBool
 %_ptr_Function_13 = OpTypePointer Function %13
+    %float_0 = OpConstant %float 0
+         %32 = OpConstantComposite %13 %float_0
+%_ptr_StorageBuffer_float = OpTypePointer StorageBuffer %float
        %void = OpTypeVoid
-         %31 = OpTypeFunction %void
+         %44 = OpTypeFunction %void
 %_ptr_StorageBuffer__arr_float_uint_1024 = OpTypePointer StorageBuffer %_arr_float_uint_1024
 %_ptr_StorageBuffer_float_0 = OpTypePointer StorageBuffer %float
     %uint_64 = OpConstant %uint 64
 %subgroupMatrixLoad_ca4539 = OpFunction %13 None %17
          %18 = OpLabel
+         %30 = OpVariable %_ptr_Function_13 Function %32
         %res = OpVariable %_ptr_Function_13 Function
          %19 = OpAccessChain %_ptr_StorageBuffer__runtimearr_float %sb_ro %uint_0
-         %22 = OpAccessChain %_ptr_StorageBuffer_float %19 %uint_1
-         %25 = OpCooperativeMatrixLoadKHR %13 %22 %uint_1 %uint_8 NonPrivatePointer
-               OpStore %res %25
-         %28 = OpLoad %13 %res None
-               OpReturnValue %28
+         %22 = OpArrayLength %uint %sb_ro 0
+         %23 = OpIMul %uint %uint_8 %uint_7
+         %25 = OpIAdd %uint %uint_1 %23
+         %27 = OpIAdd %uint %25 %uint_8
+         %28 = OpULessThanEqual %bool %27 %22
+               OpSelectionMerge %34 None
+               OpBranchConditional %28 %35 %34
+         %35 = OpLabel
+         %39 = OpAccessChain %_ptr_StorageBuffer_float %19 %uint_1
+         %41 = OpCooperativeMatrixLoadKHR %13 %39 %uint_1 %uint_8 NonPrivatePointer
+               OpStore %30 %41 None
+               OpBranch %34
+         %34 = OpLabel
+         %36 = OpLoad %13 %30 None
+               OpStore %res %36
+         %38 = OpLoad %13 %res None
+               OpReturnValue %38
                OpFunctionEnd
-%compute_main = OpFunction %void None %31
-         %32 = OpLabel
-         %33 = OpFunctionCall %13 %subgroupMatrixLoad_ca4539
-         %34 = OpAccessChain %_ptr_StorageBuffer__arr_float_uint_1024 %1 %uint_0
-         %36 = OpAccessChain %_ptr_StorageBuffer_float_0 %34 %uint_0
-               OpCooperativeMatrixStoreKHR %36 %33 %uint_0 %uint_64 NonPrivatePointer
+%compute_main = OpFunction %void None %44
+         %45 = OpLabel
+         %46 = OpFunctionCall %13 %subgroupMatrixLoad_ca4539
+         %47 = OpAccessChain %_ptr_StorageBuffer__arr_float_uint_1024 %1 %uint_0
+         %49 = OpAccessChain %_ptr_StorageBuffer_float_0 %47 %uint_0
+               OpCooperativeMatrixStoreKHR %49 %46 %uint_0 %uint_64 NonPrivatePointer
                OpReturn
                OpFunctionEnd
diff --git a/test/tint/builtins/gen/literal/subgroupMatrixLoad/d80c87.wgsl.expected.spvasm b/test/tint/builtins/gen/literal/subgroupMatrixLoad/d80c87.wgsl.expected.spvasm
index 1f4f36e..a6dde5f 100644
--- a/test/tint/builtins/gen/literal/subgroupMatrixLoad/d80c87.wgsl.expected.spvasm
+++ b/test/tint/builtins/gen/literal/subgroupMatrixLoad/d80c87.wgsl.expected.spvasm
@@ -1,7 +1,7 @@
 ; SPIR-V
 ; Version: 1.3
 ; Generator: Google Tint Compiler; 1
-; Bound: 39
+; Bound: 51
 ; Schema: 0
                OpCapability Shader
                OpCapability VulkanMemoryModel
@@ -48,29 +48,45 @@
          %16 = OpTypeFunction %12
 %_ptr_StorageBuffer__runtimearr_uint = OpTypePointer StorageBuffer %_runtimearr_uint
      %uint_0 = OpConstant %uint 0
-%_ptr_StorageBuffer_uint = OpTypePointer StorageBuffer %uint
+     %uint_7 = OpConstant %uint 7
      %uint_1 = OpConstant %uint 1
+       %bool = OpTypeBool
 %_ptr_Function_12 = OpTypePointer Function %12
+         %31 = OpConstantComposite %12 %uint_0
+%_ptr_StorageBuffer_uint = OpTypePointer StorageBuffer %uint
        %void = OpTypeVoid
-         %30 = OpTypeFunction %void
+         %42 = OpTypeFunction %void
 %_ptr_StorageBuffer__arr_uint_uint_1024 = OpTypePointer StorageBuffer %_arr_uint_uint_1024
 %_ptr_StorageBuffer_uint_0 = OpTypePointer StorageBuffer %uint
     %uint_64 = OpConstant %uint 64
 %subgroupMatrixLoad_d80c87 = OpFunction %12 None %16
          %17 = OpLabel
+         %29 = OpVariable %_ptr_Function_12 Function %31
         %res = OpVariable %_ptr_Function_12 Function
          %18 = OpAccessChain %_ptr_StorageBuffer__runtimearr_uint %sb_ro %uint_0
-         %21 = OpAccessChain %_ptr_StorageBuffer_uint %18 %uint_1
-         %24 = OpCooperativeMatrixLoadKHR %12 %21 %uint_1 %uint_8 NonPrivatePointer
-               OpStore %res %24
-         %27 = OpLoad %12 %res None
-               OpReturnValue %27
+         %21 = OpArrayLength %uint %sb_ro 0
+         %22 = OpIMul %uint %uint_8 %uint_7
+         %24 = OpIAdd %uint %uint_1 %22
+         %26 = OpIAdd %uint %24 %uint_8
+         %27 = OpULessThanEqual %bool %26 %21
+               OpSelectionMerge %32 None
+               OpBranchConditional %27 %33 %32
+         %33 = OpLabel
+         %37 = OpAccessChain %_ptr_StorageBuffer_uint %18 %uint_1
+         %39 = OpCooperativeMatrixLoadKHR %12 %37 %uint_1 %uint_8 NonPrivatePointer
+               OpStore %29 %39 None
+               OpBranch %32
+         %32 = OpLabel
+         %34 = OpLoad %12 %29 None
+               OpStore %res %34
+         %36 = OpLoad %12 %res None
+               OpReturnValue %36
                OpFunctionEnd
-%compute_main = OpFunction %void None %30
-         %31 = OpLabel
-         %32 = OpFunctionCall %12 %subgroupMatrixLoad_d80c87
-         %33 = OpAccessChain %_ptr_StorageBuffer__arr_uint_uint_1024 %1 %uint_0
-         %35 = OpAccessChain %_ptr_StorageBuffer_uint_0 %33 %uint_0
-               OpCooperativeMatrixStoreKHR %35 %32 %uint_0 %uint_64 NonPrivatePointer
+%compute_main = OpFunction %void None %42
+         %43 = OpLabel
+         %44 = OpFunctionCall %12 %subgroupMatrixLoad_d80c87
+         %45 = OpAccessChain %_ptr_StorageBuffer__arr_uint_uint_1024 %1 %uint_0
+         %47 = OpAccessChain %_ptr_StorageBuffer_uint_0 %45 %uint_0
+               OpCooperativeMatrixStoreKHR %47 %44 %uint_0 %uint_64 NonPrivatePointer
                OpReturn
                OpFunctionEnd
diff --git a/test/tint/builtins/gen/literal/subgroupMatrixLoad/da5d2c.wgsl.expected.spvasm b/test/tint/builtins/gen/literal/subgroupMatrixLoad/da5d2c.wgsl.expected.spvasm
index b98582e..2ea29fe 100644
--- a/test/tint/builtins/gen/literal/subgroupMatrixLoad/da5d2c.wgsl.expected.spvasm
+++ b/test/tint/builtins/gen/literal/subgroupMatrixLoad/da5d2c.wgsl.expected.spvasm
@@ -1,7 +1,7 @@
 ; SPIR-V
 ; Version: 1.3
 ; Generator: Google Tint Compiler; 1
-; Bound: 39
+; Bound: 52
 ; Schema: 0
                OpCapability Shader
                OpCapability VulkanMemoryModel
@@ -49,28 +49,45 @@
          %17 = OpTypeFunction %13
 %_ptr_StorageBuffer__runtimearr_int = OpTypePointer StorageBuffer %_runtimearr_int
      %uint_0 = OpConstant %uint 0
-%_ptr_StorageBuffer_int = OpTypePointer StorageBuffer %int
+     %uint_7 = OpConstant %uint 7
+       %bool = OpTypeBool
 %_ptr_Function_13 = OpTypePointer Function %13
+      %int_0 = OpConstant %int 0
+         %31 = OpConstantComposite %13 %int_0
+%_ptr_StorageBuffer_int = OpTypePointer StorageBuffer %int
        %void = OpTypeVoid
-         %30 = OpTypeFunction %void
+         %43 = OpTypeFunction %void
 %_ptr_StorageBuffer__arr_int_uint_1024 = OpTypePointer StorageBuffer %_arr_int_uint_1024
 %_ptr_StorageBuffer_int_0 = OpTypePointer StorageBuffer %int
     %uint_64 = OpConstant %uint 64
 %subgroupMatrixLoad_da5d2c = OpFunction %13 None %17
          %18 = OpLabel
+         %29 = OpVariable %_ptr_Function_13 Function %31
         %res = OpVariable %_ptr_Function_13 Function
          %19 = OpAccessChain %_ptr_StorageBuffer__runtimearr_int %sb_ro %uint_0
-         %22 = OpAccessChain %_ptr_StorageBuffer_int %19 %uint_1
-         %24 = OpCooperativeMatrixLoadKHR %13 %22 %uint_1 %uint_8 NonPrivatePointer
-               OpStore %res %24
-         %27 = OpLoad %13 %res None
-               OpReturnValue %27
+         %22 = OpArrayLength %uint %sb_ro 0
+         %23 = OpIMul %uint %uint_8 %uint_7
+         %25 = OpIAdd %uint %uint_1 %23
+         %26 = OpIAdd %uint %25 %uint_8
+         %27 = OpULessThanEqual %bool %26 %22
+               OpSelectionMerge %33 None
+               OpBranchConditional %27 %34 %33
+         %34 = OpLabel
+         %38 = OpAccessChain %_ptr_StorageBuffer_int %19 %uint_1
+         %40 = OpCooperativeMatrixLoadKHR %13 %38 %uint_1 %uint_8 NonPrivatePointer
+               OpStore %29 %40 None
+               OpBranch %33
+         %33 = OpLabel
+         %35 = OpLoad %13 %29 None
+               OpStore %res %35
+         %37 = OpLoad %13 %res None
+               OpReturnValue %37
                OpFunctionEnd
-%compute_main = OpFunction %void None %30
-         %31 = OpLabel
-         %32 = OpFunctionCall %13 %subgroupMatrixLoad_da5d2c
-         %33 = OpAccessChain %_ptr_StorageBuffer__arr_int_uint_1024 %1 %uint_0
-         %35 = OpAccessChain %_ptr_StorageBuffer_int_0 %33 %uint_0
-               OpCooperativeMatrixStoreKHR %35 %32 %uint_0 %uint_64 NonPrivatePointer
+%compute_main = OpFunction %void None %43
+         %44 = OpLabel
+         %45 = OpFunctionCall %13 %subgroupMatrixLoad_da5d2c
+         %46 = OpAccessChain %_ptr_StorageBuffer__arr_int_uint_1024 %1 %uint_0
+         %48 = OpAccessChain %_ptr_StorageBuffer_int_0 %46 %uint_0
+               OpCooperativeMatrixStoreKHR %48 %45 %uint_0 %uint_64 NonPrivatePointer
                OpReturn
                OpFunctionEnd
diff --git a/test/tint/builtins/gen/literal/subgroupMatrixLoad/df98ff.wgsl.expected.spvasm b/test/tint/builtins/gen/literal/subgroupMatrixLoad/df98ff.wgsl.expected.spvasm
index bc4cd14..aebd99f 100644
--- a/test/tint/builtins/gen/literal/subgroupMatrixLoad/df98ff.wgsl.expected.spvasm
+++ b/test/tint/builtins/gen/literal/subgroupMatrixLoad/df98ff.wgsl.expected.spvasm
@@ -1,7 +1,7 @@
 ; SPIR-V
 ; Version: 1.3
 ; Generator: Google Tint Compiler; 1
-; Bound: 38
+; Bound: 50
 ; Schema: 0
                OpCapability Shader
                OpCapability VulkanMemoryModel
@@ -48,28 +48,44 @@
          %16 = OpTypeFunction %12
 %_ptr_StorageBuffer__runtimearr_uint = OpTypePointer StorageBuffer %_runtimearr_uint
      %uint_0 = OpConstant %uint 0
-%_ptr_StorageBuffer_uint = OpTypePointer StorageBuffer %uint
+     %uint_7 = OpConstant %uint 7
+       %bool = OpTypeBool
 %_ptr_Function_12 = OpTypePointer Function %12
+         %30 = OpConstantComposite %12 %uint_0
+%_ptr_StorageBuffer_uint = OpTypePointer StorageBuffer %uint
        %void = OpTypeVoid
-         %29 = OpTypeFunction %void
+         %41 = OpTypeFunction %void
 %_ptr_StorageBuffer__arr_uint_uint_1024 = OpTypePointer StorageBuffer %_arr_uint_uint_1024
 %_ptr_StorageBuffer_uint_0 = OpTypePointer StorageBuffer %uint
     %uint_64 = OpConstant %uint 64
 %subgroupMatrixLoad_df98ff = OpFunction %12 None %16
          %17 = OpLabel
+         %28 = OpVariable %_ptr_Function_12 Function %30
         %res = OpVariable %_ptr_Function_12 Function
          %18 = OpAccessChain %_ptr_StorageBuffer__runtimearr_uint %sb_ro %uint_0
-         %21 = OpAccessChain %_ptr_StorageBuffer_uint %18 %uint_1
-         %23 = OpCooperativeMatrixLoadKHR %12 %21 %uint_1 %uint_8 NonPrivatePointer
-               OpStore %res %23
-         %26 = OpLoad %12 %res None
-               OpReturnValue %26
+         %21 = OpArrayLength %uint %sb_ro 0
+         %22 = OpIMul %uint %uint_8 %uint_7
+         %24 = OpIAdd %uint %uint_1 %22
+         %25 = OpIAdd %uint %24 %uint_8
+         %26 = OpULessThanEqual %bool %25 %21
+               OpSelectionMerge %31 None
+               OpBranchConditional %26 %32 %31
+         %32 = OpLabel
+         %36 = OpAccessChain %_ptr_StorageBuffer_uint %18 %uint_1
+         %38 = OpCooperativeMatrixLoadKHR %12 %36 %uint_1 %uint_8 NonPrivatePointer
+               OpStore %28 %38 None
+               OpBranch %31
+         %31 = OpLabel
+         %33 = OpLoad %12 %28 None
+               OpStore %res %33
+         %35 = OpLoad %12 %res None
+               OpReturnValue %35
                OpFunctionEnd
-%compute_main = OpFunction %void None %29
-         %30 = OpLabel
-         %31 = OpFunctionCall %12 %subgroupMatrixLoad_df98ff
-         %32 = OpAccessChain %_ptr_StorageBuffer__arr_uint_uint_1024 %1 %uint_0
-         %34 = OpAccessChain %_ptr_StorageBuffer_uint_0 %32 %uint_0
-               OpCooperativeMatrixStoreKHR %34 %31 %uint_0 %uint_64 NonPrivatePointer
+%compute_main = OpFunction %void None %41
+         %42 = OpLabel
+         %43 = OpFunctionCall %12 %subgroupMatrixLoad_df98ff
+         %44 = OpAccessChain %_ptr_StorageBuffer__arr_uint_uint_1024 %1 %uint_0
+         %46 = OpAccessChain %_ptr_StorageBuffer_uint_0 %44 %uint_0
+               OpCooperativeMatrixStoreKHR %46 %43 %uint_0 %uint_64 NonPrivatePointer
                OpReturn
                OpFunctionEnd
diff --git a/test/tint/builtins/gen/literal/subgroupMatrixLoad/e5caba.wgsl.expected.msl b/test/tint/builtins/gen/literal/subgroupMatrixLoad/e5caba.wgsl.expected.msl
index 6348946..45329c1 100644
--- a/test/tint/builtins/gen/literal/subgroupMatrixLoad/e5caba.wgsl.expected.msl
+++ b/test/tint/builtins/gen/literal/subgroupMatrixLoad/e5caba.wgsl.expected.msl
@@ -20,16 +20,21 @@
 struct tint_module_vars_struct {
   device tint_array<float, 1024>* prevent_dce;
   device SB_RW* sb_rw;
+  const constant tint_array<uint4, 1>* tint_storage_buffer_sizes;
 };
 
 simdgroup_float8x8 subgroupMatrixLoad_e5caba(tint_module_vars_struct tint_module_vars) {
   simdgroup_float8x8 v = make_filled_simdgroup_matrix<float, 8, 8>(0.0f);
-  simdgroup_load(v, (&(*tint_module_vars.sb_rw).arg_0[1u]), ulong(8u), ulong2(0ul), true);
+  if ((((1u + (8u * 7u)) + 8u) <= (((*tint_module_vars.tint_storage_buffer_sizes)[0u].x - 0u) / 4u))) {
+    simdgroup_float8x8 v_1 = make_filled_simdgroup_matrix<float, 8, 8>(0.0f);
+    simdgroup_load(v_1, (&(*tint_module_vars.sb_rw).arg_0[1u]), ulong(8u), ulong2(0ul), true);
+    v = v_1;
+  }
   simdgroup_float8x8 res = v;
   return res;
 }
 
-kernel void compute_main(device tint_array<float, 1024>* prevent_dce [[buffer(0)]], device SB_RW* sb_rw [[buffer(1)]]) {
-  tint_module_vars_struct const tint_module_vars = tint_module_vars_struct{.prevent_dce=prevent_dce, .sb_rw=sb_rw};
+kernel void compute_main(device tint_array<float, 1024>* prevent_dce [[buffer(0)]], device SB_RW* sb_rw [[buffer(1)]], const constant tint_array<uint4, 1>* tint_storage_buffer_sizes [[buffer(30)]]) {
+  tint_module_vars_struct const tint_module_vars = tint_module_vars_struct{.prevent_dce=prevent_dce, .sb_rw=sb_rw, .tint_storage_buffer_sizes=tint_storage_buffer_sizes};
   simdgroup_store(subgroupMatrixLoad_e5caba(tint_module_vars), (&(*tint_module_vars.prevent_dce)[0u]), ulong(64u), ulong2(0ul), false);
 }
diff --git a/test/tint/builtins/gen/literal/subgroupMatrixLoad/e5caba.wgsl.expected.spvasm b/test/tint/builtins/gen/literal/subgroupMatrixLoad/e5caba.wgsl.expected.spvasm
index df6d167..8b60a79 100644
--- a/test/tint/builtins/gen/literal/subgroupMatrixLoad/e5caba.wgsl.expected.spvasm
+++ b/test/tint/builtins/gen/literal/subgroupMatrixLoad/e5caba.wgsl.expected.spvasm
@@ -1,7 +1,7 @@
 ; SPIR-V
 ; Version: 1.3
 ; Generator: Google Tint Compiler; 1
-; Bound: 39
+; Bound: 52
 ; Schema: 0
                OpCapability Shader
                OpCapability VulkanMemoryModel
@@ -48,28 +48,45 @@
          %17 = OpTypeFunction %13
 %_ptr_StorageBuffer__runtimearr_float = OpTypePointer StorageBuffer %_runtimearr_float
      %uint_0 = OpConstant %uint 0
-%_ptr_StorageBuffer_float = OpTypePointer StorageBuffer %float
+     %uint_7 = OpConstant %uint 7
      %uint_1 = OpConstant %uint 1
+       %bool = OpTypeBool
 %_ptr_Function_13 = OpTypePointer Function %13
+    %float_0 = OpConstant %float 0
+         %32 = OpConstantComposite %13 %float_0
+%_ptr_StorageBuffer_float = OpTypePointer StorageBuffer %float
        %void = OpTypeVoid
-         %31 = OpTypeFunction %void
+         %44 = OpTypeFunction %void
 %_ptr_StorageBuffer__arr_float_uint_1024 = OpTypePointer StorageBuffer %_arr_float_uint_1024
     %uint_64 = OpConstant %uint 64
 %subgroupMatrixLoad_e5caba = OpFunction %13 None %17
          %18 = OpLabel
+         %30 = OpVariable %_ptr_Function_13 Function %32
         %res = OpVariable %_ptr_Function_13 Function
          %19 = OpAccessChain %_ptr_StorageBuffer__runtimearr_float %sb_rw %uint_0
-         %22 = OpAccessChain %_ptr_StorageBuffer_float %19 %uint_1
-         %25 = OpCooperativeMatrixLoadKHR %13 %22 %uint_1 %uint_8 NonPrivatePointer
-               OpStore %res %25
-         %28 = OpLoad %13 %res None
-               OpReturnValue %28
+         %22 = OpArrayLength %uint %sb_rw 0
+         %23 = OpIMul %uint %uint_8 %uint_7
+         %25 = OpIAdd %uint %uint_1 %23
+         %27 = OpIAdd %uint %25 %uint_8
+         %28 = OpULessThanEqual %bool %27 %22
+               OpSelectionMerge %34 None
+               OpBranchConditional %28 %35 %34
+         %35 = OpLabel
+         %39 = OpAccessChain %_ptr_StorageBuffer_float %19 %uint_1
+         %41 = OpCooperativeMatrixLoadKHR %13 %39 %uint_1 %uint_8 NonPrivatePointer
+               OpStore %30 %41 None
+               OpBranch %34
+         %34 = OpLabel
+         %36 = OpLoad %13 %30 None
+               OpStore %res %36
+         %38 = OpLoad %13 %res None
+               OpReturnValue %38
                OpFunctionEnd
-%compute_main = OpFunction %void None %31
-         %32 = OpLabel
-         %33 = OpFunctionCall %13 %subgroupMatrixLoad_e5caba
-         %34 = OpAccessChain %_ptr_StorageBuffer__arr_float_uint_1024 %1 %uint_0
-         %36 = OpAccessChain %_ptr_StorageBuffer_float %34 %uint_0
-               OpCooperativeMatrixStoreKHR %36 %33 %uint_0 %uint_64 NonPrivatePointer
+%compute_main = OpFunction %void None %44
+         %45 = OpLabel
+         %46 = OpFunctionCall %13 %subgroupMatrixLoad_e5caba
+         %47 = OpAccessChain %_ptr_StorageBuffer__arr_float_uint_1024 %1 %uint_0
+         %49 = OpAccessChain %_ptr_StorageBuffer_float %47 %uint_0
+               OpCooperativeMatrixStoreKHR %49 %46 %uint_0 %uint_64 NonPrivatePointer
                OpReturn
                OpFunctionEnd
diff --git a/test/tint/builtins/gen/literal/subgroupMatrixLoad/f58623.wgsl.expected.msl b/test/tint/builtins/gen/literal/subgroupMatrixLoad/f58623.wgsl.expected.msl
index 65e594f..aac80eb 100644
--- a/test/tint/builtins/gen/literal/subgroupMatrixLoad/f58623.wgsl.expected.msl
+++ b/test/tint/builtins/gen/literal/subgroupMatrixLoad/f58623.wgsl.expected.msl
@@ -20,16 +20,21 @@
 struct tint_module_vars_struct {
   device tint_array<float, 1024>* prevent_dce;
   const device SB_RO* sb_ro;
+  const constant tint_array<uint4, 1>* tint_storage_buffer_sizes;
 };
 
 simdgroup_float8x8 subgroupMatrixLoad_f58623(tint_module_vars_struct tint_module_vars) {
   simdgroup_float8x8 v = make_filled_simdgroup_matrix<float, 8, 8>(0.0f);
-  simdgroup_load(v, (&(*tint_module_vars.sb_ro).arg_0[1u]), ulong(8u), ulong2(0ul), true);
+  if ((((1u + (8u * 7u)) + 8u) <= (((*tint_module_vars.tint_storage_buffer_sizes)[0u].x - 0u) / 4u))) {
+    simdgroup_float8x8 v_1 = make_filled_simdgroup_matrix<float, 8, 8>(0.0f);
+    simdgroup_load(v_1, (&(*tint_module_vars.sb_ro).arg_0[1u]), ulong(8u), ulong2(0ul), true);
+    v = v_1;
+  }
   simdgroup_float8x8 res = v;
   return res;
 }
 
-kernel void compute_main(device tint_array<float, 1024>* prevent_dce [[buffer(0)]], const device SB_RO* sb_ro [[buffer(1)]]) {
-  tint_module_vars_struct const tint_module_vars = tint_module_vars_struct{.prevent_dce=prevent_dce, .sb_ro=sb_ro};
+kernel void compute_main(device tint_array<float, 1024>* prevent_dce [[buffer(0)]], const device SB_RO* sb_ro [[buffer(1)]], const constant tint_array<uint4, 1>* tint_storage_buffer_sizes [[buffer(30)]]) {
+  tint_module_vars_struct const tint_module_vars = tint_module_vars_struct{.prevent_dce=prevent_dce, .sb_ro=sb_ro, .tint_storage_buffer_sizes=tint_storage_buffer_sizes};
   simdgroup_store(subgroupMatrixLoad_f58623(tint_module_vars), (&(*tint_module_vars.prevent_dce)[0u]), ulong(64u), ulong2(0ul), false);
 }
diff --git a/test/tint/builtins/gen/literal/subgroupMatrixLoad/f58623.wgsl.expected.spvasm b/test/tint/builtins/gen/literal/subgroupMatrixLoad/f58623.wgsl.expected.spvasm
index adbbf33..9228958 100644
--- a/test/tint/builtins/gen/literal/subgroupMatrixLoad/f58623.wgsl.expected.spvasm
+++ b/test/tint/builtins/gen/literal/subgroupMatrixLoad/f58623.wgsl.expected.spvasm
@@ -1,7 +1,7 @@
 ; SPIR-V
 ; Version: 1.3
 ; Generator: Google Tint Compiler; 1
-; Bound: 39
+; Bound: 52
 ; Schema: 0
                OpCapability Shader
                OpCapability VulkanMemoryModel
@@ -48,29 +48,46 @@
          %13 = OpTypeCooperativeMatrixKHR %float %uint_3 %uint_8 %uint_8 %uint_0
          %17 = OpTypeFunction %13
 %_ptr_StorageBuffer__runtimearr_float = OpTypePointer StorageBuffer %_runtimearr_float
-%_ptr_StorageBuffer_float = OpTypePointer StorageBuffer %float
+     %uint_7 = OpConstant %uint 7
      %uint_1 = OpConstant %uint 1
+       %bool = OpTypeBool
 %_ptr_Function_13 = OpTypePointer Function %13
+    %float_0 = OpConstant %float 0
+         %31 = OpConstantComposite %13 %float_0
+%_ptr_StorageBuffer_float = OpTypePointer StorageBuffer %float
        %void = OpTypeVoid
-         %30 = OpTypeFunction %void
+         %43 = OpTypeFunction %void
 %_ptr_StorageBuffer__arr_float_uint_1024 = OpTypePointer StorageBuffer %_arr_float_uint_1024
 %_ptr_StorageBuffer_float_0 = OpTypePointer StorageBuffer %float
     %uint_64 = OpConstant %uint 64
 %subgroupMatrixLoad_f58623 = OpFunction %13 None %17
          %18 = OpLabel
+         %29 = OpVariable %_ptr_Function_13 Function %31
         %res = OpVariable %_ptr_Function_13 Function
          %19 = OpAccessChain %_ptr_StorageBuffer__runtimearr_float %sb_ro %uint_0
-         %21 = OpAccessChain %_ptr_StorageBuffer_float %19 %uint_1
-         %24 = OpCooperativeMatrixLoadKHR %13 %21 %uint_1 %uint_8 NonPrivatePointer
-               OpStore %res %24
-         %27 = OpLoad %13 %res None
-               OpReturnValue %27
+         %21 = OpArrayLength %uint %sb_ro 0
+         %22 = OpIMul %uint %uint_8 %uint_7
+         %24 = OpIAdd %uint %uint_1 %22
+         %26 = OpIAdd %uint %24 %uint_8
+         %27 = OpULessThanEqual %bool %26 %21
+               OpSelectionMerge %33 None
+               OpBranchConditional %27 %34 %33
+         %34 = OpLabel
+         %38 = OpAccessChain %_ptr_StorageBuffer_float %19 %uint_1
+         %40 = OpCooperativeMatrixLoadKHR %13 %38 %uint_1 %uint_8 NonPrivatePointer
+               OpStore %29 %40 None
+               OpBranch %33
+         %33 = OpLabel
+         %35 = OpLoad %13 %29 None
+               OpStore %res %35
+         %37 = OpLoad %13 %res None
+               OpReturnValue %37
                OpFunctionEnd
-%compute_main = OpFunction %void None %30
-         %31 = OpLabel
-         %32 = OpFunctionCall %13 %subgroupMatrixLoad_f58623
-         %33 = OpAccessChain %_ptr_StorageBuffer__arr_float_uint_1024 %1 %uint_0
-         %35 = OpAccessChain %_ptr_StorageBuffer_float_0 %33 %uint_0
-               OpCooperativeMatrixStoreKHR %35 %32 %uint_0 %uint_64 NonPrivatePointer
+%compute_main = OpFunction %void None %43
+         %44 = OpLabel
+         %45 = OpFunctionCall %13 %subgroupMatrixLoad_f58623
+         %46 = OpAccessChain %_ptr_StorageBuffer__arr_float_uint_1024 %1 %uint_0
+         %48 = OpAccessChain %_ptr_StorageBuffer_float_0 %46 %uint_0
+               OpCooperativeMatrixStoreKHR %48 %45 %uint_0 %uint_64 NonPrivatePointer
                OpReturn
                OpFunctionEnd
diff --git a/test/tint/builtins/gen/literal/subgroupMatrixLoad/f9b989.wgsl.expected.msl b/test/tint/builtins/gen/literal/subgroupMatrixLoad/f9b989.wgsl.expected.msl
index a7444b0..446d1ce 100644
--- a/test/tint/builtins/gen/literal/subgroupMatrixLoad/f9b989.wgsl.expected.msl
+++ b/test/tint/builtins/gen/literal/subgroupMatrixLoad/f9b989.wgsl.expected.msl
@@ -20,16 +20,21 @@
 struct tint_module_vars_struct {
   device tint_array<float, 1024>* prevent_dce;
   device SB_RW* sb_rw;
+  const constant tint_array<uint4, 1>* tint_storage_buffer_sizes;
 };
 
 simdgroup_float8x8 subgroupMatrixLoad_f9b989(tint_module_vars_struct tint_module_vars) {
   simdgroup_float8x8 v = make_filled_simdgroup_matrix<float, 8, 8>(0.0f);
-  simdgroup_load(v, (&(*tint_module_vars.sb_rw).arg_0[1u]), ulong(8u), ulong2(0ul), true);
+  if ((((1u + (8u * 7u)) + 8u) <= (((*tint_module_vars.tint_storage_buffer_sizes)[0u].x - 0u) / 4u))) {
+    simdgroup_float8x8 v_1 = make_filled_simdgroup_matrix<float, 8, 8>(0.0f);
+    simdgroup_load(v_1, (&(*tint_module_vars.sb_rw).arg_0[1u]), ulong(8u), ulong2(0ul), true);
+    v = v_1;
+  }
   simdgroup_float8x8 res = v;
   return res;
 }
 
-kernel void compute_main(device tint_array<float, 1024>* prevent_dce [[buffer(0)]], device SB_RW* sb_rw [[buffer(1)]]) {
-  tint_module_vars_struct const tint_module_vars = tint_module_vars_struct{.prevent_dce=prevent_dce, .sb_rw=sb_rw};
+kernel void compute_main(device tint_array<float, 1024>* prevent_dce [[buffer(0)]], device SB_RW* sb_rw [[buffer(1)]], const constant tint_array<uint4, 1>* tint_storage_buffer_sizes [[buffer(30)]]) {
+  tint_module_vars_struct const tint_module_vars = tint_module_vars_struct{.prevent_dce=prevent_dce, .sb_rw=sb_rw, .tint_storage_buffer_sizes=tint_storage_buffer_sizes};
   simdgroup_store(subgroupMatrixLoad_f9b989(tint_module_vars), (&(*tint_module_vars.prevent_dce)[0u]), ulong(64u), ulong2(0ul), false);
 }
diff --git a/test/tint/builtins/gen/literal/subgroupMatrixLoad/f9b989.wgsl.expected.spvasm b/test/tint/builtins/gen/literal/subgroupMatrixLoad/f9b989.wgsl.expected.spvasm
index 877645e..9243fdd 100644
--- a/test/tint/builtins/gen/literal/subgroupMatrixLoad/f9b989.wgsl.expected.spvasm
+++ b/test/tint/builtins/gen/literal/subgroupMatrixLoad/f9b989.wgsl.expected.spvasm
@@ -1,7 +1,7 @@
 ; SPIR-V
 ; Version: 1.3
 ; Generator: Google Tint Compiler; 1
-; Bound: 38
+; Bound: 51
 ; Schema: 0
                OpCapability Shader
                OpCapability VulkanMemoryModel
@@ -47,28 +47,45 @@
          %13 = OpTypeCooperativeMatrixKHR %float %uint_3 %uint_8 %uint_8 %uint_0
          %17 = OpTypeFunction %13
 %_ptr_StorageBuffer__runtimearr_float = OpTypePointer StorageBuffer %_runtimearr_float
-%_ptr_StorageBuffer_float = OpTypePointer StorageBuffer %float
+     %uint_7 = OpConstant %uint 7
      %uint_1 = OpConstant %uint 1
+       %bool = OpTypeBool
 %_ptr_Function_13 = OpTypePointer Function %13
+    %float_0 = OpConstant %float 0
+         %31 = OpConstantComposite %13 %float_0
+%_ptr_StorageBuffer_float = OpTypePointer StorageBuffer %float
        %void = OpTypeVoid
-         %30 = OpTypeFunction %void
+         %43 = OpTypeFunction %void
 %_ptr_StorageBuffer__arr_float_uint_1024 = OpTypePointer StorageBuffer %_arr_float_uint_1024
     %uint_64 = OpConstant %uint 64
 %subgroupMatrixLoad_f9b989 = OpFunction %13 None %17
          %18 = OpLabel
+         %29 = OpVariable %_ptr_Function_13 Function %31
         %res = OpVariable %_ptr_Function_13 Function
          %19 = OpAccessChain %_ptr_StorageBuffer__runtimearr_float %sb_rw %uint_0
-         %21 = OpAccessChain %_ptr_StorageBuffer_float %19 %uint_1
-         %24 = OpCooperativeMatrixLoadKHR %13 %21 %uint_1 %uint_8 NonPrivatePointer
-               OpStore %res %24
-         %27 = OpLoad %13 %res None
-               OpReturnValue %27
+         %21 = OpArrayLength %uint %sb_rw 0
+         %22 = OpIMul %uint %uint_8 %uint_7
+         %24 = OpIAdd %uint %uint_1 %22
+         %26 = OpIAdd %uint %24 %uint_8
+         %27 = OpULessThanEqual %bool %26 %21
+               OpSelectionMerge %33 None
+               OpBranchConditional %27 %34 %33
+         %34 = OpLabel
+         %38 = OpAccessChain %_ptr_StorageBuffer_float %19 %uint_1
+         %40 = OpCooperativeMatrixLoadKHR %13 %38 %uint_1 %uint_8 NonPrivatePointer
+               OpStore %29 %40 None
+               OpBranch %33
+         %33 = OpLabel
+         %35 = OpLoad %13 %29 None
+               OpStore %res %35
+         %37 = OpLoad %13 %res None
+               OpReturnValue %37
                OpFunctionEnd
-%compute_main = OpFunction %void None %30
-         %31 = OpLabel
-         %32 = OpFunctionCall %13 %subgroupMatrixLoad_f9b989
-         %33 = OpAccessChain %_ptr_StorageBuffer__arr_float_uint_1024 %1 %uint_0
-         %35 = OpAccessChain %_ptr_StorageBuffer_float %33 %uint_0
-               OpCooperativeMatrixStoreKHR %35 %32 %uint_0 %uint_64 NonPrivatePointer
+%compute_main = OpFunction %void None %43
+         %44 = OpLabel
+         %45 = OpFunctionCall %13 %subgroupMatrixLoad_f9b989
+         %46 = OpAccessChain %_ptr_StorageBuffer__arr_float_uint_1024 %1 %uint_0
+         %48 = OpAccessChain %_ptr_StorageBuffer_float %46 %uint_0
+               OpCooperativeMatrixStoreKHR %48 %45 %uint_0 %uint_64 NonPrivatePointer
                OpReturn
                OpFunctionEnd
diff --git a/test/tint/builtins/gen/literal/subgroupMatrixLoad/fa3c74.wgsl.expected.spvasm b/test/tint/builtins/gen/literal/subgroupMatrixLoad/fa3c74.wgsl.expected.spvasm
index 4b6634d..d18deea 100644
--- a/test/tint/builtins/gen/literal/subgroupMatrixLoad/fa3c74.wgsl.expected.spvasm
+++ b/test/tint/builtins/gen/literal/subgroupMatrixLoad/fa3c74.wgsl.expected.spvasm
@@ -1,7 +1,7 @@
 ; SPIR-V
 ; Version: 1.3
 ; Generator: Google Tint Compiler; 1
-; Bound: 39
+; Bound: 52
 ; Schema: 0
                OpCapability Shader
                OpCapability VulkanMemoryModel
@@ -48,29 +48,46 @@
          %13 = OpTypeCooperativeMatrixKHR %int %uint_3 %uint_8 %uint_8 %uint_0
          %17 = OpTypeFunction %13
 %_ptr_StorageBuffer__runtimearr_int = OpTypePointer StorageBuffer %_runtimearr_int
-%_ptr_StorageBuffer_int = OpTypePointer StorageBuffer %int
+     %uint_7 = OpConstant %uint 7
      %uint_1 = OpConstant %uint 1
+       %bool = OpTypeBool
 %_ptr_Function_13 = OpTypePointer Function %13
+      %int_0 = OpConstant %int 0
+         %31 = OpConstantComposite %13 %int_0
+%_ptr_StorageBuffer_int = OpTypePointer StorageBuffer %int
        %void = OpTypeVoid
-         %30 = OpTypeFunction %void
+         %43 = OpTypeFunction %void
 %_ptr_StorageBuffer__arr_int_uint_1024 = OpTypePointer StorageBuffer %_arr_int_uint_1024
 %_ptr_StorageBuffer_int_0 = OpTypePointer StorageBuffer %int
     %uint_64 = OpConstant %uint 64
 %subgroupMatrixLoad_fa3c74 = OpFunction %13 None %17
          %18 = OpLabel
+         %29 = OpVariable %_ptr_Function_13 Function %31
         %res = OpVariable %_ptr_Function_13 Function
          %19 = OpAccessChain %_ptr_StorageBuffer__runtimearr_int %sb_ro %uint_0
-         %21 = OpAccessChain %_ptr_StorageBuffer_int %19 %uint_1
-         %24 = OpCooperativeMatrixLoadKHR %13 %21 %uint_1 %uint_8 NonPrivatePointer
-               OpStore %res %24
-         %27 = OpLoad %13 %res None
-               OpReturnValue %27
+         %21 = OpArrayLength %uint %sb_ro 0
+         %22 = OpIMul %uint %uint_8 %uint_7
+         %24 = OpIAdd %uint %uint_1 %22
+         %26 = OpIAdd %uint %24 %uint_8
+         %27 = OpULessThanEqual %bool %26 %21
+               OpSelectionMerge %33 None
+               OpBranchConditional %27 %34 %33
+         %34 = OpLabel
+         %38 = OpAccessChain %_ptr_StorageBuffer_int %19 %uint_1
+         %40 = OpCooperativeMatrixLoadKHR %13 %38 %uint_1 %uint_8 NonPrivatePointer
+               OpStore %29 %40 None
+               OpBranch %33
+         %33 = OpLabel
+         %35 = OpLoad %13 %29 None
+               OpStore %res %35
+         %37 = OpLoad %13 %res None
+               OpReturnValue %37
                OpFunctionEnd
-%compute_main = OpFunction %void None %30
-         %31 = OpLabel
-         %32 = OpFunctionCall %13 %subgroupMatrixLoad_fa3c74
-         %33 = OpAccessChain %_ptr_StorageBuffer__arr_int_uint_1024 %1 %uint_0
-         %35 = OpAccessChain %_ptr_StorageBuffer_int_0 %33 %uint_0
-               OpCooperativeMatrixStoreKHR %35 %32 %uint_0 %uint_64 NonPrivatePointer
+%compute_main = OpFunction %void None %43
+         %44 = OpLabel
+         %45 = OpFunctionCall %13 %subgroupMatrixLoad_fa3c74
+         %46 = OpAccessChain %_ptr_StorageBuffer__arr_int_uint_1024 %1 %uint_0
+         %48 = OpAccessChain %_ptr_StorageBuffer_int_0 %46 %uint_0
+               OpCooperativeMatrixStoreKHR %48 %45 %uint_0 %uint_64 NonPrivatePointer
                OpReturn
                OpFunctionEnd
diff --git a/test/tint/builtins/gen/literal/subgroupMatrixStore/2d78d3.wgsl.expected.spvasm b/test/tint/builtins/gen/literal/subgroupMatrixStore/2d78d3.wgsl.expected.spvasm
index c94229a..7b97275 100644
--- a/test/tint/builtins/gen/literal/subgroupMatrixStore/2d78d3.wgsl.expected.spvasm
+++ b/test/tint/builtins/gen/literal/subgroupMatrixStore/2d78d3.wgsl.expected.spvasm
@@ -1,7 +1,7 @@
 ; SPIR-V
 ; Version: 1.3
 ; Generator: Google Tint Compiler; 1
-; Bound: 24
+; Bound: 33
 ; Schema: 0
                OpCapability Shader
                OpCapability VulkanMemoryModel
@@ -36,16 +36,28 @@
      %uint_1 = OpConstant %uint 1
          %14 = OpTypeCooperativeMatrixKHR %uint %uint_3 %uint_8 %uint_8 %uint_1
          %13 = OpConstantComposite %14 %uint_0
+     %uint_7 = OpConstant %uint 7
+       %bool = OpTypeBool
 %_ptr_StorageBuffer_uint = OpTypePointer StorageBuffer %uint
 %subgroupMatrixStore_2d78d3 = OpFunction %void None %8
           %9 = OpLabel
          %10 = OpAccessChain %_ptr_StorageBuffer__runtimearr_uint %sb_rw %uint_0
-         %18 = OpAccessChain %_ptr_StorageBuffer_uint %10 %uint_1
-               OpCooperativeMatrixStoreKHR %18 %13 %uint_1 %uint_8 NonPrivatePointer
+         %18 = OpArrayLength %uint %sb_rw 0
+         %19 = OpIMul %uint %uint_8 %uint_7
+         %21 = OpIAdd %uint %uint_1 %19
+         %22 = OpIAdd %uint %21 %uint_8
+         %23 = OpULessThanEqual %bool %22 %18
+               OpSelectionMerge %25 None
+               OpBranchConditional %23 %26 %25
+         %26 = OpLabel
+         %27 = OpAccessChain %_ptr_StorageBuffer_uint %10 %uint_1
+               OpCooperativeMatrixStoreKHR %27 %13 %uint_1 %uint_8 NonPrivatePointer
+               OpBranch %25
+         %25 = OpLabel
                OpReturn
                OpFunctionEnd
 %compute_main = OpFunction %void None %8
-         %22 = OpLabel
-         %23 = OpFunctionCall %void %subgroupMatrixStore_2d78d3
+         %31 = OpLabel
+         %32 = OpFunctionCall %void %subgroupMatrixStore_2d78d3
                OpReturn
                OpFunctionEnd
diff --git a/test/tint/builtins/gen/literal/subgroupMatrixStore/3ea76e.wgsl.expected.spvasm b/test/tint/builtins/gen/literal/subgroupMatrixStore/3ea76e.wgsl.expected.spvasm
index 1e486f2..760924f 100644
--- a/test/tint/builtins/gen/literal/subgroupMatrixStore/3ea76e.wgsl.expected.spvasm
+++ b/test/tint/builtins/gen/literal/subgroupMatrixStore/3ea76e.wgsl.expected.spvasm
@@ -1,7 +1,7 @@
 ; SPIR-V
 ; Version: 1.3
 ; Generator: Google Tint Compiler; 1
-; Bound: 26
+; Bound: 35
 ; Schema: 0
                OpCapability Shader
                OpCapability VulkanMemoryModel
@@ -37,17 +37,29 @@
          %15 = OpTypeCooperativeMatrixKHR %int %uint_3 %uint_8 %uint_8 %uint_0
       %int_0 = OpConstant %int 0
          %14 = OpConstantComposite %15 %int_0
-%_ptr_StorageBuffer_int = OpTypePointer StorageBuffer %int
+     %uint_7 = OpConstant %uint 7
      %uint_1 = OpConstant %uint 1
+       %bool = OpTypeBool
+%_ptr_StorageBuffer_int = OpTypePointer StorageBuffer %int
 %subgroupMatrixStore_3ea76e = OpFunction %void None %8
           %9 = OpLabel
          %10 = OpAccessChain %_ptr_StorageBuffer__runtimearr_int %sb_rw %uint_0
-         %19 = OpAccessChain %_ptr_StorageBuffer_int %10 %uint_1
-               OpCooperativeMatrixStoreKHR %19 %14 %uint_1 %uint_8 NonPrivatePointer
+         %19 = OpArrayLength %uint %sb_rw 0
+         %20 = OpIMul %uint %uint_8 %uint_7
+         %22 = OpIAdd %uint %uint_1 %20
+         %24 = OpIAdd %uint %22 %uint_8
+         %25 = OpULessThanEqual %bool %24 %19
+               OpSelectionMerge %27 None
+               OpBranchConditional %25 %28 %27
+         %28 = OpLabel
+         %29 = OpAccessChain %_ptr_StorageBuffer_int %10 %uint_1
+               OpCooperativeMatrixStoreKHR %29 %14 %uint_1 %uint_8 NonPrivatePointer
+               OpBranch %27
+         %27 = OpLabel
                OpReturn
                OpFunctionEnd
 %compute_main = OpFunction %void None %8
-         %24 = OpLabel
-         %25 = OpFunctionCall %void %subgroupMatrixStore_3ea76e
+         %33 = OpLabel
+         %34 = OpFunctionCall %void %subgroupMatrixStore_3ea76e
                OpReturn
                OpFunctionEnd
diff --git a/test/tint/builtins/gen/literal/subgroupMatrixStore/476cdf.wgsl.expected.spvasm b/test/tint/builtins/gen/literal/subgroupMatrixStore/476cdf.wgsl.expected.spvasm
index 7e022c6..7cc365e 100644
--- a/test/tint/builtins/gen/literal/subgroupMatrixStore/476cdf.wgsl.expected.spvasm
+++ b/test/tint/builtins/gen/literal/subgroupMatrixStore/476cdf.wgsl.expected.spvasm
@@ -1,7 +1,7 @@
 ; SPIR-V
 ; Version: 1.3
 ; Generator: Google Tint Compiler; 1
-; Bound: 27
+; Bound: 36
 ; Schema: 0
                OpCapability Shader
                OpCapability VulkanMemoryModel
@@ -38,17 +38,29 @@
          %15 = OpTypeCooperativeMatrixKHR %int %uint_3 %uint_8 %uint_8 %uint_2
       %int_0 = OpConstant %int 0
          %14 = OpConstantComposite %15 %int_0
-%_ptr_StorageBuffer_int = OpTypePointer StorageBuffer %int
+     %uint_7 = OpConstant %uint 7
      %uint_1 = OpConstant %uint 1
+       %bool = OpTypeBool
+%_ptr_StorageBuffer_int = OpTypePointer StorageBuffer %int
 %subgroupMatrixStore_476cdf = OpFunction %void None %8
           %9 = OpLabel
          %10 = OpAccessChain %_ptr_StorageBuffer__runtimearr_int %sb_rw %uint_0
-         %20 = OpAccessChain %_ptr_StorageBuffer_int %10 %uint_1
-               OpCooperativeMatrixStoreKHR %20 %14 %uint_1 %uint_8 NonPrivatePointer
+         %20 = OpArrayLength %uint %sb_rw 0
+         %21 = OpIMul %uint %uint_8 %uint_7
+         %23 = OpIAdd %uint %uint_1 %21
+         %25 = OpIAdd %uint %23 %uint_8
+         %26 = OpULessThanEqual %bool %25 %20
+               OpSelectionMerge %28 None
+               OpBranchConditional %26 %29 %28
+         %29 = OpLabel
+         %30 = OpAccessChain %_ptr_StorageBuffer_int %10 %uint_1
+               OpCooperativeMatrixStoreKHR %30 %14 %uint_1 %uint_8 NonPrivatePointer
+               OpBranch %28
+         %28 = OpLabel
                OpReturn
                OpFunctionEnd
 %compute_main = OpFunction %void None %8
-         %25 = OpLabel
-         %26 = OpFunctionCall %void %subgroupMatrixStore_476cdf
+         %34 = OpLabel
+         %35 = OpFunctionCall %void %subgroupMatrixStore_476cdf
                OpReturn
                OpFunctionEnd
diff --git a/test/tint/builtins/gen/literal/subgroupMatrixStore/543411.wgsl.expected.msl b/test/tint/builtins/gen/literal/subgroupMatrixStore/543411.wgsl.expected.msl
index 0702ea9..d1740d7 100644
--- a/test/tint/builtins/gen/literal/subgroupMatrixStore/543411.wgsl.expected.msl
+++ b/test/tint/builtins/gen/literal/subgroupMatrixStore/543411.wgsl.expected.msl
@@ -19,13 +19,17 @@
 
 struct tint_module_vars_struct {
   device SB_RW* sb_rw;
+  const constant tint_array<uint4, 1>* tint_storage_buffer_sizes;
 };
 
 void subgroupMatrixStore_543411(tint_module_vars_struct tint_module_vars) {
-  simdgroup_store(simdgroup_float8x8(), (&(*tint_module_vars.sb_rw).arg_0[1u]), ulong(8u), ulong2(0ul), true);
+  simdgroup_float8x8 const v = simdgroup_float8x8();
+  if ((((1u + (8u * 7u)) + 8u) <= (((*tint_module_vars.tint_storage_buffer_sizes)[0u].x - 0u) / 4u))) {
+    simdgroup_store(v, (&(*tint_module_vars.sb_rw).arg_0[1u]), ulong(8u), ulong2(0ul), true);
+  }
 }
 
-kernel void compute_main(device SB_RW* sb_rw [[buffer(0)]]) {
-  tint_module_vars_struct const tint_module_vars = tint_module_vars_struct{.sb_rw=sb_rw};
+kernel void compute_main(device SB_RW* sb_rw [[buffer(0)]], const constant tint_array<uint4, 1>* tint_storage_buffer_sizes [[buffer(30)]]) {
+  tint_module_vars_struct const tint_module_vars = tint_module_vars_struct{.sb_rw=sb_rw, .tint_storage_buffer_sizes=tint_storage_buffer_sizes};
   subgroupMatrixStore_543411(tint_module_vars);
 }
diff --git a/test/tint/builtins/gen/literal/subgroupMatrixStore/543411.wgsl.expected.spvasm b/test/tint/builtins/gen/literal/subgroupMatrixStore/543411.wgsl.expected.spvasm
index 7402ae2..94160de 100644
--- a/test/tint/builtins/gen/literal/subgroupMatrixStore/543411.wgsl.expected.spvasm
+++ b/test/tint/builtins/gen/literal/subgroupMatrixStore/543411.wgsl.expected.spvasm
@@ -1,7 +1,7 @@
 ; SPIR-V
 ; Version: 1.3
 ; Generator: Google Tint Compiler; 1
-; Bound: 26
+; Bound: 35
 ; Schema: 0
                OpCapability Shader
                OpCapability VulkanMemoryModel
@@ -38,16 +38,28 @@
          %15 = OpTypeCooperativeMatrixKHR %float %uint_3 %uint_8 %uint_8 %uint_1
     %float_0 = OpConstant %float 0
          %14 = OpConstantComposite %15 %float_0
+     %uint_7 = OpConstant %uint 7
+       %bool = OpTypeBool
 %_ptr_StorageBuffer_float = OpTypePointer StorageBuffer %float
 %subgroupMatrixStore_543411 = OpFunction %void None %8
           %9 = OpLabel
          %10 = OpAccessChain %_ptr_StorageBuffer__runtimearr_float %sb_rw %uint_0
-         %20 = OpAccessChain %_ptr_StorageBuffer_float %10 %uint_1
-               OpCooperativeMatrixStoreKHR %20 %14 %uint_1 %uint_8 NonPrivatePointer
+         %20 = OpArrayLength %uint %sb_rw 0
+         %21 = OpIMul %uint %uint_8 %uint_7
+         %23 = OpIAdd %uint %uint_1 %21
+         %24 = OpIAdd %uint %23 %uint_8
+         %25 = OpULessThanEqual %bool %24 %20
+               OpSelectionMerge %27 None
+               OpBranchConditional %25 %28 %27
+         %28 = OpLabel
+         %29 = OpAccessChain %_ptr_StorageBuffer_float %10 %uint_1
+               OpCooperativeMatrixStoreKHR %29 %14 %uint_1 %uint_8 NonPrivatePointer
+               OpBranch %27
+         %27 = OpLabel
                OpReturn
                OpFunctionEnd
 %compute_main = OpFunction %void None %8
-         %24 = OpLabel
-         %25 = OpFunctionCall %void %subgroupMatrixStore_543411
+         %33 = OpLabel
+         %34 = OpFunctionCall %void %subgroupMatrixStore_543411
                OpReturn
                OpFunctionEnd
diff --git a/test/tint/builtins/gen/literal/subgroupMatrixStore/6d8de7.wgsl.expected.msl b/test/tint/builtins/gen/literal/subgroupMatrixStore/6d8de7.wgsl.expected.msl
index c2d163d..04d69fb 100644
--- a/test/tint/builtins/gen/literal/subgroupMatrixStore/6d8de7.wgsl.expected.msl
+++ b/test/tint/builtins/gen/literal/subgroupMatrixStore/6d8de7.wgsl.expected.msl
@@ -19,13 +19,17 @@
 
 struct tint_module_vars_struct {
   device SB_RW* sb_rw;
+  const constant tint_array<uint4, 1>* tint_storage_buffer_sizes;
 };
 
 void subgroupMatrixStore_6d8de7(tint_module_vars_struct tint_module_vars) {
-  simdgroup_store(simdgroup_half8x8(), (&(*tint_module_vars.sb_rw).arg_0[1u]), ulong(8u), ulong2(0ul), true);
+  simdgroup_half8x8 const v = simdgroup_half8x8();
+  if ((((1u + (8u * 7u)) + 8u) <= (((*tint_module_vars.tint_storage_buffer_sizes)[0u].x - 0u) / 2u))) {
+    simdgroup_store(v, (&(*tint_module_vars.sb_rw).arg_0[1u]), ulong(8u), ulong2(0ul), true);
+  }
 }
 
-kernel void compute_main(device SB_RW* sb_rw [[buffer(0)]]) {
-  tint_module_vars_struct const tint_module_vars = tint_module_vars_struct{.sb_rw=sb_rw};
+kernel void compute_main(device SB_RW* sb_rw [[buffer(0)]], const constant tint_array<uint4, 1>* tint_storage_buffer_sizes [[buffer(30)]]) {
+  tint_module_vars_struct const tint_module_vars = tint_module_vars_struct{.sb_rw=sb_rw, .tint_storage_buffer_sizes=tint_storage_buffer_sizes};
   subgroupMatrixStore_6d8de7(tint_module_vars);
 }
diff --git a/test/tint/builtins/gen/literal/subgroupMatrixStore/6d8de7.wgsl.expected.spvasm b/test/tint/builtins/gen/literal/subgroupMatrixStore/6d8de7.wgsl.expected.spvasm
index 22a5ddc..aa2c448 100644
--- a/test/tint/builtins/gen/literal/subgroupMatrixStore/6d8de7.wgsl.expected.spvasm
+++ b/test/tint/builtins/gen/literal/subgroupMatrixStore/6d8de7.wgsl.expected.spvasm
@@ -1,7 +1,7 @@
 ; SPIR-V
 ; Version: 1.3
 ; Generator: Google Tint Compiler; 1
-; Bound: 26
+; Bound: 35
 ; Schema: 0
                OpCapability Shader
                OpCapability VulkanMemoryModel
@@ -41,16 +41,28 @@
          %15 = OpTypeCooperativeMatrixKHR %half %uint_3 %uint_8 %uint_8 %uint_1
 %half_0x0p_0 = OpConstant %half 0x0p+0
          %14 = OpConstantComposite %15 %half_0x0p_0
+     %uint_7 = OpConstant %uint 7
+       %bool = OpTypeBool
 %_ptr_StorageBuffer_half = OpTypePointer StorageBuffer %half
 %subgroupMatrixStore_6d8de7 = OpFunction %void None %8
           %9 = OpLabel
          %10 = OpAccessChain %_ptr_StorageBuffer__runtimearr_half %sb_rw %uint_0
-         %20 = OpAccessChain %_ptr_StorageBuffer_half %10 %uint_1
-               OpCooperativeMatrixStoreKHR %20 %14 %uint_1 %uint_8 NonPrivatePointer
+         %20 = OpArrayLength %uint %sb_rw 0
+         %21 = OpIMul %uint %uint_8 %uint_7
+         %23 = OpIAdd %uint %uint_1 %21
+         %24 = OpIAdd %uint %23 %uint_8
+         %25 = OpULessThanEqual %bool %24 %20
+               OpSelectionMerge %27 None
+               OpBranchConditional %25 %28 %27
+         %28 = OpLabel
+         %29 = OpAccessChain %_ptr_StorageBuffer_half %10 %uint_1
+               OpCooperativeMatrixStoreKHR %29 %14 %uint_1 %uint_8 NonPrivatePointer
+               OpBranch %27
+         %27 = OpLabel
                OpReturn
                OpFunctionEnd
 %compute_main = OpFunction %void None %8
-         %24 = OpLabel
-         %25 = OpFunctionCall %void %subgroupMatrixStore_6d8de7
+         %33 = OpLabel
+         %34 = OpFunctionCall %void %subgroupMatrixStore_6d8de7
                OpReturn
                OpFunctionEnd
diff --git a/test/tint/builtins/gen/literal/subgroupMatrixStore/8c59ed.wgsl.expected.spvasm b/test/tint/builtins/gen/literal/subgroupMatrixStore/8c59ed.wgsl.expected.spvasm
index c0551cd..4be482f 100644
--- a/test/tint/builtins/gen/literal/subgroupMatrixStore/8c59ed.wgsl.expected.spvasm
+++ b/test/tint/builtins/gen/literal/subgroupMatrixStore/8c59ed.wgsl.expected.spvasm
@@ -1,7 +1,7 @@
 ; SPIR-V
 ; Version: 1.3
 ; Generator: Google Tint Compiler; 1
-; Bound: 24
+; Bound: 33
 ; Schema: 0
                OpCapability Shader
                OpCapability VulkanMemoryModel
@@ -35,17 +35,29 @@
      %uint_8 = OpConstant %uint 8
          %14 = OpTypeCooperativeMatrixKHR %uint %uint_3 %uint_8 %uint_8 %uint_0
          %13 = OpConstantComposite %14 %uint_0
-%_ptr_StorageBuffer_uint = OpTypePointer StorageBuffer %uint
+     %uint_7 = OpConstant %uint 7
      %uint_1 = OpConstant %uint 1
+       %bool = OpTypeBool
+%_ptr_StorageBuffer_uint = OpTypePointer StorageBuffer %uint
 %subgroupMatrixStore_8c59ed = OpFunction %void None %8
           %9 = OpLabel
          %10 = OpAccessChain %_ptr_StorageBuffer__runtimearr_uint %sb_rw %uint_0
-         %17 = OpAccessChain %_ptr_StorageBuffer_uint %10 %uint_1
-               OpCooperativeMatrixStoreKHR %17 %13 %uint_1 %uint_8 NonPrivatePointer
+         %17 = OpArrayLength %uint %sb_rw 0
+         %18 = OpIMul %uint %uint_8 %uint_7
+         %20 = OpIAdd %uint %uint_1 %18
+         %22 = OpIAdd %uint %20 %uint_8
+         %23 = OpULessThanEqual %bool %22 %17
+               OpSelectionMerge %25 None
+               OpBranchConditional %23 %26 %25
+         %26 = OpLabel
+         %27 = OpAccessChain %_ptr_StorageBuffer_uint %10 %uint_1
+               OpCooperativeMatrixStoreKHR %27 %13 %uint_1 %uint_8 NonPrivatePointer
+               OpBranch %25
+         %25 = OpLabel
                OpReturn
                OpFunctionEnd
 %compute_main = OpFunction %void None %8
-         %22 = OpLabel
-         %23 = OpFunctionCall %void %subgroupMatrixStore_8c59ed
+         %31 = OpLabel
+         %32 = OpFunctionCall %void %subgroupMatrixStore_8c59ed
                OpReturn
                OpFunctionEnd
diff --git a/test/tint/builtins/gen/literal/subgroupMatrixStore/adbc3e.wgsl.expected.msl b/test/tint/builtins/gen/literal/subgroupMatrixStore/adbc3e.wgsl.expected.msl
index 230f851..6fd59fc 100644
--- a/test/tint/builtins/gen/literal/subgroupMatrixStore/adbc3e.wgsl.expected.msl
+++ b/test/tint/builtins/gen/literal/subgroupMatrixStore/adbc3e.wgsl.expected.msl
@@ -19,13 +19,17 @@
 
 struct tint_module_vars_struct {
   device SB_RW* sb_rw;
+  const constant tint_array<uint4, 1>* tint_storage_buffer_sizes;
 };
 
 void subgroupMatrixStore_adbc3e(tint_module_vars_struct tint_module_vars) {
-  simdgroup_store(simdgroup_half8x8(), (&(*tint_module_vars.sb_rw).arg_0[1u]), ulong(8u), ulong2(0ul), true);
+  simdgroup_half8x8 const v = simdgroup_half8x8();
+  if ((((1u + (8u * 7u)) + 8u) <= (((*tint_module_vars.tint_storage_buffer_sizes)[0u].x - 0u) / 2u))) {
+    simdgroup_store(v, (&(*tint_module_vars.sb_rw).arg_0[1u]), ulong(8u), ulong2(0ul), true);
+  }
 }
 
-kernel void compute_main(device SB_RW* sb_rw [[buffer(0)]]) {
-  tint_module_vars_struct const tint_module_vars = tint_module_vars_struct{.sb_rw=sb_rw};
+kernel void compute_main(device SB_RW* sb_rw [[buffer(0)]], const constant tint_array<uint4, 1>* tint_storage_buffer_sizes [[buffer(30)]]) {
+  tint_module_vars_struct const tint_module_vars = tint_module_vars_struct{.sb_rw=sb_rw, .tint_storage_buffer_sizes=tint_storage_buffer_sizes};
   subgroupMatrixStore_adbc3e(tint_module_vars);
 }
diff --git a/test/tint/builtins/gen/literal/subgroupMatrixStore/adbc3e.wgsl.expected.spvasm b/test/tint/builtins/gen/literal/subgroupMatrixStore/adbc3e.wgsl.expected.spvasm
index 790a2e9..e11952f 100644
--- a/test/tint/builtins/gen/literal/subgroupMatrixStore/adbc3e.wgsl.expected.spvasm
+++ b/test/tint/builtins/gen/literal/subgroupMatrixStore/adbc3e.wgsl.expected.spvasm
@@ -1,7 +1,7 @@
 ; SPIR-V
 ; Version: 1.3
 ; Generator: Google Tint Compiler; 1
-; Bound: 26
+; Bound: 35
 ; Schema: 0
                OpCapability Shader
                OpCapability VulkanMemoryModel
@@ -40,17 +40,29 @@
          %15 = OpTypeCooperativeMatrixKHR %half %uint_3 %uint_8 %uint_8 %uint_0
 %half_0x0p_0 = OpConstant %half 0x0p+0
          %14 = OpConstantComposite %15 %half_0x0p_0
-%_ptr_StorageBuffer_half = OpTypePointer StorageBuffer %half
+     %uint_7 = OpConstant %uint 7
      %uint_1 = OpConstant %uint 1
+       %bool = OpTypeBool
+%_ptr_StorageBuffer_half = OpTypePointer StorageBuffer %half
 %subgroupMatrixStore_adbc3e = OpFunction %void None %8
           %9 = OpLabel
          %10 = OpAccessChain %_ptr_StorageBuffer__runtimearr_half %sb_rw %uint_0
-         %19 = OpAccessChain %_ptr_StorageBuffer_half %10 %uint_1
-               OpCooperativeMatrixStoreKHR %19 %14 %uint_1 %uint_8 NonPrivatePointer
+         %19 = OpArrayLength %uint %sb_rw 0
+         %20 = OpIMul %uint %uint_8 %uint_7
+         %22 = OpIAdd %uint %uint_1 %20
+         %24 = OpIAdd %uint %22 %uint_8
+         %25 = OpULessThanEqual %bool %24 %19
+               OpSelectionMerge %27 None
+               OpBranchConditional %25 %28 %27
+         %28 = OpLabel
+         %29 = OpAccessChain %_ptr_StorageBuffer_half %10 %uint_1
+               OpCooperativeMatrixStoreKHR %29 %14 %uint_1 %uint_8 NonPrivatePointer
+               OpBranch %27
+         %27 = OpLabel
                OpReturn
                OpFunctionEnd
 %compute_main = OpFunction %void None %8
-         %24 = OpLabel
-         %25 = OpFunctionCall %void %subgroupMatrixStore_adbc3e
+         %33 = OpLabel
+         %34 = OpFunctionCall %void %subgroupMatrixStore_adbc3e
                OpReturn
                OpFunctionEnd
diff --git a/test/tint/builtins/gen/literal/subgroupMatrixStore/bb2478.wgsl.expected.msl b/test/tint/builtins/gen/literal/subgroupMatrixStore/bb2478.wgsl.expected.msl
index 5175d64..759b121 100644
--- a/test/tint/builtins/gen/literal/subgroupMatrixStore/bb2478.wgsl.expected.msl
+++ b/test/tint/builtins/gen/literal/subgroupMatrixStore/bb2478.wgsl.expected.msl
@@ -19,13 +19,17 @@
 
 struct tint_module_vars_struct {
   device SB_RW* sb_rw;
+  const constant tint_array<uint4, 1>* tint_storage_buffer_sizes;
 };
 
 void subgroupMatrixStore_bb2478(tint_module_vars_struct tint_module_vars) {
-  simdgroup_store(simdgroup_float8x8(), (&(*tint_module_vars.sb_rw).arg_0[1u]), ulong(8u), ulong2(0ul), true);
+  simdgroup_float8x8 const v = simdgroup_float8x8();
+  if ((((1u + (8u * 7u)) + 8u) <= (((*tint_module_vars.tint_storage_buffer_sizes)[0u].x - 0u) / 4u))) {
+    simdgroup_store(v, (&(*tint_module_vars.sb_rw).arg_0[1u]), ulong(8u), ulong2(0ul), true);
+  }
 }
 
-kernel void compute_main(device SB_RW* sb_rw [[buffer(0)]]) {
-  tint_module_vars_struct const tint_module_vars = tint_module_vars_struct{.sb_rw=sb_rw};
+kernel void compute_main(device SB_RW* sb_rw [[buffer(0)]], const constant tint_array<uint4, 1>* tint_storage_buffer_sizes [[buffer(30)]]) {
+  tint_module_vars_struct const tint_module_vars = tint_module_vars_struct{.sb_rw=sb_rw, .tint_storage_buffer_sizes=tint_storage_buffer_sizes};
   subgroupMatrixStore_bb2478(tint_module_vars);
 }
diff --git a/test/tint/builtins/gen/literal/subgroupMatrixStore/bb2478.wgsl.expected.spvasm b/test/tint/builtins/gen/literal/subgroupMatrixStore/bb2478.wgsl.expected.spvasm
index 2fddc05..a67c4b1 100644
--- a/test/tint/builtins/gen/literal/subgroupMatrixStore/bb2478.wgsl.expected.spvasm
+++ b/test/tint/builtins/gen/literal/subgroupMatrixStore/bb2478.wgsl.expected.spvasm
@@ -1,7 +1,7 @@
 ; SPIR-V
 ; Version: 1.3
 ; Generator: Google Tint Compiler; 1
-; Bound: 27
+; Bound: 36
 ; Schema: 0
                OpCapability Shader
                OpCapability VulkanMemoryModel
@@ -38,17 +38,29 @@
          %15 = OpTypeCooperativeMatrixKHR %float %uint_3 %uint_8 %uint_8 %uint_2
     %float_0 = OpConstant %float 0
          %14 = OpConstantComposite %15 %float_0
-%_ptr_StorageBuffer_float = OpTypePointer StorageBuffer %float
+     %uint_7 = OpConstant %uint 7
      %uint_1 = OpConstant %uint 1
+       %bool = OpTypeBool
+%_ptr_StorageBuffer_float = OpTypePointer StorageBuffer %float
 %subgroupMatrixStore_bb2478 = OpFunction %void None %8
           %9 = OpLabel
          %10 = OpAccessChain %_ptr_StorageBuffer__runtimearr_float %sb_rw %uint_0
-         %20 = OpAccessChain %_ptr_StorageBuffer_float %10 %uint_1
-               OpCooperativeMatrixStoreKHR %20 %14 %uint_1 %uint_8 NonPrivatePointer
+         %20 = OpArrayLength %uint %sb_rw 0
+         %21 = OpIMul %uint %uint_8 %uint_7
+         %23 = OpIAdd %uint %uint_1 %21
+         %25 = OpIAdd %uint %23 %uint_8
+         %26 = OpULessThanEqual %bool %25 %20
+               OpSelectionMerge %28 None
+               OpBranchConditional %26 %29 %28
+         %29 = OpLabel
+         %30 = OpAccessChain %_ptr_StorageBuffer_float %10 %uint_1
+               OpCooperativeMatrixStoreKHR %30 %14 %uint_1 %uint_8 NonPrivatePointer
+               OpBranch %28
+         %28 = OpLabel
                OpReturn
                OpFunctionEnd
 %compute_main = OpFunction %void None %8
-         %25 = OpLabel
-         %26 = OpFunctionCall %void %subgroupMatrixStore_bb2478
+         %34 = OpLabel
+         %35 = OpFunctionCall %void %subgroupMatrixStore_bb2478
                OpReturn
                OpFunctionEnd
diff --git a/test/tint/builtins/gen/literal/subgroupMatrixStore/bb5d49.wgsl.expected.msl b/test/tint/builtins/gen/literal/subgroupMatrixStore/bb5d49.wgsl.expected.msl
index d230a21..18ca2c5 100644
--- a/test/tint/builtins/gen/literal/subgroupMatrixStore/bb5d49.wgsl.expected.msl
+++ b/test/tint/builtins/gen/literal/subgroupMatrixStore/bb5d49.wgsl.expected.msl
@@ -19,13 +19,17 @@
 
 struct tint_module_vars_struct {
   device SB_RW* sb_rw;
+  const constant tint_array<uint4, 1>* tint_storage_buffer_sizes;
 };
 
 void subgroupMatrixStore_bb5d49(tint_module_vars_struct tint_module_vars) {
-  simdgroup_store(simdgroup_float8x8(), (&(*tint_module_vars.sb_rw).arg_0[1u]), ulong(8u), ulong2(0ul), true);
+  simdgroup_float8x8 const v = simdgroup_float8x8();
+  if ((((1u + (8u * 7u)) + 8u) <= (((*tint_module_vars.tint_storage_buffer_sizes)[0u].x - 0u) / 4u))) {
+    simdgroup_store(v, (&(*tint_module_vars.sb_rw).arg_0[1u]), ulong(8u), ulong2(0ul), true);
+  }
 }
 
-kernel void compute_main(device SB_RW* sb_rw [[buffer(0)]]) {
-  tint_module_vars_struct const tint_module_vars = tint_module_vars_struct{.sb_rw=sb_rw};
+kernel void compute_main(device SB_RW* sb_rw [[buffer(0)]], const constant tint_array<uint4, 1>* tint_storage_buffer_sizes [[buffer(30)]]) {
+  tint_module_vars_struct const tint_module_vars = tint_module_vars_struct{.sb_rw=sb_rw, .tint_storage_buffer_sizes=tint_storage_buffer_sizes};
   subgroupMatrixStore_bb5d49(tint_module_vars);
 }
diff --git a/test/tint/builtins/gen/literal/subgroupMatrixStore/bb5d49.wgsl.expected.spvasm b/test/tint/builtins/gen/literal/subgroupMatrixStore/bb5d49.wgsl.expected.spvasm
index cbbf625..19cb1f7 100644
--- a/test/tint/builtins/gen/literal/subgroupMatrixStore/bb5d49.wgsl.expected.spvasm
+++ b/test/tint/builtins/gen/literal/subgroupMatrixStore/bb5d49.wgsl.expected.spvasm
@@ -1,7 +1,7 @@
 ; SPIR-V
 ; Version: 1.3
 ; Generator: Google Tint Compiler; 1
-; Bound: 26
+; Bound: 35
 ; Schema: 0
                OpCapability Shader
                OpCapability VulkanMemoryModel
@@ -37,17 +37,29 @@
          %15 = OpTypeCooperativeMatrixKHR %float %uint_3 %uint_8 %uint_8 %uint_0
     %float_0 = OpConstant %float 0
          %14 = OpConstantComposite %15 %float_0
-%_ptr_StorageBuffer_float = OpTypePointer StorageBuffer %float
+     %uint_7 = OpConstant %uint 7
      %uint_1 = OpConstant %uint 1
+       %bool = OpTypeBool
+%_ptr_StorageBuffer_float = OpTypePointer StorageBuffer %float
 %subgroupMatrixStore_bb5d49 = OpFunction %void None %8
           %9 = OpLabel
          %10 = OpAccessChain %_ptr_StorageBuffer__runtimearr_float %sb_rw %uint_0
-         %19 = OpAccessChain %_ptr_StorageBuffer_float %10 %uint_1
-               OpCooperativeMatrixStoreKHR %19 %14 %uint_1 %uint_8 NonPrivatePointer
+         %19 = OpArrayLength %uint %sb_rw 0
+         %20 = OpIMul %uint %uint_8 %uint_7
+         %22 = OpIAdd %uint %uint_1 %20
+         %24 = OpIAdd %uint %22 %uint_8
+         %25 = OpULessThanEqual %bool %24 %19
+               OpSelectionMerge %27 None
+               OpBranchConditional %25 %28 %27
+         %28 = OpLabel
+         %29 = OpAccessChain %_ptr_StorageBuffer_float %10 %uint_1
+               OpCooperativeMatrixStoreKHR %29 %14 %uint_1 %uint_8 NonPrivatePointer
+               OpBranch %27
+         %27 = OpLabel
                OpReturn
                OpFunctionEnd
 %compute_main = OpFunction %void None %8
-         %24 = OpLabel
-         %25 = OpFunctionCall %void %subgroupMatrixStore_bb5d49
+         %33 = OpLabel
+         %34 = OpFunctionCall %void %subgroupMatrixStore_bb5d49
                OpReturn
                OpFunctionEnd
diff --git a/test/tint/builtins/gen/literal/subgroupMatrixStore/bfe106.wgsl.expected.msl b/test/tint/builtins/gen/literal/subgroupMatrixStore/bfe106.wgsl.expected.msl
index bd30fd6..d0c78b7 100644
--- a/test/tint/builtins/gen/literal/subgroupMatrixStore/bfe106.wgsl.expected.msl
+++ b/test/tint/builtins/gen/literal/subgroupMatrixStore/bfe106.wgsl.expected.msl
@@ -19,13 +19,17 @@
 
 struct tint_module_vars_struct {
   device SB_RW* sb_rw;
+  const constant tint_array<uint4, 1>* tint_storage_buffer_sizes;
 };
 
 void subgroupMatrixStore_bfe106(tint_module_vars_struct tint_module_vars) {
-  simdgroup_store(simdgroup_half8x8(), (&(*tint_module_vars.sb_rw).arg_0[1u]), ulong(8u), ulong2(0ul), true);
+  simdgroup_half8x8 const v = simdgroup_half8x8();
+  if ((((1u + (8u * 7u)) + 8u) <= (((*tint_module_vars.tint_storage_buffer_sizes)[0u].x - 0u) / 2u))) {
+    simdgroup_store(v, (&(*tint_module_vars.sb_rw).arg_0[1u]), ulong(8u), ulong2(0ul), true);
+  }
 }
 
-kernel void compute_main(device SB_RW* sb_rw [[buffer(0)]]) {
-  tint_module_vars_struct const tint_module_vars = tint_module_vars_struct{.sb_rw=sb_rw};
+kernel void compute_main(device SB_RW* sb_rw [[buffer(0)]], const constant tint_array<uint4, 1>* tint_storage_buffer_sizes [[buffer(30)]]) {
+  tint_module_vars_struct const tint_module_vars = tint_module_vars_struct{.sb_rw=sb_rw, .tint_storage_buffer_sizes=tint_storage_buffer_sizes};
   subgroupMatrixStore_bfe106(tint_module_vars);
 }
diff --git a/test/tint/builtins/gen/literal/subgroupMatrixStore/bfe106.wgsl.expected.spvasm b/test/tint/builtins/gen/literal/subgroupMatrixStore/bfe106.wgsl.expected.spvasm
index 9661f16..b44cfca 100644
--- a/test/tint/builtins/gen/literal/subgroupMatrixStore/bfe106.wgsl.expected.spvasm
+++ b/test/tint/builtins/gen/literal/subgroupMatrixStore/bfe106.wgsl.expected.spvasm
@@ -1,7 +1,7 @@
 ; SPIR-V
 ; Version: 1.3
 ; Generator: Google Tint Compiler; 1
-; Bound: 27
+; Bound: 36
 ; Schema: 0
                OpCapability Shader
                OpCapability VulkanMemoryModel
@@ -41,17 +41,29 @@
          %15 = OpTypeCooperativeMatrixKHR %half %uint_3 %uint_8 %uint_8 %uint_2
 %half_0x0p_0 = OpConstant %half 0x0p+0
          %14 = OpConstantComposite %15 %half_0x0p_0
-%_ptr_StorageBuffer_half = OpTypePointer StorageBuffer %half
+     %uint_7 = OpConstant %uint 7
      %uint_1 = OpConstant %uint 1
+       %bool = OpTypeBool
+%_ptr_StorageBuffer_half = OpTypePointer StorageBuffer %half
 %subgroupMatrixStore_bfe106 = OpFunction %void None %8
           %9 = OpLabel
          %10 = OpAccessChain %_ptr_StorageBuffer__runtimearr_half %sb_rw %uint_0
-         %20 = OpAccessChain %_ptr_StorageBuffer_half %10 %uint_1
-               OpCooperativeMatrixStoreKHR %20 %14 %uint_1 %uint_8 NonPrivatePointer
+         %20 = OpArrayLength %uint %sb_rw 0
+         %21 = OpIMul %uint %uint_8 %uint_7
+         %23 = OpIAdd %uint %uint_1 %21
+         %25 = OpIAdd %uint %23 %uint_8
+         %26 = OpULessThanEqual %bool %25 %20
+               OpSelectionMerge %28 None
+               OpBranchConditional %26 %29 %28
+         %29 = OpLabel
+         %30 = OpAccessChain %_ptr_StorageBuffer_half %10 %uint_1
+               OpCooperativeMatrixStoreKHR %30 %14 %uint_1 %uint_8 NonPrivatePointer
+               OpBranch %28
+         %28 = OpLabel
                OpReturn
                OpFunctionEnd
 %compute_main = OpFunction %void None %8
-         %25 = OpLabel
-         %26 = OpFunctionCall %void %subgroupMatrixStore_bfe106
+         %34 = OpLabel
+         %35 = OpFunctionCall %void %subgroupMatrixStore_bfe106
                OpReturn
                OpFunctionEnd
diff --git a/test/tint/builtins/gen/literal/subgroupMatrixStore/d55153.wgsl.expected.spvasm b/test/tint/builtins/gen/literal/subgroupMatrixStore/d55153.wgsl.expected.spvasm
index 643655c..6b839a3 100644
--- a/test/tint/builtins/gen/literal/subgroupMatrixStore/d55153.wgsl.expected.spvasm
+++ b/test/tint/builtins/gen/literal/subgroupMatrixStore/d55153.wgsl.expected.spvasm
@@ -1,7 +1,7 @@
 ; SPIR-V
 ; Version: 1.3
 ; Generator: Google Tint Compiler; 1
-; Bound: 25
+; Bound: 34
 ; Schema: 0
                OpCapability Shader
                OpCapability VulkanMemoryModel
@@ -36,17 +36,29 @@
      %uint_2 = OpConstant %uint 2
          %14 = OpTypeCooperativeMatrixKHR %uint %uint_3 %uint_8 %uint_8 %uint_2
          %13 = OpConstantComposite %14 %uint_0
-%_ptr_StorageBuffer_uint = OpTypePointer StorageBuffer %uint
+     %uint_7 = OpConstant %uint 7
      %uint_1 = OpConstant %uint 1
+       %bool = OpTypeBool
+%_ptr_StorageBuffer_uint = OpTypePointer StorageBuffer %uint
 %subgroupMatrixStore_d55153 = OpFunction %void None %8
           %9 = OpLabel
          %10 = OpAccessChain %_ptr_StorageBuffer__runtimearr_uint %sb_rw %uint_0
-         %18 = OpAccessChain %_ptr_StorageBuffer_uint %10 %uint_1
-               OpCooperativeMatrixStoreKHR %18 %13 %uint_1 %uint_8 NonPrivatePointer
+         %18 = OpArrayLength %uint %sb_rw 0
+         %19 = OpIMul %uint %uint_8 %uint_7
+         %21 = OpIAdd %uint %uint_1 %19
+         %23 = OpIAdd %uint %21 %uint_8
+         %24 = OpULessThanEqual %bool %23 %18
+               OpSelectionMerge %26 None
+               OpBranchConditional %24 %27 %26
+         %27 = OpLabel
+         %28 = OpAccessChain %_ptr_StorageBuffer_uint %10 %uint_1
+               OpCooperativeMatrixStoreKHR %28 %13 %uint_1 %uint_8 NonPrivatePointer
+               OpBranch %26
+         %26 = OpLabel
                OpReturn
                OpFunctionEnd
 %compute_main = OpFunction %void None %8
-         %23 = OpLabel
-         %24 = OpFunctionCall %void %subgroupMatrixStore_d55153
+         %32 = OpLabel
+         %33 = OpFunctionCall %void %subgroupMatrixStore_d55153
                OpReturn
                OpFunctionEnd
diff --git a/test/tint/builtins/gen/literal/subgroupMatrixStore/eae7d8.wgsl.expected.spvasm b/test/tint/builtins/gen/literal/subgroupMatrixStore/eae7d8.wgsl.expected.spvasm
index 48af268..28c7f20 100644
--- a/test/tint/builtins/gen/literal/subgroupMatrixStore/eae7d8.wgsl.expected.spvasm
+++ b/test/tint/builtins/gen/literal/subgroupMatrixStore/eae7d8.wgsl.expected.spvasm
@@ -1,7 +1,7 @@
 ; SPIR-V
 ; Version: 1.3
 ; Generator: Google Tint Compiler; 1
-; Bound: 26
+; Bound: 35
 ; Schema: 0
                OpCapability Shader
                OpCapability VulkanMemoryModel
@@ -38,16 +38,28 @@
          %15 = OpTypeCooperativeMatrixKHR %int %uint_3 %uint_8 %uint_8 %uint_1
       %int_0 = OpConstant %int 0
          %14 = OpConstantComposite %15 %int_0
+     %uint_7 = OpConstant %uint 7
+       %bool = OpTypeBool
 %_ptr_StorageBuffer_int = OpTypePointer StorageBuffer %int
 %subgroupMatrixStore_eae7d8 = OpFunction %void None %8
           %9 = OpLabel
          %10 = OpAccessChain %_ptr_StorageBuffer__runtimearr_int %sb_rw %uint_0
-         %20 = OpAccessChain %_ptr_StorageBuffer_int %10 %uint_1
-               OpCooperativeMatrixStoreKHR %20 %14 %uint_1 %uint_8 NonPrivatePointer
+         %20 = OpArrayLength %uint %sb_rw 0
+         %21 = OpIMul %uint %uint_8 %uint_7
+         %23 = OpIAdd %uint %uint_1 %21
+         %24 = OpIAdd %uint %23 %uint_8
+         %25 = OpULessThanEqual %bool %24 %20
+               OpSelectionMerge %27 None
+               OpBranchConditional %25 %28 %27
+         %28 = OpLabel
+         %29 = OpAccessChain %_ptr_StorageBuffer_int %10 %uint_1
+               OpCooperativeMatrixStoreKHR %29 %14 %uint_1 %uint_8 NonPrivatePointer
+               OpBranch %27
+         %27 = OpLabel
                OpReturn
                OpFunctionEnd
 %compute_main = OpFunction %void None %8
-         %24 = OpLabel
-         %25 = OpFunctionCall %void %subgroupMatrixStore_eae7d8
+         %33 = OpLabel
+         %34 = OpFunctionCall %void %subgroupMatrixStore_eae7d8
                OpReturn
                OpFunctionEnd
diff --git a/test/tint/builtins/gen/var/subgroupMatrixLoad/015e29.wgsl.expected.msl b/test/tint/builtins/gen/var/subgroupMatrixLoad/015e29.wgsl.expected.msl
index 5674e8b..8742fa0 100644
--- a/test/tint/builtins/gen/var/subgroupMatrixLoad/015e29.wgsl.expected.msl
+++ b/test/tint/builtins/gen/var/subgroupMatrixLoad/015e29.wgsl.expected.msl
@@ -25,9 +25,15 @@
 simdgroup_float8x8 subgroupMatrixLoad_015e29(tint_module_vars_struct tint_module_vars) {
   uint arg_1 = 1u;
   uint arg_3 = 8u;
-  simdgroup_float8x8 v = make_filled_simdgroup_matrix<float, 8, 8>(0.0f);
-  simdgroup_load(v, (&(*tint_module_vars.sb_rw).arg_0[arg_1]), ulong(arg_3), ulong2(0ul), true);
-  simdgroup_float8x8 res = v;
+  uint const v = arg_1;
+  uint const v_1 = max(arg_3, 8u);
+  simdgroup_float8x8 v_2 = make_filled_simdgroup_matrix<float, 8, 8>(0.0f);
+  if ((((v + (v_1 * 7u)) + 8u) <= 1024u)) {
+    simdgroup_float8x8 v_3 = make_filled_simdgroup_matrix<float, 8, 8>(0.0f);
+    simdgroup_load(v_3, (&(*tint_module_vars.sb_rw).arg_0[v]), ulong(v_1), ulong2(0ul), true);
+    v_2 = v_3;
+  }
+  simdgroup_float8x8 res = v_2;
   return res;
 }
 
diff --git a/test/tint/builtins/gen/var/subgroupMatrixLoad/015e29.wgsl.expected.spvasm b/test/tint/builtins/gen/var/subgroupMatrixLoad/015e29.wgsl.expected.spvasm
index 7e4999c..38c971c 100644
--- a/test/tint/builtins/gen/var/subgroupMatrixLoad/015e29.wgsl.expected.spvasm
+++ b/test/tint/builtins/gen/var/subgroupMatrixLoad/015e29.wgsl.expected.spvasm
@@ -1,7 +1,7 @@
 ; SPIR-V
 ; Version: 1.3
 ; Generator: Google Tint Compiler; 1
-; Bound: 42
+; Bound: 56
 ; Schema: 0
                OpCapability Shader
                OpCapability VulkanMemoryModel
@@ -9,6 +9,7 @@
                OpCapability CooperativeMatrixKHR
                OpExtension "SPV_KHR_vulkan_memory_model"
                OpExtension "SPV_KHR_cooperative_matrix"
+         %28 = OpExtInstImport "GLSL.std.450"
                OpMemoryModel Logical Vulkan
                OpEntryPoint GLCompute %compute_main "compute_main"
                OpExecutionMode %compute_main LocalSize 1 1 1
@@ -52,32 +53,49 @@
 %_ptr_Function_uint = OpTypePointer Function %uint
 %_ptr_StorageBuffer__arr_float_uint_1024 = OpTypePointer StorageBuffer %_arr_float_uint_1024
      %uint_0 = OpConstant %uint 0
-%_ptr_StorageBuffer_float = OpTypePointer StorageBuffer %float
+     %uint_7 = OpConstant %uint 7
+       %bool = OpTypeBool
 %_ptr_Function_13 = OpTypePointer Function %13
+    %float_0 = OpConstant %float 0
+         %37 = OpConstantComposite %13 %float_0
+%_ptr_StorageBuffer_float = OpTypePointer StorageBuffer %float
        %void = OpTypeVoid
-         %35 = OpTypeFunction %void
+         %49 = OpTypeFunction %void
     %uint_64 = OpConstant %uint 64
 %subgroupMatrixLoad_015e29 = OpFunction %13 None %17
          %18 = OpLabel
       %arg_1 = OpVariable %_ptr_Function_uint Function
       %arg_3 = OpVariable %_ptr_Function_uint Function
+         %35 = OpVariable %_ptr_Function_13 Function %37
         %res = OpVariable %_ptr_Function_13 Function
                OpStore %arg_1 %uint_1
                OpStore %arg_3 %uint_8
          %22 = OpAccessChain %_ptr_StorageBuffer__arr_float_uint_1024 %8 %uint_0 %uint_0
          %25 = OpLoad %uint %arg_1 None
          %26 = OpLoad %uint %arg_3 None
-         %27 = OpAccessChain %_ptr_StorageBuffer_float %22 %25
-         %29 = OpCooperativeMatrixLoadKHR %13 %27 %uint_1 %26 NonPrivatePointer
-               OpStore %res %29
-         %32 = OpLoad %13 %res None
-               OpReturnValue %32
+         %27 = OpExtInst %uint %28 UMax %26 %uint_8
+         %29 = OpIMul %uint %27 %uint_7
+         %31 = OpIAdd %uint %25 %29
+         %32 = OpIAdd %uint %31 %uint_8
+         %33 = OpULessThanEqual %bool %32 %uint_1024
+               OpSelectionMerge %39 None
+               OpBranchConditional %33 %40 %39
+         %40 = OpLabel
+         %44 = OpAccessChain %_ptr_StorageBuffer_float %22 %25
+         %46 = OpCooperativeMatrixLoadKHR %13 %44 %uint_1 %27 NonPrivatePointer
+               OpStore %35 %46 None
+               OpBranch %39
+         %39 = OpLabel
+         %41 = OpLoad %13 %35 None
+               OpStore %res %41
+         %43 = OpLoad %13 %res None
+               OpReturnValue %43
                OpFunctionEnd
-%compute_main = OpFunction %void None %35
-         %36 = OpLabel
-         %37 = OpFunctionCall %13 %subgroupMatrixLoad_015e29
-         %38 = OpAccessChain %_ptr_StorageBuffer__arr_float_uint_1024 %1 %uint_0
-         %39 = OpAccessChain %_ptr_StorageBuffer_float %38 %uint_0
-               OpCooperativeMatrixStoreKHR %39 %37 %uint_0 %uint_64 NonPrivatePointer
+%compute_main = OpFunction %void None %49
+         %50 = OpLabel
+         %51 = OpFunctionCall %13 %subgroupMatrixLoad_015e29
+         %52 = OpAccessChain %_ptr_StorageBuffer__arr_float_uint_1024 %1 %uint_0
+         %53 = OpAccessChain %_ptr_StorageBuffer_float %52 %uint_0
+               OpCooperativeMatrixStoreKHR %53 %51 %uint_0 %uint_64 NonPrivatePointer
                OpReturn
                OpFunctionEnd
diff --git a/test/tint/builtins/gen/var/subgroupMatrixLoad/06933f.wgsl.expected.msl b/test/tint/builtins/gen/var/subgroupMatrixLoad/06933f.wgsl.expected.msl
index 134ce97..c180f0d 100644
--- a/test/tint/builtins/gen/var/subgroupMatrixLoad/06933f.wgsl.expected.msl
+++ b/test/tint/builtins/gen/var/subgroupMatrixLoad/06933f.wgsl.expected.msl
@@ -25,9 +25,15 @@
 simdgroup_half8x8 subgroupMatrixLoad_06933f(tint_module_vars_struct tint_module_vars) {
   uint arg_1 = 1u;
   uint arg_3 = 8u;
-  simdgroup_half8x8 v = make_filled_simdgroup_matrix<half, 8, 8>(0.0h);
-  simdgroup_load(v, (&(*tint_module_vars.sb_rw).arg_0[arg_1]), ulong(arg_3), ulong2(0ul), true);
-  simdgroup_half8x8 res = v;
+  uint const v = arg_1;
+  uint const v_1 = max(arg_3, 8u);
+  simdgroup_half8x8 v_2 = make_filled_simdgroup_matrix<half, 8, 8>(0.0h);
+  if ((((v + (v_1 * 7u)) + 8u) <= 1024u)) {
+    simdgroup_half8x8 v_3 = make_filled_simdgroup_matrix<half, 8, 8>(0.0h);
+    simdgroup_load(v_3, (&(*tint_module_vars.sb_rw).arg_0[v]), ulong(v_1), ulong2(0ul), true);
+    v_2 = v_3;
+  }
+  simdgroup_half8x8 res = v_2;
   return res;
 }
 
diff --git a/test/tint/builtins/gen/var/subgroupMatrixLoad/06933f.wgsl.expected.spvasm b/test/tint/builtins/gen/var/subgroupMatrixLoad/06933f.wgsl.expected.spvasm
index 9963ac5..52ff5bc 100644
--- a/test/tint/builtins/gen/var/subgroupMatrixLoad/06933f.wgsl.expected.spvasm
+++ b/test/tint/builtins/gen/var/subgroupMatrixLoad/06933f.wgsl.expected.spvasm
@@ -1,7 +1,7 @@
 ; SPIR-V
 ; Version: 1.3
 ; Generator: Google Tint Compiler; 1
-; Bound: 42
+; Bound: 56
 ; Schema: 0
                OpCapability Shader
                OpCapability VulkanMemoryModel
@@ -12,6 +12,7 @@
                OpCapability CooperativeMatrixKHR
                OpExtension "SPV_KHR_vulkan_memory_model"
                OpExtension "SPV_KHR_cooperative_matrix"
+         %28 = OpExtInstImport "GLSL.std.450"
                OpMemoryModel Logical Vulkan
                OpEntryPoint GLCompute %compute_main "compute_main"
                OpExecutionMode %compute_main LocalSize 1 1 1
@@ -55,32 +56,49 @@
 %_ptr_Function_uint = OpTypePointer Function %uint
      %uint_1 = OpConstant %uint 1
 %_ptr_StorageBuffer__arr_half_uint_1024 = OpTypePointer StorageBuffer %_arr_half_uint_1024
-%_ptr_StorageBuffer_half = OpTypePointer StorageBuffer %half
+     %uint_7 = OpConstant %uint 7
+       %bool = OpTypeBool
 %_ptr_Function_13 = OpTypePointer Function %13
+%half_0x0p_0 = OpConstant %half 0x0p+0
+         %37 = OpConstantComposite %13 %half_0x0p_0
+%_ptr_StorageBuffer_half = OpTypePointer StorageBuffer %half
        %void = OpTypeVoid
-         %35 = OpTypeFunction %void
+         %49 = OpTypeFunction %void
     %uint_64 = OpConstant %uint 64
 %subgroupMatrixLoad_06933f = OpFunction %13 None %17
          %18 = OpLabel
       %arg_1 = OpVariable %_ptr_Function_uint Function
       %arg_3 = OpVariable %_ptr_Function_uint Function
+         %35 = OpVariable %_ptr_Function_13 Function %37
         %res = OpVariable %_ptr_Function_13 Function
                OpStore %arg_1 %uint_1
                OpStore %arg_3 %uint_8
          %23 = OpAccessChain %_ptr_StorageBuffer__arr_half_uint_1024 %8 %uint_0 %uint_0
          %25 = OpLoad %uint %arg_1 None
          %26 = OpLoad %uint %arg_3 None
-         %27 = OpAccessChain %_ptr_StorageBuffer_half %23 %25
-         %29 = OpCooperativeMatrixLoadKHR %13 %27 %uint_1 %26 NonPrivatePointer
-               OpStore %res %29
-         %32 = OpLoad %13 %res None
-               OpReturnValue %32
+         %27 = OpExtInst %uint %28 UMax %26 %uint_8
+         %29 = OpIMul %uint %27 %uint_7
+         %31 = OpIAdd %uint %25 %29
+         %32 = OpIAdd %uint %31 %uint_8
+         %33 = OpULessThanEqual %bool %32 %uint_1024
+               OpSelectionMerge %39 None
+               OpBranchConditional %33 %40 %39
+         %40 = OpLabel
+         %44 = OpAccessChain %_ptr_StorageBuffer_half %23 %25
+         %46 = OpCooperativeMatrixLoadKHR %13 %44 %uint_1 %27 NonPrivatePointer
+               OpStore %35 %46 None
+               OpBranch %39
+         %39 = OpLabel
+         %41 = OpLoad %13 %35 None
+               OpStore %res %41
+         %43 = OpLoad %13 %res None
+               OpReturnValue %43
                OpFunctionEnd
-%compute_main = OpFunction %void None %35
-         %36 = OpLabel
-         %37 = OpFunctionCall %13 %subgroupMatrixLoad_06933f
-         %38 = OpAccessChain %_ptr_StorageBuffer__arr_half_uint_1024 %1 %uint_0
-         %39 = OpAccessChain %_ptr_StorageBuffer_half %38 %uint_0
-               OpCooperativeMatrixStoreKHR %39 %37 %uint_0 %uint_64 NonPrivatePointer
+%compute_main = OpFunction %void None %49
+         %50 = OpLabel
+         %51 = OpFunctionCall %13 %subgroupMatrixLoad_06933f
+         %52 = OpAccessChain %_ptr_StorageBuffer__arr_half_uint_1024 %1 %uint_0
+         %53 = OpAccessChain %_ptr_StorageBuffer_half %52 %uint_0
+               OpCooperativeMatrixStoreKHR %53 %51 %uint_0 %uint_64 NonPrivatePointer
                OpReturn
                OpFunctionEnd
diff --git a/test/tint/builtins/gen/var/subgroupMatrixLoad/0cc7b0.wgsl.expected.msl b/test/tint/builtins/gen/var/subgroupMatrixLoad/0cc7b0.wgsl.expected.msl
index 467d599..e5379e3 100644
--- a/test/tint/builtins/gen/var/subgroupMatrixLoad/0cc7b0.wgsl.expected.msl
+++ b/test/tint/builtins/gen/var/subgroupMatrixLoad/0cc7b0.wgsl.expected.msl
@@ -20,18 +20,25 @@
 struct tint_module_vars_struct {
   device tint_array<half, 1024>* prevent_dce;
   device SB_RW* sb_rw;
+  const constant tint_array<uint4, 1>* tint_storage_buffer_sizes;
 };
 
 simdgroup_half8x8 subgroupMatrixLoad_0cc7b0(tint_module_vars_struct tint_module_vars) {
   uint arg_1 = 1u;
   uint arg_3 = 8u;
-  simdgroup_half8x8 v = make_filled_simdgroup_matrix<half, 8, 8>(0.0h);
-  simdgroup_load(v, (&(*tint_module_vars.sb_rw).arg_0[arg_1]), ulong(arg_3), ulong2(0ul), true);
-  simdgroup_half8x8 res = v;
+  uint const v = arg_1;
+  uint const v_1 = max(arg_3, 8u);
+  simdgroup_half8x8 v_2 = make_filled_simdgroup_matrix<half, 8, 8>(0.0h);
+  if ((((v + (v_1 * 7u)) + 8u) <= (((*tint_module_vars.tint_storage_buffer_sizes)[0u].x - 0u) / 2u))) {
+    simdgroup_half8x8 v_3 = make_filled_simdgroup_matrix<half, 8, 8>(0.0h);
+    simdgroup_load(v_3, (&(*tint_module_vars.sb_rw).arg_0[v]), ulong(v_1), ulong2(0ul), true);
+    v_2 = v_3;
+  }
+  simdgroup_half8x8 res = v_2;
   return res;
 }
 
-kernel void compute_main(device tint_array<half, 1024>* prevent_dce [[buffer(0)]], device SB_RW* sb_rw [[buffer(1)]]) {
-  tint_module_vars_struct const tint_module_vars = tint_module_vars_struct{.prevent_dce=prevent_dce, .sb_rw=sb_rw};
+kernel void compute_main(device tint_array<half, 1024>* prevent_dce [[buffer(0)]], device SB_RW* sb_rw [[buffer(1)]], const constant tint_array<uint4, 1>* tint_storage_buffer_sizes [[buffer(30)]]) {
+  tint_module_vars_struct const tint_module_vars = tint_module_vars_struct{.prevent_dce=prevent_dce, .sb_rw=sb_rw, .tint_storage_buffer_sizes=tint_storage_buffer_sizes};
   simdgroup_store(subgroupMatrixLoad_0cc7b0(tint_module_vars), (&(*tint_module_vars.prevent_dce)[0u]), ulong(64u), ulong2(0ul), false);
 }
diff --git a/test/tint/builtins/gen/var/subgroupMatrixLoad/0cc7b0.wgsl.expected.spvasm b/test/tint/builtins/gen/var/subgroupMatrixLoad/0cc7b0.wgsl.expected.spvasm
index 68257de..453fcc0 100644
--- a/test/tint/builtins/gen/var/subgroupMatrixLoad/0cc7b0.wgsl.expected.spvasm
+++ b/test/tint/builtins/gen/var/subgroupMatrixLoad/0cc7b0.wgsl.expected.spvasm
@@ -1,7 +1,7 @@
 ; SPIR-V
 ; Version: 1.3
 ; Generator: Google Tint Compiler; 1
-; Bound: 43
+; Bound: 58
 ; Schema: 0
                OpCapability Shader
                OpCapability VulkanMemoryModel
@@ -12,6 +12,7 @@
                OpCapability CooperativeMatrixKHR
                OpExtension "SPV_KHR_vulkan_memory_model"
                OpExtension "SPV_KHR_cooperative_matrix"
+         %28 = OpExtInstImport "GLSL.std.450"
                OpMemoryModel Logical Vulkan
                OpEntryPoint GLCompute %compute_main "compute_main"
                OpExecutionMode %compute_main LocalSize 1 1 1
@@ -54,33 +55,51 @@
 %_ptr_Function_uint = OpTypePointer Function %uint
      %uint_1 = OpConstant %uint 1
 %_ptr_StorageBuffer__runtimearr_half = OpTypePointer StorageBuffer %_runtimearr_half
-%_ptr_StorageBuffer_half = OpTypePointer StorageBuffer %half
+     %uint_7 = OpConstant %uint 7
+       %bool = OpTypeBool
 %_ptr_Function_13 = OpTypePointer Function %13
+%half_0x0p_0 = OpConstant %half 0x0p+0
+         %38 = OpConstantComposite %13 %half_0x0p_0
+%_ptr_StorageBuffer_half = OpTypePointer StorageBuffer %half
        %void = OpTypeVoid
-         %35 = OpTypeFunction %void
+         %50 = OpTypeFunction %void
 %_ptr_StorageBuffer__arr_half_uint_1024 = OpTypePointer StorageBuffer %_arr_half_uint_1024
     %uint_64 = OpConstant %uint 64
 %subgroupMatrixLoad_0cc7b0 = OpFunction %13 None %17
          %18 = OpLabel
       %arg_1 = OpVariable %_ptr_Function_uint Function
       %arg_3 = OpVariable %_ptr_Function_uint Function
+         %36 = OpVariable %_ptr_Function_13 Function %38
         %res = OpVariable %_ptr_Function_13 Function
                OpStore %arg_1 %uint_1
                OpStore %arg_3 %uint_8
          %23 = OpAccessChain %_ptr_StorageBuffer__runtimearr_half %sb_rw %uint_0
          %25 = OpLoad %uint %arg_1 None
          %26 = OpLoad %uint %arg_3 None
-         %27 = OpAccessChain %_ptr_StorageBuffer_half %23 %25
-         %29 = OpCooperativeMatrixLoadKHR %13 %27 %uint_1 %26 NonPrivatePointer
-               OpStore %res %29
-         %32 = OpLoad %13 %res None
-               OpReturnValue %32
+         %27 = OpExtInst %uint %28 UMax %26 %uint_8
+         %29 = OpArrayLength %uint %sb_rw 0
+         %30 = OpIMul %uint %27 %uint_7
+         %32 = OpIAdd %uint %25 %30
+         %33 = OpIAdd %uint %32 %uint_8
+         %34 = OpULessThanEqual %bool %33 %29
+               OpSelectionMerge %40 None
+               OpBranchConditional %34 %41 %40
+         %41 = OpLabel
+         %45 = OpAccessChain %_ptr_StorageBuffer_half %23 %25
+         %47 = OpCooperativeMatrixLoadKHR %13 %45 %uint_1 %27 NonPrivatePointer
+               OpStore %36 %47 None
+               OpBranch %40
+         %40 = OpLabel
+         %42 = OpLoad %13 %36 None
+               OpStore %res %42
+         %44 = OpLoad %13 %res None
+               OpReturnValue %44
                OpFunctionEnd
-%compute_main = OpFunction %void None %35
-         %36 = OpLabel
-         %37 = OpFunctionCall %13 %subgroupMatrixLoad_0cc7b0
-         %38 = OpAccessChain %_ptr_StorageBuffer__arr_half_uint_1024 %1 %uint_0
-         %40 = OpAccessChain %_ptr_StorageBuffer_half %38 %uint_0
-               OpCooperativeMatrixStoreKHR %40 %37 %uint_0 %uint_64 NonPrivatePointer
+%compute_main = OpFunction %void None %50
+         %51 = OpLabel
+         %52 = OpFunctionCall %13 %subgroupMatrixLoad_0cc7b0
+         %53 = OpAccessChain %_ptr_StorageBuffer__arr_half_uint_1024 %1 %uint_0
+         %55 = OpAccessChain %_ptr_StorageBuffer_half %53 %uint_0
+               OpCooperativeMatrixStoreKHR %55 %52 %uint_0 %uint_64 NonPrivatePointer
                OpReturn
                OpFunctionEnd
diff --git a/test/tint/builtins/gen/var/subgroupMatrixLoad/0f48aa.wgsl.expected.msl b/test/tint/builtins/gen/var/subgroupMatrixLoad/0f48aa.wgsl.expected.msl
index b3c951f..714ddfa 100644
--- a/test/tint/builtins/gen/var/subgroupMatrixLoad/0f48aa.wgsl.expected.msl
+++ b/test/tint/builtins/gen/var/subgroupMatrixLoad/0f48aa.wgsl.expected.msl
@@ -25,9 +25,15 @@
 simdgroup_float8x8 subgroupMatrixLoad_0f48aa(tint_module_vars_struct tint_module_vars) {
   uint arg_1 = 1u;
   uint arg_3 = 8u;
-  simdgroup_float8x8 v = make_filled_simdgroup_matrix<float, 8, 8>(0.0f);
-  simdgroup_load(v, (&(*tint_module_vars.sb_ro).arg_0[arg_1]), ulong(arg_3), ulong2(0ul), true);
-  simdgroup_float8x8 res = v;
+  uint const v = arg_1;
+  uint const v_1 = max(arg_3, 8u);
+  simdgroup_float8x8 v_2 = make_filled_simdgroup_matrix<float, 8, 8>(0.0f);
+  if ((((v + (v_1 * 7u)) + 8u) <= 1024u)) {
+    simdgroup_float8x8 v_3 = make_filled_simdgroup_matrix<float, 8, 8>(0.0f);
+    simdgroup_load(v_3, (&(*tint_module_vars.sb_ro).arg_0[v]), ulong(v_1), ulong2(0ul), true);
+    v_2 = v_3;
+  }
+  simdgroup_float8x8 res = v_2;
   return res;
 }
 
diff --git a/test/tint/builtins/gen/var/subgroupMatrixLoad/0f48aa.wgsl.expected.spvasm b/test/tint/builtins/gen/var/subgroupMatrixLoad/0f48aa.wgsl.expected.spvasm
index 9b46851..30ee63e 100644
--- a/test/tint/builtins/gen/var/subgroupMatrixLoad/0f48aa.wgsl.expected.spvasm
+++ b/test/tint/builtins/gen/var/subgroupMatrixLoad/0f48aa.wgsl.expected.spvasm
@@ -1,7 +1,7 @@
 ; SPIR-V
 ; Version: 1.3
 ; Generator: Google Tint Compiler; 1
-; Bound: 45
+; Bound: 59
 ; Schema: 0
                OpCapability Shader
                OpCapability VulkanMemoryModel
@@ -9,6 +9,7 @@
                OpCapability CooperativeMatrixKHR
                OpExtension "SPV_KHR_vulkan_memory_model"
                OpExtension "SPV_KHR_cooperative_matrix"
+         %29 = OpExtInstImport "GLSL.std.450"
                OpMemoryModel Logical Vulkan
                OpEntryPoint GLCompute %compute_main "compute_main"
                OpExecutionMode %compute_main LocalSize 1 1 1
@@ -54,10 +55,14 @@
      %uint_1 = OpConstant %uint 1
 %_ptr_StorageBuffer__arr_float_uint_1024 = OpTypePointer StorageBuffer %_arr_float_uint_1024
      %uint_0 = OpConstant %uint 0
-%_ptr_StorageBuffer_float = OpTypePointer StorageBuffer %float
+     %uint_7 = OpConstant %uint 7
+       %bool = OpTypeBool
 %_ptr_Function_13 = OpTypePointer Function %13
+    %float_0 = OpConstant %float 0
+         %38 = OpConstantComposite %13 %float_0
+%_ptr_StorageBuffer_float = OpTypePointer StorageBuffer %float
        %void = OpTypeVoid
-         %36 = OpTypeFunction %void
+         %50 = OpTypeFunction %void
 %_ptr_StorageBuffer__arr_float_uint_1024_0 = OpTypePointer StorageBuffer %_arr_float_uint_1024
 %_ptr_StorageBuffer_float_0 = OpTypePointer StorageBuffer %float
     %uint_64 = OpConstant %uint 64
@@ -65,23 +70,36 @@
          %18 = OpLabel
       %arg_1 = OpVariable %_ptr_Function_uint Function
       %arg_3 = OpVariable %_ptr_Function_uint Function
+         %36 = OpVariable %_ptr_Function_13 Function %38
         %res = OpVariable %_ptr_Function_13 Function
                OpStore %arg_1 %uint_1
                OpStore %arg_3 %uint_8
          %23 = OpAccessChain %_ptr_StorageBuffer__arr_float_uint_1024 %8 %uint_0 %uint_0
          %26 = OpLoad %uint %arg_1 None
          %27 = OpLoad %uint %arg_3 None
-         %28 = OpAccessChain %_ptr_StorageBuffer_float %23 %26
-         %30 = OpCooperativeMatrixLoadKHR %13 %28 %uint_1 %27 NonPrivatePointer
-               OpStore %res %30
-         %33 = OpLoad %13 %res None
-               OpReturnValue %33
+         %28 = OpExtInst %uint %29 UMax %27 %uint_8
+         %30 = OpIMul %uint %28 %uint_7
+         %32 = OpIAdd %uint %26 %30
+         %33 = OpIAdd %uint %32 %uint_8
+         %34 = OpULessThanEqual %bool %33 %uint_1024
+               OpSelectionMerge %40 None
+               OpBranchConditional %34 %41 %40
+         %41 = OpLabel
+         %45 = OpAccessChain %_ptr_StorageBuffer_float %23 %26
+         %47 = OpCooperativeMatrixLoadKHR %13 %45 %uint_1 %28 NonPrivatePointer
+               OpStore %36 %47 None
+               OpBranch %40
+         %40 = OpLabel
+         %42 = OpLoad %13 %36 None
+               OpStore %res %42
+         %44 = OpLoad %13 %res None
+               OpReturnValue %44
                OpFunctionEnd
-%compute_main = OpFunction %void None %36
-         %37 = OpLabel
-         %38 = OpFunctionCall %13 %subgroupMatrixLoad_0f48aa
-         %39 = OpAccessChain %_ptr_StorageBuffer__arr_float_uint_1024_0 %1 %uint_0
-         %41 = OpAccessChain %_ptr_StorageBuffer_float_0 %39 %uint_0
-               OpCooperativeMatrixStoreKHR %41 %38 %uint_0 %uint_64 NonPrivatePointer
+%compute_main = OpFunction %void None %50
+         %51 = OpLabel
+         %52 = OpFunctionCall %13 %subgroupMatrixLoad_0f48aa
+         %53 = OpAccessChain %_ptr_StorageBuffer__arr_float_uint_1024_0 %1 %uint_0
+         %55 = OpAccessChain %_ptr_StorageBuffer_float_0 %53 %uint_0
+               OpCooperativeMatrixStoreKHR %55 %52 %uint_0 %uint_64 NonPrivatePointer
                OpReturn
                OpFunctionEnd
diff --git a/test/tint/builtins/gen/var/subgroupMatrixLoad/119c99.wgsl.expected.spvasm b/test/tint/builtins/gen/var/subgroupMatrixLoad/119c99.wgsl.expected.spvasm
index 0d81489..4cee419 100644
--- a/test/tint/builtins/gen/var/subgroupMatrixLoad/119c99.wgsl.expected.spvasm
+++ b/test/tint/builtins/gen/var/subgroupMatrixLoad/119c99.wgsl.expected.spvasm
@@ -1,7 +1,7 @@
 ; SPIR-V
 ; Version: 1.3
 ; Generator: Google Tint Compiler; 1
-; Bound: 42
+; Bound: 56
 ; Schema: 0
                OpCapability Shader
                OpCapability VulkanMemoryModel
@@ -9,6 +9,7 @@
                OpCapability CooperativeMatrixKHR
                OpExtension "SPV_KHR_vulkan_memory_model"
                OpExtension "SPV_KHR_cooperative_matrix"
+         %28 = OpExtInstImport "GLSL.std.450"
                OpMemoryModel Logical Vulkan
                OpEntryPoint GLCompute %compute_main "compute_main"
                OpExecutionMode %compute_main LocalSize 1 1 1
@@ -52,32 +53,49 @@
 %_ptr_Function_uint = OpTypePointer Function %uint
 %_ptr_StorageBuffer__arr_int_uint_1024 = OpTypePointer StorageBuffer %_arr_int_uint_1024
      %uint_0 = OpConstant %uint 0
-%_ptr_StorageBuffer_int = OpTypePointer StorageBuffer %int
+     %uint_7 = OpConstant %uint 7
+       %bool = OpTypeBool
 %_ptr_Function_13 = OpTypePointer Function %13
+      %int_0 = OpConstant %int 0
+         %37 = OpConstantComposite %13 %int_0
+%_ptr_StorageBuffer_int = OpTypePointer StorageBuffer %int
        %void = OpTypeVoid
-         %35 = OpTypeFunction %void
+         %49 = OpTypeFunction %void
     %uint_64 = OpConstant %uint 64
 %subgroupMatrixLoad_119c99 = OpFunction %13 None %17
          %18 = OpLabel
       %arg_1 = OpVariable %_ptr_Function_uint Function
       %arg_3 = OpVariable %_ptr_Function_uint Function
+         %35 = OpVariable %_ptr_Function_13 Function %37
         %res = OpVariable %_ptr_Function_13 Function
                OpStore %arg_1 %uint_1
                OpStore %arg_3 %uint_8
          %22 = OpAccessChain %_ptr_StorageBuffer__arr_int_uint_1024 %8 %uint_0 %uint_0
          %25 = OpLoad %uint %arg_1 None
          %26 = OpLoad %uint %arg_3 None
-         %27 = OpAccessChain %_ptr_StorageBuffer_int %22 %25
-         %29 = OpCooperativeMatrixLoadKHR %13 %27 %uint_1 %26 NonPrivatePointer
-               OpStore %res %29
-         %32 = OpLoad %13 %res None
-               OpReturnValue %32
+         %27 = OpExtInst %uint %28 UMax %26 %uint_8
+         %29 = OpIMul %uint %27 %uint_7
+         %31 = OpIAdd %uint %25 %29
+         %32 = OpIAdd %uint %31 %uint_8
+         %33 = OpULessThanEqual %bool %32 %uint_1024
+               OpSelectionMerge %39 None
+               OpBranchConditional %33 %40 %39
+         %40 = OpLabel
+         %44 = OpAccessChain %_ptr_StorageBuffer_int %22 %25
+         %46 = OpCooperativeMatrixLoadKHR %13 %44 %uint_1 %27 NonPrivatePointer
+               OpStore %35 %46 None
+               OpBranch %39
+         %39 = OpLabel
+         %41 = OpLoad %13 %35 None
+               OpStore %res %41
+         %43 = OpLoad %13 %res None
+               OpReturnValue %43
                OpFunctionEnd
-%compute_main = OpFunction %void None %35
-         %36 = OpLabel
-         %37 = OpFunctionCall %13 %subgroupMatrixLoad_119c99
-         %38 = OpAccessChain %_ptr_StorageBuffer__arr_int_uint_1024 %1 %uint_0
-         %39 = OpAccessChain %_ptr_StorageBuffer_int %38 %uint_0
-               OpCooperativeMatrixStoreKHR %39 %37 %uint_0 %uint_64 NonPrivatePointer
+%compute_main = OpFunction %void None %49
+         %50 = OpLabel
+         %51 = OpFunctionCall %13 %subgroupMatrixLoad_119c99
+         %52 = OpAccessChain %_ptr_StorageBuffer__arr_int_uint_1024 %1 %uint_0
+         %53 = OpAccessChain %_ptr_StorageBuffer_int %52 %uint_0
+               OpCooperativeMatrixStoreKHR %53 %51 %uint_0 %uint_64 NonPrivatePointer
                OpReturn
                OpFunctionEnd
diff --git a/test/tint/builtins/gen/var/subgroupMatrixLoad/11bb4b.wgsl.expected.msl b/test/tint/builtins/gen/var/subgroupMatrixLoad/11bb4b.wgsl.expected.msl
index a94fa74..00ed7c9 100644
--- a/test/tint/builtins/gen/var/subgroupMatrixLoad/11bb4b.wgsl.expected.msl
+++ b/test/tint/builtins/gen/var/subgroupMatrixLoad/11bb4b.wgsl.expected.msl
@@ -25,9 +25,15 @@
 simdgroup_half8x8 subgroupMatrixLoad_11bb4b(tint_module_vars_struct tint_module_vars) {
   uint arg_1 = 1u;
   uint arg_3 = 8u;
-  simdgroup_half8x8 v = make_filled_simdgroup_matrix<half, 8, 8>(0.0h);
-  simdgroup_load(v, (&(*tint_module_vars.sb_rw).arg_0[arg_1]), ulong(arg_3), ulong2(0ul), true);
-  simdgroup_half8x8 res = v;
+  uint const v = arg_1;
+  uint const v_1 = max(arg_3, 8u);
+  simdgroup_half8x8 v_2 = make_filled_simdgroup_matrix<half, 8, 8>(0.0h);
+  if ((((v + (v_1 * 7u)) + 8u) <= 1024u)) {
+    simdgroup_half8x8 v_3 = make_filled_simdgroup_matrix<half, 8, 8>(0.0h);
+    simdgroup_load(v_3, (&(*tint_module_vars.sb_rw).arg_0[v]), ulong(v_1), ulong2(0ul), true);
+    v_2 = v_3;
+  }
+  simdgroup_half8x8 res = v_2;
   return res;
 }
 
diff --git a/test/tint/builtins/gen/var/subgroupMatrixLoad/11bb4b.wgsl.expected.spvasm b/test/tint/builtins/gen/var/subgroupMatrixLoad/11bb4b.wgsl.expected.spvasm
index 4b0f9cb..939debe 100644
--- a/test/tint/builtins/gen/var/subgroupMatrixLoad/11bb4b.wgsl.expected.spvasm
+++ b/test/tint/builtins/gen/var/subgroupMatrixLoad/11bb4b.wgsl.expected.spvasm
@@ -1,7 +1,7 @@
 ; SPIR-V
 ; Version: 1.3
 ; Generator: Google Tint Compiler; 1
-; Bound: 43
+; Bound: 57
 ; Schema: 0
                OpCapability Shader
                OpCapability VulkanMemoryModel
@@ -12,6 +12,7 @@
                OpCapability CooperativeMatrixKHR
                OpExtension "SPV_KHR_vulkan_memory_model"
                OpExtension "SPV_KHR_cooperative_matrix"
+         %29 = OpExtInstImport "GLSL.std.450"
                OpMemoryModel Logical Vulkan
                OpEntryPoint GLCompute %compute_main "compute_main"
                OpExecutionMode %compute_main LocalSize 1 1 1
@@ -56,32 +57,49 @@
      %uint_1 = OpConstant %uint 1
 %_ptr_StorageBuffer__arr_half_uint_1024 = OpTypePointer StorageBuffer %_arr_half_uint_1024
      %uint_0 = OpConstant %uint 0
-%_ptr_StorageBuffer_half = OpTypePointer StorageBuffer %half
+     %uint_7 = OpConstant %uint 7
+       %bool = OpTypeBool
 %_ptr_Function_13 = OpTypePointer Function %13
+%half_0x0p_0 = OpConstant %half 0x0p+0
+         %38 = OpConstantComposite %13 %half_0x0p_0
+%_ptr_StorageBuffer_half = OpTypePointer StorageBuffer %half
        %void = OpTypeVoid
-         %36 = OpTypeFunction %void
+         %50 = OpTypeFunction %void
     %uint_64 = OpConstant %uint 64
 %subgroupMatrixLoad_11bb4b = OpFunction %13 None %17
          %18 = OpLabel
       %arg_1 = OpVariable %_ptr_Function_uint Function
       %arg_3 = OpVariable %_ptr_Function_uint Function
+         %36 = OpVariable %_ptr_Function_13 Function %38
         %res = OpVariable %_ptr_Function_13 Function
                OpStore %arg_1 %uint_1
                OpStore %arg_3 %uint_8
          %23 = OpAccessChain %_ptr_StorageBuffer__arr_half_uint_1024 %8 %uint_0 %uint_0
          %26 = OpLoad %uint %arg_1 None
          %27 = OpLoad %uint %arg_3 None
-         %28 = OpAccessChain %_ptr_StorageBuffer_half %23 %26
-         %30 = OpCooperativeMatrixLoadKHR %13 %28 %uint_1 %27 NonPrivatePointer
-               OpStore %res %30
-         %33 = OpLoad %13 %res None
-               OpReturnValue %33
+         %28 = OpExtInst %uint %29 UMax %27 %uint_8
+         %30 = OpIMul %uint %28 %uint_7
+         %32 = OpIAdd %uint %26 %30
+         %33 = OpIAdd %uint %32 %uint_8
+         %34 = OpULessThanEqual %bool %33 %uint_1024
+               OpSelectionMerge %40 None
+               OpBranchConditional %34 %41 %40
+         %41 = OpLabel
+         %45 = OpAccessChain %_ptr_StorageBuffer_half %23 %26
+         %47 = OpCooperativeMatrixLoadKHR %13 %45 %uint_1 %28 NonPrivatePointer
+               OpStore %36 %47 None
+               OpBranch %40
+         %40 = OpLabel
+         %42 = OpLoad %13 %36 None
+               OpStore %res %42
+         %44 = OpLoad %13 %res None
+               OpReturnValue %44
                OpFunctionEnd
-%compute_main = OpFunction %void None %36
-         %37 = OpLabel
-         %38 = OpFunctionCall %13 %subgroupMatrixLoad_11bb4b
-         %39 = OpAccessChain %_ptr_StorageBuffer__arr_half_uint_1024 %1 %uint_0
-         %40 = OpAccessChain %_ptr_StorageBuffer_half %39 %uint_0
-               OpCooperativeMatrixStoreKHR %40 %38 %uint_0 %uint_64 NonPrivatePointer
+%compute_main = OpFunction %void None %50
+         %51 = OpLabel
+         %52 = OpFunctionCall %13 %subgroupMatrixLoad_11bb4b
+         %53 = OpAccessChain %_ptr_StorageBuffer__arr_half_uint_1024 %1 %uint_0
+         %54 = OpAccessChain %_ptr_StorageBuffer_half %53 %uint_0
+               OpCooperativeMatrixStoreKHR %54 %52 %uint_0 %uint_64 NonPrivatePointer
                OpReturn
                OpFunctionEnd
diff --git a/test/tint/builtins/gen/var/subgroupMatrixLoad/128bf4.wgsl.expected.msl b/test/tint/builtins/gen/var/subgroupMatrixLoad/128bf4.wgsl.expected.msl
index 549fb44..51b1e0e 100644
--- a/test/tint/builtins/gen/var/subgroupMatrixLoad/128bf4.wgsl.expected.msl
+++ b/test/tint/builtins/gen/var/subgroupMatrixLoad/128bf4.wgsl.expected.msl
@@ -20,18 +20,25 @@
 struct tint_module_vars_struct {
   device tint_array<half, 1024>* prevent_dce;
   device SB_RW* sb_rw;
+  const constant tint_array<uint4, 1>* tint_storage_buffer_sizes;
 };
 
 simdgroup_half8x8 subgroupMatrixLoad_128bf4(tint_module_vars_struct tint_module_vars) {
   uint arg_1 = 1u;
   uint arg_3 = 8u;
-  simdgroup_half8x8 v = make_filled_simdgroup_matrix<half, 8, 8>(0.0h);
-  simdgroup_load(v, (&(*tint_module_vars.sb_rw).arg_0[arg_1]), ulong(arg_3), ulong2(0ul), true);
-  simdgroup_half8x8 res = v;
+  uint const v = arg_1;
+  uint const v_1 = max(arg_3, 8u);
+  simdgroup_half8x8 v_2 = make_filled_simdgroup_matrix<half, 8, 8>(0.0h);
+  if ((((v + (v_1 * 7u)) + 8u) <= (((*tint_module_vars.tint_storage_buffer_sizes)[0u].x - 0u) / 2u))) {
+    simdgroup_half8x8 v_3 = make_filled_simdgroup_matrix<half, 8, 8>(0.0h);
+    simdgroup_load(v_3, (&(*tint_module_vars.sb_rw).arg_0[v]), ulong(v_1), ulong2(0ul), true);
+    v_2 = v_3;
+  }
+  simdgroup_half8x8 res = v_2;
   return res;
 }
 
-kernel void compute_main(device tint_array<half, 1024>* prevent_dce [[buffer(0)]], device SB_RW* sb_rw [[buffer(1)]]) {
-  tint_module_vars_struct const tint_module_vars = tint_module_vars_struct{.prevent_dce=prevent_dce, .sb_rw=sb_rw};
+kernel void compute_main(device tint_array<half, 1024>* prevent_dce [[buffer(0)]], device SB_RW* sb_rw [[buffer(1)]], const constant tint_array<uint4, 1>* tint_storage_buffer_sizes [[buffer(30)]]) {
+  tint_module_vars_struct const tint_module_vars = tint_module_vars_struct{.prevent_dce=prevent_dce, .sb_rw=sb_rw, .tint_storage_buffer_sizes=tint_storage_buffer_sizes};
   simdgroup_store(subgroupMatrixLoad_128bf4(tint_module_vars), (&(*tint_module_vars.prevent_dce)[0u]), ulong(64u), ulong2(0ul), false);
 }
diff --git a/test/tint/builtins/gen/var/subgroupMatrixLoad/128bf4.wgsl.expected.spvasm b/test/tint/builtins/gen/var/subgroupMatrixLoad/128bf4.wgsl.expected.spvasm
index 1752b2d..3d06833 100644
--- a/test/tint/builtins/gen/var/subgroupMatrixLoad/128bf4.wgsl.expected.spvasm
+++ b/test/tint/builtins/gen/var/subgroupMatrixLoad/128bf4.wgsl.expected.spvasm
@@ -1,7 +1,7 @@
 ; SPIR-V
 ; Version: 1.3
 ; Generator: Google Tint Compiler; 1
-; Bound: 44
+; Bound: 59
 ; Schema: 0
                OpCapability Shader
                OpCapability VulkanMemoryModel
@@ -12,6 +12,7 @@
                OpCapability CooperativeMatrixKHR
                OpExtension "SPV_KHR_vulkan_memory_model"
                OpExtension "SPV_KHR_cooperative_matrix"
+         %29 = OpExtInstImport "GLSL.std.450"
                OpMemoryModel Logical Vulkan
                OpEntryPoint GLCompute %compute_main "compute_main"
                OpExecutionMode %compute_main LocalSize 1 1 1
@@ -55,33 +56,51 @@
      %uint_1 = OpConstant %uint 1
 %_ptr_StorageBuffer__runtimearr_half = OpTypePointer StorageBuffer %_runtimearr_half
      %uint_0 = OpConstant %uint 0
-%_ptr_StorageBuffer_half = OpTypePointer StorageBuffer %half
+     %uint_7 = OpConstant %uint 7
+       %bool = OpTypeBool
 %_ptr_Function_13 = OpTypePointer Function %13
+%half_0x0p_0 = OpConstant %half 0x0p+0
+         %39 = OpConstantComposite %13 %half_0x0p_0
+%_ptr_StorageBuffer_half = OpTypePointer StorageBuffer %half
        %void = OpTypeVoid
-         %36 = OpTypeFunction %void
+         %51 = OpTypeFunction %void
 %_ptr_StorageBuffer__arr_half_uint_1024 = OpTypePointer StorageBuffer %_arr_half_uint_1024
     %uint_64 = OpConstant %uint 64
 %subgroupMatrixLoad_128bf4 = OpFunction %13 None %17
          %18 = OpLabel
       %arg_1 = OpVariable %_ptr_Function_uint Function
       %arg_3 = OpVariable %_ptr_Function_uint Function
+         %37 = OpVariable %_ptr_Function_13 Function %39
         %res = OpVariable %_ptr_Function_13 Function
                OpStore %arg_1 %uint_1
                OpStore %arg_3 %uint_8
          %23 = OpAccessChain %_ptr_StorageBuffer__runtimearr_half %sb_rw %uint_0
          %26 = OpLoad %uint %arg_1 None
          %27 = OpLoad %uint %arg_3 None
-         %28 = OpAccessChain %_ptr_StorageBuffer_half %23 %26
-         %30 = OpCooperativeMatrixLoadKHR %13 %28 %uint_1 %27 NonPrivatePointer
-               OpStore %res %30
-         %33 = OpLoad %13 %res None
-               OpReturnValue %33
+         %28 = OpExtInst %uint %29 UMax %27 %uint_8
+         %30 = OpArrayLength %uint %sb_rw 0
+         %31 = OpIMul %uint %28 %uint_7
+         %33 = OpIAdd %uint %26 %31
+         %34 = OpIAdd %uint %33 %uint_8
+         %35 = OpULessThanEqual %bool %34 %30
+               OpSelectionMerge %41 None
+               OpBranchConditional %35 %42 %41
+         %42 = OpLabel
+         %46 = OpAccessChain %_ptr_StorageBuffer_half %23 %26
+         %48 = OpCooperativeMatrixLoadKHR %13 %46 %uint_1 %28 NonPrivatePointer
+               OpStore %37 %48 None
+               OpBranch %41
+         %41 = OpLabel
+         %43 = OpLoad %13 %37 None
+               OpStore %res %43
+         %45 = OpLoad %13 %res None
+               OpReturnValue %45
                OpFunctionEnd
-%compute_main = OpFunction %void None %36
-         %37 = OpLabel
-         %38 = OpFunctionCall %13 %subgroupMatrixLoad_128bf4
-         %39 = OpAccessChain %_ptr_StorageBuffer__arr_half_uint_1024 %1 %uint_0
-         %41 = OpAccessChain %_ptr_StorageBuffer_half %39 %uint_0
-               OpCooperativeMatrixStoreKHR %41 %38 %uint_0 %uint_64 NonPrivatePointer
+%compute_main = OpFunction %void None %51
+         %52 = OpLabel
+         %53 = OpFunctionCall %13 %subgroupMatrixLoad_128bf4
+         %54 = OpAccessChain %_ptr_StorageBuffer__arr_half_uint_1024 %1 %uint_0
+         %56 = OpAccessChain %_ptr_StorageBuffer_half %54 %uint_0
+               OpCooperativeMatrixStoreKHR %56 %53 %uint_0 %uint_64 NonPrivatePointer
                OpReturn
                OpFunctionEnd
diff --git a/test/tint/builtins/gen/var/subgroupMatrixLoad/12b63e.wgsl.expected.msl b/test/tint/builtins/gen/var/subgroupMatrixLoad/12b63e.wgsl.expected.msl
index fdd9496..69a2b7a 100644
--- a/test/tint/builtins/gen/var/subgroupMatrixLoad/12b63e.wgsl.expected.msl
+++ b/test/tint/builtins/gen/var/subgroupMatrixLoad/12b63e.wgsl.expected.msl
@@ -25,24 +25,30 @@
 simdgroup_half8x8 subgroupMatrixLoad_12b63e(tint_module_vars_struct tint_module_vars) {
   uint arg_1 = 1u;
   uint arg_3 = 8u;
-  simdgroup_half8x8 v = make_filled_simdgroup_matrix<half, 8, 8>(0.0h);
-  simdgroup_load(v, (&(*tint_module_vars.arg_0)[arg_1]), ulong(arg_3), ulong2(0ul), true);
-  simdgroup_half8x8 res = v;
+  uint const v = arg_1;
+  uint const v_1 = max(arg_3, 8u);
+  simdgroup_half8x8 v_2 = make_filled_simdgroup_matrix<half, 8, 8>(0.0h);
+  if ((((v + (v_1 * 7u)) + 8u) <= 1024u)) {
+    simdgroup_half8x8 v_3 = make_filled_simdgroup_matrix<half, 8, 8>(0.0h);
+    simdgroup_load(v_3, (&(*tint_module_vars.arg_0)[v]), ulong(v_1), ulong2(0ul), true);
+    v_2 = v_3;
+  }
+  simdgroup_half8x8 res = v_2;
   return res;
 }
 
 void compute_main_inner(uint tint_local_index, tint_module_vars_struct tint_module_vars) {
   {
-    uint v_1 = 0u;
-    v_1 = tint_local_index;
+    uint v_4 = 0u;
+    v_4 = tint_local_index;
     while(true) {
-      uint const v_2 = v_1;
-      if ((v_2 >= 1024u)) {
+      uint const v_5 = v_4;
+      if ((v_5 >= 1024u)) {
         break;
       }
-      (*tint_module_vars.arg_0)[v_2] = 0.0h;
+      (*tint_module_vars.arg_0)[v_5] = 0.0h;
       {
-        v_1 = (v_2 + 1u);
+        v_4 = (v_5 + 1u);
       }
       continue;
     }
@@ -51,7 +57,7 @@
   simdgroup_store(subgroupMatrixLoad_12b63e(tint_module_vars), (&(*tint_module_vars.prevent_dce)[0u]), ulong(64u), ulong2(0ul), false);
 }
 
-kernel void compute_main(uint tint_local_index [[thread_index_in_threadgroup]], device tint_array<half, 1024>* prevent_dce [[buffer(0)]], threadgroup tint_symbol_1* v_3 [[threadgroup(0)]]) {
-  tint_module_vars_struct const tint_module_vars = tint_module_vars_struct{.prevent_dce=prevent_dce, .arg_0=(&(*v_3).tint_symbol)};
+kernel void compute_main(uint tint_local_index [[thread_index_in_threadgroup]], device tint_array<half, 1024>* prevent_dce [[buffer(0)]], threadgroup tint_symbol_1* v_6 [[threadgroup(0)]]) {
+  tint_module_vars_struct const tint_module_vars = tint_module_vars_struct{.prevent_dce=prevent_dce, .arg_0=(&(*v_6).tint_symbol)};
   compute_main_inner(tint_local_index, tint_module_vars);
 }
diff --git a/test/tint/builtins/gen/var/subgroupMatrixLoad/12b63e.wgsl.expected.spvasm b/test/tint/builtins/gen/var/subgroupMatrixLoad/12b63e.wgsl.expected.spvasm
index 2641e68..b5772f2 100644
--- a/test/tint/builtins/gen/var/subgroupMatrixLoad/12b63e.wgsl.expected.spvasm
+++ b/test/tint/builtins/gen/var/subgroupMatrixLoad/12b63e.wgsl.expected.spvasm
@@ -1,7 +1,7 @@
 ; SPIR-V
 ; Version: 1.3
 ; Generator: Google Tint Compiler; 1
-; Bound: 65
+; Bound: 77
 ; Schema: 0
                OpCapability Shader
                OpCapability VulkanMemoryModel
@@ -12,6 +12,7 @@
                OpCapability CooperativeMatrixKHR
                OpExtension "SPV_KHR_vulkan_memory_model"
                OpExtension "SPV_KHR_cooperative_matrix"
+         %26 = OpExtInstImport "GLSL.std.450"
                OpMemoryModel Logical Vulkan
                OpEntryPoint GLCompute %compute_main "compute_main" %compute_main_local_invocation_index_Input
                OpExecutionMode %compute_main LocalSize 1 1 1
@@ -50,68 +51,83 @@
          %14 = OpTypeCooperativeMatrixKHR %half %uint_3 %uint_8 %uint_8 %uint_1
          %18 = OpTypeFunction %14
 %_ptr_Function_uint = OpTypePointer Function %uint
-%_ptr_Workgroup_half = OpTypePointer Workgroup %half
+     %uint_7 = OpConstant %uint 7
+       %bool = OpTypeBool
 %_ptr_Function_14 = OpTypePointer Function %14
+%half_0x0p_0 = OpConstant %half 0x0p+0
+         %35 = OpConstantComposite %14 %half_0x0p_0
+%_ptr_Workgroup_half = OpTypePointer Workgroup %half
        %void = OpTypeVoid
-         %34 = OpTypeFunction %void %uint
+         %48 = OpTypeFunction %void %uint
      %uint_2 = OpConstant %uint 2
  %uint_24840 = OpConstant %uint 24840
 %_ptr_StorageBuffer__arr_half_uint_1024 = OpTypePointer StorageBuffer %_arr_half_uint_1024
      %uint_0 = OpConstant %uint 0
 %_ptr_StorageBuffer_half = OpTypePointer StorageBuffer %half
     %uint_64 = OpConstant %uint 64
-       %bool = OpTypeBool
-%half_0x0p_0 = OpConstant %half 0x0p+0
-         %61 = OpTypeFunction %void
+         %73 = OpTypeFunction %void
 %subgroupMatrixLoad_12b63e = OpFunction %14 None %18
          %19 = OpLabel
       %arg_1 = OpVariable %_ptr_Function_uint Function
       %arg_3 = OpVariable %_ptr_Function_uint Function
+         %33 = OpVariable %_ptr_Function_14 Function %35
         %res = OpVariable %_ptr_Function_14 Function
                OpStore %arg_1 %uint_1
                OpStore %arg_3 %uint_8
          %23 = OpLoad %uint %arg_1 None
          %24 = OpLoad %uint %arg_3 None
-         %25 = OpAccessChain %_ptr_Workgroup_half %arg_0 %23
-         %27 = OpCooperativeMatrixLoadKHR %14 %25 %uint_1 %24 NonPrivatePointer
-               OpStore %res %27
-         %30 = OpLoad %14 %res None
-               OpReturnValue %30
-               OpFunctionEnd
-%compute_main_inner = OpFunction %void None %34
-%tint_local_index = OpFunctionParameter %uint
-         %35 = OpLabel
-               OpBranch %36
-         %36 = OpLabel
-               OpBranch %39
-         %39 = OpLabel
-         %41 = OpPhi %uint %tint_local_index %36 %42 %38
-               OpLoopMerge %40 %38 None
+         %25 = OpExtInst %uint %26 UMax %24 %uint_8
+         %27 = OpIMul %uint %25 %uint_7
+         %29 = OpIAdd %uint %23 %27
+         %30 = OpIAdd %uint %29 %uint_8
+         %31 = OpULessThanEqual %bool %30 %uint_1024
+               OpSelectionMerge %37 None
+               OpBranchConditional %31 %38 %37
+         %38 = OpLabel
+         %42 = OpAccessChain %_ptr_Workgroup_half %arg_0 %23
+         %44 = OpCooperativeMatrixLoadKHR %14 %42 %uint_1 %25 NonPrivatePointer
+               OpStore %33 %44 None
                OpBranch %37
          %37 = OpLabel
-         %54 = OpUGreaterThanEqual %bool %41 %uint_1024
-               OpSelectionMerge %56 None
-               OpBranchConditional %54 %57 %56
-         %57 = OpLabel
-               OpBranch %40
-         %56 = OpLabel
-         %58 = OpAccessChain %_ptr_Workgroup_half %arg_0 %41
-               OpStore %58 %half_0x0p_0 NonPrivatePointer
-               OpBranch %38
-         %38 = OpLabel
-         %42 = OpIAdd %uint %41 %uint_1
-               OpBranch %39
-         %40 = OpLabel
+         %39 = OpLoad %14 %33 None
+               OpStore %res %39
+         %41 = OpLoad %14 %res None
+               OpReturnValue %41
+               OpFunctionEnd
+%compute_main_inner = OpFunction %void None %48
+%tint_local_index = OpFunctionParameter %uint
+         %49 = OpLabel
+               OpBranch %50
+         %50 = OpLabel
+               OpBranch %53
+         %53 = OpLabel
+         %55 = OpPhi %uint %tint_local_index %50 %56 %52
+               OpLoopMerge %54 %52 None
+               OpBranch %51
+         %51 = OpLabel
+         %68 = OpUGreaterThanEqual %bool %55 %uint_1024
+               OpSelectionMerge %69 None
+               OpBranchConditional %68 %70 %69
+         %70 = OpLabel
+               OpBranch %54
+         %69 = OpLabel
+         %71 = OpAccessChain %_ptr_Workgroup_half %arg_0 %55
+               OpStore %71 %half_0x0p_0 NonPrivatePointer
+               OpBranch %52
+         %52 = OpLabel
+         %56 = OpIAdd %uint %55 %uint_1
+               OpBranch %53
+         %54 = OpLabel
                OpControlBarrier %uint_2 %uint_2 %uint_24840
-         %46 = OpFunctionCall %14 %subgroupMatrixLoad_12b63e
-         %47 = OpAccessChain %_ptr_StorageBuffer__arr_half_uint_1024 %1 %uint_0
-         %50 = OpAccessChain %_ptr_StorageBuffer_half %47 %uint_0
-               OpCooperativeMatrixStoreKHR %50 %46 %uint_0 %uint_64 NonPrivatePointer
+         %60 = OpFunctionCall %14 %subgroupMatrixLoad_12b63e
+         %61 = OpAccessChain %_ptr_StorageBuffer__arr_half_uint_1024 %1 %uint_0
+         %64 = OpAccessChain %_ptr_StorageBuffer_half %61 %uint_0
+               OpCooperativeMatrixStoreKHR %64 %60 %uint_0 %uint_64 NonPrivatePointer
                OpReturn
                OpFunctionEnd
-%compute_main = OpFunction %void None %61
-         %62 = OpLabel
-         %63 = OpLoad %uint %compute_main_local_invocation_index_Input None
-         %64 = OpFunctionCall %void %compute_main_inner %63
+%compute_main = OpFunction %void None %73
+         %74 = OpLabel
+         %75 = OpLoad %uint %compute_main_local_invocation_index_Input None
+         %76 = OpFunctionCall %void %compute_main_inner %75
                OpReturn
                OpFunctionEnd
diff --git a/test/tint/builtins/gen/var/subgroupMatrixLoad/12ca82.wgsl.expected.spvasm b/test/tint/builtins/gen/var/subgroupMatrixLoad/12ca82.wgsl.expected.spvasm
index e78d6b6..7527671 100644
--- a/test/tint/builtins/gen/var/subgroupMatrixLoad/12ca82.wgsl.expected.spvasm
+++ b/test/tint/builtins/gen/var/subgroupMatrixLoad/12ca82.wgsl.expected.spvasm
@@ -1,7 +1,7 @@
 ; SPIR-V
 ; Version: 1.3
 ; Generator: Google Tint Compiler; 1
-; Bound: 43
+; Bound: 57
 ; Schema: 0
                OpCapability Shader
                OpCapability VulkanMemoryModel
@@ -9,6 +9,7 @@
                OpCapability CooperativeMatrixKHR
                OpExtension "SPV_KHR_vulkan_memory_model"
                OpExtension "SPV_KHR_cooperative_matrix"
+         %28 = OpExtInstImport "GLSL.std.450"
                OpMemoryModel Logical Vulkan
                OpEntryPoint GLCompute %compute_main "compute_main"
                OpExecutionMode %compute_main LocalSize 1 1 1
@@ -51,33 +52,50 @@
      %uint_1 = OpConstant %uint 1
 %_ptr_StorageBuffer__runtimearr_uint = OpTypePointer StorageBuffer %_runtimearr_uint
      %uint_0 = OpConstant %uint 0
-%_ptr_StorageBuffer_uint = OpTypePointer StorageBuffer %uint
+     %uint_7 = OpConstant %uint 7
+       %bool = OpTypeBool
 %_ptr_Function_12 = OpTypePointer Function %12
+         %38 = OpConstantComposite %12 %uint_0
+%_ptr_StorageBuffer_uint = OpTypePointer StorageBuffer %uint
        %void = OpTypeVoid
-         %35 = OpTypeFunction %void
+         %49 = OpTypeFunction %void
 %_ptr_StorageBuffer__arr_uint_uint_1024 = OpTypePointer StorageBuffer %_arr_uint_uint_1024
     %uint_64 = OpConstant %uint 64
 %subgroupMatrixLoad_12ca82 = OpFunction %12 None %16
          %17 = OpLabel
       %arg_1 = OpVariable %_ptr_Function_uint Function
       %arg_3 = OpVariable %_ptr_Function_uint Function
+         %36 = OpVariable %_ptr_Function_12 Function %38
         %res = OpVariable %_ptr_Function_12 Function
                OpStore %arg_1 %uint_1
                OpStore %arg_3 %uint_8
          %22 = OpAccessChain %_ptr_StorageBuffer__runtimearr_uint %sb_rw %uint_0
          %25 = OpLoad %uint %arg_1 None
          %26 = OpLoad %uint %arg_3 None
-         %27 = OpAccessChain %_ptr_StorageBuffer_uint %22 %25
-         %29 = OpCooperativeMatrixLoadKHR %12 %27 %uint_1 %26 NonPrivatePointer
-               OpStore %res %29
-         %32 = OpLoad %12 %res None
-               OpReturnValue %32
+         %27 = OpExtInst %uint %28 UMax %26 %uint_8
+         %29 = OpArrayLength %uint %sb_rw 0
+         %30 = OpIMul %uint %27 %uint_7
+         %32 = OpIAdd %uint %25 %30
+         %33 = OpIAdd %uint %32 %uint_8
+         %34 = OpULessThanEqual %bool %33 %29
+               OpSelectionMerge %39 None
+               OpBranchConditional %34 %40 %39
+         %40 = OpLabel
+         %44 = OpAccessChain %_ptr_StorageBuffer_uint %22 %25
+         %46 = OpCooperativeMatrixLoadKHR %12 %44 %uint_1 %27 NonPrivatePointer
+               OpStore %36 %46 None
+               OpBranch %39
+         %39 = OpLabel
+         %41 = OpLoad %12 %36 None
+               OpStore %res %41
+         %43 = OpLoad %12 %res None
+               OpReturnValue %43
                OpFunctionEnd
-%compute_main = OpFunction %void None %35
-         %36 = OpLabel
-         %37 = OpFunctionCall %12 %subgroupMatrixLoad_12ca82
-         %38 = OpAccessChain %_ptr_StorageBuffer__arr_uint_uint_1024 %1 %uint_0
-         %40 = OpAccessChain %_ptr_StorageBuffer_uint %38 %uint_0
-               OpCooperativeMatrixStoreKHR %40 %37 %uint_0 %uint_64 NonPrivatePointer
+%compute_main = OpFunction %void None %49
+         %50 = OpLabel
+         %51 = OpFunctionCall %12 %subgroupMatrixLoad_12ca82
+         %52 = OpAccessChain %_ptr_StorageBuffer__arr_uint_uint_1024 %1 %uint_0
+         %54 = OpAccessChain %_ptr_StorageBuffer_uint %52 %uint_0
+               OpCooperativeMatrixStoreKHR %54 %51 %uint_0 %uint_64 NonPrivatePointer
                OpReturn
                OpFunctionEnd
diff --git a/test/tint/builtins/gen/var/subgroupMatrixLoad/15689c.wgsl.expected.msl b/test/tint/builtins/gen/var/subgroupMatrixLoad/15689c.wgsl.expected.msl
index 0c832bb..46f248d 100644
--- a/test/tint/builtins/gen/var/subgroupMatrixLoad/15689c.wgsl.expected.msl
+++ b/test/tint/builtins/gen/var/subgroupMatrixLoad/15689c.wgsl.expected.msl
@@ -20,18 +20,25 @@
 struct tint_module_vars_struct {
   device tint_array<float, 1024>* prevent_dce;
   device SB_RW* sb_rw;
+  const constant tint_array<uint4, 1>* tint_storage_buffer_sizes;
 };
 
 simdgroup_float8x8 subgroupMatrixLoad_15689c(tint_module_vars_struct tint_module_vars) {
   uint arg_1 = 1u;
   uint arg_3 = 8u;
-  simdgroup_float8x8 v = make_filled_simdgroup_matrix<float, 8, 8>(0.0f);
-  simdgroup_load(v, (&(*tint_module_vars.sb_rw).arg_0[arg_1]), ulong(arg_3), ulong2(0ul), true);
-  simdgroup_float8x8 res = v;
+  uint const v = arg_1;
+  uint const v_1 = max(arg_3, 8u);
+  simdgroup_float8x8 v_2 = make_filled_simdgroup_matrix<float, 8, 8>(0.0f);
+  if ((((v + (v_1 * 7u)) + 8u) <= (((*tint_module_vars.tint_storage_buffer_sizes)[0u].x - 0u) / 4u))) {
+    simdgroup_float8x8 v_3 = make_filled_simdgroup_matrix<float, 8, 8>(0.0f);
+    simdgroup_load(v_3, (&(*tint_module_vars.sb_rw).arg_0[v]), ulong(v_1), ulong2(0ul), true);
+    v_2 = v_3;
+  }
+  simdgroup_float8x8 res = v_2;
   return res;
 }
 
-kernel void compute_main(device tint_array<float, 1024>* prevent_dce [[buffer(0)]], device SB_RW* sb_rw [[buffer(1)]]) {
-  tint_module_vars_struct const tint_module_vars = tint_module_vars_struct{.prevent_dce=prevent_dce, .sb_rw=sb_rw};
+kernel void compute_main(device tint_array<float, 1024>* prevent_dce [[buffer(0)]], device SB_RW* sb_rw [[buffer(1)]], const constant tint_array<uint4, 1>* tint_storage_buffer_sizes [[buffer(30)]]) {
+  tint_module_vars_struct const tint_module_vars = tint_module_vars_struct{.prevent_dce=prevent_dce, .sb_rw=sb_rw, .tint_storage_buffer_sizes=tint_storage_buffer_sizes};
   simdgroup_store(subgroupMatrixLoad_15689c(tint_module_vars), (&(*tint_module_vars.prevent_dce)[0u]), ulong(64u), ulong2(0ul), false);
 }
diff --git a/test/tint/builtins/gen/var/subgroupMatrixLoad/15689c.wgsl.expected.spvasm b/test/tint/builtins/gen/var/subgroupMatrixLoad/15689c.wgsl.expected.spvasm
index 1de2122..4af9fd3 100644
--- a/test/tint/builtins/gen/var/subgroupMatrixLoad/15689c.wgsl.expected.spvasm
+++ b/test/tint/builtins/gen/var/subgroupMatrixLoad/15689c.wgsl.expected.spvasm
@@ -1,7 +1,7 @@
 ; SPIR-V
 ; Version: 1.3
 ; Generator: Google Tint Compiler; 1
-; Bound: 43
+; Bound: 58
 ; Schema: 0
                OpCapability Shader
                OpCapability VulkanMemoryModel
@@ -9,6 +9,7 @@
                OpCapability CooperativeMatrixKHR
                OpExtension "SPV_KHR_vulkan_memory_model"
                OpExtension "SPV_KHR_cooperative_matrix"
+         %28 = OpExtInstImport "GLSL.std.450"
                OpMemoryModel Logical Vulkan
                OpEntryPoint GLCompute %compute_main "compute_main"
                OpExecutionMode %compute_main LocalSize 1 1 1
@@ -51,33 +52,51 @@
 %_ptr_Function_uint = OpTypePointer Function %uint
 %_ptr_StorageBuffer__runtimearr_float = OpTypePointer StorageBuffer %_runtimearr_float
      %uint_0 = OpConstant %uint 0
-%_ptr_StorageBuffer_float = OpTypePointer StorageBuffer %float
+     %uint_7 = OpConstant %uint 7
+       %bool = OpTypeBool
 %_ptr_Function_13 = OpTypePointer Function %13
+    %float_0 = OpConstant %float 0
+         %38 = OpConstantComposite %13 %float_0
+%_ptr_StorageBuffer_float = OpTypePointer StorageBuffer %float
        %void = OpTypeVoid
-         %35 = OpTypeFunction %void
+         %50 = OpTypeFunction %void
 %_ptr_StorageBuffer__arr_float_uint_1024 = OpTypePointer StorageBuffer %_arr_float_uint_1024
     %uint_64 = OpConstant %uint 64
 %subgroupMatrixLoad_15689c = OpFunction %13 None %17
          %18 = OpLabel
       %arg_1 = OpVariable %_ptr_Function_uint Function
       %arg_3 = OpVariable %_ptr_Function_uint Function
+         %36 = OpVariable %_ptr_Function_13 Function %38
         %res = OpVariable %_ptr_Function_13 Function
                OpStore %arg_1 %uint_1
                OpStore %arg_3 %uint_8
          %22 = OpAccessChain %_ptr_StorageBuffer__runtimearr_float %sb_rw %uint_0
          %25 = OpLoad %uint %arg_1 None
          %26 = OpLoad %uint %arg_3 None
-         %27 = OpAccessChain %_ptr_StorageBuffer_float %22 %25
-         %29 = OpCooperativeMatrixLoadKHR %13 %27 %uint_1 %26 NonPrivatePointer
-               OpStore %res %29
-         %32 = OpLoad %13 %res None
-               OpReturnValue %32
+         %27 = OpExtInst %uint %28 UMax %26 %uint_8
+         %29 = OpArrayLength %uint %sb_rw 0
+         %30 = OpIMul %uint %27 %uint_7
+         %32 = OpIAdd %uint %25 %30
+         %33 = OpIAdd %uint %32 %uint_8
+         %34 = OpULessThanEqual %bool %33 %29
+               OpSelectionMerge %40 None
+               OpBranchConditional %34 %41 %40
+         %41 = OpLabel
+         %45 = OpAccessChain %_ptr_StorageBuffer_float %22 %25
+         %47 = OpCooperativeMatrixLoadKHR %13 %45 %uint_1 %27 NonPrivatePointer
+               OpStore %36 %47 None
+               OpBranch %40
+         %40 = OpLabel
+         %42 = OpLoad %13 %36 None
+               OpStore %res %42
+         %44 = OpLoad %13 %res None
+               OpReturnValue %44
                OpFunctionEnd
-%compute_main = OpFunction %void None %35
-         %36 = OpLabel
-         %37 = OpFunctionCall %13 %subgroupMatrixLoad_15689c
-         %38 = OpAccessChain %_ptr_StorageBuffer__arr_float_uint_1024 %1 %uint_0
-         %40 = OpAccessChain %_ptr_StorageBuffer_float %38 %uint_0
-               OpCooperativeMatrixStoreKHR %40 %37 %uint_0 %uint_64 NonPrivatePointer
+%compute_main = OpFunction %void None %50
+         %51 = OpLabel
+         %52 = OpFunctionCall %13 %subgroupMatrixLoad_15689c
+         %53 = OpAccessChain %_ptr_StorageBuffer__arr_float_uint_1024 %1 %uint_0
+         %55 = OpAccessChain %_ptr_StorageBuffer_float %53 %uint_0
+               OpCooperativeMatrixStoreKHR %55 %52 %uint_0 %uint_64 NonPrivatePointer
                OpReturn
                OpFunctionEnd
diff --git a/test/tint/builtins/gen/var/subgroupMatrixLoad/1a0572.wgsl.expected.msl b/test/tint/builtins/gen/var/subgroupMatrixLoad/1a0572.wgsl.expected.msl
index 79d163a..fe6645f 100644
--- a/test/tint/builtins/gen/var/subgroupMatrixLoad/1a0572.wgsl.expected.msl
+++ b/test/tint/builtins/gen/var/subgroupMatrixLoad/1a0572.wgsl.expected.msl
@@ -25,24 +25,30 @@
 simdgroup_half8x8 subgroupMatrixLoad_1a0572(tint_module_vars_struct tint_module_vars) {
   uint arg_1 = 1u;
   uint arg_3 = 8u;
-  simdgroup_half8x8 v = make_filled_simdgroup_matrix<half, 8, 8>(0.0h);
-  simdgroup_load(v, (&(*tint_module_vars.arg_0)[arg_1]), ulong(arg_3), ulong2(0ul), true);
-  simdgroup_half8x8 res = v;
+  uint const v = arg_1;
+  uint const v_1 = max(arg_3, 8u);
+  simdgroup_half8x8 v_2 = make_filled_simdgroup_matrix<half, 8, 8>(0.0h);
+  if ((((v + (v_1 * 7u)) + 8u) <= 1024u)) {
+    simdgroup_half8x8 v_3 = make_filled_simdgroup_matrix<half, 8, 8>(0.0h);
+    simdgroup_load(v_3, (&(*tint_module_vars.arg_0)[v]), ulong(v_1), ulong2(0ul), true);
+    v_2 = v_3;
+  }
+  simdgroup_half8x8 res = v_2;
   return res;
 }
 
 void compute_main_inner(uint tint_local_index, tint_module_vars_struct tint_module_vars) {
   {
-    uint v_1 = 0u;
-    v_1 = tint_local_index;
+    uint v_4 = 0u;
+    v_4 = tint_local_index;
     while(true) {
-      uint const v_2 = v_1;
-      if ((v_2 >= 1024u)) {
+      uint const v_5 = v_4;
+      if ((v_5 >= 1024u)) {
         break;
       }
-      (*tint_module_vars.arg_0)[v_2] = 0.0h;
+      (*tint_module_vars.arg_0)[v_5] = 0.0h;
       {
-        v_1 = (v_2 + 1u);
+        v_4 = (v_5 + 1u);
       }
       continue;
     }
@@ -51,7 +57,7 @@
   simdgroup_store(subgroupMatrixLoad_1a0572(tint_module_vars), (&(*tint_module_vars.prevent_dce)[0u]), ulong(64u), ulong2(0ul), false);
 }
 
-kernel void compute_main(uint tint_local_index [[thread_index_in_threadgroup]], device tint_array<half, 1024>* prevent_dce [[buffer(0)]], threadgroup tint_symbol_1* v_3 [[threadgroup(0)]]) {
-  tint_module_vars_struct const tint_module_vars = tint_module_vars_struct{.prevent_dce=prevent_dce, .arg_0=(&(*v_3).tint_symbol)};
+kernel void compute_main(uint tint_local_index [[thread_index_in_threadgroup]], device tint_array<half, 1024>* prevent_dce [[buffer(0)]], threadgroup tint_symbol_1* v_6 [[threadgroup(0)]]) {
+  tint_module_vars_struct const tint_module_vars = tint_module_vars_struct{.prevent_dce=prevent_dce, .arg_0=(&(*v_6).tint_symbol)};
   compute_main_inner(tint_local_index, tint_module_vars);
 }
diff --git a/test/tint/builtins/gen/var/subgroupMatrixLoad/1a0572.wgsl.expected.spvasm b/test/tint/builtins/gen/var/subgroupMatrixLoad/1a0572.wgsl.expected.spvasm
index 14548f7..b88f9b5 100644
--- a/test/tint/builtins/gen/var/subgroupMatrixLoad/1a0572.wgsl.expected.spvasm
+++ b/test/tint/builtins/gen/var/subgroupMatrixLoad/1a0572.wgsl.expected.spvasm
@@ -1,7 +1,7 @@
 ; SPIR-V
 ; Version: 1.3
 ; Generator: Google Tint Compiler; 1
-; Bound: 65
+; Bound: 77
 ; Schema: 0
                OpCapability Shader
                OpCapability VulkanMemoryModel
@@ -12,6 +12,7 @@
                OpCapability CooperativeMatrixKHR
                OpExtension "SPV_KHR_vulkan_memory_model"
                OpExtension "SPV_KHR_cooperative_matrix"
+         %27 = OpExtInstImport "GLSL.std.450"
                OpMemoryModel Logical Vulkan
                OpEntryPoint GLCompute %compute_main "compute_main" %compute_main_local_invocation_index_Input
                OpExecutionMode %compute_main LocalSize 1 1 1
@@ -51,67 +52,82 @@
          %18 = OpTypeFunction %14
 %_ptr_Function_uint = OpTypePointer Function %uint
      %uint_1 = OpConstant %uint 1
-%_ptr_Workgroup_half = OpTypePointer Workgroup %half
+     %uint_7 = OpConstant %uint 7
+       %bool = OpTypeBool
 %_ptr_Function_14 = OpTypePointer Function %14
+%half_0x0p_0 = OpConstant %half 0x0p+0
+         %36 = OpConstantComposite %14 %half_0x0p_0
+%_ptr_Workgroup_half = OpTypePointer Workgroup %half
        %void = OpTypeVoid
-         %35 = OpTypeFunction %void %uint
+         %49 = OpTypeFunction %void %uint
  %uint_24840 = OpConstant %uint 24840
 %_ptr_StorageBuffer__arr_half_uint_1024 = OpTypePointer StorageBuffer %_arr_half_uint_1024
      %uint_0 = OpConstant %uint 0
 %_ptr_StorageBuffer_half = OpTypePointer StorageBuffer %half
     %uint_64 = OpConstant %uint 64
-       %bool = OpTypeBool
-%half_0x0p_0 = OpConstant %half 0x0p+0
-         %61 = OpTypeFunction %void
+         %73 = OpTypeFunction %void
 %subgroupMatrixLoad_1a0572 = OpFunction %14 None %18
          %19 = OpLabel
       %arg_1 = OpVariable %_ptr_Function_uint Function
       %arg_3 = OpVariable %_ptr_Function_uint Function
+         %34 = OpVariable %_ptr_Function_14 Function %36
         %res = OpVariable %_ptr_Function_14 Function
                OpStore %arg_1 %uint_1
                OpStore %arg_3 %uint_8
          %24 = OpLoad %uint %arg_1 None
          %25 = OpLoad %uint %arg_3 None
-         %26 = OpAccessChain %_ptr_Workgroup_half %arg_0 %24
-         %28 = OpCooperativeMatrixLoadKHR %14 %26 %uint_1 %25 NonPrivatePointer
-               OpStore %res %28
-         %31 = OpLoad %14 %res None
-               OpReturnValue %31
-               OpFunctionEnd
-%compute_main_inner = OpFunction %void None %35
-%tint_local_index = OpFunctionParameter %uint
-         %36 = OpLabel
-               OpBranch %37
-         %37 = OpLabel
-               OpBranch %40
-         %40 = OpLabel
-         %42 = OpPhi %uint %tint_local_index %37 %43 %39
-               OpLoopMerge %41 %39 None
+         %26 = OpExtInst %uint %27 UMax %25 %uint_8
+         %28 = OpIMul %uint %26 %uint_7
+         %30 = OpIAdd %uint %24 %28
+         %31 = OpIAdd %uint %30 %uint_8
+         %32 = OpULessThanEqual %bool %31 %uint_1024
+               OpSelectionMerge %38 None
+               OpBranchConditional %32 %39 %38
+         %39 = OpLabel
+         %43 = OpAccessChain %_ptr_Workgroup_half %arg_0 %24
+         %45 = OpCooperativeMatrixLoadKHR %14 %43 %uint_1 %26 NonPrivatePointer
+               OpStore %34 %45 None
                OpBranch %38
          %38 = OpLabel
-         %54 = OpUGreaterThanEqual %bool %42 %uint_1024
-               OpSelectionMerge %56 None
-               OpBranchConditional %54 %57 %56
-         %57 = OpLabel
-               OpBranch %41
-         %56 = OpLabel
-         %58 = OpAccessChain %_ptr_Workgroup_half %arg_0 %42
-               OpStore %58 %half_0x0p_0 NonPrivatePointer
-               OpBranch %39
-         %39 = OpLabel
-         %43 = OpIAdd %uint %42 %uint_1
-               OpBranch %40
-         %41 = OpLabel
+         %40 = OpLoad %14 %34 None
+               OpStore %res %40
+         %42 = OpLoad %14 %res None
+               OpReturnValue %42
+               OpFunctionEnd
+%compute_main_inner = OpFunction %void None %49
+%tint_local_index = OpFunctionParameter %uint
+         %50 = OpLabel
+               OpBranch %51
+         %51 = OpLabel
+               OpBranch %54
+         %54 = OpLabel
+         %56 = OpPhi %uint %tint_local_index %51 %57 %53
+               OpLoopMerge %55 %53 None
+               OpBranch %52
+         %52 = OpLabel
+         %68 = OpUGreaterThanEqual %bool %56 %uint_1024
+               OpSelectionMerge %69 None
+               OpBranchConditional %68 %70 %69
+         %70 = OpLabel
+               OpBranch %55
+         %69 = OpLabel
+         %71 = OpAccessChain %_ptr_Workgroup_half %arg_0 %56
+               OpStore %71 %half_0x0p_0 NonPrivatePointer
+               OpBranch %53
+         %53 = OpLabel
+         %57 = OpIAdd %uint %56 %uint_1
+               OpBranch %54
+         %55 = OpLabel
                OpControlBarrier %uint_2 %uint_2 %uint_24840
-         %46 = OpFunctionCall %14 %subgroupMatrixLoad_1a0572
-         %47 = OpAccessChain %_ptr_StorageBuffer__arr_half_uint_1024 %1 %uint_0
-         %50 = OpAccessChain %_ptr_StorageBuffer_half %47 %uint_0
-               OpCooperativeMatrixStoreKHR %50 %46 %uint_0 %uint_64 NonPrivatePointer
+         %60 = OpFunctionCall %14 %subgroupMatrixLoad_1a0572
+         %61 = OpAccessChain %_ptr_StorageBuffer__arr_half_uint_1024 %1 %uint_0
+         %64 = OpAccessChain %_ptr_StorageBuffer_half %61 %uint_0
+               OpCooperativeMatrixStoreKHR %64 %60 %uint_0 %uint_64 NonPrivatePointer
                OpReturn
                OpFunctionEnd
-%compute_main = OpFunction %void None %61
-         %62 = OpLabel
-         %63 = OpLoad %uint %compute_main_local_invocation_index_Input None
-         %64 = OpFunctionCall %void %compute_main_inner %63
+%compute_main = OpFunction %void None %73
+         %74 = OpLabel
+         %75 = OpLoad %uint %compute_main_local_invocation_index_Input None
+         %76 = OpFunctionCall %void %compute_main_inner %75
                OpReturn
                OpFunctionEnd
diff --git a/test/tint/builtins/gen/var/subgroupMatrixLoad/21f2c7.wgsl.expected.msl b/test/tint/builtins/gen/var/subgroupMatrixLoad/21f2c7.wgsl.expected.msl
index 0a95b46..acccb0a 100644
--- a/test/tint/builtins/gen/var/subgroupMatrixLoad/21f2c7.wgsl.expected.msl
+++ b/test/tint/builtins/gen/var/subgroupMatrixLoad/21f2c7.wgsl.expected.msl
@@ -20,18 +20,25 @@
 struct tint_module_vars_struct {
   device tint_array<half, 1024>* prevent_dce;
   const device SB_RO* sb_ro;
+  const constant tint_array<uint4, 1>* tint_storage_buffer_sizes;
 };
 
 simdgroup_half8x8 subgroupMatrixLoad_21f2c7(tint_module_vars_struct tint_module_vars) {
   uint arg_1 = 1u;
   uint arg_3 = 8u;
-  simdgroup_half8x8 v = make_filled_simdgroup_matrix<half, 8, 8>(0.0h);
-  simdgroup_load(v, (&(*tint_module_vars.sb_ro).arg_0[arg_1]), ulong(arg_3), ulong2(0ul), true);
-  simdgroup_half8x8 res = v;
+  uint const v = arg_1;
+  uint const v_1 = max(arg_3, 8u);
+  simdgroup_half8x8 v_2 = make_filled_simdgroup_matrix<half, 8, 8>(0.0h);
+  if ((((v + (v_1 * 7u)) + 8u) <= (((*tint_module_vars.tint_storage_buffer_sizes)[0u].x - 0u) / 2u))) {
+    simdgroup_half8x8 v_3 = make_filled_simdgroup_matrix<half, 8, 8>(0.0h);
+    simdgroup_load(v_3, (&(*tint_module_vars.sb_ro).arg_0[v]), ulong(v_1), ulong2(0ul), true);
+    v_2 = v_3;
+  }
+  simdgroup_half8x8 res = v_2;
   return res;
 }
 
-kernel void compute_main(device tint_array<half, 1024>* prevent_dce [[buffer(0)]], const device SB_RO* sb_ro [[buffer(1)]]) {
-  tint_module_vars_struct const tint_module_vars = tint_module_vars_struct{.prevent_dce=prevent_dce, .sb_ro=sb_ro};
+kernel void compute_main(device tint_array<half, 1024>* prevent_dce [[buffer(0)]], const device SB_RO* sb_ro [[buffer(1)]], const constant tint_array<uint4, 1>* tint_storage_buffer_sizes [[buffer(30)]]) {
+  tint_module_vars_struct const tint_module_vars = tint_module_vars_struct{.prevent_dce=prevent_dce, .sb_ro=sb_ro, .tint_storage_buffer_sizes=tint_storage_buffer_sizes};
   simdgroup_store(subgroupMatrixLoad_21f2c7(tint_module_vars), (&(*tint_module_vars.prevent_dce)[0u]), ulong(64u), ulong2(0ul), false);
 }
diff --git a/test/tint/builtins/gen/var/subgroupMatrixLoad/21f2c7.wgsl.expected.spvasm b/test/tint/builtins/gen/var/subgroupMatrixLoad/21f2c7.wgsl.expected.spvasm
index 89f0d90..be25a46 100644
--- a/test/tint/builtins/gen/var/subgroupMatrixLoad/21f2c7.wgsl.expected.spvasm
+++ b/test/tint/builtins/gen/var/subgroupMatrixLoad/21f2c7.wgsl.expected.spvasm
@@ -1,7 +1,7 @@
 ; SPIR-V
 ; Version: 1.3
 ; Generator: Google Tint Compiler; 1
-; Bound: 45
+; Bound: 60
 ; Schema: 0
                OpCapability Shader
                OpCapability VulkanMemoryModel
@@ -12,6 +12,7 @@
                OpCapability CooperativeMatrixKHR
                OpExtension "SPV_KHR_vulkan_memory_model"
                OpExtension "SPV_KHR_cooperative_matrix"
+         %29 = OpExtInstImport "GLSL.std.450"
                OpMemoryModel Logical Vulkan
                OpEntryPoint GLCompute %compute_main "compute_main"
                OpExecutionMode %compute_main LocalSize 1 1 1
@@ -56,10 +57,14 @@
      %uint_1 = OpConstant %uint 1
 %_ptr_StorageBuffer__runtimearr_half = OpTypePointer StorageBuffer %_runtimearr_half
      %uint_0 = OpConstant %uint 0
-%_ptr_StorageBuffer_half = OpTypePointer StorageBuffer %half
+     %uint_7 = OpConstant %uint 7
+       %bool = OpTypeBool
 %_ptr_Function_13 = OpTypePointer Function %13
+%half_0x0p_0 = OpConstant %half 0x0p+0
+         %39 = OpConstantComposite %13 %half_0x0p_0
+%_ptr_StorageBuffer_half = OpTypePointer StorageBuffer %half
        %void = OpTypeVoid
-         %36 = OpTypeFunction %void
+         %51 = OpTypeFunction %void
 %_ptr_StorageBuffer__arr_half_uint_1024 = OpTypePointer StorageBuffer %_arr_half_uint_1024
 %_ptr_StorageBuffer_half_0 = OpTypePointer StorageBuffer %half
     %uint_64 = OpConstant %uint 64
@@ -67,23 +72,37 @@
          %18 = OpLabel
       %arg_1 = OpVariable %_ptr_Function_uint Function
       %arg_3 = OpVariable %_ptr_Function_uint Function
+         %37 = OpVariable %_ptr_Function_13 Function %39
         %res = OpVariable %_ptr_Function_13 Function
                OpStore %arg_1 %uint_1
                OpStore %arg_3 %uint_8
          %23 = OpAccessChain %_ptr_StorageBuffer__runtimearr_half %sb_ro %uint_0
          %26 = OpLoad %uint %arg_1 None
          %27 = OpLoad %uint %arg_3 None
-         %28 = OpAccessChain %_ptr_StorageBuffer_half %23 %26
-         %30 = OpCooperativeMatrixLoadKHR %13 %28 %uint_1 %27 NonPrivatePointer
-               OpStore %res %30
-         %33 = OpLoad %13 %res None
-               OpReturnValue %33
+         %28 = OpExtInst %uint %29 UMax %27 %uint_8
+         %30 = OpArrayLength %uint %sb_ro 0
+         %31 = OpIMul %uint %28 %uint_7
+         %33 = OpIAdd %uint %26 %31
+         %34 = OpIAdd %uint %33 %uint_8
+         %35 = OpULessThanEqual %bool %34 %30
+               OpSelectionMerge %41 None
+               OpBranchConditional %35 %42 %41
+         %42 = OpLabel
+         %46 = OpAccessChain %_ptr_StorageBuffer_half %23 %26
+         %48 = OpCooperativeMatrixLoadKHR %13 %46 %uint_1 %28 NonPrivatePointer
+               OpStore %37 %48 None
+               OpBranch %41
+         %41 = OpLabel
+         %43 = OpLoad %13 %37 None
+               OpStore %res %43
+         %45 = OpLoad %13 %res None
+               OpReturnValue %45
                OpFunctionEnd
-%compute_main = OpFunction %void None %36
-         %37 = OpLabel
-         %38 = OpFunctionCall %13 %subgroupMatrixLoad_21f2c7
-         %39 = OpAccessChain %_ptr_StorageBuffer__arr_half_uint_1024 %1 %uint_0
-         %41 = OpAccessChain %_ptr_StorageBuffer_half_0 %39 %uint_0
-               OpCooperativeMatrixStoreKHR %41 %38 %uint_0 %uint_64 NonPrivatePointer
+%compute_main = OpFunction %void None %51
+         %52 = OpLabel
+         %53 = OpFunctionCall %13 %subgroupMatrixLoad_21f2c7
+         %54 = OpAccessChain %_ptr_StorageBuffer__arr_half_uint_1024 %1 %uint_0
+         %56 = OpAccessChain %_ptr_StorageBuffer_half_0 %54 %uint_0
+               OpCooperativeMatrixStoreKHR %56 %53 %uint_0 %uint_64 NonPrivatePointer
                OpReturn
                OpFunctionEnd
diff --git a/test/tint/builtins/gen/var/subgroupMatrixLoad/23385e.wgsl.expected.spvasm b/test/tint/builtins/gen/var/subgroupMatrixLoad/23385e.wgsl.expected.spvasm
index c16abc1..dad4a47 100644
--- a/test/tint/builtins/gen/var/subgroupMatrixLoad/23385e.wgsl.expected.spvasm
+++ b/test/tint/builtins/gen/var/subgroupMatrixLoad/23385e.wgsl.expected.spvasm
@@ -1,7 +1,7 @@
 ; SPIR-V
 ; Version: 1.3
 ; Generator: Google Tint Compiler; 1
-; Bound: 41
+; Bound: 54
 ; Schema: 0
                OpCapability Shader
                OpCapability VulkanMemoryModel
@@ -9,6 +9,7 @@
                OpCapability CooperativeMatrixKHR
                OpExtension "SPV_KHR_vulkan_memory_model"
                OpExtension "SPV_KHR_cooperative_matrix"
+         %27 = OpExtInstImport "GLSL.std.450"
                OpMemoryModel Logical Vulkan
                OpEntryPoint GLCompute %compute_main "compute_main"
                OpExecutionMode %compute_main LocalSize 1 1 1
@@ -51,32 +52,48 @@
 %_ptr_Function_uint = OpTypePointer Function %uint
 %_ptr_StorageBuffer__arr_uint_uint_1024 = OpTypePointer StorageBuffer %_arr_uint_uint_1024
      %uint_0 = OpConstant %uint 0
-%_ptr_StorageBuffer_uint = OpTypePointer StorageBuffer %uint
+     %uint_7 = OpConstant %uint 7
+       %bool = OpTypeBool
 %_ptr_Function_12 = OpTypePointer Function %12
+         %36 = OpConstantComposite %12 %uint_0
+%_ptr_StorageBuffer_uint = OpTypePointer StorageBuffer %uint
        %void = OpTypeVoid
-         %34 = OpTypeFunction %void
+         %47 = OpTypeFunction %void
     %uint_64 = OpConstant %uint 64
 %subgroupMatrixLoad_23385e = OpFunction %12 None %16
          %17 = OpLabel
       %arg_1 = OpVariable %_ptr_Function_uint Function
       %arg_3 = OpVariable %_ptr_Function_uint Function
+         %34 = OpVariable %_ptr_Function_12 Function %36
         %res = OpVariable %_ptr_Function_12 Function
                OpStore %arg_1 %uint_1
                OpStore %arg_3 %uint_8
          %21 = OpAccessChain %_ptr_StorageBuffer__arr_uint_uint_1024 %7 %uint_0 %uint_0
          %24 = OpLoad %uint %arg_1 None
          %25 = OpLoad %uint %arg_3 None
-         %26 = OpAccessChain %_ptr_StorageBuffer_uint %21 %24
-         %28 = OpCooperativeMatrixLoadKHR %12 %26 %uint_1 %25 NonPrivatePointer
-               OpStore %res %28
-         %31 = OpLoad %12 %res None
-               OpReturnValue %31
+         %26 = OpExtInst %uint %27 UMax %25 %uint_8
+         %28 = OpIMul %uint %26 %uint_7
+         %30 = OpIAdd %uint %24 %28
+         %31 = OpIAdd %uint %30 %uint_8
+         %32 = OpULessThanEqual %bool %31 %uint_1024
+               OpSelectionMerge %37 None
+               OpBranchConditional %32 %38 %37
+         %38 = OpLabel
+         %42 = OpAccessChain %_ptr_StorageBuffer_uint %21 %24
+         %44 = OpCooperativeMatrixLoadKHR %12 %42 %uint_1 %26 NonPrivatePointer
+               OpStore %34 %44 None
+               OpBranch %37
+         %37 = OpLabel
+         %39 = OpLoad %12 %34 None
+               OpStore %res %39
+         %41 = OpLoad %12 %res None
+               OpReturnValue %41
                OpFunctionEnd
-%compute_main = OpFunction %void None %34
-         %35 = OpLabel
-         %36 = OpFunctionCall %12 %subgroupMatrixLoad_23385e
-         %37 = OpAccessChain %_ptr_StorageBuffer__arr_uint_uint_1024 %1 %uint_0
-         %38 = OpAccessChain %_ptr_StorageBuffer_uint %37 %uint_0
-               OpCooperativeMatrixStoreKHR %38 %36 %uint_0 %uint_64 NonPrivatePointer
+%compute_main = OpFunction %void None %47
+         %48 = OpLabel
+         %49 = OpFunctionCall %12 %subgroupMatrixLoad_23385e
+         %50 = OpAccessChain %_ptr_StorageBuffer__arr_uint_uint_1024 %1 %uint_0
+         %51 = OpAccessChain %_ptr_StorageBuffer_uint %50 %uint_0
+               OpCooperativeMatrixStoreKHR %51 %49 %uint_0 %uint_64 NonPrivatePointer
                OpReturn
                OpFunctionEnd
diff --git a/test/tint/builtins/gen/var/subgroupMatrixLoad/30634b.wgsl.expected.spvasm b/test/tint/builtins/gen/var/subgroupMatrixLoad/30634b.wgsl.expected.spvasm
index 207e55d..21c7dca 100644
--- a/test/tint/builtins/gen/var/subgroupMatrixLoad/30634b.wgsl.expected.spvasm
+++ b/test/tint/builtins/gen/var/subgroupMatrixLoad/30634b.wgsl.expected.spvasm
@@ -1,7 +1,7 @@
 ; SPIR-V
 ; Version: 1.3
 ; Generator: Google Tint Compiler; 1
-; Bound: 43
+; Bound: 58
 ; Schema: 0
                OpCapability Shader
                OpCapability VulkanMemoryModel
@@ -9,6 +9,7 @@
                OpCapability CooperativeMatrixKHR
                OpExtension "SPV_KHR_vulkan_memory_model"
                OpExtension "SPV_KHR_cooperative_matrix"
+         %28 = OpExtInstImport "GLSL.std.450"
                OpMemoryModel Logical Vulkan
                OpEntryPoint GLCompute %compute_main "compute_main"
                OpExecutionMode %compute_main LocalSize 1 1 1
@@ -51,33 +52,51 @@
 %_ptr_Function_uint = OpTypePointer Function %uint
      %uint_1 = OpConstant %uint 1
 %_ptr_StorageBuffer__runtimearr_int = OpTypePointer StorageBuffer %_runtimearr_int
-%_ptr_StorageBuffer_int = OpTypePointer StorageBuffer %int
+     %uint_7 = OpConstant %uint 7
+       %bool = OpTypeBool
 %_ptr_Function_13 = OpTypePointer Function %13
+      %int_0 = OpConstant %int 0
+         %38 = OpConstantComposite %13 %int_0
+%_ptr_StorageBuffer_int = OpTypePointer StorageBuffer %int
        %void = OpTypeVoid
-         %35 = OpTypeFunction %void
+         %50 = OpTypeFunction %void
 %_ptr_StorageBuffer__arr_int_uint_1024 = OpTypePointer StorageBuffer %_arr_int_uint_1024
     %uint_64 = OpConstant %uint 64
 %subgroupMatrixLoad_30634b = OpFunction %13 None %17
          %18 = OpLabel
       %arg_1 = OpVariable %_ptr_Function_uint Function
       %arg_3 = OpVariable %_ptr_Function_uint Function
+         %36 = OpVariable %_ptr_Function_13 Function %38
         %res = OpVariable %_ptr_Function_13 Function
                OpStore %arg_1 %uint_1
                OpStore %arg_3 %uint_8
          %23 = OpAccessChain %_ptr_StorageBuffer__runtimearr_int %sb_rw %uint_0
          %25 = OpLoad %uint %arg_1 None
          %26 = OpLoad %uint %arg_3 None
-         %27 = OpAccessChain %_ptr_StorageBuffer_int %23 %25
-         %29 = OpCooperativeMatrixLoadKHR %13 %27 %uint_1 %26 NonPrivatePointer
-               OpStore %res %29
-         %32 = OpLoad %13 %res None
-               OpReturnValue %32
+         %27 = OpExtInst %uint %28 UMax %26 %uint_8
+         %29 = OpArrayLength %uint %sb_rw 0
+         %30 = OpIMul %uint %27 %uint_7
+         %32 = OpIAdd %uint %25 %30
+         %33 = OpIAdd %uint %32 %uint_8
+         %34 = OpULessThanEqual %bool %33 %29
+               OpSelectionMerge %40 None
+               OpBranchConditional %34 %41 %40
+         %41 = OpLabel
+         %45 = OpAccessChain %_ptr_StorageBuffer_int %23 %25
+         %47 = OpCooperativeMatrixLoadKHR %13 %45 %uint_1 %27 NonPrivatePointer
+               OpStore %36 %47 None
+               OpBranch %40
+         %40 = OpLabel
+         %42 = OpLoad %13 %36 None
+               OpStore %res %42
+         %44 = OpLoad %13 %res None
+               OpReturnValue %44
                OpFunctionEnd
-%compute_main = OpFunction %void None %35
-         %36 = OpLabel
-         %37 = OpFunctionCall %13 %subgroupMatrixLoad_30634b
-         %38 = OpAccessChain %_ptr_StorageBuffer__arr_int_uint_1024 %1 %uint_0
-         %40 = OpAccessChain %_ptr_StorageBuffer_int %38 %uint_0
-               OpCooperativeMatrixStoreKHR %40 %37 %uint_0 %uint_64 NonPrivatePointer
+%compute_main = OpFunction %void None %50
+         %51 = OpLabel
+         %52 = OpFunctionCall %13 %subgroupMatrixLoad_30634b
+         %53 = OpAccessChain %_ptr_StorageBuffer__arr_int_uint_1024 %1 %uint_0
+         %55 = OpAccessChain %_ptr_StorageBuffer_int %53 %uint_0
+               OpCooperativeMatrixStoreKHR %55 %52 %uint_0 %uint_64 NonPrivatePointer
                OpReturn
                OpFunctionEnd
diff --git a/test/tint/builtins/gen/var/subgroupMatrixLoad/42db25.wgsl.expected.spvasm b/test/tint/builtins/gen/var/subgroupMatrixLoad/42db25.wgsl.expected.spvasm
index af2ba98..ad94537 100644
--- a/test/tint/builtins/gen/var/subgroupMatrixLoad/42db25.wgsl.expected.spvasm
+++ b/test/tint/builtins/gen/var/subgroupMatrixLoad/42db25.wgsl.expected.spvasm
@@ -1,7 +1,7 @@
 ; SPIR-V
 ; Version: 1.3
 ; Generator: Google Tint Compiler; 1
-; Bound: 44
+; Bound: 57
 ; Schema: 0
                OpCapability Shader
                OpCapability VulkanMemoryModel
@@ -9,6 +9,7 @@
                OpCapability CooperativeMatrixKHR
                OpExtension "SPV_KHR_vulkan_memory_model"
                OpExtension "SPV_KHR_cooperative_matrix"
+         %28 = OpExtInstImport "GLSL.std.450"
                OpMemoryModel Logical Vulkan
                OpEntryPoint GLCompute %compute_main "compute_main"
                OpExecutionMode %compute_main LocalSize 1 1 1
@@ -53,10 +54,13 @@
      %uint_1 = OpConstant %uint 1
 %_ptr_StorageBuffer__arr_uint_uint_1024 = OpTypePointer StorageBuffer %_arr_uint_uint_1024
      %uint_0 = OpConstant %uint 0
-%_ptr_StorageBuffer_uint = OpTypePointer StorageBuffer %uint
+     %uint_7 = OpConstant %uint 7
+       %bool = OpTypeBool
 %_ptr_Function_12 = OpTypePointer Function %12
+         %37 = OpConstantComposite %12 %uint_0
+%_ptr_StorageBuffer_uint = OpTypePointer StorageBuffer %uint
        %void = OpTypeVoid
-         %35 = OpTypeFunction %void
+         %48 = OpTypeFunction %void
 %_ptr_StorageBuffer__arr_uint_uint_1024_0 = OpTypePointer StorageBuffer %_arr_uint_uint_1024
 %_ptr_StorageBuffer_uint_0 = OpTypePointer StorageBuffer %uint
     %uint_64 = OpConstant %uint 64
@@ -64,23 +68,36 @@
          %17 = OpLabel
       %arg_1 = OpVariable %_ptr_Function_uint Function
       %arg_3 = OpVariable %_ptr_Function_uint Function
+         %35 = OpVariable %_ptr_Function_12 Function %37
         %res = OpVariable %_ptr_Function_12 Function
                OpStore %arg_1 %uint_1
                OpStore %arg_3 %uint_8
          %22 = OpAccessChain %_ptr_StorageBuffer__arr_uint_uint_1024 %7 %uint_0 %uint_0
          %25 = OpLoad %uint %arg_1 None
          %26 = OpLoad %uint %arg_3 None
-         %27 = OpAccessChain %_ptr_StorageBuffer_uint %22 %25
-         %29 = OpCooperativeMatrixLoadKHR %12 %27 %uint_1 %26 NonPrivatePointer
-               OpStore %res %29
-         %32 = OpLoad %12 %res None
-               OpReturnValue %32
+         %27 = OpExtInst %uint %28 UMax %26 %uint_8
+         %29 = OpIMul %uint %27 %uint_7
+         %31 = OpIAdd %uint %25 %29
+         %32 = OpIAdd %uint %31 %uint_8
+         %33 = OpULessThanEqual %bool %32 %uint_1024
+               OpSelectionMerge %38 None
+               OpBranchConditional %33 %39 %38
+         %39 = OpLabel
+         %43 = OpAccessChain %_ptr_StorageBuffer_uint %22 %25
+         %45 = OpCooperativeMatrixLoadKHR %12 %43 %uint_1 %27 NonPrivatePointer
+               OpStore %35 %45 None
+               OpBranch %38
+         %38 = OpLabel
+         %40 = OpLoad %12 %35 None
+               OpStore %res %40
+         %42 = OpLoad %12 %res None
+               OpReturnValue %42
                OpFunctionEnd
-%compute_main = OpFunction %void None %35
-         %36 = OpLabel
-         %37 = OpFunctionCall %12 %subgroupMatrixLoad_42db25
-         %38 = OpAccessChain %_ptr_StorageBuffer__arr_uint_uint_1024_0 %1 %uint_0
-         %40 = OpAccessChain %_ptr_StorageBuffer_uint_0 %38 %uint_0
-               OpCooperativeMatrixStoreKHR %40 %37 %uint_0 %uint_64 NonPrivatePointer
+%compute_main = OpFunction %void None %48
+         %49 = OpLabel
+         %50 = OpFunctionCall %12 %subgroupMatrixLoad_42db25
+         %51 = OpAccessChain %_ptr_StorageBuffer__arr_uint_uint_1024_0 %1 %uint_0
+         %53 = OpAccessChain %_ptr_StorageBuffer_uint_0 %51 %uint_0
+               OpCooperativeMatrixStoreKHR %53 %50 %uint_0 %uint_64 NonPrivatePointer
                OpReturn
                OpFunctionEnd
diff --git a/test/tint/builtins/gen/var/subgroupMatrixLoad/43c097.wgsl.expected.spvasm b/test/tint/builtins/gen/var/subgroupMatrixLoad/43c097.wgsl.expected.spvasm
index b3012fa..f140e0a 100644
--- a/test/tint/builtins/gen/var/subgroupMatrixLoad/43c097.wgsl.expected.spvasm
+++ b/test/tint/builtins/gen/var/subgroupMatrixLoad/43c097.wgsl.expected.spvasm
@@ -1,7 +1,7 @@
 ; SPIR-V
 ; Version: 1.3
 ; Generator: Google Tint Compiler; 1
-; Bound: 43
+; Bound: 57
 ; Schema: 0
                OpCapability Shader
                OpCapability VulkanMemoryModel
@@ -9,6 +9,7 @@
                OpCapability CooperativeMatrixKHR
                OpExtension "SPV_KHR_vulkan_memory_model"
                OpExtension "SPV_KHR_cooperative_matrix"
+         %29 = OpExtInstImport "GLSL.std.450"
                OpMemoryModel Logical Vulkan
                OpEntryPoint GLCompute %compute_main "compute_main"
                OpExecutionMode %compute_main LocalSize 1 1 1
@@ -53,32 +54,49 @@
      %uint_1 = OpConstant %uint 1
 %_ptr_StorageBuffer__arr_int_uint_1024 = OpTypePointer StorageBuffer %_arr_int_uint_1024
      %uint_0 = OpConstant %uint 0
-%_ptr_StorageBuffer_int = OpTypePointer StorageBuffer %int
+     %uint_7 = OpConstant %uint 7
+       %bool = OpTypeBool
 %_ptr_Function_13 = OpTypePointer Function %13
+      %int_0 = OpConstant %int 0
+         %38 = OpConstantComposite %13 %int_0
+%_ptr_StorageBuffer_int = OpTypePointer StorageBuffer %int
        %void = OpTypeVoid
-         %36 = OpTypeFunction %void
+         %50 = OpTypeFunction %void
     %uint_64 = OpConstant %uint 64
 %subgroupMatrixLoad_43c097 = OpFunction %13 None %17
          %18 = OpLabel
       %arg_1 = OpVariable %_ptr_Function_uint Function
       %arg_3 = OpVariable %_ptr_Function_uint Function
+         %36 = OpVariable %_ptr_Function_13 Function %38
         %res = OpVariable %_ptr_Function_13 Function
                OpStore %arg_1 %uint_1
                OpStore %arg_3 %uint_8
          %23 = OpAccessChain %_ptr_StorageBuffer__arr_int_uint_1024 %8 %uint_0 %uint_0
          %26 = OpLoad %uint %arg_1 None
          %27 = OpLoad %uint %arg_3 None
-         %28 = OpAccessChain %_ptr_StorageBuffer_int %23 %26
-         %30 = OpCooperativeMatrixLoadKHR %13 %28 %uint_1 %27 NonPrivatePointer
-               OpStore %res %30
-         %33 = OpLoad %13 %res None
-               OpReturnValue %33
+         %28 = OpExtInst %uint %29 UMax %27 %uint_8
+         %30 = OpIMul %uint %28 %uint_7
+         %32 = OpIAdd %uint %26 %30
+         %33 = OpIAdd %uint %32 %uint_8
+         %34 = OpULessThanEqual %bool %33 %uint_1024
+               OpSelectionMerge %40 None
+               OpBranchConditional %34 %41 %40
+         %41 = OpLabel
+         %45 = OpAccessChain %_ptr_StorageBuffer_int %23 %26
+         %47 = OpCooperativeMatrixLoadKHR %13 %45 %uint_1 %28 NonPrivatePointer
+               OpStore %36 %47 None
+               OpBranch %40
+         %40 = OpLabel
+         %42 = OpLoad %13 %36 None
+               OpStore %res %42
+         %44 = OpLoad %13 %res None
+               OpReturnValue %44
                OpFunctionEnd
-%compute_main = OpFunction %void None %36
-         %37 = OpLabel
-         %38 = OpFunctionCall %13 %subgroupMatrixLoad_43c097
-         %39 = OpAccessChain %_ptr_StorageBuffer__arr_int_uint_1024 %1 %uint_0
-         %40 = OpAccessChain %_ptr_StorageBuffer_int %39 %uint_0
-               OpCooperativeMatrixStoreKHR %40 %38 %uint_0 %uint_64 NonPrivatePointer
+%compute_main = OpFunction %void None %50
+         %51 = OpLabel
+         %52 = OpFunctionCall %13 %subgroupMatrixLoad_43c097
+         %53 = OpAccessChain %_ptr_StorageBuffer__arr_int_uint_1024 %1 %uint_0
+         %54 = OpAccessChain %_ptr_StorageBuffer_int %53 %uint_0
+               OpCooperativeMatrixStoreKHR %54 %52 %uint_0 %uint_64 NonPrivatePointer
                OpReturn
                OpFunctionEnd
diff --git a/test/tint/builtins/gen/var/subgroupMatrixLoad/457d20.wgsl.expected.spvasm b/test/tint/builtins/gen/var/subgroupMatrixLoad/457d20.wgsl.expected.spvasm
index ce78629..b399700 100644
--- a/test/tint/builtins/gen/var/subgroupMatrixLoad/457d20.wgsl.expected.spvasm
+++ b/test/tint/builtins/gen/var/subgroupMatrixLoad/457d20.wgsl.expected.spvasm
@@ -1,7 +1,7 @@
 ; SPIR-V
 ; Version: 1.3
 ; Generator: Google Tint Compiler; 1
-; Bound: 43
+; Bound: 57
 ; Schema: 0
                OpCapability Shader
                OpCapability VulkanMemoryModel
@@ -9,6 +9,7 @@
                OpCapability CooperativeMatrixKHR
                OpExtension "SPV_KHR_vulkan_memory_model"
                OpExtension "SPV_KHR_cooperative_matrix"
+         %27 = OpExtInstImport "GLSL.std.450"
                OpMemoryModel Logical Vulkan
                OpEntryPoint GLCompute %compute_main "compute_main"
                OpExecutionMode %compute_main LocalSize 1 1 1
@@ -51,10 +52,13 @@
 %_ptr_Function_uint = OpTypePointer Function %uint
      %uint_1 = OpConstant %uint 1
 %_ptr_StorageBuffer__runtimearr_uint = OpTypePointer StorageBuffer %_runtimearr_uint
-%_ptr_StorageBuffer_uint = OpTypePointer StorageBuffer %uint
+     %uint_7 = OpConstant %uint 7
+       %bool = OpTypeBool
 %_ptr_Function_12 = OpTypePointer Function %12
+         %37 = OpConstantComposite %12 %uint_0
+%_ptr_StorageBuffer_uint = OpTypePointer StorageBuffer %uint
        %void = OpTypeVoid
-         %34 = OpTypeFunction %void
+         %48 = OpTypeFunction %void
 %_ptr_StorageBuffer__arr_uint_uint_1024 = OpTypePointer StorageBuffer %_arr_uint_uint_1024
 %_ptr_StorageBuffer_uint_0 = OpTypePointer StorageBuffer %uint
     %uint_64 = OpConstant %uint 64
@@ -62,23 +66,37 @@
          %17 = OpLabel
       %arg_1 = OpVariable %_ptr_Function_uint Function
       %arg_3 = OpVariable %_ptr_Function_uint Function
+         %35 = OpVariable %_ptr_Function_12 Function %37
         %res = OpVariable %_ptr_Function_12 Function
                OpStore %arg_1 %uint_1
                OpStore %arg_3 %uint_8
          %22 = OpAccessChain %_ptr_StorageBuffer__runtimearr_uint %sb_ro %uint_0
          %24 = OpLoad %uint %arg_1 None
          %25 = OpLoad %uint %arg_3 None
-         %26 = OpAccessChain %_ptr_StorageBuffer_uint %22 %24
-         %28 = OpCooperativeMatrixLoadKHR %12 %26 %uint_1 %25 NonPrivatePointer
-               OpStore %res %28
-         %31 = OpLoad %12 %res None
-               OpReturnValue %31
+         %26 = OpExtInst %uint %27 UMax %25 %uint_8
+         %28 = OpArrayLength %uint %sb_ro 0
+         %29 = OpIMul %uint %26 %uint_7
+         %31 = OpIAdd %uint %24 %29
+         %32 = OpIAdd %uint %31 %uint_8
+         %33 = OpULessThanEqual %bool %32 %28
+               OpSelectionMerge %38 None
+               OpBranchConditional %33 %39 %38
+         %39 = OpLabel
+         %43 = OpAccessChain %_ptr_StorageBuffer_uint %22 %24
+         %45 = OpCooperativeMatrixLoadKHR %12 %43 %uint_1 %26 NonPrivatePointer
+               OpStore %35 %45 None
+               OpBranch %38
+         %38 = OpLabel
+         %40 = OpLoad %12 %35 None
+               OpStore %res %40
+         %42 = OpLoad %12 %res None
+               OpReturnValue %42
                OpFunctionEnd
-%compute_main = OpFunction %void None %34
-         %35 = OpLabel
-         %36 = OpFunctionCall %12 %subgroupMatrixLoad_457d20
-         %37 = OpAccessChain %_ptr_StorageBuffer__arr_uint_uint_1024 %1 %uint_0
-         %39 = OpAccessChain %_ptr_StorageBuffer_uint_0 %37 %uint_0
-               OpCooperativeMatrixStoreKHR %39 %36 %uint_0 %uint_64 NonPrivatePointer
+%compute_main = OpFunction %void None %48
+         %49 = OpLabel
+         %50 = OpFunctionCall %12 %subgroupMatrixLoad_457d20
+         %51 = OpAccessChain %_ptr_StorageBuffer__arr_uint_uint_1024 %1 %uint_0
+         %53 = OpAccessChain %_ptr_StorageBuffer_uint_0 %51 %uint_0
+               OpCooperativeMatrixStoreKHR %53 %50 %uint_0 %uint_64 NonPrivatePointer
                OpReturn
                OpFunctionEnd
diff --git a/test/tint/builtins/gen/var/subgroupMatrixLoad/459957.wgsl.expected.msl b/test/tint/builtins/gen/var/subgroupMatrixLoad/459957.wgsl.expected.msl
index b99f583..9634df6 100644
--- a/test/tint/builtins/gen/var/subgroupMatrixLoad/459957.wgsl.expected.msl
+++ b/test/tint/builtins/gen/var/subgroupMatrixLoad/459957.wgsl.expected.msl
@@ -25,24 +25,30 @@
 simdgroup_float8x8 subgroupMatrixLoad_459957(tint_module_vars_struct tint_module_vars) {
   uint arg_1 = 1u;
   uint arg_3 = 8u;
-  simdgroup_float8x8 v = make_filled_simdgroup_matrix<float, 8, 8>(0.0f);
-  simdgroup_load(v, (&(*tint_module_vars.arg_0)[arg_1]), ulong(arg_3), ulong2(0ul), true);
-  simdgroup_float8x8 res = v;
+  uint const v = arg_1;
+  uint const v_1 = max(arg_3, 8u);
+  simdgroup_float8x8 v_2 = make_filled_simdgroup_matrix<float, 8, 8>(0.0f);
+  if ((((v + (v_1 * 7u)) + 8u) <= 1024u)) {
+    simdgroup_float8x8 v_3 = make_filled_simdgroup_matrix<float, 8, 8>(0.0f);
+    simdgroup_load(v_3, (&(*tint_module_vars.arg_0)[v]), ulong(v_1), ulong2(0ul), true);
+    v_2 = v_3;
+  }
+  simdgroup_float8x8 res = v_2;
   return res;
 }
 
 void compute_main_inner(uint tint_local_index, tint_module_vars_struct tint_module_vars) {
   {
-    uint v_1 = 0u;
-    v_1 = tint_local_index;
+    uint v_4 = 0u;
+    v_4 = tint_local_index;
     while(true) {
-      uint const v_2 = v_1;
-      if ((v_2 >= 1024u)) {
+      uint const v_5 = v_4;
+      if ((v_5 >= 1024u)) {
         break;
       }
-      (*tint_module_vars.arg_0)[v_2] = 0.0f;
+      (*tint_module_vars.arg_0)[v_5] = 0.0f;
       {
-        v_1 = (v_2 + 1u);
+        v_4 = (v_5 + 1u);
       }
       continue;
     }
@@ -51,7 +57,7 @@
   simdgroup_store(subgroupMatrixLoad_459957(tint_module_vars), (&(*tint_module_vars.prevent_dce)[0u]), ulong(64u), ulong2(0ul), false);
 }
 
-kernel void compute_main(uint tint_local_index [[thread_index_in_threadgroup]], device tint_array<float, 1024>* prevent_dce [[buffer(0)]], threadgroup tint_symbol_1* v_3 [[threadgroup(0)]]) {
-  tint_module_vars_struct const tint_module_vars = tint_module_vars_struct{.prevent_dce=prevent_dce, .arg_0=(&(*v_3).tint_symbol)};
+kernel void compute_main(uint tint_local_index [[thread_index_in_threadgroup]], device tint_array<float, 1024>* prevent_dce [[buffer(0)]], threadgroup tint_symbol_1* v_6 [[threadgroup(0)]]) {
+  tint_module_vars_struct const tint_module_vars = tint_module_vars_struct{.prevent_dce=prevent_dce, .arg_0=(&(*v_6).tint_symbol)};
   compute_main_inner(tint_local_index, tint_module_vars);
 }
diff --git a/test/tint/builtins/gen/var/subgroupMatrixLoad/459957.wgsl.expected.spvasm b/test/tint/builtins/gen/var/subgroupMatrixLoad/459957.wgsl.expected.spvasm
index 87f4b44..81b0b21 100644
--- a/test/tint/builtins/gen/var/subgroupMatrixLoad/459957.wgsl.expected.spvasm
+++ b/test/tint/builtins/gen/var/subgroupMatrixLoad/459957.wgsl.expected.spvasm
@@ -1,7 +1,7 @@
 ; SPIR-V
 ; Version: 1.3
 ; Generator: Google Tint Compiler; 1
-; Bound: 65
+; Bound: 77
 ; Schema: 0
                OpCapability Shader
                OpCapability VulkanMemoryModel
@@ -9,6 +9,7 @@
                OpCapability CooperativeMatrixKHR
                OpExtension "SPV_KHR_vulkan_memory_model"
                OpExtension "SPV_KHR_cooperative_matrix"
+         %27 = OpExtInstImport "GLSL.std.450"
                OpMemoryModel Logical Vulkan
                OpEntryPoint GLCompute %compute_main "compute_main" %compute_main_local_invocation_index_Input
                OpExecutionMode %compute_main LocalSize 1 1 1
@@ -48,67 +49,82 @@
          %18 = OpTypeFunction %14
 %_ptr_Function_uint = OpTypePointer Function %uint
      %uint_1 = OpConstant %uint 1
-%_ptr_Workgroup_float = OpTypePointer Workgroup %float
+     %uint_7 = OpConstant %uint 7
+       %bool = OpTypeBool
 %_ptr_Function_14 = OpTypePointer Function %14
+    %float_0 = OpConstant %float 0
+         %36 = OpConstantComposite %14 %float_0
+%_ptr_Workgroup_float = OpTypePointer Workgroup %float
        %void = OpTypeVoid
-         %35 = OpTypeFunction %void %uint
+         %49 = OpTypeFunction %void %uint
  %uint_24840 = OpConstant %uint 24840
 %_ptr_StorageBuffer__arr_float_uint_1024 = OpTypePointer StorageBuffer %_arr_float_uint_1024
      %uint_0 = OpConstant %uint 0
 %_ptr_StorageBuffer_float = OpTypePointer StorageBuffer %float
     %uint_64 = OpConstant %uint 64
-       %bool = OpTypeBool
-    %float_0 = OpConstant %float 0
-         %61 = OpTypeFunction %void
+         %73 = OpTypeFunction %void
 %subgroupMatrixLoad_459957 = OpFunction %14 None %18
          %19 = OpLabel
       %arg_1 = OpVariable %_ptr_Function_uint Function
       %arg_3 = OpVariable %_ptr_Function_uint Function
+         %34 = OpVariable %_ptr_Function_14 Function %36
         %res = OpVariable %_ptr_Function_14 Function
                OpStore %arg_1 %uint_1
                OpStore %arg_3 %uint_8
          %24 = OpLoad %uint %arg_1 None
          %25 = OpLoad %uint %arg_3 None
-         %26 = OpAccessChain %_ptr_Workgroup_float %arg_0 %24
-         %28 = OpCooperativeMatrixLoadKHR %14 %26 %uint_1 %25 NonPrivatePointer
-               OpStore %res %28
-         %31 = OpLoad %14 %res None
-               OpReturnValue %31
-               OpFunctionEnd
-%compute_main_inner = OpFunction %void None %35
-%tint_local_index = OpFunctionParameter %uint
-         %36 = OpLabel
-               OpBranch %37
-         %37 = OpLabel
-               OpBranch %40
-         %40 = OpLabel
-         %42 = OpPhi %uint %tint_local_index %37 %43 %39
-               OpLoopMerge %41 %39 None
+         %26 = OpExtInst %uint %27 UMax %25 %uint_8
+         %28 = OpIMul %uint %26 %uint_7
+         %30 = OpIAdd %uint %24 %28
+         %31 = OpIAdd %uint %30 %uint_8
+         %32 = OpULessThanEqual %bool %31 %uint_1024
+               OpSelectionMerge %38 None
+               OpBranchConditional %32 %39 %38
+         %39 = OpLabel
+         %43 = OpAccessChain %_ptr_Workgroup_float %arg_0 %24
+         %45 = OpCooperativeMatrixLoadKHR %14 %43 %uint_1 %26 NonPrivatePointer
+               OpStore %34 %45 None
                OpBranch %38
          %38 = OpLabel
-         %54 = OpUGreaterThanEqual %bool %42 %uint_1024
-               OpSelectionMerge %56 None
-               OpBranchConditional %54 %57 %56
-         %57 = OpLabel
-               OpBranch %41
-         %56 = OpLabel
-         %58 = OpAccessChain %_ptr_Workgroup_float %arg_0 %42
-               OpStore %58 %float_0 NonPrivatePointer
-               OpBranch %39
-         %39 = OpLabel
-         %43 = OpIAdd %uint %42 %uint_1
-               OpBranch %40
-         %41 = OpLabel
+         %40 = OpLoad %14 %34 None
+               OpStore %res %40
+         %42 = OpLoad %14 %res None
+               OpReturnValue %42
+               OpFunctionEnd
+%compute_main_inner = OpFunction %void None %49
+%tint_local_index = OpFunctionParameter %uint
+         %50 = OpLabel
+               OpBranch %51
+         %51 = OpLabel
+               OpBranch %54
+         %54 = OpLabel
+         %56 = OpPhi %uint %tint_local_index %51 %57 %53
+               OpLoopMerge %55 %53 None
+               OpBranch %52
+         %52 = OpLabel
+         %68 = OpUGreaterThanEqual %bool %56 %uint_1024
+               OpSelectionMerge %69 None
+               OpBranchConditional %68 %70 %69
+         %70 = OpLabel
+               OpBranch %55
+         %69 = OpLabel
+         %71 = OpAccessChain %_ptr_Workgroup_float %arg_0 %56
+               OpStore %71 %float_0 NonPrivatePointer
+               OpBranch %53
+         %53 = OpLabel
+         %57 = OpIAdd %uint %56 %uint_1
+               OpBranch %54
+         %55 = OpLabel
                OpControlBarrier %uint_2 %uint_2 %uint_24840
-         %46 = OpFunctionCall %14 %subgroupMatrixLoad_459957
-         %47 = OpAccessChain %_ptr_StorageBuffer__arr_float_uint_1024 %1 %uint_0
-         %50 = OpAccessChain %_ptr_StorageBuffer_float %47 %uint_0
-               OpCooperativeMatrixStoreKHR %50 %46 %uint_0 %uint_64 NonPrivatePointer
+         %60 = OpFunctionCall %14 %subgroupMatrixLoad_459957
+         %61 = OpAccessChain %_ptr_StorageBuffer__arr_float_uint_1024 %1 %uint_0
+         %64 = OpAccessChain %_ptr_StorageBuffer_float %61 %uint_0
+               OpCooperativeMatrixStoreKHR %64 %60 %uint_0 %uint_64 NonPrivatePointer
                OpReturn
                OpFunctionEnd
-%compute_main = OpFunction %void None %61
-         %62 = OpLabel
-         %63 = OpLoad %uint %compute_main_local_invocation_index_Input None
-         %64 = OpFunctionCall %void %compute_main_inner %63
+%compute_main = OpFunction %void None %73
+         %74 = OpLabel
+         %75 = OpLoad %uint %compute_main_local_invocation_index_Input None
+         %76 = OpFunctionCall %void %compute_main_inner %75
                OpReturn
                OpFunctionEnd
diff --git a/test/tint/builtins/gen/var/subgroupMatrixLoad/4fd336.wgsl.expected.msl b/test/tint/builtins/gen/var/subgroupMatrixLoad/4fd336.wgsl.expected.msl
index 0fd8544..3a18df6 100644
--- a/test/tint/builtins/gen/var/subgroupMatrixLoad/4fd336.wgsl.expected.msl
+++ b/test/tint/builtins/gen/var/subgroupMatrixLoad/4fd336.wgsl.expected.msl
@@ -25,9 +25,15 @@
 simdgroup_float8x8 subgroupMatrixLoad_4fd336(tint_module_vars_struct tint_module_vars) {
   uint arg_1 = 1u;
   uint arg_3 = 8u;
-  simdgroup_float8x8 v = make_filled_simdgroup_matrix<float, 8, 8>(0.0f);
-  simdgroup_load(v, (&(*tint_module_vars.sb_ro).arg_0[arg_1]), ulong(arg_3), ulong2(0ul), true);
-  simdgroup_float8x8 res = v;
+  uint const v = arg_1;
+  uint const v_1 = max(arg_3, 8u);
+  simdgroup_float8x8 v_2 = make_filled_simdgroup_matrix<float, 8, 8>(0.0f);
+  if ((((v + (v_1 * 7u)) + 8u) <= 1024u)) {
+    simdgroup_float8x8 v_3 = make_filled_simdgroup_matrix<float, 8, 8>(0.0f);
+    simdgroup_load(v_3, (&(*tint_module_vars.sb_ro).arg_0[v]), ulong(v_1), ulong2(0ul), true);
+    v_2 = v_3;
+  }
+  simdgroup_float8x8 res = v_2;
   return res;
 }
 
diff --git a/test/tint/builtins/gen/var/subgroupMatrixLoad/4fd336.wgsl.expected.spvasm b/test/tint/builtins/gen/var/subgroupMatrixLoad/4fd336.wgsl.expected.spvasm
index 650a7f7..4794891 100644
--- a/test/tint/builtins/gen/var/subgroupMatrixLoad/4fd336.wgsl.expected.spvasm
+++ b/test/tint/builtins/gen/var/subgroupMatrixLoad/4fd336.wgsl.expected.spvasm
@@ -1,7 +1,7 @@
 ; SPIR-V
 ; Version: 1.3
 ; Generator: Google Tint Compiler; 1
-; Bound: 44
+; Bound: 58
 ; Schema: 0
                OpCapability Shader
                OpCapability VulkanMemoryModel
@@ -9,6 +9,7 @@
                OpCapability CooperativeMatrixKHR
                OpExtension "SPV_KHR_vulkan_memory_model"
                OpExtension "SPV_KHR_cooperative_matrix"
+         %28 = OpExtInstImport "GLSL.std.450"
                OpMemoryModel Logical Vulkan
                OpEntryPoint GLCompute %compute_main "compute_main"
                OpExecutionMode %compute_main LocalSize 1 1 1
@@ -53,10 +54,14 @@
 %_ptr_Function_uint = OpTypePointer Function %uint
 %_ptr_StorageBuffer__arr_float_uint_1024 = OpTypePointer StorageBuffer %_arr_float_uint_1024
      %uint_0 = OpConstant %uint 0
-%_ptr_StorageBuffer_float = OpTypePointer StorageBuffer %float
+     %uint_7 = OpConstant %uint 7
+       %bool = OpTypeBool
 %_ptr_Function_13 = OpTypePointer Function %13
+    %float_0 = OpConstant %float 0
+         %37 = OpConstantComposite %13 %float_0
+%_ptr_StorageBuffer_float = OpTypePointer StorageBuffer %float
        %void = OpTypeVoid
-         %35 = OpTypeFunction %void
+         %49 = OpTypeFunction %void
 %_ptr_StorageBuffer__arr_float_uint_1024_0 = OpTypePointer StorageBuffer %_arr_float_uint_1024
 %_ptr_StorageBuffer_float_0 = OpTypePointer StorageBuffer %float
     %uint_64 = OpConstant %uint 64
@@ -64,23 +69,36 @@
          %18 = OpLabel
       %arg_1 = OpVariable %_ptr_Function_uint Function
       %arg_3 = OpVariable %_ptr_Function_uint Function
+         %35 = OpVariable %_ptr_Function_13 Function %37
         %res = OpVariable %_ptr_Function_13 Function
                OpStore %arg_1 %uint_1
                OpStore %arg_3 %uint_8
          %22 = OpAccessChain %_ptr_StorageBuffer__arr_float_uint_1024 %8 %uint_0 %uint_0
          %25 = OpLoad %uint %arg_1 None
          %26 = OpLoad %uint %arg_3 None
-         %27 = OpAccessChain %_ptr_StorageBuffer_float %22 %25
-         %29 = OpCooperativeMatrixLoadKHR %13 %27 %uint_1 %26 NonPrivatePointer
-               OpStore %res %29
-         %32 = OpLoad %13 %res None
-               OpReturnValue %32
+         %27 = OpExtInst %uint %28 UMax %26 %uint_8
+         %29 = OpIMul %uint %27 %uint_7
+         %31 = OpIAdd %uint %25 %29
+         %32 = OpIAdd %uint %31 %uint_8
+         %33 = OpULessThanEqual %bool %32 %uint_1024
+               OpSelectionMerge %39 None
+               OpBranchConditional %33 %40 %39
+         %40 = OpLabel
+         %44 = OpAccessChain %_ptr_StorageBuffer_float %22 %25
+         %46 = OpCooperativeMatrixLoadKHR %13 %44 %uint_1 %27 NonPrivatePointer
+               OpStore %35 %46 None
+               OpBranch %39
+         %39 = OpLabel
+         %41 = OpLoad %13 %35 None
+               OpStore %res %41
+         %43 = OpLoad %13 %res None
+               OpReturnValue %43
                OpFunctionEnd
-%compute_main = OpFunction %void None %35
-         %36 = OpLabel
-         %37 = OpFunctionCall %13 %subgroupMatrixLoad_4fd336
-         %38 = OpAccessChain %_ptr_StorageBuffer__arr_float_uint_1024_0 %1 %uint_0
-         %40 = OpAccessChain %_ptr_StorageBuffer_float_0 %38 %uint_0
-               OpCooperativeMatrixStoreKHR %40 %37 %uint_0 %uint_64 NonPrivatePointer
+%compute_main = OpFunction %void None %49
+         %50 = OpLabel
+         %51 = OpFunctionCall %13 %subgroupMatrixLoad_4fd336
+         %52 = OpAccessChain %_ptr_StorageBuffer__arr_float_uint_1024_0 %1 %uint_0
+         %54 = OpAccessChain %_ptr_StorageBuffer_float_0 %52 %uint_0
+               OpCooperativeMatrixStoreKHR %54 %51 %uint_0 %uint_64 NonPrivatePointer
                OpReturn
                OpFunctionEnd
diff --git a/test/tint/builtins/gen/var/subgroupMatrixLoad/5155ed.wgsl.expected.spvasm b/test/tint/builtins/gen/var/subgroupMatrixLoad/5155ed.wgsl.expected.spvasm
index 38fcd16..f47371b 100644
--- a/test/tint/builtins/gen/var/subgroupMatrixLoad/5155ed.wgsl.expected.spvasm
+++ b/test/tint/builtins/gen/var/subgroupMatrixLoad/5155ed.wgsl.expected.spvasm
@@ -1,7 +1,7 @@
 ; SPIR-V
 ; Version: 1.3
 ; Generator: Google Tint Compiler; 1
-; Bound: 42
+; Bound: 56
 ; Schema: 0
                OpCapability Shader
                OpCapability VulkanMemoryModel
@@ -9,6 +9,7 @@
                OpCapability CooperativeMatrixKHR
                OpExtension "SPV_KHR_vulkan_memory_model"
                OpExtension "SPV_KHR_cooperative_matrix"
+         %27 = OpExtInstImport "GLSL.std.450"
                OpMemoryModel Logical Vulkan
                OpEntryPoint GLCompute %compute_main "compute_main"
                OpExecutionMode %compute_main LocalSize 1 1 1
@@ -50,33 +51,50 @@
 %_ptr_Function_uint = OpTypePointer Function %uint
      %uint_1 = OpConstant %uint 1
 %_ptr_StorageBuffer__runtimearr_uint = OpTypePointer StorageBuffer %_runtimearr_uint
-%_ptr_StorageBuffer_uint = OpTypePointer StorageBuffer %uint
+     %uint_7 = OpConstant %uint 7
+       %bool = OpTypeBool
 %_ptr_Function_12 = OpTypePointer Function %12
+         %37 = OpConstantComposite %12 %uint_0
+%_ptr_StorageBuffer_uint = OpTypePointer StorageBuffer %uint
        %void = OpTypeVoid
-         %34 = OpTypeFunction %void
+         %48 = OpTypeFunction %void
 %_ptr_StorageBuffer__arr_uint_uint_1024 = OpTypePointer StorageBuffer %_arr_uint_uint_1024
     %uint_64 = OpConstant %uint 64
 %subgroupMatrixLoad_5155ed = OpFunction %12 None %16
          %17 = OpLabel
       %arg_1 = OpVariable %_ptr_Function_uint Function
       %arg_3 = OpVariable %_ptr_Function_uint Function
+         %35 = OpVariable %_ptr_Function_12 Function %37
         %res = OpVariable %_ptr_Function_12 Function
                OpStore %arg_1 %uint_1
                OpStore %arg_3 %uint_8
          %22 = OpAccessChain %_ptr_StorageBuffer__runtimearr_uint %sb_rw %uint_0
          %24 = OpLoad %uint %arg_1 None
          %25 = OpLoad %uint %arg_3 None
-         %26 = OpAccessChain %_ptr_StorageBuffer_uint %22 %24
-         %28 = OpCooperativeMatrixLoadKHR %12 %26 %uint_1 %25 NonPrivatePointer
-               OpStore %res %28
-         %31 = OpLoad %12 %res None
-               OpReturnValue %31
+         %26 = OpExtInst %uint %27 UMax %25 %uint_8
+         %28 = OpArrayLength %uint %sb_rw 0
+         %29 = OpIMul %uint %26 %uint_7
+         %31 = OpIAdd %uint %24 %29
+         %32 = OpIAdd %uint %31 %uint_8
+         %33 = OpULessThanEqual %bool %32 %28
+               OpSelectionMerge %38 None
+               OpBranchConditional %33 %39 %38
+         %39 = OpLabel
+         %43 = OpAccessChain %_ptr_StorageBuffer_uint %22 %24
+         %45 = OpCooperativeMatrixLoadKHR %12 %43 %uint_1 %26 NonPrivatePointer
+               OpStore %35 %45 None
+               OpBranch %38
+         %38 = OpLabel
+         %40 = OpLoad %12 %35 None
+               OpStore %res %40
+         %42 = OpLoad %12 %res None
+               OpReturnValue %42
                OpFunctionEnd
-%compute_main = OpFunction %void None %34
-         %35 = OpLabel
-         %36 = OpFunctionCall %12 %subgroupMatrixLoad_5155ed
-         %37 = OpAccessChain %_ptr_StorageBuffer__arr_uint_uint_1024 %1 %uint_0
-         %39 = OpAccessChain %_ptr_StorageBuffer_uint %37 %uint_0
-               OpCooperativeMatrixStoreKHR %39 %36 %uint_0 %uint_64 NonPrivatePointer
+%compute_main = OpFunction %void None %48
+         %49 = OpLabel
+         %50 = OpFunctionCall %12 %subgroupMatrixLoad_5155ed
+         %51 = OpAccessChain %_ptr_StorageBuffer__arr_uint_uint_1024 %1 %uint_0
+         %53 = OpAccessChain %_ptr_StorageBuffer_uint %51 %uint_0
+               OpCooperativeMatrixStoreKHR %53 %50 %uint_0 %uint_64 NonPrivatePointer
                OpReturn
                OpFunctionEnd
diff --git a/test/tint/builtins/gen/var/subgroupMatrixLoad/52acb0.wgsl.expected.msl b/test/tint/builtins/gen/var/subgroupMatrixLoad/52acb0.wgsl.expected.msl
index adcfa84..2ff6a9f 100644
--- a/test/tint/builtins/gen/var/subgroupMatrixLoad/52acb0.wgsl.expected.msl
+++ b/test/tint/builtins/gen/var/subgroupMatrixLoad/52acb0.wgsl.expected.msl
@@ -25,9 +25,15 @@
 simdgroup_half8x8 subgroupMatrixLoad_52acb0(tint_module_vars_struct tint_module_vars) {
   uint arg_1 = 1u;
   uint arg_3 = 8u;
-  simdgroup_half8x8 v = make_filled_simdgroup_matrix<half, 8, 8>(0.0h);
-  simdgroup_load(v, (&(*tint_module_vars.sb_rw).arg_0[arg_1]), ulong(arg_3), ulong2(0ul), true);
-  simdgroup_half8x8 res = v;
+  uint const v = arg_1;
+  uint const v_1 = max(arg_3, 8u);
+  simdgroup_half8x8 v_2 = make_filled_simdgroup_matrix<half, 8, 8>(0.0h);
+  if ((((v + (v_1 * 7u)) + 8u) <= 1024u)) {
+    simdgroup_half8x8 v_3 = make_filled_simdgroup_matrix<half, 8, 8>(0.0h);
+    simdgroup_load(v_3, (&(*tint_module_vars.sb_rw).arg_0[v]), ulong(v_1), ulong2(0ul), true);
+    v_2 = v_3;
+  }
+  simdgroup_half8x8 res = v_2;
   return res;
 }
 
diff --git a/test/tint/builtins/gen/var/subgroupMatrixLoad/52acb0.wgsl.expected.spvasm b/test/tint/builtins/gen/var/subgroupMatrixLoad/52acb0.wgsl.expected.spvasm
index 82d1af7..84a1e0b 100644
--- a/test/tint/builtins/gen/var/subgroupMatrixLoad/52acb0.wgsl.expected.spvasm
+++ b/test/tint/builtins/gen/var/subgroupMatrixLoad/52acb0.wgsl.expected.spvasm
@@ -1,7 +1,7 @@
 ; SPIR-V
 ; Version: 1.3
 ; Generator: Google Tint Compiler; 1
-; Bound: 42
+; Bound: 56
 ; Schema: 0
                OpCapability Shader
                OpCapability VulkanMemoryModel
@@ -12,6 +12,7 @@
                OpCapability CooperativeMatrixKHR
                OpExtension "SPV_KHR_vulkan_memory_model"
                OpExtension "SPV_KHR_cooperative_matrix"
+         %28 = OpExtInstImport "GLSL.std.450"
                OpMemoryModel Logical Vulkan
                OpEntryPoint GLCompute %compute_main "compute_main"
                OpExecutionMode %compute_main LocalSize 1 1 1
@@ -55,32 +56,49 @@
 %_ptr_Function_uint = OpTypePointer Function %uint
 %_ptr_StorageBuffer__arr_half_uint_1024 = OpTypePointer StorageBuffer %_arr_half_uint_1024
      %uint_0 = OpConstant %uint 0
-%_ptr_StorageBuffer_half = OpTypePointer StorageBuffer %half
+     %uint_7 = OpConstant %uint 7
+       %bool = OpTypeBool
 %_ptr_Function_13 = OpTypePointer Function %13
+%half_0x0p_0 = OpConstant %half 0x0p+0
+         %37 = OpConstantComposite %13 %half_0x0p_0
+%_ptr_StorageBuffer_half = OpTypePointer StorageBuffer %half
        %void = OpTypeVoid
-         %35 = OpTypeFunction %void
+         %49 = OpTypeFunction %void
     %uint_64 = OpConstant %uint 64
 %subgroupMatrixLoad_52acb0 = OpFunction %13 None %17
          %18 = OpLabel
       %arg_1 = OpVariable %_ptr_Function_uint Function
       %arg_3 = OpVariable %_ptr_Function_uint Function
+         %35 = OpVariable %_ptr_Function_13 Function %37
         %res = OpVariable %_ptr_Function_13 Function
                OpStore %arg_1 %uint_1
                OpStore %arg_3 %uint_8
          %22 = OpAccessChain %_ptr_StorageBuffer__arr_half_uint_1024 %8 %uint_0 %uint_0
          %25 = OpLoad %uint %arg_1 None
          %26 = OpLoad %uint %arg_3 None
-         %27 = OpAccessChain %_ptr_StorageBuffer_half %22 %25
-         %29 = OpCooperativeMatrixLoadKHR %13 %27 %uint_1 %26 NonPrivatePointer
-               OpStore %res %29
-         %32 = OpLoad %13 %res None
-               OpReturnValue %32
+         %27 = OpExtInst %uint %28 UMax %26 %uint_8
+         %29 = OpIMul %uint %27 %uint_7
+         %31 = OpIAdd %uint %25 %29
+         %32 = OpIAdd %uint %31 %uint_8
+         %33 = OpULessThanEqual %bool %32 %uint_1024
+               OpSelectionMerge %39 None
+               OpBranchConditional %33 %40 %39
+         %40 = OpLabel
+         %44 = OpAccessChain %_ptr_StorageBuffer_half %22 %25
+         %46 = OpCooperativeMatrixLoadKHR %13 %44 %uint_1 %27 NonPrivatePointer
+               OpStore %35 %46 None
+               OpBranch %39
+         %39 = OpLabel
+         %41 = OpLoad %13 %35 None
+               OpStore %res %41
+         %43 = OpLoad %13 %res None
+               OpReturnValue %43
                OpFunctionEnd
-%compute_main = OpFunction %void None %35
-         %36 = OpLabel
-         %37 = OpFunctionCall %13 %subgroupMatrixLoad_52acb0
-         %38 = OpAccessChain %_ptr_StorageBuffer__arr_half_uint_1024 %1 %uint_0
-         %39 = OpAccessChain %_ptr_StorageBuffer_half %38 %uint_0
-               OpCooperativeMatrixStoreKHR %39 %37 %uint_0 %uint_64 NonPrivatePointer
+%compute_main = OpFunction %void None %49
+         %50 = OpLabel
+         %51 = OpFunctionCall %13 %subgroupMatrixLoad_52acb0
+         %52 = OpAccessChain %_ptr_StorageBuffer__arr_half_uint_1024 %1 %uint_0
+         %53 = OpAccessChain %_ptr_StorageBuffer_half %52 %uint_0
+               OpCooperativeMatrixStoreKHR %53 %51 %uint_0 %uint_64 NonPrivatePointer
                OpReturn
                OpFunctionEnd
diff --git a/test/tint/builtins/gen/var/subgroupMatrixLoad/5c0896.wgsl.expected.msl b/test/tint/builtins/gen/var/subgroupMatrixLoad/5c0896.wgsl.expected.msl
index 7636e84..a20f734 100644
--- a/test/tint/builtins/gen/var/subgroupMatrixLoad/5c0896.wgsl.expected.msl
+++ b/test/tint/builtins/gen/var/subgroupMatrixLoad/5c0896.wgsl.expected.msl
@@ -25,24 +25,30 @@
 simdgroup_float8x8 subgroupMatrixLoad_5c0896(tint_module_vars_struct tint_module_vars) {
   uint arg_1 = 1u;
   uint arg_3 = 8u;
-  simdgroup_float8x8 v = make_filled_simdgroup_matrix<float, 8, 8>(0.0f);
-  simdgroup_load(v, (&(*tint_module_vars.arg_0)[arg_1]), ulong(arg_3), ulong2(0ul), true);
-  simdgroup_float8x8 res = v;
+  uint const v = arg_1;
+  uint const v_1 = max(arg_3, 8u);
+  simdgroup_float8x8 v_2 = make_filled_simdgroup_matrix<float, 8, 8>(0.0f);
+  if ((((v + (v_1 * 7u)) + 8u) <= 1024u)) {
+    simdgroup_float8x8 v_3 = make_filled_simdgroup_matrix<float, 8, 8>(0.0f);
+    simdgroup_load(v_3, (&(*tint_module_vars.arg_0)[v]), ulong(v_1), ulong2(0ul), true);
+    v_2 = v_3;
+  }
+  simdgroup_float8x8 res = v_2;
   return res;
 }
 
 void compute_main_inner(uint tint_local_index, tint_module_vars_struct tint_module_vars) {
   {
-    uint v_1 = 0u;
-    v_1 = tint_local_index;
+    uint v_4 = 0u;
+    v_4 = tint_local_index;
     while(true) {
-      uint const v_2 = v_1;
-      if ((v_2 >= 1024u)) {
+      uint const v_5 = v_4;
+      if ((v_5 >= 1024u)) {
         break;
       }
-      (*tint_module_vars.arg_0)[v_2] = 0.0f;
+      (*tint_module_vars.arg_0)[v_5] = 0.0f;
       {
-        v_1 = (v_2 + 1u);
+        v_4 = (v_5 + 1u);
       }
       continue;
     }
@@ -51,7 +57,7 @@
   simdgroup_store(subgroupMatrixLoad_5c0896(tint_module_vars), (&(*tint_module_vars.prevent_dce)[0u]), ulong(64u), ulong2(0ul), false);
 }
 
-kernel void compute_main(uint tint_local_index [[thread_index_in_threadgroup]], device tint_array<float, 1024>* prevent_dce [[buffer(0)]], threadgroup tint_symbol_1* v_3 [[threadgroup(0)]]) {
-  tint_module_vars_struct const tint_module_vars = tint_module_vars_struct{.prevent_dce=prevent_dce, .arg_0=(&(*v_3).tint_symbol)};
+kernel void compute_main(uint tint_local_index [[thread_index_in_threadgroup]], device tint_array<float, 1024>* prevent_dce [[buffer(0)]], threadgroup tint_symbol_1* v_6 [[threadgroup(0)]]) {
+  tint_module_vars_struct const tint_module_vars = tint_module_vars_struct{.prevent_dce=prevent_dce, .arg_0=(&(*v_6).tint_symbol)};
   compute_main_inner(tint_local_index, tint_module_vars);
 }
diff --git a/test/tint/builtins/gen/var/subgroupMatrixLoad/5c0896.wgsl.expected.spvasm b/test/tint/builtins/gen/var/subgroupMatrixLoad/5c0896.wgsl.expected.spvasm
index d034256..e608d51 100644
--- a/test/tint/builtins/gen/var/subgroupMatrixLoad/5c0896.wgsl.expected.spvasm
+++ b/test/tint/builtins/gen/var/subgroupMatrixLoad/5c0896.wgsl.expected.spvasm
@@ -1,7 +1,7 @@
 ; SPIR-V
 ; Version: 1.3
 ; Generator: Google Tint Compiler; 1
-; Bound: 65
+; Bound: 77
 ; Schema: 0
                OpCapability Shader
                OpCapability VulkanMemoryModel
@@ -9,6 +9,7 @@
                OpCapability CooperativeMatrixKHR
                OpExtension "SPV_KHR_vulkan_memory_model"
                OpExtension "SPV_KHR_cooperative_matrix"
+         %27 = OpExtInstImport "GLSL.std.450"
                OpMemoryModel Logical Vulkan
                OpEntryPoint GLCompute %compute_main "compute_main" %compute_main_local_invocation_index_Input
                OpExecutionMode %compute_main LocalSize 1 1 1
@@ -48,67 +49,82 @@
          %18 = OpTypeFunction %14
 %_ptr_Function_uint = OpTypePointer Function %uint
      %uint_1 = OpConstant %uint 1
-%_ptr_Workgroup_float = OpTypePointer Workgroup %float
+     %uint_7 = OpConstant %uint 7
+       %bool = OpTypeBool
 %_ptr_Function_14 = OpTypePointer Function %14
+    %float_0 = OpConstant %float 0
+         %36 = OpConstantComposite %14 %float_0
+%_ptr_Workgroup_float = OpTypePointer Workgroup %float
        %void = OpTypeVoid
-         %35 = OpTypeFunction %void %uint
+         %49 = OpTypeFunction %void %uint
      %uint_2 = OpConstant %uint 2
  %uint_24840 = OpConstant %uint 24840
 %_ptr_StorageBuffer__arr_float_uint_1024 = OpTypePointer StorageBuffer %_arr_float_uint_1024
 %_ptr_StorageBuffer_float = OpTypePointer StorageBuffer %float
     %uint_64 = OpConstant %uint 64
-       %bool = OpTypeBool
-    %float_0 = OpConstant %float 0
-         %61 = OpTypeFunction %void
+         %73 = OpTypeFunction %void
 %subgroupMatrixLoad_5c0896 = OpFunction %14 None %18
          %19 = OpLabel
       %arg_1 = OpVariable %_ptr_Function_uint Function
       %arg_3 = OpVariable %_ptr_Function_uint Function
+         %34 = OpVariable %_ptr_Function_14 Function %36
         %res = OpVariable %_ptr_Function_14 Function
                OpStore %arg_1 %uint_1
                OpStore %arg_3 %uint_8
          %24 = OpLoad %uint %arg_1 None
          %25 = OpLoad %uint %arg_3 None
-         %26 = OpAccessChain %_ptr_Workgroup_float %arg_0 %24
-         %28 = OpCooperativeMatrixLoadKHR %14 %26 %uint_1 %25 NonPrivatePointer
-               OpStore %res %28
-         %31 = OpLoad %14 %res None
-               OpReturnValue %31
-               OpFunctionEnd
-%compute_main_inner = OpFunction %void None %35
-%tint_local_index = OpFunctionParameter %uint
-         %36 = OpLabel
-               OpBranch %37
-         %37 = OpLabel
-               OpBranch %40
-         %40 = OpLabel
-         %42 = OpPhi %uint %tint_local_index %37 %43 %39
-               OpLoopMerge %41 %39 None
+         %26 = OpExtInst %uint %27 UMax %25 %uint_8
+         %28 = OpIMul %uint %26 %uint_7
+         %30 = OpIAdd %uint %24 %28
+         %31 = OpIAdd %uint %30 %uint_8
+         %32 = OpULessThanEqual %bool %31 %uint_1024
+               OpSelectionMerge %38 None
+               OpBranchConditional %32 %39 %38
+         %39 = OpLabel
+         %43 = OpAccessChain %_ptr_Workgroup_float %arg_0 %24
+         %45 = OpCooperativeMatrixLoadKHR %14 %43 %uint_1 %26 NonPrivatePointer
+               OpStore %34 %45 None
                OpBranch %38
          %38 = OpLabel
-         %54 = OpUGreaterThanEqual %bool %42 %uint_1024
-               OpSelectionMerge %56 None
-               OpBranchConditional %54 %57 %56
-         %57 = OpLabel
-               OpBranch %41
-         %56 = OpLabel
-         %58 = OpAccessChain %_ptr_Workgroup_float %arg_0 %42
-               OpStore %58 %float_0 NonPrivatePointer
-               OpBranch %39
-         %39 = OpLabel
-         %43 = OpIAdd %uint %42 %uint_1
-               OpBranch %40
-         %41 = OpLabel
+         %40 = OpLoad %14 %34 None
+               OpStore %res %40
+         %42 = OpLoad %14 %res None
+               OpReturnValue %42
+               OpFunctionEnd
+%compute_main_inner = OpFunction %void None %49
+%tint_local_index = OpFunctionParameter %uint
+         %50 = OpLabel
+               OpBranch %51
+         %51 = OpLabel
+               OpBranch %54
+         %54 = OpLabel
+         %56 = OpPhi %uint %tint_local_index %51 %57 %53
+               OpLoopMerge %55 %53 None
+               OpBranch %52
+         %52 = OpLabel
+         %68 = OpUGreaterThanEqual %bool %56 %uint_1024
+               OpSelectionMerge %69 None
+               OpBranchConditional %68 %70 %69
+         %70 = OpLabel
+               OpBranch %55
+         %69 = OpLabel
+         %71 = OpAccessChain %_ptr_Workgroup_float %arg_0 %56
+               OpStore %71 %float_0 NonPrivatePointer
+               OpBranch %53
+         %53 = OpLabel
+         %57 = OpIAdd %uint %56 %uint_1
+               OpBranch %54
+         %55 = OpLabel
                OpControlBarrier %uint_2 %uint_2 %uint_24840
-         %47 = OpFunctionCall %14 %subgroupMatrixLoad_5c0896
-         %48 = OpAccessChain %_ptr_StorageBuffer__arr_float_uint_1024 %1 %uint_0
-         %50 = OpAccessChain %_ptr_StorageBuffer_float %48 %uint_0
-               OpCooperativeMatrixStoreKHR %50 %47 %uint_0 %uint_64 NonPrivatePointer
+         %61 = OpFunctionCall %14 %subgroupMatrixLoad_5c0896
+         %62 = OpAccessChain %_ptr_StorageBuffer__arr_float_uint_1024 %1 %uint_0
+         %64 = OpAccessChain %_ptr_StorageBuffer_float %62 %uint_0
+               OpCooperativeMatrixStoreKHR %64 %61 %uint_0 %uint_64 NonPrivatePointer
                OpReturn
                OpFunctionEnd
-%compute_main = OpFunction %void None %61
-         %62 = OpLabel
-         %63 = OpLoad %uint %compute_main_local_invocation_index_Input None
-         %64 = OpFunctionCall %void %compute_main_inner %63
+%compute_main = OpFunction %void None %73
+         %74 = OpLabel
+         %75 = OpLoad %uint %compute_main_local_invocation_index_Input None
+         %76 = OpFunctionCall %void %compute_main_inner %75
                OpReturn
                OpFunctionEnd
diff --git a/test/tint/builtins/gen/var/subgroupMatrixLoad/5de410.wgsl.expected.msl b/test/tint/builtins/gen/var/subgroupMatrixLoad/5de410.wgsl.expected.msl
index 4f84b51..7aa7906 100644
--- a/test/tint/builtins/gen/var/subgroupMatrixLoad/5de410.wgsl.expected.msl
+++ b/test/tint/builtins/gen/var/subgroupMatrixLoad/5de410.wgsl.expected.msl
@@ -20,18 +20,25 @@
 struct tint_module_vars_struct {
   device tint_array<half, 1024>* prevent_dce;
   const device SB_RO* sb_ro;
+  const constant tint_array<uint4, 1>* tint_storage_buffer_sizes;
 };
 
 simdgroup_half8x8 subgroupMatrixLoad_5de410(tint_module_vars_struct tint_module_vars) {
   uint arg_1 = 1u;
   uint arg_3 = 8u;
-  simdgroup_half8x8 v = make_filled_simdgroup_matrix<half, 8, 8>(0.0h);
-  simdgroup_load(v, (&(*tint_module_vars.sb_ro).arg_0[arg_1]), ulong(arg_3), ulong2(0ul), true);
-  simdgroup_half8x8 res = v;
+  uint const v = arg_1;
+  uint const v_1 = max(arg_3, 8u);
+  simdgroup_half8x8 v_2 = make_filled_simdgroup_matrix<half, 8, 8>(0.0h);
+  if ((((v + (v_1 * 7u)) + 8u) <= (((*tint_module_vars.tint_storage_buffer_sizes)[0u].x - 0u) / 2u))) {
+    simdgroup_half8x8 v_3 = make_filled_simdgroup_matrix<half, 8, 8>(0.0h);
+    simdgroup_load(v_3, (&(*tint_module_vars.sb_ro).arg_0[v]), ulong(v_1), ulong2(0ul), true);
+    v_2 = v_3;
+  }
+  simdgroup_half8x8 res = v_2;
   return res;
 }
 
-kernel void compute_main(device tint_array<half, 1024>* prevent_dce [[buffer(0)]], const device SB_RO* sb_ro [[buffer(1)]]) {
-  tint_module_vars_struct const tint_module_vars = tint_module_vars_struct{.prevent_dce=prevent_dce, .sb_ro=sb_ro};
+kernel void compute_main(device tint_array<half, 1024>* prevent_dce [[buffer(0)]], const device SB_RO* sb_ro [[buffer(1)]], const constant tint_array<uint4, 1>* tint_storage_buffer_sizes [[buffer(30)]]) {
+  tint_module_vars_struct const tint_module_vars = tint_module_vars_struct{.prevent_dce=prevent_dce, .sb_ro=sb_ro, .tint_storage_buffer_sizes=tint_storage_buffer_sizes};
   simdgroup_store(subgroupMatrixLoad_5de410(tint_module_vars), (&(*tint_module_vars.prevent_dce)[0u]), ulong(64u), ulong2(0ul), false);
 }
diff --git a/test/tint/builtins/gen/var/subgroupMatrixLoad/5de410.wgsl.expected.spvasm b/test/tint/builtins/gen/var/subgroupMatrixLoad/5de410.wgsl.expected.spvasm
index 33acc51..6bccdce 100644
--- a/test/tint/builtins/gen/var/subgroupMatrixLoad/5de410.wgsl.expected.spvasm
+++ b/test/tint/builtins/gen/var/subgroupMatrixLoad/5de410.wgsl.expected.spvasm
@@ -1,7 +1,7 @@
 ; SPIR-V
 ; Version: 1.3
 ; Generator: Google Tint Compiler; 1
-; Bound: 44
+; Bound: 59
 ; Schema: 0
                OpCapability Shader
                OpCapability VulkanMemoryModel
@@ -12,6 +12,7 @@
                OpCapability CooperativeMatrixKHR
                OpExtension "SPV_KHR_vulkan_memory_model"
                OpExtension "SPV_KHR_cooperative_matrix"
+         %28 = OpExtInstImport "GLSL.std.450"
                OpMemoryModel Logical Vulkan
                OpEntryPoint GLCompute %compute_main "compute_main"
                OpExecutionMode %compute_main LocalSize 1 1 1
@@ -55,10 +56,14 @@
 %_ptr_Function_uint = OpTypePointer Function %uint
      %uint_1 = OpConstant %uint 1
 %_ptr_StorageBuffer__runtimearr_half = OpTypePointer StorageBuffer %_runtimearr_half
-%_ptr_StorageBuffer_half = OpTypePointer StorageBuffer %half
+     %uint_7 = OpConstant %uint 7
+       %bool = OpTypeBool
 %_ptr_Function_13 = OpTypePointer Function %13
+%half_0x0p_0 = OpConstant %half 0x0p+0
+         %38 = OpConstantComposite %13 %half_0x0p_0
+%_ptr_StorageBuffer_half = OpTypePointer StorageBuffer %half
        %void = OpTypeVoid
-         %35 = OpTypeFunction %void
+         %50 = OpTypeFunction %void
 %_ptr_StorageBuffer__arr_half_uint_1024 = OpTypePointer StorageBuffer %_arr_half_uint_1024
 %_ptr_StorageBuffer_half_0 = OpTypePointer StorageBuffer %half
     %uint_64 = OpConstant %uint 64
@@ -66,23 +71,37 @@
          %18 = OpLabel
       %arg_1 = OpVariable %_ptr_Function_uint Function
       %arg_3 = OpVariable %_ptr_Function_uint Function
+         %36 = OpVariable %_ptr_Function_13 Function %38
         %res = OpVariable %_ptr_Function_13 Function
                OpStore %arg_1 %uint_1
                OpStore %arg_3 %uint_8
          %23 = OpAccessChain %_ptr_StorageBuffer__runtimearr_half %sb_ro %uint_0
          %25 = OpLoad %uint %arg_1 None
          %26 = OpLoad %uint %arg_3 None
-         %27 = OpAccessChain %_ptr_StorageBuffer_half %23 %25
-         %29 = OpCooperativeMatrixLoadKHR %13 %27 %uint_1 %26 NonPrivatePointer
-               OpStore %res %29
-         %32 = OpLoad %13 %res None
-               OpReturnValue %32
+         %27 = OpExtInst %uint %28 UMax %26 %uint_8
+         %29 = OpArrayLength %uint %sb_ro 0
+         %30 = OpIMul %uint %27 %uint_7
+         %32 = OpIAdd %uint %25 %30
+         %33 = OpIAdd %uint %32 %uint_8
+         %34 = OpULessThanEqual %bool %33 %29
+               OpSelectionMerge %40 None
+               OpBranchConditional %34 %41 %40
+         %41 = OpLabel
+         %45 = OpAccessChain %_ptr_StorageBuffer_half %23 %25
+         %47 = OpCooperativeMatrixLoadKHR %13 %45 %uint_1 %27 NonPrivatePointer
+               OpStore %36 %47 None
+               OpBranch %40
+         %40 = OpLabel
+         %42 = OpLoad %13 %36 None
+               OpStore %res %42
+         %44 = OpLoad %13 %res None
+               OpReturnValue %44
                OpFunctionEnd
-%compute_main = OpFunction %void None %35
-         %36 = OpLabel
-         %37 = OpFunctionCall %13 %subgroupMatrixLoad_5de410
-         %38 = OpAccessChain %_ptr_StorageBuffer__arr_half_uint_1024 %1 %uint_0
-         %40 = OpAccessChain %_ptr_StorageBuffer_half_0 %38 %uint_0
-               OpCooperativeMatrixStoreKHR %40 %37 %uint_0 %uint_64 NonPrivatePointer
+%compute_main = OpFunction %void None %50
+         %51 = OpLabel
+         %52 = OpFunctionCall %13 %subgroupMatrixLoad_5de410
+         %53 = OpAccessChain %_ptr_StorageBuffer__arr_half_uint_1024 %1 %uint_0
+         %55 = OpAccessChain %_ptr_StorageBuffer_half_0 %53 %uint_0
+               OpCooperativeMatrixStoreKHR %55 %52 %uint_0 %uint_64 NonPrivatePointer
                OpReturn
                OpFunctionEnd
diff --git a/test/tint/builtins/gen/var/subgroupMatrixLoad/6432c6.wgsl.expected.spvasm b/test/tint/builtins/gen/var/subgroupMatrixLoad/6432c6.wgsl.expected.spvasm
index d4f93c2..09aacc4 100644
--- a/test/tint/builtins/gen/var/subgroupMatrixLoad/6432c6.wgsl.expected.spvasm
+++ b/test/tint/builtins/gen/var/subgroupMatrixLoad/6432c6.wgsl.expected.spvasm
@@ -1,7 +1,7 @@
 ; SPIR-V
 ; Version: 1.3
 ; Generator: Google Tint Compiler; 1
-; Bound: 41
+; Bound: 54
 ; Schema: 0
                OpCapability Shader
                OpCapability VulkanMemoryModel
@@ -9,6 +9,7 @@
                OpCapability CooperativeMatrixKHR
                OpExtension "SPV_KHR_vulkan_memory_model"
                OpExtension "SPV_KHR_cooperative_matrix"
+         %27 = OpExtInstImport "GLSL.std.450"
                OpMemoryModel Logical Vulkan
                OpEntryPoint GLCompute %compute_main "compute_main"
                OpExecutionMode %compute_main LocalSize 1 1 1
@@ -51,32 +52,48 @@
 %_ptr_Function_uint = OpTypePointer Function %uint
      %uint_1 = OpConstant %uint 1
 %_ptr_StorageBuffer__arr_uint_uint_1024 = OpTypePointer StorageBuffer %_arr_uint_uint_1024
-%_ptr_StorageBuffer_uint = OpTypePointer StorageBuffer %uint
+     %uint_7 = OpConstant %uint 7
+       %bool = OpTypeBool
 %_ptr_Function_12 = OpTypePointer Function %12
+         %36 = OpConstantComposite %12 %uint_0
+%_ptr_StorageBuffer_uint = OpTypePointer StorageBuffer %uint
        %void = OpTypeVoid
-         %34 = OpTypeFunction %void
+         %47 = OpTypeFunction %void
     %uint_64 = OpConstant %uint 64
 %subgroupMatrixLoad_6432c6 = OpFunction %12 None %16
          %17 = OpLabel
       %arg_1 = OpVariable %_ptr_Function_uint Function
       %arg_3 = OpVariable %_ptr_Function_uint Function
+         %34 = OpVariable %_ptr_Function_12 Function %36
         %res = OpVariable %_ptr_Function_12 Function
                OpStore %arg_1 %uint_1
                OpStore %arg_3 %uint_8
          %22 = OpAccessChain %_ptr_StorageBuffer__arr_uint_uint_1024 %7 %uint_0 %uint_0
          %24 = OpLoad %uint %arg_1 None
          %25 = OpLoad %uint %arg_3 None
-         %26 = OpAccessChain %_ptr_StorageBuffer_uint %22 %24
-         %28 = OpCooperativeMatrixLoadKHR %12 %26 %uint_1 %25 NonPrivatePointer
-               OpStore %res %28
-         %31 = OpLoad %12 %res None
-               OpReturnValue %31
+         %26 = OpExtInst %uint %27 UMax %25 %uint_8
+         %28 = OpIMul %uint %26 %uint_7
+         %30 = OpIAdd %uint %24 %28
+         %31 = OpIAdd %uint %30 %uint_8
+         %32 = OpULessThanEqual %bool %31 %uint_1024
+               OpSelectionMerge %37 None
+               OpBranchConditional %32 %38 %37
+         %38 = OpLabel
+         %42 = OpAccessChain %_ptr_StorageBuffer_uint %22 %24
+         %44 = OpCooperativeMatrixLoadKHR %12 %42 %uint_1 %26 NonPrivatePointer
+               OpStore %34 %44 None
+               OpBranch %37
+         %37 = OpLabel
+         %39 = OpLoad %12 %34 None
+               OpStore %res %39
+         %41 = OpLoad %12 %res None
+               OpReturnValue %41
                OpFunctionEnd
-%compute_main = OpFunction %void None %34
-         %35 = OpLabel
-         %36 = OpFunctionCall %12 %subgroupMatrixLoad_6432c6
-         %37 = OpAccessChain %_ptr_StorageBuffer__arr_uint_uint_1024 %1 %uint_0
-         %38 = OpAccessChain %_ptr_StorageBuffer_uint %37 %uint_0
-               OpCooperativeMatrixStoreKHR %38 %36 %uint_0 %uint_64 NonPrivatePointer
+%compute_main = OpFunction %void None %47
+         %48 = OpLabel
+         %49 = OpFunctionCall %12 %subgroupMatrixLoad_6432c6
+         %50 = OpAccessChain %_ptr_StorageBuffer__arr_uint_uint_1024 %1 %uint_0
+         %51 = OpAccessChain %_ptr_StorageBuffer_uint %50 %uint_0
+               OpCooperativeMatrixStoreKHR %51 %49 %uint_0 %uint_64 NonPrivatePointer
                OpReturn
                OpFunctionEnd
diff --git a/test/tint/builtins/gen/var/subgroupMatrixLoad/6c1e00.wgsl.expected.msl b/test/tint/builtins/gen/var/subgroupMatrixLoad/6c1e00.wgsl.expected.msl
index fdd03e9..ee739ff 100644
--- a/test/tint/builtins/gen/var/subgroupMatrixLoad/6c1e00.wgsl.expected.msl
+++ b/test/tint/builtins/gen/var/subgroupMatrixLoad/6c1e00.wgsl.expected.msl
@@ -20,18 +20,25 @@
 struct tint_module_vars_struct {
   device tint_array<float, 1024>* prevent_dce;
   const device SB_RO* sb_ro;
+  const constant tint_array<uint4, 1>* tint_storage_buffer_sizes;
 };
 
 simdgroup_float8x8 subgroupMatrixLoad_6c1e00(tint_module_vars_struct tint_module_vars) {
   uint arg_1 = 1u;
   uint arg_3 = 8u;
-  simdgroup_float8x8 v = make_filled_simdgroup_matrix<float, 8, 8>(0.0f);
-  simdgroup_load(v, (&(*tint_module_vars.sb_ro).arg_0[arg_1]), ulong(arg_3), ulong2(0ul), true);
-  simdgroup_float8x8 res = v;
+  uint const v = arg_1;
+  uint const v_1 = max(arg_3, 8u);
+  simdgroup_float8x8 v_2 = make_filled_simdgroup_matrix<float, 8, 8>(0.0f);
+  if ((((v + (v_1 * 7u)) + 8u) <= (((*tint_module_vars.tint_storage_buffer_sizes)[0u].x - 0u) / 4u))) {
+    simdgroup_float8x8 v_3 = make_filled_simdgroup_matrix<float, 8, 8>(0.0f);
+    simdgroup_load(v_3, (&(*tint_module_vars.sb_ro).arg_0[v]), ulong(v_1), ulong2(0ul), true);
+    v_2 = v_3;
+  }
+  simdgroup_float8x8 res = v_2;
   return res;
 }
 
-kernel void compute_main(device tint_array<float, 1024>* prevent_dce [[buffer(0)]], const device SB_RO* sb_ro [[buffer(1)]]) {
-  tint_module_vars_struct const tint_module_vars = tint_module_vars_struct{.prevent_dce=prevent_dce, .sb_ro=sb_ro};
+kernel void compute_main(device tint_array<float, 1024>* prevent_dce [[buffer(0)]], const device SB_RO* sb_ro [[buffer(1)]], const constant tint_array<uint4, 1>* tint_storage_buffer_sizes [[buffer(30)]]) {
+  tint_module_vars_struct const tint_module_vars = tint_module_vars_struct{.prevent_dce=prevent_dce, .sb_ro=sb_ro, .tint_storage_buffer_sizes=tint_storage_buffer_sizes};
   simdgroup_store(subgroupMatrixLoad_6c1e00(tint_module_vars), (&(*tint_module_vars.prevent_dce)[0u]), ulong(64u), ulong2(0ul), false);
 }
diff --git a/test/tint/builtins/gen/var/subgroupMatrixLoad/6c1e00.wgsl.expected.spvasm b/test/tint/builtins/gen/var/subgroupMatrixLoad/6c1e00.wgsl.expected.spvasm
index ce40db3..40ec938 100644
--- a/test/tint/builtins/gen/var/subgroupMatrixLoad/6c1e00.wgsl.expected.spvasm
+++ b/test/tint/builtins/gen/var/subgroupMatrixLoad/6c1e00.wgsl.expected.spvasm
@@ -1,7 +1,7 @@
 ; SPIR-V
 ; Version: 1.3
 ; Generator: Google Tint Compiler; 1
-; Bound: 44
+; Bound: 59
 ; Schema: 0
                OpCapability Shader
                OpCapability VulkanMemoryModel
@@ -9,6 +9,7 @@
                OpCapability CooperativeMatrixKHR
                OpExtension "SPV_KHR_vulkan_memory_model"
                OpExtension "SPV_KHR_cooperative_matrix"
+         %28 = OpExtInstImport "GLSL.std.450"
                OpMemoryModel Logical Vulkan
                OpEntryPoint GLCompute %compute_main "compute_main"
                OpExecutionMode %compute_main LocalSize 1 1 1
@@ -52,10 +53,14 @@
 %_ptr_Function_uint = OpTypePointer Function %uint
 %_ptr_StorageBuffer__runtimearr_float = OpTypePointer StorageBuffer %_runtimearr_float
      %uint_0 = OpConstant %uint 0
-%_ptr_StorageBuffer_float = OpTypePointer StorageBuffer %float
+     %uint_7 = OpConstant %uint 7
+       %bool = OpTypeBool
 %_ptr_Function_13 = OpTypePointer Function %13
+    %float_0 = OpConstant %float 0
+         %38 = OpConstantComposite %13 %float_0
+%_ptr_StorageBuffer_float = OpTypePointer StorageBuffer %float
        %void = OpTypeVoid
-         %35 = OpTypeFunction %void
+         %50 = OpTypeFunction %void
 %_ptr_StorageBuffer__arr_float_uint_1024 = OpTypePointer StorageBuffer %_arr_float_uint_1024
 %_ptr_StorageBuffer_float_0 = OpTypePointer StorageBuffer %float
     %uint_64 = OpConstant %uint 64
@@ -63,23 +68,37 @@
          %18 = OpLabel
       %arg_1 = OpVariable %_ptr_Function_uint Function
       %arg_3 = OpVariable %_ptr_Function_uint Function
+         %36 = OpVariable %_ptr_Function_13 Function %38
         %res = OpVariable %_ptr_Function_13 Function
                OpStore %arg_1 %uint_1
                OpStore %arg_3 %uint_8
          %22 = OpAccessChain %_ptr_StorageBuffer__runtimearr_float %sb_ro %uint_0
          %25 = OpLoad %uint %arg_1 None
          %26 = OpLoad %uint %arg_3 None
-         %27 = OpAccessChain %_ptr_StorageBuffer_float %22 %25
-         %29 = OpCooperativeMatrixLoadKHR %13 %27 %uint_1 %26 NonPrivatePointer
-               OpStore %res %29
-         %32 = OpLoad %13 %res None
-               OpReturnValue %32
+         %27 = OpExtInst %uint %28 UMax %26 %uint_8
+         %29 = OpArrayLength %uint %sb_ro 0
+         %30 = OpIMul %uint %27 %uint_7
+         %32 = OpIAdd %uint %25 %30
+         %33 = OpIAdd %uint %32 %uint_8
+         %34 = OpULessThanEqual %bool %33 %29
+               OpSelectionMerge %40 None
+               OpBranchConditional %34 %41 %40
+         %41 = OpLabel
+         %45 = OpAccessChain %_ptr_StorageBuffer_float %22 %25
+         %47 = OpCooperativeMatrixLoadKHR %13 %45 %uint_1 %27 NonPrivatePointer
+               OpStore %36 %47 None
+               OpBranch %40
+         %40 = OpLabel
+         %42 = OpLoad %13 %36 None
+               OpStore %res %42
+         %44 = OpLoad %13 %res None
+               OpReturnValue %44
                OpFunctionEnd
-%compute_main = OpFunction %void None %35
-         %36 = OpLabel
-         %37 = OpFunctionCall %13 %subgroupMatrixLoad_6c1e00
-         %38 = OpAccessChain %_ptr_StorageBuffer__arr_float_uint_1024 %1 %uint_0
-         %40 = OpAccessChain %_ptr_StorageBuffer_float_0 %38 %uint_0
-               OpCooperativeMatrixStoreKHR %40 %37 %uint_0 %uint_64 NonPrivatePointer
+%compute_main = OpFunction %void None %50
+         %51 = OpLabel
+         %52 = OpFunctionCall %13 %subgroupMatrixLoad_6c1e00
+         %53 = OpAccessChain %_ptr_StorageBuffer__arr_float_uint_1024 %1 %uint_0
+         %55 = OpAccessChain %_ptr_StorageBuffer_float_0 %53 %uint_0
+               OpCooperativeMatrixStoreKHR %55 %52 %uint_0 %uint_64 NonPrivatePointer
                OpReturn
                OpFunctionEnd
diff --git a/test/tint/builtins/gen/var/subgroupMatrixLoad/6e2773.wgsl.expected.spvasm b/test/tint/builtins/gen/var/subgroupMatrixLoad/6e2773.wgsl.expected.spvasm
index f5ed11d..0af11d4 100644
--- a/test/tint/builtins/gen/var/subgroupMatrixLoad/6e2773.wgsl.expected.spvasm
+++ b/test/tint/builtins/gen/var/subgroupMatrixLoad/6e2773.wgsl.expected.spvasm
@@ -1,7 +1,7 @@
 ; SPIR-V
 ; Version: 1.3
 ; Generator: Google Tint Compiler; 1
-; Bound: 44
+; Bound: 59
 ; Schema: 0
                OpCapability Shader
                OpCapability VulkanMemoryModel
@@ -9,6 +9,7 @@
                OpCapability CooperativeMatrixKHR
                OpExtension "SPV_KHR_vulkan_memory_model"
                OpExtension "SPV_KHR_cooperative_matrix"
+         %29 = OpExtInstImport "GLSL.std.450"
                OpMemoryModel Logical Vulkan
                OpEntryPoint GLCompute %compute_main "compute_main"
                OpExecutionMode %compute_main LocalSize 1 1 1
@@ -52,33 +53,51 @@
      %uint_1 = OpConstant %uint 1
 %_ptr_StorageBuffer__runtimearr_int = OpTypePointer StorageBuffer %_runtimearr_int
      %uint_0 = OpConstant %uint 0
-%_ptr_StorageBuffer_int = OpTypePointer StorageBuffer %int
+     %uint_7 = OpConstant %uint 7
+       %bool = OpTypeBool
 %_ptr_Function_13 = OpTypePointer Function %13
+      %int_0 = OpConstant %int 0
+         %39 = OpConstantComposite %13 %int_0
+%_ptr_StorageBuffer_int = OpTypePointer StorageBuffer %int
        %void = OpTypeVoid
-         %36 = OpTypeFunction %void
+         %51 = OpTypeFunction %void
 %_ptr_StorageBuffer__arr_int_uint_1024 = OpTypePointer StorageBuffer %_arr_int_uint_1024
     %uint_64 = OpConstant %uint 64
 %subgroupMatrixLoad_6e2773 = OpFunction %13 None %17
          %18 = OpLabel
       %arg_1 = OpVariable %_ptr_Function_uint Function
       %arg_3 = OpVariable %_ptr_Function_uint Function
+         %37 = OpVariable %_ptr_Function_13 Function %39
         %res = OpVariable %_ptr_Function_13 Function
                OpStore %arg_1 %uint_1
                OpStore %arg_3 %uint_8
          %23 = OpAccessChain %_ptr_StorageBuffer__runtimearr_int %sb_rw %uint_0
          %26 = OpLoad %uint %arg_1 None
          %27 = OpLoad %uint %arg_3 None
-         %28 = OpAccessChain %_ptr_StorageBuffer_int %23 %26
-         %30 = OpCooperativeMatrixLoadKHR %13 %28 %uint_1 %27 NonPrivatePointer
-               OpStore %res %30
-         %33 = OpLoad %13 %res None
-               OpReturnValue %33
+         %28 = OpExtInst %uint %29 UMax %27 %uint_8
+         %30 = OpArrayLength %uint %sb_rw 0
+         %31 = OpIMul %uint %28 %uint_7
+         %33 = OpIAdd %uint %26 %31
+         %34 = OpIAdd %uint %33 %uint_8
+         %35 = OpULessThanEqual %bool %34 %30
+               OpSelectionMerge %41 None
+               OpBranchConditional %35 %42 %41
+         %42 = OpLabel
+         %46 = OpAccessChain %_ptr_StorageBuffer_int %23 %26
+         %48 = OpCooperativeMatrixLoadKHR %13 %46 %uint_1 %28 NonPrivatePointer
+               OpStore %37 %48 None
+               OpBranch %41
+         %41 = OpLabel
+         %43 = OpLoad %13 %37 None
+               OpStore %res %43
+         %45 = OpLoad %13 %res None
+               OpReturnValue %45
                OpFunctionEnd
-%compute_main = OpFunction %void None %36
-         %37 = OpLabel
-         %38 = OpFunctionCall %13 %subgroupMatrixLoad_6e2773
-         %39 = OpAccessChain %_ptr_StorageBuffer__arr_int_uint_1024 %1 %uint_0
-         %41 = OpAccessChain %_ptr_StorageBuffer_int %39 %uint_0
-               OpCooperativeMatrixStoreKHR %41 %38 %uint_0 %uint_64 NonPrivatePointer
+%compute_main = OpFunction %void None %51
+         %52 = OpLabel
+         %53 = OpFunctionCall %13 %subgroupMatrixLoad_6e2773
+         %54 = OpAccessChain %_ptr_StorageBuffer__arr_int_uint_1024 %1 %uint_0
+         %56 = OpAccessChain %_ptr_StorageBuffer_int %54 %uint_0
+               OpCooperativeMatrixStoreKHR %56 %53 %uint_0 %uint_64 NonPrivatePointer
                OpReturn
                OpFunctionEnd
diff --git a/test/tint/builtins/gen/var/subgroupMatrixLoad/7bacd3.wgsl.expected.msl b/test/tint/builtins/gen/var/subgroupMatrixLoad/7bacd3.wgsl.expected.msl
index f4bd912..78fa8c6 100644
--- a/test/tint/builtins/gen/var/subgroupMatrixLoad/7bacd3.wgsl.expected.msl
+++ b/test/tint/builtins/gen/var/subgroupMatrixLoad/7bacd3.wgsl.expected.msl
@@ -25,9 +25,15 @@
 simdgroup_float8x8 subgroupMatrixLoad_7bacd3(tint_module_vars_struct tint_module_vars) {
   uint arg_1 = 1u;
   uint arg_3 = 8u;
-  simdgroup_float8x8 v = make_filled_simdgroup_matrix<float, 8, 8>(0.0f);
-  simdgroup_load(v, (&(*tint_module_vars.sb_rw).arg_0[arg_1]), ulong(arg_3), ulong2(0ul), true);
-  simdgroup_float8x8 res = v;
+  uint const v = arg_1;
+  uint const v_1 = max(arg_3, 8u);
+  simdgroup_float8x8 v_2 = make_filled_simdgroup_matrix<float, 8, 8>(0.0f);
+  if ((((v + (v_1 * 7u)) + 8u) <= 1024u)) {
+    simdgroup_float8x8 v_3 = make_filled_simdgroup_matrix<float, 8, 8>(0.0f);
+    simdgroup_load(v_3, (&(*tint_module_vars.sb_rw).arg_0[v]), ulong(v_1), ulong2(0ul), true);
+    v_2 = v_3;
+  }
+  simdgroup_float8x8 res = v_2;
   return res;
 }
 
diff --git a/test/tint/builtins/gen/var/subgroupMatrixLoad/7bacd3.wgsl.expected.spvasm b/test/tint/builtins/gen/var/subgroupMatrixLoad/7bacd3.wgsl.expected.spvasm
index bc77c3e..985ea42 100644
--- a/test/tint/builtins/gen/var/subgroupMatrixLoad/7bacd3.wgsl.expected.spvasm
+++ b/test/tint/builtins/gen/var/subgroupMatrixLoad/7bacd3.wgsl.expected.spvasm
@@ -1,7 +1,7 @@
 ; SPIR-V
 ; Version: 1.3
 ; Generator: Google Tint Compiler; 1
-; Bound: 43
+; Bound: 57
 ; Schema: 0
                OpCapability Shader
                OpCapability VulkanMemoryModel
@@ -9,6 +9,7 @@
                OpCapability CooperativeMatrixKHR
                OpExtension "SPV_KHR_vulkan_memory_model"
                OpExtension "SPV_KHR_cooperative_matrix"
+         %29 = OpExtInstImport "GLSL.std.450"
                OpMemoryModel Logical Vulkan
                OpEntryPoint GLCompute %compute_main "compute_main"
                OpExecutionMode %compute_main LocalSize 1 1 1
@@ -53,32 +54,49 @@
      %uint_1 = OpConstant %uint 1
 %_ptr_StorageBuffer__arr_float_uint_1024 = OpTypePointer StorageBuffer %_arr_float_uint_1024
      %uint_0 = OpConstant %uint 0
-%_ptr_StorageBuffer_float = OpTypePointer StorageBuffer %float
+     %uint_7 = OpConstant %uint 7
+       %bool = OpTypeBool
 %_ptr_Function_13 = OpTypePointer Function %13
+    %float_0 = OpConstant %float 0
+         %38 = OpConstantComposite %13 %float_0
+%_ptr_StorageBuffer_float = OpTypePointer StorageBuffer %float
        %void = OpTypeVoid
-         %36 = OpTypeFunction %void
+         %50 = OpTypeFunction %void
     %uint_64 = OpConstant %uint 64
 %subgroupMatrixLoad_7bacd3 = OpFunction %13 None %17
          %18 = OpLabel
       %arg_1 = OpVariable %_ptr_Function_uint Function
       %arg_3 = OpVariable %_ptr_Function_uint Function
+         %36 = OpVariable %_ptr_Function_13 Function %38
         %res = OpVariable %_ptr_Function_13 Function
                OpStore %arg_1 %uint_1
                OpStore %arg_3 %uint_8
          %23 = OpAccessChain %_ptr_StorageBuffer__arr_float_uint_1024 %8 %uint_0 %uint_0
          %26 = OpLoad %uint %arg_1 None
          %27 = OpLoad %uint %arg_3 None
-         %28 = OpAccessChain %_ptr_StorageBuffer_float %23 %26
-         %30 = OpCooperativeMatrixLoadKHR %13 %28 %uint_1 %27 NonPrivatePointer
-               OpStore %res %30
-         %33 = OpLoad %13 %res None
-               OpReturnValue %33
+         %28 = OpExtInst %uint %29 UMax %27 %uint_8
+         %30 = OpIMul %uint %28 %uint_7
+         %32 = OpIAdd %uint %26 %30
+         %33 = OpIAdd %uint %32 %uint_8
+         %34 = OpULessThanEqual %bool %33 %uint_1024
+               OpSelectionMerge %40 None
+               OpBranchConditional %34 %41 %40
+         %41 = OpLabel
+         %45 = OpAccessChain %_ptr_StorageBuffer_float %23 %26
+         %47 = OpCooperativeMatrixLoadKHR %13 %45 %uint_1 %28 NonPrivatePointer
+               OpStore %36 %47 None
+               OpBranch %40
+         %40 = OpLabel
+         %42 = OpLoad %13 %36 None
+               OpStore %res %42
+         %44 = OpLoad %13 %res None
+               OpReturnValue %44
                OpFunctionEnd
-%compute_main = OpFunction %void None %36
-         %37 = OpLabel
-         %38 = OpFunctionCall %13 %subgroupMatrixLoad_7bacd3
-         %39 = OpAccessChain %_ptr_StorageBuffer__arr_float_uint_1024 %1 %uint_0
-         %40 = OpAccessChain %_ptr_StorageBuffer_float %39 %uint_0
-               OpCooperativeMatrixStoreKHR %40 %38 %uint_0 %uint_64 NonPrivatePointer
+%compute_main = OpFunction %void None %50
+         %51 = OpLabel
+         %52 = OpFunctionCall %13 %subgroupMatrixLoad_7bacd3
+         %53 = OpAccessChain %_ptr_StorageBuffer__arr_float_uint_1024 %1 %uint_0
+         %54 = OpAccessChain %_ptr_StorageBuffer_float %53 %uint_0
+               OpCooperativeMatrixStoreKHR %54 %52 %uint_0 %uint_64 NonPrivatePointer
                OpReturn
                OpFunctionEnd
diff --git a/test/tint/builtins/gen/var/subgroupMatrixLoad/7bb257.wgsl.expected.spvasm b/test/tint/builtins/gen/var/subgroupMatrixLoad/7bb257.wgsl.expected.spvasm
index 00f4bc4..6f9dfb5 100644
--- a/test/tint/builtins/gen/var/subgroupMatrixLoad/7bb257.wgsl.expected.spvasm
+++ b/test/tint/builtins/gen/var/subgroupMatrixLoad/7bb257.wgsl.expected.spvasm
@@ -1,7 +1,7 @@
 ; SPIR-V
 ; Version: 1.3
 ; Generator: Google Tint Compiler; 1
-; Bound: 65
+; Bound: 77
 ; Schema: 0
                OpCapability Shader
                OpCapability VulkanMemoryModel
@@ -9,6 +9,7 @@
                OpCapability CooperativeMatrixKHR
                OpExtension "SPV_KHR_vulkan_memory_model"
                OpExtension "SPV_KHR_cooperative_matrix"
+         %27 = OpExtInstImport "GLSL.std.450"
                OpMemoryModel Logical Vulkan
                OpEntryPoint GLCompute %compute_main "compute_main" %compute_main_local_invocation_index_Input
                OpExecutionMode %compute_main LocalSize 1 1 1
@@ -48,67 +49,82 @@
          %18 = OpTypeFunction %14
 %_ptr_Function_uint = OpTypePointer Function %uint
      %uint_1 = OpConstant %uint 1
-%_ptr_Workgroup_int = OpTypePointer Workgroup %int
+     %uint_7 = OpConstant %uint 7
+       %bool = OpTypeBool
 %_ptr_Function_14 = OpTypePointer Function %14
+      %int_0 = OpConstant %int 0
+         %36 = OpConstantComposite %14 %int_0
+%_ptr_Workgroup_int = OpTypePointer Workgroup %int
        %void = OpTypeVoid
-         %35 = OpTypeFunction %void %uint
+         %49 = OpTypeFunction %void %uint
      %uint_2 = OpConstant %uint 2
  %uint_24840 = OpConstant %uint 24840
 %_ptr_StorageBuffer__arr_int_uint_1024 = OpTypePointer StorageBuffer %_arr_int_uint_1024
 %_ptr_StorageBuffer_int = OpTypePointer StorageBuffer %int
     %uint_64 = OpConstant %uint 64
-       %bool = OpTypeBool
-      %int_0 = OpConstant %int 0
-         %61 = OpTypeFunction %void
+         %73 = OpTypeFunction %void
 %subgroupMatrixLoad_7bb257 = OpFunction %14 None %18
          %19 = OpLabel
       %arg_1 = OpVariable %_ptr_Function_uint Function
       %arg_3 = OpVariable %_ptr_Function_uint Function
+         %34 = OpVariable %_ptr_Function_14 Function %36
         %res = OpVariable %_ptr_Function_14 Function
                OpStore %arg_1 %uint_1
                OpStore %arg_3 %uint_8
          %24 = OpLoad %uint %arg_1 None
          %25 = OpLoad %uint %arg_3 None
-         %26 = OpAccessChain %_ptr_Workgroup_int %arg_0 %24
-         %28 = OpCooperativeMatrixLoadKHR %14 %26 %uint_1 %25 NonPrivatePointer
-               OpStore %res %28
-         %31 = OpLoad %14 %res None
-               OpReturnValue %31
-               OpFunctionEnd
-%compute_main_inner = OpFunction %void None %35
-%tint_local_index = OpFunctionParameter %uint
-         %36 = OpLabel
-               OpBranch %37
-         %37 = OpLabel
-               OpBranch %40
-         %40 = OpLabel
-         %42 = OpPhi %uint %tint_local_index %37 %43 %39
-               OpLoopMerge %41 %39 None
+         %26 = OpExtInst %uint %27 UMax %25 %uint_8
+         %28 = OpIMul %uint %26 %uint_7
+         %30 = OpIAdd %uint %24 %28
+         %31 = OpIAdd %uint %30 %uint_8
+         %32 = OpULessThanEqual %bool %31 %uint_1024
+               OpSelectionMerge %38 None
+               OpBranchConditional %32 %39 %38
+         %39 = OpLabel
+         %43 = OpAccessChain %_ptr_Workgroup_int %arg_0 %24
+         %45 = OpCooperativeMatrixLoadKHR %14 %43 %uint_1 %26 NonPrivatePointer
+               OpStore %34 %45 None
                OpBranch %38
          %38 = OpLabel
-         %54 = OpUGreaterThanEqual %bool %42 %uint_1024
-               OpSelectionMerge %56 None
-               OpBranchConditional %54 %57 %56
-         %57 = OpLabel
-               OpBranch %41
-         %56 = OpLabel
-         %58 = OpAccessChain %_ptr_Workgroup_int %arg_0 %42
-               OpStore %58 %int_0 NonPrivatePointer
-               OpBranch %39
-         %39 = OpLabel
-         %43 = OpIAdd %uint %42 %uint_1
-               OpBranch %40
-         %41 = OpLabel
+         %40 = OpLoad %14 %34 None
+               OpStore %res %40
+         %42 = OpLoad %14 %res None
+               OpReturnValue %42
+               OpFunctionEnd
+%compute_main_inner = OpFunction %void None %49
+%tint_local_index = OpFunctionParameter %uint
+         %50 = OpLabel
+               OpBranch %51
+         %51 = OpLabel
+               OpBranch %54
+         %54 = OpLabel
+         %56 = OpPhi %uint %tint_local_index %51 %57 %53
+               OpLoopMerge %55 %53 None
+               OpBranch %52
+         %52 = OpLabel
+         %68 = OpUGreaterThanEqual %bool %56 %uint_1024
+               OpSelectionMerge %69 None
+               OpBranchConditional %68 %70 %69
+         %70 = OpLabel
+               OpBranch %55
+         %69 = OpLabel
+         %71 = OpAccessChain %_ptr_Workgroup_int %arg_0 %56
+               OpStore %71 %int_0 NonPrivatePointer
+               OpBranch %53
+         %53 = OpLabel
+         %57 = OpIAdd %uint %56 %uint_1
+               OpBranch %54
+         %55 = OpLabel
                OpControlBarrier %uint_2 %uint_2 %uint_24840
-         %47 = OpFunctionCall %14 %subgroupMatrixLoad_7bb257
-         %48 = OpAccessChain %_ptr_StorageBuffer__arr_int_uint_1024 %1 %uint_0
-         %50 = OpAccessChain %_ptr_StorageBuffer_int %48 %uint_0
-               OpCooperativeMatrixStoreKHR %50 %47 %uint_0 %uint_64 NonPrivatePointer
+         %61 = OpFunctionCall %14 %subgroupMatrixLoad_7bb257
+         %62 = OpAccessChain %_ptr_StorageBuffer__arr_int_uint_1024 %1 %uint_0
+         %64 = OpAccessChain %_ptr_StorageBuffer_int %62 %uint_0
+               OpCooperativeMatrixStoreKHR %64 %61 %uint_0 %uint_64 NonPrivatePointer
                OpReturn
                OpFunctionEnd
-%compute_main = OpFunction %void None %61
-         %62 = OpLabel
-         %63 = OpLoad %uint %compute_main_local_invocation_index_Input None
-         %64 = OpFunctionCall %void %compute_main_inner %63
+%compute_main = OpFunction %void None %73
+         %74 = OpLabel
+         %75 = OpLoad %uint %compute_main_local_invocation_index_Input None
+         %76 = OpFunctionCall %void %compute_main_inner %75
                OpReturn
                OpFunctionEnd
diff --git a/test/tint/builtins/gen/var/subgroupMatrixLoad/80b778.wgsl.expected.spvasm b/test/tint/builtins/gen/var/subgroupMatrixLoad/80b778.wgsl.expected.spvasm
index 3f8b996..45fec5d 100644
--- a/test/tint/builtins/gen/var/subgroupMatrixLoad/80b778.wgsl.expected.spvasm
+++ b/test/tint/builtins/gen/var/subgroupMatrixLoad/80b778.wgsl.expected.spvasm
@@ -1,7 +1,7 @@
 ; SPIR-V
 ; Version: 1.3
 ; Generator: Google Tint Compiler; 1
-; Bound: 42
+; Bound: 56
 ; Schema: 0
                OpCapability Shader
                OpCapability VulkanMemoryModel
@@ -9,6 +9,7 @@
                OpCapability CooperativeMatrixKHR
                OpExtension "SPV_KHR_vulkan_memory_model"
                OpExtension "SPV_KHR_cooperative_matrix"
+         %27 = OpExtInstImport "GLSL.std.450"
                OpMemoryModel Logical Vulkan
                OpEntryPoint GLCompute %compute_main "compute_main"
                OpExecutionMode %compute_main LocalSize 1 1 1
@@ -50,33 +51,50 @@
 %_ptr_Function_uint = OpTypePointer Function %uint
 %_ptr_StorageBuffer__runtimearr_uint = OpTypePointer StorageBuffer %_runtimearr_uint
      %uint_0 = OpConstant %uint 0
-%_ptr_StorageBuffer_uint = OpTypePointer StorageBuffer %uint
+     %uint_7 = OpConstant %uint 7
+       %bool = OpTypeBool
 %_ptr_Function_12 = OpTypePointer Function %12
+         %37 = OpConstantComposite %12 %uint_0
+%_ptr_StorageBuffer_uint = OpTypePointer StorageBuffer %uint
        %void = OpTypeVoid
-         %34 = OpTypeFunction %void
+         %48 = OpTypeFunction %void
 %_ptr_StorageBuffer__arr_uint_uint_1024 = OpTypePointer StorageBuffer %_arr_uint_uint_1024
     %uint_64 = OpConstant %uint 64
 %subgroupMatrixLoad_80b778 = OpFunction %12 None %16
          %17 = OpLabel
       %arg_1 = OpVariable %_ptr_Function_uint Function
       %arg_3 = OpVariable %_ptr_Function_uint Function
+         %35 = OpVariable %_ptr_Function_12 Function %37
         %res = OpVariable %_ptr_Function_12 Function
                OpStore %arg_1 %uint_1
                OpStore %arg_3 %uint_8
          %21 = OpAccessChain %_ptr_StorageBuffer__runtimearr_uint %sb_rw %uint_0
          %24 = OpLoad %uint %arg_1 None
          %25 = OpLoad %uint %arg_3 None
-         %26 = OpAccessChain %_ptr_StorageBuffer_uint %21 %24
-         %28 = OpCooperativeMatrixLoadKHR %12 %26 %uint_1 %25 NonPrivatePointer
-               OpStore %res %28
-         %31 = OpLoad %12 %res None
-               OpReturnValue %31
+         %26 = OpExtInst %uint %27 UMax %25 %uint_8
+         %28 = OpArrayLength %uint %sb_rw 0
+         %29 = OpIMul %uint %26 %uint_7
+         %31 = OpIAdd %uint %24 %29
+         %32 = OpIAdd %uint %31 %uint_8
+         %33 = OpULessThanEqual %bool %32 %28
+               OpSelectionMerge %38 None
+               OpBranchConditional %33 %39 %38
+         %39 = OpLabel
+         %43 = OpAccessChain %_ptr_StorageBuffer_uint %21 %24
+         %45 = OpCooperativeMatrixLoadKHR %12 %43 %uint_1 %26 NonPrivatePointer
+               OpStore %35 %45 None
+               OpBranch %38
+         %38 = OpLabel
+         %40 = OpLoad %12 %35 None
+               OpStore %res %40
+         %42 = OpLoad %12 %res None
+               OpReturnValue %42
                OpFunctionEnd
-%compute_main = OpFunction %void None %34
-         %35 = OpLabel
-         %36 = OpFunctionCall %12 %subgroupMatrixLoad_80b778
-         %37 = OpAccessChain %_ptr_StorageBuffer__arr_uint_uint_1024 %1 %uint_0
-         %39 = OpAccessChain %_ptr_StorageBuffer_uint %37 %uint_0
-               OpCooperativeMatrixStoreKHR %39 %36 %uint_0 %uint_64 NonPrivatePointer
+%compute_main = OpFunction %void None %48
+         %49 = OpLabel
+         %50 = OpFunctionCall %12 %subgroupMatrixLoad_80b778
+         %51 = OpAccessChain %_ptr_StorageBuffer__arr_uint_uint_1024 %1 %uint_0
+         %53 = OpAccessChain %_ptr_StorageBuffer_uint %51 %uint_0
+               OpCooperativeMatrixStoreKHR %53 %50 %uint_0 %uint_64 NonPrivatePointer
                OpReturn
                OpFunctionEnd
diff --git a/test/tint/builtins/gen/var/subgroupMatrixLoad/85fc76.wgsl.expected.spvasm b/test/tint/builtins/gen/var/subgroupMatrixLoad/85fc76.wgsl.expected.spvasm
index 2d5034b..26a54b9 100644
--- a/test/tint/builtins/gen/var/subgroupMatrixLoad/85fc76.wgsl.expected.spvasm
+++ b/test/tint/builtins/gen/var/subgroupMatrixLoad/85fc76.wgsl.expected.spvasm
@@ -1,7 +1,7 @@
 ; SPIR-V
 ; Version: 1.3
 ; Generator: Google Tint Compiler; 1
-; Bound: 43
+; Bound: 56
 ; Schema: 0
                OpCapability Shader
                OpCapability VulkanMemoryModel
@@ -9,6 +9,7 @@
                OpCapability CooperativeMatrixKHR
                OpExtension "SPV_KHR_vulkan_memory_model"
                OpExtension "SPV_KHR_cooperative_matrix"
+         %27 = OpExtInstImport "GLSL.std.450"
                OpMemoryModel Logical Vulkan
                OpEntryPoint GLCompute %compute_main "compute_main"
                OpExecutionMode %compute_main LocalSize 1 1 1
@@ -52,10 +53,13 @@
 %_ptr_Function_uint = OpTypePointer Function %uint
      %uint_1 = OpConstant %uint 1
 %_ptr_StorageBuffer__arr_uint_uint_1024 = OpTypePointer StorageBuffer %_arr_uint_uint_1024
-%_ptr_StorageBuffer_uint = OpTypePointer StorageBuffer %uint
+     %uint_7 = OpConstant %uint 7
+       %bool = OpTypeBool
 %_ptr_Function_12 = OpTypePointer Function %12
+         %36 = OpConstantComposite %12 %uint_0
+%_ptr_StorageBuffer_uint = OpTypePointer StorageBuffer %uint
        %void = OpTypeVoid
-         %34 = OpTypeFunction %void
+         %47 = OpTypeFunction %void
 %_ptr_StorageBuffer__arr_uint_uint_1024_0 = OpTypePointer StorageBuffer %_arr_uint_uint_1024
 %_ptr_StorageBuffer_uint_0 = OpTypePointer StorageBuffer %uint
     %uint_64 = OpConstant %uint 64
@@ -63,23 +67,36 @@
          %17 = OpLabel
       %arg_1 = OpVariable %_ptr_Function_uint Function
       %arg_3 = OpVariable %_ptr_Function_uint Function
+         %34 = OpVariable %_ptr_Function_12 Function %36
         %res = OpVariable %_ptr_Function_12 Function
                OpStore %arg_1 %uint_1
                OpStore %arg_3 %uint_8
          %22 = OpAccessChain %_ptr_StorageBuffer__arr_uint_uint_1024 %7 %uint_0 %uint_0
          %24 = OpLoad %uint %arg_1 None
          %25 = OpLoad %uint %arg_3 None
-         %26 = OpAccessChain %_ptr_StorageBuffer_uint %22 %24
-         %28 = OpCooperativeMatrixLoadKHR %12 %26 %uint_1 %25 NonPrivatePointer
-               OpStore %res %28
-         %31 = OpLoad %12 %res None
-               OpReturnValue %31
+         %26 = OpExtInst %uint %27 UMax %25 %uint_8
+         %28 = OpIMul %uint %26 %uint_7
+         %30 = OpIAdd %uint %24 %28
+         %31 = OpIAdd %uint %30 %uint_8
+         %32 = OpULessThanEqual %bool %31 %uint_1024
+               OpSelectionMerge %37 None
+               OpBranchConditional %32 %38 %37
+         %38 = OpLabel
+         %42 = OpAccessChain %_ptr_StorageBuffer_uint %22 %24
+         %44 = OpCooperativeMatrixLoadKHR %12 %42 %uint_1 %26 NonPrivatePointer
+               OpStore %34 %44 None
+               OpBranch %37
+         %37 = OpLabel
+         %39 = OpLoad %12 %34 None
+               OpStore %res %39
+         %41 = OpLoad %12 %res None
+               OpReturnValue %41
                OpFunctionEnd
-%compute_main = OpFunction %void None %34
-         %35 = OpLabel
-         %36 = OpFunctionCall %12 %subgroupMatrixLoad_85fc76
-         %37 = OpAccessChain %_ptr_StorageBuffer__arr_uint_uint_1024_0 %1 %uint_0
-         %39 = OpAccessChain %_ptr_StorageBuffer_uint_0 %37 %uint_0
-               OpCooperativeMatrixStoreKHR %39 %36 %uint_0 %uint_64 NonPrivatePointer
+%compute_main = OpFunction %void None %47
+         %48 = OpLabel
+         %49 = OpFunctionCall %12 %subgroupMatrixLoad_85fc76
+         %50 = OpAccessChain %_ptr_StorageBuffer__arr_uint_uint_1024_0 %1 %uint_0
+         %52 = OpAccessChain %_ptr_StorageBuffer_uint_0 %50 %uint_0
+               OpCooperativeMatrixStoreKHR %52 %49 %uint_0 %uint_64 NonPrivatePointer
                OpReturn
                OpFunctionEnd
diff --git a/test/tint/builtins/gen/var/subgroupMatrixLoad/865a3c.wgsl.expected.spvasm b/test/tint/builtins/gen/var/subgroupMatrixLoad/865a3c.wgsl.expected.spvasm
index 4beca30..eb84f9a 100644
--- a/test/tint/builtins/gen/var/subgroupMatrixLoad/865a3c.wgsl.expected.spvasm
+++ b/test/tint/builtins/gen/var/subgroupMatrixLoad/865a3c.wgsl.expected.spvasm
@@ -1,7 +1,7 @@
 ; SPIR-V
 ; Version: 1.3
 ; Generator: Google Tint Compiler; 1
-; Bound: 44
+; Bound: 58
 ; Schema: 0
                OpCapability Shader
                OpCapability VulkanMemoryModel
@@ -9,6 +9,7 @@
                OpCapability CooperativeMatrixKHR
                OpExtension "SPV_KHR_vulkan_memory_model"
                OpExtension "SPV_KHR_cooperative_matrix"
+         %28 = OpExtInstImport "GLSL.std.450"
                OpMemoryModel Logical Vulkan
                OpEntryPoint GLCompute %compute_main "compute_main"
                OpExecutionMode %compute_main LocalSize 1 1 1
@@ -53,10 +54,14 @@
 %_ptr_Function_uint = OpTypePointer Function %uint
 %_ptr_StorageBuffer__arr_int_uint_1024 = OpTypePointer StorageBuffer %_arr_int_uint_1024
      %uint_0 = OpConstant %uint 0
-%_ptr_StorageBuffer_int = OpTypePointer StorageBuffer %int
+     %uint_7 = OpConstant %uint 7
+       %bool = OpTypeBool
 %_ptr_Function_13 = OpTypePointer Function %13
+      %int_0 = OpConstant %int 0
+         %37 = OpConstantComposite %13 %int_0
+%_ptr_StorageBuffer_int = OpTypePointer StorageBuffer %int
        %void = OpTypeVoid
-         %35 = OpTypeFunction %void
+         %49 = OpTypeFunction %void
 %_ptr_StorageBuffer__arr_int_uint_1024_0 = OpTypePointer StorageBuffer %_arr_int_uint_1024
 %_ptr_StorageBuffer_int_0 = OpTypePointer StorageBuffer %int
     %uint_64 = OpConstant %uint 64
@@ -64,23 +69,36 @@
          %18 = OpLabel
       %arg_1 = OpVariable %_ptr_Function_uint Function
       %arg_3 = OpVariable %_ptr_Function_uint Function
+         %35 = OpVariable %_ptr_Function_13 Function %37
         %res = OpVariable %_ptr_Function_13 Function
                OpStore %arg_1 %uint_1
                OpStore %arg_3 %uint_8
          %22 = OpAccessChain %_ptr_StorageBuffer__arr_int_uint_1024 %8 %uint_0 %uint_0
          %25 = OpLoad %uint %arg_1 None
          %26 = OpLoad %uint %arg_3 None
-         %27 = OpAccessChain %_ptr_StorageBuffer_int %22 %25
-         %29 = OpCooperativeMatrixLoadKHR %13 %27 %uint_1 %26 NonPrivatePointer
-               OpStore %res %29
-         %32 = OpLoad %13 %res None
-               OpReturnValue %32
+         %27 = OpExtInst %uint %28 UMax %26 %uint_8
+         %29 = OpIMul %uint %27 %uint_7
+         %31 = OpIAdd %uint %25 %29
+         %32 = OpIAdd %uint %31 %uint_8
+         %33 = OpULessThanEqual %bool %32 %uint_1024
+               OpSelectionMerge %39 None
+               OpBranchConditional %33 %40 %39
+         %40 = OpLabel
+         %44 = OpAccessChain %_ptr_StorageBuffer_int %22 %25
+         %46 = OpCooperativeMatrixLoadKHR %13 %44 %uint_1 %27 NonPrivatePointer
+               OpStore %35 %46 None
+               OpBranch %39
+         %39 = OpLabel
+         %41 = OpLoad %13 %35 None
+               OpStore %res %41
+         %43 = OpLoad %13 %res None
+               OpReturnValue %43
                OpFunctionEnd
-%compute_main = OpFunction %void None %35
-         %36 = OpLabel
-         %37 = OpFunctionCall %13 %subgroupMatrixLoad_865a3c
-         %38 = OpAccessChain %_ptr_StorageBuffer__arr_int_uint_1024_0 %1 %uint_0
-         %40 = OpAccessChain %_ptr_StorageBuffer_int_0 %38 %uint_0
-               OpCooperativeMatrixStoreKHR %40 %37 %uint_0 %uint_64 NonPrivatePointer
+%compute_main = OpFunction %void None %49
+         %50 = OpLabel
+         %51 = OpFunctionCall %13 %subgroupMatrixLoad_865a3c
+         %52 = OpAccessChain %_ptr_StorageBuffer__arr_int_uint_1024_0 %1 %uint_0
+         %54 = OpAccessChain %_ptr_StorageBuffer_int_0 %52 %uint_0
+               OpCooperativeMatrixStoreKHR %54 %51 %uint_0 %uint_64 NonPrivatePointer
                OpReturn
                OpFunctionEnd
diff --git a/test/tint/builtins/gen/var/subgroupMatrixLoad/8ec8ba.wgsl.expected.msl b/test/tint/builtins/gen/var/subgroupMatrixLoad/8ec8ba.wgsl.expected.msl
index d493850..dac0432 100644
--- a/test/tint/builtins/gen/var/subgroupMatrixLoad/8ec8ba.wgsl.expected.msl
+++ b/test/tint/builtins/gen/var/subgroupMatrixLoad/8ec8ba.wgsl.expected.msl
@@ -25,9 +25,15 @@
 simdgroup_float8x8 subgroupMatrixLoad_8ec8ba(tint_module_vars_struct tint_module_vars) {
   uint arg_1 = 1u;
   uint arg_3 = 8u;
-  simdgroup_float8x8 v = make_filled_simdgroup_matrix<float, 8, 8>(0.0f);
-  simdgroup_load(v, (&(*tint_module_vars.sb_ro).arg_0[arg_1]), ulong(arg_3), ulong2(0ul), true);
-  simdgroup_float8x8 res = v;
+  uint const v = arg_1;
+  uint const v_1 = max(arg_3, 8u);
+  simdgroup_float8x8 v_2 = make_filled_simdgroup_matrix<float, 8, 8>(0.0f);
+  if ((((v + (v_1 * 7u)) + 8u) <= 1024u)) {
+    simdgroup_float8x8 v_3 = make_filled_simdgroup_matrix<float, 8, 8>(0.0f);
+    simdgroup_load(v_3, (&(*tint_module_vars.sb_ro).arg_0[v]), ulong(v_1), ulong2(0ul), true);
+    v_2 = v_3;
+  }
+  simdgroup_float8x8 res = v_2;
   return res;
 }
 
diff --git a/test/tint/builtins/gen/var/subgroupMatrixLoad/8ec8ba.wgsl.expected.spvasm b/test/tint/builtins/gen/var/subgroupMatrixLoad/8ec8ba.wgsl.expected.spvasm
index 06a1d6b..e0162c9 100644
--- a/test/tint/builtins/gen/var/subgroupMatrixLoad/8ec8ba.wgsl.expected.spvasm
+++ b/test/tint/builtins/gen/var/subgroupMatrixLoad/8ec8ba.wgsl.expected.spvasm
@@ -1,7 +1,7 @@
 ; SPIR-V
 ; Version: 1.3
 ; Generator: Google Tint Compiler; 1
-; Bound: 44
+; Bound: 58
 ; Schema: 0
                OpCapability Shader
                OpCapability VulkanMemoryModel
@@ -9,6 +9,7 @@
                OpCapability CooperativeMatrixKHR
                OpExtension "SPV_KHR_vulkan_memory_model"
                OpExtension "SPV_KHR_cooperative_matrix"
+         %28 = OpExtInstImport "GLSL.std.450"
                OpMemoryModel Logical Vulkan
                OpEntryPoint GLCompute %compute_main "compute_main"
                OpExecutionMode %compute_main LocalSize 1 1 1
@@ -53,10 +54,14 @@
 %_ptr_Function_uint = OpTypePointer Function %uint
      %uint_1 = OpConstant %uint 1
 %_ptr_StorageBuffer__arr_float_uint_1024 = OpTypePointer StorageBuffer %_arr_float_uint_1024
-%_ptr_StorageBuffer_float = OpTypePointer StorageBuffer %float
+     %uint_7 = OpConstant %uint 7
+       %bool = OpTypeBool
 %_ptr_Function_13 = OpTypePointer Function %13
+    %float_0 = OpConstant %float 0
+         %37 = OpConstantComposite %13 %float_0
+%_ptr_StorageBuffer_float = OpTypePointer StorageBuffer %float
        %void = OpTypeVoid
-         %35 = OpTypeFunction %void
+         %49 = OpTypeFunction %void
 %_ptr_StorageBuffer__arr_float_uint_1024_0 = OpTypePointer StorageBuffer %_arr_float_uint_1024
 %_ptr_StorageBuffer_float_0 = OpTypePointer StorageBuffer %float
     %uint_64 = OpConstant %uint 64
@@ -64,23 +69,36 @@
          %18 = OpLabel
       %arg_1 = OpVariable %_ptr_Function_uint Function
       %arg_3 = OpVariable %_ptr_Function_uint Function
+         %35 = OpVariable %_ptr_Function_13 Function %37
         %res = OpVariable %_ptr_Function_13 Function
                OpStore %arg_1 %uint_1
                OpStore %arg_3 %uint_8
          %23 = OpAccessChain %_ptr_StorageBuffer__arr_float_uint_1024 %8 %uint_0 %uint_0
          %25 = OpLoad %uint %arg_1 None
          %26 = OpLoad %uint %arg_3 None
-         %27 = OpAccessChain %_ptr_StorageBuffer_float %23 %25
-         %29 = OpCooperativeMatrixLoadKHR %13 %27 %uint_1 %26 NonPrivatePointer
-               OpStore %res %29
-         %32 = OpLoad %13 %res None
-               OpReturnValue %32
+         %27 = OpExtInst %uint %28 UMax %26 %uint_8
+         %29 = OpIMul %uint %27 %uint_7
+         %31 = OpIAdd %uint %25 %29
+         %32 = OpIAdd %uint %31 %uint_8
+         %33 = OpULessThanEqual %bool %32 %uint_1024
+               OpSelectionMerge %39 None
+               OpBranchConditional %33 %40 %39
+         %40 = OpLabel
+         %44 = OpAccessChain %_ptr_StorageBuffer_float %23 %25
+         %46 = OpCooperativeMatrixLoadKHR %13 %44 %uint_1 %27 NonPrivatePointer
+               OpStore %35 %46 None
+               OpBranch %39
+         %39 = OpLabel
+         %41 = OpLoad %13 %35 None
+               OpStore %res %41
+         %43 = OpLoad %13 %res None
+               OpReturnValue %43
                OpFunctionEnd
-%compute_main = OpFunction %void None %35
-         %36 = OpLabel
-         %37 = OpFunctionCall %13 %subgroupMatrixLoad_8ec8ba
-         %38 = OpAccessChain %_ptr_StorageBuffer__arr_float_uint_1024_0 %1 %uint_0
-         %40 = OpAccessChain %_ptr_StorageBuffer_float_0 %38 %uint_0
-               OpCooperativeMatrixStoreKHR %40 %37 %uint_0 %uint_64 NonPrivatePointer
+%compute_main = OpFunction %void None %49
+         %50 = OpLabel
+         %51 = OpFunctionCall %13 %subgroupMatrixLoad_8ec8ba
+         %52 = OpAccessChain %_ptr_StorageBuffer__arr_float_uint_1024_0 %1 %uint_0
+         %54 = OpAccessChain %_ptr_StorageBuffer_float_0 %52 %uint_0
+               OpCooperativeMatrixStoreKHR %54 %51 %uint_0 %uint_64 NonPrivatePointer
                OpReturn
                OpFunctionEnd
diff --git a/test/tint/builtins/gen/var/subgroupMatrixLoad/99d2a2.wgsl.expected.spvasm b/test/tint/builtins/gen/var/subgroupMatrixLoad/99d2a2.wgsl.expected.spvasm
index 74c8428..1150668 100644
--- a/test/tint/builtins/gen/var/subgroupMatrixLoad/99d2a2.wgsl.expected.spvasm
+++ b/test/tint/builtins/gen/var/subgroupMatrixLoad/99d2a2.wgsl.expected.spvasm
@@ -1,7 +1,7 @@
 ; SPIR-V
 ; Version: 1.3
 ; Generator: Google Tint Compiler; 1
-; Bound: 42
+; Bound: 55
 ; Schema: 0
                OpCapability Shader
                OpCapability VulkanMemoryModel
@@ -9,6 +9,7 @@
                OpCapability CooperativeMatrixKHR
                OpExtension "SPV_KHR_vulkan_memory_model"
                OpExtension "SPV_KHR_cooperative_matrix"
+         %28 = OpExtInstImport "GLSL.std.450"
                OpMemoryModel Logical Vulkan
                OpEntryPoint GLCompute %compute_main "compute_main"
                OpExecutionMode %compute_main LocalSize 1 1 1
@@ -52,32 +53,48 @@
      %uint_1 = OpConstant %uint 1
 %_ptr_StorageBuffer__arr_uint_uint_1024 = OpTypePointer StorageBuffer %_arr_uint_uint_1024
      %uint_0 = OpConstant %uint 0
-%_ptr_StorageBuffer_uint = OpTypePointer StorageBuffer %uint
+     %uint_7 = OpConstant %uint 7
+       %bool = OpTypeBool
 %_ptr_Function_12 = OpTypePointer Function %12
+         %37 = OpConstantComposite %12 %uint_0
+%_ptr_StorageBuffer_uint = OpTypePointer StorageBuffer %uint
        %void = OpTypeVoid
-         %35 = OpTypeFunction %void
+         %48 = OpTypeFunction %void
     %uint_64 = OpConstant %uint 64
 %subgroupMatrixLoad_99d2a2 = OpFunction %12 None %16
          %17 = OpLabel
       %arg_1 = OpVariable %_ptr_Function_uint Function
       %arg_3 = OpVariable %_ptr_Function_uint Function
+         %35 = OpVariable %_ptr_Function_12 Function %37
         %res = OpVariable %_ptr_Function_12 Function
                OpStore %arg_1 %uint_1
                OpStore %arg_3 %uint_8
          %22 = OpAccessChain %_ptr_StorageBuffer__arr_uint_uint_1024 %7 %uint_0 %uint_0
          %25 = OpLoad %uint %arg_1 None
          %26 = OpLoad %uint %arg_3 None
-         %27 = OpAccessChain %_ptr_StorageBuffer_uint %22 %25
-         %29 = OpCooperativeMatrixLoadKHR %12 %27 %uint_1 %26 NonPrivatePointer
-               OpStore %res %29
-         %32 = OpLoad %12 %res None
-               OpReturnValue %32
+         %27 = OpExtInst %uint %28 UMax %26 %uint_8
+         %29 = OpIMul %uint %27 %uint_7
+         %31 = OpIAdd %uint %25 %29
+         %32 = OpIAdd %uint %31 %uint_8
+         %33 = OpULessThanEqual %bool %32 %uint_1024
+               OpSelectionMerge %38 None
+               OpBranchConditional %33 %39 %38
+         %39 = OpLabel
+         %43 = OpAccessChain %_ptr_StorageBuffer_uint %22 %25
+         %45 = OpCooperativeMatrixLoadKHR %12 %43 %uint_1 %27 NonPrivatePointer
+               OpStore %35 %45 None
+               OpBranch %38
+         %38 = OpLabel
+         %40 = OpLoad %12 %35 None
+               OpStore %res %40
+         %42 = OpLoad %12 %res None
+               OpReturnValue %42
                OpFunctionEnd
-%compute_main = OpFunction %void None %35
-         %36 = OpLabel
-         %37 = OpFunctionCall %12 %subgroupMatrixLoad_99d2a2
-         %38 = OpAccessChain %_ptr_StorageBuffer__arr_uint_uint_1024 %1 %uint_0
-         %39 = OpAccessChain %_ptr_StorageBuffer_uint %38 %uint_0
-               OpCooperativeMatrixStoreKHR %39 %37 %uint_0 %uint_64 NonPrivatePointer
+%compute_main = OpFunction %void None %48
+         %49 = OpLabel
+         %50 = OpFunctionCall %12 %subgroupMatrixLoad_99d2a2
+         %51 = OpAccessChain %_ptr_StorageBuffer__arr_uint_uint_1024 %1 %uint_0
+         %52 = OpAccessChain %_ptr_StorageBuffer_uint %51 %uint_0
+               OpCooperativeMatrixStoreKHR %52 %50 %uint_0 %uint_64 NonPrivatePointer
                OpReturn
                OpFunctionEnd
diff --git a/test/tint/builtins/gen/var/subgroupMatrixLoad/9e2d69.wgsl.expected.spvasm b/test/tint/builtins/gen/var/subgroupMatrixLoad/9e2d69.wgsl.expected.spvasm
index 9d22f2f..a74a396 100644
--- a/test/tint/builtins/gen/var/subgroupMatrixLoad/9e2d69.wgsl.expected.spvasm
+++ b/test/tint/builtins/gen/var/subgroupMatrixLoad/9e2d69.wgsl.expected.spvasm
@@ -1,7 +1,7 @@
 ; SPIR-V
 ; Version: 1.3
 ; Generator: Google Tint Compiler; 1
-; Bound: 42
+; Bound: 56
 ; Schema: 0
                OpCapability Shader
                OpCapability VulkanMemoryModel
@@ -9,6 +9,7 @@
                OpCapability CooperativeMatrixKHR
                OpExtension "SPV_KHR_vulkan_memory_model"
                OpExtension "SPV_KHR_cooperative_matrix"
+         %28 = OpExtInstImport "GLSL.std.450"
                OpMemoryModel Logical Vulkan
                OpEntryPoint GLCompute %compute_main "compute_main"
                OpExecutionMode %compute_main LocalSize 1 1 1
@@ -52,32 +53,49 @@
 %_ptr_Function_uint = OpTypePointer Function %uint
      %uint_1 = OpConstant %uint 1
 %_ptr_StorageBuffer__arr_int_uint_1024 = OpTypePointer StorageBuffer %_arr_int_uint_1024
-%_ptr_StorageBuffer_int = OpTypePointer StorageBuffer %int
+     %uint_7 = OpConstant %uint 7
+       %bool = OpTypeBool
 %_ptr_Function_13 = OpTypePointer Function %13
+      %int_0 = OpConstant %int 0
+         %37 = OpConstantComposite %13 %int_0
+%_ptr_StorageBuffer_int = OpTypePointer StorageBuffer %int
        %void = OpTypeVoid
-         %35 = OpTypeFunction %void
+         %49 = OpTypeFunction %void
     %uint_64 = OpConstant %uint 64
 %subgroupMatrixLoad_9e2d69 = OpFunction %13 None %17
          %18 = OpLabel
       %arg_1 = OpVariable %_ptr_Function_uint Function
       %arg_3 = OpVariable %_ptr_Function_uint Function
+         %35 = OpVariable %_ptr_Function_13 Function %37
         %res = OpVariable %_ptr_Function_13 Function
                OpStore %arg_1 %uint_1
                OpStore %arg_3 %uint_8
          %23 = OpAccessChain %_ptr_StorageBuffer__arr_int_uint_1024 %8 %uint_0 %uint_0
          %25 = OpLoad %uint %arg_1 None
          %26 = OpLoad %uint %arg_3 None
-         %27 = OpAccessChain %_ptr_StorageBuffer_int %23 %25
-         %29 = OpCooperativeMatrixLoadKHR %13 %27 %uint_1 %26 NonPrivatePointer
-               OpStore %res %29
-         %32 = OpLoad %13 %res None
-               OpReturnValue %32
+         %27 = OpExtInst %uint %28 UMax %26 %uint_8
+         %29 = OpIMul %uint %27 %uint_7
+         %31 = OpIAdd %uint %25 %29
+         %32 = OpIAdd %uint %31 %uint_8
+         %33 = OpULessThanEqual %bool %32 %uint_1024
+               OpSelectionMerge %39 None
+               OpBranchConditional %33 %40 %39
+         %40 = OpLabel
+         %44 = OpAccessChain %_ptr_StorageBuffer_int %23 %25
+         %46 = OpCooperativeMatrixLoadKHR %13 %44 %uint_1 %27 NonPrivatePointer
+               OpStore %35 %46 None
+               OpBranch %39
+         %39 = OpLabel
+         %41 = OpLoad %13 %35 None
+               OpStore %res %41
+         %43 = OpLoad %13 %res None
+               OpReturnValue %43
                OpFunctionEnd
-%compute_main = OpFunction %void None %35
-         %36 = OpLabel
-         %37 = OpFunctionCall %13 %subgroupMatrixLoad_9e2d69
-         %38 = OpAccessChain %_ptr_StorageBuffer__arr_int_uint_1024 %1 %uint_0
-         %39 = OpAccessChain %_ptr_StorageBuffer_int %38 %uint_0
-               OpCooperativeMatrixStoreKHR %39 %37 %uint_0 %uint_64 NonPrivatePointer
+%compute_main = OpFunction %void None %49
+         %50 = OpLabel
+         %51 = OpFunctionCall %13 %subgroupMatrixLoad_9e2d69
+         %52 = OpAccessChain %_ptr_StorageBuffer__arr_int_uint_1024 %1 %uint_0
+         %53 = OpAccessChain %_ptr_StorageBuffer_int %52 %uint_0
+               OpCooperativeMatrixStoreKHR %53 %51 %uint_0 %uint_64 NonPrivatePointer
                OpReturn
                OpFunctionEnd
diff --git a/test/tint/builtins/gen/var/subgroupMatrixLoad/a13603.wgsl.expected.msl b/test/tint/builtins/gen/var/subgroupMatrixLoad/a13603.wgsl.expected.msl
index c5dd716..d84cde5 100644
--- a/test/tint/builtins/gen/var/subgroupMatrixLoad/a13603.wgsl.expected.msl
+++ b/test/tint/builtins/gen/var/subgroupMatrixLoad/a13603.wgsl.expected.msl
@@ -25,24 +25,30 @@
 simdgroup_half8x8 subgroupMatrixLoad_a13603(tint_module_vars_struct tint_module_vars) {
   uint arg_1 = 1u;
   uint arg_3 = 8u;
-  simdgroup_half8x8 v = make_filled_simdgroup_matrix<half, 8, 8>(0.0h);
-  simdgroup_load(v, (&(*tint_module_vars.arg_0)[arg_1]), ulong(arg_3), ulong2(0ul), true);
-  simdgroup_half8x8 res = v;
+  uint const v = arg_1;
+  uint const v_1 = max(arg_3, 8u);
+  simdgroup_half8x8 v_2 = make_filled_simdgroup_matrix<half, 8, 8>(0.0h);
+  if ((((v + (v_1 * 7u)) + 8u) <= 1024u)) {
+    simdgroup_half8x8 v_3 = make_filled_simdgroup_matrix<half, 8, 8>(0.0h);
+    simdgroup_load(v_3, (&(*tint_module_vars.arg_0)[v]), ulong(v_1), ulong2(0ul), true);
+    v_2 = v_3;
+  }
+  simdgroup_half8x8 res = v_2;
   return res;
 }
 
 void compute_main_inner(uint tint_local_index, tint_module_vars_struct tint_module_vars) {
   {
-    uint v_1 = 0u;
-    v_1 = tint_local_index;
+    uint v_4 = 0u;
+    v_4 = tint_local_index;
     while(true) {
-      uint const v_2 = v_1;
-      if ((v_2 >= 1024u)) {
+      uint const v_5 = v_4;
+      if ((v_5 >= 1024u)) {
         break;
       }
-      (*tint_module_vars.arg_0)[v_2] = 0.0h;
+      (*tint_module_vars.arg_0)[v_5] = 0.0h;
       {
-        v_1 = (v_2 + 1u);
+        v_4 = (v_5 + 1u);
       }
       continue;
     }
@@ -51,7 +57,7 @@
   simdgroup_store(subgroupMatrixLoad_a13603(tint_module_vars), (&(*tint_module_vars.prevent_dce)[0u]), ulong(64u), ulong2(0ul), false);
 }
 
-kernel void compute_main(uint tint_local_index [[thread_index_in_threadgroup]], device tint_array<half, 1024>* prevent_dce [[buffer(0)]], threadgroup tint_symbol_1* v_3 [[threadgroup(0)]]) {
-  tint_module_vars_struct const tint_module_vars = tint_module_vars_struct{.prevent_dce=prevent_dce, .arg_0=(&(*v_3).tint_symbol)};
+kernel void compute_main(uint tint_local_index [[thread_index_in_threadgroup]], device tint_array<half, 1024>* prevent_dce [[buffer(0)]], threadgroup tint_symbol_1* v_6 [[threadgroup(0)]]) {
+  tint_module_vars_struct const tint_module_vars = tint_module_vars_struct{.prevent_dce=prevent_dce, .arg_0=(&(*v_6).tint_symbol)};
   compute_main_inner(tint_local_index, tint_module_vars);
 }
diff --git a/test/tint/builtins/gen/var/subgroupMatrixLoad/a13603.wgsl.expected.spvasm b/test/tint/builtins/gen/var/subgroupMatrixLoad/a13603.wgsl.expected.spvasm
index fb43ca3..fa410ab 100644
--- a/test/tint/builtins/gen/var/subgroupMatrixLoad/a13603.wgsl.expected.spvasm
+++ b/test/tint/builtins/gen/var/subgroupMatrixLoad/a13603.wgsl.expected.spvasm
@@ -1,7 +1,7 @@
 ; SPIR-V
 ; Version: 1.3
 ; Generator: Google Tint Compiler; 1
-; Bound: 65
+; Bound: 77
 ; Schema: 0
                OpCapability Shader
                OpCapability VulkanMemoryModel
@@ -12,6 +12,7 @@
                OpCapability CooperativeMatrixKHR
                OpExtension "SPV_KHR_vulkan_memory_model"
                OpExtension "SPV_KHR_cooperative_matrix"
+         %27 = OpExtInstImport "GLSL.std.450"
                OpMemoryModel Logical Vulkan
                OpEntryPoint GLCompute %compute_main "compute_main" %compute_main_local_invocation_index_Input
                OpExecutionMode %compute_main LocalSize 1 1 1
@@ -51,67 +52,82 @@
          %18 = OpTypeFunction %14
 %_ptr_Function_uint = OpTypePointer Function %uint
      %uint_1 = OpConstant %uint 1
-%_ptr_Workgroup_half = OpTypePointer Workgroup %half
+     %uint_7 = OpConstant %uint 7
+       %bool = OpTypeBool
 %_ptr_Function_14 = OpTypePointer Function %14
+%half_0x0p_0 = OpConstant %half 0x0p+0
+         %36 = OpConstantComposite %14 %half_0x0p_0
+%_ptr_Workgroup_half = OpTypePointer Workgroup %half
        %void = OpTypeVoid
-         %35 = OpTypeFunction %void %uint
+         %49 = OpTypeFunction %void %uint
      %uint_2 = OpConstant %uint 2
  %uint_24840 = OpConstant %uint 24840
 %_ptr_StorageBuffer__arr_half_uint_1024 = OpTypePointer StorageBuffer %_arr_half_uint_1024
 %_ptr_StorageBuffer_half = OpTypePointer StorageBuffer %half
     %uint_64 = OpConstant %uint 64
-       %bool = OpTypeBool
-%half_0x0p_0 = OpConstant %half 0x0p+0
-         %61 = OpTypeFunction %void
+         %73 = OpTypeFunction %void
 %subgroupMatrixLoad_a13603 = OpFunction %14 None %18
          %19 = OpLabel
       %arg_1 = OpVariable %_ptr_Function_uint Function
       %arg_3 = OpVariable %_ptr_Function_uint Function
+         %34 = OpVariable %_ptr_Function_14 Function %36
         %res = OpVariable %_ptr_Function_14 Function
                OpStore %arg_1 %uint_1
                OpStore %arg_3 %uint_8
          %24 = OpLoad %uint %arg_1 None
          %25 = OpLoad %uint %arg_3 None
-         %26 = OpAccessChain %_ptr_Workgroup_half %arg_0 %24
-         %28 = OpCooperativeMatrixLoadKHR %14 %26 %uint_1 %25 NonPrivatePointer
-               OpStore %res %28
-         %31 = OpLoad %14 %res None
-               OpReturnValue %31
-               OpFunctionEnd
-%compute_main_inner = OpFunction %void None %35
-%tint_local_index = OpFunctionParameter %uint
-         %36 = OpLabel
-               OpBranch %37
-         %37 = OpLabel
-               OpBranch %40
-         %40 = OpLabel
-         %42 = OpPhi %uint %tint_local_index %37 %43 %39
-               OpLoopMerge %41 %39 None
+         %26 = OpExtInst %uint %27 UMax %25 %uint_8
+         %28 = OpIMul %uint %26 %uint_7
+         %30 = OpIAdd %uint %24 %28
+         %31 = OpIAdd %uint %30 %uint_8
+         %32 = OpULessThanEqual %bool %31 %uint_1024
+               OpSelectionMerge %38 None
+               OpBranchConditional %32 %39 %38
+         %39 = OpLabel
+         %43 = OpAccessChain %_ptr_Workgroup_half %arg_0 %24
+         %45 = OpCooperativeMatrixLoadKHR %14 %43 %uint_1 %26 NonPrivatePointer
+               OpStore %34 %45 None
                OpBranch %38
          %38 = OpLabel
-         %54 = OpUGreaterThanEqual %bool %42 %uint_1024
-               OpSelectionMerge %56 None
-               OpBranchConditional %54 %57 %56
-         %57 = OpLabel
-               OpBranch %41
-         %56 = OpLabel
-         %58 = OpAccessChain %_ptr_Workgroup_half %arg_0 %42
-               OpStore %58 %half_0x0p_0 NonPrivatePointer
-               OpBranch %39
-         %39 = OpLabel
-         %43 = OpIAdd %uint %42 %uint_1
-               OpBranch %40
-         %41 = OpLabel
+         %40 = OpLoad %14 %34 None
+               OpStore %res %40
+         %42 = OpLoad %14 %res None
+               OpReturnValue %42
+               OpFunctionEnd
+%compute_main_inner = OpFunction %void None %49
+%tint_local_index = OpFunctionParameter %uint
+         %50 = OpLabel
+               OpBranch %51
+         %51 = OpLabel
+               OpBranch %54
+         %54 = OpLabel
+         %56 = OpPhi %uint %tint_local_index %51 %57 %53
+               OpLoopMerge %55 %53 None
+               OpBranch %52
+         %52 = OpLabel
+         %68 = OpUGreaterThanEqual %bool %56 %uint_1024
+               OpSelectionMerge %69 None
+               OpBranchConditional %68 %70 %69
+         %70 = OpLabel
+               OpBranch %55
+         %69 = OpLabel
+         %71 = OpAccessChain %_ptr_Workgroup_half %arg_0 %56
+               OpStore %71 %half_0x0p_0 NonPrivatePointer
+               OpBranch %53
+         %53 = OpLabel
+         %57 = OpIAdd %uint %56 %uint_1
+               OpBranch %54
+         %55 = OpLabel
                OpControlBarrier %uint_2 %uint_2 %uint_24840
-         %47 = OpFunctionCall %14 %subgroupMatrixLoad_a13603
-         %48 = OpAccessChain %_ptr_StorageBuffer__arr_half_uint_1024 %1 %uint_0
-         %50 = OpAccessChain %_ptr_StorageBuffer_half %48 %uint_0
-               OpCooperativeMatrixStoreKHR %50 %47 %uint_0 %uint_64 NonPrivatePointer
+         %61 = OpFunctionCall %14 %subgroupMatrixLoad_a13603
+         %62 = OpAccessChain %_ptr_StorageBuffer__arr_half_uint_1024 %1 %uint_0
+         %64 = OpAccessChain %_ptr_StorageBuffer_half %62 %uint_0
+               OpCooperativeMatrixStoreKHR %64 %61 %uint_0 %uint_64 NonPrivatePointer
                OpReturn
                OpFunctionEnd
-%compute_main = OpFunction %void None %61
-         %62 = OpLabel
-         %63 = OpLoad %uint %compute_main_local_invocation_index_Input None
-         %64 = OpFunctionCall %void %compute_main_inner %63
+%compute_main = OpFunction %void None %73
+         %74 = OpLabel
+         %75 = OpLoad %uint %compute_main_local_invocation_index_Input None
+         %76 = OpFunctionCall %void %compute_main_inner %75
                OpReturn
                OpFunctionEnd
diff --git a/test/tint/builtins/gen/var/subgroupMatrixLoad/a798ae.wgsl.expected.spvasm b/test/tint/builtins/gen/var/subgroupMatrixLoad/a798ae.wgsl.expected.spvasm
index 24b1cf7..d12086a 100644
--- a/test/tint/builtins/gen/var/subgroupMatrixLoad/a798ae.wgsl.expected.spvasm
+++ b/test/tint/builtins/gen/var/subgroupMatrixLoad/a798ae.wgsl.expected.spvasm
@@ -1,7 +1,7 @@
 ; SPIR-V
 ; Version: 1.3
 ; Generator: Google Tint Compiler; 1
-; Bound: 45
+; Bound: 60
 ; Schema: 0
                OpCapability Shader
                OpCapability VulkanMemoryModel
@@ -9,6 +9,7 @@
                OpCapability CooperativeMatrixKHR
                OpExtension "SPV_KHR_vulkan_memory_model"
                OpExtension "SPV_KHR_cooperative_matrix"
+         %29 = OpExtInstImport "GLSL.std.450"
                OpMemoryModel Logical Vulkan
                OpEntryPoint GLCompute %compute_main "compute_main"
                OpExecutionMode %compute_main LocalSize 1 1 1
@@ -53,10 +54,14 @@
      %uint_1 = OpConstant %uint 1
 %_ptr_StorageBuffer__runtimearr_int = OpTypePointer StorageBuffer %_runtimearr_int
      %uint_0 = OpConstant %uint 0
-%_ptr_StorageBuffer_int = OpTypePointer StorageBuffer %int
+     %uint_7 = OpConstant %uint 7
+       %bool = OpTypeBool
 %_ptr_Function_13 = OpTypePointer Function %13
+      %int_0 = OpConstant %int 0
+         %39 = OpConstantComposite %13 %int_0
+%_ptr_StorageBuffer_int = OpTypePointer StorageBuffer %int
        %void = OpTypeVoid
-         %36 = OpTypeFunction %void
+         %51 = OpTypeFunction %void
 %_ptr_StorageBuffer__arr_int_uint_1024 = OpTypePointer StorageBuffer %_arr_int_uint_1024
 %_ptr_StorageBuffer_int_0 = OpTypePointer StorageBuffer %int
     %uint_64 = OpConstant %uint 64
@@ -64,23 +69,37 @@
          %18 = OpLabel
       %arg_1 = OpVariable %_ptr_Function_uint Function
       %arg_3 = OpVariable %_ptr_Function_uint Function
+         %37 = OpVariable %_ptr_Function_13 Function %39
         %res = OpVariable %_ptr_Function_13 Function
                OpStore %arg_1 %uint_1
                OpStore %arg_3 %uint_8
          %23 = OpAccessChain %_ptr_StorageBuffer__runtimearr_int %sb_ro %uint_0
          %26 = OpLoad %uint %arg_1 None
          %27 = OpLoad %uint %arg_3 None
-         %28 = OpAccessChain %_ptr_StorageBuffer_int %23 %26
-         %30 = OpCooperativeMatrixLoadKHR %13 %28 %uint_1 %27 NonPrivatePointer
-               OpStore %res %30
-         %33 = OpLoad %13 %res None
-               OpReturnValue %33
+         %28 = OpExtInst %uint %29 UMax %27 %uint_8
+         %30 = OpArrayLength %uint %sb_ro 0
+         %31 = OpIMul %uint %28 %uint_7
+         %33 = OpIAdd %uint %26 %31
+         %34 = OpIAdd %uint %33 %uint_8
+         %35 = OpULessThanEqual %bool %34 %30
+               OpSelectionMerge %41 None
+               OpBranchConditional %35 %42 %41
+         %42 = OpLabel
+         %46 = OpAccessChain %_ptr_StorageBuffer_int %23 %26
+         %48 = OpCooperativeMatrixLoadKHR %13 %46 %uint_1 %28 NonPrivatePointer
+               OpStore %37 %48 None
+               OpBranch %41
+         %41 = OpLabel
+         %43 = OpLoad %13 %37 None
+               OpStore %res %43
+         %45 = OpLoad %13 %res None
+               OpReturnValue %45
                OpFunctionEnd
-%compute_main = OpFunction %void None %36
-         %37 = OpLabel
-         %38 = OpFunctionCall %13 %subgroupMatrixLoad_a798ae
-         %39 = OpAccessChain %_ptr_StorageBuffer__arr_int_uint_1024 %1 %uint_0
-         %41 = OpAccessChain %_ptr_StorageBuffer_int_0 %39 %uint_0
-               OpCooperativeMatrixStoreKHR %41 %38 %uint_0 %uint_64 NonPrivatePointer
+%compute_main = OpFunction %void None %51
+         %52 = OpLabel
+         %53 = OpFunctionCall %13 %subgroupMatrixLoad_a798ae
+         %54 = OpAccessChain %_ptr_StorageBuffer__arr_int_uint_1024 %1 %uint_0
+         %56 = OpAccessChain %_ptr_StorageBuffer_int_0 %54 %uint_0
+               OpCooperativeMatrixStoreKHR %56 %53 %uint_0 %uint_64 NonPrivatePointer
                OpReturn
                OpFunctionEnd
diff --git a/test/tint/builtins/gen/var/subgroupMatrixLoad/a95397.wgsl.expected.msl b/test/tint/builtins/gen/var/subgroupMatrixLoad/a95397.wgsl.expected.msl
index 26b7711..7423811 100644
--- a/test/tint/builtins/gen/var/subgroupMatrixLoad/a95397.wgsl.expected.msl
+++ b/test/tint/builtins/gen/var/subgroupMatrixLoad/a95397.wgsl.expected.msl
@@ -20,18 +20,25 @@
 struct tint_module_vars_struct {
   device tint_array<half, 1024>* prevent_dce;
   const device SB_RO* sb_ro;
+  const constant tint_array<uint4, 1>* tint_storage_buffer_sizes;
 };
 
 simdgroup_half8x8 subgroupMatrixLoad_a95397(tint_module_vars_struct tint_module_vars) {
   uint arg_1 = 1u;
   uint arg_3 = 8u;
-  simdgroup_half8x8 v = make_filled_simdgroup_matrix<half, 8, 8>(0.0h);
-  simdgroup_load(v, (&(*tint_module_vars.sb_ro).arg_0[arg_1]), ulong(arg_3), ulong2(0ul), true);
-  simdgroup_half8x8 res = v;
+  uint const v = arg_1;
+  uint const v_1 = max(arg_3, 8u);
+  simdgroup_half8x8 v_2 = make_filled_simdgroup_matrix<half, 8, 8>(0.0h);
+  if ((((v + (v_1 * 7u)) + 8u) <= (((*tint_module_vars.tint_storage_buffer_sizes)[0u].x - 0u) / 2u))) {
+    simdgroup_half8x8 v_3 = make_filled_simdgroup_matrix<half, 8, 8>(0.0h);
+    simdgroup_load(v_3, (&(*tint_module_vars.sb_ro).arg_0[v]), ulong(v_1), ulong2(0ul), true);
+    v_2 = v_3;
+  }
+  simdgroup_half8x8 res = v_2;
   return res;
 }
 
-kernel void compute_main(device tint_array<half, 1024>* prevent_dce [[buffer(0)]], const device SB_RO* sb_ro [[buffer(1)]]) {
-  tint_module_vars_struct const tint_module_vars = tint_module_vars_struct{.prevent_dce=prevent_dce, .sb_ro=sb_ro};
+kernel void compute_main(device tint_array<half, 1024>* prevent_dce [[buffer(0)]], const device SB_RO* sb_ro [[buffer(1)]], const constant tint_array<uint4, 1>* tint_storage_buffer_sizes [[buffer(30)]]) {
+  tint_module_vars_struct const tint_module_vars = tint_module_vars_struct{.prevent_dce=prevent_dce, .sb_ro=sb_ro, .tint_storage_buffer_sizes=tint_storage_buffer_sizes};
   simdgroup_store(subgroupMatrixLoad_a95397(tint_module_vars), (&(*tint_module_vars.prevent_dce)[0u]), ulong(64u), ulong2(0ul), false);
 }
diff --git a/test/tint/builtins/gen/var/subgroupMatrixLoad/a95397.wgsl.expected.spvasm b/test/tint/builtins/gen/var/subgroupMatrixLoad/a95397.wgsl.expected.spvasm
index ec21e4f..2d1ffd8 100644
--- a/test/tint/builtins/gen/var/subgroupMatrixLoad/a95397.wgsl.expected.spvasm
+++ b/test/tint/builtins/gen/var/subgroupMatrixLoad/a95397.wgsl.expected.spvasm
@@ -1,7 +1,7 @@
 ; SPIR-V
 ; Version: 1.3
 ; Generator: Google Tint Compiler; 1
-; Bound: 44
+; Bound: 59
 ; Schema: 0
                OpCapability Shader
                OpCapability VulkanMemoryModel
@@ -12,6 +12,7 @@
                OpCapability CooperativeMatrixKHR
                OpExtension "SPV_KHR_vulkan_memory_model"
                OpExtension "SPV_KHR_cooperative_matrix"
+         %28 = OpExtInstImport "GLSL.std.450"
                OpMemoryModel Logical Vulkan
                OpEntryPoint GLCompute %compute_main "compute_main"
                OpExecutionMode %compute_main LocalSize 1 1 1
@@ -55,10 +56,14 @@
 %_ptr_Function_uint = OpTypePointer Function %uint
 %_ptr_StorageBuffer__runtimearr_half = OpTypePointer StorageBuffer %_runtimearr_half
      %uint_0 = OpConstant %uint 0
-%_ptr_StorageBuffer_half = OpTypePointer StorageBuffer %half
+     %uint_7 = OpConstant %uint 7
+       %bool = OpTypeBool
 %_ptr_Function_13 = OpTypePointer Function %13
+%half_0x0p_0 = OpConstant %half 0x0p+0
+         %38 = OpConstantComposite %13 %half_0x0p_0
+%_ptr_StorageBuffer_half = OpTypePointer StorageBuffer %half
        %void = OpTypeVoid
-         %35 = OpTypeFunction %void
+         %50 = OpTypeFunction %void
 %_ptr_StorageBuffer__arr_half_uint_1024 = OpTypePointer StorageBuffer %_arr_half_uint_1024
 %_ptr_StorageBuffer_half_0 = OpTypePointer StorageBuffer %half
     %uint_64 = OpConstant %uint 64
@@ -66,23 +71,37 @@
          %18 = OpLabel
       %arg_1 = OpVariable %_ptr_Function_uint Function
       %arg_3 = OpVariable %_ptr_Function_uint Function
+         %36 = OpVariable %_ptr_Function_13 Function %38
         %res = OpVariable %_ptr_Function_13 Function
                OpStore %arg_1 %uint_1
                OpStore %arg_3 %uint_8
          %22 = OpAccessChain %_ptr_StorageBuffer__runtimearr_half %sb_ro %uint_0
          %25 = OpLoad %uint %arg_1 None
          %26 = OpLoad %uint %arg_3 None
-         %27 = OpAccessChain %_ptr_StorageBuffer_half %22 %25
-         %29 = OpCooperativeMatrixLoadKHR %13 %27 %uint_1 %26 NonPrivatePointer
-               OpStore %res %29
-         %32 = OpLoad %13 %res None
-               OpReturnValue %32
+         %27 = OpExtInst %uint %28 UMax %26 %uint_8
+         %29 = OpArrayLength %uint %sb_ro 0
+         %30 = OpIMul %uint %27 %uint_7
+         %32 = OpIAdd %uint %25 %30
+         %33 = OpIAdd %uint %32 %uint_8
+         %34 = OpULessThanEqual %bool %33 %29
+               OpSelectionMerge %40 None
+               OpBranchConditional %34 %41 %40
+         %41 = OpLabel
+         %45 = OpAccessChain %_ptr_StorageBuffer_half %22 %25
+         %47 = OpCooperativeMatrixLoadKHR %13 %45 %uint_1 %27 NonPrivatePointer
+               OpStore %36 %47 None
+               OpBranch %40
+         %40 = OpLabel
+         %42 = OpLoad %13 %36 None
+               OpStore %res %42
+         %44 = OpLoad %13 %res None
+               OpReturnValue %44
                OpFunctionEnd
-%compute_main = OpFunction %void None %35
-         %36 = OpLabel
-         %37 = OpFunctionCall %13 %subgroupMatrixLoad_a95397
-         %38 = OpAccessChain %_ptr_StorageBuffer__arr_half_uint_1024 %1 %uint_0
-         %40 = OpAccessChain %_ptr_StorageBuffer_half_0 %38 %uint_0
-               OpCooperativeMatrixStoreKHR %40 %37 %uint_0 %uint_64 NonPrivatePointer
+%compute_main = OpFunction %void None %50
+         %51 = OpLabel
+         %52 = OpFunctionCall %13 %subgroupMatrixLoad_a95397
+         %53 = OpAccessChain %_ptr_StorageBuffer__arr_half_uint_1024 %1 %uint_0
+         %55 = OpAccessChain %_ptr_StorageBuffer_half_0 %53 %uint_0
+               OpCooperativeMatrixStoreKHR %55 %52 %uint_0 %uint_64 NonPrivatePointer
                OpReturn
                OpFunctionEnd
diff --git a/test/tint/builtins/gen/var/subgroupMatrixLoad/aadc6f.wgsl.expected.spvasm b/test/tint/builtins/gen/var/subgroupMatrixLoad/aadc6f.wgsl.expected.spvasm
index 282f5cd..8cd0682 100644
--- a/test/tint/builtins/gen/var/subgroupMatrixLoad/aadc6f.wgsl.expected.spvasm
+++ b/test/tint/builtins/gen/var/subgroupMatrixLoad/aadc6f.wgsl.expected.spvasm
@@ -1,7 +1,7 @@
 ; SPIR-V
 ; Version: 1.3
 ; Generator: Google Tint Compiler; 1
-; Bound: 43
+; Bound: 58
 ; Schema: 0
                OpCapability Shader
                OpCapability VulkanMemoryModel
@@ -9,6 +9,7 @@
                OpCapability CooperativeMatrixKHR
                OpExtension "SPV_KHR_vulkan_memory_model"
                OpExtension "SPV_KHR_cooperative_matrix"
+         %28 = OpExtInstImport "GLSL.std.450"
                OpMemoryModel Logical Vulkan
                OpEntryPoint GLCompute %compute_main "compute_main"
                OpExecutionMode %compute_main LocalSize 1 1 1
@@ -51,33 +52,51 @@
 %_ptr_Function_uint = OpTypePointer Function %uint
 %_ptr_StorageBuffer__runtimearr_int = OpTypePointer StorageBuffer %_runtimearr_int
      %uint_0 = OpConstant %uint 0
-%_ptr_StorageBuffer_int = OpTypePointer StorageBuffer %int
+     %uint_7 = OpConstant %uint 7
+       %bool = OpTypeBool
 %_ptr_Function_13 = OpTypePointer Function %13
+      %int_0 = OpConstant %int 0
+         %38 = OpConstantComposite %13 %int_0
+%_ptr_StorageBuffer_int = OpTypePointer StorageBuffer %int
        %void = OpTypeVoid
-         %35 = OpTypeFunction %void
+         %50 = OpTypeFunction %void
 %_ptr_StorageBuffer__arr_int_uint_1024 = OpTypePointer StorageBuffer %_arr_int_uint_1024
     %uint_64 = OpConstant %uint 64
 %subgroupMatrixLoad_aadc6f = OpFunction %13 None %17
          %18 = OpLabel
       %arg_1 = OpVariable %_ptr_Function_uint Function
       %arg_3 = OpVariable %_ptr_Function_uint Function
+         %36 = OpVariable %_ptr_Function_13 Function %38
         %res = OpVariable %_ptr_Function_13 Function
                OpStore %arg_1 %uint_1
                OpStore %arg_3 %uint_8
          %22 = OpAccessChain %_ptr_StorageBuffer__runtimearr_int %sb_rw %uint_0
          %25 = OpLoad %uint %arg_1 None
          %26 = OpLoad %uint %arg_3 None
-         %27 = OpAccessChain %_ptr_StorageBuffer_int %22 %25
-         %29 = OpCooperativeMatrixLoadKHR %13 %27 %uint_1 %26 NonPrivatePointer
-               OpStore %res %29
-         %32 = OpLoad %13 %res None
-               OpReturnValue %32
+         %27 = OpExtInst %uint %28 UMax %26 %uint_8
+         %29 = OpArrayLength %uint %sb_rw 0
+         %30 = OpIMul %uint %27 %uint_7
+         %32 = OpIAdd %uint %25 %30
+         %33 = OpIAdd %uint %32 %uint_8
+         %34 = OpULessThanEqual %bool %33 %29
+               OpSelectionMerge %40 None
+               OpBranchConditional %34 %41 %40
+         %41 = OpLabel
+         %45 = OpAccessChain %_ptr_StorageBuffer_int %22 %25
+         %47 = OpCooperativeMatrixLoadKHR %13 %45 %uint_1 %27 NonPrivatePointer
+               OpStore %36 %47 None
+               OpBranch %40
+         %40 = OpLabel
+         %42 = OpLoad %13 %36 None
+               OpStore %res %42
+         %44 = OpLoad %13 %res None
+               OpReturnValue %44
                OpFunctionEnd
-%compute_main = OpFunction %void None %35
-         %36 = OpLabel
-         %37 = OpFunctionCall %13 %subgroupMatrixLoad_aadc6f
-         %38 = OpAccessChain %_ptr_StorageBuffer__arr_int_uint_1024 %1 %uint_0
-         %40 = OpAccessChain %_ptr_StorageBuffer_int %38 %uint_0
-               OpCooperativeMatrixStoreKHR %40 %37 %uint_0 %uint_64 NonPrivatePointer
+%compute_main = OpFunction %void None %50
+         %51 = OpLabel
+         %52 = OpFunctionCall %13 %subgroupMatrixLoad_aadc6f
+         %53 = OpAccessChain %_ptr_StorageBuffer__arr_int_uint_1024 %1 %uint_0
+         %55 = OpAccessChain %_ptr_StorageBuffer_int %53 %uint_0
+               OpCooperativeMatrixStoreKHR %55 %52 %uint_0 %uint_64 NonPrivatePointer
                OpReturn
                OpFunctionEnd
diff --git a/test/tint/builtins/gen/var/subgroupMatrixLoad/b15f59.wgsl.expected.msl b/test/tint/builtins/gen/var/subgroupMatrixLoad/b15f59.wgsl.expected.msl
index 01ca9e2..db3bb9a 100644
--- a/test/tint/builtins/gen/var/subgroupMatrixLoad/b15f59.wgsl.expected.msl
+++ b/test/tint/builtins/gen/var/subgroupMatrixLoad/b15f59.wgsl.expected.msl
@@ -25,9 +25,15 @@
 simdgroup_half8x8 subgroupMatrixLoad_b15f59(tint_module_vars_struct tint_module_vars) {
   uint arg_1 = 1u;
   uint arg_3 = 8u;
-  simdgroup_half8x8 v = make_filled_simdgroup_matrix<half, 8, 8>(0.0h);
-  simdgroup_load(v, (&(*tint_module_vars.sb_ro).arg_0[arg_1]), ulong(arg_3), ulong2(0ul), true);
-  simdgroup_half8x8 res = v;
+  uint const v = arg_1;
+  uint const v_1 = max(arg_3, 8u);
+  simdgroup_half8x8 v_2 = make_filled_simdgroup_matrix<half, 8, 8>(0.0h);
+  if ((((v + (v_1 * 7u)) + 8u) <= 1024u)) {
+    simdgroup_half8x8 v_3 = make_filled_simdgroup_matrix<half, 8, 8>(0.0h);
+    simdgroup_load(v_3, (&(*tint_module_vars.sb_ro).arg_0[v]), ulong(v_1), ulong2(0ul), true);
+    v_2 = v_3;
+  }
+  simdgroup_half8x8 res = v_2;
   return res;
 }
 
diff --git a/test/tint/builtins/gen/var/subgroupMatrixLoad/b15f59.wgsl.expected.spvasm b/test/tint/builtins/gen/var/subgroupMatrixLoad/b15f59.wgsl.expected.spvasm
index 7db4a3f..c96b2e4 100644
--- a/test/tint/builtins/gen/var/subgroupMatrixLoad/b15f59.wgsl.expected.spvasm
+++ b/test/tint/builtins/gen/var/subgroupMatrixLoad/b15f59.wgsl.expected.spvasm
@@ -1,7 +1,7 @@
 ; SPIR-V
 ; Version: 1.3
 ; Generator: Google Tint Compiler; 1
-; Bound: 44
+; Bound: 58
 ; Schema: 0
                OpCapability Shader
                OpCapability VulkanMemoryModel
@@ -12,6 +12,7 @@
                OpCapability CooperativeMatrixKHR
                OpExtension "SPV_KHR_vulkan_memory_model"
                OpExtension "SPV_KHR_cooperative_matrix"
+         %28 = OpExtInstImport "GLSL.std.450"
                OpMemoryModel Logical Vulkan
                OpEntryPoint GLCompute %compute_main "compute_main"
                OpExecutionMode %compute_main LocalSize 1 1 1
@@ -56,10 +57,14 @@
 %_ptr_Function_uint = OpTypePointer Function %uint
      %uint_1 = OpConstant %uint 1
 %_ptr_StorageBuffer__arr_half_uint_1024 = OpTypePointer StorageBuffer %_arr_half_uint_1024
-%_ptr_StorageBuffer_half = OpTypePointer StorageBuffer %half
+     %uint_7 = OpConstant %uint 7
+       %bool = OpTypeBool
 %_ptr_Function_13 = OpTypePointer Function %13
+%half_0x0p_0 = OpConstant %half 0x0p+0
+         %37 = OpConstantComposite %13 %half_0x0p_0
+%_ptr_StorageBuffer_half = OpTypePointer StorageBuffer %half
        %void = OpTypeVoid
-         %35 = OpTypeFunction %void
+         %49 = OpTypeFunction %void
 %_ptr_StorageBuffer__arr_half_uint_1024_0 = OpTypePointer StorageBuffer %_arr_half_uint_1024
 %_ptr_StorageBuffer_half_0 = OpTypePointer StorageBuffer %half
     %uint_64 = OpConstant %uint 64
@@ -67,23 +72,36 @@
          %18 = OpLabel
       %arg_1 = OpVariable %_ptr_Function_uint Function
       %arg_3 = OpVariable %_ptr_Function_uint Function
+         %35 = OpVariable %_ptr_Function_13 Function %37
         %res = OpVariable %_ptr_Function_13 Function
                OpStore %arg_1 %uint_1
                OpStore %arg_3 %uint_8
          %23 = OpAccessChain %_ptr_StorageBuffer__arr_half_uint_1024 %8 %uint_0 %uint_0
          %25 = OpLoad %uint %arg_1 None
          %26 = OpLoad %uint %arg_3 None
-         %27 = OpAccessChain %_ptr_StorageBuffer_half %23 %25
-         %29 = OpCooperativeMatrixLoadKHR %13 %27 %uint_1 %26 NonPrivatePointer
-               OpStore %res %29
-         %32 = OpLoad %13 %res None
-               OpReturnValue %32
+         %27 = OpExtInst %uint %28 UMax %26 %uint_8
+         %29 = OpIMul %uint %27 %uint_7
+         %31 = OpIAdd %uint %25 %29
+         %32 = OpIAdd %uint %31 %uint_8
+         %33 = OpULessThanEqual %bool %32 %uint_1024
+               OpSelectionMerge %39 None
+               OpBranchConditional %33 %40 %39
+         %40 = OpLabel
+         %44 = OpAccessChain %_ptr_StorageBuffer_half %23 %25
+         %46 = OpCooperativeMatrixLoadKHR %13 %44 %uint_1 %27 NonPrivatePointer
+               OpStore %35 %46 None
+               OpBranch %39
+         %39 = OpLabel
+         %41 = OpLoad %13 %35 None
+               OpStore %res %41
+         %43 = OpLoad %13 %res None
+               OpReturnValue %43
                OpFunctionEnd
-%compute_main = OpFunction %void None %35
-         %36 = OpLabel
-         %37 = OpFunctionCall %13 %subgroupMatrixLoad_b15f59
-         %38 = OpAccessChain %_ptr_StorageBuffer__arr_half_uint_1024_0 %1 %uint_0
-         %40 = OpAccessChain %_ptr_StorageBuffer_half_0 %38 %uint_0
-               OpCooperativeMatrixStoreKHR %40 %37 %uint_0 %uint_64 NonPrivatePointer
+%compute_main = OpFunction %void None %49
+         %50 = OpLabel
+         %51 = OpFunctionCall %13 %subgroupMatrixLoad_b15f59
+         %52 = OpAccessChain %_ptr_StorageBuffer__arr_half_uint_1024_0 %1 %uint_0
+         %54 = OpAccessChain %_ptr_StorageBuffer_half_0 %52 %uint_0
+               OpCooperativeMatrixStoreKHR %54 %51 %uint_0 %uint_64 NonPrivatePointer
                OpReturn
                OpFunctionEnd
diff --git a/test/tint/builtins/gen/var/subgroupMatrixLoad/b354d2.wgsl.expected.spvasm b/test/tint/builtins/gen/var/subgroupMatrixLoad/b354d2.wgsl.expected.spvasm
index 8f5eaf8..d0d630c 100644
--- a/test/tint/builtins/gen/var/subgroupMatrixLoad/b354d2.wgsl.expected.spvasm
+++ b/test/tint/builtins/gen/var/subgroupMatrixLoad/b354d2.wgsl.expected.spvasm
@@ -1,7 +1,7 @@
 ; SPIR-V
 ; Version: 1.3
 ; Generator: Google Tint Compiler; 1
-; Bound: 65
+; Bound: 77
 ; Schema: 0
                OpCapability Shader
                OpCapability VulkanMemoryModel
@@ -9,6 +9,7 @@
                OpCapability CooperativeMatrixKHR
                OpExtension "SPV_KHR_vulkan_memory_model"
                OpExtension "SPV_KHR_cooperative_matrix"
+         %27 = OpExtInstImport "GLSL.std.450"
                OpMemoryModel Logical Vulkan
                OpEntryPoint GLCompute %compute_main "compute_main" %compute_main_local_invocation_index_Input
                OpExecutionMode %compute_main LocalSize 1 1 1
@@ -48,67 +49,82 @@
          %18 = OpTypeFunction %14
 %_ptr_Function_uint = OpTypePointer Function %uint
      %uint_1 = OpConstant %uint 1
-%_ptr_Workgroup_int = OpTypePointer Workgroup %int
+     %uint_7 = OpConstant %uint 7
+       %bool = OpTypeBool
 %_ptr_Function_14 = OpTypePointer Function %14
+      %int_0 = OpConstant %int 0
+         %36 = OpConstantComposite %14 %int_0
+%_ptr_Workgroup_int = OpTypePointer Workgroup %int
        %void = OpTypeVoid
-         %35 = OpTypeFunction %void %uint
+         %49 = OpTypeFunction %void %uint
  %uint_24840 = OpConstant %uint 24840
 %_ptr_StorageBuffer__arr_int_uint_1024 = OpTypePointer StorageBuffer %_arr_int_uint_1024
      %uint_0 = OpConstant %uint 0
 %_ptr_StorageBuffer_int = OpTypePointer StorageBuffer %int
     %uint_64 = OpConstant %uint 64
-       %bool = OpTypeBool
-      %int_0 = OpConstant %int 0
-         %61 = OpTypeFunction %void
+         %73 = OpTypeFunction %void
 %subgroupMatrixLoad_b354d2 = OpFunction %14 None %18
          %19 = OpLabel
       %arg_1 = OpVariable %_ptr_Function_uint Function
       %arg_3 = OpVariable %_ptr_Function_uint Function
+         %34 = OpVariable %_ptr_Function_14 Function %36
         %res = OpVariable %_ptr_Function_14 Function
                OpStore %arg_1 %uint_1
                OpStore %arg_3 %uint_8
          %24 = OpLoad %uint %arg_1 None
          %25 = OpLoad %uint %arg_3 None
-         %26 = OpAccessChain %_ptr_Workgroup_int %arg_0 %24
-         %28 = OpCooperativeMatrixLoadKHR %14 %26 %uint_1 %25 NonPrivatePointer
-               OpStore %res %28
-         %31 = OpLoad %14 %res None
-               OpReturnValue %31
-               OpFunctionEnd
-%compute_main_inner = OpFunction %void None %35
-%tint_local_index = OpFunctionParameter %uint
-         %36 = OpLabel
-               OpBranch %37
-         %37 = OpLabel
-               OpBranch %40
-         %40 = OpLabel
-         %42 = OpPhi %uint %tint_local_index %37 %43 %39
-               OpLoopMerge %41 %39 None
+         %26 = OpExtInst %uint %27 UMax %25 %uint_8
+         %28 = OpIMul %uint %26 %uint_7
+         %30 = OpIAdd %uint %24 %28
+         %31 = OpIAdd %uint %30 %uint_8
+         %32 = OpULessThanEqual %bool %31 %uint_1024
+               OpSelectionMerge %38 None
+               OpBranchConditional %32 %39 %38
+         %39 = OpLabel
+         %43 = OpAccessChain %_ptr_Workgroup_int %arg_0 %24
+         %45 = OpCooperativeMatrixLoadKHR %14 %43 %uint_1 %26 NonPrivatePointer
+               OpStore %34 %45 None
                OpBranch %38
          %38 = OpLabel
-         %54 = OpUGreaterThanEqual %bool %42 %uint_1024
-               OpSelectionMerge %56 None
-               OpBranchConditional %54 %57 %56
-         %57 = OpLabel
-               OpBranch %41
-         %56 = OpLabel
-         %58 = OpAccessChain %_ptr_Workgroup_int %arg_0 %42
-               OpStore %58 %int_0 NonPrivatePointer
-               OpBranch %39
-         %39 = OpLabel
-         %43 = OpIAdd %uint %42 %uint_1
-               OpBranch %40
-         %41 = OpLabel
+         %40 = OpLoad %14 %34 None
+               OpStore %res %40
+         %42 = OpLoad %14 %res None
+               OpReturnValue %42
+               OpFunctionEnd
+%compute_main_inner = OpFunction %void None %49
+%tint_local_index = OpFunctionParameter %uint
+         %50 = OpLabel
+               OpBranch %51
+         %51 = OpLabel
+               OpBranch %54
+         %54 = OpLabel
+         %56 = OpPhi %uint %tint_local_index %51 %57 %53
+               OpLoopMerge %55 %53 None
+               OpBranch %52
+         %52 = OpLabel
+         %68 = OpUGreaterThanEqual %bool %56 %uint_1024
+               OpSelectionMerge %69 None
+               OpBranchConditional %68 %70 %69
+         %70 = OpLabel
+               OpBranch %55
+         %69 = OpLabel
+         %71 = OpAccessChain %_ptr_Workgroup_int %arg_0 %56
+               OpStore %71 %int_0 NonPrivatePointer
+               OpBranch %53
+         %53 = OpLabel
+         %57 = OpIAdd %uint %56 %uint_1
+               OpBranch %54
+         %55 = OpLabel
                OpControlBarrier %uint_2 %uint_2 %uint_24840
-         %46 = OpFunctionCall %14 %subgroupMatrixLoad_b354d2
-         %47 = OpAccessChain %_ptr_StorageBuffer__arr_int_uint_1024 %1 %uint_0
-         %50 = OpAccessChain %_ptr_StorageBuffer_int %47 %uint_0
-               OpCooperativeMatrixStoreKHR %50 %46 %uint_0 %uint_64 NonPrivatePointer
+         %60 = OpFunctionCall %14 %subgroupMatrixLoad_b354d2
+         %61 = OpAccessChain %_ptr_StorageBuffer__arr_int_uint_1024 %1 %uint_0
+         %64 = OpAccessChain %_ptr_StorageBuffer_int %61 %uint_0
+               OpCooperativeMatrixStoreKHR %64 %60 %uint_0 %uint_64 NonPrivatePointer
                OpReturn
                OpFunctionEnd
-%compute_main = OpFunction %void None %61
-         %62 = OpLabel
-         %63 = OpLoad %uint %compute_main_local_invocation_index_Input None
-         %64 = OpFunctionCall %void %compute_main_inner %63
+%compute_main = OpFunction %void None %73
+         %74 = OpLabel
+         %75 = OpLoad %uint %compute_main_local_invocation_index_Input None
+         %76 = OpFunctionCall %void %compute_main_inner %75
                OpReturn
                OpFunctionEnd
diff --git a/test/tint/builtins/gen/var/subgroupMatrixLoad/c700ca.wgsl.expected.spvasm b/test/tint/builtins/gen/var/subgroupMatrixLoad/c700ca.wgsl.expected.spvasm
index 243539b..24a91f3 100644
--- a/test/tint/builtins/gen/var/subgroupMatrixLoad/c700ca.wgsl.expected.spvasm
+++ b/test/tint/builtins/gen/var/subgroupMatrixLoad/c700ca.wgsl.expected.spvasm
@@ -1,7 +1,7 @@
 ; SPIR-V
 ; Version: 1.3
 ; Generator: Google Tint Compiler; 1
-; Bound: 43
+; Bound: 56
 ; Schema: 0
                OpCapability Shader
                OpCapability VulkanMemoryModel
@@ -9,6 +9,7 @@
                OpCapability CooperativeMatrixKHR
                OpExtension "SPV_KHR_vulkan_memory_model"
                OpExtension "SPV_KHR_cooperative_matrix"
+         %27 = OpExtInstImport "GLSL.std.450"
                OpMemoryModel Logical Vulkan
                OpEntryPoint GLCompute %compute_main "compute_main"
                OpExecutionMode %compute_main LocalSize 1 1 1
@@ -52,10 +53,13 @@
 %_ptr_Function_uint = OpTypePointer Function %uint
 %_ptr_StorageBuffer__arr_uint_uint_1024 = OpTypePointer StorageBuffer %_arr_uint_uint_1024
      %uint_0 = OpConstant %uint 0
-%_ptr_StorageBuffer_uint = OpTypePointer StorageBuffer %uint
+     %uint_7 = OpConstant %uint 7
+       %bool = OpTypeBool
 %_ptr_Function_12 = OpTypePointer Function %12
+         %36 = OpConstantComposite %12 %uint_0
+%_ptr_StorageBuffer_uint = OpTypePointer StorageBuffer %uint
        %void = OpTypeVoid
-         %34 = OpTypeFunction %void
+         %47 = OpTypeFunction %void
 %_ptr_StorageBuffer__arr_uint_uint_1024_0 = OpTypePointer StorageBuffer %_arr_uint_uint_1024
 %_ptr_StorageBuffer_uint_0 = OpTypePointer StorageBuffer %uint
     %uint_64 = OpConstant %uint 64
@@ -63,23 +67,36 @@
          %17 = OpLabel
       %arg_1 = OpVariable %_ptr_Function_uint Function
       %arg_3 = OpVariable %_ptr_Function_uint Function
+         %34 = OpVariable %_ptr_Function_12 Function %36
         %res = OpVariable %_ptr_Function_12 Function
                OpStore %arg_1 %uint_1
                OpStore %arg_3 %uint_8
          %21 = OpAccessChain %_ptr_StorageBuffer__arr_uint_uint_1024 %7 %uint_0 %uint_0
          %24 = OpLoad %uint %arg_1 None
          %25 = OpLoad %uint %arg_3 None
-         %26 = OpAccessChain %_ptr_StorageBuffer_uint %21 %24
-         %28 = OpCooperativeMatrixLoadKHR %12 %26 %uint_1 %25 NonPrivatePointer
-               OpStore %res %28
-         %31 = OpLoad %12 %res None
-               OpReturnValue %31
+         %26 = OpExtInst %uint %27 UMax %25 %uint_8
+         %28 = OpIMul %uint %26 %uint_7
+         %30 = OpIAdd %uint %24 %28
+         %31 = OpIAdd %uint %30 %uint_8
+         %32 = OpULessThanEqual %bool %31 %uint_1024
+               OpSelectionMerge %37 None
+               OpBranchConditional %32 %38 %37
+         %38 = OpLabel
+         %42 = OpAccessChain %_ptr_StorageBuffer_uint %21 %24
+         %44 = OpCooperativeMatrixLoadKHR %12 %42 %uint_1 %26 NonPrivatePointer
+               OpStore %34 %44 None
+               OpBranch %37
+         %37 = OpLabel
+         %39 = OpLoad %12 %34 None
+               OpStore %res %39
+         %41 = OpLoad %12 %res None
+               OpReturnValue %41
                OpFunctionEnd
-%compute_main = OpFunction %void None %34
-         %35 = OpLabel
-         %36 = OpFunctionCall %12 %subgroupMatrixLoad_c700ca
-         %37 = OpAccessChain %_ptr_StorageBuffer__arr_uint_uint_1024_0 %1 %uint_0
-         %39 = OpAccessChain %_ptr_StorageBuffer_uint_0 %37 %uint_0
-               OpCooperativeMatrixStoreKHR %39 %36 %uint_0 %uint_64 NonPrivatePointer
+%compute_main = OpFunction %void None %47
+         %48 = OpLabel
+         %49 = OpFunctionCall %12 %subgroupMatrixLoad_c700ca
+         %50 = OpAccessChain %_ptr_StorageBuffer__arr_uint_uint_1024_0 %1 %uint_0
+         %52 = OpAccessChain %_ptr_StorageBuffer_uint_0 %50 %uint_0
+               OpCooperativeMatrixStoreKHR %52 %49 %uint_0 %uint_64 NonPrivatePointer
                OpReturn
                OpFunctionEnd
diff --git a/test/tint/builtins/gen/var/subgroupMatrixLoad/c71ce4.wgsl.expected.msl b/test/tint/builtins/gen/var/subgroupMatrixLoad/c71ce4.wgsl.expected.msl
index 44463bd..ddec2fc 100644
--- a/test/tint/builtins/gen/var/subgroupMatrixLoad/c71ce4.wgsl.expected.msl
+++ b/test/tint/builtins/gen/var/subgroupMatrixLoad/c71ce4.wgsl.expected.msl
@@ -20,18 +20,25 @@
 struct tint_module_vars_struct {
   device tint_array<half, 1024>* prevent_dce;
   device SB_RW* sb_rw;
+  const constant tint_array<uint4, 1>* tint_storage_buffer_sizes;
 };
 
 simdgroup_half8x8 subgroupMatrixLoad_c71ce4(tint_module_vars_struct tint_module_vars) {
   uint arg_1 = 1u;
   uint arg_3 = 8u;
-  simdgroup_half8x8 v = make_filled_simdgroup_matrix<half, 8, 8>(0.0h);
-  simdgroup_load(v, (&(*tint_module_vars.sb_rw).arg_0[arg_1]), ulong(arg_3), ulong2(0ul), true);
-  simdgroup_half8x8 res = v;
+  uint const v = arg_1;
+  uint const v_1 = max(arg_3, 8u);
+  simdgroup_half8x8 v_2 = make_filled_simdgroup_matrix<half, 8, 8>(0.0h);
+  if ((((v + (v_1 * 7u)) + 8u) <= (((*tint_module_vars.tint_storage_buffer_sizes)[0u].x - 0u) / 2u))) {
+    simdgroup_half8x8 v_3 = make_filled_simdgroup_matrix<half, 8, 8>(0.0h);
+    simdgroup_load(v_3, (&(*tint_module_vars.sb_rw).arg_0[v]), ulong(v_1), ulong2(0ul), true);
+    v_2 = v_3;
+  }
+  simdgroup_half8x8 res = v_2;
   return res;
 }
 
-kernel void compute_main(device tint_array<half, 1024>* prevent_dce [[buffer(0)]], device SB_RW* sb_rw [[buffer(1)]]) {
-  tint_module_vars_struct const tint_module_vars = tint_module_vars_struct{.prevent_dce=prevent_dce, .sb_rw=sb_rw};
+kernel void compute_main(device tint_array<half, 1024>* prevent_dce [[buffer(0)]], device SB_RW* sb_rw [[buffer(1)]], const constant tint_array<uint4, 1>* tint_storage_buffer_sizes [[buffer(30)]]) {
+  tint_module_vars_struct const tint_module_vars = tint_module_vars_struct{.prevent_dce=prevent_dce, .sb_rw=sb_rw, .tint_storage_buffer_sizes=tint_storage_buffer_sizes};
   simdgroup_store(subgroupMatrixLoad_c71ce4(tint_module_vars), (&(*tint_module_vars.prevent_dce)[0u]), ulong(64u), ulong2(0ul), false);
 }
diff --git a/test/tint/builtins/gen/var/subgroupMatrixLoad/c71ce4.wgsl.expected.spvasm b/test/tint/builtins/gen/var/subgroupMatrixLoad/c71ce4.wgsl.expected.spvasm
index eafe688..fd3b5e7 100644
--- a/test/tint/builtins/gen/var/subgroupMatrixLoad/c71ce4.wgsl.expected.spvasm
+++ b/test/tint/builtins/gen/var/subgroupMatrixLoad/c71ce4.wgsl.expected.spvasm
@@ -1,7 +1,7 @@
 ; SPIR-V
 ; Version: 1.3
 ; Generator: Google Tint Compiler; 1
-; Bound: 43
+; Bound: 58
 ; Schema: 0
                OpCapability Shader
                OpCapability VulkanMemoryModel
@@ -12,6 +12,7 @@
                OpCapability CooperativeMatrixKHR
                OpExtension "SPV_KHR_vulkan_memory_model"
                OpExtension "SPV_KHR_cooperative_matrix"
+         %28 = OpExtInstImport "GLSL.std.450"
                OpMemoryModel Logical Vulkan
                OpEntryPoint GLCompute %compute_main "compute_main"
                OpExecutionMode %compute_main LocalSize 1 1 1
@@ -54,33 +55,51 @@
 %_ptr_Function_uint = OpTypePointer Function %uint
 %_ptr_StorageBuffer__runtimearr_half = OpTypePointer StorageBuffer %_runtimearr_half
      %uint_0 = OpConstant %uint 0
-%_ptr_StorageBuffer_half = OpTypePointer StorageBuffer %half
+     %uint_7 = OpConstant %uint 7
+       %bool = OpTypeBool
 %_ptr_Function_13 = OpTypePointer Function %13
+%half_0x0p_0 = OpConstant %half 0x0p+0
+         %38 = OpConstantComposite %13 %half_0x0p_0
+%_ptr_StorageBuffer_half = OpTypePointer StorageBuffer %half
        %void = OpTypeVoid
-         %35 = OpTypeFunction %void
+         %50 = OpTypeFunction %void
 %_ptr_StorageBuffer__arr_half_uint_1024 = OpTypePointer StorageBuffer %_arr_half_uint_1024
     %uint_64 = OpConstant %uint 64
 %subgroupMatrixLoad_c71ce4 = OpFunction %13 None %17
          %18 = OpLabel
       %arg_1 = OpVariable %_ptr_Function_uint Function
       %arg_3 = OpVariable %_ptr_Function_uint Function
+         %36 = OpVariable %_ptr_Function_13 Function %38
         %res = OpVariable %_ptr_Function_13 Function
                OpStore %arg_1 %uint_1
                OpStore %arg_3 %uint_8
          %22 = OpAccessChain %_ptr_StorageBuffer__runtimearr_half %sb_rw %uint_0
          %25 = OpLoad %uint %arg_1 None
          %26 = OpLoad %uint %arg_3 None
-         %27 = OpAccessChain %_ptr_StorageBuffer_half %22 %25
-         %29 = OpCooperativeMatrixLoadKHR %13 %27 %uint_1 %26 NonPrivatePointer
-               OpStore %res %29
-         %32 = OpLoad %13 %res None
-               OpReturnValue %32
+         %27 = OpExtInst %uint %28 UMax %26 %uint_8
+         %29 = OpArrayLength %uint %sb_rw 0
+         %30 = OpIMul %uint %27 %uint_7
+         %32 = OpIAdd %uint %25 %30
+         %33 = OpIAdd %uint %32 %uint_8
+         %34 = OpULessThanEqual %bool %33 %29
+               OpSelectionMerge %40 None
+               OpBranchConditional %34 %41 %40
+         %41 = OpLabel
+         %45 = OpAccessChain %_ptr_StorageBuffer_half %22 %25
+         %47 = OpCooperativeMatrixLoadKHR %13 %45 %uint_1 %27 NonPrivatePointer
+               OpStore %36 %47 None
+               OpBranch %40
+         %40 = OpLabel
+         %42 = OpLoad %13 %36 None
+               OpStore %res %42
+         %44 = OpLoad %13 %res None
+               OpReturnValue %44
                OpFunctionEnd
-%compute_main = OpFunction %void None %35
-         %36 = OpLabel
-         %37 = OpFunctionCall %13 %subgroupMatrixLoad_c71ce4
-         %38 = OpAccessChain %_ptr_StorageBuffer__arr_half_uint_1024 %1 %uint_0
-         %40 = OpAccessChain %_ptr_StorageBuffer_half %38 %uint_0
-               OpCooperativeMatrixStoreKHR %40 %37 %uint_0 %uint_64 NonPrivatePointer
+%compute_main = OpFunction %void None %50
+         %51 = OpLabel
+         %52 = OpFunctionCall %13 %subgroupMatrixLoad_c71ce4
+         %53 = OpAccessChain %_ptr_StorageBuffer__arr_half_uint_1024 %1 %uint_0
+         %55 = OpAccessChain %_ptr_StorageBuffer_half %53 %uint_0
+               OpCooperativeMatrixStoreKHR %55 %52 %uint_0 %uint_64 NonPrivatePointer
                OpReturn
                OpFunctionEnd
diff --git a/test/tint/builtins/gen/var/subgroupMatrixLoad/ca4345.wgsl.expected.spvasm b/test/tint/builtins/gen/var/subgroupMatrixLoad/ca4345.wgsl.expected.spvasm
index b27e548..487c36e 100644
--- a/test/tint/builtins/gen/var/subgroupMatrixLoad/ca4345.wgsl.expected.spvasm
+++ b/test/tint/builtins/gen/var/subgroupMatrixLoad/ca4345.wgsl.expected.spvasm
@@ -1,7 +1,7 @@
 ; SPIR-V
 ; Version: 1.3
 ; Generator: Google Tint Compiler; 1
-; Bound: 45
+; Bound: 59
 ; Schema: 0
                OpCapability Shader
                OpCapability VulkanMemoryModel
@@ -9,6 +9,7 @@
                OpCapability CooperativeMatrixKHR
                OpExtension "SPV_KHR_vulkan_memory_model"
                OpExtension "SPV_KHR_cooperative_matrix"
+         %29 = OpExtInstImport "GLSL.std.450"
                OpMemoryModel Logical Vulkan
                OpEntryPoint GLCompute %compute_main "compute_main"
                OpExecutionMode %compute_main LocalSize 1 1 1
@@ -54,10 +55,14 @@
      %uint_1 = OpConstant %uint 1
 %_ptr_StorageBuffer__arr_int_uint_1024 = OpTypePointer StorageBuffer %_arr_int_uint_1024
      %uint_0 = OpConstant %uint 0
-%_ptr_StorageBuffer_int = OpTypePointer StorageBuffer %int
+     %uint_7 = OpConstant %uint 7
+       %bool = OpTypeBool
 %_ptr_Function_13 = OpTypePointer Function %13
+      %int_0 = OpConstant %int 0
+         %38 = OpConstantComposite %13 %int_0
+%_ptr_StorageBuffer_int = OpTypePointer StorageBuffer %int
        %void = OpTypeVoid
-         %36 = OpTypeFunction %void
+         %50 = OpTypeFunction %void
 %_ptr_StorageBuffer__arr_int_uint_1024_0 = OpTypePointer StorageBuffer %_arr_int_uint_1024
 %_ptr_StorageBuffer_int_0 = OpTypePointer StorageBuffer %int
     %uint_64 = OpConstant %uint 64
@@ -65,23 +70,36 @@
          %18 = OpLabel
       %arg_1 = OpVariable %_ptr_Function_uint Function
       %arg_3 = OpVariable %_ptr_Function_uint Function
+         %36 = OpVariable %_ptr_Function_13 Function %38
         %res = OpVariable %_ptr_Function_13 Function
                OpStore %arg_1 %uint_1
                OpStore %arg_3 %uint_8
          %23 = OpAccessChain %_ptr_StorageBuffer__arr_int_uint_1024 %8 %uint_0 %uint_0
          %26 = OpLoad %uint %arg_1 None
          %27 = OpLoad %uint %arg_3 None
-         %28 = OpAccessChain %_ptr_StorageBuffer_int %23 %26
-         %30 = OpCooperativeMatrixLoadKHR %13 %28 %uint_1 %27 NonPrivatePointer
-               OpStore %res %30
-         %33 = OpLoad %13 %res None
-               OpReturnValue %33
+         %28 = OpExtInst %uint %29 UMax %27 %uint_8
+         %30 = OpIMul %uint %28 %uint_7
+         %32 = OpIAdd %uint %26 %30
+         %33 = OpIAdd %uint %32 %uint_8
+         %34 = OpULessThanEqual %bool %33 %uint_1024
+               OpSelectionMerge %40 None
+               OpBranchConditional %34 %41 %40
+         %41 = OpLabel
+         %45 = OpAccessChain %_ptr_StorageBuffer_int %23 %26
+         %47 = OpCooperativeMatrixLoadKHR %13 %45 %uint_1 %28 NonPrivatePointer
+               OpStore %36 %47 None
+               OpBranch %40
+         %40 = OpLabel
+         %42 = OpLoad %13 %36 None
+               OpStore %res %42
+         %44 = OpLoad %13 %res None
+               OpReturnValue %44
                OpFunctionEnd
-%compute_main = OpFunction %void None %36
-         %37 = OpLabel
-         %38 = OpFunctionCall %13 %subgroupMatrixLoad_ca4345
-         %39 = OpAccessChain %_ptr_StorageBuffer__arr_int_uint_1024_0 %1 %uint_0
-         %41 = OpAccessChain %_ptr_StorageBuffer_int_0 %39 %uint_0
-               OpCooperativeMatrixStoreKHR %41 %38 %uint_0 %uint_64 NonPrivatePointer
+%compute_main = OpFunction %void None %50
+         %51 = OpLabel
+         %52 = OpFunctionCall %13 %subgroupMatrixLoad_ca4345
+         %53 = OpAccessChain %_ptr_StorageBuffer__arr_int_uint_1024_0 %1 %uint_0
+         %55 = OpAccessChain %_ptr_StorageBuffer_int_0 %53 %uint_0
+               OpCooperativeMatrixStoreKHR %55 %52 %uint_0 %uint_64 NonPrivatePointer
                OpReturn
                OpFunctionEnd
diff --git a/test/tint/builtins/gen/var/subgroupMatrixLoad/ca4539.wgsl.expected.msl b/test/tint/builtins/gen/var/subgroupMatrixLoad/ca4539.wgsl.expected.msl
index 9837f33..5349bc5 100644
--- a/test/tint/builtins/gen/var/subgroupMatrixLoad/ca4539.wgsl.expected.msl
+++ b/test/tint/builtins/gen/var/subgroupMatrixLoad/ca4539.wgsl.expected.msl
@@ -20,18 +20,25 @@
 struct tint_module_vars_struct {
   device tint_array<float, 1024>* prevent_dce;
   const device SB_RO* sb_ro;
+  const constant tint_array<uint4, 1>* tint_storage_buffer_sizes;
 };
 
 simdgroup_float8x8 subgroupMatrixLoad_ca4539(tint_module_vars_struct tint_module_vars) {
   uint arg_1 = 1u;
   uint arg_3 = 8u;
-  simdgroup_float8x8 v = make_filled_simdgroup_matrix<float, 8, 8>(0.0f);
-  simdgroup_load(v, (&(*tint_module_vars.sb_ro).arg_0[arg_1]), ulong(arg_3), ulong2(0ul), true);
-  simdgroup_float8x8 res = v;
+  uint const v = arg_1;
+  uint const v_1 = max(arg_3, 8u);
+  simdgroup_float8x8 v_2 = make_filled_simdgroup_matrix<float, 8, 8>(0.0f);
+  if ((((v + (v_1 * 7u)) + 8u) <= (((*tint_module_vars.tint_storage_buffer_sizes)[0u].x - 0u) / 4u))) {
+    simdgroup_float8x8 v_3 = make_filled_simdgroup_matrix<float, 8, 8>(0.0f);
+    simdgroup_load(v_3, (&(*tint_module_vars.sb_ro).arg_0[v]), ulong(v_1), ulong2(0ul), true);
+    v_2 = v_3;
+  }
+  simdgroup_float8x8 res = v_2;
   return res;
 }
 
-kernel void compute_main(device tint_array<float, 1024>* prevent_dce [[buffer(0)]], const device SB_RO* sb_ro [[buffer(1)]]) {
-  tint_module_vars_struct const tint_module_vars = tint_module_vars_struct{.prevent_dce=prevent_dce, .sb_ro=sb_ro};
+kernel void compute_main(device tint_array<float, 1024>* prevent_dce [[buffer(0)]], const device SB_RO* sb_ro [[buffer(1)]], const constant tint_array<uint4, 1>* tint_storage_buffer_sizes [[buffer(30)]]) {
+  tint_module_vars_struct const tint_module_vars = tint_module_vars_struct{.prevent_dce=prevent_dce, .sb_ro=sb_ro, .tint_storage_buffer_sizes=tint_storage_buffer_sizes};
   simdgroup_store(subgroupMatrixLoad_ca4539(tint_module_vars), (&(*tint_module_vars.prevent_dce)[0u]), ulong(64u), ulong2(0ul), false);
 }
diff --git a/test/tint/builtins/gen/var/subgroupMatrixLoad/ca4539.wgsl.expected.spvasm b/test/tint/builtins/gen/var/subgroupMatrixLoad/ca4539.wgsl.expected.spvasm
index 3a86506..7801317 100644
--- a/test/tint/builtins/gen/var/subgroupMatrixLoad/ca4539.wgsl.expected.spvasm
+++ b/test/tint/builtins/gen/var/subgroupMatrixLoad/ca4539.wgsl.expected.spvasm
@@ -1,7 +1,7 @@
 ; SPIR-V
 ; Version: 1.3
 ; Generator: Google Tint Compiler; 1
-; Bound: 45
+; Bound: 60
 ; Schema: 0
                OpCapability Shader
                OpCapability VulkanMemoryModel
@@ -9,6 +9,7 @@
                OpCapability CooperativeMatrixKHR
                OpExtension "SPV_KHR_vulkan_memory_model"
                OpExtension "SPV_KHR_cooperative_matrix"
+         %29 = OpExtInstImport "GLSL.std.450"
                OpMemoryModel Logical Vulkan
                OpEntryPoint GLCompute %compute_main "compute_main"
                OpExecutionMode %compute_main LocalSize 1 1 1
@@ -53,10 +54,14 @@
      %uint_1 = OpConstant %uint 1
 %_ptr_StorageBuffer__runtimearr_float = OpTypePointer StorageBuffer %_runtimearr_float
      %uint_0 = OpConstant %uint 0
-%_ptr_StorageBuffer_float = OpTypePointer StorageBuffer %float
+     %uint_7 = OpConstant %uint 7
+       %bool = OpTypeBool
 %_ptr_Function_13 = OpTypePointer Function %13
+    %float_0 = OpConstant %float 0
+         %39 = OpConstantComposite %13 %float_0
+%_ptr_StorageBuffer_float = OpTypePointer StorageBuffer %float
        %void = OpTypeVoid
-         %36 = OpTypeFunction %void
+         %51 = OpTypeFunction %void
 %_ptr_StorageBuffer__arr_float_uint_1024 = OpTypePointer StorageBuffer %_arr_float_uint_1024
 %_ptr_StorageBuffer_float_0 = OpTypePointer StorageBuffer %float
     %uint_64 = OpConstant %uint 64
@@ -64,23 +69,37 @@
          %18 = OpLabel
       %arg_1 = OpVariable %_ptr_Function_uint Function
       %arg_3 = OpVariable %_ptr_Function_uint Function
+         %37 = OpVariable %_ptr_Function_13 Function %39
         %res = OpVariable %_ptr_Function_13 Function
                OpStore %arg_1 %uint_1
                OpStore %arg_3 %uint_8
          %23 = OpAccessChain %_ptr_StorageBuffer__runtimearr_float %sb_ro %uint_0
          %26 = OpLoad %uint %arg_1 None
          %27 = OpLoad %uint %arg_3 None
-         %28 = OpAccessChain %_ptr_StorageBuffer_float %23 %26
-         %30 = OpCooperativeMatrixLoadKHR %13 %28 %uint_1 %27 NonPrivatePointer
-               OpStore %res %30
-         %33 = OpLoad %13 %res None
-               OpReturnValue %33
+         %28 = OpExtInst %uint %29 UMax %27 %uint_8
+         %30 = OpArrayLength %uint %sb_ro 0
+         %31 = OpIMul %uint %28 %uint_7
+         %33 = OpIAdd %uint %26 %31
+         %34 = OpIAdd %uint %33 %uint_8
+         %35 = OpULessThanEqual %bool %34 %30
+               OpSelectionMerge %41 None
+               OpBranchConditional %35 %42 %41
+         %42 = OpLabel
+         %46 = OpAccessChain %_ptr_StorageBuffer_float %23 %26
+         %48 = OpCooperativeMatrixLoadKHR %13 %46 %uint_1 %28 NonPrivatePointer
+               OpStore %37 %48 None
+               OpBranch %41
+         %41 = OpLabel
+         %43 = OpLoad %13 %37 None
+               OpStore %res %43
+         %45 = OpLoad %13 %res None
+               OpReturnValue %45
                OpFunctionEnd
-%compute_main = OpFunction %void None %36
-         %37 = OpLabel
-         %38 = OpFunctionCall %13 %subgroupMatrixLoad_ca4539
-         %39 = OpAccessChain %_ptr_StorageBuffer__arr_float_uint_1024 %1 %uint_0
-         %41 = OpAccessChain %_ptr_StorageBuffer_float_0 %39 %uint_0
-               OpCooperativeMatrixStoreKHR %41 %38 %uint_0 %uint_64 NonPrivatePointer
+%compute_main = OpFunction %void None %51
+         %52 = OpLabel
+         %53 = OpFunctionCall %13 %subgroupMatrixLoad_ca4539
+         %54 = OpAccessChain %_ptr_StorageBuffer__arr_float_uint_1024 %1 %uint_0
+         %56 = OpAccessChain %_ptr_StorageBuffer_float_0 %54 %uint_0
+               OpCooperativeMatrixStoreKHR %56 %53 %uint_0 %uint_64 NonPrivatePointer
                OpReturn
                OpFunctionEnd
diff --git a/test/tint/builtins/gen/var/subgroupMatrixLoad/cc5556.wgsl.expected.spvasm b/test/tint/builtins/gen/var/subgroupMatrixLoad/cc5556.wgsl.expected.spvasm
index 98cf3ef..cf2c7a0 100644
--- a/test/tint/builtins/gen/var/subgroupMatrixLoad/cc5556.wgsl.expected.spvasm
+++ b/test/tint/builtins/gen/var/subgroupMatrixLoad/cc5556.wgsl.expected.spvasm
@@ -1,7 +1,7 @@
 ; SPIR-V
 ; Version: 1.3
 ; Generator: Google Tint Compiler; 1
-; Bound: 44
+; Bound: 58
 ; Schema: 0
                OpCapability Shader
                OpCapability VulkanMemoryModel
@@ -9,6 +9,7 @@
                OpCapability CooperativeMatrixKHR
                OpExtension "SPV_KHR_vulkan_memory_model"
                OpExtension "SPV_KHR_cooperative_matrix"
+         %28 = OpExtInstImport "GLSL.std.450"
                OpMemoryModel Logical Vulkan
                OpEntryPoint GLCompute %compute_main "compute_main"
                OpExecutionMode %compute_main LocalSize 1 1 1
@@ -53,10 +54,14 @@
 %_ptr_Function_uint = OpTypePointer Function %uint
      %uint_1 = OpConstant %uint 1
 %_ptr_StorageBuffer__arr_int_uint_1024 = OpTypePointer StorageBuffer %_arr_int_uint_1024
-%_ptr_StorageBuffer_int = OpTypePointer StorageBuffer %int
+     %uint_7 = OpConstant %uint 7
+       %bool = OpTypeBool
 %_ptr_Function_13 = OpTypePointer Function %13
+      %int_0 = OpConstant %int 0
+         %37 = OpConstantComposite %13 %int_0
+%_ptr_StorageBuffer_int = OpTypePointer StorageBuffer %int
        %void = OpTypeVoid
-         %35 = OpTypeFunction %void
+         %49 = OpTypeFunction %void
 %_ptr_StorageBuffer__arr_int_uint_1024_0 = OpTypePointer StorageBuffer %_arr_int_uint_1024
 %_ptr_StorageBuffer_int_0 = OpTypePointer StorageBuffer %int
     %uint_64 = OpConstant %uint 64
@@ -64,23 +69,36 @@
          %18 = OpLabel
       %arg_1 = OpVariable %_ptr_Function_uint Function
       %arg_3 = OpVariable %_ptr_Function_uint Function
+         %35 = OpVariable %_ptr_Function_13 Function %37
         %res = OpVariable %_ptr_Function_13 Function
                OpStore %arg_1 %uint_1
                OpStore %arg_3 %uint_8
          %23 = OpAccessChain %_ptr_StorageBuffer__arr_int_uint_1024 %8 %uint_0 %uint_0
          %25 = OpLoad %uint %arg_1 None
          %26 = OpLoad %uint %arg_3 None
-         %27 = OpAccessChain %_ptr_StorageBuffer_int %23 %25
-         %29 = OpCooperativeMatrixLoadKHR %13 %27 %uint_1 %26 NonPrivatePointer
-               OpStore %res %29
-         %32 = OpLoad %13 %res None
-               OpReturnValue %32
+         %27 = OpExtInst %uint %28 UMax %26 %uint_8
+         %29 = OpIMul %uint %27 %uint_7
+         %31 = OpIAdd %uint %25 %29
+         %32 = OpIAdd %uint %31 %uint_8
+         %33 = OpULessThanEqual %bool %32 %uint_1024
+               OpSelectionMerge %39 None
+               OpBranchConditional %33 %40 %39
+         %40 = OpLabel
+         %44 = OpAccessChain %_ptr_StorageBuffer_int %23 %25
+         %46 = OpCooperativeMatrixLoadKHR %13 %44 %uint_1 %27 NonPrivatePointer
+               OpStore %35 %46 None
+               OpBranch %39
+         %39 = OpLabel
+         %41 = OpLoad %13 %35 None
+               OpStore %res %41
+         %43 = OpLoad %13 %res None
+               OpReturnValue %43
                OpFunctionEnd
-%compute_main = OpFunction %void None %35
-         %36 = OpLabel
-         %37 = OpFunctionCall %13 %subgroupMatrixLoad_cc5556
-         %38 = OpAccessChain %_ptr_StorageBuffer__arr_int_uint_1024_0 %1 %uint_0
-         %40 = OpAccessChain %_ptr_StorageBuffer_int_0 %38 %uint_0
-               OpCooperativeMatrixStoreKHR %40 %37 %uint_0 %uint_64 NonPrivatePointer
+%compute_main = OpFunction %void None %49
+         %50 = OpLabel
+         %51 = OpFunctionCall %13 %subgroupMatrixLoad_cc5556
+         %52 = OpAccessChain %_ptr_StorageBuffer__arr_int_uint_1024_0 %1 %uint_0
+         %54 = OpAccessChain %_ptr_StorageBuffer_int_0 %52 %uint_0
+               OpCooperativeMatrixStoreKHR %54 %51 %uint_0 %uint_64 NonPrivatePointer
                OpReturn
                OpFunctionEnd
diff --git a/test/tint/builtins/gen/var/subgroupMatrixLoad/cce4cd.wgsl.expected.msl b/test/tint/builtins/gen/var/subgroupMatrixLoad/cce4cd.wgsl.expected.msl
index 3bbb656..4feef14 100644
--- a/test/tint/builtins/gen/var/subgroupMatrixLoad/cce4cd.wgsl.expected.msl
+++ b/test/tint/builtins/gen/var/subgroupMatrixLoad/cce4cd.wgsl.expected.msl
@@ -25,24 +25,30 @@
 simdgroup_float8x8 subgroupMatrixLoad_cce4cd(tint_module_vars_struct tint_module_vars) {
   uint arg_1 = 1u;
   uint arg_3 = 8u;
-  simdgroup_float8x8 v = make_filled_simdgroup_matrix<float, 8, 8>(0.0f);
-  simdgroup_load(v, (&(*tint_module_vars.arg_0)[arg_1]), ulong(arg_3), ulong2(0ul), true);
-  simdgroup_float8x8 res = v;
+  uint const v = arg_1;
+  uint const v_1 = max(arg_3, 8u);
+  simdgroup_float8x8 v_2 = make_filled_simdgroup_matrix<float, 8, 8>(0.0f);
+  if ((((v + (v_1 * 7u)) + 8u) <= 1024u)) {
+    simdgroup_float8x8 v_3 = make_filled_simdgroup_matrix<float, 8, 8>(0.0f);
+    simdgroup_load(v_3, (&(*tint_module_vars.arg_0)[v]), ulong(v_1), ulong2(0ul), true);
+    v_2 = v_3;
+  }
+  simdgroup_float8x8 res = v_2;
   return res;
 }
 
 void compute_main_inner(uint tint_local_index, tint_module_vars_struct tint_module_vars) {
   {
-    uint v_1 = 0u;
-    v_1 = tint_local_index;
+    uint v_4 = 0u;
+    v_4 = tint_local_index;
     while(true) {
-      uint const v_2 = v_1;
-      if ((v_2 >= 1024u)) {
+      uint const v_5 = v_4;
+      if ((v_5 >= 1024u)) {
         break;
       }
-      (*tint_module_vars.arg_0)[v_2] = 0.0f;
+      (*tint_module_vars.arg_0)[v_5] = 0.0f;
       {
-        v_1 = (v_2 + 1u);
+        v_4 = (v_5 + 1u);
       }
       continue;
     }
@@ -51,7 +57,7 @@
   simdgroup_store(subgroupMatrixLoad_cce4cd(tint_module_vars), (&(*tint_module_vars.prevent_dce)[0u]), ulong(64u), ulong2(0ul), false);
 }
 
-kernel void compute_main(uint tint_local_index [[thread_index_in_threadgroup]], device tint_array<float, 1024>* prevent_dce [[buffer(0)]], threadgroup tint_symbol_1* v_3 [[threadgroup(0)]]) {
-  tint_module_vars_struct const tint_module_vars = tint_module_vars_struct{.prevent_dce=prevent_dce, .arg_0=(&(*v_3).tint_symbol)};
+kernel void compute_main(uint tint_local_index [[thread_index_in_threadgroup]], device tint_array<float, 1024>* prevent_dce [[buffer(0)]], threadgroup tint_symbol_1* v_6 [[threadgroup(0)]]) {
+  tint_module_vars_struct const tint_module_vars = tint_module_vars_struct{.prevent_dce=prevent_dce, .arg_0=(&(*v_6).tint_symbol)};
   compute_main_inner(tint_local_index, tint_module_vars);
 }
diff --git a/test/tint/builtins/gen/var/subgroupMatrixLoad/cce4cd.wgsl.expected.spvasm b/test/tint/builtins/gen/var/subgroupMatrixLoad/cce4cd.wgsl.expected.spvasm
index f4c342d..668083d 100644
--- a/test/tint/builtins/gen/var/subgroupMatrixLoad/cce4cd.wgsl.expected.spvasm
+++ b/test/tint/builtins/gen/var/subgroupMatrixLoad/cce4cd.wgsl.expected.spvasm
@@ -1,7 +1,7 @@
 ; SPIR-V
 ; Version: 1.3
 ; Generator: Google Tint Compiler; 1
-; Bound: 65
+; Bound: 77
 ; Schema: 0
                OpCapability Shader
                OpCapability VulkanMemoryModel
@@ -9,6 +9,7 @@
                OpCapability CooperativeMatrixKHR
                OpExtension "SPV_KHR_vulkan_memory_model"
                OpExtension "SPV_KHR_cooperative_matrix"
+         %26 = OpExtInstImport "GLSL.std.450"
                OpMemoryModel Logical Vulkan
                OpEntryPoint GLCompute %compute_main "compute_main" %compute_main_local_invocation_index_Input
                OpExecutionMode %compute_main LocalSize 1 1 1
@@ -47,68 +48,83 @@
          %14 = OpTypeCooperativeMatrixKHR %float %uint_3 %uint_8 %uint_8 %uint_1
          %18 = OpTypeFunction %14
 %_ptr_Function_uint = OpTypePointer Function %uint
-%_ptr_Workgroup_float = OpTypePointer Workgroup %float
+     %uint_7 = OpConstant %uint 7
+       %bool = OpTypeBool
 %_ptr_Function_14 = OpTypePointer Function %14
+    %float_0 = OpConstant %float 0
+         %35 = OpConstantComposite %14 %float_0
+%_ptr_Workgroup_float = OpTypePointer Workgroup %float
        %void = OpTypeVoid
-         %34 = OpTypeFunction %void %uint
+         %48 = OpTypeFunction %void %uint
      %uint_2 = OpConstant %uint 2
  %uint_24840 = OpConstant %uint 24840
 %_ptr_StorageBuffer__arr_float_uint_1024 = OpTypePointer StorageBuffer %_arr_float_uint_1024
      %uint_0 = OpConstant %uint 0
 %_ptr_StorageBuffer_float = OpTypePointer StorageBuffer %float
     %uint_64 = OpConstant %uint 64
-       %bool = OpTypeBool
-    %float_0 = OpConstant %float 0
-         %61 = OpTypeFunction %void
+         %73 = OpTypeFunction %void
 %subgroupMatrixLoad_cce4cd = OpFunction %14 None %18
          %19 = OpLabel
       %arg_1 = OpVariable %_ptr_Function_uint Function
       %arg_3 = OpVariable %_ptr_Function_uint Function
+         %33 = OpVariable %_ptr_Function_14 Function %35
         %res = OpVariable %_ptr_Function_14 Function
                OpStore %arg_1 %uint_1
                OpStore %arg_3 %uint_8
          %23 = OpLoad %uint %arg_1 None
          %24 = OpLoad %uint %arg_3 None
-         %25 = OpAccessChain %_ptr_Workgroup_float %arg_0 %23
-         %27 = OpCooperativeMatrixLoadKHR %14 %25 %uint_1 %24 NonPrivatePointer
-               OpStore %res %27
-         %30 = OpLoad %14 %res None
-               OpReturnValue %30
-               OpFunctionEnd
-%compute_main_inner = OpFunction %void None %34
-%tint_local_index = OpFunctionParameter %uint
-         %35 = OpLabel
-               OpBranch %36
-         %36 = OpLabel
-               OpBranch %39
-         %39 = OpLabel
-         %41 = OpPhi %uint %tint_local_index %36 %42 %38
-               OpLoopMerge %40 %38 None
+         %25 = OpExtInst %uint %26 UMax %24 %uint_8
+         %27 = OpIMul %uint %25 %uint_7
+         %29 = OpIAdd %uint %23 %27
+         %30 = OpIAdd %uint %29 %uint_8
+         %31 = OpULessThanEqual %bool %30 %uint_1024
+               OpSelectionMerge %37 None
+               OpBranchConditional %31 %38 %37
+         %38 = OpLabel
+         %42 = OpAccessChain %_ptr_Workgroup_float %arg_0 %23
+         %44 = OpCooperativeMatrixLoadKHR %14 %42 %uint_1 %25 NonPrivatePointer
+               OpStore %33 %44 None
                OpBranch %37
          %37 = OpLabel
-         %54 = OpUGreaterThanEqual %bool %41 %uint_1024
-               OpSelectionMerge %56 None
-               OpBranchConditional %54 %57 %56
-         %57 = OpLabel
-               OpBranch %40
-         %56 = OpLabel
-         %58 = OpAccessChain %_ptr_Workgroup_float %arg_0 %41
-               OpStore %58 %float_0 NonPrivatePointer
-               OpBranch %38
-         %38 = OpLabel
-         %42 = OpIAdd %uint %41 %uint_1
-               OpBranch %39
-         %40 = OpLabel
+         %39 = OpLoad %14 %33 None
+               OpStore %res %39
+         %41 = OpLoad %14 %res None
+               OpReturnValue %41
+               OpFunctionEnd
+%compute_main_inner = OpFunction %void None %48
+%tint_local_index = OpFunctionParameter %uint
+         %49 = OpLabel
+               OpBranch %50
+         %50 = OpLabel
+               OpBranch %53
+         %53 = OpLabel
+         %55 = OpPhi %uint %tint_local_index %50 %56 %52
+               OpLoopMerge %54 %52 None
+               OpBranch %51
+         %51 = OpLabel
+         %68 = OpUGreaterThanEqual %bool %55 %uint_1024
+               OpSelectionMerge %69 None
+               OpBranchConditional %68 %70 %69
+         %70 = OpLabel
+               OpBranch %54
+         %69 = OpLabel
+         %71 = OpAccessChain %_ptr_Workgroup_float %arg_0 %55
+               OpStore %71 %float_0 NonPrivatePointer
+               OpBranch %52
+         %52 = OpLabel
+         %56 = OpIAdd %uint %55 %uint_1
+               OpBranch %53
+         %54 = OpLabel
                OpControlBarrier %uint_2 %uint_2 %uint_24840
-         %46 = OpFunctionCall %14 %subgroupMatrixLoad_cce4cd
-         %47 = OpAccessChain %_ptr_StorageBuffer__arr_float_uint_1024 %1 %uint_0
-         %50 = OpAccessChain %_ptr_StorageBuffer_float %47 %uint_0
-               OpCooperativeMatrixStoreKHR %50 %46 %uint_0 %uint_64 NonPrivatePointer
+         %60 = OpFunctionCall %14 %subgroupMatrixLoad_cce4cd
+         %61 = OpAccessChain %_ptr_StorageBuffer__arr_float_uint_1024 %1 %uint_0
+         %64 = OpAccessChain %_ptr_StorageBuffer_float %61 %uint_0
+               OpCooperativeMatrixStoreKHR %64 %60 %uint_0 %uint_64 NonPrivatePointer
                OpReturn
                OpFunctionEnd
-%compute_main = OpFunction %void None %61
-         %62 = OpLabel
-         %63 = OpLoad %uint %compute_main_local_invocation_index_Input None
-         %64 = OpFunctionCall %void %compute_main_inner %63
+%compute_main = OpFunction %void None %73
+         %74 = OpLabel
+         %75 = OpLoad %uint %compute_main_local_invocation_index_Input None
+         %76 = OpFunctionCall %void %compute_main_inner %75
                OpReturn
                OpFunctionEnd
diff --git a/test/tint/builtins/gen/var/subgroupMatrixLoad/d2b502.wgsl.expected.msl b/test/tint/builtins/gen/var/subgroupMatrixLoad/d2b502.wgsl.expected.msl
index 66a7dc5..5126d94 100644
--- a/test/tint/builtins/gen/var/subgroupMatrixLoad/d2b502.wgsl.expected.msl
+++ b/test/tint/builtins/gen/var/subgroupMatrixLoad/d2b502.wgsl.expected.msl
@@ -25,9 +25,15 @@
 simdgroup_half8x8 subgroupMatrixLoad_d2b502(tint_module_vars_struct tint_module_vars) {
   uint arg_1 = 1u;
   uint arg_3 = 8u;
-  simdgroup_half8x8 v = make_filled_simdgroup_matrix<half, 8, 8>(0.0h);
-  simdgroup_load(v, (&(*tint_module_vars.sb_ro).arg_0[arg_1]), ulong(arg_3), ulong2(0ul), true);
-  simdgroup_half8x8 res = v;
+  uint const v = arg_1;
+  uint const v_1 = max(arg_3, 8u);
+  simdgroup_half8x8 v_2 = make_filled_simdgroup_matrix<half, 8, 8>(0.0h);
+  if ((((v + (v_1 * 7u)) + 8u) <= 1024u)) {
+    simdgroup_half8x8 v_3 = make_filled_simdgroup_matrix<half, 8, 8>(0.0h);
+    simdgroup_load(v_3, (&(*tint_module_vars.sb_ro).arg_0[v]), ulong(v_1), ulong2(0ul), true);
+    v_2 = v_3;
+  }
+  simdgroup_half8x8 res = v_2;
   return res;
 }
 
diff --git a/test/tint/builtins/gen/var/subgroupMatrixLoad/d2b502.wgsl.expected.spvasm b/test/tint/builtins/gen/var/subgroupMatrixLoad/d2b502.wgsl.expected.spvasm
index c4dcdab..f28a1a4 100644
--- a/test/tint/builtins/gen/var/subgroupMatrixLoad/d2b502.wgsl.expected.spvasm
+++ b/test/tint/builtins/gen/var/subgroupMatrixLoad/d2b502.wgsl.expected.spvasm
@@ -1,7 +1,7 @@
 ; SPIR-V
 ; Version: 1.3
 ; Generator: Google Tint Compiler; 1
-; Bound: 45
+; Bound: 59
 ; Schema: 0
                OpCapability Shader
                OpCapability VulkanMemoryModel
@@ -12,6 +12,7 @@
                OpCapability CooperativeMatrixKHR
                OpExtension "SPV_KHR_vulkan_memory_model"
                OpExtension "SPV_KHR_cooperative_matrix"
+         %29 = OpExtInstImport "GLSL.std.450"
                OpMemoryModel Logical Vulkan
                OpEntryPoint GLCompute %compute_main "compute_main"
                OpExecutionMode %compute_main LocalSize 1 1 1
@@ -57,10 +58,14 @@
      %uint_1 = OpConstant %uint 1
 %_ptr_StorageBuffer__arr_half_uint_1024 = OpTypePointer StorageBuffer %_arr_half_uint_1024
      %uint_0 = OpConstant %uint 0
-%_ptr_StorageBuffer_half = OpTypePointer StorageBuffer %half
+     %uint_7 = OpConstant %uint 7
+       %bool = OpTypeBool
 %_ptr_Function_13 = OpTypePointer Function %13
+%half_0x0p_0 = OpConstant %half 0x0p+0
+         %38 = OpConstantComposite %13 %half_0x0p_0
+%_ptr_StorageBuffer_half = OpTypePointer StorageBuffer %half
        %void = OpTypeVoid
-         %36 = OpTypeFunction %void
+         %50 = OpTypeFunction %void
 %_ptr_StorageBuffer__arr_half_uint_1024_0 = OpTypePointer StorageBuffer %_arr_half_uint_1024
 %_ptr_StorageBuffer_half_0 = OpTypePointer StorageBuffer %half
     %uint_64 = OpConstant %uint 64
@@ -68,23 +73,36 @@
          %18 = OpLabel
       %arg_1 = OpVariable %_ptr_Function_uint Function
       %arg_3 = OpVariable %_ptr_Function_uint Function
+         %36 = OpVariable %_ptr_Function_13 Function %38
         %res = OpVariable %_ptr_Function_13 Function
                OpStore %arg_1 %uint_1
                OpStore %arg_3 %uint_8
          %23 = OpAccessChain %_ptr_StorageBuffer__arr_half_uint_1024 %8 %uint_0 %uint_0
          %26 = OpLoad %uint %arg_1 None
          %27 = OpLoad %uint %arg_3 None
-         %28 = OpAccessChain %_ptr_StorageBuffer_half %23 %26
-         %30 = OpCooperativeMatrixLoadKHR %13 %28 %uint_1 %27 NonPrivatePointer
-               OpStore %res %30
-         %33 = OpLoad %13 %res None
-               OpReturnValue %33
+         %28 = OpExtInst %uint %29 UMax %27 %uint_8
+         %30 = OpIMul %uint %28 %uint_7
+         %32 = OpIAdd %uint %26 %30
+         %33 = OpIAdd %uint %32 %uint_8
+         %34 = OpULessThanEqual %bool %33 %uint_1024
+               OpSelectionMerge %40 None
+               OpBranchConditional %34 %41 %40
+         %41 = OpLabel
+         %45 = OpAccessChain %_ptr_StorageBuffer_half %23 %26
+         %47 = OpCooperativeMatrixLoadKHR %13 %45 %uint_1 %28 NonPrivatePointer
+               OpStore %36 %47 None
+               OpBranch %40
+         %40 = OpLabel
+         %42 = OpLoad %13 %36 None
+               OpStore %res %42
+         %44 = OpLoad %13 %res None
+               OpReturnValue %44
                OpFunctionEnd
-%compute_main = OpFunction %void None %36
-         %37 = OpLabel
-         %38 = OpFunctionCall %13 %subgroupMatrixLoad_d2b502
-         %39 = OpAccessChain %_ptr_StorageBuffer__arr_half_uint_1024_0 %1 %uint_0
-         %41 = OpAccessChain %_ptr_StorageBuffer_half_0 %39 %uint_0
-               OpCooperativeMatrixStoreKHR %41 %38 %uint_0 %uint_64 NonPrivatePointer
+%compute_main = OpFunction %void None %50
+         %51 = OpLabel
+         %52 = OpFunctionCall %13 %subgroupMatrixLoad_d2b502
+         %53 = OpAccessChain %_ptr_StorageBuffer__arr_half_uint_1024_0 %1 %uint_0
+         %55 = OpAccessChain %_ptr_StorageBuffer_half_0 %53 %uint_0
+               OpCooperativeMatrixStoreKHR %55 %52 %uint_0 %uint_64 NonPrivatePointer
                OpReturn
                OpFunctionEnd
diff --git a/test/tint/builtins/gen/var/subgroupMatrixLoad/d2c77c.wgsl.expected.spvasm b/test/tint/builtins/gen/var/subgroupMatrixLoad/d2c77c.wgsl.expected.spvasm
index 30f7e50..0b15d08 100644
--- a/test/tint/builtins/gen/var/subgroupMatrixLoad/d2c77c.wgsl.expected.spvasm
+++ b/test/tint/builtins/gen/var/subgroupMatrixLoad/d2c77c.wgsl.expected.spvasm
@@ -1,7 +1,7 @@
 ; SPIR-V
 ; Version: 1.3
 ; Generator: Google Tint Compiler; 1
-; Bound: 65
+; Bound: 77
 ; Schema: 0
                OpCapability Shader
                OpCapability VulkanMemoryModel
@@ -9,6 +9,7 @@
                OpCapability CooperativeMatrixKHR
                OpExtension "SPV_KHR_vulkan_memory_model"
                OpExtension "SPV_KHR_cooperative_matrix"
+         %26 = OpExtInstImport "GLSL.std.450"
                OpMemoryModel Logical Vulkan
                OpEntryPoint GLCompute %compute_main "compute_main" %compute_main_local_invocation_index_Input
                OpExecutionMode %compute_main LocalSize 1 1 1
@@ -47,68 +48,83 @@
          %14 = OpTypeCooperativeMatrixKHR %int %uint_3 %uint_8 %uint_8 %uint_1
          %18 = OpTypeFunction %14
 %_ptr_Function_uint = OpTypePointer Function %uint
-%_ptr_Workgroup_int = OpTypePointer Workgroup %int
+     %uint_7 = OpConstant %uint 7
+       %bool = OpTypeBool
 %_ptr_Function_14 = OpTypePointer Function %14
+      %int_0 = OpConstant %int 0
+         %35 = OpConstantComposite %14 %int_0
+%_ptr_Workgroup_int = OpTypePointer Workgroup %int
        %void = OpTypeVoid
-         %34 = OpTypeFunction %void %uint
+         %48 = OpTypeFunction %void %uint
      %uint_2 = OpConstant %uint 2
  %uint_24840 = OpConstant %uint 24840
 %_ptr_StorageBuffer__arr_int_uint_1024 = OpTypePointer StorageBuffer %_arr_int_uint_1024
      %uint_0 = OpConstant %uint 0
 %_ptr_StorageBuffer_int = OpTypePointer StorageBuffer %int
     %uint_64 = OpConstant %uint 64
-       %bool = OpTypeBool
-      %int_0 = OpConstant %int 0
-         %61 = OpTypeFunction %void
+         %73 = OpTypeFunction %void
 %subgroupMatrixLoad_d2c77c = OpFunction %14 None %18
          %19 = OpLabel
       %arg_1 = OpVariable %_ptr_Function_uint Function
       %arg_3 = OpVariable %_ptr_Function_uint Function
+         %33 = OpVariable %_ptr_Function_14 Function %35
         %res = OpVariable %_ptr_Function_14 Function
                OpStore %arg_1 %uint_1
                OpStore %arg_3 %uint_8
          %23 = OpLoad %uint %arg_1 None
          %24 = OpLoad %uint %arg_3 None
-         %25 = OpAccessChain %_ptr_Workgroup_int %arg_0 %23
-         %27 = OpCooperativeMatrixLoadKHR %14 %25 %uint_1 %24 NonPrivatePointer
-               OpStore %res %27
-         %30 = OpLoad %14 %res None
-               OpReturnValue %30
-               OpFunctionEnd
-%compute_main_inner = OpFunction %void None %34
-%tint_local_index = OpFunctionParameter %uint
-         %35 = OpLabel
-               OpBranch %36
-         %36 = OpLabel
-               OpBranch %39
-         %39 = OpLabel
-         %41 = OpPhi %uint %tint_local_index %36 %42 %38
-               OpLoopMerge %40 %38 None
+         %25 = OpExtInst %uint %26 UMax %24 %uint_8
+         %27 = OpIMul %uint %25 %uint_7
+         %29 = OpIAdd %uint %23 %27
+         %30 = OpIAdd %uint %29 %uint_8
+         %31 = OpULessThanEqual %bool %30 %uint_1024
+               OpSelectionMerge %37 None
+               OpBranchConditional %31 %38 %37
+         %38 = OpLabel
+         %42 = OpAccessChain %_ptr_Workgroup_int %arg_0 %23
+         %44 = OpCooperativeMatrixLoadKHR %14 %42 %uint_1 %25 NonPrivatePointer
+               OpStore %33 %44 None
                OpBranch %37
          %37 = OpLabel
-         %54 = OpUGreaterThanEqual %bool %41 %uint_1024
-               OpSelectionMerge %56 None
-               OpBranchConditional %54 %57 %56
-         %57 = OpLabel
-               OpBranch %40
-         %56 = OpLabel
-         %58 = OpAccessChain %_ptr_Workgroup_int %arg_0 %41
-               OpStore %58 %int_0 NonPrivatePointer
-               OpBranch %38
-         %38 = OpLabel
-         %42 = OpIAdd %uint %41 %uint_1
-               OpBranch %39
-         %40 = OpLabel
+         %39 = OpLoad %14 %33 None
+               OpStore %res %39
+         %41 = OpLoad %14 %res None
+               OpReturnValue %41
+               OpFunctionEnd
+%compute_main_inner = OpFunction %void None %48
+%tint_local_index = OpFunctionParameter %uint
+         %49 = OpLabel
+               OpBranch %50
+         %50 = OpLabel
+               OpBranch %53
+         %53 = OpLabel
+         %55 = OpPhi %uint %tint_local_index %50 %56 %52
+               OpLoopMerge %54 %52 None
+               OpBranch %51
+         %51 = OpLabel
+         %68 = OpUGreaterThanEqual %bool %55 %uint_1024
+               OpSelectionMerge %69 None
+               OpBranchConditional %68 %70 %69
+         %70 = OpLabel
+               OpBranch %54
+         %69 = OpLabel
+         %71 = OpAccessChain %_ptr_Workgroup_int %arg_0 %55
+               OpStore %71 %int_0 NonPrivatePointer
+               OpBranch %52
+         %52 = OpLabel
+         %56 = OpIAdd %uint %55 %uint_1
+               OpBranch %53
+         %54 = OpLabel
                OpControlBarrier %uint_2 %uint_2 %uint_24840
-         %46 = OpFunctionCall %14 %subgroupMatrixLoad_d2c77c
-         %47 = OpAccessChain %_ptr_StorageBuffer__arr_int_uint_1024 %1 %uint_0
-         %50 = OpAccessChain %_ptr_StorageBuffer_int %47 %uint_0
-               OpCooperativeMatrixStoreKHR %50 %46 %uint_0 %uint_64 NonPrivatePointer
+         %60 = OpFunctionCall %14 %subgroupMatrixLoad_d2c77c
+         %61 = OpAccessChain %_ptr_StorageBuffer__arr_int_uint_1024 %1 %uint_0
+         %64 = OpAccessChain %_ptr_StorageBuffer_int %61 %uint_0
+               OpCooperativeMatrixStoreKHR %64 %60 %uint_0 %uint_64 NonPrivatePointer
                OpReturn
                OpFunctionEnd
-%compute_main = OpFunction %void None %61
-         %62 = OpLabel
-         %63 = OpLoad %uint %compute_main_local_invocation_index_Input None
-         %64 = OpFunctionCall %void %compute_main_inner %63
+%compute_main = OpFunction %void None %73
+         %74 = OpLabel
+         %75 = OpLoad %uint %compute_main_local_invocation_index_Input None
+         %76 = OpFunctionCall %void %compute_main_inner %75
                OpReturn
                OpFunctionEnd
diff --git a/test/tint/builtins/gen/var/subgroupMatrixLoad/d6e273.wgsl.expected.spvasm b/test/tint/builtins/gen/var/subgroupMatrixLoad/d6e273.wgsl.expected.spvasm
index a1b739d..b85404b 100644
--- a/test/tint/builtins/gen/var/subgroupMatrixLoad/d6e273.wgsl.expected.spvasm
+++ b/test/tint/builtins/gen/var/subgroupMatrixLoad/d6e273.wgsl.expected.spvasm
@@ -1,7 +1,7 @@
 ; SPIR-V
 ; Version: 1.3
 ; Generator: Google Tint Compiler; 1
-; Bound: 63
+; Bound: 75
 ; Schema: 0
                OpCapability Shader
                OpCapability VulkanMemoryModel
@@ -9,6 +9,7 @@
                OpCapability CooperativeMatrixKHR
                OpExtension "SPV_KHR_vulkan_memory_model"
                OpExtension "SPV_KHR_cooperative_matrix"
+         %26 = OpExtInstImport "GLSL.std.450"
                OpMemoryModel Logical Vulkan
                OpEntryPoint GLCompute %compute_main "compute_main" %compute_main_local_invocation_index_Input
                OpExecutionMode %compute_main LocalSize 1 1 1
@@ -47,66 +48,81 @@
          %17 = OpTypeFunction %13
 %_ptr_Function_uint = OpTypePointer Function %uint
      %uint_1 = OpConstant %uint 1
-%_ptr_Workgroup_uint = OpTypePointer Workgroup %uint
+     %uint_7 = OpConstant %uint 7
+       %bool = OpTypeBool
 %_ptr_Function_13 = OpTypePointer Function %13
+     %uint_0 = OpConstant %uint 0
+         %35 = OpConstantComposite %13 %uint_0
+%_ptr_Workgroup_uint = OpTypePointer Workgroup %uint
        %void = OpTypeVoid
-         %34 = OpTypeFunction %void %uint
+         %48 = OpTypeFunction %void %uint
  %uint_24840 = OpConstant %uint 24840
 %_ptr_StorageBuffer__arr_uint_uint_1024 = OpTypePointer StorageBuffer %_arr_uint_uint_1024
-     %uint_0 = OpConstant %uint 0
 %_ptr_StorageBuffer_uint = OpTypePointer StorageBuffer %uint
     %uint_64 = OpConstant %uint 64
-       %bool = OpTypeBool
-         %59 = OpTypeFunction %void
+         %71 = OpTypeFunction %void
 %subgroupMatrixLoad_d6e273 = OpFunction %13 None %17
          %18 = OpLabel
       %arg_1 = OpVariable %_ptr_Function_uint Function
       %arg_3 = OpVariable %_ptr_Function_uint Function
+         %33 = OpVariable %_ptr_Function_13 Function %35
         %res = OpVariable %_ptr_Function_13 Function
                OpStore %arg_1 %uint_1
                OpStore %arg_3 %uint_8
          %23 = OpLoad %uint %arg_1 None
          %24 = OpLoad %uint %arg_3 None
-         %25 = OpAccessChain %_ptr_Workgroup_uint %arg_0 %23
-         %27 = OpCooperativeMatrixLoadKHR %13 %25 %uint_1 %24 NonPrivatePointer
-               OpStore %res %27
-         %30 = OpLoad %13 %res None
-               OpReturnValue %30
-               OpFunctionEnd
-%compute_main_inner = OpFunction %void None %34
-%tint_local_index = OpFunctionParameter %uint
-         %35 = OpLabel
-               OpBranch %36
-         %36 = OpLabel
-               OpBranch %39
-         %39 = OpLabel
-         %41 = OpPhi %uint %tint_local_index %36 %42 %38
-               OpLoopMerge %40 %38 None
+         %25 = OpExtInst %uint %26 UMax %24 %uint_8
+         %27 = OpIMul %uint %25 %uint_7
+         %29 = OpIAdd %uint %23 %27
+         %30 = OpIAdd %uint %29 %uint_8
+         %31 = OpULessThanEqual %bool %30 %uint_1024
+               OpSelectionMerge %37 None
+               OpBranchConditional %31 %38 %37
+         %38 = OpLabel
+         %42 = OpAccessChain %_ptr_Workgroup_uint %arg_0 %23
+         %44 = OpCooperativeMatrixLoadKHR %13 %42 %uint_1 %25 NonPrivatePointer
+               OpStore %33 %44 None
                OpBranch %37
          %37 = OpLabel
-         %53 = OpUGreaterThanEqual %bool %41 %uint_1024
-               OpSelectionMerge %55 None
-               OpBranchConditional %53 %56 %55
-         %56 = OpLabel
-               OpBranch %40
-         %55 = OpLabel
-         %57 = OpAccessChain %_ptr_Workgroup_uint %arg_0 %41
-               OpStore %57 %uint_0 NonPrivatePointer
-               OpBranch %38
-         %38 = OpLabel
-         %42 = OpIAdd %uint %41 %uint_1
-               OpBranch %39
-         %40 = OpLabel
+         %39 = OpLoad %13 %33 None
+               OpStore %res %39
+         %41 = OpLoad %13 %res None
+               OpReturnValue %41
+               OpFunctionEnd
+%compute_main_inner = OpFunction %void None %48
+%tint_local_index = OpFunctionParameter %uint
+         %49 = OpLabel
+               OpBranch %50
+         %50 = OpLabel
+               OpBranch %53
+         %53 = OpLabel
+         %55 = OpPhi %uint %tint_local_index %50 %56 %52
+               OpLoopMerge %54 %52 None
+               OpBranch %51
+         %51 = OpLabel
+         %66 = OpUGreaterThanEqual %bool %55 %uint_1024
+               OpSelectionMerge %67 None
+               OpBranchConditional %66 %68 %67
+         %68 = OpLabel
+               OpBranch %54
+         %67 = OpLabel
+         %69 = OpAccessChain %_ptr_Workgroup_uint %arg_0 %55
+               OpStore %69 %uint_0 NonPrivatePointer
+               OpBranch %52
+         %52 = OpLabel
+         %56 = OpIAdd %uint %55 %uint_1
+               OpBranch %53
+         %54 = OpLabel
                OpControlBarrier %uint_2 %uint_2 %uint_24840
-         %45 = OpFunctionCall %13 %subgroupMatrixLoad_d6e273
-         %46 = OpAccessChain %_ptr_StorageBuffer__arr_uint_uint_1024 %1 %uint_0
-         %49 = OpAccessChain %_ptr_StorageBuffer_uint %46 %uint_0
-               OpCooperativeMatrixStoreKHR %49 %45 %uint_0 %uint_64 NonPrivatePointer
+         %59 = OpFunctionCall %13 %subgroupMatrixLoad_d6e273
+         %60 = OpAccessChain %_ptr_StorageBuffer__arr_uint_uint_1024 %1 %uint_0
+         %62 = OpAccessChain %_ptr_StorageBuffer_uint %60 %uint_0
+               OpCooperativeMatrixStoreKHR %62 %59 %uint_0 %uint_64 NonPrivatePointer
                OpReturn
                OpFunctionEnd
-%compute_main = OpFunction %void None %59
-         %60 = OpLabel
-         %61 = OpLoad %uint %compute_main_local_invocation_index_Input None
-         %62 = OpFunctionCall %void %compute_main_inner %61
+%compute_main = OpFunction %void None %71
+         %72 = OpLabel
+         %73 = OpLoad %uint %compute_main_local_invocation_index_Input None
+         %74 = OpFunctionCall %void %compute_main_inner %73
                OpReturn
                OpFunctionEnd
diff --git a/test/tint/builtins/gen/var/subgroupMatrixLoad/d80c87.wgsl.expected.spvasm b/test/tint/builtins/gen/var/subgroupMatrixLoad/d80c87.wgsl.expected.spvasm
index a7cf381..00044fb 100644
--- a/test/tint/builtins/gen/var/subgroupMatrixLoad/d80c87.wgsl.expected.spvasm
+++ b/test/tint/builtins/gen/var/subgroupMatrixLoad/d80c87.wgsl.expected.spvasm
@@ -1,7 +1,7 @@
 ; SPIR-V
 ; Version: 1.3
 ; Generator: Google Tint Compiler; 1
-; Bound: 44
+; Bound: 58
 ; Schema: 0
                OpCapability Shader
                OpCapability VulkanMemoryModel
@@ -9,6 +9,7 @@
                OpCapability CooperativeMatrixKHR
                OpExtension "SPV_KHR_vulkan_memory_model"
                OpExtension "SPV_KHR_cooperative_matrix"
+         %28 = OpExtInstImport "GLSL.std.450"
                OpMemoryModel Logical Vulkan
                OpEntryPoint GLCompute %compute_main "compute_main"
                OpExecutionMode %compute_main LocalSize 1 1 1
@@ -52,10 +53,13 @@
      %uint_1 = OpConstant %uint 1
 %_ptr_StorageBuffer__runtimearr_uint = OpTypePointer StorageBuffer %_runtimearr_uint
      %uint_0 = OpConstant %uint 0
-%_ptr_StorageBuffer_uint = OpTypePointer StorageBuffer %uint
+     %uint_7 = OpConstant %uint 7
+       %bool = OpTypeBool
 %_ptr_Function_12 = OpTypePointer Function %12
+         %38 = OpConstantComposite %12 %uint_0
+%_ptr_StorageBuffer_uint = OpTypePointer StorageBuffer %uint
        %void = OpTypeVoid
-         %35 = OpTypeFunction %void
+         %49 = OpTypeFunction %void
 %_ptr_StorageBuffer__arr_uint_uint_1024 = OpTypePointer StorageBuffer %_arr_uint_uint_1024
 %_ptr_StorageBuffer_uint_0 = OpTypePointer StorageBuffer %uint
     %uint_64 = OpConstant %uint 64
@@ -63,23 +67,37 @@
          %17 = OpLabel
       %arg_1 = OpVariable %_ptr_Function_uint Function
       %arg_3 = OpVariable %_ptr_Function_uint Function
+         %36 = OpVariable %_ptr_Function_12 Function %38
         %res = OpVariable %_ptr_Function_12 Function
                OpStore %arg_1 %uint_1
                OpStore %arg_3 %uint_8
          %22 = OpAccessChain %_ptr_StorageBuffer__runtimearr_uint %sb_ro %uint_0
          %25 = OpLoad %uint %arg_1 None
          %26 = OpLoad %uint %arg_3 None
-         %27 = OpAccessChain %_ptr_StorageBuffer_uint %22 %25
-         %29 = OpCooperativeMatrixLoadKHR %12 %27 %uint_1 %26 NonPrivatePointer
-               OpStore %res %29
-         %32 = OpLoad %12 %res None
-               OpReturnValue %32
+         %27 = OpExtInst %uint %28 UMax %26 %uint_8
+         %29 = OpArrayLength %uint %sb_ro 0
+         %30 = OpIMul %uint %27 %uint_7
+         %32 = OpIAdd %uint %25 %30
+         %33 = OpIAdd %uint %32 %uint_8
+         %34 = OpULessThanEqual %bool %33 %29
+               OpSelectionMerge %39 None
+               OpBranchConditional %34 %40 %39
+         %40 = OpLabel
+         %44 = OpAccessChain %_ptr_StorageBuffer_uint %22 %25
+         %46 = OpCooperativeMatrixLoadKHR %12 %44 %uint_1 %27 NonPrivatePointer
+               OpStore %36 %46 None
+               OpBranch %39
+         %39 = OpLabel
+         %41 = OpLoad %12 %36 None
+               OpStore %res %41
+         %43 = OpLoad %12 %res None
+               OpReturnValue %43
                OpFunctionEnd
-%compute_main = OpFunction %void None %35
-         %36 = OpLabel
-         %37 = OpFunctionCall %12 %subgroupMatrixLoad_d80c87
-         %38 = OpAccessChain %_ptr_StorageBuffer__arr_uint_uint_1024 %1 %uint_0
-         %40 = OpAccessChain %_ptr_StorageBuffer_uint_0 %38 %uint_0
-               OpCooperativeMatrixStoreKHR %40 %37 %uint_0 %uint_64 NonPrivatePointer
+%compute_main = OpFunction %void None %49
+         %50 = OpLabel
+         %51 = OpFunctionCall %12 %subgroupMatrixLoad_d80c87
+         %52 = OpAccessChain %_ptr_StorageBuffer__arr_uint_uint_1024 %1 %uint_0
+         %54 = OpAccessChain %_ptr_StorageBuffer_uint_0 %52 %uint_0
+               OpCooperativeMatrixStoreKHR %54 %51 %uint_0 %uint_64 NonPrivatePointer
                OpReturn
                OpFunctionEnd
diff --git a/test/tint/builtins/gen/var/subgroupMatrixLoad/da5d2c.wgsl.expected.spvasm b/test/tint/builtins/gen/var/subgroupMatrixLoad/da5d2c.wgsl.expected.spvasm
index 0a6e09c..a8d34fb 100644
--- a/test/tint/builtins/gen/var/subgroupMatrixLoad/da5d2c.wgsl.expected.spvasm
+++ b/test/tint/builtins/gen/var/subgroupMatrixLoad/da5d2c.wgsl.expected.spvasm
@@ -1,7 +1,7 @@
 ; SPIR-V
 ; Version: 1.3
 ; Generator: Google Tint Compiler; 1
-; Bound: 44
+; Bound: 59
 ; Schema: 0
                OpCapability Shader
                OpCapability VulkanMemoryModel
@@ -9,6 +9,7 @@
                OpCapability CooperativeMatrixKHR
                OpExtension "SPV_KHR_vulkan_memory_model"
                OpExtension "SPV_KHR_cooperative_matrix"
+         %28 = OpExtInstImport "GLSL.std.450"
                OpMemoryModel Logical Vulkan
                OpEntryPoint GLCompute %compute_main "compute_main"
                OpExecutionMode %compute_main LocalSize 1 1 1
@@ -52,10 +53,14 @@
 %_ptr_Function_uint = OpTypePointer Function %uint
 %_ptr_StorageBuffer__runtimearr_int = OpTypePointer StorageBuffer %_runtimearr_int
      %uint_0 = OpConstant %uint 0
-%_ptr_StorageBuffer_int = OpTypePointer StorageBuffer %int
+     %uint_7 = OpConstant %uint 7
+       %bool = OpTypeBool
 %_ptr_Function_13 = OpTypePointer Function %13
+      %int_0 = OpConstant %int 0
+         %38 = OpConstantComposite %13 %int_0
+%_ptr_StorageBuffer_int = OpTypePointer StorageBuffer %int
        %void = OpTypeVoid
-         %35 = OpTypeFunction %void
+         %50 = OpTypeFunction %void
 %_ptr_StorageBuffer__arr_int_uint_1024 = OpTypePointer StorageBuffer %_arr_int_uint_1024
 %_ptr_StorageBuffer_int_0 = OpTypePointer StorageBuffer %int
     %uint_64 = OpConstant %uint 64
@@ -63,23 +68,37 @@
          %18 = OpLabel
       %arg_1 = OpVariable %_ptr_Function_uint Function
       %arg_3 = OpVariable %_ptr_Function_uint Function
+         %36 = OpVariable %_ptr_Function_13 Function %38
         %res = OpVariable %_ptr_Function_13 Function
                OpStore %arg_1 %uint_1
                OpStore %arg_3 %uint_8
          %22 = OpAccessChain %_ptr_StorageBuffer__runtimearr_int %sb_ro %uint_0
          %25 = OpLoad %uint %arg_1 None
          %26 = OpLoad %uint %arg_3 None
-         %27 = OpAccessChain %_ptr_StorageBuffer_int %22 %25
-         %29 = OpCooperativeMatrixLoadKHR %13 %27 %uint_1 %26 NonPrivatePointer
-               OpStore %res %29
-         %32 = OpLoad %13 %res None
-               OpReturnValue %32
+         %27 = OpExtInst %uint %28 UMax %26 %uint_8
+         %29 = OpArrayLength %uint %sb_ro 0
+         %30 = OpIMul %uint %27 %uint_7
+         %32 = OpIAdd %uint %25 %30
+         %33 = OpIAdd %uint %32 %uint_8
+         %34 = OpULessThanEqual %bool %33 %29
+               OpSelectionMerge %40 None
+               OpBranchConditional %34 %41 %40
+         %41 = OpLabel
+         %45 = OpAccessChain %_ptr_StorageBuffer_int %22 %25
+         %47 = OpCooperativeMatrixLoadKHR %13 %45 %uint_1 %27 NonPrivatePointer
+               OpStore %36 %47 None
+               OpBranch %40
+         %40 = OpLabel
+         %42 = OpLoad %13 %36 None
+               OpStore %res %42
+         %44 = OpLoad %13 %res None
+               OpReturnValue %44
                OpFunctionEnd
-%compute_main = OpFunction %void None %35
-         %36 = OpLabel
-         %37 = OpFunctionCall %13 %subgroupMatrixLoad_da5d2c
-         %38 = OpAccessChain %_ptr_StorageBuffer__arr_int_uint_1024 %1 %uint_0
-         %40 = OpAccessChain %_ptr_StorageBuffer_int_0 %38 %uint_0
-               OpCooperativeMatrixStoreKHR %40 %37 %uint_0 %uint_64 NonPrivatePointer
+%compute_main = OpFunction %void None %50
+         %51 = OpLabel
+         %52 = OpFunctionCall %13 %subgroupMatrixLoad_da5d2c
+         %53 = OpAccessChain %_ptr_StorageBuffer__arr_int_uint_1024 %1 %uint_0
+         %55 = OpAccessChain %_ptr_StorageBuffer_int_0 %53 %uint_0
+               OpCooperativeMatrixStoreKHR %55 %52 %uint_0 %uint_64 NonPrivatePointer
                OpReturn
                OpFunctionEnd
diff --git a/test/tint/builtins/gen/var/subgroupMatrixLoad/df98ff.wgsl.expected.spvasm b/test/tint/builtins/gen/var/subgroupMatrixLoad/df98ff.wgsl.expected.spvasm
index 340aba9..75e8309 100644
--- a/test/tint/builtins/gen/var/subgroupMatrixLoad/df98ff.wgsl.expected.spvasm
+++ b/test/tint/builtins/gen/var/subgroupMatrixLoad/df98ff.wgsl.expected.spvasm
@@ -1,7 +1,7 @@
 ; SPIR-V
 ; Version: 1.3
 ; Generator: Google Tint Compiler; 1
-; Bound: 43
+; Bound: 57
 ; Schema: 0
                OpCapability Shader
                OpCapability VulkanMemoryModel
@@ -9,6 +9,7 @@
                OpCapability CooperativeMatrixKHR
                OpExtension "SPV_KHR_vulkan_memory_model"
                OpExtension "SPV_KHR_cooperative_matrix"
+         %27 = OpExtInstImport "GLSL.std.450"
                OpMemoryModel Logical Vulkan
                OpEntryPoint GLCompute %compute_main "compute_main"
                OpExecutionMode %compute_main LocalSize 1 1 1
@@ -51,10 +52,13 @@
 %_ptr_Function_uint = OpTypePointer Function %uint
 %_ptr_StorageBuffer__runtimearr_uint = OpTypePointer StorageBuffer %_runtimearr_uint
      %uint_0 = OpConstant %uint 0
-%_ptr_StorageBuffer_uint = OpTypePointer StorageBuffer %uint
+     %uint_7 = OpConstant %uint 7
+       %bool = OpTypeBool
 %_ptr_Function_12 = OpTypePointer Function %12
+         %37 = OpConstantComposite %12 %uint_0
+%_ptr_StorageBuffer_uint = OpTypePointer StorageBuffer %uint
        %void = OpTypeVoid
-         %34 = OpTypeFunction %void
+         %48 = OpTypeFunction %void
 %_ptr_StorageBuffer__arr_uint_uint_1024 = OpTypePointer StorageBuffer %_arr_uint_uint_1024
 %_ptr_StorageBuffer_uint_0 = OpTypePointer StorageBuffer %uint
     %uint_64 = OpConstant %uint 64
@@ -62,23 +66,37 @@
          %17 = OpLabel
       %arg_1 = OpVariable %_ptr_Function_uint Function
       %arg_3 = OpVariable %_ptr_Function_uint Function
+         %35 = OpVariable %_ptr_Function_12 Function %37
         %res = OpVariable %_ptr_Function_12 Function
                OpStore %arg_1 %uint_1
                OpStore %arg_3 %uint_8
          %21 = OpAccessChain %_ptr_StorageBuffer__runtimearr_uint %sb_ro %uint_0
          %24 = OpLoad %uint %arg_1 None
          %25 = OpLoad %uint %arg_3 None
-         %26 = OpAccessChain %_ptr_StorageBuffer_uint %21 %24
-         %28 = OpCooperativeMatrixLoadKHR %12 %26 %uint_1 %25 NonPrivatePointer
-               OpStore %res %28
-         %31 = OpLoad %12 %res None
-               OpReturnValue %31
+         %26 = OpExtInst %uint %27 UMax %25 %uint_8
+         %28 = OpArrayLength %uint %sb_ro 0
+         %29 = OpIMul %uint %26 %uint_7
+         %31 = OpIAdd %uint %24 %29
+         %32 = OpIAdd %uint %31 %uint_8
+         %33 = OpULessThanEqual %bool %32 %28
+               OpSelectionMerge %38 None
+               OpBranchConditional %33 %39 %38
+         %39 = OpLabel
+         %43 = OpAccessChain %_ptr_StorageBuffer_uint %21 %24
+         %45 = OpCooperativeMatrixLoadKHR %12 %43 %uint_1 %26 NonPrivatePointer
+               OpStore %35 %45 None
+               OpBranch %38
+         %38 = OpLabel
+         %40 = OpLoad %12 %35 None
+               OpStore %res %40
+         %42 = OpLoad %12 %res None
+               OpReturnValue %42
                OpFunctionEnd
-%compute_main = OpFunction %void None %34
-         %35 = OpLabel
-         %36 = OpFunctionCall %12 %subgroupMatrixLoad_df98ff
-         %37 = OpAccessChain %_ptr_StorageBuffer__arr_uint_uint_1024 %1 %uint_0
-         %39 = OpAccessChain %_ptr_StorageBuffer_uint_0 %37 %uint_0
-               OpCooperativeMatrixStoreKHR %39 %36 %uint_0 %uint_64 NonPrivatePointer
+%compute_main = OpFunction %void None %48
+         %49 = OpLabel
+         %50 = OpFunctionCall %12 %subgroupMatrixLoad_df98ff
+         %51 = OpAccessChain %_ptr_StorageBuffer__arr_uint_uint_1024 %1 %uint_0
+         %53 = OpAccessChain %_ptr_StorageBuffer_uint_0 %51 %uint_0
+               OpCooperativeMatrixStoreKHR %53 %50 %uint_0 %uint_64 NonPrivatePointer
                OpReturn
                OpFunctionEnd
diff --git a/test/tint/builtins/gen/var/subgroupMatrixLoad/e5caba.wgsl.expected.msl b/test/tint/builtins/gen/var/subgroupMatrixLoad/e5caba.wgsl.expected.msl
index def12c1..35384fa 100644
--- a/test/tint/builtins/gen/var/subgroupMatrixLoad/e5caba.wgsl.expected.msl
+++ b/test/tint/builtins/gen/var/subgroupMatrixLoad/e5caba.wgsl.expected.msl
@@ -20,18 +20,25 @@
 struct tint_module_vars_struct {
   device tint_array<float, 1024>* prevent_dce;
   device SB_RW* sb_rw;
+  const constant tint_array<uint4, 1>* tint_storage_buffer_sizes;
 };
 
 simdgroup_float8x8 subgroupMatrixLoad_e5caba(tint_module_vars_struct tint_module_vars) {
   uint arg_1 = 1u;
   uint arg_3 = 8u;
-  simdgroup_float8x8 v = make_filled_simdgroup_matrix<float, 8, 8>(0.0f);
-  simdgroup_load(v, (&(*tint_module_vars.sb_rw).arg_0[arg_1]), ulong(arg_3), ulong2(0ul), true);
-  simdgroup_float8x8 res = v;
+  uint const v = arg_1;
+  uint const v_1 = max(arg_3, 8u);
+  simdgroup_float8x8 v_2 = make_filled_simdgroup_matrix<float, 8, 8>(0.0f);
+  if ((((v + (v_1 * 7u)) + 8u) <= (((*tint_module_vars.tint_storage_buffer_sizes)[0u].x - 0u) / 4u))) {
+    simdgroup_float8x8 v_3 = make_filled_simdgroup_matrix<float, 8, 8>(0.0f);
+    simdgroup_load(v_3, (&(*tint_module_vars.sb_rw).arg_0[v]), ulong(v_1), ulong2(0ul), true);
+    v_2 = v_3;
+  }
+  simdgroup_float8x8 res = v_2;
   return res;
 }
 
-kernel void compute_main(device tint_array<float, 1024>* prevent_dce [[buffer(0)]], device SB_RW* sb_rw [[buffer(1)]]) {
-  tint_module_vars_struct const tint_module_vars = tint_module_vars_struct{.prevent_dce=prevent_dce, .sb_rw=sb_rw};
+kernel void compute_main(device tint_array<float, 1024>* prevent_dce [[buffer(0)]], device SB_RW* sb_rw [[buffer(1)]], const constant tint_array<uint4, 1>* tint_storage_buffer_sizes [[buffer(30)]]) {
+  tint_module_vars_struct const tint_module_vars = tint_module_vars_struct{.prevent_dce=prevent_dce, .sb_rw=sb_rw, .tint_storage_buffer_sizes=tint_storage_buffer_sizes};
   simdgroup_store(subgroupMatrixLoad_e5caba(tint_module_vars), (&(*tint_module_vars.prevent_dce)[0u]), ulong(64u), ulong2(0ul), false);
 }
diff --git a/test/tint/builtins/gen/var/subgroupMatrixLoad/e5caba.wgsl.expected.spvasm b/test/tint/builtins/gen/var/subgroupMatrixLoad/e5caba.wgsl.expected.spvasm
index dcf6180..ac17937 100644
--- a/test/tint/builtins/gen/var/subgroupMatrixLoad/e5caba.wgsl.expected.spvasm
+++ b/test/tint/builtins/gen/var/subgroupMatrixLoad/e5caba.wgsl.expected.spvasm
@@ -1,7 +1,7 @@
 ; SPIR-V
 ; Version: 1.3
 ; Generator: Google Tint Compiler; 1
-; Bound: 44
+; Bound: 59
 ; Schema: 0
                OpCapability Shader
                OpCapability VulkanMemoryModel
@@ -9,6 +9,7 @@
                OpCapability CooperativeMatrixKHR
                OpExtension "SPV_KHR_vulkan_memory_model"
                OpExtension "SPV_KHR_cooperative_matrix"
+         %29 = OpExtInstImport "GLSL.std.450"
                OpMemoryModel Logical Vulkan
                OpEntryPoint GLCompute %compute_main "compute_main"
                OpExecutionMode %compute_main LocalSize 1 1 1
@@ -52,33 +53,51 @@
      %uint_1 = OpConstant %uint 1
 %_ptr_StorageBuffer__runtimearr_float = OpTypePointer StorageBuffer %_runtimearr_float
      %uint_0 = OpConstant %uint 0
-%_ptr_StorageBuffer_float = OpTypePointer StorageBuffer %float
+     %uint_7 = OpConstant %uint 7
+       %bool = OpTypeBool
 %_ptr_Function_13 = OpTypePointer Function %13
+    %float_0 = OpConstant %float 0
+         %39 = OpConstantComposite %13 %float_0
+%_ptr_StorageBuffer_float = OpTypePointer StorageBuffer %float
        %void = OpTypeVoid
-         %36 = OpTypeFunction %void
+         %51 = OpTypeFunction %void
 %_ptr_StorageBuffer__arr_float_uint_1024 = OpTypePointer StorageBuffer %_arr_float_uint_1024
     %uint_64 = OpConstant %uint 64
 %subgroupMatrixLoad_e5caba = OpFunction %13 None %17
          %18 = OpLabel
       %arg_1 = OpVariable %_ptr_Function_uint Function
       %arg_3 = OpVariable %_ptr_Function_uint Function
+         %37 = OpVariable %_ptr_Function_13 Function %39
         %res = OpVariable %_ptr_Function_13 Function
                OpStore %arg_1 %uint_1
                OpStore %arg_3 %uint_8
          %23 = OpAccessChain %_ptr_StorageBuffer__runtimearr_float %sb_rw %uint_0
          %26 = OpLoad %uint %arg_1 None
          %27 = OpLoad %uint %arg_3 None
-         %28 = OpAccessChain %_ptr_StorageBuffer_float %23 %26
-         %30 = OpCooperativeMatrixLoadKHR %13 %28 %uint_1 %27 NonPrivatePointer
-               OpStore %res %30
-         %33 = OpLoad %13 %res None
-               OpReturnValue %33
+         %28 = OpExtInst %uint %29 UMax %27 %uint_8
+         %30 = OpArrayLength %uint %sb_rw 0
+         %31 = OpIMul %uint %28 %uint_7
+         %33 = OpIAdd %uint %26 %31
+         %34 = OpIAdd %uint %33 %uint_8
+         %35 = OpULessThanEqual %bool %34 %30
+               OpSelectionMerge %41 None
+               OpBranchConditional %35 %42 %41
+         %42 = OpLabel
+         %46 = OpAccessChain %_ptr_StorageBuffer_float %23 %26
+         %48 = OpCooperativeMatrixLoadKHR %13 %46 %uint_1 %28 NonPrivatePointer
+               OpStore %37 %48 None
+               OpBranch %41
+         %41 = OpLabel
+         %43 = OpLoad %13 %37 None
+               OpStore %res %43
+         %45 = OpLoad %13 %res None
+               OpReturnValue %45
                OpFunctionEnd
-%compute_main = OpFunction %void None %36
-         %37 = OpLabel
-         %38 = OpFunctionCall %13 %subgroupMatrixLoad_e5caba
-         %39 = OpAccessChain %_ptr_StorageBuffer__arr_float_uint_1024 %1 %uint_0
-         %41 = OpAccessChain %_ptr_StorageBuffer_float %39 %uint_0
-               OpCooperativeMatrixStoreKHR %41 %38 %uint_0 %uint_64 NonPrivatePointer
+%compute_main = OpFunction %void None %51
+         %52 = OpLabel
+         %53 = OpFunctionCall %13 %subgroupMatrixLoad_e5caba
+         %54 = OpAccessChain %_ptr_StorageBuffer__arr_float_uint_1024 %1 %uint_0
+         %56 = OpAccessChain %_ptr_StorageBuffer_float %54 %uint_0
+               OpCooperativeMatrixStoreKHR %56 %53 %uint_0 %uint_64 NonPrivatePointer
                OpReturn
                OpFunctionEnd
diff --git a/test/tint/builtins/gen/var/subgroupMatrixLoad/e995ba.wgsl.expected.msl b/test/tint/builtins/gen/var/subgroupMatrixLoad/e995ba.wgsl.expected.msl
index c92b3ee..e3a8f12 100644
--- a/test/tint/builtins/gen/var/subgroupMatrixLoad/e995ba.wgsl.expected.msl
+++ b/test/tint/builtins/gen/var/subgroupMatrixLoad/e995ba.wgsl.expected.msl
@@ -25,9 +25,15 @@
 simdgroup_float8x8 subgroupMatrixLoad_e995ba(tint_module_vars_struct tint_module_vars) {
   uint arg_1 = 1u;
   uint arg_3 = 8u;
-  simdgroup_float8x8 v = make_filled_simdgroup_matrix<float, 8, 8>(0.0f);
-  simdgroup_load(v, (&(*tint_module_vars.sb_rw).arg_0[arg_1]), ulong(arg_3), ulong2(0ul), true);
-  simdgroup_float8x8 res = v;
+  uint const v = arg_1;
+  uint const v_1 = max(arg_3, 8u);
+  simdgroup_float8x8 v_2 = make_filled_simdgroup_matrix<float, 8, 8>(0.0f);
+  if ((((v + (v_1 * 7u)) + 8u) <= 1024u)) {
+    simdgroup_float8x8 v_3 = make_filled_simdgroup_matrix<float, 8, 8>(0.0f);
+    simdgroup_load(v_3, (&(*tint_module_vars.sb_rw).arg_0[v]), ulong(v_1), ulong2(0ul), true);
+    v_2 = v_3;
+  }
+  simdgroup_float8x8 res = v_2;
   return res;
 }
 
diff --git a/test/tint/builtins/gen/var/subgroupMatrixLoad/e995ba.wgsl.expected.spvasm b/test/tint/builtins/gen/var/subgroupMatrixLoad/e995ba.wgsl.expected.spvasm
index 8b06057..a9b476c 100644
--- a/test/tint/builtins/gen/var/subgroupMatrixLoad/e995ba.wgsl.expected.spvasm
+++ b/test/tint/builtins/gen/var/subgroupMatrixLoad/e995ba.wgsl.expected.spvasm
@@ -1,7 +1,7 @@
 ; SPIR-V
 ; Version: 1.3
 ; Generator: Google Tint Compiler; 1
-; Bound: 42
+; Bound: 56
 ; Schema: 0
                OpCapability Shader
                OpCapability VulkanMemoryModel
@@ -9,6 +9,7 @@
                OpCapability CooperativeMatrixKHR
                OpExtension "SPV_KHR_vulkan_memory_model"
                OpExtension "SPV_KHR_cooperative_matrix"
+         %28 = OpExtInstImport "GLSL.std.450"
                OpMemoryModel Logical Vulkan
                OpEntryPoint GLCompute %compute_main "compute_main"
                OpExecutionMode %compute_main LocalSize 1 1 1
@@ -52,32 +53,49 @@
 %_ptr_Function_uint = OpTypePointer Function %uint
      %uint_1 = OpConstant %uint 1
 %_ptr_StorageBuffer__arr_float_uint_1024 = OpTypePointer StorageBuffer %_arr_float_uint_1024
-%_ptr_StorageBuffer_float = OpTypePointer StorageBuffer %float
+     %uint_7 = OpConstant %uint 7
+       %bool = OpTypeBool
 %_ptr_Function_13 = OpTypePointer Function %13
+    %float_0 = OpConstant %float 0
+         %37 = OpConstantComposite %13 %float_0
+%_ptr_StorageBuffer_float = OpTypePointer StorageBuffer %float
        %void = OpTypeVoid
-         %35 = OpTypeFunction %void
+         %49 = OpTypeFunction %void
     %uint_64 = OpConstant %uint 64
 %subgroupMatrixLoad_e995ba = OpFunction %13 None %17
          %18 = OpLabel
       %arg_1 = OpVariable %_ptr_Function_uint Function
       %arg_3 = OpVariable %_ptr_Function_uint Function
+         %35 = OpVariable %_ptr_Function_13 Function %37
         %res = OpVariable %_ptr_Function_13 Function
                OpStore %arg_1 %uint_1
                OpStore %arg_3 %uint_8
          %23 = OpAccessChain %_ptr_StorageBuffer__arr_float_uint_1024 %8 %uint_0 %uint_0
          %25 = OpLoad %uint %arg_1 None
          %26 = OpLoad %uint %arg_3 None
-         %27 = OpAccessChain %_ptr_StorageBuffer_float %23 %25
-         %29 = OpCooperativeMatrixLoadKHR %13 %27 %uint_1 %26 NonPrivatePointer
-               OpStore %res %29
-         %32 = OpLoad %13 %res None
-               OpReturnValue %32
+         %27 = OpExtInst %uint %28 UMax %26 %uint_8
+         %29 = OpIMul %uint %27 %uint_7
+         %31 = OpIAdd %uint %25 %29
+         %32 = OpIAdd %uint %31 %uint_8
+         %33 = OpULessThanEqual %bool %32 %uint_1024
+               OpSelectionMerge %39 None
+               OpBranchConditional %33 %40 %39
+         %40 = OpLabel
+         %44 = OpAccessChain %_ptr_StorageBuffer_float %23 %25
+         %46 = OpCooperativeMatrixLoadKHR %13 %44 %uint_1 %27 NonPrivatePointer
+               OpStore %35 %46 None
+               OpBranch %39
+         %39 = OpLabel
+         %41 = OpLoad %13 %35 None
+               OpStore %res %41
+         %43 = OpLoad %13 %res None
+               OpReturnValue %43
                OpFunctionEnd
-%compute_main = OpFunction %void None %35
-         %36 = OpLabel
-         %37 = OpFunctionCall %13 %subgroupMatrixLoad_e995ba
-         %38 = OpAccessChain %_ptr_StorageBuffer__arr_float_uint_1024 %1 %uint_0
-         %39 = OpAccessChain %_ptr_StorageBuffer_float %38 %uint_0
-               OpCooperativeMatrixStoreKHR %39 %37 %uint_0 %uint_64 NonPrivatePointer
+%compute_main = OpFunction %void None %49
+         %50 = OpLabel
+         %51 = OpFunctionCall %13 %subgroupMatrixLoad_e995ba
+         %52 = OpAccessChain %_ptr_StorageBuffer__arr_float_uint_1024 %1 %uint_0
+         %53 = OpAccessChain %_ptr_StorageBuffer_float %52 %uint_0
+               OpCooperativeMatrixStoreKHR %53 %51 %uint_0 %uint_64 NonPrivatePointer
                OpReturn
                OpFunctionEnd
diff --git a/test/tint/builtins/gen/var/subgroupMatrixLoad/f406f6.wgsl.expected.spvasm b/test/tint/builtins/gen/var/subgroupMatrixLoad/f406f6.wgsl.expected.spvasm
index f2afaac..da0b1e5 100644
--- a/test/tint/builtins/gen/var/subgroupMatrixLoad/f406f6.wgsl.expected.spvasm
+++ b/test/tint/builtins/gen/var/subgroupMatrixLoad/f406f6.wgsl.expected.spvasm
@@ -1,7 +1,7 @@
 ; SPIR-V
 ; Version: 1.3
 ; Generator: Google Tint Compiler; 1
-; Bound: 63
+; Bound: 75
 ; Schema: 0
                OpCapability Shader
                OpCapability VulkanMemoryModel
@@ -9,6 +9,7 @@
                OpCapability CooperativeMatrixKHR
                OpExtension "SPV_KHR_vulkan_memory_model"
                OpExtension "SPV_KHR_cooperative_matrix"
+         %25 = OpExtInstImport "GLSL.std.450"
                OpMemoryModel Logical Vulkan
                OpEntryPoint GLCompute %compute_main "compute_main" %compute_main_local_invocation_index_Input
                OpExecutionMode %compute_main LocalSize 1 1 1
@@ -46,67 +47,82 @@
          %13 = OpTypeCooperativeMatrixKHR %uint %uint_3 %uint_8 %uint_8 %uint_1
          %17 = OpTypeFunction %13
 %_ptr_Function_uint = OpTypePointer Function %uint
-%_ptr_Workgroup_uint = OpTypePointer Workgroup %uint
+     %uint_7 = OpConstant %uint 7
+       %bool = OpTypeBool
 %_ptr_Function_13 = OpTypePointer Function %13
+     %uint_0 = OpConstant %uint 0
+         %34 = OpConstantComposite %13 %uint_0
+%_ptr_Workgroup_uint = OpTypePointer Workgroup %uint
        %void = OpTypeVoid
-         %33 = OpTypeFunction %void %uint
+         %47 = OpTypeFunction %void %uint
      %uint_2 = OpConstant %uint 2
  %uint_24840 = OpConstant %uint 24840
 %_ptr_StorageBuffer__arr_uint_uint_1024 = OpTypePointer StorageBuffer %_arr_uint_uint_1024
-     %uint_0 = OpConstant %uint 0
 %_ptr_StorageBuffer_uint = OpTypePointer StorageBuffer %uint
     %uint_64 = OpConstant %uint 64
-       %bool = OpTypeBool
-         %59 = OpTypeFunction %void
+         %71 = OpTypeFunction %void
 %subgroupMatrixLoad_f406f6 = OpFunction %13 None %17
          %18 = OpLabel
       %arg_1 = OpVariable %_ptr_Function_uint Function
       %arg_3 = OpVariable %_ptr_Function_uint Function
+         %32 = OpVariable %_ptr_Function_13 Function %34
         %res = OpVariable %_ptr_Function_13 Function
                OpStore %arg_1 %uint_1
                OpStore %arg_3 %uint_8
          %22 = OpLoad %uint %arg_1 None
          %23 = OpLoad %uint %arg_3 None
-         %24 = OpAccessChain %_ptr_Workgroup_uint %arg_0 %22
-         %26 = OpCooperativeMatrixLoadKHR %13 %24 %uint_1 %23 NonPrivatePointer
-               OpStore %res %26
-         %29 = OpLoad %13 %res None
-               OpReturnValue %29
-               OpFunctionEnd
-%compute_main_inner = OpFunction %void None %33
-%tint_local_index = OpFunctionParameter %uint
-         %34 = OpLabel
-               OpBranch %35
-         %35 = OpLabel
-               OpBranch %38
-         %38 = OpLabel
-         %40 = OpPhi %uint %tint_local_index %35 %41 %37
-               OpLoopMerge %39 %37 None
+         %24 = OpExtInst %uint %25 UMax %23 %uint_8
+         %26 = OpIMul %uint %24 %uint_7
+         %28 = OpIAdd %uint %22 %26
+         %29 = OpIAdd %uint %28 %uint_8
+         %30 = OpULessThanEqual %bool %29 %uint_1024
+               OpSelectionMerge %36 None
+               OpBranchConditional %30 %37 %36
+         %37 = OpLabel
+         %41 = OpAccessChain %_ptr_Workgroup_uint %arg_0 %22
+         %43 = OpCooperativeMatrixLoadKHR %13 %41 %uint_1 %24 NonPrivatePointer
+               OpStore %32 %43 None
                OpBranch %36
          %36 = OpLabel
-         %53 = OpUGreaterThanEqual %bool %40 %uint_1024
-               OpSelectionMerge %55 None
-               OpBranchConditional %53 %56 %55
-         %56 = OpLabel
-               OpBranch %39
-         %55 = OpLabel
-         %57 = OpAccessChain %_ptr_Workgroup_uint %arg_0 %40
-               OpStore %57 %uint_0 NonPrivatePointer
-               OpBranch %37
-         %37 = OpLabel
-         %41 = OpIAdd %uint %40 %uint_1
-               OpBranch %38
-         %39 = OpLabel
+         %38 = OpLoad %13 %32 None
+               OpStore %res %38
+         %40 = OpLoad %13 %res None
+               OpReturnValue %40
+               OpFunctionEnd
+%compute_main_inner = OpFunction %void None %47
+%tint_local_index = OpFunctionParameter %uint
+         %48 = OpLabel
+               OpBranch %49
+         %49 = OpLabel
+               OpBranch %52
+         %52 = OpLabel
+         %54 = OpPhi %uint %tint_local_index %49 %55 %51
+               OpLoopMerge %53 %51 None
+               OpBranch %50
+         %50 = OpLabel
+         %66 = OpUGreaterThanEqual %bool %54 %uint_1024
+               OpSelectionMerge %67 None
+               OpBranchConditional %66 %68 %67
+         %68 = OpLabel
+               OpBranch %53
+         %67 = OpLabel
+         %69 = OpAccessChain %_ptr_Workgroup_uint %arg_0 %54
+               OpStore %69 %uint_0 NonPrivatePointer
+               OpBranch %51
+         %51 = OpLabel
+         %55 = OpIAdd %uint %54 %uint_1
+               OpBranch %52
+         %53 = OpLabel
                OpControlBarrier %uint_2 %uint_2 %uint_24840
-         %45 = OpFunctionCall %13 %subgroupMatrixLoad_f406f6
-         %46 = OpAccessChain %_ptr_StorageBuffer__arr_uint_uint_1024 %1 %uint_0
-         %49 = OpAccessChain %_ptr_StorageBuffer_uint %46 %uint_0
-               OpCooperativeMatrixStoreKHR %49 %45 %uint_0 %uint_64 NonPrivatePointer
+         %59 = OpFunctionCall %13 %subgroupMatrixLoad_f406f6
+         %60 = OpAccessChain %_ptr_StorageBuffer__arr_uint_uint_1024 %1 %uint_0
+         %62 = OpAccessChain %_ptr_StorageBuffer_uint %60 %uint_0
+               OpCooperativeMatrixStoreKHR %62 %59 %uint_0 %uint_64 NonPrivatePointer
                OpReturn
                OpFunctionEnd
-%compute_main = OpFunction %void None %59
-         %60 = OpLabel
-         %61 = OpLoad %uint %compute_main_local_invocation_index_Input None
-         %62 = OpFunctionCall %void %compute_main_inner %61
+%compute_main = OpFunction %void None %71
+         %72 = OpLabel
+         %73 = OpLoad %uint %compute_main_local_invocation_index_Input None
+         %74 = OpFunctionCall %void %compute_main_inner %73
                OpReturn
                OpFunctionEnd
diff --git a/test/tint/builtins/gen/var/subgroupMatrixLoad/f58623.wgsl.expected.msl b/test/tint/builtins/gen/var/subgroupMatrixLoad/f58623.wgsl.expected.msl
index 2c473b4..c4a85b6 100644
--- a/test/tint/builtins/gen/var/subgroupMatrixLoad/f58623.wgsl.expected.msl
+++ b/test/tint/builtins/gen/var/subgroupMatrixLoad/f58623.wgsl.expected.msl
@@ -20,18 +20,25 @@
 struct tint_module_vars_struct {
   device tint_array<float, 1024>* prevent_dce;
   const device SB_RO* sb_ro;
+  const constant tint_array<uint4, 1>* tint_storage_buffer_sizes;
 };
 
 simdgroup_float8x8 subgroupMatrixLoad_f58623(tint_module_vars_struct tint_module_vars) {
   uint arg_1 = 1u;
   uint arg_3 = 8u;
-  simdgroup_float8x8 v = make_filled_simdgroup_matrix<float, 8, 8>(0.0f);
-  simdgroup_load(v, (&(*tint_module_vars.sb_ro).arg_0[arg_1]), ulong(arg_3), ulong2(0ul), true);
-  simdgroup_float8x8 res = v;
+  uint const v = arg_1;
+  uint const v_1 = max(arg_3, 8u);
+  simdgroup_float8x8 v_2 = make_filled_simdgroup_matrix<float, 8, 8>(0.0f);
+  if ((((v + (v_1 * 7u)) + 8u) <= (((*tint_module_vars.tint_storage_buffer_sizes)[0u].x - 0u) / 4u))) {
+    simdgroup_float8x8 v_3 = make_filled_simdgroup_matrix<float, 8, 8>(0.0f);
+    simdgroup_load(v_3, (&(*tint_module_vars.sb_ro).arg_0[v]), ulong(v_1), ulong2(0ul), true);
+    v_2 = v_3;
+  }
+  simdgroup_float8x8 res = v_2;
   return res;
 }
 
-kernel void compute_main(device tint_array<float, 1024>* prevent_dce [[buffer(0)]], const device SB_RO* sb_ro [[buffer(1)]]) {
-  tint_module_vars_struct const tint_module_vars = tint_module_vars_struct{.prevent_dce=prevent_dce, .sb_ro=sb_ro};
+kernel void compute_main(device tint_array<float, 1024>* prevent_dce [[buffer(0)]], const device SB_RO* sb_ro [[buffer(1)]], const constant tint_array<uint4, 1>* tint_storage_buffer_sizes [[buffer(30)]]) {
+  tint_module_vars_struct const tint_module_vars = tint_module_vars_struct{.prevent_dce=prevent_dce, .sb_ro=sb_ro, .tint_storage_buffer_sizes=tint_storage_buffer_sizes};
   simdgroup_store(subgroupMatrixLoad_f58623(tint_module_vars), (&(*tint_module_vars.prevent_dce)[0u]), ulong(64u), ulong2(0ul), false);
 }
diff --git a/test/tint/builtins/gen/var/subgroupMatrixLoad/f58623.wgsl.expected.spvasm b/test/tint/builtins/gen/var/subgroupMatrixLoad/f58623.wgsl.expected.spvasm
index 3916a9e..314171f 100644
--- a/test/tint/builtins/gen/var/subgroupMatrixLoad/f58623.wgsl.expected.spvasm
+++ b/test/tint/builtins/gen/var/subgroupMatrixLoad/f58623.wgsl.expected.spvasm
@@ -1,7 +1,7 @@
 ; SPIR-V
 ; Version: 1.3
 ; Generator: Google Tint Compiler; 1
-; Bound: 44
+; Bound: 59
 ; Schema: 0
                OpCapability Shader
                OpCapability VulkanMemoryModel
@@ -9,6 +9,7 @@
                OpCapability CooperativeMatrixKHR
                OpExtension "SPV_KHR_vulkan_memory_model"
                OpExtension "SPV_KHR_cooperative_matrix"
+         %28 = OpExtInstImport "GLSL.std.450"
                OpMemoryModel Logical Vulkan
                OpEntryPoint GLCompute %compute_main "compute_main"
                OpExecutionMode %compute_main LocalSize 1 1 1
@@ -52,10 +53,14 @@
 %_ptr_Function_uint = OpTypePointer Function %uint
      %uint_1 = OpConstant %uint 1
 %_ptr_StorageBuffer__runtimearr_float = OpTypePointer StorageBuffer %_runtimearr_float
-%_ptr_StorageBuffer_float = OpTypePointer StorageBuffer %float
+     %uint_7 = OpConstant %uint 7
+       %bool = OpTypeBool
 %_ptr_Function_13 = OpTypePointer Function %13
+    %float_0 = OpConstant %float 0
+         %38 = OpConstantComposite %13 %float_0
+%_ptr_StorageBuffer_float = OpTypePointer StorageBuffer %float
        %void = OpTypeVoid
-         %35 = OpTypeFunction %void
+         %50 = OpTypeFunction %void
 %_ptr_StorageBuffer__arr_float_uint_1024 = OpTypePointer StorageBuffer %_arr_float_uint_1024
 %_ptr_StorageBuffer_float_0 = OpTypePointer StorageBuffer %float
     %uint_64 = OpConstant %uint 64
@@ -63,23 +68,37 @@
          %18 = OpLabel
       %arg_1 = OpVariable %_ptr_Function_uint Function
       %arg_3 = OpVariable %_ptr_Function_uint Function
+         %36 = OpVariable %_ptr_Function_13 Function %38
         %res = OpVariable %_ptr_Function_13 Function
                OpStore %arg_1 %uint_1
                OpStore %arg_3 %uint_8
          %23 = OpAccessChain %_ptr_StorageBuffer__runtimearr_float %sb_ro %uint_0
          %25 = OpLoad %uint %arg_1 None
          %26 = OpLoad %uint %arg_3 None
-         %27 = OpAccessChain %_ptr_StorageBuffer_float %23 %25
-         %29 = OpCooperativeMatrixLoadKHR %13 %27 %uint_1 %26 NonPrivatePointer
-               OpStore %res %29
-         %32 = OpLoad %13 %res None
-               OpReturnValue %32
+         %27 = OpExtInst %uint %28 UMax %26 %uint_8
+         %29 = OpArrayLength %uint %sb_ro 0
+         %30 = OpIMul %uint %27 %uint_7
+         %32 = OpIAdd %uint %25 %30
+         %33 = OpIAdd %uint %32 %uint_8
+         %34 = OpULessThanEqual %bool %33 %29
+               OpSelectionMerge %40 None
+               OpBranchConditional %34 %41 %40
+         %41 = OpLabel
+         %45 = OpAccessChain %_ptr_StorageBuffer_float %23 %25
+         %47 = OpCooperativeMatrixLoadKHR %13 %45 %uint_1 %27 NonPrivatePointer
+               OpStore %36 %47 None
+               OpBranch %40
+         %40 = OpLabel
+         %42 = OpLoad %13 %36 None
+               OpStore %res %42
+         %44 = OpLoad %13 %res None
+               OpReturnValue %44
                OpFunctionEnd
-%compute_main = OpFunction %void None %35
-         %36 = OpLabel
-         %37 = OpFunctionCall %13 %subgroupMatrixLoad_f58623
-         %38 = OpAccessChain %_ptr_StorageBuffer__arr_float_uint_1024 %1 %uint_0
-         %40 = OpAccessChain %_ptr_StorageBuffer_float_0 %38 %uint_0
-               OpCooperativeMatrixStoreKHR %40 %37 %uint_0 %uint_64 NonPrivatePointer
+%compute_main = OpFunction %void None %50
+         %51 = OpLabel
+         %52 = OpFunctionCall %13 %subgroupMatrixLoad_f58623
+         %53 = OpAccessChain %_ptr_StorageBuffer__arr_float_uint_1024 %1 %uint_0
+         %55 = OpAccessChain %_ptr_StorageBuffer_float_0 %53 %uint_0
+               OpCooperativeMatrixStoreKHR %55 %52 %uint_0 %uint_64 NonPrivatePointer
                OpReturn
                OpFunctionEnd
diff --git a/test/tint/builtins/gen/var/subgroupMatrixLoad/f792f5.wgsl.expected.msl b/test/tint/builtins/gen/var/subgroupMatrixLoad/f792f5.wgsl.expected.msl
index 9525c4d..931f64c 100644
--- a/test/tint/builtins/gen/var/subgroupMatrixLoad/f792f5.wgsl.expected.msl
+++ b/test/tint/builtins/gen/var/subgroupMatrixLoad/f792f5.wgsl.expected.msl
@@ -25,9 +25,15 @@
 simdgroup_half8x8 subgroupMatrixLoad_f792f5(tint_module_vars_struct tint_module_vars) {
   uint arg_1 = 1u;
   uint arg_3 = 8u;
-  simdgroup_half8x8 v = make_filled_simdgroup_matrix<half, 8, 8>(0.0h);
-  simdgroup_load(v, (&(*tint_module_vars.sb_ro).arg_0[arg_1]), ulong(arg_3), ulong2(0ul), true);
-  simdgroup_half8x8 res = v;
+  uint const v = arg_1;
+  uint const v_1 = max(arg_3, 8u);
+  simdgroup_half8x8 v_2 = make_filled_simdgroup_matrix<half, 8, 8>(0.0h);
+  if ((((v + (v_1 * 7u)) + 8u) <= 1024u)) {
+    simdgroup_half8x8 v_3 = make_filled_simdgroup_matrix<half, 8, 8>(0.0h);
+    simdgroup_load(v_3, (&(*tint_module_vars.sb_ro).arg_0[v]), ulong(v_1), ulong2(0ul), true);
+    v_2 = v_3;
+  }
+  simdgroup_half8x8 res = v_2;
   return res;
 }
 
diff --git a/test/tint/builtins/gen/var/subgroupMatrixLoad/f792f5.wgsl.expected.spvasm b/test/tint/builtins/gen/var/subgroupMatrixLoad/f792f5.wgsl.expected.spvasm
index 748a53c..3b50859 100644
--- a/test/tint/builtins/gen/var/subgroupMatrixLoad/f792f5.wgsl.expected.spvasm
+++ b/test/tint/builtins/gen/var/subgroupMatrixLoad/f792f5.wgsl.expected.spvasm
@@ -1,7 +1,7 @@
 ; SPIR-V
 ; Version: 1.3
 ; Generator: Google Tint Compiler; 1
-; Bound: 44
+; Bound: 58
 ; Schema: 0
                OpCapability Shader
                OpCapability VulkanMemoryModel
@@ -12,6 +12,7 @@
                OpCapability CooperativeMatrixKHR
                OpExtension "SPV_KHR_vulkan_memory_model"
                OpExtension "SPV_KHR_cooperative_matrix"
+         %28 = OpExtInstImport "GLSL.std.450"
                OpMemoryModel Logical Vulkan
                OpEntryPoint GLCompute %compute_main "compute_main"
                OpExecutionMode %compute_main LocalSize 1 1 1
@@ -56,10 +57,14 @@
 %_ptr_Function_uint = OpTypePointer Function %uint
 %_ptr_StorageBuffer__arr_half_uint_1024 = OpTypePointer StorageBuffer %_arr_half_uint_1024
      %uint_0 = OpConstant %uint 0
-%_ptr_StorageBuffer_half = OpTypePointer StorageBuffer %half
+     %uint_7 = OpConstant %uint 7
+       %bool = OpTypeBool
 %_ptr_Function_13 = OpTypePointer Function %13
+%half_0x0p_0 = OpConstant %half 0x0p+0
+         %37 = OpConstantComposite %13 %half_0x0p_0
+%_ptr_StorageBuffer_half = OpTypePointer StorageBuffer %half
        %void = OpTypeVoid
-         %35 = OpTypeFunction %void
+         %49 = OpTypeFunction %void
 %_ptr_StorageBuffer__arr_half_uint_1024_0 = OpTypePointer StorageBuffer %_arr_half_uint_1024
 %_ptr_StorageBuffer_half_0 = OpTypePointer StorageBuffer %half
     %uint_64 = OpConstant %uint 64
@@ -67,23 +72,36 @@
          %18 = OpLabel
       %arg_1 = OpVariable %_ptr_Function_uint Function
       %arg_3 = OpVariable %_ptr_Function_uint Function
+         %35 = OpVariable %_ptr_Function_13 Function %37
         %res = OpVariable %_ptr_Function_13 Function
                OpStore %arg_1 %uint_1
                OpStore %arg_3 %uint_8
          %22 = OpAccessChain %_ptr_StorageBuffer__arr_half_uint_1024 %8 %uint_0 %uint_0
          %25 = OpLoad %uint %arg_1 None
          %26 = OpLoad %uint %arg_3 None
-         %27 = OpAccessChain %_ptr_StorageBuffer_half %22 %25
-         %29 = OpCooperativeMatrixLoadKHR %13 %27 %uint_1 %26 NonPrivatePointer
-               OpStore %res %29
-         %32 = OpLoad %13 %res None
-               OpReturnValue %32
+         %27 = OpExtInst %uint %28 UMax %26 %uint_8
+         %29 = OpIMul %uint %27 %uint_7
+         %31 = OpIAdd %uint %25 %29
+         %32 = OpIAdd %uint %31 %uint_8
+         %33 = OpULessThanEqual %bool %32 %uint_1024
+               OpSelectionMerge %39 None
+               OpBranchConditional %33 %40 %39
+         %40 = OpLabel
+         %44 = OpAccessChain %_ptr_StorageBuffer_half %22 %25
+         %46 = OpCooperativeMatrixLoadKHR %13 %44 %uint_1 %27 NonPrivatePointer
+               OpStore %35 %46 None
+               OpBranch %39
+         %39 = OpLabel
+         %41 = OpLoad %13 %35 None
+               OpStore %res %41
+         %43 = OpLoad %13 %res None
+               OpReturnValue %43
                OpFunctionEnd
-%compute_main = OpFunction %void None %35
-         %36 = OpLabel
-         %37 = OpFunctionCall %13 %subgroupMatrixLoad_f792f5
-         %38 = OpAccessChain %_ptr_StorageBuffer__arr_half_uint_1024_0 %1 %uint_0
-         %40 = OpAccessChain %_ptr_StorageBuffer_half_0 %38 %uint_0
-               OpCooperativeMatrixStoreKHR %40 %37 %uint_0 %uint_64 NonPrivatePointer
+%compute_main = OpFunction %void None %49
+         %50 = OpLabel
+         %51 = OpFunctionCall %13 %subgroupMatrixLoad_f792f5
+         %52 = OpAccessChain %_ptr_StorageBuffer__arr_half_uint_1024_0 %1 %uint_0
+         %54 = OpAccessChain %_ptr_StorageBuffer_half_0 %52 %uint_0
+               OpCooperativeMatrixStoreKHR %54 %51 %uint_0 %uint_64 NonPrivatePointer
                OpReturn
                OpFunctionEnd
diff --git a/test/tint/builtins/gen/var/subgroupMatrixLoad/f9b989.wgsl.expected.msl b/test/tint/builtins/gen/var/subgroupMatrixLoad/f9b989.wgsl.expected.msl
index 852a879..62e6a32 100644
--- a/test/tint/builtins/gen/var/subgroupMatrixLoad/f9b989.wgsl.expected.msl
+++ b/test/tint/builtins/gen/var/subgroupMatrixLoad/f9b989.wgsl.expected.msl
@@ -20,18 +20,25 @@
 struct tint_module_vars_struct {
   device tint_array<float, 1024>* prevent_dce;
   device SB_RW* sb_rw;
+  const constant tint_array<uint4, 1>* tint_storage_buffer_sizes;
 };
 
 simdgroup_float8x8 subgroupMatrixLoad_f9b989(tint_module_vars_struct tint_module_vars) {
   uint arg_1 = 1u;
   uint arg_3 = 8u;
-  simdgroup_float8x8 v = make_filled_simdgroup_matrix<float, 8, 8>(0.0f);
-  simdgroup_load(v, (&(*tint_module_vars.sb_rw).arg_0[arg_1]), ulong(arg_3), ulong2(0ul), true);
-  simdgroup_float8x8 res = v;
+  uint const v = arg_1;
+  uint const v_1 = max(arg_3, 8u);
+  simdgroup_float8x8 v_2 = make_filled_simdgroup_matrix<float, 8, 8>(0.0f);
+  if ((((v + (v_1 * 7u)) + 8u) <= (((*tint_module_vars.tint_storage_buffer_sizes)[0u].x - 0u) / 4u))) {
+    simdgroup_float8x8 v_3 = make_filled_simdgroup_matrix<float, 8, 8>(0.0f);
+    simdgroup_load(v_3, (&(*tint_module_vars.sb_rw).arg_0[v]), ulong(v_1), ulong2(0ul), true);
+    v_2 = v_3;
+  }
+  simdgroup_float8x8 res = v_2;
   return res;
 }
 
-kernel void compute_main(device tint_array<float, 1024>* prevent_dce [[buffer(0)]], device SB_RW* sb_rw [[buffer(1)]]) {
-  tint_module_vars_struct const tint_module_vars = tint_module_vars_struct{.prevent_dce=prevent_dce, .sb_rw=sb_rw};
+kernel void compute_main(device tint_array<float, 1024>* prevent_dce [[buffer(0)]], device SB_RW* sb_rw [[buffer(1)]], const constant tint_array<uint4, 1>* tint_storage_buffer_sizes [[buffer(30)]]) {
+  tint_module_vars_struct const tint_module_vars = tint_module_vars_struct{.prevent_dce=prevent_dce, .sb_rw=sb_rw, .tint_storage_buffer_sizes=tint_storage_buffer_sizes};
   simdgroup_store(subgroupMatrixLoad_f9b989(tint_module_vars), (&(*tint_module_vars.prevent_dce)[0u]), ulong(64u), ulong2(0ul), false);
 }
diff --git a/test/tint/builtins/gen/var/subgroupMatrixLoad/f9b989.wgsl.expected.spvasm b/test/tint/builtins/gen/var/subgroupMatrixLoad/f9b989.wgsl.expected.spvasm
index d59af03..5127795 100644
--- a/test/tint/builtins/gen/var/subgroupMatrixLoad/f9b989.wgsl.expected.spvasm
+++ b/test/tint/builtins/gen/var/subgroupMatrixLoad/f9b989.wgsl.expected.spvasm
@@ -1,7 +1,7 @@
 ; SPIR-V
 ; Version: 1.3
 ; Generator: Google Tint Compiler; 1
-; Bound: 43
+; Bound: 58
 ; Schema: 0
                OpCapability Shader
                OpCapability VulkanMemoryModel
@@ -9,6 +9,7 @@
                OpCapability CooperativeMatrixKHR
                OpExtension "SPV_KHR_vulkan_memory_model"
                OpExtension "SPV_KHR_cooperative_matrix"
+         %28 = OpExtInstImport "GLSL.std.450"
                OpMemoryModel Logical Vulkan
                OpEntryPoint GLCompute %compute_main "compute_main"
                OpExecutionMode %compute_main LocalSize 1 1 1
@@ -51,33 +52,51 @@
 %_ptr_Function_uint = OpTypePointer Function %uint
      %uint_1 = OpConstant %uint 1
 %_ptr_StorageBuffer__runtimearr_float = OpTypePointer StorageBuffer %_runtimearr_float
-%_ptr_StorageBuffer_float = OpTypePointer StorageBuffer %float
+     %uint_7 = OpConstant %uint 7
+       %bool = OpTypeBool
 %_ptr_Function_13 = OpTypePointer Function %13
+    %float_0 = OpConstant %float 0
+         %38 = OpConstantComposite %13 %float_0
+%_ptr_StorageBuffer_float = OpTypePointer StorageBuffer %float
        %void = OpTypeVoid
-         %35 = OpTypeFunction %void
+         %50 = OpTypeFunction %void
 %_ptr_StorageBuffer__arr_float_uint_1024 = OpTypePointer StorageBuffer %_arr_float_uint_1024
     %uint_64 = OpConstant %uint 64
 %subgroupMatrixLoad_f9b989 = OpFunction %13 None %17
          %18 = OpLabel
       %arg_1 = OpVariable %_ptr_Function_uint Function
       %arg_3 = OpVariable %_ptr_Function_uint Function
+         %36 = OpVariable %_ptr_Function_13 Function %38
         %res = OpVariable %_ptr_Function_13 Function
                OpStore %arg_1 %uint_1
                OpStore %arg_3 %uint_8
          %23 = OpAccessChain %_ptr_StorageBuffer__runtimearr_float %sb_rw %uint_0
          %25 = OpLoad %uint %arg_1 None
          %26 = OpLoad %uint %arg_3 None
-         %27 = OpAccessChain %_ptr_StorageBuffer_float %23 %25
-         %29 = OpCooperativeMatrixLoadKHR %13 %27 %uint_1 %26 NonPrivatePointer
-               OpStore %res %29
-         %32 = OpLoad %13 %res None
-               OpReturnValue %32
+         %27 = OpExtInst %uint %28 UMax %26 %uint_8
+         %29 = OpArrayLength %uint %sb_rw 0
+         %30 = OpIMul %uint %27 %uint_7
+         %32 = OpIAdd %uint %25 %30
+         %33 = OpIAdd %uint %32 %uint_8
+         %34 = OpULessThanEqual %bool %33 %29
+               OpSelectionMerge %40 None
+               OpBranchConditional %34 %41 %40
+         %41 = OpLabel
+         %45 = OpAccessChain %_ptr_StorageBuffer_float %23 %25
+         %47 = OpCooperativeMatrixLoadKHR %13 %45 %uint_1 %27 NonPrivatePointer
+               OpStore %36 %47 None
+               OpBranch %40
+         %40 = OpLabel
+         %42 = OpLoad %13 %36 None
+               OpStore %res %42
+         %44 = OpLoad %13 %res None
+               OpReturnValue %44
                OpFunctionEnd
-%compute_main = OpFunction %void None %35
-         %36 = OpLabel
-         %37 = OpFunctionCall %13 %subgroupMatrixLoad_f9b989
-         %38 = OpAccessChain %_ptr_StorageBuffer__arr_float_uint_1024 %1 %uint_0
-         %40 = OpAccessChain %_ptr_StorageBuffer_float %38 %uint_0
-               OpCooperativeMatrixStoreKHR %40 %37 %uint_0 %uint_64 NonPrivatePointer
+%compute_main = OpFunction %void None %50
+         %51 = OpLabel
+         %52 = OpFunctionCall %13 %subgroupMatrixLoad_f9b989
+         %53 = OpAccessChain %_ptr_StorageBuffer__arr_float_uint_1024 %1 %uint_0
+         %55 = OpAccessChain %_ptr_StorageBuffer_float %53 %uint_0
+               OpCooperativeMatrixStoreKHR %55 %52 %uint_0 %uint_64 NonPrivatePointer
                OpReturn
                OpFunctionEnd
diff --git a/test/tint/builtins/gen/var/subgroupMatrixLoad/fa3c74.wgsl.expected.spvasm b/test/tint/builtins/gen/var/subgroupMatrixLoad/fa3c74.wgsl.expected.spvasm
index 9e63d42..3f76803 100644
--- a/test/tint/builtins/gen/var/subgroupMatrixLoad/fa3c74.wgsl.expected.spvasm
+++ b/test/tint/builtins/gen/var/subgroupMatrixLoad/fa3c74.wgsl.expected.spvasm
@@ -1,7 +1,7 @@
 ; SPIR-V
 ; Version: 1.3
 ; Generator: Google Tint Compiler; 1
-; Bound: 44
+; Bound: 59
 ; Schema: 0
                OpCapability Shader
                OpCapability VulkanMemoryModel
@@ -9,6 +9,7 @@
                OpCapability CooperativeMatrixKHR
                OpExtension "SPV_KHR_vulkan_memory_model"
                OpExtension "SPV_KHR_cooperative_matrix"
+         %28 = OpExtInstImport "GLSL.std.450"
                OpMemoryModel Logical Vulkan
                OpEntryPoint GLCompute %compute_main "compute_main"
                OpExecutionMode %compute_main LocalSize 1 1 1
@@ -52,10 +53,14 @@
 %_ptr_Function_uint = OpTypePointer Function %uint
      %uint_1 = OpConstant %uint 1
 %_ptr_StorageBuffer__runtimearr_int = OpTypePointer StorageBuffer %_runtimearr_int
-%_ptr_StorageBuffer_int = OpTypePointer StorageBuffer %int
+     %uint_7 = OpConstant %uint 7
+       %bool = OpTypeBool
 %_ptr_Function_13 = OpTypePointer Function %13
+      %int_0 = OpConstant %int 0
+         %38 = OpConstantComposite %13 %int_0
+%_ptr_StorageBuffer_int = OpTypePointer StorageBuffer %int
        %void = OpTypeVoid
-         %35 = OpTypeFunction %void
+         %50 = OpTypeFunction %void
 %_ptr_StorageBuffer__arr_int_uint_1024 = OpTypePointer StorageBuffer %_arr_int_uint_1024
 %_ptr_StorageBuffer_int_0 = OpTypePointer StorageBuffer %int
     %uint_64 = OpConstant %uint 64
@@ -63,23 +68,37 @@
          %18 = OpLabel
       %arg_1 = OpVariable %_ptr_Function_uint Function
       %arg_3 = OpVariable %_ptr_Function_uint Function
+         %36 = OpVariable %_ptr_Function_13 Function %38
         %res = OpVariable %_ptr_Function_13 Function
                OpStore %arg_1 %uint_1
                OpStore %arg_3 %uint_8
          %23 = OpAccessChain %_ptr_StorageBuffer__runtimearr_int %sb_ro %uint_0
          %25 = OpLoad %uint %arg_1 None
          %26 = OpLoad %uint %arg_3 None
-         %27 = OpAccessChain %_ptr_StorageBuffer_int %23 %25
-         %29 = OpCooperativeMatrixLoadKHR %13 %27 %uint_1 %26 NonPrivatePointer
-               OpStore %res %29
-         %32 = OpLoad %13 %res None
-               OpReturnValue %32
+         %27 = OpExtInst %uint %28 UMax %26 %uint_8
+         %29 = OpArrayLength %uint %sb_ro 0
+         %30 = OpIMul %uint %27 %uint_7
+         %32 = OpIAdd %uint %25 %30
+         %33 = OpIAdd %uint %32 %uint_8
+         %34 = OpULessThanEqual %bool %33 %29
+               OpSelectionMerge %40 None
+               OpBranchConditional %34 %41 %40
+         %41 = OpLabel
+         %45 = OpAccessChain %_ptr_StorageBuffer_int %23 %25
+         %47 = OpCooperativeMatrixLoadKHR %13 %45 %uint_1 %27 NonPrivatePointer
+               OpStore %36 %47 None
+               OpBranch %40
+         %40 = OpLabel
+         %42 = OpLoad %13 %36 None
+               OpStore %res %42
+         %44 = OpLoad %13 %res None
+               OpReturnValue %44
                OpFunctionEnd
-%compute_main = OpFunction %void None %35
-         %36 = OpLabel
-         %37 = OpFunctionCall %13 %subgroupMatrixLoad_fa3c74
-         %38 = OpAccessChain %_ptr_StorageBuffer__arr_int_uint_1024 %1 %uint_0
-         %40 = OpAccessChain %_ptr_StorageBuffer_int_0 %38 %uint_0
-               OpCooperativeMatrixStoreKHR %40 %37 %uint_0 %uint_64 NonPrivatePointer
+%compute_main = OpFunction %void None %50
+         %51 = OpLabel
+         %52 = OpFunctionCall %13 %subgroupMatrixLoad_fa3c74
+         %53 = OpAccessChain %_ptr_StorageBuffer__arr_int_uint_1024 %1 %uint_0
+         %55 = OpAccessChain %_ptr_StorageBuffer_int_0 %53 %uint_0
+               OpCooperativeMatrixStoreKHR %55 %52 %uint_0 %uint_64 NonPrivatePointer
                OpReturn
                OpFunctionEnd
diff --git a/test/tint/builtins/gen/var/subgroupMatrixLoad/fde5b5.wgsl.expected.spvasm b/test/tint/builtins/gen/var/subgroupMatrixLoad/fde5b5.wgsl.expected.spvasm
index 415c561..da55096 100644
--- a/test/tint/builtins/gen/var/subgroupMatrixLoad/fde5b5.wgsl.expected.spvasm
+++ b/test/tint/builtins/gen/var/subgroupMatrixLoad/fde5b5.wgsl.expected.spvasm
@@ -1,7 +1,7 @@
 ; SPIR-V
 ; Version: 1.3
 ; Generator: Google Tint Compiler; 1
-; Bound: 63
+; Bound: 75
 ; Schema: 0
                OpCapability Shader
                OpCapability VulkanMemoryModel
@@ -9,6 +9,7 @@
                OpCapability CooperativeMatrixKHR
                OpExtension "SPV_KHR_vulkan_memory_model"
                OpExtension "SPV_KHR_cooperative_matrix"
+         %26 = OpExtInstImport "GLSL.std.450"
                OpMemoryModel Logical Vulkan
                OpEntryPoint GLCompute %compute_main "compute_main" %compute_main_local_invocation_index_Input
                OpExecutionMode %compute_main LocalSize 1 1 1
@@ -47,66 +48,81 @@
          %17 = OpTypeFunction %13
 %_ptr_Function_uint = OpTypePointer Function %uint
      %uint_1 = OpConstant %uint 1
-%_ptr_Workgroup_uint = OpTypePointer Workgroup %uint
+     %uint_7 = OpConstant %uint 7
+       %bool = OpTypeBool
 %_ptr_Function_13 = OpTypePointer Function %13
+         %35 = OpConstantComposite %13 %uint_0
+%_ptr_Workgroup_uint = OpTypePointer Workgroup %uint
        %void = OpTypeVoid
-         %34 = OpTypeFunction %void %uint
+         %47 = OpTypeFunction %void %uint
      %uint_2 = OpConstant %uint 2
  %uint_24840 = OpConstant %uint 24840
 %_ptr_StorageBuffer__arr_uint_uint_1024 = OpTypePointer StorageBuffer %_arr_uint_uint_1024
 %_ptr_StorageBuffer_uint = OpTypePointer StorageBuffer %uint
     %uint_64 = OpConstant %uint 64
-       %bool = OpTypeBool
-         %59 = OpTypeFunction %void
+         %71 = OpTypeFunction %void
 %subgroupMatrixLoad_fde5b5 = OpFunction %13 None %17
          %18 = OpLabel
       %arg_1 = OpVariable %_ptr_Function_uint Function
       %arg_3 = OpVariable %_ptr_Function_uint Function
+         %33 = OpVariable %_ptr_Function_13 Function %35
         %res = OpVariable %_ptr_Function_13 Function
                OpStore %arg_1 %uint_1
                OpStore %arg_3 %uint_8
          %23 = OpLoad %uint %arg_1 None
          %24 = OpLoad %uint %arg_3 None
-         %25 = OpAccessChain %_ptr_Workgroup_uint %arg_0 %23
-         %27 = OpCooperativeMatrixLoadKHR %13 %25 %uint_1 %24 NonPrivatePointer
-               OpStore %res %27
-         %30 = OpLoad %13 %res None
-               OpReturnValue %30
-               OpFunctionEnd
-%compute_main_inner = OpFunction %void None %34
-%tint_local_index = OpFunctionParameter %uint
-         %35 = OpLabel
+         %25 = OpExtInst %uint %26 UMax %24 %uint_8
+         %27 = OpIMul %uint %25 %uint_7
+         %29 = OpIAdd %uint %23 %27
+         %30 = OpIAdd %uint %29 %uint_8
+         %31 = OpULessThanEqual %bool %30 %uint_1024
+               OpSelectionMerge %36 None
+               OpBranchConditional %31 %37 %36
+         %37 = OpLabel
+         %41 = OpAccessChain %_ptr_Workgroup_uint %arg_0 %23
+         %43 = OpCooperativeMatrixLoadKHR %13 %41 %uint_1 %25 NonPrivatePointer
+               OpStore %33 %43 None
                OpBranch %36
          %36 = OpLabel
-               OpBranch %39
-         %39 = OpLabel
-         %41 = OpPhi %uint %tint_local_index %36 %42 %38
-               OpLoopMerge %40 %38 None
-               OpBranch %37
-         %37 = OpLabel
-         %53 = OpUGreaterThanEqual %bool %41 %uint_1024
-               OpSelectionMerge %55 None
-               OpBranchConditional %53 %56 %55
-         %56 = OpLabel
-               OpBranch %40
-         %55 = OpLabel
-         %57 = OpAccessChain %_ptr_Workgroup_uint %arg_0 %41
-               OpStore %57 %uint_0 NonPrivatePointer
-               OpBranch %38
-         %38 = OpLabel
-         %42 = OpIAdd %uint %41 %uint_1
-               OpBranch %39
-         %40 = OpLabel
+         %38 = OpLoad %13 %33 None
+               OpStore %res %38
+         %40 = OpLoad %13 %res None
+               OpReturnValue %40
+               OpFunctionEnd
+%compute_main_inner = OpFunction %void None %47
+%tint_local_index = OpFunctionParameter %uint
+         %48 = OpLabel
+               OpBranch %49
+         %49 = OpLabel
+               OpBranch %52
+         %52 = OpLabel
+         %54 = OpPhi %uint %tint_local_index %49 %55 %51
+               OpLoopMerge %53 %51 None
+               OpBranch %50
+         %50 = OpLabel
+         %66 = OpUGreaterThanEqual %bool %54 %uint_1024
+               OpSelectionMerge %67 None
+               OpBranchConditional %66 %68 %67
+         %68 = OpLabel
+               OpBranch %53
+         %67 = OpLabel
+         %69 = OpAccessChain %_ptr_Workgroup_uint %arg_0 %54
+               OpStore %69 %uint_0 NonPrivatePointer
+               OpBranch %51
+         %51 = OpLabel
+         %55 = OpIAdd %uint %54 %uint_1
+               OpBranch %52
+         %53 = OpLabel
                OpControlBarrier %uint_2 %uint_2 %uint_24840
-         %46 = OpFunctionCall %13 %subgroupMatrixLoad_fde5b5
-         %47 = OpAccessChain %_ptr_StorageBuffer__arr_uint_uint_1024 %1 %uint_0
-         %49 = OpAccessChain %_ptr_StorageBuffer_uint %47 %uint_0
-               OpCooperativeMatrixStoreKHR %49 %46 %uint_0 %uint_64 NonPrivatePointer
+         %59 = OpFunctionCall %13 %subgroupMatrixLoad_fde5b5
+         %60 = OpAccessChain %_ptr_StorageBuffer__arr_uint_uint_1024 %1 %uint_0
+         %62 = OpAccessChain %_ptr_StorageBuffer_uint %60 %uint_0
+               OpCooperativeMatrixStoreKHR %62 %59 %uint_0 %uint_64 NonPrivatePointer
                OpReturn
                OpFunctionEnd
-%compute_main = OpFunction %void None %59
-         %60 = OpLabel
-         %61 = OpLoad %uint %compute_main_local_invocation_index_Input None
-         %62 = OpFunctionCall %void %compute_main_inner %61
+%compute_main = OpFunction %void None %71
+         %72 = OpLabel
+         %73 = OpLoad %uint %compute_main_local_invocation_index_Input None
+         %74 = OpFunctionCall %void %compute_main_inner %73
                OpReturn
                OpFunctionEnd
diff --git a/test/tint/builtins/gen/var/subgroupMatrixStore/0268e9.wgsl.expected.msl b/test/tint/builtins/gen/var/subgroupMatrixStore/0268e9.wgsl.expected.msl
index 7de35fa..574e4b9 100644
--- a/test/tint/builtins/gen/var/subgroupMatrixStore/0268e9.wgsl.expected.msl
+++ b/test/tint/builtins/gen/var/subgroupMatrixStore/0268e9.wgsl.expected.msl
@@ -25,21 +25,26 @@
   uint arg_1 = 1u;
   simdgroup_float8x8 arg_2 = simdgroup_float8x8();
   uint arg_4 = 8u;
-  simdgroup_store(arg_2, (&(*tint_module_vars.arg_0)[arg_1]), ulong(arg_4), ulong2(0ul), true);
+  uint const v = arg_1;
+  simdgroup_float8x8 const v_1 = arg_2;
+  uint const v_2 = max(arg_4, 8u);
+  if ((((v + (v_2 * 7u)) + 8u) <= 1024u)) {
+    simdgroup_store(v_1, (&(*tint_module_vars.arg_0)[v]), ulong(v_2), ulong2(0ul), true);
+  }
 }
 
 void compute_main_inner(uint tint_local_index, tint_module_vars_struct tint_module_vars) {
   {
-    uint v = 0u;
-    v = tint_local_index;
+    uint v_3 = 0u;
+    v_3 = tint_local_index;
     while(true) {
-      uint const v_1 = v;
-      if ((v_1 >= 1024u)) {
+      uint const v_4 = v_3;
+      if ((v_4 >= 1024u)) {
         break;
       }
-      (*tint_module_vars.arg_0)[v_1] = 0.0f;
+      (*tint_module_vars.arg_0)[v_4] = 0.0f;
       {
-        v = (v_1 + 1u);
+        v_3 = (v_4 + 1u);
       }
       continue;
     }
@@ -48,7 +53,7 @@
   subgroupMatrixStore_0268e9(tint_module_vars);
 }
 
-kernel void compute_main(uint tint_local_index [[thread_index_in_threadgroup]], threadgroup tint_symbol_1* v_2 [[threadgroup(0)]]) {
-  tint_module_vars_struct const tint_module_vars = tint_module_vars_struct{.arg_0=(&(*v_2).tint_symbol)};
+kernel void compute_main(uint tint_local_index [[thread_index_in_threadgroup]], threadgroup tint_symbol_1* v_5 [[threadgroup(0)]]) {
+  tint_module_vars_struct const tint_module_vars = tint_module_vars_struct{.arg_0=(&(*v_5).tint_symbol)};
   compute_main_inner(tint_local_index, tint_module_vars);
 }
diff --git a/test/tint/builtins/gen/var/subgroupMatrixStore/0268e9.wgsl.expected.spvasm b/test/tint/builtins/gen/var/subgroupMatrixStore/0268e9.wgsl.expected.spvasm
index 588cd86..cf2058d 100644
--- a/test/tint/builtins/gen/var/subgroupMatrixStore/0268e9.wgsl.expected.spvasm
+++ b/test/tint/builtins/gen/var/subgroupMatrixStore/0268e9.wgsl.expected.spvasm
@@ -1,7 +1,7 @@
 ; SPIR-V
 ; Version: 1.3
 ; Generator: Google Tint Compiler; 1
-; Bound: 55
+; Bound: 64
 ; Schema: 0
                OpCapability Shader
                OpCapability VulkanMemoryModel
@@ -9,6 +9,7 @@
                OpCapability CooperativeMatrixKHR
                OpExtension "SPV_KHR_vulkan_memory_model"
                OpExtension "SPV_KHR_cooperative_matrix"
+         %29 = OpExtInstImport "GLSL.std.450"
                OpMemoryModel Logical Vulkan
                OpEntryPoint GLCompute %compute_main "compute_main" %compute_main_local_invocation_index_Input
                OpExecutionMode %compute_main LocalSize 1 1 1
@@ -41,11 +42,12 @@
     %float_0 = OpConstant %float 0
          %16 = OpConstantComposite %17 %float_0
 %_ptr_Function_17 = OpTypePointer Function %17
+     %uint_7 = OpConstant %uint 7
+       %bool = OpTypeBool
 %_ptr_Workgroup_float = OpTypePointer Workgroup %float
-         %33 = OpTypeFunction %void %uint
+         %43 = OpTypeFunction %void %uint
      %uint_2 = OpConstant %uint 2
  %uint_24840 = OpConstant %uint 24840
-       %bool = OpTypeBool
 %subgroupMatrixStore_0268e9 = OpFunction %void None %11
          %12 = OpLabel
       %arg_1 = OpVariable %_ptr_Function_uint Function
@@ -57,41 +59,51 @@
          %25 = OpLoad %uint %arg_1 None
          %26 = OpLoad %17 %arg_2 None
          %27 = OpLoad %uint %arg_4 None
-         %28 = OpAccessChain %_ptr_Workgroup_float %arg_0 %25
-               OpCooperativeMatrixStoreKHR %28 %26 %uint_1 %27 NonPrivatePointer
-               OpReturn
-               OpFunctionEnd
-%compute_main_inner = OpFunction %void None %33
-%tint_local_index = OpFunctionParameter %uint
-         %34 = OpLabel
-               OpBranch %35
-         %35 = OpLabel
-               OpBranch %38
-         %38 = OpLabel
-         %40 = OpPhi %uint %tint_local_index %35 %41 %37
-               OpLoopMerge %39 %37 None
+         %28 = OpExtInst %uint %29 UMax %27 %uint_8
+         %30 = OpIMul %uint %28 %uint_7
+         %32 = OpIAdd %uint %25 %30
+         %33 = OpIAdd %uint %32 %uint_8
+         %34 = OpULessThanEqual %bool %33 %uint_1024
+               OpSelectionMerge %36 None
+               OpBranchConditional %34 %37 %36
+         %37 = OpLabel
+         %38 = OpAccessChain %_ptr_Workgroup_float %arg_0 %25
+               OpCooperativeMatrixStoreKHR %38 %26 %uint_1 %28 NonPrivatePointer
                OpBranch %36
          %36 = OpLabel
-         %46 = OpUGreaterThanEqual %bool %40 %uint_1024
-               OpSelectionMerge %48 None
-               OpBranchConditional %46 %49 %48
-         %49 = OpLabel
-               OpBranch %39
+               OpReturn
+               OpFunctionEnd
+%compute_main_inner = OpFunction %void None %43
+%tint_local_index = OpFunctionParameter %uint
+         %44 = OpLabel
+               OpBranch %45
+         %45 = OpLabel
+               OpBranch %48
          %48 = OpLabel
-         %50 = OpAccessChain %_ptr_Workgroup_float %arg_0 %40
-               OpStore %50 %float_0 NonPrivatePointer
-               OpBranch %37
-         %37 = OpLabel
-         %41 = OpIAdd %uint %40 %uint_1
-               OpBranch %38
-         %39 = OpLabel
+         %50 = OpPhi %uint %tint_local_index %45 %51 %47
+               OpLoopMerge %49 %47 None
+               OpBranch %46
+         %46 = OpLabel
+         %56 = OpUGreaterThanEqual %bool %50 %uint_1024
+               OpSelectionMerge %57 None
+               OpBranchConditional %56 %58 %57
+         %58 = OpLabel
+               OpBranch %49
+         %57 = OpLabel
+         %59 = OpAccessChain %_ptr_Workgroup_float %arg_0 %50
+               OpStore %59 %float_0 NonPrivatePointer
+               OpBranch %47
+         %47 = OpLabel
+         %51 = OpIAdd %uint %50 %uint_1
+               OpBranch %48
+         %49 = OpLabel
                OpControlBarrier %uint_2 %uint_2 %uint_24840
-         %45 = OpFunctionCall %void %subgroupMatrixStore_0268e9
+         %55 = OpFunctionCall %void %subgroupMatrixStore_0268e9
                OpReturn
                OpFunctionEnd
 %compute_main = OpFunction %void None %11
-         %52 = OpLabel
-         %53 = OpLoad %uint %compute_main_local_invocation_index_Input None
-         %54 = OpFunctionCall %void %compute_main_inner %53
+         %61 = OpLabel
+         %62 = OpLoad %uint %compute_main_local_invocation_index_Input None
+         %63 = OpFunctionCall %void %compute_main_inner %62
                OpReturn
                OpFunctionEnd
diff --git a/test/tint/builtins/gen/var/subgroupMatrixStore/0413f0.wgsl.expected.msl b/test/tint/builtins/gen/var/subgroupMatrixStore/0413f0.wgsl.expected.msl
index ea6d640..92e5b5a 100644
--- a/test/tint/builtins/gen/var/subgroupMatrixStore/0413f0.wgsl.expected.msl
+++ b/test/tint/builtins/gen/var/subgroupMatrixStore/0413f0.wgsl.expected.msl
@@ -25,21 +25,26 @@
   uint arg_1 = 1u;
   simdgroup_half8x8 arg_2 = simdgroup_half8x8();
   uint arg_4 = 8u;
-  simdgroup_store(arg_2, (&(*tint_module_vars.arg_0)[arg_1]), ulong(arg_4), ulong2(0ul), true);
+  uint const v = arg_1;
+  simdgroup_half8x8 const v_1 = arg_2;
+  uint const v_2 = max(arg_4, 8u);
+  if ((((v + (v_2 * 7u)) + 8u) <= 1024u)) {
+    simdgroup_store(v_1, (&(*tint_module_vars.arg_0)[v]), ulong(v_2), ulong2(0ul), true);
+  }
 }
 
 void compute_main_inner(uint tint_local_index, tint_module_vars_struct tint_module_vars) {
   {
-    uint v = 0u;
-    v = tint_local_index;
+    uint v_3 = 0u;
+    v_3 = tint_local_index;
     while(true) {
-      uint const v_1 = v;
-      if ((v_1 >= 1024u)) {
+      uint const v_4 = v_3;
+      if ((v_4 >= 1024u)) {
         break;
       }
-      (*tint_module_vars.arg_0)[v_1] = 0.0h;
+      (*tint_module_vars.arg_0)[v_4] = 0.0h;
       {
-        v = (v_1 + 1u);
+        v_3 = (v_4 + 1u);
       }
       continue;
     }
@@ -48,7 +53,7 @@
   subgroupMatrixStore_0413f0(tint_module_vars);
 }
 
-kernel void compute_main(uint tint_local_index [[thread_index_in_threadgroup]], threadgroup tint_symbol_1* v_2 [[threadgroup(0)]]) {
-  tint_module_vars_struct const tint_module_vars = tint_module_vars_struct{.arg_0=(&(*v_2).tint_symbol)};
+kernel void compute_main(uint tint_local_index [[thread_index_in_threadgroup]], threadgroup tint_symbol_1* v_5 [[threadgroup(0)]]) {
+  tint_module_vars_struct const tint_module_vars = tint_module_vars_struct{.arg_0=(&(*v_5).tint_symbol)};
   compute_main_inner(tint_local_index, tint_module_vars);
 }
diff --git a/test/tint/builtins/gen/var/subgroupMatrixStore/0413f0.wgsl.expected.spvasm b/test/tint/builtins/gen/var/subgroupMatrixStore/0413f0.wgsl.expected.spvasm
index 2a0a4b3..66a6f95 100644
--- a/test/tint/builtins/gen/var/subgroupMatrixStore/0413f0.wgsl.expected.spvasm
+++ b/test/tint/builtins/gen/var/subgroupMatrixStore/0413f0.wgsl.expected.spvasm
@@ -1,7 +1,7 @@
 ; SPIR-V
 ; Version: 1.3
 ; Generator: Google Tint Compiler; 1
-; Bound: 54
+; Bound: 63
 ; Schema: 0
                OpCapability Shader
                OpCapability VulkanMemoryModel
@@ -12,6 +12,7 @@
                OpCapability CooperativeMatrixKHR
                OpExtension "SPV_KHR_vulkan_memory_model"
                OpExtension "SPV_KHR_cooperative_matrix"
+         %28 = OpExtInstImport "GLSL.std.450"
                OpMemoryModel Logical Vulkan
                OpEntryPoint GLCompute %compute_main "compute_main" %compute_main_local_invocation_index_Input
                OpExecutionMode %compute_main LocalSize 1 1 1
@@ -43,11 +44,12 @@
 %half_0x0p_0 = OpConstant %half 0x0p+0
          %16 = OpConstantComposite %17 %half_0x0p_0
 %_ptr_Function_17 = OpTypePointer Function %17
+     %uint_7 = OpConstant %uint 7
+       %bool = OpTypeBool
 %_ptr_Workgroup_half = OpTypePointer Workgroup %half
-         %32 = OpTypeFunction %void %uint
+         %42 = OpTypeFunction %void %uint
      %uint_2 = OpConstant %uint 2
  %uint_24840 = OpConstant %uint 24840
-       %bool = OpTypeBool
 %subgroupMatrixStore_0413f0 = OpFunction %void None %11
          %12 = OpLabel
       %arg_1 = OpVariable %_ptr_Function_uint Function
@@ -59,41 +61,51 @@
          %24 = OpLoad %uint %arg_1 None
          %25 = OpLoad %17 %arg_2 None
          %26 = OpLoad %uint %arg_4 None
-         %27 = OpAccessChain %_ptr_Workgroup_half %arg_0 %24
-               OpCooperativeMatrixStoreKHR %27 %25 %uint_1 %26 NonPrivatePointer
-               OpReturn
-               OpFunctionEnd
-%compute_main_inner = OpFunction %void None %32
-%tint_local_index = OpFunctionParameter %uint
-         %33 = OpLabel
-               OpBranch %34
-         %34 = OpLabel
-               OpBranch %37
-         %37 = OpLabel
-         %39 = OpPhi %uint %tint_local_index %34 %40 %36
-               OpLoopMerge %38 %36 None
+         %27 = OpExtInst %uint %28 UMax %26 %uint_8
+         %29 = OpIMul %uint %27 %uint_7
+         %31 = OpIAdd %uint %24 %29
+         %32 = OpIAdd %uint %31 %uint_8
+         %33 = OpULessThanEqual %bool %32 %uint_1024
+               OpSelectionMerge %35 None
+               OpBranchConditional %33 %36 %35
+         %36 = OpLabel
+         %37 = OpAccessChain %_ptr_Workgroup_half %arg_0 %24
+               OpCooperativeMatrixStoreKHR %37 %25 %uint_1 %27 NonPrivatePointer
                OpBranch %35
          %35 = OpLabel
-         %45 = OpUGreaterThanEqual %bool %39 %uint_1024
-               OpSelectionMerge %47 None
-               OpBranchConditional %45 %48 %47
-         %48 = OpLabel
-               OpBranch %38
+               OpReturn
+               OpFunctionEnd
+%compute_main_inner = OpFunction %void None %42
+%tint_local_index = OpFunctionParameter %uint
+         %43 = OpLabel
+               OpBranch %44
+         %44 = OpLabel
+               OpBranch %47
          %47 = OpLabel
-         %49 = OpAccessChain %_ptr_Workgroup_half %arg_0 %39
-               OpStore %49 %half_0x0p_0 NonPrivatePointer
-               OpBranch %36
-         %36 = OpLabel
-         %40 = OpIAdd %uint %39 %uint_1
-               OpBranch %37
-         %38 = OpLabel
+         %49 = OpPhi %uint %tint_local_index %44 %50 %46
+               OpLoopMerge %48 %46 None
+               OpBranch %45
+         %45 = OpLabel
+         %55 = OpUGreaterThanEqual %bool %49 %uint_1024
+               OpSelectionMerge %56 None
+               OpBranchConditional %55 %57 %56
+         %57 = OpLabel
+               OpBranch %48
+         %56 = OpLabel
+         %58 = OpAccessChain %_ptr_Workgroup_half %arg_0 %49
+               OpStore %58 %half_0x0p_0 NonPrivatePointer
+               OpBranch %46
+         %46 = OpLabel
+         %50 = OpIAdd %uint %49 %uint_1
+               OpBranch %47
+         %48 = OpLabel
                OpControlBarrier %uint_2 %uint_2 %uint_24840
-         %44 = OpFunctionCall %void %subgroupMatrixStore_0413f0
+         %54 = OpFunctionCall %void %subgroupMatrixStore_0413f0
                OpReturn
                OpFunctionEnd
 %compute_main = OpFunction %void None %11
-         %51 = OpLabel
-         %52 = OpLoad %uint %compute_main_local_invocation_index_Input None
-         %53 = OpFunctionCall %void %compute_main_inner %52
+         %60 = OpLabel
+         %61 = OpLoad %uint %compute_main_local_invocation_index_Input None
+         %62 = OpFunctionCall %void %compute_main_inner %61
                OpReturn
                OpFunctionEnd
diff --git a/test/tint/builtins/gen/var/subgroupMatrixStore/1383a5.wgsl.expected.msl b/test/tint/builtins/gen/var/subgroupMatrixStore/1383a5.wgsl.expected.msl
index 732bc3e..319660a 100644
--- a/test/tint/builtins/gen/var/subgroupMatrixStore/1383a5.wgsl.expected.msl
+++ b/test/tint/builtins/gen/var/subgroupMatrixStore/1383a5.wgsl.expected.msl
@@ -25,7 +25,12 @@
   uint arg_1 = 1u;
   simdgroup_half8x8 arg_2 = simdgroup_half8x8();
   uint arg_4 = 8u;
-  simdgroup_store(arg_2, (&(*tint_module_vars.sb_rw).arg_0[arg_1]), ulong(arg_4), ulong2(0ul), true);
+  uint const v = arg_1;
+  simdgroup_half8x8 const v_1 = arg_2;
+  uint const v_2 = max(arg_4, 8u);
+  if ((((v + (v_2 * 7u)) + 8u) <= 1024u)) {
+    simdgroup_store(v_1, (&(*tint_module_vars.sb_rw).arg_0[v]), ulong(v_2), ulong2(0ul), true);
+  }
 }
 
 kernel void compute_main(device SB_RW* sb_rw [[buffer(0)]]) {
diff --git a/test/tint/builtins/gen/var/subgroupMatrixStore/1383a5.wgsl.expected.spvasm b/test/tint/builtins/gen/var/subgroupMatrixStore/1383a5.wgsl.expected.spvasm
index ff8a86a..8c895a8 100644
--- a/test/tint/builtins/gen/var/subgroupMatrixStore/1383a5.wgsl.expected.spvasm
+++ b/test/tint/builtins/gen/var/subgroupMatrixStore/1383a5.wgsl.expected.spvasm
@@ -1,7 +1,7 @@
 ; SPIR-V
 ; Version: 1.3
 ; Generator: Google Tint Compiler; 1
-; Bound: 36
+; Bound: 46
 ; Schema: 0
                OpCapability Shader
                OpCapability VulkanMemoryModel
@@ -12,6 +12,7 @@
                OpCapability CooperativeMatrixKHR
                OpExtension "SPV_KHR_vulkan_memory_model"
                OpExtension "SPV_KHR_cooperative_matrix"
+         %31 = OpExtInstImport "GLSL.std.450"
                OpMemoryModel Logical Vulkan
                OpEntryPoint GLCompute %compute_main "compute_main"
                OpExecutionMode %compute_main LocalSize 1 1 1
@@ -50,6 +51,8 @@
 %_ptr_Function_17 = OpTypePointer Function %17
 %_ptr_StorageBuffer__arr_half_uint_1024 = OpTypePointer StorageBuffer %_arr_half_uint_1024
      %uint_0 = OpConstant %uint 0
+     %uint_7 = OpConstant %uint 7
+       %bool = OpTypeBool
 %_ptr_StorageBuffer_half = OpTypePointer StorageBuffer %half
 %subgroupMatrixStore_1383a5 = OpFunction %void None %11
          %12 = OpLabel
@@ -63,12 +66,22 @@
          %27 = OpLoad %uint %arg_1 None
          %28 = OpLoad %17 %arg_2 None
          %29 = OpLoad %uint %arg_4 None
-         %30 = OpAccessChain %_ptr_StorageBuffer_half %24 %27
-               OpCooperativeMatrixStoreKHR %30 %28 %uint_1 %29 NonPrivatePointer
+         %30 = OpExtInst %uint %31 UMax %29 %uint_8
+         %32 = OpIMul %uint %30 %uint_7
+         %34 = OpIAdd %uint %27 %32
+         %35 = OpIAdd %uint %34 %uint_8
+         %36 = OpULessThanEqual %bool %35 %uint_1024
+               OpSelectionMerge %38 None
+               OpBranchConditional %36 %39 %38
+         %39 = OpLabel
+         %40 = OpAccessChain %_ptr_StorageBuffer_half %24 %27
+               OpCooperativeMatrixStoreKHR %40 %28 %uint_1 %30 NonPrivatePointer
+               OpBranch %38
+         %38 = OpLabel
                OpReturn
                OpFunctionEnd
 %compute_main = OpFunction %void None %11
-         %34 = OpLabel
-         %35 = OpFunctionCall %void %subgroupMatrixStore_1383a5
+         %44 = OpLabel
+         %45 = OpFunctionCall %void %subgroupMatrixStore_1383a5
                OpReturn
                OpFunctionEnd
diff --git a/test/tint/builtins/gen/var/subgroupMatrixStore/1433f9.wgsl.expected.spvasm b/test/tint/builtins/gen/var/subgroupMatrixStore/1433f9.wgsl.expected.spvasm
index b5b4dc5..0138c51 100644
--- a/test/tint/builtins/gen/var/subgroupMatrixStore/1433f9.wgsl.expected.spvasm
+++ b/test/tint/builtins/gen/var/subgroupMatrixStore/1433f9.wgsl.expected.spvasm
@@ -1,7 +1,7 @@
 ; SPIR-V
 ; Version: 1.3
 ; Generator: Google Tint Compiler; 1
-; Bound: 55
+; Bound: 64
 ; Schema: 0
                OpCapability Shader
                OpCapability VulkanMemoryModel
@@ -9,6 +9,7 @@
                OpCapability CooperativeMatrixKHR
                OpExtension "SPV_KHR_vulkan_memory_model"
                OpExtension "SPV_KHR_cooperative_matrix"
+         %29 = OpExtInstImport "GLSL.std.450"
                OpMemoryModel Logical Vulkan
                OpEntryPoint GLCompute %compute_main "compute_main" %compute_main_local_invocation_index_Input
                OpExecutionMode %compute_main LocalSize 1 1 1
@@ -41,11 +42,12 @@
       %int_0 = OpConstant %int 0
          %16 = OpConstantComposite %17 %int_0
 %_ptr_Function_17 = OpTypePointer Function %17
+     %uint_7 = OpConstant %uint 7
+       %bool = OpTypeBool
 %_ptr_Workgroup_int = OpTypePointer Workgroup %int
-         %33 = OpTypeFunction %void %uint
+         %43 = OpTypeFunction %void %uint
      %uint_2 = OpConstant %uint 2
  %uint_24840 = OpConstant %uint 24840
-       %bool = OpTypeBool
 %subgroupMatrixStore_1433f9 = OpFunction %void None %11
          %12 = OpLabel
       %arg_1 = OpVariable %_ptr_Function_uint Function
@@ -57,41 +59,51 @@
          %25 = OpLoad %uint %arg_1 None
          %26 = OpLoad %17 %arg_2 None
          %27 = OpLoad %uint %arg_4 None
-         %28 = OpAccessChain %_ptr_Workgroup_int %arg_0 %25
-               OpCooperativeMatrixStoreKHR %28 %26 %uint_1 %27 NonPrivatePointer
-               OpReturn
-               OpFunctionEnd
-%compute_main_inner = OpFunction %void None %33
-%tint_local_index = OpFunctionParameter %uint
-         %34 = OpLabel
-               OpBranch %35
-         %35 = OpLabel
-               OpBranch %38
-         %38 = OpLabel
-         %40 = OpPhi %uint %tint_local_index %35 %41 %37
-               OpLoopMerge %39 %37 None
+         %28 = OpExtInst %uint %29 UMax %27 %uint_8
+         %30 = OpIMul %uint %28 %uint_7
+         %32 = OpIAdd %uint %25 %30
+         %33 = OpIAdd %uint %32 %uint_8
+         %34 = OpULessThanEqual %bool %33 %uint_1024
+               OpSelectionMerge %36 None
+               OpBranchConditional %34 %37 %36
+         %37 = OpLabel
+         %38 = OpAccessChain %_ptr_Workgroup_int %arg_0 %25
+               OpCooperativeMatrixStoreKHR %38 %26 %uint_1 %28 NonPrivatePointer
                OpBranch %36
          %36 = OpLabel
-         %46 = OpUGreaterThanEqual %bool %40 %uint_1024
-               OpSelectionMerge %48 None
-               OpBranchConditional %46 %49 %48
-         %49 = OpLabel
-               OpBranch %39
+               OpReturn
+               OpFunctionEnd
+%compute_main_inner = OpFunction %void None %43
+%tint_local_index = OpFunctionParameter %uint
+         %44 = OpLabel
+               OpBranch %45
+         %45 = OpLabel
+               OpBranch %48
          %48 = OpLabel
-         %50 = OpAccessChain %_ptr_Workgroup_int %arg_0 %40
-               OpStore %50 %int_0 NonPrivatePointer
-               OpBranch %37
-         %37 = OpLabel
-         %41 = OpIAdd %uint %40 %uint_1
-               OpBranch %38
-         %39 = OpLabel
+         %50 = OpPhi %uint %tint_local_index %45 %51 %47
+               OpLoopMerge %49 %47 None
+               OpBranch %46
+         %46 = OpLabel
+         %56 = OpUGreaterThanEqual %bool %50 %uint_1024
+               OpSelectionMerge %57 None
+               OpBranchConditional %56 %58 %57
+         %58 = OpLabel
+               OpBranch %49
+         %57 = OpLabel
+         %59 = OpAccessChain %_ptr_Workgroup_int %arg_0 %50
+               OpStore %59 %int_0 NonPrivatePointer
+               OpBranch %47
+         %47 = OpLabel
+         %51 = OpIAdd %uint %50 %uint_1
+               OpBranch %48
+         %49 = OpLabel
                OpControlBarrier %uint_2 %uint_2 %uint_24840
-         %45 = OpFunctionCall %void %subgroupMatrixStore_1433f9
+         %55 = OpFunctionCall %void %subgroupMatrixStore_1433f9
                OpReturn
                OpFunctionEnd
 %compute_main = OpFunction %void None %11
-         %52 = OpLabel
-         %53 = OpLoad %uint %compute_main_local_invocation_index_Input None
-         %54 = OpFunctionCall %void %compute_main_inner %53
+         %61 = OpLabel
+         %62 = OpLoad %uint %compute_main_local_invocation_index_Input None
+         %63 = OpFunctionCall %void %compute_main_inner %62
                OpReturn
                OpFunctionEnd
diff --git a/test/tint/builtins/gen/var/subgroupMatrixStore/22077b.wgsl.expected.msl b/test/tint/builtins/gen/var/subgroupMatrixStore/22077b.wgsl.expected.msl
index a12acb8..170ff50 100644
--- a/test/tint/builtins/gen/var/subgroupMatrixStore/22077b.wgsl.expected.msl
+++ b/test/tint/builtins/gen/var/subgroupMatrixStore/22077b.wgsl.expected.msl
@@ -25,7 +25,12 @@
   uint arg_1 = 1u;
   simdgroup_half8x8 arg_2 = simdgroup_half8x8();
   uint arg_4 = 8u;
-  simdgroup_store(arg_2, (&(*tint_module_vars.sb_rw).arg_0[arg_1]), ulong(arg_4), ulong2(0ul), true);
+  uint const v = arg_1;
+  simdgroup_half8x8 const v_1 = arg_2;
+  uint const v_2 = max(arg_4, 8u);
+  if ((((v + (v_2 * 7u)) + 8u) <= 1024u)) {
+    simdgroup_store(v_1, (&(*tint_module_vars.sb_rw).arg_0[v]), ulong(v_2), ulong2(0ul), true);
+  }
 }
 
 kernel void compute_main(device SB_RW* sb_rw [[buffer(0)]]) {
diff --git a/test/tint/builtins/gen/var/subgroupMatrixStore/22077b.wgsl.expected.spvasm b/test/tint/builtins/gen/var/subgroupMatrixStore/22077b.wgsl.expected.spvasm
index 366056a..41d392d 100644
--- a/test/tint/builtins/gen/var/subgroupMatrixStore/22077b.wgsl.expected.spvasm
+++ b/test/tint/builtins/gen/var/subgroupMatrixStore/22077b.wgsl.expected.spvasm
@@ -1,7 +1,7 @@
 ; SPIR-V
 ; Version: 1.3
 ; Generator: Google Tint Compiler; 1
-; Bound: 37
+; Bound: 47
 ; Schema: 0
                OpCapability Shader
                OpCapability VulkanMemoryModel
@@ -12,6 +12,7 @@
                OpCapability CooperativeMatrixKHR
                OpExtension "SPV_KHR_vulkan_memory_model"
                OpExtension "SPV_KHR_cooperative_matrix"
+         %32 = OpExtInstImport "GLSL.std.450"
                OpMemoryModel Logical Vulkan
                OpEntryPoint GLCompute %compute_main "compute_main"
                OpExecutionMode %compute_main LocalSize 1 1 1
@@ -51,6 +52,8 @@
 %_ptr_Function_17 = OpTypePointer Function %17
 %_ptr_StorageBuffer__arr_half_uint_1024 = OpTypePointer StorageBuffer %_arr_half_uint_1024
      %uint_0 = OpConstant %uint 0
+     %uint_7 = OpConstant %uint 7
+       %bool = OpTypeBool
 %_ptr_StorageBuffer_half = OpTypePointer StorageBuffer %half
 %subgroupMatrixStore_22077b = OpFunction %void None %11
          %12 = OpLabel
@@ -64,12 +67,22 @@
          %28 = OpLoad %uint %arg_1 None
          %29 = OpLoad %17 %arg_2 None
          %30 = OpLoad %uint %arg_4 None
-         %31 = OpAccessChain %_ptr_StorageBuffer_half %25 %28
-               OpCooperativeMatrixStoreKHR %31 %29 %uint_1 %30 NonPrivatePointer
+         %31 = OpExtInst %uint %32 UMax %30 %uint_8
+         %33 = OpIMul %uint %31 %uint_7
+         %35 = OpIAdd %uint %28 %33
+         %36 = OpIAdd %uint %35 %uint_8
+         %37 = OpULessThanEqual %bool %36 %uint_1024
+               OpSelectionMerge %39 None
+               OpBranchConditional %37 %40 %39
+         %40 = OpLabel
+         %41 = OpAccessChain %_ptr_StorageBuffer_half %25 %28
+               OpCooperativeMatrixStoreKHR %41 %29 %uint_1 %31 NonPrivatePointer
+               OpBranch %39
+         %39 = OpLabel
                OpReturn
                OpFunctionEnd
 %compute_main = OpFunction %void None %11
-         %35 = OpLabel
-         %36 = OpFunctionCall %void %subgroupMatrixStore_22077b
+         %45 = OpLabel
+         %46 = OpFunctionCall %void %subgroupMatrixStore_22077b
                OpReturn
                OpFunctionEnd
diff --git a/test/tint/builtins/gen/var/subgroupMatrixStore/2d78d3.wgsl.expected.spvasm b/test/tint/builtins/gen/var/subgroupMatrixStore/2d78d3.wgsl.expected.spvasm
index 3c39608..9173c77 100644
--- a/test/tint/builtins/gen/var/subgroupMatrixStore/2d78d3.wgsl.expected.spvasm
+++ b/test/tint/builtins/gen/var/subgroupMatrixStore/2d78d3.wgsl.expected.spvasm
@@ -1,7 +1,7 @@
 ; SPIR-V
 ; Version: 1.3
 ; Generator: Google Tint Compiler; 1
-; Bound: 32
+; Bound: 43
 ; Schema: 0
                OpCapability Shader
                OpCapability VulkanMemoryModel
@@ -9,6 +9,7 @@
                OpCapability CooperativeMatrixKHR
                OpExtension "SPV_KHR_vulkan_memory_model"
                OpExtension "SPV_KHR_cooperative_matrix"
+         %27 = OpExtInstImport "GLSL.std.450"
                OpMemoryModel Logical Vulkan
                OpEntryPoint GLCompute %compute_main "compute_main"
                OpExecutionMode %compute_main LocalSize 1 1 1
@@ -41,6 +42,8 @@
          %13 = OpConstantComposite %14 %uint_0
 %_ptr_Function_14 = OpTypePointer Function %14
 %_ptr_StorageBuffer__runtimearr_uint = OpTypePointer StorageBuffer %_runtimearr_uint
+     %uint_7 = OpConstant %uint 7
+       %bool = OpTypeBool
 %_ptr_StorageBuffer_uint = OpTypePointer StorageBuffer %uint
 %subgroupMatrixStore_2d78d3 = OpFunction %void None %8
           %9 = OpLabel
@@ -54,12 +57,23 @@
          %23 = OpLoad %uint %arg_1 None
          %24 = OpLoad %14 %arg_2 None
          %25 = OpLoad %uint %arg_4 None
-         %26 = OpAccessChain %_ptr_StorageBuffer_uint %21 %23
-               OpCooperativeMatrixStoreKHR %26 %24 %uint_1 %25 NonPrivatePointer
+         %26 = OpExtInst %uint %27 UMax %25 %uint_8
+         %28 = OpArrayLength %uint %sb_rw 0
+         %29 = OpIMul %uint %26 %uint_7
+         %31 = OpIAdd %uint %23 %29
+         %32 = OpIAdd %uint %31 %uint_8
+         %33 = OpULessThanEqual %bool %32 %28
+               OpSelectionMerge %35 None
+               OpBranchConditional %33 %36 %35
+         %36 = OpLabel
+         %37 = OpAccessChain %_ptr_StorageBuffer_uint %21 %23
+               OpCooperativeMatrixStoreKHR %37 %24 %uint_1 %26 NonPrivatePointer
+               OpBranch %35
+         %35 = OpLabel
                OpReturn
                OpFunctionEnd
 %compute_main = OpFunction %void None %8
-         %30 = OpLabel
-         %31 = OpFunctionCall %void %subgroupMatrixStore_2d78d3
+         %41 = OpLabel
+         %42 = OpFunctionCall %void %subgroupMatrixStore_2d78d3
                OpReturn
                OpFunctionEnd
diff --git a/test/tint/builtins/gen/var/subgroupMatrixStore/3ea76e.wgsl.expected.spvasm b/test/tint/builtins/gen/var/subgroupMatrixStore/3ea76e.wgsl.expected.spvasm
index f932011..52941c2 100644
--- a/test/tint/builtins/gen/var/subgroupMatrixStore/3ea76e.wgsl.expected.spvasm
+++ b/test/tint/builtins/gen/var/subgroupMatrixStore/3ea76e.wgsl.expected.spvasm
@@ -1,7 +1,7 @@
 ; SPIR-V
 ; Version: 1.3
 ; Generator: Google Tint Compiler; 1
-; Bound: 34
+; Bound: 45
 ; Schema: 0
                OpCapability Shader
                OpCapability VulkanMemoryModel
@@ -9,6 +9,7 @@
                OpCapability CooperativeMatrixKHR
                OpExtension "SPV_KHR_vulkan_memory_model"
                OpExtension "SPV_KHR_cooperative_matrix"
+         %29 = OpExtInstImport "GLSL.std.450"
                OpMemoryModel Logical Vulkan
                OpEntryPoint GLCompute %compute_main "compute_main"
                OpExecutionMode %compute_main LocalSize 1 1 1
@@ -43,6 +44,8 @@
          %14 = OpConstantComposite %15 %int_0
 %_ptr_Function_15 = OpTypePointer Function %15
 %_ptr_StorageBuffer__runtimearr_int = OpTypePointer StorageBuffer %_runtimearr_int
+     %uint_7 = OpConstant %uint 7
+       %bool = OpTypeBool
 %_ptr_StorageBuffer_int = OpTypePointer StorageBuffer %int
 %subgroupMatrixStore_3ea76e = OpFunction %void None %8
           %9 = OpLabel
@@ -56,12 +59,23 @@
          %25 = OpLoad %uint %arg_1 None
          %26 = OpLoad %15 %arg_2 None
          %27 = OpLoad %uint %arg_4 None
-         %28 = OpAccessChain %_ptr_StorageBuffer_int %23 %25
-               OpCooperativeMatrixStoreKHR %28 %26 %uint_1 %27 NonPrivatePointer
+         %28 = OpExtInst %uint %29 UMax %27 %uint_8
+         %30 = OpArrayLength %uint %sb_rw 0
+         %31 = OpIMul %uint %28 %uint_7
+         %33 = OpIAdd %uint %25 %31
+         %34 = OpIAdd %uint %33 %uint_8
+         %35 = OpULessThanEqual %bool %34 %30
+               OpSelectionMerge %37 None
+               OpBranchConditional %35 %38 %37
+         %38 = OpLabel
+         %39 = OpAccessChain %_ptr_StorageBuffer_int %23 %25
+               OpCooperativeMatrixStoreKHR %39 %26 %uint_1 %28 NonPrivatePointer
+               OpBranch %37
+         %37 = OpLabel
                OpReturn
                OpFunctionEnd
 %compute_main = OpFunction %void None %8
-         %32 = OpLabel
-         %33 = OpFunctionCall %void %subgroupMatrixStore_3ea76e
+         %43 = OpLabel
+         %44 = OpFunctionCall %void %subgroupMatrixStore_3ea76e
                OpReturn
                OpFunctionEnd
diff --git a/test/tint/builtins/gen/var/subgroupMatrixStore/45de0c.wgsl.expected.msl b/test/tint/builtins/gen/var/subgroupMatrixStore/45de0c.wgsl.expected.msl
index 5901858..1cc8607 100644
--- a/test/tint/builtins/gen/var/subgroupMatrixStore/45de0c.wgsl.expected.msl
+++ b/test/tint/builtins/gen/var/subgroupMatrixStore/45de0c.wgsl.expected.msl
@@ -25,21 +25,26 @@
   uint arg_1 = 1u;
   simdgroup_half8x8 arg_2 = simdgroup_half8x8();
   uint arg_4 = 8u;
-  simdgroup_store(arg_2, (&(*tint_module_vars.arg_0)[arg_1]), ulong(arg_4), ulong2(0ul), true);
+  uint const v = arg_1;
+  simdgroup_half8x8 const v_1 = arg_2;
+  uint const v_2 = max(arg_4, 8u);
+  if ((((v + (v_2 * 7u)) + 8u) <= 1024u)) {
+    simdgroup_store(v_1, (&(*tint_module_vars.arg_0)[v]), ulong(v_2), ulong2(0ul), true);
+  }
 }
 
 void compute_main_inner(uint tint_local_index, tint_module_vars_struct tint_module_vars) {
   {
-    uint v = 0u;
-    v = tint_local_index;
+    uint v_3 = 0u;
+    v_3 = tint_local_index;
     while(true) {
-      uint const v_1 = v;
-      if ((v_1 >= 1024u)) {
+      uint const v_4 = v_3;
+      if ((v_4 >= 1024u)) {
         break;
       }
-      (*tint_module_vars.arg_0)[v_1] = 0.0h;
+      (*tint_module_vars.arg_0)[v_4] = 0.0h;
       {
-        v = (v_1 + 1u);
+        v_3 = (v_4 + 1u);
       }
       continue;
     }
@@ -48,7 +53,7 @@
   subgroupMatrixStore_45de0c(tint_module_vars);
 }
 
-kernel void compute_main(uint tint_local_index [[thread_index_in_threadgroup]], threadgroup tint_symbol_1* v_2 [[threadgroup(0)]]) {
-  tint_module_vars_struct const tint_module_vars = tint_module_vars_struct{.arg_0=(&(*v_2).tint_symbol)};
+kernel void compute_main(uint tint_local_index [[thread_index_in_threadgroup]], threadgroup tint_symbol_1* v_5 [[threadgroup(0)]]) {
+  tint_module_vars_struct const tint_module_vars = tint_module_vars_struct{.arg_0=(&(*v_5).tint_symbol)};
   compute_main_inner(tint_local_index, tint_module_vars);
 }
diff --git a/test/tint/builtins/gen/var/subgroupMatrixStore/45de0c.wgsl.expected.spvasm b/test/tint/builtins/gen/var/subgroupMatrixStore/45de0c.wgsl.expected.spvasm
index 7f4a451..b8df920 100644
--- a/test/tint/builtins/gen/var/subgroupMatrixStore/45de0c.wgsl.expected.spvasm
+++ b/test/tint/builtins/gen/var/subgroupMatrixStore/45de0c.wgsl.expected.spvasm
@@ -1,7 +1,7 @@
 ; SPIR-V
 ; Version: 1.3
 ; Generator: Google Tint Compiler; 1
-; Bound: 55
+; Bound: 64
 ; Schema: 0
                OpCapability Shader
                OpCapability VulkanMemoryModel
@@ -12,6 +12,7 @@
                OpCapability CooperativeMatrixKHR
                OpExtension "SPV_KHR_vulkan_memory_model"
                OpExtension "SPV_KHR_cooperative_matrix"
+         %29 = OpExtInstImport "GLSL.std.450"
                OpMemoryModel Logical Vulkan
                OpEntryPoint GLCompute %compute_main "compute_main" %compute_main_local_invocation_index_Input
                OpExecutionMode %compute_main LocalSize 1 1 1
@@ -44,11 +45,12 @@
 %half_0x0p_0 = OpConstant %half 0x0p+0
          %16 = OpConstantComposite %17 %half_0x0p_0
 %_ptr_Function_17 = OpTypePointer Function %17
+     %uint_7 = OpConstant %uint 7
+       %bool = OpTypeBool
 %_ptr_Workgroup_half = OpTypePointer Workgroup %half
-         %33 = OpTypeFunction %void %uint
+         %43 = OpTypeFunction %void %uint
      %uint_2 = OpConstant %uint 2
  %uint_24840 = OpConstant %uint 24840
-       %bool = OpTypeBool
 %subgroupMatrixStore_45de0c = OpFunction %void None %11
          %12 = OpLabel
       %arg_1 = OpVariable %_ptr_Function_uint Function
@@ -60,41 +62,51 @@
          %25 = OpLoad %uint %arg_1 None
          %26 = OpLoad %17 %arg_2 None
          %27 = OpLoad %uint %arg_4 None
-         %28 = OpAccessChain %_ptr_Workgroup_half %arg_0 %25
-               OpCooperativeMatrixStoreKHR %28 %26 %uint_1 %27 NonPrivatePointer
-               OpReturn
-               OpFunctionEnd
-%compute_main_inner = OpFunction %void None %33
-%tint_local_index = OpFunctionParameter %uint
-         %34 = OpLabel
-               OpBranch %35
-         %35 = OpLabel
-               OpBranch %38
-         %38 = OpLabel
-         %40 = OpPhi %uint %tint_local_index %35 %41 %37
-               OpLoopMerge %39 %37 None
+         %28 = OpExtInst %uint %29 UMax %27 %uint_8
+         %30 = OpIMul %uint %28 %uint_7
+         %32 = OpIAdd %uint %25 %30
+         %33 = OpIAdd %uint %32 %uint_8
+         %34 = OpULessThanEqual %bool %33 %uint_1024
+               OpSelectionMerge %36 None
+               OpBranchConditional %34 %37 %36
+         %37 = OpLabel
+         %38 = OpAccessChain %_ptr_Workgroup_half %arg_0 %25
+               OpCooperativeMatrixStoreKHR %38 %26 %uint_1 %28 NonPrivatePointer
                OpBranch %36
          %36 = OpLabel
-         %46 = OpUGreaterThanEqual %bool %40 %uint_1024
-               OpSelectionMerge %48 None
-               OpBranchConditional %46 %49 %48
-         %49 = OpLabel
-               OpBranch %39
+               OpReturn
+               OpFunctionEnd
+%compute_main_inner = OpFunction %void None %43
+%tint_local_index = OpFunctionParameter %uint
+         %44 = OpLabel
+               OpBranch %45
+         %45 = OpLabel
+               OpBranch %48
          %48 = OpLabel
-         %50 = OpAccessChain %_ptr_Workgroup_half %arg_0 %40
-               OpStore %50 %half_0x0p_0 NonPrivatePointer
-               OpBranch %37
-         %37 = OpLabel
-         %41 = OpIAdd %uint %40 %uint_1
-               OpBranch %38
-         %39 = OpLabel
+         %50 = OpPhi %uint %tint_local_index %45 %51 %47
+               OpLoopMerge %49 %47 None
+               OpBranch %46
+         %46 = OpLabel
+         %56 = OpUGreaterThanEqual %bool %50 %uint_1024
+               OpSelectionMerge %57 None
+               OpBranchConditional %56 %58 %57
+         %58 = OpLabel
+               OpBranch %49
+         %57 = OpLabel
+         %59 = OpAccessChain %_ptr_Workgroup_half %arg_0 %50
+               OpStore %59 %half_0x0p_0 NonPrivatePointer
+               OpBranch %47
+         %47 = OpLabel
+         %51 = OpIAdd %uint %50 %uint_1
+               OpBranch %48
+         %49 = OpLabel
                OpControlBarrier %uint_2 %uint_2 %uint_24840
-         %45 = OpFunctionCall %void %subgroupMatrixStore_45de0c
+         %55 = OpFunctionCall %void %subgroupMatrixStore_45de0c
                OpReturn
                OpFunctionEnd
 %compute_main = OpFunction %void None %11
-         %52 = OpLabel
-         %53 = OpLoad %uint %compute_main_local_invocation_index_Input None
-         %54 = OpFunctionCall %void %compute_main_inner %53
+         %61 = OpLabel
+         %62 = OpLoad %uint %compute_main_local_invocation_index_Input None
+         %63 = OpFunctionCall %void %compute_main_inner %62
                OpReturn
                OpFunctionEnd
diff --git a/test/tint/builtins/gen/var/subgroupMatrixStore/476cdf.wgsl.expected.spvasm b/test/tint/builtins/gen/var/subgroupMatrixStore/476cdf.wgsl.expected.spvasm
index 5148c6e..ed21f72 100644
--- a/test/tint/builtins/gen/var/subgroupMatrixStore/476cdf.wgsl.expected.spvasm
+++ b/test/tint/builtins/gen/var/subgroupMatrixStore/476cdf.wgsl.expected.spvasm
@@ -1,7 +1,7 @@
 ; SPIR-V
 ; Version: 1.3
 ; Generator: Google Tint Compiler; 1
-; Bound: 35
+; Bound: 46
 ; Schema: 0
                OpCapability Shader
                OpCapability VulkanMemoryModel
@@ -9,6 +9,7 @@
                OpCapability CooperativeMatrixKHR
                OpExtension "SPV_KHR_vulkan_memory_model"
                OpExtension "SPV_KHR_cooperative_matrix"
+         %30 = OpExtInstImport "GLSL.std.450"
                OpMemoryModel Logical Vulkan
                OpEntryPoint GLCompute %compute_main "compute_main"
                OpExecutionMode %compute_main LocalSize 1 1 1
@@ -44,6 +45,8 @@
 %_ptr_Function_15 = OpTypePointer Function %15
 %_ptr_StorageBuffer__runtimearr_int = OpTypePointer StorageBuffer %_runtimearr_int
      %uint_0 = OpConstant %uint 0
+     %uint_7 = OpConstant %uint 7
+       %bool = OpTypeBool
 %_ptr_StorageBuffer_int = OpTypePointer StorageBuffer %int
 %subgroupMatrixStore_476cdf = OpFunction %void None %8
           %9 = OpLabel
@@ -57,12 +60,23 @@
          %26 = OpLoad %uint %arg_1 None
          %27 = OpLoad %15 %arg_2 None
          %28 = OpLoad %uint %arg_4 None
-         %29 = OpAccessChain %_ptr_StorageBuffer_int %23 %26
-               OpCooperativeMatrixStoreKHR %29 %27 %uint_1 %28 NonPrivatePointer
+         %29 = OpExtInst %uint %30 UMax %28 %uint_8
+         %31 = OpArrayLength %uint %sb_rw 0
+         %32 = OpIMul %uint %29 %uint_7
+         %34 = OpIAdd %uint %26 %32
+         %35 = OpIAdd %uint %34 %uint_8
+         %36 = OpULessThanEqual %bool %35 %31
+               OpSelectionMerge %38 None
+               OpBranchConditional %36 %39 %38
+         %39 = OpLabel
+         %40 = OpAccessChain %_ptr_StorageBuffer_int %23 %26
+               OpCooperativeMatrixStoreKHR %40 %27 %uint_1 %29 NonPrivatePointer
+               OpBranch %38
+         %38 = OpLabel
                OpReturn
                OpFunctionEnd
 %compute_main = OpFunction %void None %8
-         %33 = OpLabel
-         %34 = OpFunctionCall %void %subgroupMatrixStore_476cdf
+         %44 = OpLabel
+         %45 = OpFunctionCall %void %subgroupMatrixStore_476cdf
                OpReturn
                OpFunctionEnd
diff --git a/test/tint/builtins/gen/var/subgroupMatrixStore/47cd26.wgsl.expected.spvasm b/test/tint/builtins/gen/var/subgroupMatrixStore/47cd26.wgsl.expected.spvasm
index 14055d7..9077f48 100644
--- a/test/tint/builtins/gen/var/subgroupMatrixStore/47cd26.wgsl.expected.spvasm
+++ b/test/tint/builtins/gen/var/subgroupMatrixStore/47cd26.wgsl.expected.spvasm
@@ -1,7 +1,7 @@
 ; SPIR-V
 ; Version: 1.3
 ; Generator: Google Tint Compiler; 1
-; Bound: 36
+; Bound: 46
 ; Schema: 0
                OpCapability Shader
                OpCapability VulkanMemoryModel
@@ -9,6 +9,7 @@
                OpCapability CooperativeMatrixKHR
                OpExtension "SPV_KHR_vulkan_memory_model"
                OpExtension "SPV_KHR_cooperative_matrix"
+         %31 = OpExtInstImport "GLSL.std.450"
                OpMemoryModel Logical Vulkan
                OpEntryPoint GLCompute %compute_main "compute_main"
                OpExecutionMode %compute_main LocalSize 1 1 1
@@ -47,6 +48,8 @@
          %16 = OpConstantComposite %17 %int_0
 %_ptr_Function_17 = OpTypePointer Function %17
 %_ptr_StorageBuffer__arr_int_uint_1024 = OpTypePointer StorageBuffer %_arr_int_uint_1024
+     %uint_7 = OpConstant %uint 7
+       %bool = OpTypeBool
 %_ptr_StorageBuffer_int = OpTypePointer StorageBuffer %int
 %subgroupMatrixStore_47cd26 = OpFunction %void None %11
          %12 = OpLabel
@@ -60,12 +63,22 @@
          %27 = OpLoad %uint %arg_1 None
          %28 = OpLoad %17 %arg_2 None
          %29 = OpLoad %uint %arg_4 None
-         %30 = OpAccessChain %_ptr_StorageBuffer_int %25 %27
-               OpCooperativeMatrixStoreKHR %30 %28 %uint_1 %29 NonPrivatePointer
+         %30 = OpExtInst %uint %31 UMax %29 %uint_8
+         %32 = OpIMul %uint %30 %uint_7
+         %34 = OpIAdd %uint %27 %32
+         %35 = OpIAdd %uint %34 %uint_8
+         %36 = OpULessThanEqual %bool %35 %uint_1024
+               OpSelectionMerge %38 None
+               OpBranchConditional %36 %39 %38
+         %39 = OpLabel
+         %40 = OpAccessChain %_ptr_StorageBuffer_int %25 %27
+               OpCooperativeMatrixStoreKHR %40 %28 %uint_1 %30 NonPrivatePointer
+               OpBranch %38
+         %38 = OpLabel
                OpReturn
                OpFunctionEnd
 %compute_main = OpFunction %void None %11
-         %34 = OpLabel
-         %35 = OpFunctionCall %void %subgroupMatrixStore_47cd26
+         %44 = OpLabel
+         %45 = OpFunctionCall %void %subgroupMatrixStore_47cd26
                OpReturn
                OpFunctionEnd
diff --git a/test/tint/builtins/gen/var/subgroupMatrixStore/4980a5.wgsl.expected.spvasm b/test/tint/builtins/gen/var/subgroupMatrixStore/4980a5.wgsl.expected.spvasm
index 249a7c2..2a8fc10 100644
--- a/test/tint/builtins/gen/var/subgroupMatrixStore/4980a5.wgsl.expected.spvasm
+++ b/test/tint/builtins/gen/var/subgroupMatrixStore/4980a5.wgsl.expected.spvasm
@@ -1,7 +1,7 @@
 ; SPIR-V
 ; Version: 1.3
 ; Generator: Google Tint Compiler; 1
-; Bound: 35
+; Bound: 45
 ; Schema: 0
                OpCapability Shader
                OpCapability VulkanMemoryModel
@@ -9,6 +9,7 @@
                OpCapability CooperativeMatrixKHR
                OpExtension "SPV_KHR_vulkan_memory_model"
                OpExtension "SPV_KHR_cooperative_matrix"
+         %30 = OpExtInstImport "GLSL.std.450"
                OpMemoryModel Logical Vulkan
                OpEntryPoint GLCompute %compute_main "compute_main"
                OpExecutionMode %compute_main LocalSize 1 1 1
@@ -46,6 +47,8 @@
          %15 = OpConstantComposite %16 %uint_0
 %_ptr_Function_16 = OpTypePointer Function %16
 %_ptr_StorageBuffer__arr_uint_uint_1024 = OpTypePointer StorageBuffer %_arr_uint_uint_1024
+     %uint_7 = OpConstant %uint 7
+       %bool = OpTypeBool
 %_ptr_StorageBuffer_uint = OpTypePointer StorageBuffer %uint
 %subgroupMatrixStore_4980a5 = OpFunction %void None %10
          %11 = OpLabel
@@ -59,12 +62,22 @@
          %26 = OpLoad %uint %arg_1 None
          %27 = OpLoad %16 %arg_2 None
          %28 = OpLoad %uint %arg_4 None
-         %29 = OpAccessChain %_ptr_StorageBuffer_uint %24 %26
-               OpCooperativeMatrixStoreKHR %29 %27 %uint_1 %28 NonPrivatePointer
+         %29 = OpExtInst %uint %30 UMax %28 %uint_8
+         %31 = OpIMul %uint %29 %uint_7
+         %33 = OpIAdd %uint %26 %31
+         %34 = OpIAdd %uint %33 %uint_8
+         %35 = OpULessThanEqual %bool %34 %uint_1024
+               OpSelectionMerge %37 None
+               OpBranchConditional %35 %38 %37
+         %38 = OpLabel
+         %39 = OpAccessChain %_ptr_StorageBuffer_uint %24 %26
+               OpCooperativeMatrixStoreKHR %39 %27 %uint_1 %29 NonPrivatePointer
+               OpBranch %37
+         %37 = OpLabel
                OpReturn
                OpFunctionEnd
 %compute_main = OpFunction %void None %10
-         %33 = OpLabel
-         %34 = OpFunctionCall %void %subgroupMatrixStore_4980a5
+         %43 = OpLabel
+         %44 = OpFunctionCall %void %subgroupMatrixStore_4980a5
                OpReturn
                OpFunctionEnd
diff --git a/test/tint/builtins/gen/var/subgroupMatrixStore/543411.wgsl.expected.msl b/test/tint/builtins/gen/var/subgroupMatrixStore/543411.wgsl.expected.msl
index 7f42af8..ed2c089 100644
--- a/test/tint/builtins/gen/var/subgroupMatrixStore/543411.wgsl.expected.msl
+++ b/test/tint/builtins/gen/var/subgroupMatrixStore/543411.wgsl.expected.msl
@@ -19,16 +19,22 @@
 
 struct tint_module_vars_struct {
   device SB_RW* sb_rw;
+  const constant tint_array<uint4, 1>* tint_storage_buffer_sizes;
 };
 
 void subgroupMatrixStore_543411(tint_module_vars_struct tint_module_vars) {
   uint arg_1 = 1u;
   simdgroup_float8x8 arg_2 = simdgroup_float8x8();
   uint arg_4 = 8u;
-  simdgroup_store(arg_2, (&(*tint_module_vars.sb_rw).arg_0[arg_1]), ulong(arg_4), ulong2(0ul), true);
+  uint const v = arg_1;
+  simdgroup_float8x8 const v_1 = arg_2;
+  uint const v_2 = max(arg_4, 8u);
+  if ((((v + (v_2 * 7u)) + 8u) <= (((*tint_module_vars.tint_storage_buffer_sizes)[0u].x - 0u) / 4u))) {
+    simdgroup_store(v_1, (&(*tint_module_vars.sb_rw).arg_0[v]), ulong(v_2), ulong2(0ul), true);
+  }
 }
 
-kernel void compute_main(device SB_RW* sb_rw [[buffer(0)]]) {
-  tint_module_vars_struct const tint_module_vars = tint_module_vars_struct{.sb_rw=sb_rw};
+kernel void compute_main(device SB_RW* sb_rw [[buffer(0)]], const constant tint_array<uint4, 1>* tint_storage_buffer_sizes [[buffer(30)]]) {
+  tint_module_vars_struct const tint_module_vars = tint_module_vars_struct{.sb_rw=sb_rw, .tint_storage_buffer_sizes=tint_storage_buffer_sizes};
   subgroupMatrixStore_543411(tint_module_vars);
 }
diff --git a/test/tint/builtins/gen/var/subgroupMatrixStore/543411.wgsl.expected.spvasm b/test/tint/builtins/gen/var/subgroupMatrixStore/543411.wgsl.expected.spvasm
index b711750..22a40c4 100644
--- a/test/tint/builtins/gen/var/subgroupMatrixStore/543411.wgsl.expected.spvasm
+++ b/test/tint/builtins/gen/var/subgroupMatrixStore/543411.wgsl.expected.spvasm
@@ -1,7 +1,7 @@
 ; SPIR-V
 ; Version: 1.3
 ; Generator: Google Tint Compiler; 1
-; Bound: 34
+; Bound: 45
 ; Schema: 0
                OpCapability Shader
                OpCapability VulkanMemoryModel
@@ -9,6 +9,7 @@
                OpCapability CooperativeMatrixKHR
                OpExtension "SPV_KHR_vulkan_memory_model"
                OpExtension "SPV_KHR_cooperative_matrix"
+         %29 = OpExtInstImport "GLSL.std.450"
                OpMemoryModel Logical Vulkan
                OpEntryPoint GLCompute %compute_main "compute_main"
                OpExecutionMode %compute_main LocalSize 1 1 1
@@ -43,6 +44,8 @@
 %_ptr_Function_15 = OpTypePointer Function %15
 %_ptr_StorageBuffer__runtimearr_float = OpTypePointer StorageBuffer %_runtimearr_float
      %uint_0 = OpConstant %uint 0
+     %uint_7 = OpConstant %uint 7
+       %bool = OpTypeBool
 %_ptr_StorageBuffer_float = OpTypePointer StorageBuffer %float
 %subgroupMatrixStore_543411 = OpFunction %void None %8
           %9 = OpLabel
@@ -56,12 +59,23 @@
          %25 = OpLoad %uint %arg_1 None
          %26 = OpLoad %15 %arg_2 None
          %27 = OpLoad %uint %arg_4 None
-         %28 = OpAccessChain %_ptr_StorageBuffer_float %22 %25
-               OpCooperativeMatrixStoreKHR %28 %26 %uint_1 %27 NonPrivatePointer
+         %28 = OpExtInst %uint %29 UMax %27 %uint_8
+         %30 = OpArrayLength %uint %sb_rw 0
+         %31 = OpIMul %uint %28 %uint_7
+         %33 = OpIAdd %uint %25 %31
+         %34 = OpIAdd %uint %33 %uint_8
+         %35 = OpULessThanEqual %bool %34 %30
+               OpSelectionMerge %37 None
+               OpBranchConditional %35 %38 %37
+         %38 = OpLabel
+         %39 = OpAccessChain %_ptr_StorageBuffer_float %22 %25
+               OpCooperativeMatrixStoreKHR %39 %26 %uint_1 %28 NonPrivatePointer
+               OpBranch %37
+         %37 = OpLabel
                OpReturn
                OpFunctionEnd
 %compute_main = OpFunction %void None %8
-         %32 = OpLabel
-         %33 = OpFunctionCall %void %subgroupMatrixStore_543411
+         %43 = OpLabel
+         %44 = OpFunctionCall %void %subgroupMatrixStore_543411
                OpReturn
                OpFunctionEnd
diff --git a/test/tint/builtins/gen/var/subgroupMatrixStore/62c1b6.wgsl.expected.spvasm b/test/tint/builtins/gen/var/subgroupMatrixStore/62c1b6.wgsl.expected.spvasm
index 7496baf..9105363 100644
--- a/test/tint/builtins/gen/var/subgroupMatrixStore/62c1b6.wgsl.expected.spvasm
+++ b/test/tint/builtins/gen/var/subgroupMatrixStore/62c1b6.wgsl.expected.spvasm
@@ -1,7 +1,7 @@
 ; SPIR-V
 ; Version: 1.3
 ; Generator: Google Tint Compiler; 1
-; Bound: 53
+; Bound: 62
 ; Schema: 0
                OpCapability Shader
                OpCapability VulkanMemoryModel
@@ -9,6 +9,7 @@
                OpCapability CooperativeMatrixKHR
                OpExtension "SPV_KHR_vulkan_memory_model"
                OpExtension "SPV_KHR_cooperative_matrix"
+         %27 = OpExtInstImport "GLSL.std.450"
                OpMemoryModel Logical Vulkan
                OpEntryPoint GLCompute %compute_main "compute_main" %compute_main_local_invocation_index_Input
                OpExecutionMode %compute_main LocalSize 1 1 1
@@ -39,11 +40,12 @@
          %16 = OpTypeCooperativeMatrixKHR %uint %uint_3 %uint_8 %uint_8 %uint_0
          %15 = OpConstantComposite %16 %uint_0
 %_ptr_Function_16 = OpTypePointer Function %16
+     %uint_7 = OpConstant %uint 7
+       %bool = OpTypeBool
 %_ptr_Workgroup_uint = OpTypePointer Workgroup %uint
-         %31 = OpTypeFunction %void %uint
+         %41 = OpTypeFunction %void %uint
      %uint_2 = OpConstant %uint 2
  %uint_24840 = OpConstant %uint 24840
-       %bool = OpTypeBool
 %subgroupMatrixStore_62c1b6 = OpFunction %void None %10
          %11 = OpLabel
       %arg_1 = OpVariable %_ptr_Function_uint Function
@@ -55,41 +57,51 @@
          %23 = OpLoad %uint %arg_1 None
          %24 = OpLoad %16 %arg_2 None
          %25 = OpLoad %uint %arg_4 None
-         %26 = OpAccessChain %_ptr_Workgroup_uint %arg_0 %23
-               OpCooperativeMatrixStoreKHR %26 %24 %uint_1 %25 NonPrivatePointer
-               OpReturn
-               OpFunctionEnd
-%compute_main_inner = OpFunction %void None %31
-%tint_local_index = OpFunctionParameter %uint
-         %32 = OpLabel
-               OpBranch %33
-         %33 = OpLabel
-               OpBranch %36
-         %36 = OpLabel
-         %38 = OpPhi %uint %tint_local_index %33 %39 %35
-               OpLoopMerge %37 %35 None
+         %26 = OpExtInst %uint %27 UMax %25 %uint_8
+         %28 = OpIMul %uint %26 %uint_7
+         %30 = OpIAdd %uint %23 %28
+         %31 = OpIAdd %uint %30 %uint_8
+         %32 = OpULessThanEqual %bool %31 %uint_1024
+               OpSelectionMerge %34 None
+               OpBranchConditional %32 %35 %34
+         %35 = OpLabel
+         %36 = OpAccessChain %_ptr_Workgroup_uint %arg_0 %23
+               OpCooperativeMatrixStoreKHR %36 %24 %uint_1 %26 NonPrivatePointer
                OpBranch %34
          %34 = OpLabel
-         %44 = OpUGreaterThanEqual %bool %38 %uint_1024
-               OpSelectionMerge %46 None
-               OpBranchConditional %44 %47 %46
-         %47 = OpLabel
-               OpBranch %37
+               OpReturn
+               OpFunctionEnd
+%compute_main_inner = OpFunction %void None %41
+%tint_local_index = OpFunctionParameter %uint
+         %42 = OpLabel
+               OpBranch %43
+         %43 = OpLabel
+               OpBranch %46
          %46 = OpLabel
-         %48 = OpAccessChain %_ptr_Workgroup_uint %arg_0 %38
-               OpStore %48 %uint_0 NonPrivatePointer
-               OpBranch %35
-         %35 = OpLabel
-         %39 = OpIAdd %uint %38 %uint_1
-               OpBranch %36
-         %37 = OpLabel
+         %48 = OpPhi %uint %tint_local_index %43 %49 %45
+               OpLoopMerge %47 %45 None
+               OpBranch %44
+         %44 = OpLabel
+         %54 = OpUGreaterThanEqual %bool %48 %uint_1024
+               OpSelectionMerge %55 None
+               OpBranchConditional %54 %56 %55
+         %56 = OpLabel
+               OpBranch %47
+         %55 = OpLabel
+         %57 = OpAccessChain %_ptr_Workgroup_uint %arg_0 %48
+               OpStore %57 %uint_0 NonPrivatePointer
+               OpBranch %45
+         %45 = OpLabel
+         %49 = OpIAdd %uint %48 %uint_1
+               OpBranch %46
+         %47 = OpLabel
                OpControlBarrier %uint_2 %uint_2 %uint_24840
-         %43 = OpFunctionCall %void %subgroupMatrixStore_62c1b6
+         %53 = OpFunctionCall %void %subgroupMatrixStore_62c1b6
                OpReturn
                OpFunctionEnd
 %compute_main = OpFunction %void None %10
-         %50 = OpLabel
-         %51 = OpLoad %uint %compute_main_local_invocation_index_Input None
-         %52 = OpFunctionCall %void %compute_main_inner %51
+         %59 = OpLabel
+         %60 = OpLoad %uint %compute_main_local_invocation_index_Input None
+         %61 = OpFunctionCall %void %compute_main_inner %60
                OpReturn
                OpFunctionEnd
diff --git a/test/tint/builtins/gen/var/subgroupMatrixStore/6578da.wgsl.expected.spvasm b/test/tint/builtins/gen/var/subgroupMatrixStore/6578da.wgsl.expected.spvasm
index 11b49a0..e95ee59 100644
--- a/test/tint/builtins/gen/var/subgroupMatrixStore/6578da.wgsl.expected.spvasm
+++ b/test/tint/builtins/gen/var/subgroupMatrixStore/6578da.wgsl.expected.spvasm
@@ -1,7 +1,7 @@
 ; SPIR-V
 ; Version: 1.3
 ; Generator: Google Tint Compiler; 1
-; Bound: 54
+; Bound: 63
 ; Schema: 0
                OpCapability Shader
                OpCapability VulkanMemoryModel
@@ -9,6 +9,7 @@
                OpCapability CooperativeMatrixKHR
                OpExtension "SPV_KHR_vulkan_memory_model"
                OpExtension "SPV_KHR_cooperative_matrix"
+         %29 = OpExtInstImport "GLSL.std.450"
                OpMemoryModel Logical Vulkan
                OpEntryPoint GLCompute %compute_main "compute_main" %compute_main_local_invocation_index_Input
                OpExecutionMode %compute_main LocalSize 1 1 1
@@ -41,10 +42,11 @@
       %int_0 = OpConstant %int 0
          %16 = OpConstantComposite %17 %int_0
 %_ptr_Function_17 = OpTypePointer Function %17
-%_ptr_Workgroup_int = OpTypePointer Workgroup %int
-         %33 = OpTypeFunction %void %uint
- %uint_24840 = OpConstant %uint 24840
+     %uint_7 = OpConstant %uint 7
        %bool = OpTypeBool
+%_ptr_Workgroup_int = OpTypePointer Workgroup %int
+         %43 = OpTypeFunction %void %uint
+ %uint_24840 = OpConstant %uint 24840
 %subgroupMatrixStore_6578da = OpFunction %void None %11
          %12 = OpLabel
       %arg_1 = OpVariable %_ptr_Function_uint Function
@@ -56,41 +58,51 @@
          %25 = OpLoad %uint %arg_1 None
          %26 = OpLoad %17 %arg_2 None
          %27 = OpLoad %uint %arg_4 None
-         %28 = OpAccessChain %_ptr_Workgroup_int %arg_0 %25
-               OpCooperativeMatrixStoreKHR %28 %26 %uint_1 %27 NonPrivatePointer
-               OpReturn
-               OpFunctionEnd
-%compute_main_inner = OpFunction %void None %33
-%tint_local_index = OpFunctionParameter %uint
-         %34 = OpLabel
-               OpBranch %35
-         %35 = OpLabel
-               OpBranch %38
-         %38 = OpLabel
-         %40 = OpPhi %uint %tint_local_index %35 %41 %37
-               OpLoopMerge %39 %37 None
+         %28 = OpExtInst %uint %29 UMax %27 %uint_8
+         %30 = OpIMul %uint %28 %uint_7
+         %32 = OpIAdd %uint %25 %30
+         %33 = OpIAdd %uint %32 %uint_8
+         %34 = OpULessThanEqual %bool %33 %uint_1024
+               OpSelectionMerge %36 None
+               OpBranchConditional %34 %37 %36
+         %37 = OpLabel
+         %38 = OpAccessChain %_ptr_Workgroup_int %arg_0 %25
+               OpCooperativeMatrixStoreKHR %38 %26 %uint_1 %28 NonPrivatePointer
                OpBranch %36
          %36 = OpLabel
-         %45 = OpUGreaterThanEqual %bool %40 %uint_1024
-               OpSelectionMerge %47 None
-               OpBranchConditional %45 %48 %47
+               OpReturn
+               OpFunctionEnd
+%compute_main_inner = OpFunction %void None %43
+%tint_local_index = OpFunctionParameter %uint
+         %44 = OpLabel
+               OpBranch %45
+         %45 = OpLabel
+               OpBranch %48
          %48 = OpLabel
-               OpBranch %39
+         %50 = OpPhi %uint %tint_local_index %45 %51 %47
+               OpLoopMerge %49 %47 None
+               OpBranch %46
+         %46 = OpLabel
+         %55 = OpUGreaterThanEqual %bool %50 %uint_1024
+               OpSelectionMerge %56 None
+               OpBranchConditional %55 %57 %56
+         %57 = OpLabel
+               OpBranch %49
+         %56 = OpLabel
+         %58 = OpAccessChain %_ptr_Workgroup_int %arg_0 %50
+               OpStore %58 %int_0 NonPrivatePointer
+               OpBranch %47
          %47 = OpLabel
-         %49 = OpAccessChain %_ptr_Workgroup_int %arg_0 %40
-               OpStore %49 %int_0 NonPrivatePointer
-               OpBranch %37
-         %37 = OpLabel
-         %41 = OpIAdd %uint %40 %uint_1
-               OpBranch %38
-         %39 = OpLabel
+         %51 = OpIAdd %uint %50 %uint_1
+               OpBranch %48
+         %49 = OpLabel
                OpControlBarrier %uint_2 %uint_2 %uint_24840
-         %44 = OpFunctionCall %void %subgroupMatrixStore_6578da
+         %54 = OpFunctionCall %void %subgroupMatrixStore_6578da
                OpReturn
                OpFunctionEnd
 %compute_main = OpFunction %void None %11
-         %51 = OpLabel
-         %52 = OpLoad %uint %compute_main_local_invocation_index_Input None
-         %53 = OpFunctionCall %void %compute_main_inner %52
+         %60 = OpLabel
+         %61 = OpLoad %uint %compute_main_local_invocation_index_Input None
+         %62 = OpFunctionCall %void %compute_main_inner %61
                OpReturn
                OpFunctionEnd
diff --git a/test/tint/builtins/gen/var/subgroupMatrixStore/6d8de7.wgsl.expected.msl b/test/tint/builtins/gen/var/subgroupMatrixStore/6d8de7.wgsl.expected.msl
index 080be72..3f9dc5a 100644
--- a/test/tint/builtins/gen/var/subgroupMatrixStore/6d8de7.wgsl.expected.msl
+++ b/test/tint/builtins/gen/var/subgroupMatrixStore/6d8de7.wgsl.expected.msl
@@ -19,16 +19,22 @@
 
 struct tint_module_vars_struct {
   device SB_RW* sb_rw;
+  const constant tint_array<uint4, 1>* tint_storage_buffer_sizes;
 };
 
 void subgroupMatrixStore_6d8de7(tint_module_vars_struct tint_module_vars) {
   uint arg_1 = 1u;
   simdgroup_half8x8 arg_2 = simdgroup_half8x8();
   uint arg_4 = 8u;
-  simdgroup_store(arg_2, (&(*tint_module_vars.sb_rw).arg_0[arg_1]), ulong(arg_4), ulong2(0ul), true);
+  uint const v = arg_1;
+  simdgroup_half8x8 const v_1 = arg_2;
+  uint const v_2 = max(arg_4, 8u);
+  if ((((v + (v_2 * 7u)) + 8u) <= (((*tint_module_vars.tint_storage_buffer_sizes)[0u].x - 0u) / 2u))) {
+    simdgroup_store(v_1, (&(*tint_module_vars.sb_rw).arg_0[v]), ulong(v_2), ulong2(0ul), true);
+  }
 }
 
-kernel void compute_main(device SB_RW* sb_rw [[buffer(0)]]) {
-  tint_module_vars_struct const tint_module_vars = tint_module_vars_struct{.sb_rw=sb_rw};
+kernel void compute_main(device SB_RW* sb_rw [[buffer(0)]], const constant tint_array<uint4, 1>* tint_storage_buffer_sizes [[buffer(30)]]) {
+  tint_module_vars_struct const tint_module_vars = tint_module_vars_struct{.sb_rw=sb_rw, .tint_storage_buffer_sizes=tint_storage_buffer_sizes};
   subgroupMatrixStore_6d8de7(tint_module_vars);
 }
diff --git a/test/tint/builtins/gen/var/subgroupMatrixStore/6d8de7.wgsl.expected.spvasm b/test/tint/builtins/gen/var/subgroupMatrixStore/6d8de7.wgsl.expected.spvasm
index 9d7a8e7..1571d51 100644
--- a/test/tint/builtins/gen/var/subgroupMatrixStore/6d8de7.wgsl.expected.spvasm
+++ b/test/tint/builtins/gen/var/subgroupMatrixStore/6d8de7.wgsl.expected.spvasm
@@ -1,7 +1,7 @@
 ; SPIR-V
 ; Version: 1.3
 ; Generator: Google Tint Compiler; 1
-; Bound: 34
+; Bound: 45
 ; Schema: 0
                OpCapability Shader
                OpCapability VulkanMemoryModel
@@ -12,6 +12,7 @@
                OpCapability CooperativeMatrixKHR
                OpExtension "SPV_KHR_vulkan_memory_model"
                OpExtension "SPV_KHR_cooperative_matrix"
+         %29 = OpExtInstImport "GLSL.std.450"
                OpMemoryModel Logical Vulkan
                OpEntryPoint GLCompute %compute_main "compute_main"
                OpExecutionMode %compute_main LocalSize 1 1 1
@@ -46,6 +47,8 @@
 %_ptr_Function_15 = OpTypePointer Function %15
 %_ptr_StorageBuffer__runtimearr_half = OpTypePointer StorageBuffer %_runtimearr_half
      %uint_0 = OpConstant %uint 0
+     %uint_7 = OpConstant %uint 7
+       %bool = OpTypeBool
 %_ptr_StorageBuffer_half = OpTypePointer StorageBuffer %half
 %subgroupMatrixStore_6d8de7 = OpFunction %void None %8
           %9 = OpLabel
@@ -59,12 +62,23 @@
          %25 = OpLoad %uint %arg_1 None
          %26 = OpLoad %15 %arg_2 None
          %27 = OpLoad %uint %arg_4 None
-         %28 = OpAccessChain %_ptr_StorageBuffer_half %22 %25
-               OpCooperativeMatrixStoreKHR %28 %26 %uint_1 %27 NonPrivatePointer
+         %28 = OpExtInst %uint %29 UMax %27 %uint_8
+         %30 = OpArrayLength %uint %sb_rw 0
+         %31 = OpIMul %uint %28 %uint_7
+         %33 = OpIAdd %uint %25 %31
+         %34 = OpIAdd %uint %33 %uint_8
+         %35 = OpULessThanEqual %bool %34 %30
+               OpSelectionMerge %37 None
+               OpBranchConditional %35 %38 %37
+         %38 = OpLabel
+         %39 = OpAccessChain %_ptr_StorageBuffer_half %22 %25
+               OpCooperativeMatrixStoreKHR %39 %26 %uint_1 %28 NonPrivatePointer
+               OpBranch %37
+         %37 = OpLabel
                OpReturn
                OpFunctionEnd
 %compute_main = OpFunction %void None %8
-         %32 = OpLabel
-         %33 = OpFunctionCall %void %subgroupMatrixStore_6d8de7
+         %43 = OpLabel
+         %44 = OpFunctionCall %void %subgroupMatrixStore_6d8de7
                OpReturn
                OpFunctionEnd
diff --git a/test/tint/builtins/gen/var/subgroupMatrixStore/716ef7.wgsl.expected.spvasm b/test/tint/builtins/gen/var/subgroupMatrixStore/716ef7.wgsl.expected.spvasm
index 3f7c8f3..2a3bd7a 100644
--- a/test/tint/builtins/gen/var/subgroupMatrixStore/716ef7.wgsl.expected.spvasm
+++ b/test/tint/builtins/gen/var/subgroupMatrixStore/716ef7.wgsl.expected.spvasm
@@ -1,7 +1,7 @@
 ; SPIR-V
 ; Version: 1.3
 ; Generator: Google Tint Compiler; 1
-; Bound: 53
+; Bound: 62
 ; Schema: 0
                OpCapability Shader
                OpCapability VulkanMemoryModel
@@ -9,6 +9,7 @@
                OpCapability CooperativeMatrixKHR
                OpExtension "SPV_KHR_vulkan_memory_model"
                OpExtension "SPV_KHR_cooperative_matrix"
+         %27 = OpExtInstImport "GLSL.std.450"
                OpMemoryModel Logical Vulkan
                OpEntryPoint GLCompute %compute_main "compute_main" %compute_main_local_invocation_index_Input
                OpExecutionMode %compute_main LocalSize 1 1 1
@@ -39,11 +40,12 @@
      %uint_0 = OpConstant %uint 0
          %15 = OpConstantComposite %16 %uint_0
 %_ptr_Function_16 = OpTypePointer Function %16
+     %uint_7 = OpConstant %uint 7
+       %bool = OpTypeBool
 %_ptr_Workgroup_uint = OpTypePointer Workgroup %uint
-         %31 = OpTypeFunction %void %uint
+         %41 = OpTypeFunction %void %uint
      %uint_2 = OpConstant %uint 2
  %uint_24840 = OpConstant %uint 24840
-       %bool = OpTypeBool
 %subgroupMatrixStore_716ef7 = OpFunction %void None %10
          %11 = OpLabel
       %arg_1 = OpVariable %_ptr_Function_uint Function
@@ -55,41 +57,51 @@
          %23 = OpLoad %uint %arg_1 None
          %24 = OpLoad %16 %arg_2 None
          %25 = OpLoad %uint %arg_4 None
-         %26 = OpAccessChain %_ptr_Workgroup_uint %arg_0 %23
-               OpCooperativeMatrixStoreKHR %26 %24 %uint_1 %25 NonPrivatePointer
-               OpReturn
-               OpFunctionEnd
-%compute_main_inner = OpFunction %void None %31
-%tint_local_index = OpFunctionParameter %uint
-         %32 = OpLabel
-               OpBranch %33
-         %33 = OpLabel
-               OpBranch %36
-         %36 = OpLabel
-         %38 = OpPhi %uint %tint_local_index %33 %39 %35
-               OpLoopMerge %37 %35 None
+         %26 = OpExtInst %uint %27 UMax %25 %uint_8
+         %28 = OpIMul %uint %26 %uint_7
+         %30 = OpIAdd %uint %23 %28
+         %31 = OpIAdd %uint %30 %uint_8
+         %32 = OpULessThanEqual %bool %31 %uint_1024
+               OpSelectionMerge %34 None
+               OpBranchConditional %32 %35 %34
+         %35 = OpLabel
+         %36 = OpAccessChain %_ptr_Workgroup_uint %arg_0 %23
+               OpCooperativeMatrixStoreKHR %36 %24 %uint_1 %26 NonPrivatePointer
                OpBranch %34
          %34 = OpLabel
-         %44 = OpUGreaterThanEqual %bool %38 %uint_1024
-               OpSelectionMerge %46 None
-               OpBranchConditional %44 %47 %46
-         %47 = OpLabel
-               OpBranch %37
+               OpReturn
+               OpFunctionEnd
+%compute_main_inner = OpFunction %void None %41
+%tint_local_index = OpFunctionParameter %uint
+         %42 = OpLabel
+               OpBranch %43
+         %43 = OpLabel
+               OpBranch %46
          %46 = OpLabel
-         %48 = OpAccessChain %_ptr_Workgroup_uint %arg_0 %38
-               OpStore %48 %uint_0 NonPrivatePointer
-               OpBranch %35
-         %35 = OpLabel
-         %39 = OpIAdd %uint %38 %uint_1
-               OpBranch %36
-         %37 = OpLabel
+         %48 = OpPhi %uint %tint_local_index %43 %49 %45
+               OpLoopMerge %47 %45 None
+               OpBranch %44
+         %44 = OpLabel
+         %54 = OpUGreaterThanEqual %bool %48 %uint_1024
+               OpSelectionMerge %55 None
+               OpBranchConditional %54 %56 %55
+         %56 = OpLabel
+               OpBranch %47
+         %55 = OpLabel
+         %57 = OpAccessChain %_ptr_Workgroup_uint %arg_0 %48
+               OpStore %57 %uint_0 NonPrivatePointer
+               OpBranch %45
+         %45 = OpLabel
+         %49 = OpIAdd %uint %48 %uint_1
+               OpBranch %46
+         %47 = OpLabel
                OpControlBarrier %uint_2 %uint_2 %uint_24840
-         %43 = OpFunctionCall %void %subgroupMatrixStore_716ef7
+         %53 = OpFunctionCall %void %subgroupMatrixStore_716ef7
                OpReturn
                OpFunctionEnd
 %compute_main = OpFunction %void None %10
-         %50 = OpLabel
-         %51 = OpLoad %uint %compute_main_local_invocation_index_Input None
-         %52 = OpFunctionCall %void %compute_main_inner %51
+         %59 = OpLabel
+         %60 = OpLoad %uint %compute_main_local_invocation_index_Input None
+         %61 = OpFunctionCall %void %compute_main_inner %60
                OpReturn
                OpFunctionEnd
diff --git a/test/tint/builtins/gen/var/subgroupMatrixStore/72f3af.wgsl.expected.spvasm b/test/tint/builtins/gen/var/subgroupMatrixStore/72f3af.wgsl.expected.spvasm
index 3bbdc48..35c9e0f 100644
--- a/test/tint/builtins/gen/var/subgroupMatrixStore/72f3af.wgsl.expected.spvasm
+++ b/test/tint/builtins/gen/var/subgroupMatrixStore/72f3af.wgsl.expected.spvasm
@@ -1,7 +1,7 @@
 ; SPIR-V
 ; Version: 1.3
 ; Generator: Google Tint Compiler; 1
-; Bound: 34
+; Bound: 44
 ; Schema: 0
                OpCapability Shader
                OpCapability VulkanMemoryModel
@@ -9,6 +9,7 @@
                OpCapability CooperativeMatrixKHR
                OpExtension "SPV_KHR_vulkan_memory_model"
                OpExtension "SPV_KHR_cooperative_matrix"
+         %29 = OpExtInstImport "GLSL.std.450"
                OpMemoryModel Logical Vulkan
                OpEntryPoint GLCompute %compute_main "compute_main"
                OpExecutionMode %compute_main LocalSize 1 1 1
@@ -45,6 +46,8 @@
          %15 = OpConstantComposite %16 %uint_0
 %_ptr_Function_16 = OpTypePointer Function %16
 %_ptr_StorageBuffer__arr_uint_uint_1024 = OpTypePointer StorageBuffer %_arr_uint_uint_1024
+     %uint_7 = OpConstant %uint 7
+       %bool = OpTypeBool
 %_ptr_StorageBuffer_uint = OpTypePointer StorageBuffer %uint
 %subgroupMatrixStore_72f3af = OpFunction %void None %10
          %11 = OpLabel
@@ -58,12 +61,22 @@
          %25 = OpLoad %uint %arg_1 None
          %26 = OpLoad %16 %arg_2 None
          %27 = OpLoad %uint %arg_4 None
-         %28 = OpAccessChain %_ptr_StorageBuffer_uint %23 %25
-               OpCooperativeMatrixStoreKHR %28 %26 %uint_1 %27 NonPrivatePointer
+         %28 = OpExtInst %uint %29 UMax %27 %uint_8
+         %30 = OpIMul %uint %28 %uint_7
+         %32 = OpIAdd %uint %25 %30
+         %33 = OpIAdd %uint %32 %uint_8
+         %34 = OpULessThanEqual %bool %33 %uint_1024
+               OpSelectionMerge %36 None
+               OpBranchConditional %34 %37 %36
+         %37 = OpLabel
+         %38 = OpAccessChain %_ptr_StorageBuffer_uint %23 %25
+               OpCooperativeMatrixStoreKHR %38 %26 %uint_1 %28 NonPrivatePointer
+               OpBranch %36
+         %36 = OpLabel
                OpReturn
                OpFunctionEnd
 %compute_main = OpFunction %void None %10
-         %32 = OpLabel
-         %33 = OpFunctionCall %void %subgroupMatrixStore_72f3af
+         %42 = OpLabel
+         %43 = OpFunctionCall %void %subgroupMatrixStore_72f3af
                OpReturn
                OpFunctionEnd
diff --git a/test/tint/builtins/gen/var/subgroupMatrixStore/840822.wgsl.expected.msl b/test/tint/builtins/gen/var/subgroupMatrixStore/840822.wgsl.expected.msl
index a20348b..da65775 100644
--- a/test/tint/builtins/gen/var/subgroupMatrixStore/840822.wgsl.expected.msl
+++ b/test/tint/builtins/gen/var/subgroupMatrixStore/840822.wgsl.expected.msl
@@ -25,21 +25,26 @@
   uint arg_1 = 1u;
   simdgroup_half8x8 arg_2 = simdgroup_half8x8();
   uint arg_4 = 8u;
-  simdgroup_store(arg_2, (&(*tint_module_vars.arg_0)[arg_1]), ulong(arg_4), ulong2(0ul), true);
+  uint const v = arg_1;
+  simdgroup_half8x8 const v_1 = arg_2;
+  uint const v_2 = max(arg_4, 8u);
+  if ((((v + (v_2 * 7u)) + 8u) <= 1024u)) {
+    simdgroup_store(v_1, (&(*tint_module_vars.arg_0)[v]), ulong(v_2), ulong2(0ul), true);
+  }
 }
 
 void compute_main_inner(uint tint_local_index, tint_module_vars_struct tint_module_vars) {
   {
-    uint v = 0u;
-    v = tint_local_index;
+    uint v_3 = 0u;
+    v_3 = tint_local_index;
     while(true) {
-      uint const v_1 = v;
-      if ((v_1 >= 1024u)) {
+      uint const v_4 = v_3;
+      if ((v_4 >= 1024u)) {
         break;
       }
-      (*tint_module_vars.arg_0)[v_1] = 0.0h;
+      (*tint_module_vars.arg_0)[v_4] = 0.0h;
       {
-        v = (v_1 + 1u);
+        v_3 = (v_4 + 1u);
       }
       continue;
     }
@@ -48,7 +53,7 @@
   subgroupMatrixStore_840822(tint_module_vars);
 }
 
-kernel void compute_main(uint tint_local_index [[thread_index_in_threadgroup]], threadgroup tint_symbol_1* v_2 [[threadgroup(0)]]) {
-  tint_module_vars_struct const tint_module_vars = tint_module_vars_struct{.arg_0=(&(*v_2).tint_symbol)};
+kernel void compute_main(uint tint_local_index [[thread_index_in_threadgroup]], threadgroup tint_symbol_1* v_5 [[threadgroup(0)]]) {
+  tint_module_vars_struct const tint_module_vars = tint_module_vars_struct{.arg_0=(&(*v_5).tint_symbol)};
   compute_main_inner(tint_local_index, tint_module_vars);
 }
diff --git a/test/tint/builtins/gen/var/subgroupMatrixStore/840822.wgsl.expected.spvasm b/test/tint/builtins/gen/var/subgroupMatrixStore/840822.wgsl.expected.spvasm
index 2070218..6b2e7d3 100644
--- a/test/tint/builtins/gen/var/subgroupMatrixStore/840822.wgsl.expected.spvasm
+++ b/test/tint/builtins/gen/var/subgroupMatrixStore/840822.wgsl.expected.spvasm
@@ -1,7 +1,7 @@
 ; SPIR-V
 ; Version: 1.3
 ; Generator: Google Tint Compiler; 1
-; Bound: 54
+; Bound: 63
 ; Schema: 0
                OpCapability Shader
                OpCapability VulkanMemoryModel
@@ -12,6 +12,7 @@
                OpCapability CooperativeMatrixKHR
                OpExtension "SPV_KHR_vulkan_memory_model"
                OpExtension "SPV_KHR_cooperative_matrix"
+         %29 = OpExtInstImport "GLSL.std.450"
                OpMemoryModel Logical Vulkan
                OpEntryPoint GLCompute %compute_main "compute_main" %compute_main_local_invocation_index_Input
                OpExecutionMode %compute_main LocalSize 1 1 1
@@ -44,10 +45,11 @@
 %half_0x0p_0 = OpConstant %half 0x0p+0
          %16 = OpConstantComposite %17 %half_0x0p_0
 %_ptr_Function_17 = OpTypePointer Function %17
-%_ptr_Workgroup_half = OpTypePointer Workgroup %half
-         %33 = OpTypeFunction %void %uint
- %uint_24840 = OpConstant %uint 24840
+     %uint_7 = OpConstant %uint 7
        %bool = OpTypeBool
+%_ptr_Workgroup_half = OpTypePointer Workgroup %half
+         %43 = OpTypeFunction %void %uint
+ %uint_24840 = OpConstant %uint 24840
 %subgroupMatrixStore_840822 = OpFunction %void None %11
          %12 = OpLabel
       %arg_1 = OpVariable %_ptr_Function_uint Function
@@ -59,41 +61,51 @@
          %25 = OpLoad %uint %arg_1 None
          %26 = OpLoad %17 %arg_2 None
          %27 = OpLoad %uint %arg_4 None
-         %28 = OpAccessChain %_ptr_Workgroup_half %arg_0 %25
-               OpCooperativeMatrixStoreKHR %28 %26 %uint_1 %27 NonPrivatePointer
-               OpReturn
-               OpFunctionEnd
-%compute_main_inner = OpFunction %void None %33
-%tint_local_index = OpFunctionParameter %uint
-         %34 = OpLabel
-               OpBranch %35
-         %35 = OpLabel
-               OpBranch %38
-         %38 = OpLabel
-         %40 = OpPhi %uint %tint_local_index %35 %41 %37
-               OpLoopMerge %39 %37 None
+         %28 = OpExtInst %uint %29 UMax %27 %uint_8
+         %30 = OpIMul %uint %28 %uint_7
+         %32 = OpIAdd %uint %25 %30
+         %33 = OpIAdd %uint %32 %uint_8
+         %34 = OpULessThanEqual %bool %33 %uint_1024
+               OpSelectionMerge %36 None
+               OpBranchConditional %34 %37 %36
+         %37 = OpLabel
+         %38 = OpAccessChain %_ptr_Workgroup_half %arg_0 %25
+               OpCooperativeMatrixStoreKHR %38 %26 %uint_1 %28 NonPrivatePointer
                OpBranch %36
          %36 = OpLabel
-         %45 = OpUGreaterThanEqual %bool %40 %uint_1024
-               OpSelectionMerge %47 None
-               OpBranchConditional %45 %48 %47
+               OpReturn
+               OpFunctionEnd
+%compute_main_inner = OpFunction %void None %43
+%tint_local_index = OpFunctionParameter %uint
+         %44 = OpLabel
+               OpBranch %45
+         %45 = OpLabel
+               OpBranch %48
          %48 = OpLabel
-               OpBranch %39
+         %50 = OpPhi %uint %tint_local_index %45 %51 %47
+               OpLoopMerge %49 %47 None
+               OpBranch %46
+         %46 = OpLabel
+         %55 = OpUGreaterThanEqual %bool %50 %uint_1024
+               OpSelectionMerge %56 None
+               OpBranchConditional %55 %57 %56
+         %57 = OpLabel
+               OpBranch %49
+         %56 = OpLabel
+         %58 = OpAccessChain %_ptr_Workgroup_half %arg_0 %50
+               OpStore %58 %half_0x0p_0 NonPrivatePointer
+               OpBranch %47
          %47 = OpLabel
-         %49 = OpAccessChain %_ptr_Workgroup_half %arg_0 %40
-               OpStore %49 %half_0x0p_0 NonPrivatePointer
-               OpBranch %37
-         %37 = OpLabel
-         %41 = OpIAdd %uint %40 %uint_1
-               OpBranch %38
-         %39 = OpLabel
+         %51 = OpIAdd %uint %50 %uint_1
+               OpBranch %48
+         %49 = OpLabel
                OpControlBarrier %uint_2 %uint_2 %uint_24840
-         %44 = OpFunctionCall %void %subgroupMatrixStore_840822
+         %54 = OpFunctionCall %void %subgroupMatrixStore_840822
                OpReturn
                OpFunctionEnd
 %compute_main = OpFunction %void None %11
-         %51 = OpLabel
-         %52 = OpLoad %uint %compute_main_local_invocation_index_Input None
-         %53 = OpFunctionCall %void %compute_main_inner %52
+         %60 = OpLabel
+         %61 = OpLoad %uint %compute_main_local_invocation_index_Input None
+         %62 = OpFunctionCall %void %compute_main_inner %61
                OpReturn
                OpFunctionEnd
diff --git a/test/tint/builtins/gen/var/subgroupMatrixStore/865820.wgsl.expected.spvasm b/test/tint/builtins/gen/var/subgroupMatrixStore/865820.wgsl.expected.spvasm
index 493257b..0d435f0 100644
--- a/test/tint/builtins/gen/var/subgroupMatrixStore/865820.wgsl.expected.spvasm
+++ b/test/tint/builtins/gen/var/subgroupMatrixStore/865820.wgsl.expected.spvasm
@@ -1,7 +1,7 @@
 ; SPIR-V
 ; Version: 1.3
 ; Generator: Google Tint Compiler; 1
-; Bound: 53
+; Bound: 62
 ; Schema: 0
                OpCapability Shader
                OpCapability VulkanMemoryModel
@@ -9,6 +9,7 @@
                OpCapability CooperativeMatrixKHR
                OpExtension "SPV_KHR_vulkan_memory_model"
                OpExtension "SPV_KHR_cooperative_matrix"
+         %28 = OpExtInstImport "GLSL.std.450"
                OpMemoryModel Logical Vulkan
                OpEntryPoint GLCompute %compute_main "compute_main" %compute_main_local_invocation_index_Input
                OpExecutionMode %compute_main LocalSize 1 1 1
@@ -40,10 +41,11 @@
      %uint_0 = OpConstant %uint 0
          %15 = OpConstantComposite %16 %uint_0
 %_ptr_Function_16 = OpTypePointer Function %16
-%_ptr_Workgroup_uint = OpTypePointer Workgroup %uint
-         %32 = OpTypeFunction %void %uint
- %uint_24840 = OpConstant %uint 24840
+     %uint_7 = OpConstant %uint 7
        %bool = OpTypeBool
+%_ptr_Workgroup_uint = OpTypePointer Workgroup %uint
+         %42 = OpTypeFunction %void %uint
+ %uint_24840 = OpConstant %uint 24840
 %subgroupMatrixStore_865820 = OpFunction %void None %10
          %11 = OpLabel
       %arg_1 = OpVariable %_ptr_Function_uint Function
@@ -55,41 +57,51 @@
          %24 = OpLoad %uint %arg_1 None
          %25 = OpLoad %16 %arg_2 None
          %26 = OpLoad %uint %arg_4 None
-         %27 = OpAccessChain %_ptr_Workgroup_uint %arg_0 %24
-               OpCooperativeMatrixStoreKHR %27 %25 %uint_1 %26 NonPrivatePointer
-               OpReturn
-               OpFunctionEnd
-%compute_main_inner = OpFunction %void None %32
-%tint_local_index = OpFunctionParameter %uint
-         %33 = OpLabel
-               OpBranch %34
-         %34 = OpLabel
-               OpBranch %37
-         %37 = OpLabel
-         %39 = OpPhi %uint %tint_local_index %34 %40 %36
-               OpLoopMerge %38 %36 None
+         %27 = OpExtInst %uint %28 UMax %26 %uint_8
+         %29 = OpIMul %uint %27 %uint_7
+         %31 = OpIAdd %uint %24 %29
+         %32 = OpIAdd %uint %31 %uint_8
+         %33 = OpULessThanEqual %bool %32 %uint_1024
+               OpSelectionMerge %35 None
+               OpBranchConditional %33 %36 %35
+         %36 = OpLabel
+         %37 = OpAccessChain %_ptr_Workgroup_uint %arg_0 %24
+               OpCooperativeMatrixStoreKHR %37 %25 %uint_1 %27 NonPrivatePointer
                OpBranch %35
          %35 = OpLabel
-         %44 = OpUGreaterThanEqual %bool %39 %uint_1024
-               OpSelectionMerge %46 None
-               OpBranchConditional %44 %47 %46
+               OpReturn
+               OpFunctionEnd
+%compute_main_inner = OpFunction %void None %42
+%tint_local_index = OpFunctionParameter %uint
+         %43 = OpLabel
+               OpBranch %44
+         %44 = OpLabel
+               OpBranch %47
          %47 = OpLabel
-               OpBranch %38
+         %49 = OpPhi %uint %tint_local_index %44 %50 %46
+               OpLoopMerge %48 %46 None
+               OpBranch %45
+         %45 = OpLabel
+         %54 = OpUGreaterThanEqual %bool %49 %uint_1024
+               OpSelectionMerge %55 None
+               OpBranchConditional %54 %56 %55
+         %56 = OpLabel
+               OpBranch %48
+         %55 = OpLabel
+         %57 = OpAccessChain %_ptr_Workgroup_uint %arg_0 %49
+               OpStore %57 %uint_0 NonPrivatePointer
+               OpBranch %46
          %46 = OpLabel
-         %48 = OpAccessChain %_ptr_Workgroup_uint %arg_0 %39
-               OpStore %48 %uint_0 NonPrivatePointer
-               OpBranch %36
-         %36 = OpLabel
-         %40 = OpIAdd %uint %39 %uint_1
-               OpBranch %37
-         %38 = OpLabel
+         %50 = OpIAdd %uint %49 %uint_1
+               OpBranch %47
+         %48 = OpLabel
                OpControlBarrier %uint_2 %uint_2 %uint_24840
-         %43 = OpFunctionCall %void %subgroupMatrixStore_865820
+         %53 = OpFunctionCall %void %subgroupMatrixStore_865820
                OpReturn
                OpFunctionEnd
 %compute_main = OpFunction %void None %10
-         %50 = OpLabel
-         %51 = OpLoad %uint %compute_main_local_invocation_index_Input None
-         %52 = OpFunctionCall %void %compute_main_inner %51
+         %59 = OpLabel
+         %60 = OpLoad %uint %compute_main_local_invocation_index_Input None
+         %61 = OpFunctionCall %void %compute_main_inner %60
                OpReturn
                OpFunctionEnd
diff --git a/test/tint/builtins/gen/var/subgroupMatrixStore/8c59ed.wgsl.expected.spvasm b/test/tint/builtins/gen/var/subgroupMatrixStore/8c59ed.wgsl.expected.spvasm
index 8a51a27..7fa4f83 100644
--- a/test/tint/builtins/gen/var/subgroupMatrixStore/8c59ed.wgsl.expected.spvasm
+++ b/test/tint/builtins/gen/var/subgroupMatrixStore/8c59ed.wgsl.expected.spvasm
@@ -1,7 +1,7 @@
 ; SPIR-V
 ; Version: 1.3
 ; Generator: Google Tint Compiler; 1
-; Bound: 32
+; Bound: 43
 ; Schema: 0
                OpCapability Shader
                OpCapability VulkanMemoryModel
@@ -9,6 +9,7 @@
                OpCapability CooperativeMatrixKHR
                OpExtension "SPV_KHR_vulkan_memory_model"
                OpExtension "SPV_KHR_cooperative_matrix"
+         %27 = OpExtInstImport "GLSL.std.450"
                OpMemoryModel Logical Vulkan
                OpEntryPoint GLCompute %compute_main "compute_main"
                OpExecutionMode %compute_main LocalSize 1 1 1
@@ -41,6 +42,8 @@
          %13 = OpConstantComposite %14 %uint_0
 %_ptr_Function_14 = OpTypePointer Function %14
 %_ptr_StorageBuffer__runtimearr_uint = OpTypePointer StorageBuffer %_runtimearr_uint
+     %uint_7 = OpConstant %uint 7
+       %bool = OpTypeBool
 %_ptr_StorageBuffer_uint = OpTypePointer StorageBuffer %uint
 %subgroupMatrixStore_8c59ed = OpFunction %void None %8
           %9 = OpLabel
@@ -54,12 +57,23 @@
          %23 = OpLoad %uint %arg_1 None
          %24 = OpLoad %14 %arg_2 None
          %25 = OpLoad %uint %arg_4 None
-         %26 = OpAccessChain %_ptr_StorageBuffer_uint %21 %23
-               OpCooperativeMatrixStoreKHR %26 %24 %uint_1 %25 NonPrivatePointer
+         %26 = OpExtInst %uint %27 UMax %25 %uint_8
+         %28 = OpArrayLength %uint %sb_rw 0
+         %29 = OpIMul %uint %26 %uint_7
+         %31 = OpIAdd %uint %23 %29
+         %32 = OpIAdd %uint %31 %uint_8
+         %33 = OpULessThanEqual %bool %32 %28
+               OpSelectionMerge %35 None
+               OpBranchConditional %33 %36 %35
+         %36 = OpLabel
+         %37 = OpAccessChain %_ptr_StorageBuffer_uint %21 %23
+               OpCooperativeMatrixStoreKHR %37 %24 %uint_1 %26 NonPrivatePointer
+               OpBranch %35
+         %35 = OpLabel
                OpReturn
                OpFunctionEnd
 %compute_main = OpFunction %void None %8
-         %30 = OpLabel
-         %31 = OpFunctionCall %void %subgroupMatrixStore_8c59ed
+         %41 = OpLabel
+         %42 = OpFunctionCall %void %subgroupMatrixStore_8c59ed
                OpReturn
                OpFunctionEnd
diff --git a/test/tint/builtins/gen/var/subgroupMatrixStore/9991c3.wgsl.expected.spvasm b/test/tint/builtins/gen/var/subgroupMatrixStore/9991c3.wgsl.expected.spvasm
index 3244939..a16a495 100644
--- a/test/tint/builtins/gen/var/subgroupMatrixStore/9991c3.wgsl.expected.spvasm
+++ b/test/tint/builtins/gen/var/subgroupMatrixStore/9991c3.wgsl.expected.spvasm
@@ -1,7 +1,7 @@
 ; SPIR-V
 ; Version: 1.3
 ; Generator: Google Tint Compiler; 1
-; Bound: 36
+; Bound: 46
 ; Schema: 0
                OpCapability Shader
                OpCapability VulkanMemoryModel
@@ -9,6 +9,7 @@
                OpCapability CooperativeMatrixKHR
                OpExtension "SPV_KHR_vulkan_memory_model"
                OpExtension "SPV_KHR_cooperative_matrix"
+         %31 = OpExtInstImport "GLSL.std.450"
                OpMemoryModel Logical Vulkan
                OpEntryPoint GLCompute %compute_main "compute_main"
                OpExecutionMode %compute_main LocalSize 1 1 1
@@ -47,6 +48,8 @@
 %_ptr_Function_17 = OpTypePointer Function %17
 %_ptr_StorageBuffer__arr_int_uint_1024 = OpTypePointer StorageBuffer %_arr_int_uint_1024
      %uint_0 = OpConstant %uint 0
+     %uint_7 = OpConstant %uint 7
+       %bool = OpTypeBool
 %_ptr_StorageBuffer_int = OpTypePointer StorageBuffer %int
 %subgroupMatrixStore_9991c3 = OpFunction %void None %11
          %12 = OpLabel
@@ -60,12 +63,22 @@
          %27 = OpLoad %uint %arg_1 None
          %28 = OpLoad %17 %arg_2 None
          %29 = OpLoad %uint %arg_4 None
-         %30 = OpAccessChain %_ptr_StorageBuffer_int %24 %27
-               OpCooperativeMatrixStoreKHR %30 %28 %uint_1 %29 NonPrivatePointer
+         %30 = OpExtInst %uint %31 UMax %29 %uint_8
+         %32 = OpIMul %uint %30 %uint_7
+         %34 = OpIAdd %uint %27 %32
+         %35 = OpIAdd %uint %34 %uint_8
+         %36 = OpULessThanEqual %bool %35 %uint_1024
+               OpSelectionMerge %38 None
+               OpBranchConditional %36 %39 %38
+         %39 = OpLabel
+         %40 = OpAccessChain %_ptr_StorageBuffer_int %24 %27
+               OpCooperativeMatrixStoreKHR %40 %28 %uint_1 %30 NonPrivatePointer
+               OpBranch %38
+         %38 = OpLabel
                OpReturn
                OpFunctionEnd
 %compute_main = OpFunction %void None %11
-         %34 = OpLabel
-         %35 = OpFunctionCall %void %subgroupMatrixStore_9991c3
+         %44 = OpLabel
+         %45 = OpFunctionCall %void %subgroupMatrixStore_9991c3
                OpReturn
                OpFunctionEnd
diff --git a/test/tint/builtins/gen/var/subgroupMatrixStore/a3ed32.wgsl.expected.msl b/test/tint/builtins/gen/var/subgroupMatrixStore/a3ed32.wgsl.expected.msl
index 5274b6a..5e9dce2 100644
--- a/test/tint/builtins/gen/var/subgroupMatrixStore/a3ed32.wgsl.expected.msl
+++ b/test/tint/builtins/gen/var/subgroupMatrixStore/a3ed32.wgsl.expected.msl
@@ -25,21 +25,26 @@
   uint arg_1 = 1u;
   simdgroup_float8x8 arg_2 = simdgroup_float8x8();
   uint arg_4 = 8u;
-  simdgroup_store(arg_2, (&(*tint_module_vars.arg_0)[arg_1]), ulong(arg_4), ulong2(0ul), true);
+  uint const v = arg_1;
+  simdgroup_float8x8 const v_1 = arg_2;
+  uint const v_2 = max(arg_4, 8u);
+  if ((((v + (v_2 * 7u)) + 8u) <= 1024u)) {
+    simdgroup_store(v_1, (&(*tint_module_vars.arg_0)[v]), ulong(v_2), ulong2(0ul), true);
+  }
 }
 
 void compute_main_inner(uint tint_local_index, tint_module_vars_struct tint_module_vars) {
   {
-    uint v = 0u;
-    v = tint_local_index;
+    uint v_3 = 0u;
+    v_3 = tint_local_index;
     while(true) {
-      uint const v_1 = v;
-      if ((v_1 >= 1024u)) {
+      uint const v_4 = v_3;
+      if ((v_4 >= 1024u)) {
         break;
       }
-      (*tint_module_vars.arg_0)[v_1] = 0.0f;
+      (*tint_module_vars.arg_0)[v_4] = 0.0f;
       {
-        v = (v_1 + 1u);
+        v_3 = (v_4 + 1u);
       }
       continue;
     }
@@ -48,7 +53,7 @@
   subgroupMatrixStore_a3ed32(tint_module_vars);
 }
 
-kernel void compute_main(uint tint_local_index [[thread_index_in_threadgroup]], threadgroup tint_symbol_1* v_2 [[threadgroup(0)]]) {
-  tint_module_vars_struct const tint_module_vars = tint_module_vars_struct{.arg_0=(&(*v_2).tint_symbol)};
+kernel void compute_main(uint tint_local_index [[thread_index_in_threadgroup]], threadgroup tint_symbol_1* v_5 [[threadgroup(0)]]) {
+  tint_module_vars_struct const tint_module_vars = tint_module_vars_struct{.arg_0=(&(*v_5).tint_symbol)};
   compute_main_inner(tint_local_index, tint_module_vars);
 }
diff --git a/test/tint/builtins/gen/var/subgroupMatrixStore/a3ed32.wgsl.expected.spvasm b/test/tint/builtins/gen/var/subgroupMatrixStore/a3ed32.wgsl.expected.spvasm
index ff7eaf8..2bcbdb1 100644
--- a/test/tint/builtins/gen/var/subgroupMatrixStore/a3ed32.wgsl.expected.spvasm
+++ b/test/tint/builtins/gen/var/subgroupMatrixStore/a3ed32.wgsl.expected.spvasm
@@ -1,7 +1,7 @@
 ; SPIR-V
 ; Version: 1.3
 ; Generator: Google Tint Compiler; 1
-; Bound: 54
+; Bound: 63
 ; Schema: 0
                OpCapability Shader
                OpCapability VulkanMemoryModel
@@ -9,6 +9,7 @@
                OpCapability CooperativeMatrixKHR
                OpExtension "SPV_KHR_vulkan_memory_model"
                OpExtension "SPV_KHR_cooperative_matrix"
+         %29 = OpExtInstImport "GLSL.std.450"
                OpMemoryModel Logical Vulkan
                OpEntryPoint GLCompute %compute_main "compute_main" %compute_main_local_invocation_index_Input
                OpExecutionMode %compute_main LocalSize 1 1 1
@@ -41,10 +42,11 @@
     %float_0 = OpConstant %float 0
          %16 = OpConstantComposite %17 %float_0
 %_ptr_Function_17 = OpTypePointer Function %17
-%_ptr_Workgroup_float = OpTypePointer Workgroup %float
-         %33 = OpTypeFunction %void %uint
- %uint_24840 = OpConstant %uint 24840
+     %uint_7 = OpConstant %uint 7
        %bool = OpTypeBool
+%_ptr_Workgroup_float = OpTypePointer Workgroup %float
+         %43 = OpTypeFunction %void %uint
+ %uint_24840 = OpConstant %uint 24840
 %subgroupMatrixStore_a3ed32 = OpFunction %void None %11
          %12 = OpLabel
       %arg_1 = OpVariable %_ptr_Function_uint Function
@@ -56,41 +58,51 @@
          %25 = OpLoad %uint %arg_1 None
          %26 = OpLoad %17 %arg_2 None
          %27 = OpLoad %uint %arg_4 None
-         %28 = OpAccessChain %_ptr_Workgroup_float %arg_0 %25
-               OpCooperativeMatrixStoreKHR %28 %26 %uint_1 %27 NonPrivatePointer
-               OpReturn
-               OpFunctionEnd
-%compute_main_inner = OpFunction %void None %33
-%tint_local_index = OpFunctionParameter %uint
-         %34 = OpLabel
-               OpBranch %35
-         %35 = OpLabel
-               OpBranch %38
-         %38 = OpLabel
-         %40 = OpPhi %uint %tint_local_index %35 %41 %37
-               OpLoopMerge %39 %37 None
+         %28 = OpExtInst %uint %29 UMax %27 %uint_8
+         %30 = OpIMul %uint %28 %uint_7
+         %32 = OpIAdd %uint %25 %30
+         %33 = OpIAdd %uint %32 %uint_8
+         %34 = OpULessThanEqual %bool %33 %uint_1024
+               OpSelectionMerge %36 None
+               OpBranchConditional %34 %37 %36
+         %37 = OpLabel
+         %38 = OpAccessChain %_ptr_Workgroup_float %arg_0 %25
+               OpCooperativeMatrixStoreKHR %38 %26 %uint_1 %28 NonPrivatePointer
                OpBranch %36
          %36 = OpLabel
-         %45 = OpUGreaterThanEqual %bool %40 %uint_1024
-               OpSelectionMerge %47 None
-               OpBranchConditional %45 %48 %47
+               OpReturn
+               OpFunctionEnd
+%compute_main_inner = OpFunction %void None %43
+%tint_local_index = OpFunctionParameter %uint
+         %44 = OpLabel
+               OpBranch %45
+         %45 = OpLabel
+               OpBranch %48
          %48 = OpLabel
-               OpBranch %39
+         %50 = OpPhi %uint %tint_local_index %45 %51 %47
+               OpLoopMerge %49 %47 None
+               OpBranch %46
+         %46 = OpLabel
+         %55 = OpUGreaterThanEqual %bool %50 %uint_1024
+               OpSelectionMerge %56 None
+               OpBranchConditional %55 %57 %56
+         %57 = OpLabel
+               OpBranch %49
+         %56 = OpLabel
+         %58 = OpAccessChain %_ptr_Workgroup_float %arg_0 %50
+               OpStore %58 %float_0 NonPrivatePointer
+               OpBranch %47
          %47 = OpLabel
-         %49 = OpAccessChain %_ptr_Workgroup_float %arg_0 %40
-               OpStore %49 %float_0 NonPrivatePointer
-               OpBranch %37
-         %37 = OpLabel
-         %41 = OpIAdd %uint %40 %uint_1
-               OpBranch %38
-         %39 = OpLabel
+         %51 = OpIAdd %uint %50 %uint_1
+               OpBranch %48
+         %49 = OpLabel
                OpControlBarrier %uint_2 %uint_2 %uint_24840
-         %44 = OpFunctionCall %void %subgroupMatrixStore_a3ed32
+         %54 = OpFunctionCall %void %subgroupMatrixStore_a3ed32
                OpReturn
                OpFunctionEnd
 %compute_main = OpFunction %void None %11
-         %51 = OpLabel
-         %52 = OpLoad %uint %compute_main_local_invocation_index_Input None
-         %53 = OpFunctionCall %void %compute_main_inner %52
+         %60 = OpLabel
+         %61 = OpLoad %uint %compute_main_local_invocation_index_Input None
+         %62 = OpFunctionCall %void %compute_main_inner %61
                OpReturn
                OpFunctionEnd
diff --git a/test/tint/builtins/gen/var/subgroupMatrixStore/a50ea0.wgsl.expected.msl b/test/tint/builtins/gen/var/subgroupMatrixStore/a50ea0.wgsl.expected.msl
index ab48098..ff918da 100644
--- a/test/tint/builtins/gen/var/subgroupMatrixStore/a50ea0.wgsl.expected.msl
+++ b/test/tint/builtins/gen/var/subgroupMatrixStore/a50ea0.wgsl.expected.msl
@@ -25,7 +25,12 @@
   uint arg_1 = 1u;
   simdgroup_float8x8 arg_2 = simdgroup_float8x8();
   uint arg_4 = 8u;
-  simdgroup_store(arg_2, (&(*tint_module_vars.sb_rw).arg_0[arg_1]), ulong(arg_4), ulong2(0ul), true);
+  uint const v = arg_1;
+  simdgroup_float8x8 const v_1 = arg_2;
+  uint const v_2 = max(arg_4, 8u);
+  if ((((v + (v_2 * 7u)) + 8u) <= 1024u)) {
+    simdgroup_store(v_1, (&(*tint_module_vars.sb_rw).arg_0[v]), ulong(v_2), ulong2(0ul), true);
+  }
 }
 
 kernel void compute_main(device SB_RW* sb_rw [[buffer(0)]]) {
diff --git a/test/tint/builtins/gen/var/subgroupMatrixStore/a50ea0.wgsl.expected.spvasm b/test/tint/builtins/gen/var/subgroupMatrixStore/a50ea0.wgsl.expected.spvasm
index 6d0f3a6d..559b45d 100644
--- a/test/tint/builtins/gen/var/subgroupMatrixStore/a50ea0.wgsl.expected.spvasm
+++ b/test/tint/builtins/gen/var/subgroupMatrixStore/a50ea0.wgsl.expected.spvasm
@@ -1,7 +1,7 @@
 ; SPIR-V
 ; Version: 1.3
 ; Generator: Google Tint Compiler; 1
-; Bound: 36
+; Bound: 46
 ; Schema: 0
                OpCapability Shader
                OpCapability VulkanMemoryModel
@@ -9,6 +9,7 @@
                OpCapability CooperativeMatrixKHR
                OpExtension "SPV_KHR_vulkan_memory_model"
                OpExtension "SPV_KHR_cooperative_matrix"
+         %31 = OpExtInstImport "GLSL.std.450"
                OpMemoryModel Logical Vulkan
                OpEntryPoint GLCompute %compute_main "compute_main"
                OpExecutionMode %compute_main LocalSize 1 1 1
@@ -47,6 +48,8 @@
 %_ptr_Function_17 = OpTypePointer Function %17
 %_ptr_StorageBuffer__arr_float_uint_1024 = OpTypePointer StorageBuffer %_arr_float_uint_1024
      %uint_0 = OpConstant %uint 0
+     %uint_7 = OpConstant %uint 7
+       %bool = OpTypeBool
 %_ptr_StorageBuffer_float = OpTypePointer StorageBuffer %float
 %subgroupMatrixStore_a50ea0 = OpFunction %void None %11
          %12 = OpLabel
@@ -60,12 +63,22 @@
          %27 = OpLoad %uint %arg_1 None
          %28 = OpLoad %17 %arg_2 None
          %29 = OpLoad %uint %arg_4 None
-         %30 = OpAccessChain %_ptr_StorageBuffer_float %24 %27
-               OpCooperativeMatrixStoreKHR %30 %28 %uint_1 %29 NonPrivatePointer
+         %30 = OpExtInst %uint %31 UMax %29 %uint_8
+         %32 = OpIMul %uint %30 %uint_7
+         %34 = OpIAdd %uint %27 %32
+         %35 = OpIAdd %uint %34 %uint_8
+         %36 = OpULessThanEqual %bool %35 %uint_1024
+               OpSelectionMerge %38 None
+               OpBranchConditional %36 %39 %38
+         %39 = OpLabel
+         %40 = OpAccessChain %_ptr_StorageBuffer_float %24 %27
+               OpCooperativeMatrixStoreKHR %40 %28 %uint_1 %30 NonPrivatePointer
+               OpBranch %38
+         %38 = OpLabel
                OpReturn
                OpFunctionEnd
 %compute_main = OpFunction %void None %11
-         %34 = OpLabel
-         %35 = OpFunctionCall %void %subgroupMatrixStore_a50ea0
+         %44 = OpLabel
+         %45 = OpFunctionCall %void %subgroupMatrixStore_a50ea0
                OpReturn
                OpFunctionEnd
diff --git a/test/tint/builtins/gen/var/subgroupMatrixStore/acb4b7.wgsl.expected.msl b/test/tint/builtins/gen/var/subgroupMatrixStore/acb4b7.wgsl.expected.msl
index 76d60d5..c758ada 100644
--- a/test/tint/builtins/gen/var/subgroupMatrixStore/acb4b7.wgsl.expected.msl
+++ b/test/tint/builtins/gen/var/subgroupMatrixStore/acb4b7.wgsl.expected.msl
@@ -25,7 +25,12 @@
   uint arg_1 = 1u;
   simdgroup_float8x8 arg_2 = simdgroup_float8x8();
   uint arg_4 = 8u;
-  simdgroup_store(arg_2, (&(*tint_module_vars.sb_rw).arg_0[arg_1]), ulong(arg_4), ulong2(0ul), true);
+  uint const v = arg_1;
+  simdgroup_float8x8 const v_1 = arg_2;
+  uint const v_2 = max(arg_4, 8u);
+  if ((((v + (v_2 * 7u)) + 8u) <= 1024u)) {
+    simdgroup_store(v_1, (&(*tint_module_vars.sb_rw).arg_0[v]), ulong(v_2), ulong2(0ul), true);
+  }
 }
 
 kernel void compute_main(device SB_RW* sb_rw [[buffer(0)]]) {
diff --git a/test/tint/builtins/gen/var/subgroupMatrixStore/acb4b7.wgsl.expected.spvasm b/test/tint/builtins/gen/var/subgroupMatrixStore/acb4b7.wgsl.expected.spvasm
index 23bef35..5ba6cf1 100644
--- a/test/tint/builtins/gen/var/subgroupMatrixStore/acb4b7.wgsl.expected.spvasm
+++ b/test/tint/builtins/gen/var/subgroupMatrixStore/acb4b7.wgsl.expected.spvasm
@@ -1,7 +1,7 @@
 ; SPIR-V
 ; Version: 1.3
 ; Generator: Google Tint Compiler; 1
-; Bound: 37
+; Bound: 47
 ; Schema: 0
                OpCapability Shader
                OpCapability VulkanMemoryModel
@@ -9,6 +9,7 @@
                OpCapability CooperativeMatrixKHR
                OpExtension "SPV_KHR_vulkan_memory_model"
                OpExtension "SPV_KHR_cooperative_matrix"
+         %32 = OpExtInstImport "GLSL.std.450"
                OpMemoryModel Logical Vulkan
                OpEntryPoint GLCompute %compute_main "compute_main"
                OpExecutionMode %compute_main LocalSize 1 1 1
@@ -48,6 +49,8 @@
 %_ptr_Function_17 = OpTypePointer Function %17
 %_ptr_StorageBuffer__arr_float_uint_1024 = OpTypePointer StorageBuffer %_arr_float_uint_1024
      %uint_0 = OpConstant %uint 0
+     %uint_7 = OpConstant %uint 7
+       %bool = OpTypeBool
 %_ptr_StorageBuffer_float = OpTypePointer StorageBuffer %float
 %subgroupMatrixStore_acb4b7 = OpFunction %void None %11
          %12 = OpLabel
@@ -61,12 +64,22 @@
          %28 = OpLoad %uint %arg_1 None
          %29 = OpLoad %17 %arg_2 None
          %30 = OpLoad %uint %arg_4 None
-         %31 = OpAccessChain %_ptr_StorageBuffer_float %25 %28
-               OpCooperativeMatrixStoreKHR %31 %29 %uint_1 %30 NonPrivatePointer
+         %31 = OpExtInst %uint %32 UMax %30 %uint_8
+         %33 = OpIMul %uint %31 %uint_7
+         %35 = OpIAdd %uint %28 %33
+         %36 = OpIAdd %uint %35 %uint_8
+         %37 = OpULessThanEqual %bool %36 %uint_1024
+               OpSelectionMerge %39 None
+               OpBranchConditional %37 %40 %39
+         %40 = OpLabel
+         %41 = OpAccessChain %_ptr_StorageBuffer_float %25 %28
+               OpCooperativeMatrixStoreKHR %41 %29 %uint_1 %31 NonPrivatePointer
+               OpBranch %39
+         %39 = OpLabel
                OpReturn
                OpFunctionEnd
 %compute_main = OpFunction %void None %11
-         %35 = OpLabel
-         %36 = OpFunctionCall %void %subgroupMatrixStore_acb4b7
+         %45 = OpLabel
+         %46 = OpFunctionCall %void %subgroupMatrixStore_acb4b7
                OpReturn
                OpFunctionEnd
diff --git a/test/tint/builtins/gen/var/subgroupMatrixStore/adbc3e.wgsl.expected.msl b/test/tint/builtins/gen/var/subgroupMatrixStore/adbc3e.wgsl.expected.msl
index fd0895f..23a2980 100644
--- a/test/tint/builtins/gen/var/subgroupMatrixStore/adbc3e.wgsl.expected.msl
+++ b/test/tint/builtins/gen/var/subgroupMatrixStore/adbc3e.wgsl.expected.msl
@@ -19,16 +19,22 @@
 
 struct tint_module_vars_struct {
   device SB_RW* sb_rw;
+  const constant tint_array<uint4, 1>* tint_storage_buffer_sizes;
 };
 
 void subgroupMatrixStore_adbc3e(tint_module_vars_struct tint_module_vars) {
   uint arg_1 = 1u;
   simdgroup_half8x8 arg_2 = simdgroup_half8x8();
   uint arg_4 = 8u;
-  simdgroup_store(arg_2, (&(*tint_module_vars.sb_rw).arg_0[arg_1]), ulong(arg_4), ulong2(0ul), true);
+  uint const v = arg_1;
+  simdgroup_half8x8 const v_1 = arg_2;
+  uint const v_2 = max(arg_4, 8u);
+  if ((((v + (v_2 * 7u)) + 8u) <= (((*tint_module_vars.tint_storage_buffer_sizes)[0u].x - 0u) / 2u))) {
+    simdgroup_store(v_1, (&(*tint_module_vars.sb_rw).arg_0[v]), ulong(v_2), ulong2(0ul), true);
+  }
 }
 
-kernel void compute_main(device SB_RW* sb_rw [[buffer(0)]]) {
-  tint_module_vars_struct const tint_module_vars = tint_module_vars_struct{.sb_rw=sb_rw};
+kernel void compute_main(device SB_RW* sb_rw [[buffer(0)]], const constant tint_array<uint4, 1>* tint_storage_buffer_sizes [[buffer(30)]]) {
+  tint_module_vars_struct const tint_module_vars = tint_module_vars_struct{.sb_rw=sb_rw, .tint_storage_buffer_sizes=tint_storage_buffer_sizes};
   subgroupMatrixStore_adbc3e(tint_module_vars);
 }
diff --git a/test/tint/builtins/gen/var/subgroupMatrixStore/adbc3e.wgsl.expected.spvasm b/test/tint/builtins/gen/var/subgroupMatrixStore/adbc3e.wgsl.expected.spvasm
index 9d27703..6e32d03 100644
--- a/test/tint/builtins/gen/var/subgroupMatrixStore/adbc3e.wgsl.expected.spvasm
+++ b/test/tint/builtins/gen/var/subgroupMatrixStore/adbc3e.wgsl.expected.spvasm
@@ -1,7 +1,7 @@
 ; SPIR-V
 ; Version: 1.3
 ; Generator: Google Tint Compiler; 1
-; Bound: 34
+; Bound: 45
 ; Schema: 0
                OpCapability Shader
                OpCapability VulkanMemoryModel
@@ -12,6 +12,7 @@
                OpCapability CooperativeMatrixKHR
                OpExtension "SPV_KHR_vulkan_memory_model"
                OpExtension "SPV_KHR_cooperative_matrix"
+         %29 = OpExtInstImport "GLSL.std.450"
                OpMemoryModel Logical Vulkan
                OpEntryPoint GLCompute %compute_main "compute_main"
                OpExecutionMode %compute_main LocalSize 1 1 1
@@ -46,6 +47,8 @@
          %14 = OpConstantComposite %15 %half_0x0p_0
 %_ptr_Function_15 = OpTypePointer Function %15
 %_ptr_StorageBuffer__runtimearr_half = OpTypePointer StorageBuffer %_runtimearr_half
+     %uint_7 = OpConstant %uint 7
+       %bool = OpTypeBool
 %_ptr_StorageBuffer_half = OpTypePointer StorageBuffer %half
 %subgroupMatrixStore_adbc3e = OpFunction %void None %8
           %9 = OpLabel
@@ -59,12 +62,23 @@
          %25 = OpLoad %uint %arg_1 None
          %26 = OpLoad %15 %arg_2 None
          %27 = OpLoad %uint %arg_4 None
-         %28 = OpAccessChain %_ptr_StorageBuffer_half %23 %25
-               OpCooperativeMatrixStoreKHR %28 %26 %uint_1 %27 NonPrivatePointer
+         %28 = OpExtInst %uint %29 UMax %27 %uint_8
+         %30 = OpArrayLength %uint %sb_rw 0
+         %31 = OpIMul %uint %28 %uint_7
+         %33 = OpIAdd %uint %25 %31
+         %34 = OpIAdd %uint %33 %uint_8
+         %35 = OpULessThanEqual %bool %34 %30
+               OpSelectionMerge %37 None
+               OpBranchConditional %35 %38 %37
+         %38 = OpLabel
+         %39 = OpAccessChain %_ptr_StorageBuffer_half %23 %25
+               OpCooperativeMatrixStoreKHR %39 %26 %uint_1 %28 NonPrivatePointer
+               OpBranch %37
+         %37 = OpLabel
                OpReturn
                OpFunctionEnd
 %compute_main = OpFunction %void None %8
-         %32 = OpLabel
-         %33 = OpFunctionCall %void %subgroupMatrixStore_adbc3e
+         %43 = OpLabel
+         %44 = OpFunctionCall %void %subgroupMatrixStore_adbc3e
                OpReturn
                OpFunctionEnd
diff --git a/test/tint/builtins/gen/var/subgroupMatrixStore/bb2478.wgsl.expected.msl b/test/tint/builtins/gen/var/subgroupMatrixStore/bb2478.wgsl.expected.msl
index 766c862..8f7c950 100644
--- a/test/tint/builtins/gen/var/subgroupMatrixStore/bb2478.wgsl.expected.msl
+++ b/test/tint/builtins/gen/var/subgroupMatrixStore/bb2478.wgsl.expected.msl
@@ -19,16 +19,22 @@
 
 struct tint_module_vars_struct {
   device SB_RW* sb_rw;
+  const constant tint_array<uint4, 1>* tint_storage_buffer_sizes;
 };
 
 void subgroupMatrixStore_bb2478(tint_module_vars_struct tint_module_vars) {
   uint arg_1 = 1u;
   simdgroup_float8x8 arg_2 = simdgroup_float8x8();
   uint arg_4 = 8u;
-  simdgroup_store(arg_2, (&(*tint_module_vars.sb_rw).arg_0[arg_1]), ulong(arg_4), ulong2(0ul), true);
+  uint const v = arg_1;
+  simdgroup_float8x8 const v_1 = arg_2;
+  uint const v_2 = max(arg_4, 8u);
+  if ((((v + (v_2 * 7u)) + 8u) <= (((*tint_module_vars.tint_storage_buffer_sizes)[0u].x - 0u) / 4u))) {
+    simdgroup_store(v_1, (&(*tint_module_vars.sb_rw).arg_0[v]), ulong(v_2), ulong2(0ul), true);
+  }
 }
 
-kernel void compute_main(device SB_RW* sb_rw [[buffer(0)]]) {
-  tint_module_vars_struct const tint_module_vars = tint_module_vars_struct{.sb_rw=sb_rw};
+kernel void compute_main(device SB_RW* sb_rw [[buffer(0)]], const constant tint_array<uint4, 1>* tint_storage_buffer_sizes [[buffer(30)]]) {
+  tint_module_vars_struct const tint_module_vars = tint_module_vars_struct{.sb_rw=sb_rw, .tint_storage_buffer_sizes=tint_storage_buffer_sizes};
   subgroupMatrixStore_bb2478(tint_module_vars);
 }
diff --git a/test/tint/builtins/gen/var/subgroupMatrixStore/bb2478.wgsl.expected.spvasm b/test/tint/builtins/gen/var/subgroupMatrixStore/bb2478.wgsl.expected.spvasm
index 2604407..d75cabc 100644
--- a/test/tint/builtins/gen/var/subgroupMatrixStore/bb2478.wgsl.expected.spvasm
+++ b/test/tint/builtins/gen/var/subgroupMatrixStore/bb2478.wgsl.expected.spvasm
@@ -1,7 +1,7 @@
 ; SPIR-V
 ; Version: 1.3
 ; Generator: Google Tint Compiler; 1
-; Bound: 35
+; Bound: 46
 ; Schema: 0
                OpCapability Shader
                OpCapability VulkanMemoryModel
@@ -9,6 +9,7 @@
                OpCapability CooperativeMatrixKHR
                OpExtension "SPV_KHR_vulkan_memory_model"
                OpExtension "SPV_KHR_cooperative_matrix"
+         %30 = OpExtInstImport "GLSL.std.450"
                OpMemoryModel Logical Vulkan
                OpEntryPoint GLCompute %compute_main "compute_main"
                OpExecutionMode %compute_main LocalSize 1 1 1
@@ -44,6 +45,8 @@
 %_ptr_Function_15 = OpTypePointer Function %15
 %_ptr_StorageBuffer__runtimearr_float = OpTypePointer StorageBuffer %_runtimearr_float
      %uint_0 = OpConstant %uint 0
+     %uint_7 = OpConstant %uint 7
+       %bool = OpTypeBool
 %_ptr_StorageBuffer_float = OpTypePointer StorageBuffer %float
 %subgroupMatrixStore_bb2478 = OpFunction %void None %8
           %9 = OpLabel
@@ -57,12 +60,23 @@
          %26 = OpLoad %uint %arg_1 None
          %27 = OpLoad %15 %arg_2 None
          %28 = OpLoad %uint %arg_4 None
-         %29 = OpAccessChain %_ptr_StorageBuffer_float %23 %26
-               OpCooperativeMatrixStoreKHR %29 %27 %uint_1 %28 NonPrivatePointer
+         %29 = OpExtInst %uint %30 UMax %28 %uint_8
+         %31 = OpArrayLength %uint %sb_rw 0
+         %32 = OpIMul %uint %29 %uint_7
+         %34 = OpIAdd %uint %26 %32
+         %35 = OpIAdd %uint %34 %uint_8
+         %36 = OpULessThanEqual %bool %35 %31
+               OpSelectionMerge %38 None
+               OpBranchConditional %36 %39 %38
+         %39 = OpLabel
+         %40 = OpAccessChain %_ptr_StorageBuffer_float %23 %26
+               OpCooperativeMatrixStoreKHR %40 %27 %uint_1 %29 NonPrivatePointer
+               OpBranch %38
+         %38 = OpLabel
                OpReturn
                OpFunctionEnd
 %compute_main = OpFunction %void None %8
-         %33 = OpLabel
-         %34 = OpFunctionCall %void %subgroupMatrixStore_bb2478
+         %44 = OpLabel
+         %45 = OpFunctionCall %void %subgroupMatrixStore_bb2478
                OpReturn
                OpFunctionEnd
diff --git a/test/tint/builtins/gen/var/subgroupMatrixStore/bb5d49.wgsl.expected.msl b/test/tint/builtins/gen/var/subgroupMatrixStore/bb5d49.wgsl.expected.msl
index b7c72b9..ef12ba8 100644
--- a/test/tint/builtins/gen/var/subgroupMatrixStore/bb5d49.wgsl.expected.msl
+++ b/test/tint/builtins/gen/var/subgroupMatrixStore/bb5d49.wgsl.expected.msl
@@ -19,16 +19,22 @@
 
 struct tint_module_vars_struct {
   device SB_RW* sb_rw;
+  const constant tint_array<uint4, 1>* tint_storage_buffer_sizes;
 };
 
 void subgroupMatrixStore_bb5d49(tint_module_vars_struct tint_module_vars) {
   uint arg_1 = 1u;
   simdgroup_float8x8 arg_2 = simdgroup_float8x8();
   uint arg_4 = 8u;
-  simdgroup_store(arg_2, (&(*tint_module_vars.sb_rw).arg_0[arg_1]), ulong(arg_4), ulong2(0ul), true);
+  uint const v = arg_1;
+  simdgroup_float8x8 const v_1 = arg_2;
+  uint const v_2 = max(arg_4, 8u);
+  if ((((v + (v_2 * 7u)) + 8u) <= (((*tint_module_vars.tint_storage_buffer_sizes)[0u].x - 0u) / 4u))) {
+    simdgroup_store(v_1, (&(*tint_module_vars.sb_rw).arg_0[v]), ulong(v_2), ulong2(0ul), true);
+  }
 }
 
-kernel void compute_main(device SB_RW* sb_rw [[buffer(0)]]) {
-  tint_module_vars_struct const tint_module_vars = tint_module_vars_struct{.sb_rw=sb_rw};
+kernel void compute_main(device SB_RW* sb_rw [[buffer(0)]], const constant tint_array<uint4, 1>* tint_storage_buffer_sizes [[buffer(30)]]) {
+  tint_module_vars_struct const tint_module_vars = tint_module_vars_struct{.sb_rw=sb_rw, .tint_storage_buffer_sizes=tint_storage_buffer_sizes};
   subgroupMatrixStore_bb5d49(tint_module_vars);
 }
diff --git a/test/tint/builtins/gen/var/subgroupMatrixStore/bb5d49.wgsl.expected.spvasm b/test/tint/builtins/gen/var/subgroupMatrixStore/bb5d49.wgsl.expected.spvasm
index e0c79ab..09ffcea 100644
--- a/test/tint/builtins/gen/var/subgroupMatrixStore/bb5d49.wgsl.expected.spvasm
+++ b/test/tint/builtins/gen/var/subgroupMatrixStore/bb5d49.wgsl.expected.spvasm
@@ -1,7 +1,7 @@
 ; SPIR-V
 ; Version: 1.3
 ; Generator: Google Tint Compiler; 1
-; Bound: 34
+; Bound: 45
 ; Schema: 0
                OpCapability Shader
                OpCapability VulkanMemoryModel
@@ -9,6 +9,7 @@
                OpCapability CooperativeMatrixKHR
                OpExtension "SPV_KHR_vulkan_memory_model"
                OpExtension "SPV_KHR_cooperative_matrix"
+         %29 = OpExtInstImport "GLSL.std.450"
                OpMemoryModel Logical Vulkan
                OpEntryPoint GLCompute %compute_main "compute_main"
                OpExecutionMode %compute_main LocalSize 1 1 1
@@ -43,6 +44,8 @@
          %14 = OpConstantComposite %15 %float_0
 %_ptr_Function_15 = OpTypePointer Function %15
 %_ptr_StorageBuffer__runtimearr_float = OpTypePointer StorageBuffer %_runtimearr_float
+     %uint_7 = OpConstant %uint 7
+       %bool = OpTypeBool
 %_ptr_StorageBuffer_float = OpTypePointer StorageBuffer %float
 %subgroupMatrixStore_bb5d49 = OpFunction %void None %8
           %9 = OpLabel
@@ -56,12 +59,23 @@
          %25 = OpLoad %uint %arg_1 None
          %26 = OpLoad %15 %arg_2 None
          %27 = OpLoad %uint %arg_4 None
-         %28 = OpAccessChain %_ptr_StorageBuffer_float %23 %25
-               OpCooperativeMatrixStoreKHR %28 %26 %uint_1 %27 NonPrivatePointer
+         %28 = OpExtInst %uint %29 UMax %27 %uint_8
+         %30 = OpArrayLength %uint %sb_rw 0
+         %31 = OpIMul %uint %28 %uint_7
+         %33 = OpIAdd %uint %25 %31
+         %34 = OpIAdd %uint %33 %uint_8
+         %35 = OpULessThanEqual %bool %34 %30
+               OpSelectionMerge %37 None
+               OpBranchConditional %35 %38 %37
+         %38 = OpLabel
+         %39 = OpAccessChain %_ptr_StorageBuffer_float %23 %25
+               OpCooperativeMatrixStoreKHR %39 %26 %uint_1 %28 NonPrivatePointer
+               OpBranch %37
+         %37 = OpLabel
                OpReturn
                OpFunctionEnd
 %compute_main = OpFunction %void None %8
-         %32 = OpLabel
-         %33 = OpFunctionCall %void %subgroupMatrixStore_bb5d49
+         %43 = OpLabel
+         %44 = OpFunctionCall %void %subgroupMatrixStore_bb5d49
                OpReturn
                OpFunctionEnd
diff --git a/test/tint/builtins/gen/var/subgroupMatrixStore/bfe106.wgsl.expected.msl b/test/tint/builtins/gen/var/subgroupMatrixStore/bfe106.wgsl.expected.msl
index dd092a2..e79f110 100644
--- a/test/tint/builtins/gen/var/subgroupMatrixStore/bfe106.wgsl.expected.msl
+++ b/test/tint/builtins/gen/var/subgroupMatrixStore/bfe106.wgsl.expected.msl
@@ -19,16 +19,22 @@
 
 struct tint_module_vars_struct {
   device SB_RW* sb_rw;
+  const constant tint_array<uint4, 1>* tint_storage_buffer_sizes;
 };
 
 void subgroupMatrixStore_bfe106(tint_module_vars_struct tint_module_vars) {
   uint arg_1 = 1u;
   simdgroup_half8x8 arg_2 = simdgroup_half8x8();
   uint arg_4 = 8u;
-  simdgroup_store(arg_2, (&(*tint_module_vars.sb_rw).arg_0[arg_1]), ulong(arg_4), ulong2(0ul), true);
+  uint const v = arg_1;
+  simdgroup_half8x8 const v_1 = arg_2;
+  uint const v_2 = max(arg_4, 8u);
+  if ((((v + (v_2 * 7u)) + 8u) <= (((*tint_module_vars.tint_storage_buffer_sizes)[0u].x - 0u) / 2u))) {
+    simdgroup_store(v_1, (&(*tint_module_vars.sb_rw).arg_0[v]), ulong(v_2), ulong2(0ul), true);
+  }
 }
 
-kernel void compute_main(device SB_RW* sb_rw [[buffer(0)]]) {
-  tint_module_vars_struct const tint_module_vars = tint_module_vars_struct{.sb_rw=sb_rw};
+kernel void compute_main(device SB_RW* sb_rw [[buffer(0)]], const constant tint_array<uint4, 1>* tint_storage_buffer_sizes [[buffer(30)]]) {
+  tint_module_vars_struct const tint_module_vars = tint_module_vars_struct{.sb_rw=sb_rw, .tint_storage_buffer_sizes=tint_storage_buffer_sizes};
   subgroupMatrixStore_bfe106(tint_module_vars);
 }
diff --git a/test/tint/builtins/gen/var/subgroupMatrixStore/bfe106.wgsl.expected.spvasm b/test/tint/builtins/gen/var/subgroupMatrixStore/bfe106.wgsl.expected.spvasm
index e1c2057..f8c77e4 100644
--- a/test/tint/builtins/gen/var/subgroupMatrixStore/bfe106.wgsl.expected.spvasm
+++ b/test/tint/builtins/gen/var/subgroupMatrixStore/bfe106.wgsl.expected.spvasm
@@ -1,7 +1,7 @@
 ; SPIR-V
 ; Version: 1.3
 ; Generator: Google Tint Compiler; 1
-; Bound: 35
+; Bound: 46
 ; Schema: 0
                OpCapability Shader
                OpCapability VulkanMemoryModel
@@ -12,6 +12,7 @@
                OpCapability CooperativeMatrixKHR
                OpExtension "SPV_KHR_vulkan_memory_model"
                OpExtension "SPV_KHR_cooperative_matrix"
+         %30 = OpExtInstImport "GLSL.std.450"
                OpMemoryModel Logical Vulkan
                OpEntryPoint GLCompute %compute_main "compute_main"
                OpExecutionMode %compute_main LocalSize 1 1 1
@@ -47,6 +48,8 @@
 %_ptr_Function_15 = OpTypePointer Function %15
 %_ptr_StorageBuffer__runtimearr_half = OpTypePointer StorageBuffer %_runtimearr_half
      %uint_0 = OpConstant %uint 0
+     %uint_7 = OpConstant %uint 7
+       %bool = OpTypeBool
 %_ptr_StorageBuffer_half = OpTypePointer StorageBuffer %half
 %subgroupMatrixStore_bfe106 = OpFunction %void None %8
           %9 = OpLabel
@@ -60,12 +63,23 @@
          %26 = OpLoad %uint %arg_1 None
          %27 = OpLoad %15 %arg_2 None
          %28 = OpLoad %uint %arg_4 None
-         %29 = OpAccessChain %_ptr_StorageBuffer_half %23 %26
-               OpCooperativeMatrixStoreKHR %29 %27 %uint_1 %28 NonPrivatePointer
+         %29 = OpExtInst %uint %30 UMax %28 %uint_8
+         %31 = OpArrayLength %uint %sb_rw 0
+         %32 = OpIMul %uint %29 %uint_7
+         %34 = OpIAdd %uint %26 %32
+         %35 = OpIAdd %uint %34 %uint_8
+         %36 = OpULessThanEqual %bool %35 %31
+               OpSelectionMerge %38 None
+               OpBranchConditional %36 %39 %38
+         %39 = OpLabel
+         %40 = OpAccessChain %_ptr_StorageBuffer_half %23 %26
+               OpCooperativeMatrixStoreKHR %40 %27 %uint_1 %29 NonPrivatePointer
+               OpBranch %38
+         %38 = OpLabel
                OpReturn
                OpFunctionEnd
 %compute_main = OpFunction %void None %8
-         %33 = OpLabel
-         %34 = OpFunctionCall %void %subgroupMatrixStore_bfe106
+         %44 = OpLabel
+         %45 = OpFunctionCall %void %subgroupMatrixStore_bfe106
                OpReturn
                OpFunctionEnd
diff --git a/test/tint/builtins/gen/var/subgroupMatrixStore/c28145.wgsl.expected.spvasm b/test/tint/builtins/gen/var/subgroupMatrixStore/c28145.wgsl.expected.spvasm
index 5dd10a0..48f10e2 100644
--- a/test/tint/builtins/gen/var/subgroupMatrixStore/c28145.wgsl.expected.spvasm
+++ b/test/tint/builtins/gen/var/subgroupMatrixStore/c28145.wgsl.expected.spvasm
@@ -1,7 +1,7 @@
 ; SPIR-V
 ; Version: 1.3
 ; Generator: Google Tint Compiler; 1
-; Bound: 37
+; Bound: 47
 ; Schema: 0
                OpCapability Shader
                OpCapability VulkanMemoryModel
@@ -9,6 +9,7 @@
                OpCapability CooperativeMatrixKHR
                OpExtension "SPV_KHR_vulkan_memory_model"
                OpExtension "SPV_KHR_cooperative_matrix"
+         %32 = OpExtInstImport "GLSL.std.450"
                OpMemoryModel Logical Vulkan
                OpEntryPoint GLCompute %compute_main "compute_main"
                OpExecutionMode %compute_main LocalSize 1 1 1
@@ -48,6 +49,8 @@
 %_ptr_Function_17 = OpTypePointer Function %17
 %_ptr_StorageBuffer__arr_int_uint_1024 = OpTypePointer StorageBuffer %_arr_int_uint_1024
      %uint_0 = OpConstant %uint 0
+     %uint_7 = OpConstant %uint 7
+       %bool = OpTypeBool
 %_ptr_StorageBuffer_int = OpTypePointer StorageBuffer %int
 %subgroupMatrixStore_c28145 = OpFunction %void None %11
          %12 = OpLabel
@@ -61,12 +64,22 @@
          %28 = OpLoad %uint %arg_1 None
          %29 = OpLoad %17 %arg_2 None
          %30 = OpLoad %uint %arg_4 None
-         %31 = OpAccessChain %_ptr_StorageBuffer_int %25 %28
-               OpCooperativeMatrixStoreKHR %31 %29 %uint_1 %30 NonPrivatePointer
+         %31 = OpExtInst %uint %32 UMax %30 %uint_8
+         %33 = OpIMul %uint %31 %uint_7
+         %35 = OpIAdd %uint %28 %33
+         %36 = OpIAdd %uint %35 %uint_8
+         %37 = OpULessThanEqual %bool %36 %uint_1024
+               OpSelectionMerge %39 None
+               OpBranchConditional %37 %40 %39
+         %40 = OpLabel
+         %41 = OpAccessChain %_ptr_StorageBuffer_int %25 %28
+               OpCooperativeMatrixStoreKHR %41 %29 %uint_1 %31 NonPrivatePointer
+               OpBranch %39
+         %39 = OpLabel
                OpReturn
                OpFunctionEnd
 %compute_main = OpFunction %void None %11
-         %35 = OpLabel
-         %36 = OpFunctionCall %void %subgroupMatrixStore_c28145
+         %45 = OpLabel
+         %46 = OpFunctionCall %void %subgroupMatrixStore_c28145
                OpReturn
                OpFunctionEnd
diff --git a/test/tint/builtins/gen/var/subgroupMatrixStore/c4a7ce.wgsl.expected.msl b/test/tint/builtins/gen/var/subgroupMatrixStore/c4a7ce.wgsl.expected.msl
index f64ebdc..4ed99b0 100644
--- a/test/tint/builtins/gen/var/subgroupMatrixStore/c4a7ce.wgsl.expected.msl
+++ b/test/tint/builtins/gen/var/subgroupMatrixStore/c4a7ce.wgsl.expected.msl
@@ -25,21 +25,26 @@
   uint arg_1 = 1u;
   simdgroup_float8x8 arg_2 = simdgroup_float8x8();
   uint arg_4 = 8u;
-  simdgroup_store(arg_2, (&(*tint_module_vars.arg_0)[arg_1]), ulong(arg_4), ulong2(0ul), true);
+  uint const v = arg_1;
+  simdgroup_float8x8 const v_1 = arg_2;
+  uint const v_2 = max(arg_4, 8u);
+  if ((((v + (v_2 * 7u)) + 8u) <= 1024u)) {
+    simdgroup_store(v_1, (&(*tint_module_vars.arg_0)[v]), ulong(v_2), ulong2(0ul), true);
+  }
 }
 
 void compute_main_inner(uint tint_local_index, tint_module_vars_struct tint_module_vars) {
   {
-    uint v = 0u;
-    v = tint_local_index;
+    uint v_3 = 0u;
+    v_3 = tint_local_index;
     while(true) {
-      uint const v_1 = v;
-      if ((v_1 >= 1024u)) {
+      uint const v_4 = v_3;
+      if ((v_4 >= 1024u)) {
         break;
       }
-      (*tint_module_vars.arg_0)[v_1] = 0.0f;
+      (*tint_module_vars.arg_0)[v_4] = 0.0f;
       {
-        v = (v_1 + 1u);
+        v_3 = (v_4 + 1u);
       }
       continue;
     }
@@ -48,7 +53,7 @@
   subgroupMatrixStore_c4a7ce(tint_module_vars);
 }
 
-kernel void compute_main(uint tint_local_index [[thread_index_in_threadgroup]], threadgroup tint_symbol_1* v_2 [[threadgroup(0)]]) {
-  tint_module_vars_struct const tint_module_vars = tint_module_vars_struct{.arg_0=(&(*v_2).tint_symbol)};
+kernel void compute_main(uint tint_local_index [[thread_index_in_threadgroup]], threadgroup tint_symbol_1* v_5 [[threadgroup(0)]]) {
+  tint_module_vars_struct const tint_module_vars = tint_module_vars_struct{.arg_0=(&(*v_5).tint_symbol)};
   compute_main_inner(tint_local_index, tint_module_vars);
 }
diff --git a/test/tint/builtins/gen/var/subgroupMatrixStore/c4a7ce.wgsl.expected.spvasm b/test/tint/builtins/gen/var/subgroupMatrixStore/c4a7ce.wgsl.expected.spvasm
index 9c09203..1b93a9d 100644
--- a/test/tint/builtins/gen/var/subgroupMatrixStore/c4a7ce.wgsl.expected.spvasm
+++ b/test/tint/builtins/gen/var/subgroupMatrixStore/c4a7ce.wgsl.expected.spvasm
@@ -1,7 +1,7 @@
 ; SPIR-V
 ; Version: 1.3
 ; Generator: Google Tint Compiler; 1
-; Bound: 54
+; Bound: 63
 ; Schema: 0
                OpCapability Shader
                OpCapability VulkanMemoryModel
@@ -9,6 +9,7 @@
                OpCapability CooperativeMatrixKHR
                OpExtension "SPV_KHR_vulkan_memory_model"
                OpExtension "SPV_KHR_cooperative_matrix"
+         %28 = OpExtInstImport "GLSL.std.450"
                OpMemoryModel Logical Vulkan
                OpEntryPoint GLCompute %compute_main "compute_main" %compute_main_local_invocation_index_Input
                OpExecutionMode %compute_main LocalSize 1 1 1
@@ -40,11 +41,12 @@
     %float_0 = OpConstant %float 0
          %16 = OpConstantComposite %17 %float_0
 %_ptr_Function_17 = OpTypePointer Function %17
+     %uint_7 = OpConstant %uint 7
+       %bool = OpTypeBool
 %_ptr_Workgroup_float = OpTypePointer Workgroup %float
-         %32 = OpTypeFunction %void %uint
+         %42 = OpTypeFunction %void %uint
      %uint_2 = OpConstant %uint 2
  %uint_24840 = OpConstant %uint 24840
-       %bool = OpTypeBool
 %subgroupMatrixStore_c4a7ce = OpFunction %void None %11
          %12 = OpLabel
       %arg_1 = OpVariable %_ptr_Function_uint Function
@@ -56,41 +58,51 @@
          %24 = OpLoad %uint %arg_1 None
          %25 = OpLoad %17 %arg_2 None
          %26 = OpLoad %uint %arg_4 None
-         %27 = OpAccessChain %_ptr_Workgroup_float %arg_0 %24
-               OpCooperativeMatrixStoreKHR %27 %25 %uint_1 %26 NonPrivatePointer
-               OpReturn
-               OpFunctionEnd
-%compute_main_inner = OpFunction %void None %32
-%tint_local_index = OpFunctionParameter %uint
-         %33 = OpLabel
-               OpBranch %34
-         %34 = OpLabel
-               OpBranch %37
-         %37 = OpLabel
-         %39 = OpPhi %uint %tint_local_index %34 %40 %36
-               OpLoopMerge %38 %36 None
+         %27 = OpExtInst %uint %28 UMax %26 %uint_8
+         %29 = OpIMul %uint %27 %uint_7
+         %31 = OpIAdd %uint %24 %29
+         %32 = OpIAdd %uint %31 %uint_8
+         %33 = OpULessThanEqual %bool %32 %uint_1024
+               OpSelectionMerge %35 None
+               OpBranchConditional %33 %36 %35
+         %36 = OpLabel
+         %37 = OpAccessChain %_ptr_Workgroup_float %arg_0 %24
+               OpCooperativeMatrixStoreKHR %37 %25 %uint_1 %27 NonPrivatePointer
                OpBranch %35
          %35 = OpLabel
-         %45 = OpUGreaterThanEqual %bool %39 %uint_1024
-               OpSelectionMerge %47 None
-               OpBranchConditional %45 %48 %47
-         %48 = OpLabel
-               OpBranch %38
+               OpReturn
+               OpFunctionEnd
+%compute_main_inner = OpFunction %void None %42
+%tint_local_index = OpFunctionParameter %uint
+         %43 = OpLabel
+               OpBranch %44
+         %44 = OpLabel
+               OpBranch %47
          %47 = OpLabel
-         %49 = OpAccessChain %_ptr_Workgroup_float %arg_0 %39
-               OpStore %49 %float_0 NonPrivatePointer
-               OpBranch %36
-         %36 = OpLabel
-         %40 = OpIAdd %uint %39 %uint_1
-               OpBranch %37
-         %38 = OpLabel
+         %49 = OpPhi %uint %tint_local_index %44 %50 %46
+               OpLoopMerge %48 %46 None
+               OpBranch %45
+         %45 = OpLabel
+         %55 = OpUGreaterThanEqual %bool %49 %uint_1024
+               OpSelectionMerge %56 None
+               OpBranchConditional %55 %57 %56
+         %57 = OpLabel
+               OpBranch %48
+         %56 = OpLabel
+         %58 = OpAccessChain %_ptr_Workgroup_float %arg_0 %49
+               OpStore %58 %float_0 NonPrivatePointer
+               OpBranch %46
+         %46 = OpLabel
+         %50 = OpIAdd %uint %49 %uint_1
+               OpBranch %47
+         %48 = OpLabel
                OpControlBarrier %uint_2 %uint_2 %uint_24840
-         %44 = OpFunctionCall %void %subgroupMatrixStore_c4a7ce
+         %54 = OpFunctionCall %void %subgroupMatrixStore_c4a7ce
                OpReturn
                OpFunctionEnd
 %compute_main = OpFunction %void None %11
-         %51 = OpLabel
-         %52 = OpLoad %uint %compute_main_local_invocation_index_Input None
-         %53 = OpFunctionCall %void %compute_main_inner %52
+         %60 = OpLabel
+         %61 = OpLoad %uint %compute_main_local_invocation_index_Input None
+         %62 = OpFunctionCall %void %compute_main_inner %61
                OpReturn
                OpFunctionEnd
diff --git a/test/tint/builtins/gen/var/subgroupMatrixStore/d55153.wgsl.expected.spvasm b/test/tint/builtins/gen/var/subgroupMatrixStore/d55153.wgsl.expected.spvasm
index 97d5067..eafac3e 100644
--- a/test/tint/builtins/gen/var/subgroupMatrixStore/d55153.wgsl.expected.spvasm
+++ b/test/tint/builtins/gen/var/subgroupMatrixStore/d55153.wgsl.expected.spvasm
@@ -1,7 +1,7 @@
 ; SPIR-V
 ; Version: 1.3
 ; Generator: Google Tint Compiler; 1
-; Bound: 33
+; Bound: 44
 ; Schema: 0
                OpCapability Shader
                OpCapability VulkanMemoryModel
@@ -9,6 +9,7 @@
                OpCapability CooperativeMatrixKHR
                OpExtension "SPV_KHR_vulkan_memory_model"
                OpExtension "SPV_KHR_cooperative_matrix"
+         %28 = OpExtInstImport "GLSL.std.450"
                OpMemoryModel Logical Vulkan
                OpEntryPoint GLCompute %compute_main "compute_main"
                OpExecutionMode %compute_main LocalSize 1 1 1
@@ -42,6 +43,8 @@
          %13 = OpConstantComposite %14 %uint_0
 %_ptr_Function_14 = OpTypePointer Function %14
 %_ptr_StorageBuffer__runtimearr_uint = OpTypePointer StorageBuffer %_runtimearr_uint
+     %uint_7 = OpConstant %uint 7
+       %bool = OpTypeBool
 %_ptr_StorageBuffer_uint = OpTypePointer StorageBuffer %uint
 %subgroupMatrixStore_d55153 = OpFunction %void None %8
           %9 = OpLabel
@@ -55,12 +58,23 @@
          %24 = OpLoad %uint %arg_1 None
          %25 = OpLoad %14 %arg_2 None
          %26 = OpLoad %uint %arg_4 None
-         %27 = OpAccessChain %_ptr_StorageBuffer_uint %22 %24
-               OpCooperativeMatrixStoreKHR %27 %25 %uint_1 %26 NonPrivatePointer
+         %27 = OpExtInst %uint %28 UMax %26 %uint_8
+         %29 = OpArrayLength %uint %sb_rw 0
+         %30 = OpIMul %uint %27 %uint_7
+         %32 = OpIAdd %uint %24 %30
+         %33 = OpIAdd %uint %32 %uint_8
+         %34 = OpULessThanEqual %bool %33 %29
+               OpSelectionMerge %36 None
+               OpBranchConditional %34 %37 %36
+         %37 = OpLabel
+         %38 = OpAccessChain %_ptr_StorageBuffer_uint %22 %24
+               OpCooperativeMatrixStoreKHR %38 %25 %uint_1 %27 NonPrivatePointer
+               OpBranch %36
+         %36 = OpLabel
                OpReturn
                OpFunctionEnd
 %compute_main = OpFunction %void None %8
-         %31 = OpLabel
-         %32 = OpFunctionCall %void %subgroupMatrixStore_d55153
+         %42 = OpLabel
+         %43 = OpFunctionCall %void %subgroupMatrixStore_d55153
                OpReturn
                OpFunctionEnd
diff --git a/test/tint/builtins/gen/var/subgroupMatrixStore/eac52e.wgsl.expected.msl b/test/tint/builtins/gen/var/subgroupMatrixStore/eac52e.wgsl.expected.msl
index 8581c30..3ac21e1 100644
--- a/test/tint/builtins/gen/var/subgroupMatrixStore/eac52e.wgsl.expected.msl
+++ b/test/tint/builtins/gen/var/subgroupMatrixStore/eac52e.wgsl.expected.msl
@@ -25,7 +25,12 @@
   uint arg_1 = 1u;
   simdgroup_float8x8 arg_2 = simdgroup_float8x8();
   uint arg_4 = 8u;
-  simdgroup_store(arg_2, (&(*tint_module_vars.sb_rw).arg_0[arg_1]), ulong(arg_4), ulong2(0ul), true);
+  uint const v = arg_1;
+  simdgroup_float8x8 const v_1 = arg_2;
+  uint const v_2 = max(arg_4, 8u);
+  if ((((v + (v_2 * 7u)) + 8u) <= 1024u)) {
+    simdgroup_store(v_1, (&(*tint_module_vars.sb_rw).arg_0[v]), ulong(v_2), ulong2(0ul), true);
+  }
 }
 
 kernel void compute_main(device SB_RW* sb_rw [[buffer(0)]]) {
diff --git a/test/tint/builtins/gen/var/subgroupMatrixStore/eac52e.wgsl.expected.spvasm b/test/tint/builtins/gen/var/subgroupMatrixStore/eac52e.wgsl.expected.spvasm
index e5108ce..d58bcd9 100644
--- a/test/tint/builtins/gen/var/subgroupMatrixStore/eac52e.wgsl.expected.spvasm
+++ b/test/tint/builtins/gen/var/subgroupMatrixStore/eac52e.wgsl.expected.spvasm
@@ -1,7 +1,7 @@
 ; SPIR-V
 ; Version: 1.3
 ; Generator: Google Tint Compiler; 1
-; Bound: 36
+; Bound: 46
 ; Schema: 0
                OpCapability Shader
                OpCapability VulkanMemoryModel
@@ -9,6 +9,7 @@
                OpCapability CooperativeMatrixKHR
                OpExtension "SPV_KHR_vulkan_memory_model"
                OpExtension "SPV_KHR_cooperative_matrix"
+         %31 = OpExtInstImport "GLSL.std.450"
                OpMemoryModel Logical Vulkan
                OpEntryPoint GLCompute %compute_main "compute_main"
                OpExecutionMode %compute_main LocalSize 1 1 1
@@ -47,6 +48,8 @@
          %16 = OpConstantComposite %17 %float_0
 %_ptr_Function_17 = OpTypePointer Function %17
 %_ptr_StorageBuffer__arr_float_uint_1024 = OpTypePointer StorageBuffer %_arr_float_uint_1024
+     %uint_7 = OpConstant %uint 7
+       %bool = OpTypeBool
 %_ptr_StorageBuffer_float = OpTypePointer StorageBuffer %float
 %subgroupMatrixStore_eac52e = OpFunction %void None %11
          %12 = OpLabel
@@ -60,12 +63,22 @@
          %27 = OpLoad %uint %arg_1 None
          %28 = OpLoad %17 %arg_2 None
          %29 = OpLoad %uint %arg_4 None
-         %30 = OpAccessChain %_ptr_StorageBuffer_float %25 %27
-               OpCooperativeMatrixStoreKHR %30 %28 %uint_1 %29 NonPrivatePointer
+         %30 = OpExtInst %uint %31 UMax %29 %uint_8
+         %32 = OpIMul %uint %30 %uint_7
+         %34 = OpIAdd %uint %27 %32
+         %35 = OpIAdd %uint %34 %uint_8
+         %36 = OpULessThanEqual %bool %35 %uint_1024
+               OpSelectionMerge %38 None
+               OpBranchConditional %36 %39 %38
+         %39 = OpLabel
+         %40 = OpAccessChain %_ptr_StorageBuffer_float %25 %27
+               OpCooperativeMatrixStoreKHR %40 %28 %uint_1 %30 NonPrivatePointer
+               OpBranch %38
+         %38 = OpLabel
                OpReturn
                OpFunctionEnd
 %compute_main = OpFunction %void None %11
-         %34 = OpLabel
-         %35 = OpFunctionCall %void %subgroupMatrixStore_eac52e
+         %44 = OpLabel
+         %45 = OpFunctionCall %void %subgroupMatrixStore_eac52e
                OpReturn
                OpFunctionEnd
diff --git a/test/tint/builtins/gen/var/subgroupMatrixStore/eae7d8.wgsl.expected.spvasm b/test/tint/builtins/gen/var/subgroupMatrixStore/eae7d8.wgsl.expected.spvasm
index 47e317b..f7b4966 100644
--- a/test/tint/builtins/gen/var/subgroupMatrixStore/eae7d8.wgsl.expected.spvasm
+++ b/test/tint/builtins/gen/var/subgroupMatrixStore/eae7d8.wgsl.expected.spvasm
@@ -1,7 +1,7 @@
 ; SPIR-V
 ; Version: 1.3
 ; Generator: Google Tint Compiler; 1
-; Bound: 34
+; Bound: 45
 ; Schema: 0
                OpCapability Shader
                OpCapability VulkanMemoryModel
@@ -9,6 +9,7 @@
                OpCapability CooperativeMatrixKHR
                OpExtension "SPV_KHR_vulkan_memory_model"
                OpExtension "SPV_KHR_cooperative_matrix"
+         %29 = OpExtInstImport "GLSL.std.450"
                OpMemoryModel Logical Vulkan
                OpEntryPoint GLCompute %compute_main "compute_main"
                OpExecutionMode %compute_main LocalSize 1 1 1
@@ -43,6 +44,8 @@
 %_ptr_Function_15 = OpTypePointer Function %15
 %_ptr_StorageBuffer__runtimearr_int = OpTypePointer StorageBuffer %_runtimearr_int
      %uint_0 = OpConstant %uint 0
+     %uint_7 = OpConstant %uint 7
+       %bool = OpTypeBool
 %_ptr_StorageBuffer_int = OpTypePointer StorageBuffer %int
 %subgroupMatrixStore_eae7d8 = OpFunction %void None %8
           %9 = OpLabel
@@ -56,12 +59,23 @@
          %25 = OpLoad %uint %arg_1 None
          %26 = OpLoad %15 %arg_2 None
          %27 = OpLoad %uint %arg_4 None
-         %28 = OpAccessChain %_ptr_StorageBuffer_int %22 %25
-               OpCooperativeMatrixStoreKHR %28 %26 %uint_1 %27 NonPrivatePointer
+         %28 = OpExtInst %uint %29 UMax %27 %uint_8
+         %30 = OpArrayLength %uint %sb_rw 0
+         %31 = OpIMul %uint %28 %uint_7
+         %33 = OpIAdd %uint %25 %31
+         %34 = OpIAdd %uint %33 %uint_8
+         %35 = OpULessThanEqual %bool %34 %30
+               OpSelectionMerge %37 None
+               OpBranchConditional %35 %38 %37
+         %38 = OpLabel
+         %39 = OpAccessChain %_ptr_StorageBuffer_int %22 %25
+               OpCooperativeMatrixStoreKHR %39 %26 %uint_1 %28 NonPrivatePointer
+               OpBranch %37
+         %37 = OpLabel
                OpReturn
                OpFunctionEnd
 %compute_main = OpFunction %void None %8
-         %32 = OpLabel
-         %33 = OpFunctionCall %void %subgroupMatrixStore_eae7d8
+         %43 = OpLabel
+         %44 = OpFunctionCall %void %subgroupMatrixStore_eae7d8
                OpReturn
                OpFunctionEnd
diff --git a/test/tint/builtins/gen/var/subgroupMatrixStore/f71f4f.wgsl.expected.spvasm b/test/tint/builtins/gen/var/subgroupMatrixStore/f71f4f.wgsl.expected.spvasm
index 4fb5aab..77bdc79 100644
--- a/test/tint/builtins/gen/var/subgroupMatrixStore/f71f4f.wgsl.expected.spvasm
+++ b/test/tint/builtins/gen/var/subgroupMatrixStore/f71f4f.wgsl.expected.spvasm
@@ -1,7 +1,7 @@
 ; SPIR-V
 ; Version: 1.3
 ; Generator: Google Tint Compiler; 1
-; Bound: 34
+; Bound: 44
 ; Schema: 0
                OpCapability Shader
                OpCapability VulkanMemoryModel
@@ -9,6 +9,7 @@
                OpCapability CooperativeMatrixKHR
                OpExtension "SPV_KHR_vulkan_memory_model"
                OpExtension "SPV_KHR_cooperative_matrix"
+         %29 = OpExtInstImport "GLSL.std.450"
                OpMemoryModel Logical Vulkan
                OpEntryPoint GLCompute %compute_main "compute_main"
                OpExecutionMode %compute_main LocalSize 1 1 1
@@ -45,6 +46,8 @@
          %15 = OpConstantComposite %16 %uint_0
 %_ptr_Function_16 = OpTypePointer Function %16
 %_ptr_StorageBuffer__arr_uint_uint_1024 = OpTypePointer StorageBuffer %_arr_uint_uint_1024
+     %uint_7 = OpConstant %uint 7
+       %bool = OpTypeBool
 %_ptr_StorageBuffer_uint = OpTypePointer StorageBuffer %uint
 %subgroupMatrixStore_f71f4f = OpFunction %void None %10
          %11 = OpLabel
@@ -58,12 +61,22 @@
          %25 = OpLoad %uint %arg_1 None
          %26 = OpLoad %16 %arg_2 None
          %27 = OpLoad %uint %arg_4 None
-         %28 = OpAccessChain %_ptr_StorageBuffer_uint %23 %25
-               OpCooperativeMatrixStoreKHR %28 %26 %uint_1 %27 NonPrivatePointer
+         %28 = OpExtInst %uint %29 UMax %27 %uint_8
+         %30 = OpIMul %uint %28 %uint_7
+         %32 = OpIAdd %uint %25 %30
+         %33 = OpIAdd %uint %32 %uint_8
+         %34 = OpULessThanEqual %bool %33 %uint_1024
+               OpSelectionMerge %36 None
+               OpBranchConditional %34 %37 %36
+         %37 = OpLabel
+         %38 = OpAccessChain %_ptr_StorageBuffer_uint %23 %25
+               OpCooperativeMatrixStoreKHR %38 %26 %uint_1 %28 NonPrivatePointer
+               OpBranch %36
+         %36 = OpLabel
                OpReturn
                OpFunctionEnd
 %compute_main = OpFunction %void None %10
-         %32 = OpLabel
-         %33 = OpFunctionCall %void %subgroupMatrixStore_f71f4f
+         %42 = OpLabel
+         %43 = OpFunctionCall %void %subgroupMatrixStore_f71f4f
                OpReturn
                OpFunctionEnd
diff --git a/test/tint/builtins/gen/var/subgroupMatrixStore/fb7663.wgsl.expected.msl b/test/tint/builtins/gen/var/subgroupMatrixStore/fb7663.wgsl.expected.msl
index f748734..c82ef29 100644
--- a/test/tint/builtins/gen/var/subgroupMatrixStore/fb7663.wgsl.expected.msl
+++ b/test/tint/builtins/gen/var/subgroupMatrixStore/fb7663.wgsl.expected.msl
@@ -25,7 +25,12 @@
   uint arg_1 = 1u;
   simdgroup_half8x8 arg_2 = simdgroup_half8x8();
   uint arg_4 = 8u;
-  simdgroup_store(arg_2, (&(*tint_module_vars.sb_rw).arg_0[arg_1]), ulong(arg_4), ulong2(0ul), true);
+  uint const v = arg_1;
+  simdgroup_half8x8 const v_1 = arg_2;
+  uint const v_2 = max(arg_4, 8u);
+  if ((((v + (v_2 * 7u)) + 8u) <= 1024u)) {
+    simdgroup_store(v_1, (&(*tint_module_vars.sb_rw).arg_0[v]), ulong(v_2), ulong2(0ul), true);
+  }
 }
 
 kernel void compute_main(device SB_RW* sb_rw [[buffer(0)]]) {
diff --git a/test/tint/builtins/gen/var/subgroupMatrixStore/fb7663.wgsl.expected.spvasm b/test/tint/builtins/gen/var/subgroupMatrixStore/fb7663.wgsl.expected.spvasm
index 585d96f..09a3dac 100644
--- a/test/tint/builtins/gen/var/subgroupMatrixStore/fb7663.wgsl.expected.spvasm
+++ b/test/tint/builtins/gen/var/subgroupMatrixStore/fb7663.wgsl.expected.spvasm
@@ -1,7 +1,7 @@
 ; SPIR-V
 ; Version: 1.3
 ; Generator: Google Tint Compiler; 1
-; Bound: 36
+; Bound: 46
 ; Schema: 0
                OpCapability Shader
                OpCapability VulkanMemoryModel
@@ -12,6 +12,7 @@
                OpCapability CooperativeMatrixKHR
                OpExtension "SPV_KHR_vulkan_memory_model"
                OpExtension "SPV_KHR_cooperative_matrix"
+         %31 = OpExtInstImport "GLSL.std.450"
                OpMemoryModel Logical Vulkan
                OpEntryPoint GLCompute %compute_main "compute_main"
                OpExecutionMode %compute_main LocalSize 1 1 1
@@ -50,6 +51,8 @@
          %16 = OpConstantComposite %17 %half_0x0p_0
 %_ptr_Function_17 = OpTypePointer Function %17
 %_ptr_StorageBuffer__arr_half_uint_1024 = OpTypePointer StorageBuffer %_arr_half_uint_1024
+     %uint_7 = OpConstant %uint 7
+       %bool = OpTypeBool
 %_ptr_StorageBuffer_half = OpTypePointer StorageBuffer %half
 %subgroupMatrixStore_fb7663 = OpFunction %void None %11
          %12 = OpLabel
@@ -63,12 +66,22 @@
          %27 = OpLoad %uint %arg_1 None
          %28 = OpLoad %17 %arg_2 None
          %29 = OpLoad %uint %arg_4 None
-         %30 = OpAccessChain %_ptr_StorageBuffer_half %25 %27
-               OpCooperativeMatrixStoreKHR %30 %28 %uint_1 %29 NonPrivatePointer
+         %30 = OpExtInst %uint %31 UMax %29 %uint_8
+         %32 = OpIMul %uint %30 %uint_7
+         %34 = OpIAdd %uint %27 %32
+         %35 = OpIAdd %uint %34 %uint_8
+         %36 = OpULessThanEqual %bool %35 %uint_1024
+               OpSelectionMerge %38 None
+               OpBranchConditional %36 %39 %38
+         %39 = OpLabel
+         %40 = OpAccessChain %_ptr_StorageBuffer_half %25 %27
+               OpCooperativeMatrixStoreKHR %40 %28 %uint_1 %30 NonPrivatePointer
+               OpBranch %38
+         %38 = OpLabel
                OpReturn
                OpFunctionEnd
 %compute_main = OpFunction %void None %11
-         %34 = OpLabel
-         %35 = OpFunctionCall %void %subgroupMatrixStore_fb7663
+         %44 = OpLabel
+         %45 = OpFunctionCall %void %subgroupMatrixStore_fb7663
                OpReturn
                OpFunctionEnd
diff --git a/test/tint/builtins/gen/var/subgroupMatrixStore/feca1e.wgsl.expected.spvasm b/test/tint/builtins/gen/var/subgroupMatrixStore/feca1e.wgsl.expected.spvasm
index 816c6a8..7de885b 100644
--- a/test/tint/builtins/gen/var/subgroupMatrixStore/feca1e.wgsl.expected.spvasm
+++ b/test/tint/builtins/gen/var/subgroupMatrixStore/feca1e.wgsl.expected.spvasm
@@ -1,7 +1,7 @@
 ; SPIR-V
 ; Version: 1.3
 ; Generator: Google Tint Compiler; 1
-; Bound: 54
+; Bound: 63
 ; Schema: 0
                OpCapability Shader
                OpCapability VulkanMemoryModel
@@ -9,6 +9,7 @@
                OpCapability CooperativeMatrixKHR
                OpExtension "SPV_KHR_vulkan_memory_model"
                OpExtension "SPV_KHR_cooperative_matrix"
+         %28 = OpExtInstImport "GLSL.std.450"
                OpMemoryModel Logical Vulkan
                OpEntryPoint GLCompute %compute_main "compute_main" %compute_main_local_invocation_index_Input
                OpExecutionMode %compute_main LocalSize 1 1 1
@@ -40,11 +41,12 @@
       %int_0 = OpConstant %int 0
          %16 = OpConstantComposite %17 %int_0
 %_ptr_Function_17 = OpTypePointer Function %17
+     %uint_7 = OpConstant %uint 7
+       %bool = OpTypeBool
 %_ptr_Workgroup_int = OpTypePointer Workgroup %int
-         %32 = OpTypeFunction %void %uint
+         %42 = OpTypeFunction %void %uint
      %uint_2 = OpConstant %uint 2
  %uint_24840 = OpConstant %uint 24840
-       %bool = OpTypeBool
 %subgroupMatrixStore_feca1e = OpFunction %void None %11
          %12 = OpLabel
       %arg_1 = OpVariable %_ptr_Function_uint Function
@@ -56,41 +58,51 @@
          %24 = OpLoad %uint %arg_1 None
          %25 = OpLoad %17 %arg_2 None
          %26 = OpLoad %uint %arg_4 None
-         %27 = OpAccessChain %_ptr_Workgroup_int %arg_0 %24
-               OpCooperativeMatrixStoreKHR %27 %25 %uint_1 %26 NonPrivatePointer
-               OpReturn
-               OpFunctionEnd
-%compute_main_inner = OpFunction %void None %32
-%tint_local_index = OpFunctionParameter %uint
-         %33 = OpLabel
-               OpBranch %34
-         %34 = OpLabel
-               OpBranch %37
-         %37 = OpLabel
-         %39 = OpPhi %uint %tint_local_index %34 %40 %36
-               OpLoopMerge %38 %36 None
+         %27 = OpExtInst %uint %28 UMax %26 %uint_8
+         %29 = OpIMul %uint %27 %uint_7
+         %31 = OpIAdd %uint %24 %29
+         %32 = OpIAdd %uint %31 %uint_8
+         %33 = OpULessThanEqual %bool %32 %uint_1024
+               OpSelectionMerge %35 None
+               OpBranchConditional %33 %36 %35
+         %36 = OpLabel
+         %37 = OpAccessChain %_ptr_Workgroup_int %arg_0 %24
+               OpCooperativeMatrixStoreKHR %37 %25 %uint_1 %27 NonPrivatePointer
                OpBranch %35
          %35 = OpLabel
-         %45 = OpUGreaterThanEqual %bool %39 %uint_1024
-               OpSelectionMerge %47 None
-               OpBranchConditional %45 %48 %47
-         %48 = OpLabel
-               OpBranch %38
+               OpReturn
+               OpFunctionEnd
+%compute_main_inner = OpFunction %void None %42
+%tint_local_index = OpFunctionParameter %uint
+         %43 = OpLabel
+               OpBranch %44
+         %44 = OpLabel
+               OpBranch %47
          %47 = OpLabel
-         %49 = OpAccessChain %_ptr_Workgroup_int %arg_0 %39
-               OpStore %49 %int_0 NonPrivatePointer
-               OpBranch %36
-         %36 = OpLabel
-         %40 = OpIAdd %uint %39 %uint_1
-               OpBranch %37
-         %38 = OpLabel
+         %49 = OpPhi %uint %tint_local_index %44 %50 %46
+               OpLoopMerge %48 %46 None
+               OpBranch %45
+         %45 = OpLabel
+         %55 = OpUGreaterThanEqual %bool %49 %uint_1024
+               OpSelectionMerge %56 None
+               OpBranchConditional %55 %57 %56
+         %57 = OpLabel
+               OpBranch %48
+         %56 = OpLabel
+         %58 = OpAccessChain %_ptr_Workgroup_int %arg_0 %49
+               OpStore %58 %int_0 NonPrivatePointer
+               OpBranch %46
+         %46 = OpLabel
+         %50 = OpIAdd %uint %49 %uint_1
+               OpBranch %47
+         %48 = OpLabel
                OpControlBarrier %uint_2 %uint_2 %uint_24840
-         %44 = OpFunctionCall %void %subgroupMatrixStore_feca1e
+         %54 = OpFunctionCall %void %subgroupMatrixStore_feca1e
                OpReturn
                OpFunctionEnd
 %compute_main = OpFunction %void None %11
-         %51 = OpLabel
-         %52 = OpLoad %uint %compute_main_local_invocation_index_Input None
-         %53 = OpFunctionCall %void %compute_main_inner %52
+         %60 = OpLabel
+         %61 = OpLoad %uint %compute_main_local_invocation_index_Input None
+         %62 = OpFunctionCall %void %compute_main_inner %61
                OpReturn
                OpFunctionEnd
diff --git a/test/tint/extensions/subgroup_matrix/construct.wgsl.expected.msl b/test/tint/extensions/subgroup_matrix/construct.wgsl.expected.msl
index dd2b7ad..90c8f26 100644
--- a/test/tint/extensions/subgroup_matrix/construct.wgsl.expected.msl
+++ b/test/tint/extensions/subgroup_matrix/construct.wgsl.expected.msl
@@ -15,6 +15,7 @@
 
 struct tint_module_vars_struct {
   device tint_array<float, 1>* tint_member;
+  const constant tint_array<uint4, 1>* tint_storage_buffer_sizes;
 };
 
 struct S {
@@ -26,22 +27,52 @@
   S s;
 };
 
-kernel void v(device tint_array<float, 1>* v_1 [[buffer(0)]]) {
-  tint_module_vars_struct const tint_module_vars = tint_module_vars_struct{.tint_member=v_1};
-  simdgroup_store(simdgroup_float8x8(), (&(*tint_module_vars.tint_member)[0u]), ulong(64u), ulong2(0ul), false);
-  simdgroup_store(tint_array<simdgroup_float8x8, 4>{}[1u], (&(*tint_module_vars.tint_member)[0u]), ulong(64u), ulong2(0ul), false);
-  simdgroup_store(tint_array<tint_array<simdgroup_float8x8, 4>, 4>{}[2u][3u], (&(*tint_module_vars.tint_member)[0u]), ulong(64u), ulong2(0ul), false);
-  simdgroup_store(S{}.l, (&(*tint_module_vars.tint_member)[0u]), ulong(64u), ulong2(0ul), false);
-  simdgroup_store(S_Nested{}.s.r, (&(*tint_module_vars.tint_member)[0u]), ulong(64u), ulong2(0ul), false);
-  simdgroup_store(simdgroup_float8x8(42.0f), (&(*tint_module_vars.tint_member)[0u]), ulong(64u), ulong2(0ul), false);
-  simdgroup_float8x8 const v_2 = simdgroup_float8x8(42.0f);
-  simdgroup_store(tint_array<simdgroup_float8x8, 2>{v_2, simdgroup_float8x8(100.0f)}[1u], (&(*tint_module_vars.tint_member)[0u]), ulong(64u), ulong2(0ul), false);
-  simdgroup_float8x8 const v_3 = simdgroup_float8x8(42.0f);
-  tint_array<simdgroup_float8x8, 2> const v_4 = tint_array<simdgroup_float8x8, 2>{v_3, simdgroup_float8x8(100.0f)};
-  simdgroup_float8x8 const v_5 = simdgroup_float8x8(-7.0f);
-  simdgroup_store(tint_array<tint_array<simdgroup_float8x8, 2>, 2>{v_4, tint_array<simdgroup_float8x8, 2>{v_5, simdgroup_float8x8(-42.0f)}}[1u][0u], (&(*tint_module_vars.tint_member)[0u]), ulong(64u), ulong2(0ul), false);
-  simdgroup_float8x8 const v_6 = simdgroup_float8x8(42.0f);
-  simdgroup_store(S{.l=v_6, .r=simdgroup_float8x8(100.0f)}.l, (&(*tint_module_vars.tint_member)[0u]), ulong(64u), ulong2(0ul), false);
+kernel void v(device tint_array<float, 1>* v_1 [[buffer(0)]], const constant tint_array<uint4, 1>* tint_storage_buffer_sizes [[buffer(30)]]) {
+  tint_module_vars_struct const tint_module_vars = tint_module_vars_struct{.tint_member=v_1, .tint_storage_buffer_sizes=tint_storage_buffer_sizes};
+  simdgroup_float8x8 const v_2 = simdgroup_float8x8();
+  if ((((0u + (64u * 7u)) + 8u) <= ((*tint_module_vars.tint_storage_buffer_sizes)[0u].x / 4u))) {
+    simdgroup_store(v_2, (&(*tint_module_vars.tint_member)[0u]), ulong(64u), ulong2(0ul), false);
+  }
+  simdgroup_float8x8 const v_3 = tint_array<simdgroup_float8x8, 4>{}[1u];
+  if ((((0u + (64u * 7u)) + 8u) <= ((*tint_module_vars.tint_storage_buffer_sizes)[0u].x / 4u))) {
+    simdgroup_store(v_3, (&(*tint_module_vars.tint_member)[0u]), ulong(64u), ulong2(0ul), false);
+  }
+  simdgroup_float8x8 const v_4 = tint_array<tint_array<simdgroup_float8x8, 4>, 4>{}[2u][3u];
+  if ((((0u + (64u * 7u)) + 8u) <= ((*tint_module_vars.tint_storage_buffer_sizes)[0u].x / 4u))) {
+    simdgroup_store(v_4, (&(*tint_module_vars.tint_member)[0u]), ulong(64u), ulong2(0ul), false);
+  }
+  simdgroup_float8x8 const v_5 = S{}.l;
+  if ((((0u + (64u * 7u)) + 8u) <= ((*tint_module_vars.tint_storage_buffer_sizes)[0u].x / 4u))) {
+    simdgroup_store(v_5, (&(*tint_module_vars.tint_member)[0u]), ulong(64u), ulong2(0ul), false);
+  }
+  simdgroup_float8x8 const v_6 = S_Nested{}.s.r;
+  if ((((0u + (64u * 7u)) + 8u) <= ((*tint_module_vars.tint_storage_buffer_sizes)[0u].x / 4u))) {
+    simdgroup_store(v_6, (&(*tint_module_vars.tint_member)[0u]), ulong(64u), ulong2(0ul), false);
+  }
   simdgroup_float8x8 const v_7 = simdgroup_float8x8(42.0f);
-  simdgroup_store(S_Nested{.s=S{.l=v_7, .r=simdgroup_float8x8(100.0f)}}.s.r, (&(*tint_module_vars.tint_member)[0u]), ulong(64u), ulong2(0ul), false);
+  if ((((0u + (64u * 7u)) + 8u) <= ((*tint_module_vars.tint_storage_buffer_sizes)[0u].x / 4u))) {
+    simdgroup_store(v_7, (&(*tint_module_vars.tint_member)[0u]), ulong(64u), ulong2(0ul), false);
+  }
+  simdgroup_float8x8 const v_8 = simdgroup_float8x8(42.0f);
+  simdgroup_float8x8 const v_9 = tint_array<simdgroup_float8x8, 2>{v_8, simdgroup_float8x8(100.0f)}[1u];
+  if ((((0u + (64u * 7u)) + 8u) <= ((*tint_module_vars.tint_storage_buffer_sizes)[0u].x / 4u))) {
+    simdgroup_store(v_9, (&(*tint_module_vars.tint_member)[0u]), ulong(64u), ulong2(0ul), false);
+  }
+  simdgroup_float8x8 const v_10 = simdgroup_float8x8(42.0f);
+  tint_array<simdgroup_float8x8, 2> const v_11 = tint_array<simdgroup_float8x8, 2>{v_10, simdgroup_float8x8(100.0f)};
+  simdgroup_float8x8 const v_12 = simdgroup_float8x8(-7.0f);
+  simdgroup_float8x8 const v_13 = tint_array<tint_array<simdgroup_float8x8, 2>, 2>{v_11, tint_array<simdgroup_float8x8, 2>{v_12, simdgroup_float8x8(-42.0f)}}[1u][0u];
+  if ((((0u + (64u * 7u)) + 8u) <= ((*tint_module_vars.tint_storage_buffer_sizes)[0u].x / 4u))) {
+    simdgroup_store(v_13, (&(*tint_module_vars.tint_member)[0u]), ulong(64u), ulong2(0ul), false);
+  }
+  simdgroup_float8x8 const v_14 = simdgroup_float8x8(42.0f);
+  simdgroup_float8x8 const v_15 = S{.l=v_14, .r=simdgroup_float8x8(100.0f)}.l;
+  if ((((0u + (64u * 7u)) + 8u) <= ((*tint_module_vars.tint_storage_buffer_sizes)[0u].x / 4u))) {
+    simdgroup_store(v_15, (&(*tint_module_vars.tint_member)[0u]), ulong(64u), ulong2(0ul), false);
+  }
+  simdgroup_float8x8 const v_16 = simdgroup_float8x8(42.0f);
+  simdgroup_float8x8 const v_17 = S_Nested{.s=S{.l=v_16, .r=simdgroup_float8x8(100.0f)}}.s.r;
+  if ((((0u + (64u * 7u)) + 8u) <= ((*tint_module_vars.tint_storage_buffer_sizes)[0u].x / 4u))) {
+    simdgroup_store(v_17, (&(*tint_module_vars.tint_member)[0u]), ulong(64u), ulong2(0ul), false);
+  }
 }
diff --git a/test/tint/extensions/subgroup_matrix/construct.wgsl.expected.spvasm b/test/tint/extensions/subgroup_matrix/construct.wgsl.expected.spvasm
index 666d6f6..a452711 100644
--- a/test/tint/extensions/subgroup_matrix/construct.wgsl.expected.spvasm
+++ b/test/tint/extensions/subgroup_matrix/construct.wgsl.expected.spvasm
@@ -1,7 +1,7 @@
 ; SPIR-V
 ; Version: 1.3
 ; Generator: Google Tint Compiler; 1
-; Bound: 95
+; Bound: 177
 ; Schema: 0
                OpCapability Shader
                OpCapability VulkanMemoryModel
@@ -40,20 +40,21 @@
     %float_0 = OpConstant %float 0
          %10 = OpConstantComposite %11 %float_0
 %_ptr_StorageBuffer__runtimearr_float = OpTypePointer StorageBuffer %_runtimearr_float
-%_ptr_StorageBuffer_float = OpTypePointer StorageBuffer %float
     %uint_64 = OpConstant %uint 64
+     %uint_7 = OpConstant %uint 7
+       %bool = OpTypeBool
      %uint_4 = OpConstant %uint 4
 %_arr_11_uint_4 = OpTypeArray %11 %uint_4
-         %23 = OpConstantComposite %_arr_11_uint_4 %10 %10 %10 %10
+         %29 = OpConstantComposite %_arr_11_uint_4 %10 %10 %10 %10
 %_arr__arr_11_uint_4_uint_4 = OpTypeArray %_arr_11_uint_4 %uint_4
-         %30 = OpConstantComposite %_arr__arr_11_uint_4_uint_4 %23 %23 %23 %23
+         %41 = OpConstantComposite %_arr__arr_11_uint_4_uint_4 %29 %29 %29 %29
      %uint_1 = OpConstant %uint 1
-         %38 = OpTypeCooperativeMatrixKHR %float %uint_3 %uint_8 %uint_8 %uint_1
-          %S = OpTypeStruct %11 %38
-         %40 = OpConstantComposite %38 %float_0
-         %36 = OpConstantComposite %S %10 %40
+         %54 = OpTypeCooperativeMatrixKHR %float %uint_3 %uint_8 %uint_8 %uint_1
+          %S = OpTypeStruct %11 %54
+         %56 = OpConstantComposite %54 %float_0
+         %52 = OpConstantComposite %S %10 %56
    %S_Nested = OpTypeStruct %S
-         %45 = OpConstantComposite %S_Nested %36
+         %66 = OpConstantComposite %S_Nested %52
    %float_42 = OpConstant %float 42
   %float_100 = OpConstant %float 100
      %uint_2 = OpConstant %uint 2
@@ -61,63 +62,174 @@
    %float_n7 = OpConstant %float -7
   %float_n42 = OpConstant %float -42
 %_arr__arr_11_uint_2_uint_2 = OpTypeArray %_arr_11_uint_2 %uint_2
+%_ptr_StorageBuffer_float = OpTypePointer StorageBuffer %float
        %main = OpFunction %void None %8
           %9 = OpLabel
          %17 = OpAccessChain %_ptr_StorageBuffer__runtimearr_float %1 %uint_0
-         %19 = OpAccessChain %_ptr_StorageBuffer_float %17 %uint_0
-               OpCooperativeMatrixStoreKHR %19 %10 %uint_0 %uint_64 NonPrivatePointer
-         %26 = OpCompositeExtract %11 %23 1
-         %27 = OpAccessChain %_ptr_StorageBuffer__runtimearr_float %1 %uint_0
-         %28 = OpAccessChain %_ptr_StorageBuffer_float %27 %uint_0
-               OpCooperativeMatrixStoreKHR %28 %26 %uint_0 %uint_64 NonPrivatePointer
-         %32 = OpCompositeExtract %11 %30 2 3
+         %19 = OpArrayLength %uint %1 0
+         %20 = OpIMul %uint %uint_64 %uint_7
+         %23 = OpIAdd %uint %uint_0 %20
+         %24 = OpIAdd %uint %23 %uint_8
+         %25 = OpULessThanEqual %bool %24 %19
+               OpSelectionMerge %27 None
+               OpBranchConditional %25 %28 %27
+         %28 = OpLabel
+        %146 = OpAccessChain %_ptr_StorageBuffer__runtimearr_float %1 %uint_0
+        %147 = OpAccessChain %_ptr_StorageBuffer_float %146 %uint_0
+               OpCooperativeMatrixStoreKHR %147 %10 %uint_0 %uint_64 NonPrivatePointer
+               OpBranch %27
+         %27 = OpLabel
+         %32 = OpCompositeExtract %11 %29 1
          %33 = OpAccessChain %_ptr_StorageBuffer__runtimearr_float %1 %uint_0
-         %34 = OpAccessChain %_ptr_StorageBuffer_float %33 %uint_0
-               OpCooperativeMatrixStoreKHR %34 %32 %uint_0 %uint_64 NonPrivatePointer
-         %41 = OpCompositeExtract %11 %36 0
-         %42 = OpAccessChain %_ptr_StorageBuffer__runtimearr_float %1 %uint_0
-         %43 = OpAccessChain %_ptr_StorageBuffer_float %42 %uint_0
-               OpCooperativeMatrixStoreKHR %43 %41 %uint_0 %uint_64 NonPrivatePointer
-         %47 = OpCompositeExtract %38 %45 0 1
-         %48 = OpAccessChain %_ptr_StorageBuffer__runtimearr_float %1 %uint_0
-         %49 = OpAccessChain %_ptr_StorageBuffer_float %48 %uint_0
-               OpCooperativeMatrixStoreKHR %49 %47 %uint_0 %uint_64 NonPrivatePointer
-         %51 = OpCompositeConstruct %11 %float_42
-         %53 = OpAccessChain %_ptr_StorageBuffer__runtimearr_float %1 %uint_0
-         %54 = OpAccessChain %_ptr_StorageBuffer_float %53 %uint_0
-               OpCooperativeMatrixStoreKHR %54 %51 %uint_0 %uint_64 NonPrivatePointer
-         %56 = OpCompositeConstruct %11 %float_42
-         %57 = OpCompositeConstruct %11 %float_100
-         %61 = OpCompositeConstruct %_arr_11_uint_2 %56 %57
-         %62 = OpCompositeExtract %11 %61 1
-         %63 = OpAccessChain %_ptr_StorageBuffer__runtimearr_float %1 %uint_0
-         %64 = OpAccessChain %_ptr_StorageBuffer_float %63 %uint_0
-               OpCooperativeMatrixStoreKHR %64 %62 %uint_0 %uint_64 NonPrivatePointer
-         %66 = OpCompositeConstruct %11 %float_42
-         %67 = OpCompositeConstruct %11 %float_100
-         %68 = OpCompositeConstruct %_arr_11_uint_2 %66 %67
-         %69 = OpCompositeConstruct %11 %float_n7
-         %71 = OpCompositeConstruct %11 %float_n42
-         %73 = OpCompositeConstruct %_arr_11_uint_2 %69 %71
-         %75 = OpCompositeConstruct %_arr__arr_11_uint_2_uint_2 %68 %73
-         %76 = OpCompositeExtract %11 %75 1 0
-         %77 = OpAccessChain %_ptr_StorageBuffer__runtimearr_float %1 %uint_0
-         %78 = OpAccessChain %_ptr_StorageBuffer_float %77 %uint_0
-               OpCooperativeMatrixStoreKHR %78 %76 %uint_0 %uint_64 NonPrivatePointer
-         %80 = OpCompositeConstruct %11 %float_42
-         %81 = OpCompositeConstruct %38 %float_100
-         %82 = OpCompositeConstruct %S %80 %81
-         %83 = OpCompositeExtract %11 %82 0
-         %84 = OpAccessChain %_ptr_StorageBuffer__runtimearr_float %1 %uint_0
-         %85 = OpAccessChain %_ptr_StorageBuffer_float %84 %uint_0
-               OpCooperativeMatrixStoreKHR %85 %83 %uint_0 %uint_64 NonPrivatePointer
+         %34 = OpArrayLength %uint %1 0
+         %35 = OpIMul %uint %uint_64 %uint_7
+         %36 = OpIAdd %uint %uint_0 %35
+         %37 = OpIAdd %uint %36 %uint_8
+         %38 = OpULessThanEqual %bool %37 %34
+               OpSelectionMerge %39 None
+               OpBranchConditional %38 %40 %39
+         %40 = OpLabel
+        %150 = OpAccessChain %_ptr_StorageBuffer__runtimearr_float %1 %uint_0
+        %151 = OpAccessChain %_ptr_StorageBuffer_float %150 %uint_0
+               OpCooperativeMatrixStoreKHR %151 %32 %uint_0 %uint_64 NonPrivatePointer
+               OpBranch %39
+         %39 = OpLabel
+         %43 = OpCompositeExtract %11 %41 2 3
+         %44 = OpAccessChain %_ptr_StorageBuffer__runtimearr_float %1 %uint_0
+         %45 = OpArrayLength %uint %1 0
+         %46 = OpIMul %uint %uint_64 %uint_7
+         %47 = OpIAdd %uint %uint_0 %46
+         %48 = OpIAdd %uint %47 %uint_8
+         %49 = OpULessThanEqual %bool %48 %45
+               OpSelectionMerge %50 None
+               OpBranchConditional %49 %51 %50
+         %51 = OpLabel
+        %153 = OpAccessChain %_ptr_StorageBuffer__runtimearr_float %1 %uint_0
+        %154 = OpAccessChain %_ptr_StorageBuffer_float %153 %uint_0
+               OpCooperativeMatrixStoreKHR %154 %43 %uint_0 %uint_64 NonPrivatePointer
+               OpBranch %50
+         %50 = OpLabel
+         %57 = OpCompositeExtract %11 %52 0
+         %58 = OpAccessChain %_ptr_StorageBuffer__runtimearr_float %1 %uint_0
+         %59 = OpArrayLength %uint %1 0
+         %60 = OpIMul %uint %uint_64 %uint_7
+         %61 = OpIAdd %uint %uint_0 %60
+         %62 = OpIAdd %uint %61 %uint_8
+         %63 = OpULessThanEqual %bool %62 %59
+               OpSelectionMerge %64 None
+               OpBranchConditional %63 %65 %64
+         %65 = OpLabel
+        %156 = OpAccessChain %_ptr_StorageBuffer__runtimearr_float %1 %uint_0
+        %157 = OpAccessChain %_ptr_StorageBuffer_float %156 %uint_0
+               OpCooperativeMatrixStoreKHR %157 %57 %uint_0 %uint_64 NonPrivatePointer
+               OpBranch %64
+         %64 = OpLabel
+         %68 = OpCompositeExtract %54 %66 0 1
+         %69 = OpAccessChain %_ptr_StorageBuffer__runtimearr_float %1 %uint_0
+         %70 = OpArrayLength %uint %1 0
+         %71 = OpIMul %uint %uint_64 %uint_7
+         %72 = OpIAdd %uint %uint_0 %71
+         %73 = OpIAdd %uint %72 %uint_8
+         %74 = OpULessThanEqual %bool %73 %70
+               OpSelectionMerge %75 None
+               OpBranchConditional %74 %76 %75
+         %76 = OpLabel
+        %159 = OpAccessChain %_ptr_StorageBuffer__runtimearr_float %1 %uint_0
+        %160 = OpAccessChain %_ptr_StorageBuffer_float %159 %uint_0
+               OpCooperativeMatrixStoreKHR %160 %68 %uint_0 %uint_64 NonPrivatePointer
+               OpBranch %75
+         %75 = OpLabel
+         %77 = OpCompositeConstruct %11 %float_42
+         %79 = OpAccessChain %_ptr_StorageBuffer__runtimearr_float %1 %uint_0
+         %80 = OpArrayLength %uint %1 0
+         %81 = OpIMul %uint %uint_64 %uint_7
+         %82 = OpIAdd %uint %uint_0 %81
+         %83 = OpIAdd %uint %82 %uint_8
+         %84 = OpULessThanEqual %bool %83 %80
+               OpSelectionMerge %85 None
+               OpBranchConditional %84 %86 %85
+         %86 = OpLabel
+        %162 = OpAccessChain %_ptr_StorageBuffer__runtimearr_float %1 %uint_0
+        %163 = OpAccessChain %_ptr_StorageBuffer_float %162 %uint_0
+               OpCooperativeMatrixStoreKHR %163 %77 %uint_0 %uint_64 NonPrivatePointer
+               OpBranch %85
+         %85 = OpLabel
          %87 = OpCompositeConstruct %11 %float_42
-         %88 = OpCompositeConstruct %38 %float_100
-         %89 = OpCompositeConstruct %S %87 %88
-         %90 = OpCompositeConstruct %S_Nested %89
-         %91 = OpCompositeExtract %38 %90 0 1
-         %92 = OpAccessChain %_ptr_StorageBuffer__runtimearr_float %1 %uint_0
-         %93 = OpAccessChain %_ptr_StorageBuffer_float %92 %uint_0
-               OpCooperativeMatrixStoreKHR %93 %91 %uint_0 %uint_64 NonPrivatePointer
+         %88 = OpCompositeConstruct %11 %float_100
+         %92 = OpCompositeConstruct %_arr_11_uint_2 %87 %88
+         %93 = OpCompositeExtract %11 %92 1
+         %94 = OpAccessChain %_ptr_StorageBuffer__runtimearr_float %1 %uint_0
+         %95 = OpArrayLength %uint %1 0
+         %96 = OpIMul %uint %uint_64 %uint_7
+         %97 = OpIAdd %uint %uint_0 %96
+         %98 = OpIAdd %uint %97 %uint_8
+         %99 = OpULessThanEqual %bool %98 %95
+               OpSelectionMerge %100 None
+               OpBranchConditional %99 %101 %100
+        %101 = OpLabel
+        %165 = OpAccessChain %_ptr_StorageBuffer__runtimearr_float %1 %uint_0
+        %166 = OpAccessChain %_ptr_StorageBuffer_float %165 %uint_0
+               OpCooperativeMatrixStoreKHR %166 %93 %uint_0 %uint_64 NonPrivatePointer
+               OpBranch %100
+        %100 = OpLabel
+        %102 = OpCompositeConstruct %11 %float_42
+        %103 = OpCompositeConstruct %11 %float_100
+        %104 = OpCompositeConstruct %_arr_11_uint_2 %102 %103
+        %105 = OpCompositeConstruct %11 %float_n7
+        %107 = OpCompositeConstruct %11 %float_n42
+        %109 = OpCompositeConstruct %_arr_11_uint_2 %105 %107
+        %111 = OpCompositeConstruct %_arr__arr_11_uint_2_uint_2 %104 %109
+        %112 = OpCompositeExtract %11 %111 1 0
+        %113 = OpAccessChain %_ptr_StorageBuffer__runtimearr_float %1 %uint_0
+        %114 = OpArrayLength %uint %1 0
+        %115 = OpIMul %uint %uint_64 %uint_7
+        %116 = OpIAdd %uint %uint_0 %115
+        %117 = OpIAdd %uint %116 %uint_8
+        %118 = OpULessThanEqual %bool %117 %114
+               OpSelectionMerge %119 None
+               OpBranchConditional %118 %120 %119
+        %120 = OpLabel
+        %168 = OpAccessChain %_ptr_StorageBuffer__runtimearr_float %1 %uint_0
+        %169 = OpAccessChain %_ptr_StorageBuffer_float %168 %uint_0
+               OpCooperativeMatrixStoreKHR %169 %112 %uint_0 %uint_64 NonPrivatePointer
+               OpBranch %119
+        %119 = OpLabel
+        %121 = OpCompositeConstruct %11 %float_42
+        %122 = OpCompositeConstruct %54 %float_100
+        %123 = OpCompositeConstruct %S %121 %122
+        %124 = OpCompositeExtract %11 %123 0
+        %125 = OpAccessChain %_ptr_StorageBuffer__runtimearr_float %1 %uint_0
+        %126 = OpArrayLength %uint %1 0
+        %127 = OpIMul %uint %uint_64 %uint_7
+        %128 = OpIAdd %uint %uint_0 %127
+        %129 = OpIAdd %uint %128 %uint_8
+        %130 = OpULessThanEqual %bool %129 %126
+               OpSelectionMerge %131 None
+               OpBranchConditional %130 %132 %131
+        %132 = OpLabel
+        %171 = OpAccessChain %_ptr_StorageBuffer__runtimearr_float %1 %uint_0
+        %172 = OpAccessChain %_ptr_StorageBuffer_float %171 %uint_0
+               OpCooperativeMatrixStoreKHR %172 %124 %uint_0 %uint_64 NonPrivatePointer
+               OpBranch %131
+        %131 = OpLabel
+        %133 = OpCompositeConstruct %11 %float_42
+        %134 = OpCompositeConstruct %54 %float_100
+        %135 = OpCompositeConstruct %S %133 %134
+        %136 = OpCompositeConstruct %S_Nested %135
+        %137 = OpCompositeExtract %54 %136 0 1
+        %138 = OpAccessChain %_ptr_StorageBuffer__runtimearr_float %1 %uint_0
+        %139 = OpArrayLength %uint %1 0
+        %140 = OpIMul %uint %uint_64 %uint_7
+        %141 = OpIAdd %uint %uint_0 %140
+        %142 = OpIAdd %uint %141 %uint_8
+        %143 = OpULessThanEqual %bool %142 %139
+               OpSelectionMerge %144 None
+               OpBranchConditional %143 %145 %144
+        %145 = OpLabel
+        %174 = OpAccessChain %_ptr_StorageBuffer__runtimearr_float %1 %uint_0
+        %175 = OpAccessChain %_ptr_StorageBuffer_float %174 %uint_0
+               OpCooperativeMatrixStoreKHR %175 %137 %uint_0 %uint_64 NonPrivatePointer
+               OpBranch %144
+        %144 = OpLabel
                OpReturn
                OpFunctionEnd
diff --git a/test/tint/extensions/subgroup_matrix/function_param.wgsl.expected.msl b/test/tint/extensions/subgroup_matrix/function_param.wgsl.expected.msl
index 3a0f56d..1ec7cae 100644
--- a/test/tint/extensions/subgroup_matrix/function_param.wgsl.expected.msl
+++ b/test/tint/extensions/subgroup_matrix/function_param.wgsl.expected.msl
@@ -24,18 +24,29 @@
 
 struct tint_module_vars_struct {
   device tint_array<float, 1>* tint_member;
+  const constant tint_array<uint4, 1>* tint_storage_buffer_sizes;
 };
 
 void foo(simdgroup_float8x8 m, tint_array<simdgroup_float8x8, 4> m_array, tint_array<tint_array<simdgroup_float8x8, 4>, 4> m_nested_array, S m_struct, S_Nested m_nested_struct, tint_module_vars_struct tint_module_vars) {
-  simdgroup_store(m, (&(*tint_module_vars.tint_member)[0u]), ulong(64u), ulong2(0ul), false);
-  simdgroup_store(m_array[0u], (&(*tint_module_vars.tint_member)[0u]), ulong(64u), ulong2(0ul), false);
-  simdgroup_store(m_nested_array[1u][2u], (&(*tint_module_vars.tint_member)[0u]), ulong(64u), ulong2(0ul), false);
-  simdgroup_store(m_struct.l, (&(*tint_module_vars.tint_member)[0u]), ulong(64u), ulong2(0ul), false);
-  simdgroup_store(m_nested_struct.s.r, (&(*tint_module_vars.tint_member)[0u]), ulong(64u), ulong2(0ul), false);
+  if ((((0u + (64u * 7u)) + 8u) <= ((*tint_module_vars.tint_storage_buffer_sizes)[0u].x / 4u))) {
+    simdgroup_store(m, (&(*tint_module_vars.tint_member)[0u]), ulong(64u), ulong2(0ul), false);
+  }
+  if ((((0u + (64u * 7u)) + 8u) <= ((*tint_module_vars.tint_storage_buffer_sizes)[0u].x / 4u))) {
+    simdgroup_store(m_array[0u], (&(*tint_module_vars.tint_member)[0u]), ulong(64u), ulong2(0ul), false);
+  }
+  if ((((0u + (64u * 7u)) + 8u) <= ((*tint_module_vars.tint_storage_buffer_sizes)[0u].x / 4u))) {
+    simdgroup_store(m_nested_array[1u][2u], (&(*tint_module_vars.tint_member)[0u]), ulong(64u), ulong2(0ul), false);
+  }
+  if ((((0u + (64u * 7u)) + 8u) <= ((*tint_module_vars.tint_storage_buffer_sizes)[0u].x / 4u))) {
+    simdgroup_store(m_struct.l, (&(*tint_module_vars.tint_member)[0u]), ulong(64u), ulong2(0ul), false);
+  }
+  if ((((0u + (64u * 7u)) + 8u) <= ((*tint_module_vars.tint_storage_buffer_sizes)[0u].x / 4u))) {
+    simdgroup_store(m_nested_struct.s.r, (&(*tint_module_vars.tint_member)[0u]), ulong(64u), ulong2(0ul), false);
+  }
 }
 
-kernel void v(device tint_array<float, 1>* v_1 [[buffer(0)]]) {
-  tint_module_vars_struct const tint_module_vars = tint_module_vars_struct{.tint_member=v_1};
+kernel void v(device tint_array<float, 1>* v_1 [[buffer(0)]], const constant tint_array<uint4, 1>* tint_storage_buffer_sizes [[buffer(30)]]) {
+  tint_module_vars_struct const tint_module_vars = tint_module_vars_struct{.tint_member=v_1, .tint_storage_buffer_sizes=tint_storage_buffer_sizes};
   simdgroup_float8x8 m = make_filled_simdgroup_matrix<float, 8, 8>(0.0f);
   tint_array<simdgroup_float8x8, 4> m_array = {};
   tint_array<tint_array<simdgroup_float8x8, 4>, 4> m_nested_array = {};
diff --git a/test/tint/extensions/subgroup_matrix/function_param.wgsl.expected.spvasm b/test/tint/extensions/subgroup_matrix/function_param.wgsl.expected.spvasm
index 226b1ef..d15ac22 100644
--- a/test/tint/extensions/subgroup_matrix/function_param.wgsl.expected.spvasm
+++ b/test/tint/extensions/subgroup_matrix/function_param.wgsl.expected.spvasm
@@ -1,7 +1,7 @@
 ; SPIR-V
 ; Version: 1.3
 ; Generator: Google Tint Compiler; 1
-; Bound: 75
+; Bound: 117
 ; Schema: 0
                OpCapability Shader
                OpCapability VulkanMemoryModel
@@ -56,21 +56,23 @@
    %S_Nested = OpTypeStruct %S
          %25 = OpTypeFunction %void %8 %_arr_8_uint_4 %_arr__arr_8_uint_4_uint_4 %S %S_Nested
 %_ptr_StorageBuffer__runtimearr_float = OpTypePointer StorageBuffer %_runtimearr_float
-%_ptr_StorageBuffer_float = OpTypePointer StorageBuffer %float
     %uint_64 = OpConstant %uint 64
-         %50 = OpTypeFunction %void
+     %uint_7 = OpConstant %uint 7
+       %bool = OpTypeBool
+%_ptr_StorageBuffer_float = OpTypePointer StorageBuffer %float
+         %92 = OpTypeFunction %void
 %_ptr_Function_8 = OpTypePointer Function %8
     %float_0 = OpConstant %float 0
-         %54 = OpConstantComposite %8 %float_0
+         %96 = OpConstantComposite %8 %float_0
 %_ptr_Function__arr_8_uint_4 = OpTypePointer Function %_arr_8_uint_4
-         %58 = OpConstantComposite %_arr_8_uint_4 %54 %54 %54 %54
+        %100 = OpConstantComposite %_arr_8_uint_4 %96 %96 %96 %96
 %_ptr_Function__arr__arr_8_uint_4_uint_4 = OpTypePointer Function %_arr__arr_8_uint_4_uint_4
-         %61 = OpConstantComposite %_arr__arr_8_uint_4_uint_4 %58 %58 %58 %58
+        %103 = OpConstantComposite %_arr__arr_8_uint_4_uint_4 %100 %100 %100 %100
 %_ptr_Function_S = OpTypePointer Function %S
-         %65 = OpConstantComposite %20 %float_0
-         %64 = OpConstantComposite %S %54 %65
+        %107 = OpConstantComposite %20 %float_0
+        %106 = OpConstantComposite %S %96 %107
 %_ptr_Function_S_Nested = OpTypePointer Function %S_Nested
-         %68 = OpConstantComposite %S_Nested %64
+        %110 = OpConstantComposite %S_Nested %106
         %foo = OpFunction %void None %25
           %m = OpFunctionParameter %8
     %m_array = OpFunctionParameter %_arr_8_uint_4
@@ -79,38 +81,93 @@
 %m_nested_struct = OpFunctionParameter %S_Nested
          %26 = OpLabel
          %27 = OpAccessChain %_ptr_StorageBuffer__runtimearr_float %1 %uint_0
-         %29 = OpAccessChain %_ptr_StorageBuffer_float %27 %uint_0
-               OpCooperativeMatrixStoreKHR %29 %m %uint_0 %uint_64 NonPrivatePointer
-         %33 = OpCompositeExtract %8 %m_array 0
-         %34 = OpAccessChain %_ptr_StorageBuffer__runtimearr_float %1 %uint_0
-         %35 = OpAccessChain %_ptr_StorageBuffer_float %34 %uint_0
-               OpCooperativeMatrixStoreKHR %35 %33 %uint_0 %uint_64 NonPrivatePointer
-         %37 = OpCompositeExtract %8 %m_nested_array 1 2
-         %38 = OpAccessChain %_ptr_StorageBuffer__runtimearr_float %1 %uint_0
-         %39 = OpAccessChain %_ptr_StorageBuffer_float %38 %uint_0
-               OpCooperativeMatrixStoreKHR %39 %37 %uint_0 %uint_64 NonPrivatePointer
-         %41 = OpCompositeExtract %8 %m_struct 0
-         %42 = OpAccessChain %_ptr_StorageBuffer__runtimearr_float %1 %uint_0
-         %43 = OpAccessChain %_ptr_StorageBuffer_float %42 %uint_0
-               OpCooperativeMatrixStoreKHR %43 %41 %uint_0 %uint_64 NonPrivatePointer
-         %45 = OpCompositeExtract %20 %m_nested_struct 0 1
-         %46 = OpAccessChain %_ptr_StorageBuffer__runtimearr_float %1 %uint_0
-         %47 = OpAccessChain %_ptr_StorageBuffer_float %46 %uint_0
-               OpCooperativeMatrixStoreKHR %47 %45 %uint_0 %uint_64 NonPrivatePointer
+         %29 = OpArrayLength %uint %1 0
+         %30 = OpIMul %uint %uint_64 %uint_7
+         %33 = OpIAdd %uint %uint_0 %30
+         %34 = OpIAdd %uint %33 %uint_8
+         %35 = OpULessThanEqual %bool %34 %29
+               OpSelectionMerge %37 None
+               OpBranchConditional %35 %38 %37
+         %38 = OpLabel
+         %75 = OpAccessChain %_ptr_StorageBuffer__runtimearr_float %1 %uint_0
+         %76 = OpAccessChain %_ptr_StorageBuffer_float %75 %uint_0
+               OpCooperativeMatrixStoreKHR %76 %m %uint_0 %uint_64 NonPrivatePointer
+               OpBranch %37
+         %37 = OpLabel
+         %39 = OpCompositeExtract %8 %m_array 0
+         %40 = OpAccessChain %_ptr_StorageBuffer__runtimearr_float %1 %uint_0
+         %41 = OpArrayLength %uint %1 0
+         %42 = OpIMul %uint %uint_64 %uint_7
+         %43 = OpIAdd %uint %uint_0 %42
+         %44 = OpIAdd %uint %43 %uint_8
+         %45 = OpULessThanEqual %bool %44 %41
+               OpSelectionMerge %46 None
+               OpBranchConditional %45 %47 %46
+         %47 = OpLabel
+         %79 = OpAccessChain %_ptr_StorageBuffer__runtimearr_float %1 %uint_0
+         %80 = OpAccessChain %_ptr_StorageBuffer_float %79 %uint_0
+               OpCooperativeMatrixStoreKHR %80 %39 %uint_0 %uint_64 NonPrivatePointer
+               OpBranch %46
+         %46 = OpLabel
+         %48 = OpCompositeExtract %8 %m_nested_array 1 2
+         %49 = OpAccessChain %_ptr_StorageBuffer__runtimearr_float %1 %uint_0
+         %50 = OpArrayLength %uint %1 0
+         %51 = OpIMul %uint %uint_64 %uint_7
+         %52 = OpIAdd %uint %uint_0 %51
+         %53 = OpIAdd %uint %52 %uint_8
+         %54 = OpULessThanEqual %bool %53 %50
+               OpSelectionMerge %55 None
+               OpBranchConditional %54 %56 %55
+         %56 = OpLabel
+         %82 = OpAccessChain %_ptr_StorageBuffer__runtimearr_float %1 %uint_0
+         %83 = OpAccessChain %_ptr_StorageBuffer_float %82 %uint_0
+               OpCooperativeMatrixStoreKHR %83 %48 %uint_0 %uint_64 NonPrivatePointer
+               OpBranch %55
+         %55 = OpLabel
+         %57 = OpCompositeExtract %8 %m_struct 0
+         %58 = OpAccessChain %_ptr_StorageBuffer__runtimearr_float %1 %uint_0
+         %59 = OpArrayLength %uint %1 0
+         %60 = OpIMul %uint %uint_64 %uint_7
+         %61 = OpIAdd %uint %uint_0 %60
+         %62 = OpIAdd %uint %61 %uint_8
+         %63 = OpULessThanEqual %bool %62 %59
+               OpSelectionMerge %64 None
+               OpBranchConditional %63 %65 %64
+         %65 = OpLabel
+         %85 = OpAccessChain %_ptr_StorageBuffer__runtimearr_float %1 %uint_0
+         %86 = OpAccessChain %_ptr_StorageBuffer_float %85 %uint_0
+               OpCooperativeMatrixStoreKHR %86 %57 %uint_0 %uint_64 NonPrivatePointer
+               OpBranch %64
+         %64 = OpLabel
+         %66 = OpCompositeExtract %20 %m_nested_struct 0 1
+         %67 = OpAccessChain %_ptr_StorageBuffer__runtimearr_float %1 %uint_0
+         %68 = OpArrayLength %uint %1 0
+         %69 = OpIMul %uint %uint_64 %uint_7
+         %70 = OpIAdd %uint %uint_0 %69
+         %71 = OpIAdd %uint %70 %uint_8
+         %72 = OpULessThanEqual %bool %71 %68
+               OpSelectionMerge %73 None
+               OpBranchConditional %72 %74 %73
+         %74 = OpLabel
+         %88 = OpAccessChain %_ptr_StorageBuffer__runtimearr_float %1 %uint_0
+         %89 = OpAccessChain %_ptr_StorageBuffer_float %88 %uint_0
+               OpCooperativeMatrixStoreKHR %89 %66 %uint_0 %uint_64 NonPrivatePointer
+               OpBranch %73
+         %73 = OpLabel
                OpReturn
                OpFunctionEnd
-       %main = OpFunction %void None %50
-         %51 = OpLabel
-        %m_0 = OpVariable %_ptr_Function_8 Function %54
-  %m_array_0 = OpVariable %_ptr_Function__arr_8_uint_4 Function %58
-%m_nested_array_0 = OpVariable %_ptr_Function__arr__arr_8_uint_4_uint_4 Function %61
- %m_struct_0 = OpVariable %_ptr_Function_S Function %64
-%m_nested_struct_0 = OpVariable %_ptr_Function_S_Nested Function %68
-         %69 = OpLoad %8 %m_0 None
-         %70 = OpLoad %_arr_8_uint_4 %m_array_0 None
-         %71 = OpLoad %_arr__arr_8_uint_4_uint_4 %m_nested_array_0 None
-         %72 = OpLoad %S %m_struct_0 None
-         %73 = OpLoad %S_Nested %m_nested_struct_0 None
-         %74 = OpFunctionCall %void %foo %69 %70 %71 %72 %73
+       %main = OpFunction %void None %92
+         %93 = OpLabel
+        %m_0 = OpVariable %_ptr_Function_8 Function %96
+  %m_array_0 = OpVariable %_ptr_Function__arr_8_uint_4 Function %100
+%m_nested_array_0 = OpVariable %_ptr_Function__arr__arr_8_uint_4_uint_4 Function %103
+ %m_struct_0 = OpVariable %_ptr_Function_S Function %106
+%m_nested_struct_0 = OpVariable %_ptr_Function_S_Nested Function %110
+        %111 = OpLoad %8 %m_0 None
+        %112 = OpLoad %_arr_8_uint_4 %m_array_0 None
+        %113 = OpLoad %_arr__arr_8_uint_4_uint_4 %m_nested_array_0 None
+        %114 = OpLoad %S %m_struct_0 None
+        %115 = OpLoad %S_Nested %m_nested_struct_0 None
+        %116 = OpFunctionCall %void %foo %111 %112 %113 %114 %115
                OpReturn
                OpFunctionEnd
diff --git a/test/tint/extensions/subgroup_matrix/function_return_value.wgsl.expected.msl b/test/tint/extensions/subgroup_matrix/function_return_value.wgsl.expected.msl
index d0722b4..58ab434 100644
--- a/test/tint/extensions/subgroup_matrix/function_return_value.wgsl.expected.msl
+++ b/test/tint/extensions/subgroup_matrix/function_return_value.wgsl.expected.msl
@@ -24,6 +24,7 @@
 
 struct tint_module_vars_struct {
   device tint_array<float, 1>* tint_member;
+  const constant tint_array<uint4, 1>* tint_storage_buffer_sizes;
 };
 
 simdgroup_float8x8 make_matrix() {
@@ -51,11 +52,26 @@
   return m_nested_struct;
 }
 
-kernel void v(device tint_array<float, 1>* v_1 [[buffer(0)]]) {
-  tint_module_vars_struct const tint_module_vars = tint_module_vars_struct{.tint_member=v_1};
-  simdgroup_store(make_matrix(), (&(*tint_module_vars.tint_member)[0u]), ulong(64u), ulong2(0ul), false);
-  simdgroup_store(make_array()[0u], (&(*tint_module_vars.tint_member)[0u]), ulong(64u), ulong2(0ul), false);
-  simdgroup_store(make_nested_array()[1u][2u], (&(*tint_module_vars.tint_member)[0u]), ulong(64u), ulong2(0ul), false);
-  simdgroup_store(make_struct().l, (&(*tint_module_vars.tint_member)[0u]), ulong(64u), ulong2(0ul), false);
-  simdgroup_store(make_nested_struct().s.r, (&(*tint_module_vars.tint_member)[0u]), ulong(64u), ulong2(0ul), false);
+kernel void v(device tint_array<float, 1>* v_1 [[buffer(0)]], const constant tint_array<uint4, 1>* tint_storage_buffer_sizes [[buffer(30)]]) {
+  tint_module_vars_struct const tint_module_vars = tint_module_vars_struct{.tint_member=v_1, .tint_storage_buffer_sizes=tint_storage_buffer_sizes};
+  simdgroup_float8x8 const v_2 = make_matrix();
+  if ((((0u + (64u * 7u)) + 8u) <= ((*tint_module_vars.tint_storage_buffer_sizes)[0u].x / 4u))) {
+    simdgroup_store(v_2, (&(*tint_module_vars.tint_member)[0u]), ulong(64u), ulong2(0ul), false);
+  }
+  simdgroup_float8x8 const v_3 = make_array()[0u];
+  if ((((0u + (64u * 7u)) + 8u) <= ((*tint_module_vars.tint_storage_buffer_sizes)[0u].x / 4u))) {
+    simdgroup_store(v_3, (&(*tint_module_vars.tint_member)[0u]), ulong(64u), ulong2(0ul), false);
+  }
+  simdgroup_float8x8 const v_4 = make_nested_array()[1u][2u];
+  if ((((0u + (64u * 7u)) + 8u) <= ((*tint_module_vars.tint_storage_buffer_sizes)[0u].x / 4u))) {
+    simdgroup_store(v_4, (&(*tint_module_vars.tint_member)[0u]), ulong(64u), ulong2(0ul), false);
+  }
+  simdgroup_float8x8 const v_5 = make_struct().l;
+  if ((((0u + (64u * 7u)) + 8u) <= ((*tint_module_vars.tint_storage_buffer_sizes)[0u].x / 4u))) {
+    simdgroup_store(v_5, (&(*tint_module_vars.tint_member)[0u]), ulong(64u), ulong2(0ul), false);
+  }
+  simdgroup_float8x8 const v_6 = make_nested_struct().s.r;
+  if ((((0u + (64u * 7u)) + 8u) <= ((*tint_module_vars.tint_storage_buffer_sizes)[0u].x / 4u))) {
+    simdgroup_store(v_6, (&(*tint_module_vars.tint_member)[0u]), ulong(64u), ulong2(0ul), false);
+  }
 }
diff --git a/test/tint/extensions/subgroup_matrix/function_return_value.wgsl.expected.spvasm b/test/tint/extensions/subgroup_matrix/function_return_value.wgsl.expected.spvasm
index c667e55..1b5b675 100644
--- a/test/tint/extensions/subgroup_matrix/function_return_value.wgsl.expected.spvasm
+++ b/test/tint/extensions/subgroup_matrix/function_return_value.wgsl.expected.spvasm
@@ -1,7 +1,7 @@
 ; SPIR-V
 ; Version: 1.3
 ; Generator: Google Tint Compiler; 1
-; Bound: 86
+; Bound: 128
 ; Schema: 0
                OpCapability Shader
                OpCapability VulkanMemoryModel
@@ -72,8 +72,10 @@
        %void = OpTypeVoid
          %57 = OpTypeFunction %void
 %_ptr_StorageBuffer__runtimearr_float = OpTypePointer StorageBuffer %_runtimearr_float
-%_ptr_StorageBuffer_float = OpTypePointer StorageBuffer %float
     %uint_64 = OpConstant %uint 64
+     %uint_7 = OpConstant %uint 7
+       %bool = OpTypeBool
+%_ptr_StorageBuffer_float = OpTypePointer StorageBuffer %float
 %make_matrix = OpFunction %7 None %12
          %13 = OpLabel
           %m = OpVariable %_ptr_Function_7 Function %16
@@ -108,27 +110,82 @@
          %58 = OpLabel
          %59 = OpFunctionCall %7 %make_matrix
          %60 = OpAccessChain %_ptr_StorageBuffer__runtimearr_float %1 %uint_0
-         %62 = OpAccessChain %_ptr_StorageBuffer_float %60 %uint_0
-               OpCooperativeMatrixStoreKHR %62 %59 %uint_0 %uint_64 NonPrivatePointer
-         %66 = OpFunctionCall %_arr_7_uint_4 %make_array
-         %67 = OpCompositeExtract %7 %66 0
-         %68 = OpAccessChain %_ptr_StorageBuffer__runtimearr_float %1 %uint_0
-         %69 = OpAccessChain %_ptr_StorageBuffer_float %68 %uint_0
-               OpCooperativeMatrixStoreKHR %69 %67 %uint_0 %uint_64 NonPrivatePointer
-         %71 = OpFunctionCall %_arr__arr_7_uint_4_uint_4 %make_nested_array
-         %72 = OpCompositeExtract %7 %71 1 2
-         %73 = OpAccessChain %_ptr_StorageBuffer__runtimearr_float %1 %uint_0
-         %74 = OpAccessChain %_ptr_StorageBuffer_float %73 %uint_0
-               OpCooperativeMatrixStoreKHR %74 %72 %uint_0 %uint_64 NonPrivatePointer
-         %76 = OpFunctionCall %S %make_struct
-         %77 = OpCompositeExtract %7 %76 0
-         %78 = OpAccessChain %_ptr_StorageBuffer__runtimearr_float %1 %uint_0
-         %79 = OpAccessChain %_ptr_StorageBuffer_float %78 %uint_0
-               OpCooperativeMatrixStoreKHR %79 %77 %uint_0 %uint_64 NonPrivatePointer
-         %81 = OpFunctionCall %S_Nested %make_nested_struct
-         %82 = OpCompositeExtract %38 %81 0 1
-         %83 = OpAccessChain %_ptr_StorageBuffer__runtimearr_float %1 %uint_0
-         %84 = OpAccessChain %_ptr_StorageBuffer_float %83 %uint_0
-               OpCooperativeMatrixStoreKHR %84 %82 %uint_0 %uint_64 NonPrivatePointer
+         %62 = OpArrayLength %uint %1 0
+         %63 = OpIMul %uint %uint_64 %uint_7
+         %66 = OpIAdd %uint %uint_0 %63
+         %67 = OpIAdd %uint %66 %uint_8
+         %68 = OpULessThanEqual %bool %67 %62
+               OpSelectionMerge %70 None
+               OpBranchConditional %68 %71 %70
+         %71 = OpLabel
+        %112 = OpAccessChain %_ptr_StorageBuffer__runtimearr_float %1 %uint_0
+        %113 = OpAccessChain %_ptr_StorageBuffer_float %112 %uint_0
+               OpCooperativeMatrixStoreKHR %113 %59 %uint_0 %uint_64 NonPrivatePointer
+               OpBranch %70
+         %70 = OpLabel
+         %72 = OpFunctionCall %_arr_7_uint_4 %make_array
+         %73 = OpCompositeExtract %7 %72 0
+         %74 = OpAccessChain %_ptr_StorageBuffer__runtimearr_float %1 %uint_0
+         %75 = OpArrayLength %uint %1 0
+         %76 = OpIMul %uint %uint_64 %uint_7
+         %77 = OpIAdd %uint %uint_0 %76
+         %78 = OpIAdd %uint %77 %uint_8
+         %79 = OpULessThanEqual %bool %78 %75
+               OpSelectionMerge %80 None
+               OpBranchConditional %79 %81 %80
+         %81 = OpLabel
+        %116 = OpAccessChain %_ptr_StorageBuffer__runtimearr_float %1 %uint_0
+        %117 = OpAccessChain %_ptr_StorageBuffer_float %116 %uint_0
+               OpCooperativeMatrixStoreKHR %117 %73 %uint_0 %uint_64 NonPrivatePointer
+               OpBranch %80
+         %80 = OpLabel
+         %82 = OpFunctionCall %_arr__arr_7_uint_4_uint_4 %make_nested_array
+         %83 = OpCompositeExtract %7 %82 1 2
+         %84 = OpAccessChain %_ptr_StorageBuffer__runtimearr_float %1 %uint_0
+         %85 = OpArrayLength %uint %1 0
+         %86 = OpIMul %uint %uint_64 %uint_7
+         %87 = OpIAdd %uint %uint_0 %86
+         %88 = OpIAdd %uint %87 %uint_8
+         %89 = OpULessThanEqual %bool %88 %85
+               OpSelectionMerge %90 None
+               OpBranchConditional %89 %91 %90
+         %91 = OpLabel
+        %119 = OpAccessChain %_ptr_StorageBuffer__runtimearr_float %1 %uint_0
+        %120 = OpAccessChain %_ptr_StorageBuffer_float %119 %uint_0
+               OpCooperativeMatrixStoreKHR %120 %83 %uint_0 %uint_64 NonPrivatePointer
+               OpBranch %90
+         %90 = OpLabel
+         %92 = OpFunctionCall %S %make_struct
+         %93 = OpCompositeExtract %7 %92 0
+         %94 = OpAccessChain %_ptr_StorageBuffer__runtimearr_float %1 %uint_0
+         %95 = OpArrayLength %uint %1 0
+         %96 = OpIMul %uint %uint_64 %uint_7
+         %97 = OpIAdd %uint %uint_0 %96
+         %98 = OpIAdd %uint %97 %uint_8
+         %99 = OpULessThanEqual %bool %98 %95
+               OpSelectionMerge %100 None
+               OpBranchConditional %99 %101 %100
+        %101 = OpLabel
+        %122 = OpAccessChain %_ptr_StorageBuffer__runtimearr_float %1 %uint_0
+        %123 = OpAccessChain %_ptr_StorageBuffer_float %122 %uint_0
+               OpCooperativeMatrixStoreKHR %123 %93 %uint_0 %uint_64 NonPrivatePointer
+               OpBranch %100
+        %100 = OpLabel
+        %102 = OpFunctionCall %S_Nested %make_nested_struct
+        %103 = OpCompositeExtract %38 %102 0 1
+        %104 = OpAccessChain %_ptr_StorageBuffer__runtimearr_float %1 %uint_0
+        %105 = OpArrayLength %uint %1 0
+        %106 = OpIMul %uint %uint_64 %uint_7
+        %107 = OpIAdd %uint %uint_0 %106
+        %108 = OpIAdd %uint %107 %uint_8
+        %109 = OpULessThanEqual %bool %108 %105
+               OpSelectionMerge %110 None
+               OpBranchConditional %109 %111 %110
+        %111 = OpLabel
+        %125 = OpAccessChain %_ptr_StorageBuffer__runtimearr_float %1 %uint_0
+        %126 = OpAccessChain %_ptr_StorageBuffer_float %125 %uint_0
+               OpCooperativeMatrixStoreKHR %126 %103 %uint_0 %uint_64 NonPrivatePointer
+               OpBranch %110
+        %110 = OpLabel
                OpReturn
                OpFunctionEnd
diff --git a/test/tint/extensions/subgroup_matrix/function_return_value_non_uniform.wgsl.expected.msl b/test/tint/extensions/subgroup_matrix/function_return_value_non_uniform.wgsl.expected.msl
index c69f2f6..4455fa8 100644
--- a/test/tint/extensions/subgroup_matrix/function_return_value_non_uniform.wgsl.expected.msl
+++ b/test/tint/extensions/subgroup_matrix/function_return_value_non_uniform.wgsl.expected.msl
@@ -94,15 +94,30 @@
 
 void main_inner(uint idx, tint_module_vars_struct tint_module_vars) {
   (*tint_module_vars.non_uniform_condition) = ((*tint_module_vars.tint_member)[min(idx, (((*tint_module_vars.tint_storage_buffer_sizes)[0u].x / 4u) - 1u))] == 0.0f);
-  simdgroup_store(make_matrix(tint_module_vars), (&(*tint_module_vars.tint_member)[0u]), ulong(64u), ulong2(0ul), false);
-  simdgroup_store(make_array(tint_module_vars)[0u], (&(*tint_module_vars.tint_member)[0u]), ulong(64u), ulong2(0ul), false);
-  simdgroup_store(make_nested_array(tint_module_vars)[1u][0u], (&(*tint_module_vars.tint_member)[0u]), ulong(64u), ulong2(0ul), false);
-  simdgroup_store(make_struct(tint_module_vars).l, (&(*tint_module_vars.tint_member)[0u]), ulong(64u), ulong2(0ul), false);
-  simdgroup_store(make_nested_struct(tint_module_vars).s.r, (&(*tint_module_vars.tint_member)[0u]), ulong(64u), ulong2(0ul), false);
+  simdgroup_float8x8 const v_12 = make_matrix(tint_module_vars);
+  if ((((0u + (64u * 7u)) + 8u) <= ((*tint_module_vars.tint_storage_buffer_sizes)[0u].x / 4u))) {
+    simdgroup_store(v_12, (&(*tint_module_vars.tint_member)[0u]), ulong(64u), ulong2(0ul), false);
+  }
+  simdgroup_float8x8 const v_13 = make_array(tint_module_vars)[0u];
+  if ((((0u + (64u * 7u)) + 8u) <= ((*tint_module_vars.tint_storage_buffer_sizes)[0u].x / 4u))) {
+    simdgroup_store(v_13, (&(*tint_module_vars.tint_member)[0u]), ulong(64u), ulong2(0ul), false);
+  }
+  simdgroup_float8x8 const v_14 = make_nested_array(tint_module_vars)[1u][0u];
+  if ((((0u + (64u * 7u)) + 8u) <= ((*tint_module_vars.tint_storage_buffer_sizes)[0u].x / 4u))) {
+    simdgroup_store(v_14, (&(*tint_module_vars.tint_member)[0u]), ulong(64u), ulong2(0ul), false);
+  }
+  simdgroup_float8x8 const v_15 = make_struct(tint_module_vars).l;
+  if ((((0u + (64u * 7u)) + 8u) <= ((*tint_module_vars.tint_storage_buffer_sizes)[0u].x / 4u))) {
+    simdgroup_store(v_15, (&(*tint_module_vars.tint_member)[0u]), ulong(64u), ulong2(0ul), false);
+  }
+  simdgroup_float8x8 const v_16 = make_nested_struct(tint_module_vars).s.r;
+  if ((((0u + (64u * 7u)) + 8u) <= ((*tint_module_vars.tint_storage_buffer_sizes)[0u].x / 4u))) {
+    simdgroup_store(v_16, (&(*tint_module_vars.tint_member)[0u]), ulong(64u), ulong2(0ul), false);
+  }
 }
 
-kernel void v_12(uint idx [[thread_index_in_threadgroup]], device tint_array<float, 1>* v_13 [[buffer(0)]], const constant tint_array<uint4, 1>* tint_storage_buffer_sizes [[buffer(30)]]) {
+kernel void v_17(uint idx [[thread_index_in_threadgroup]], device tint_array<float, 1>* v_18 [[buffer(0)]], const constant tint_array<uint4, 1>* tint_storage_buffer_sizes [[buffer(30)]]) {
   thread bool non_uniform_condition = false;
-  tint_module_vars_struct const tint_module_vars = tint_module_vars_struct{.tint_member=v_13, .non_uniform_condition=(&non_uniform_condition), .tint_storage_buffer_sizes=tint_storage_buffer_sizes};
+  tint_module_vars_struct const tint_module_vars = tint_module_vars_struct{.tint_member=v_18, .non_uniform_condition=(&non_uniform_condition), .tint_storage_buffer_sizes=tint_storage_buffer_sizes};
   main_inner(idx, tint_module_vars);
 }
diff --git a/test/tint/extensions/subgroup_matrix/function_return_value_non_uniform.wgsl.expected.spvasm b/test/tint/extensions/subgroup_matrix/function_return_value_non_uniform.wgsl.expected.spvasm
index e3a99b2..f773c19 100644
--- a/test/tint/extensions/subgroup_matrix/function_return_value_non_uniform.wgsl.expected.spvasm
+++ b/test/tint/extensions/subgroup_matrix/function_return_value_non_uniform.wgsl.expected.spvasm
@@ -1,7 +1,7 @@
 ; SPIR-V
 ; Version: 1.3
 ; Generator: Google Tint Compiler; 1
-; Bound: 173
+; Bound: 214
 ; Schema: 0
                OpCapability Shader
                OpCapability VulkanMemoryModel
@@ -97,7 +97,8 @@
 %_ptr_StorageBuffer__runtimearr_float = OpTypePointer StorageBuffer %_runtimearr_float
 %_ptr_StorageBuffer_float = OpTypePointer StorageBuffer %float
     %uint_64 = OpConstant %uint 64
-        %169 = OpTypeFunction %void
+     %uint_7 = OpConstant %uint 7
+        %210 = OpTypeFunction %void
 %make_matrix = OpFunction %14 None %18
          %19 = OpLabel
 %return_value = OpVariable %_ptr_Function_14 Function %22
@@ -229,33 +230,88 @@
                OpStore %non_uniform_condition %142 None
         %143 = OpFunctionCall %14 %make_matrix
         %144 = OpAccessChain %_ptr_StorageBuffer__runtimearr_float %1 %uint_0
-        %145 = OpAccessChain %_ptr_StorageBuffer_float %144 %uint_0
-               OpCooperativeMatrixStoreKHR %145 %143 %uint_0 %uint_64 NonPrivatePointer
-        %148 = OpFunctionCall %_arr_14_uint_2 %make_array
-        %149 = OpCompositeExtract %14 %148 0
-        %150 = OpAccessChain %_ptr_StorageBuffer__runtimearr_float %1 %uint_0
-        %151 = OpAccessChain %_ptr_StorageBuffer_float %150 %uint_0
-               OpCooperativeMatrixStoreKHR %151 %149 %uint_0 %uint_64 NonPrivatePointer
-        %153 = OpFunctionCall %_arr__arr_14_uint_2_uint_2 %make_nested_array
-        %154 = OpCompositeExtract %14 %153 1 0
-        %155 = OpAccessChain %_ptr_StorageBuffer__runtimearr_float %1 %uint_0
-        %156 = OpAccessChain %_ptr_StorageBuffer_float %155 %uint_0
-               OpCooperativeMatrixStoreKHR %156 %154 %uint_0 %uint_64 NonPrivatePointer
-        %158 = OpFunctionCall %S %make_struct
-        %159 = OpCompositeExtract %14 %158 0
-        %160 = OpAccessChain %_ptr_StorageBuffer__runtimearr_float %1 %uint_0
-        %161 = OpAccessChain %_ptr_StorageBuffer_float %160 %uint_0
-               OpCooperativeMatrixStoreKHR %161 %159 %uint_0 %uint_64 NonPrivatePointer
-        %163 = OpFunctionCall %S_Nested %make_nested_struct
-        %164 = OpCompositeExtract %89 %163 0 1
-        %165 = OpAccessChain %_ptr_StorageBuffer__runtimearr_float %1 %uint_0
-        %166 = OpAccessChain %_ptr_StorageBuffer_float %165 %uint_0
-               OpCooperativeMatrixStoreKHR %166 %164 %uint_0 %uint_64 NonPrivatePointer
+        %145 = OpArrayLength %uint %1 0
+        %146 = OpIMul %uint %uint_64 %uint_7
+        %149 = OpIAdd %uint %uint_0 %146
+        %150 = OpIAdd %uint %149 %uint_8
+        %151 = OpULessThanEqual %bool %150 %145
+               OpSelectionMerge %152 None
+               OpBranchConditional %151 %153 %152
+        %153 = OpLabel
+        %194 = OpAccessChain %_ptr_StorageBuffer__runtimearr_float %1 %uint_0
+        %195 = OpAccessChain %_ptr_StorageBuffer_float %194 %uint_0
+               OpCooperativeMatrixStoreKHR %195 %143 %uint_0 %uint_64 NonPrivatePointer
+               OpBranch %152
+        %152 = OpLabel
+        %154 = OpFunctionCall %_arr_14_uint_2 %make_array
+        %155 = OpCompositeExtract %14 %154 0
+        %156 = OpAccessChain %_ptr_StorageBuffer__runtimearr_float %1 %uint_0
+        %157 = OpArrayLength %uint %1 0
+        %158 = OpIMul %uint %uint_64 %uint_7
+        %159 = OpIAdd %uint %uint_0 %158
+        %160 = OpIAdd %uint %159 %uint_8
+        %161 = OpULessThanEqual %bool %160 %157
+               OpSelectionMerge %162 None
+               OpBranchConditional %161 %163 %162
+        %163 = OpLabel
+        %197 = OpAccessChain %_ptr_StorageBuffer__runtimearr_float %1 %uint_0
+        %198 = OpAccessChain %_ptr_StorageBuffer_float %197 %uint_0
+               OpCooperativeMatrixStoreKHR %198 %155 %uint_0 %uint_64 NonPrivatePointer
+               OpBranch %162
+        %162 = OpLabel
+        %164 = OpFunctionCall %_arr__arr_14_uint_2_uint_2 %make_nested_array
+        %165 = OpCompositeExtract %14 %164 1 0
+        %166 = OpAccessChain %_ptr_StorageBuffer__runtimearr_float %1 %uint_0
+        %167 = OpArrayLength %uint %1 0
+        %168 = OpIMul %uint %uint_64 %uint_7
+        %169 = OpIAdd %uint %uint_0 %168
+        %170 = OpIAdd %uint %169 %uint_8
+        %171 = OpULessThanEqual %bool %170 %167
+               OpSelectionMerge %172 None
+               OpBranchConditional %171 %173 %172
+        %173 = OpLabel
+        %200 = OpAccessChain %_ptr_StorageBuffer__runtimearr_float %1 %uint_0
+        %201 = OpAccessChain %_ptr_StorageBuffer_float %200 %uint_0
+               OpCooperativeMatrixStoreKHR %201 %165 %uint_0 %uint_64 NonPrivatePointer
+               OpBranch %172
+        %172 = OpLabel
+        %174 = OpFunctionCall %S %make_struct
+        %175 = OpCompositeExtract %14 %174 0
+        %176 = OpAccessChain %_ptr_StorageBuffer__runtimearr_float %1 %uint_0
+        %177 = OpArrayLength %uint %1 0
+        %178 = OpIMul %uint %uint_64 %uint_7
+        %179 = OpIAdd %uint %uint_0 %178
+        %180 = OpIAdd %uint %179 %uint_8
+        %181 = OpULessThanEqual %bool %180 %177
+               OpSelectionMerge %182 None
+               OpBranchConditional %181 %183 %182
+        %183 = OpLabel
+        %203 = OpAccessChain %_ptr_StorageBuffer__runtimearr_float %1 %uint_0
+        %204 = OpAccessChain %_ptr_StorageBuffer_float %203 %uint_0
+               OpCooperativeMatrixStoreKHR %204 %175 %uint_0 %uint_64 NonPrivatePointer
+               OpBranch %182
+        %182 = OpLabel
+        %184 = OpFunctionCall %S_Nested %make_nested_struct
+        %185 = OpCompositeExtract %89 %184 0 1
+        %186 = OpAccessChain %_ptr_StorageBuffer__runtimearr_float %1 %uint_0
+        %187 = OpArrayLength %uint %1 0
+        %188 = OpIMul %uint %uint_64 %uint_7
+        %189 = OpIAdd %uint %uint_0 %188
+        %190 = OpIAdd %uint %189 %uint_8
+        %191 = OpULessThanEqual %bool %190 %187
+               OpSelectionMerge %192 None
+               OpBranchConditional %191 %193 %192
+        %193 = OpLabel
+        %206 = OpAccessChain %_ptr_StorageBuffer__runtimearr_float %1 %uint_0
+        %207 = OpAccessChain %_ptr_StorageBuffer_float %206 %uint_0
+               OpCooperativeMatrixStoreKHR %207 %185 %uint_0 %uint_64 NonPrivatePointer
+               OpBranch %192
+        %192 = OpLabel
                OpReturn
                OpFunctionEnd
-       %main = OpFunction %void None %169
-        %170 = OpLabel
-        %171 = OpLoad %uint %main_local_invocation_index_Input None
-        %172 = OpFunctionCall %void %main_inner %171
+       %main = OpFunction %void None %210
+        %211 = OpLabel
+        %212 = OpLoad %uint %main_local_invocation_index_Input None
+        %213 = OpFunctionCall %void %main_inner %212
                OpReturn
                OpFunctionEnd
diff --git a/test/tint/extensions/subgroup_matrix/function_var.wgsl.expected.msl b/test/tint/extensions/subgroup_matrix/function_var.wgsl.expected.msl
index 949cc01..033231a 100644
--- a/test/tint/extensions/subgroup_matrix/function_var.wgsl.expected.msl
+++ b/test/tint/extensions/subgroup_matrix/function_var.wgsl.expected.msl
@@ -15,6 +15,7 @@
 
 struct tint_module_vars_struct {
   device tint_array<float, 1>* tint_member;
+  const constant tint_array<uint4, 1>* tint_storage_buffer_sizes;
 };
 
 struct S {
@@ -26,16 +27,31 @@
   S s;
 };
 
-kernel void v(device tint_array<float, 1>* v_1 [[buffer(0)]]) {
-  tint_module_vars_struct const tint_module_vars = tint_module_vars_struct{.tint_member=v_1};
+kernel void v(device tint_array<float, 1>* v_1 [[buffer(0)]], const constant tint_array<uint4, 1>* tint_storage_buffer_sizes [[buffer(30)]]) {
+  tint_module_vars_struct const tint_module_vars = tint_module_vars_struct{.tint_member=v_1, .tint_storage_buffer_sizes=tint_storage_buffer_sizes};
   simdgroup_float8x8 m = make_filled_simdgroup_matrix<float, 8, 8>(0.0f);
   tint_array<simdgroup_float8x8, 4> m_array = {};
   tint_array<tint_array<simdgroup_float8x8, 4>, 4> m_nested_array = {};
   S m_struct = {};
   S_Nested m_nested_struct = {};
-  simdgroup_store(m, (&(*tint_module_vars.tint_member)[0u]), ulong(64u), ulong2(0ul), false);
-  simdgroup_store(m_array[0u], (&(*tint_module_vars.tint_member)[0u]), ulong(64u), ulong2(0ul), false);
-  simdgroup_store(m_nested_array[1u][2u], (&(*tint_module_vars.tint_member)[0u]), ulong(64u), ulong2(0ul), false);
-  simdgroup_store(m_struct.l, (&(*tint_module_vars.tint_member)[0u]), ulong(64u), ulong2(0ul), false);
-  simdgroup_store(m_nested_struct.s.r, (&(*tint_module_vars.tint_member)[0u]), ulong(64u), ulong2(0ul), false);
+  simdgroup_float8x8 const v_2 = m;
+  if ((((0u + (64u * 7u)) + 8u) <= ((*tint_module_vars.tint_storage_buffer_sizes)[0u].x / 4u))) {
+    simdgroup_store(v_2, (&(*tint_module_vars.tint_member)[0u]), ulong(64u), ulong2(0ul), false);
+  }
+  simdgroup_float8x8 const v_3 = m_array[0u];
+  if ((((0u + (64u * 7u)) + 8u) <= ((*tint_module_vars.tint_storage_buffer_sizes)[0u].x / 4u))) {
+    simdgroup_store(v_3, (&(*tint_module_vars.tint_member)[0u]), ulong(64u), ulong2(0ul), false);
+  }
+  simdgroup_float8x8 const v_4 = m_nested_array[1u][2u];
+  if ((((0u + (64u * 7u)) + 8u) <= ((*tint_module_vars.tint_storage_buffer_sizes)[0u].x / 4u))) {
+    simdgroup_store(v_4, (&(*tint_module_vars.tint_member)[0u]), ulong(64u), ulong2(0ul), false);
+  }
+  simdgroup_float8x8 const v_5 = m_struct.l;
+  if ((((0u + (64u * 7u)) + 8u) <= ((*tint_module_vars.tint_storage_buffer_sizes)[0u].x / 4u))) {
+    simdgroup_store(v_5, (&(*tint_module_vars.tint_member)[0u]), ulong(64u), ulong2(0ul), false);
+  }
+  simdgroup_float8x8 const v_6 = m_nested_struct.s.r;
+  if ((((0u + (64u * 7u)) + 8u) <= ((*tint_module_vars.tint_storage_buffer_sizes)[0u].x / 4u))) {
+    simdgroup_store(v_6, (&(*tint_module_vars.tint_member)[0u]), ulong(64u), ulong2(0ul), false);
+  }
 }
diff --git a/test/tint/extensions/subgroup_matrix/function_var.wgsl.expected.spvasm b/test/tint/extensions/subgroup_matrix/function_var.wgsl.expected.spvasm
index 787ba05..97814d5 100644
--- a/test/tint/extensions/subgroup_matrix/function_var.wgsl.expected.spvasm
+++ b/test/tint/extensions/subgroup_matrix/function_var.wgsl.expected.spvasm
@@ -1,7 +1,7 @@
 ; SPIR-V
 ; Version: 1.3
 ; Generator: Google Tint Compiler; 1
-; Bound: 68
+; Bound: 110
 ; Schema: 0
                OpCapability Shader
                OpCapability VulkanMemoryModel
@@ -62,10 +62,12 @@
 %_ptr_Function_S_Nested = OpTypePointer Function %S_Nested
          %38 = OpConstantComposite %S_Nested %33
 %_ptr_StorageBuffer__runtimearr_float = OpTypePointer StorageBuffer %_runtimearr_float
-%_ptr_StorageBuffer_float = OpTypePointer StorageBuffer %float
     %uint_64 = OpConstant %uint 64
+     %uint_7 = OpConstant %uint 7
+       %bool = OpTypeBool
      %uint_2 = OpConstant %uint 2
 %_ptr_Function_31 = OpTypePointer Function %31
+%_ptr_StorageBuffer_float = OpTypePointer StorageBuffer %float
        %main = OpFunction %void None %8
           %9 = OpLabel
           %m = OpVariable %_ptr_Function_12 Function %17
@@ -75,27 +77,82 @@
 %m_nested_struct = OpVariable %_ptr_Function_S_Nested Function %38
          %39 = OpLoad %12 %m None
          %40 = OpAccessChain %_ptr_StorageBuffer__runtimearr_float %1 %uint_0
-         %42 = OpAccessChain %_ptr_StorageBuffer_float %40 %uint_0
-               OpCooperativeMatrixStoreKHR %42 %39 %uint_0 %uint_64 NonPrivatePointer
-         %46 = OpAccessChain %_ptr_Function_12 %m_array %uint_0
-         %47 = OpLoad %12 %46 None
-         %48 = OpAccessChain %_ptr_StorageBuffer__runtimearr_float %1 %uint_0
-         %49 = OpAccessChain %_ptr_StorageBuffer_float %48 %uint_0
-               OpCooperativeMatrixStoreKHR %49 %47 %uint_0 %uint_64 NonPrivatePointer
-         %51 = OpAccessChain %_ptr_Function_12 %m_nested_array %uint_1 %uint_2
-         %53 = OpLoad %12 %51 None
+         %42 = OpArrayLength %uint %1 0
+         %43 = OpIMul %uint %uint_64 %uint_7
+         %46 = OpIAdd %uint %uint_0 %43
+         %47 = OpIAdd %uint %46 %uint_8
+         %48 = OpULessThanEqual %bool %47 %42
+               OpSelectionMerge %50 None
+               OpBranchConditional %48 %51 %50
+         %51 = OpLabel
+         %94 = OpAccessChain %_ptr_StorageBuffer__runtimearr_float %1 %uint_0
+         %95 = OpAccessChain %_ptr_StorageBuffer_float %94 %uint_0
+               OpCooperativeMatrixStoreKHR %95 %39 %uint_0 %uint_64 NonPrivatePointer
+               OpBranch %50
+         %50 = OpLabel
+         %52 = OpAccessChain %_ptr_Function_12 %m_array %uint_0
+         %53 = OpLoad %12 %52 None
          %54 = OpAccessChain %_ptr_StorageBuffer__runtimearr_float %1 %uint_0
-         %55 = OpAccessChain %_ptr_StorageBuffer_float %54 %uint_0
-               OpCooperativeMatrixStoreKHR %55 %53 %uint_0 %uint_64 NonPrivatePointer
-         %57 = OpAccessChain %_ptr_Function_12 %m_struct %uint_0
-         %58 = OpLoad %12 %57 None
-         %59 = OpAccessChain %_ptr_StorageBuffer__runtimearr_float %1 %uint_0
-         %60 = OpAccessChain %_ptr_StorageBuffer_float %59 %uint_0
-               OpCooperativeMatrixStoreKHR %60 %58 %uint_0 %uint_64 NonPrivatePointer
-         %62 = OpAccessChain %_ptr_Function_31 %m_nested_struct %uint_0 %uint_1
-         %64 = OpLoad %31 %62 None
+         %55 = OpArrayLength %uint %1 0
+         %56 = OpIMul %uint %uint_64 %uint_7
+         %57 = OpIAdd %uint %uint_0 %56
+         %58 = OpIAdd %uint %57 %uint_8
+         %59 = OpULessThanEqual %bool %58 %55
+               OpSelectionMerge %60 None
+               OpBranchConditional %59 %61 %60
+         %61 = OpLabel
+         %98 = OpAccessChain %_ptr_StorageBuffer__runtimearr_float %1 %uint_0
+         %99 = OpAccessChain %_ptr_StorageBuffer_float %98 %uint_0
+               OpCooperativeMatrixStoreKHR %99 %53 %uint_0 %uint_64 NonPrivatePointer
+               OpBranch %60
+         %60 = OpLabel
+         %62 = OpAccessChain %_ptr_Function_12 %m_nested_array %uint_1 %uint_2
+         %64 = OpLoad %12 %62 None
          %65 = OpAccessChain %_ptr_StorageBuffer__runtimearr_float %1 %uint_0
-         %66 = OpAccessChain %_ptr_StorageBuffer_float %65 %uint_0
-               OpCooperativeMatrixStoreKHR %66 %64 %uint_0 %uint_64 NonPrivatePointer
+         %66 = OpArrayLength %uint %1 0
+         %67 = OpIMul %uint %uint_64 %uint_7
+         %68 = OpIAdd %uint %uint_0 %67
+         %69 = OpIAdd %uint %68 %uint_8
+         %70 = OpULessThanEqual %bool %69 %66
+               OpSelectionMerge %71 None
+               OpBranchConditional %70 %72 %71
+         %72 = OpLabel
+        %101 = OpAccessChain %_ptr_StorageBuffer__runtimearr_float %1 %uint_0
+        %102 = OpAccessChain %_ptr_StorageBuffer_float %101 %uint_0
+               OpCooperativeMatrixStoreKHR %102 %64 %uint_0 %uint_64 NonPrivatePointer
+               OpBranch %71
+         %71 = OpLabel
+         %73 = OpAccessChain %_ptr_Function_12 %m_struct %uint_0
+         %74 = OpLoad %12 %73 None
+         %75 = OpAccessChain %_ptr_StorageBuffer__runtimearr_float %1 %uint_0
+         %76 = OpArrayLength %uint %1 0
+         %77 = OpIMul %uint %uint_64 %uint_7
+         %78 = OpIAdd %uint %uint_0 %77
+         %79 = OpIAdd %uint %78 %uint_8
+         %80 = OpULessThanEqual %bool %79 %76
+               OpSelectionMerge %81 None
+               OpBranchConditional %80 %82 %81
+         %82 = OpLabel
+        %104 = OpAccessChain %_ptr_StorageBuffer__runtimearr_float %1 %uint_0
+        %105 = OpAccessChain %_ptr_StorageBuffer_float %104 %uint_0
+               OpCooperativeMatrixStoreKHR %105 %74 %uint_0 %uint_64 NonPrivatePointer
+               OpBranch %81
+         %81 = OpLabel
+         %83 = OpAccessChain %_ptr_Function_31 %m_nested_struct %uint_0 %uint_1
+         %85 = OpLoad %31 %83 None
+         %86 = OpAccessChain %_ptr_StorageBuffer__runtimearr_float %1 %uint_0
+         %87 = OpArrayLength %uint %1 0
+         %88 = OpIMul %uint %uint_64 %uint_7
+         %89 = OpIAdd %uint %uint_0 %88
+         %90 = OpIAdd %uint %89 %uint_8
+         %91 = OpULessThanEqual %bool %90 %87
+               OpSelectionMerge %92 None
+               OpBranchConditional %91 %93 %92
+         %93 = OpLabel
+        %107 = OpAccessChain %_ptr_StorageBuffer__runtimearr_float %1 %uint_0
+        %108 = OpAccessChain %_ptr_StorageBuffer_float %107 %uint_0
+               OpCooperativeMatrixStoreKHR %108 %85 %uint_0 %uint_64 NonPrivatePointer
+               OpBranch %92
+         %92 = OpLabel
                OpReturn
                OpFunctionEnd
diff --git a/test/tint/extensions/subgroup_matrix/let.wgsl.expected.msl b/test/tint/extensions/subgroup_matrix/let.wgsl.expected.msl
index 6d27de6..3ee3d97 100644
--- a/test/tint/extensions/subgroup_matrix/let.wgsl.expected.msl
+++ b/test/tint/extensions/subgroup_matrix/let.wgsl.expected.msl
@@ -15,6 +15,7 @@
 
 struct tint_module_vars_struct {
   device tint_array<float, 1>* tint_member;
+  const constant tint_array<uint4, 1>* tint_storage_buffer_sizes;
 };
 
 struct S {
@@ -26,8 +27,8 @@
   S s;
 };
 
-kernel void v(device tint_array<float, 1>* v_1 [[buffer(0)]]) {
-  tint_module_vars_struct const tint_module_vars = tint_module_vars_struct{.tint_member=v_1};
+kernel void v(device tint_array<float, 1>* v_1 [[buffer(0)]], const constant tint_array<uint4, 1>* tint_storage_buffer_sizes [[buffer(30)]]) {
+  tint_module_vars_struct const tint_module_vars = tint_module_vars_struct{.tint_member=v_1, .tint_storage_buffer_sizes=tint_storage_buffer_sizes};
   simdgroup_float8x8 m = make_filled_simdgroup_matrix<float, 8, 8>(0.0f);
   tint_array<simdgroup_float8x8, 4> m_array = {};
   tint_array<tint_array<simdgroup_float8x8, 4>, 4> m_nested_array = {};
@@ -38,9 +39,19 @@
   tint_array<tint_array<simdgroup_float8x8, 4>, 4> const m_nested_array_let = m_nested_array;
   S const m_struct_let = m_struct;
   S_Nested const m_nested_struct_let = m_nested_struct;
-  simdgroup_store(m_let, (&(*tint_module_vars.tint_member)[0u]), ulong(64u), ulong2(0ul), false);
-  simdgroup_store(m_array_let[0u], (&(*tint_module_vars.tint_member)[0u]), ulong(64u), ulong2(0ul), false);
-  simdgroup_store(m_nested_array_let[1u][2u], (&(*tint_module_vars.tint_member)[0u]), ulong(64u), ulong2(0ul), false);
-  simdgroup_store(m_struct_let.l, (&(*tint_module_vars.tint_member)[0u]), ulong(64u), ulong2(0ul), false);
-  simdgroup_store(m_nested_struct_let.s.r, (&(*tint_module_vars.tint_member)[0u]), ulong(64u), ulong2(0ul), false);
+  if ((((0u + (64u * 7u)) + 8u) <= ((*tint_module_vars.tint_storage_buffer_sizes)[0u].x / 4u))) {
+    simdgroup_store(m_let, (&(*tint_module_vars.tint_member)[0u]), ulong(64u), ulong2(0ul), false);
+  }
+  if ((((0u + (64u * 7u)) + 8u) <= ((*tint_module_vars.tint_storage_buffer_sizes)[0u].x / 4u))) {
+    simdgroup_store(m_array_let[0u], (&(*tint_module_vars.tint_member)[0u]), ulong(64u), ulong2(0ul), false);
+  }
+  if ((((0u + (64u * 7u)) + 8u) <= ((*tint_module_vars.tint_storage_buffer_sizes)[0u].x / 4u))) {
+    simdgroup_store(m_nested_array_let[1u][2u], (&(*tint_module_vars.tint_member)[0u]), ulong(64u), ulong2(0ul), false);
+  }
+  if ((((0u + (64u * 7u)) + 8u) <= ((*tint_module_vars.tint_storage_buffer_sizes)[0u].x / 4u))) {
+    simdgroup_store(m_struct_let.l, (&(*tint_module_vars.tint_member)[0u]), ulong(64u), ulong2(0ul), false);
+  }
+  if ((((0u + (64u * 7u)) + 8u) <= ((*tint_module_vars.tint_storage_buffer_sizes)[0u].x / 4u))) {
+    simdgroup_store(m_nested_struct_let.s.r, (&(*tint_module_vars.tint_member)[0u]), ulong(64u), ulong2(0ul), false);
+  }
 }
diff --git a/test/tint/extensions/subgroup_matrix/let.wgsl.expected.spvasm b/test/tint/extensions/subgroup_matrix/let.wgsl.expected.spvasm
index 9963b62..f41527d 100644
--- a/test/tint/extensions/subgroup_matrix/let.wgsl.expected.spvasm
+++ b/test/tint/extensions/subgroup_matrix/let.wgsl.expected.spvasm
@@ -1,7 +1,7 @@
 ; SPIR-V
 ; Version: 1.3
 ; Generator: Google Tint Compiler; 1
-; Bound: 66
+; Bound: 108
 ; Schema: 0
                OpCapability Shader
                OpCapability VulkanMemoryModel
@@ -67,8 +67,10 @@
 %_ptr_Function_S_Nested = OpTypePointer Function %S_Nested
          %38 = OpConstantComposite %S_Nested %33
 %_ptr_StorageBuffer__runtimearr_float = OpTypePointer StorageBuffer %_runtimearr_float
-%_ptr_StorageBuffer_float = OpTypePointer StorageBuffer %float
     %uint_64 = OpConstant %uint 64
+     %uint_7 = OpConstant %uint 7
+       %bool = OpTypeBool
+%_ptr_StorageBuffer_float = OpTypePointer StorageBuffer %float
        %main = OpFunction %void None %8
           %9 = OpLabel
           %m = OpVariable %_ptr_Function_12 Function %17
@@ -82,23 +84,78 @@
 %m_struct_let = OpLoad %S %m_struct None
 %m_nested_struct_let = OpLoad %S_Nested %m_nested_struct None
          %44 = OpAccessChain %_ptr_StorageBuffer__runtimearr_float %1 %uint_0
-         %46 = OpAccessChain %_ptr_StorageBuffer_float %44 %uint_0
-               OpCooperativeMatrixStoreKHR %46 %m_let %uint_0 %uint_64 NonPrivatePointer
-         %50 = OpCompositeExtract %12 %m_array_let 0
-         %51 = OpAccessChain %_ptr_StorageBuffer__runtimearr_float %1 %uint_0
-         %52 = OpAccessChain %_ptr_StorageBuffer_float %51 %uint_0
-               OpCooperativeMatrixStoreKHR %52 %50 %uint_0 %uint_64 NonPrivatePointer
-         %54 = OpCompositeExtract %12 %m_nested_array_let 1 2
-         %55 = OpAccessChain %_ptr_StorageBuffer__runtimearr_float %1 %uint_0
-         %56 = OpAccessChain %_ptr_StorageBuffer_float %55 %uint_0
-               OpCooperativeMatrixStoreKHR %56 %54 %uint_0 %uint_64 NonPrivatePointer
-         %58 = OpCompositeExtract %12 %m_struct_let 0
-         %59 = OpAccessChain %_ptr_StorageBuffer__runtimearr_float %1 %uint_0
-         %60 = OpAccessChain %_ptr_StorageBuffer_float %59 %uint_0
-               OpCooperativeMatrixStoreKHR %60 %58 %uint_0 %uint_64 NonPrivatePointer
-         %62 = OpCompositeExtract %31 %m_nested_struct_let 0 1
-         %63 = OpAccessChain %_ptr_StorageBuffer__runtimearr_float %1 %uint_0
-         %64 = OpAccessChain %_ptr_StorageBuffer_float %63 %uint_0
-               OpCooperativeMatrixStoreKHR %64 %62 %uint_0 %uint_64 NonPrivatePointer
+         %46 = OpArrayLength %uint %1 0
+         %47 = OpIMul %uint %uint_64 %uint_7
+         %50 = OpIAdd %uint %uint_0 %47
+         %51 = OpIAdd %uint %50 %uint_8
+         %52 = OpULessThanEqual %bool %51 %46
+               OpSelectionMerge %54 None
+               OpBranchConditional %52 %55 %54
+         %55 = OpLabel
+         %92 = OpAccessChain %_ptr_StorageBuffer__runtimearr_float %1 %uint_0
+         %93 = OpAccessChain %_ptr_StorageBuffer_float %92 %uint_0
+               OpCooperativeMatrixStoreKHR %93 %m_let %uint_0 %uint_64 NonPrivatePointer
+               OpBranch %54
+         %54 = OpLabel
+         %56 = OpCompositeExtract %12 %m_array_let 0
+         %57 = OpAccessChain %_ptr_StorageBuffer__runtimearr_float %1 %uint_0
+         %58 = OpArrayLength %uint %1 0
+         %59 = OpIMul %uint %uint_64 %uint_7
+         %60 = OpIAdd %uint %uint_0 %59
+         %61 = OpIAdd %uint %60 %uint_8
+         %62 = OpULessThanEqual %bool %61 %58
+               OpSelectionMerge %63 None
+               OpBranchConditional %62 %64 %63
+         %64 = OpLabel
+         %96 = OpAccessChain %_ptr_StorageBuffer__runtimearr_float %1 %uint_0
+         %97 = OpAccessChain %_ptr_StorageBuffer_float %96 %uint_0
+               OpCooperativeMatrixStoreKHR %97 %56 %uint_0 %uint_64 NonPrivatePointer
+               OpBranch %63
+         %63 = OpLabel
+         %65 = OpCompositeExtract %12 %m_nested_array_let 1 2
+         %66 = OpAccessChain %_ptr_StorageBuffer__runtimearr_float %1 %uint_0
+         %67 = OpArrayLength %uint %1 0
+         %68 = OpIMul %uint %uint_64 %uint_7
+         %69 = OpIAdd %uint %uint_0 %68
+         %70 = OpIAdd %uint %69 %uint_8
+         %71 = OpULessThanEqual %bool %70 %67
+               OpSelectionMerge %72 None
+               OpBranchConditional %71 %73 %72
+         %73 = OpLabel
+         %99 = OpAccessChain %_ptr_StorageBuffer__runtimearr_float %1 %uint_0
+        %100 = OpAccessChain %_ptr_StorageBuffer_float %99 %uint_0
+               OpCooperativeMatrixStoreKHR %100 %65 %uint_0 %uint_64 NonPrivatePointer
+               OpBranch %72
+         %72 = OpLabel
+         %74 = OpCompositeExtract %12 %m_struct_let 0
+         %75 = OpAccessChain %_ptr_StorageBuffer__runtimearr_float %1 %uint_0
+         %76 = OpArrayLength %uint %1 0
+         %77 = OpIMul %uint %uint_64 %uint_7
+         %78 = OpIAdd %uint %uint_0 %77
+         %79 = OpIAdd %uint %78 %uint_8
+         %80 = OpULessThanEqual %bool %79 %76
+               OpSelectionMerge %81 None
+               OpBranchConditional %80 %82 %81
+         %82 = OpLabel
+        %102 = OpAccessChain %_ptr_StorageBuffer__runtimearr_float %1 %uint_0
+        %103 = OpAccessChain %_ptr_StorageBuffer_float %102 %uint_0
+               OpCooperativeMatrixStoreKHR %103 %74 %uint_0 %uint_64 NonPrivatePointer
+               OpBranch %81
+         %81 = OpLabel
+         %83 = OpCompositeExtract %31 %m_nested_struct_let 0 1
+         %84 = OpAccessChain %_ptr_StorageBuffer__runtimearr_float %1 %uint_0
+         %85 = OpArrayLength %uint %1 0
+         %86 = OpIMul %uint %uint_64 %uint_7
+         %87 = OpIAdd %uint %uint_0 %86
+         %88 = OpIAdd %uint %87 %uint_8
+         %89 = OpULessThanEqual %bool %88 %85
+               OpSelectionMerge %90 None
+               OpBranchConditional %89 %91 %90
+         %91 = OpLabel
+        %105 = OpAccessChain %_ptr_StorageBuffer__runtimearr_float %1 %uint_0
+        %106 = OpAccessChain %_ptr_StorageBuffer_float %105 %uint_0
+               OpCooperativeMatrixStoreKHR %106 %83 %uint_0 %uint_64 NonPrivatePointer
+               OpBranch %90
+         %90 = OpLabel
                OpReturn
                OpFunctionEnd
diff --git a/test/tint/extensions/subgroup_matrix/pointers.wgsl.expected.msl b/test/tint/extensions/subgroup_matrix/pointers.wgsl.expected.msl
index 7176f7f..56911b8 100644
--- a/test/tint/extensions/subgroup_matrix/pointers.wgsl.expected.msl
+++ b/test/tint/extensions/subgroup_matrix/pointers.wgsl.expected.msl
@@ -24,18 +24,34 @@
 
 struct tint_module_vars_struct {
   device tint_array<float, 1>* tint_member;
+  const constant tint_array<uint4, 1>* tint_storage_buffer_sizes;
 };
 
 void foo(thread simdgroup_float8x8* const m, thread tint_array<simdgroup_float8x8, 4>* const m_array, thread tint_array<tint_array<simdgroup_float8x8, 4>, 4>* const m_nested_array, thread S* const m_struct, thread S_Nested* const m_nested_struct, tint_module_vars_struct tint_module_vars) {
-  simdgroup_store((*m), (&(*tint_module_vars.tint_member)[0u]), ulong(64u), ulong2(0ul), false);
-  simdgroup_store((*m_array)[0u], (&(*tint_module_vars.tint_member)[0u]), ulong(64u), ulong2(0ul), false);
-  simdgroup_store((*m_nested_array)[1u][2u], (&(*tint_module_vars.tint_member)[0u]), ulong(64u), ulong2(0ul), false);
-  simdgroup_store((*m_struct).l, (&(*tint_module_vars.tint_member)[0u]), ulong(64u), ulong2(0ul), false);
-  simdgroup_store((*m_nested_struct).s.r, (&(*tint_module_vars.tint_member)[0u]), ulong(64u), ulong2(0ul), false);
+  simdgroup_float8x8 const v = (*m);
+  if ((((0u + (64u * 7u)) + 8u) <= ((*tint_module_vars.tint_storage_buffer_sizes)[0u].x / 4u))) {
+    simdgroup_store(v, (&(*tint_module_vars.tint_member)[0u]), ulong(64u), ulong2(0ul), false);
+  }
+  simdgroup_float8x8 const v_1 = (*m_array)[0u];
+  if ((((0u + (64u * 7u)) + 8u) <= ((*tint_module_vars.tint_storage_buffer_sizes)[0u].x / 4u))) {
+    simdgroup_store(v_1, (&(*tint_module_vars.tint_member)[0u]), ulong(64u), ulong2(0ul), false);
+  }
+  simdgroup_float8x8 const v_2 = (*m_nested_array)[1u][2u];
+  if ((((0u + (64u * 7u)) + 8u) <= ((*tint_module_vars.tint_storage_buffer_sizes)[0u].x / 4u))) {
+    simdgroup_store(v_2, (&(*tint_module_vars.tint_member)[0u]), ulong(64u), ulong2(0ul), false);
+  }
+  simdgroup_float8x8 const v_3 = (*m_struct).l;
+  if ((((0u + (64u * 7u)) + 8u) <= ((*tint_module_vars.tint_storage_buffer_sizes)[0u].x / 4u))) {
+    simdgroup_store(v_3, (&(*tint_module_vars.tint_member)[0u]), ulong(64u), ulong2(0ul), false);
+  }
+  simdgroup_float8x8 const v_4 = (*m_nested_struct).s.r;
+  if ((((0u + (64u * 7u)) + 8u) <= ((*tint_module_vars.tint_storage_buffer_sizes)[0u].x / 4u))) {
+    simdgroup_store(v_4, (&(*tint_module_vars.tint_member)[0u]), ulong(64u), ulong2(0ul), false);
+  }
 }
 
-kernel void v(device tint_array<float, 1>* v_1 [[buffer(0)]]) {
-  tint_module_vars_struct const tint_module_vars = tint_module_vars_struct{.tint_member=v_1};
+kernel void v_5(device tint_array<float, 1>* v_6 [[buffer(0)]], const constant tint_array<uint4, 1>* tint_storage_buffer_sizes [[buffer(30)]]) {
+  tint_module_vars_struct const tint_module_vars = tint_module_vars_struct{.tint_member=v_6, .tint_storage_buffer_sizes=tint_storage_buffer_sizes};
   simdgroup_float8x8 m = make_filled_simdgroup_matrix<float, 8, 8>(0.0f);
   tint_array<simdgroup_float8x8, 4> m_array = {};
   tint_array<tint_array<simdgroup_float8x8, 4>, 4> m_nested_array = {};
diff --git a/test/tint/extensions/subgroup_matrix/pointers.wgsl.expected.spvasm b/test/tint/extensions/subgroup_matrix/pointers.wgsl.expected.spvasm
index 36d1d2e..6de6ec6 100644
--- a/test/tint/extensions/subgroup_matrix/pointers.wgsl.expected.spvasm
+++ b/test/tint/extensions/subgroup_matrix/pointers.wgsl.expected.spvasm
@@ -1,7 +1,7 @@
 ; SPIR-V
 ; Version: 1.3
 ; Generator: Google Tint Compiler; 1
-; Bound: 77
+; Bound: 119
 ; Schema: 0
                OpCapability Shader
                OpCapability VulkanMemoryModel
@@ -61,18 +61,20 @@
 %_ptr_Function_S_Nested = OpTypePointer Function %S_Nested
          %30 = OpTypeFunction %void %_ptr_Function_9 %_ptr_Function__arr_9_uint_4 %_ptr_Function__arr__arr_9_uint_4_uint_4 %_ptr_Function_S %_ptr_Function_S_Nested
 %_ptr_StorageBuffer__runtimearr_float = OpTypePointer StorageBuffer %_runtimearr_float
-%_ptr_StorageBuffer_float = OpTypePointer StorageBuffer %float
     %uint_64 = OpConstant %uint 64
+     %uint_7 = OpConstant %uint 7
+       %bool = OpTypeBool
      %uint_2 = OpConstant %uint 2
 %_ptr_Function_24 = OpTypePointer Function %24
-         %62 = OpTypeFunction %void
+%_ptr_StorageBuffer_float = OpTypePointer StorageBuffer %float
+        %104 = OpTypeFunction %void
     %float_0 = OpConstant %float 0
-         %65 = OpConstantComposite %9 %float_0
-         %68 = OpConstantComposite %_arr_9_uint_4 %65 %65 %65 %65
-         %70 = OpConstantComposite %_arr__arr_9_uint_4_uint_4 %68 %68 %68 %68
-         %73 = OpConstantComposite %24 %float_0
-         %72 = OpConstantComposite %S %65 %73
-         %75 = OpConstantComposite %S_Nested %72
+        %107 = OpConstantComposite %9 %float_0
+        %110 = OpConstantComposite %_arr_9_uint_4 %107 %107 %107 %107
+        %112 = OpConstantComposite %_arr__arr_9_uint_4_uint_4 %110 %110 %110 %110
+        %115 = OpConstantComposite %24 %float_0
+        %114 = OpConstantComposite %S %107 %115
+        %117 = OpConstantComposite %S_Nested %114
         %foo = OpFunction %void None %30
      %m_root = OpFunctionParameter %_ptr_Function_9
 %m_array_root = OpFunctionParameter %_ptr_Function__arr_9_uint_4
@@ -82,37 +84,92 @@
          %31 = OpLabel
          %32 = OpLoad %9 %m_root None
          %33 = OpAccessChain %_ptr_StorageBuffer__runtimearr_float %1 %uint_0
-         %35 = OpAccessChain %_ptr_StorageBuffer_float %33 %uint_0
-               OpCooperativeMatrixStoreKHR %35 %32 %uint_0 %uint_64 NonPrivatePointer
-         %39 = OpAccessChain %_ptr_Function_9 %m_array_root %uint_0
-         %40 = OpLoad %9 %39 None
-         %41 = OpAccessChain %_ptr_StorageBuffer__runtimearr_float %1 %uint_0
-         %42 = OpAccessChain %_ptr_StorageBuffer_float %41 %uint_0
-               OpCooperativeMatrixStoreKHR %42 %40 %uint_0 %uint_64 NonPrivatePointer
-         %44 = OpAccessChain %_ptr_Function_9 %m_nested_array_root %uint_1 %uint_2
-         %46 = OpLoad %9 %44 None
+         %35 = OpArrayLength %uint %1 0
+         %36 = OpIMul %uint %uint_64 %uint_7
+         %39 = OpIAdd %uint %uint_0 %36
+         %40 = OpIAdd %uint %39 %uint_8
+         %41 = OpULessThanEqual %bool %40 %35
+               OpSelectionMerge %43 None
+               OpBranchConditional %41 %44 %43
+         %44 = OpLabel
+         %87 = OpAccessChain %_ptr_StorageBuffer__runtimearr_float %1 %uint_0
+         %88 = OpAccessChain %_ptr_StorageBuffer_float %87 %uint_0
+               OpCooperativeMatrixStoreKHR %88 %32 %uint_0 %uint_64 NonPrivatePointer
+               OpBranch %43
+         %43 = OpLabel
+         %45 = OpAccessChain %_ptr_Function_9 %m_array_root %uint_0
+         %46 = OpLoad %9 %45 None
          %47 = OpAccessChain %_ptr_StorageBuffer__runtimearr_float %1 %uint_0
-         %48 = OpAccessChain %_ptr_StorageBuffer_float %47 %uint_0
-               OpCooperativeMatrixStoreKHR %48 %46 %uint_0 %uint_64 NonPrivatePointer
-         %50 = OpAccessChain %_ptr_Function_9 %m_struct_root %uint_0
-         %51 = OpLoad %9 %50 None
-         %52 = OpAccessChain %_ptr_StorageBuffer__runtimearr_float %1 %uint_0
-         %53 = OpAccessChain %_ptr_StorageBuffer_float %52 %uint_0
-               OpCooperativeMatrixStoreKHR %53 %51 %uint_0 %uint_64 NonPrivatePointer
-         %55 = OpAccessChain %_ptr_Function_24 %m_nested_struct_root %uint_0 %uint_1
-         %57 = OpLoad %24 %55 None
+         %48 = OpArrayLength %uint %1 0
+         %49 = OpIMul %uint %uint_64 %uint_7
+         %50 = OpIAdd %uint %uint_0 %49
+         %51 = OpIAdd %uint %50 %uint_8
+         %52 = OpULessThanEqual %bool %51 %48
+               OpSelectionMerge %53 None
+               OpBranchConditional %52 %54 %53
+         %54 = OpLabel
+         %91 = OpAccessChain %_ptr_StorageBuffer__runtimearr_float %1 %uint_0
+         %92 = OpAccessChain %_ptr_StorageBuffer_float %91 %uint_0
+               OpCooperativeMatrixStoreKHR %92 %46 %uint_0 %uint_64 NonPrivatePointer
+               OpBranch %53
+         %53 = OpLabel
+         %55 = OpAccessChain %_ptr_Function_9 %m_nested_array_root %uint_1 %uint_2
+         %57 = OpLoad %9 %55 None
          %58 = OpAccessChain %_ptr_StorageBuffer__runtimearr_float %1 %uint_0
-         %59 = OpAccessChain %_ptr_StorageBuffer_float %58 %uint_0
-               OpCooperativeMatrixStoreKHR %59 %57 %uint_0 %uint_64 NonPrivatePointer
+         %59 = OpArrayLength %uint %1 0
+         %60 = OpIMul %uint %uint_64 %uint_7
+         %61 = OpIAdd %uint %uint_0 %60
+         %62 = OpIAdd %uint %61 %uint_8
+         %63 = OpULessThanEqual %bool %62 %59
+               OpSelectionMerge %64 None
+               OpBranchConditional %63 %65 %64
+         %65 = OpLabel
+         %94 = OpAccessChain %_ptr_StorageBuffer__runtimearr_float %1 %uint_0
+         %95 = OpAccessChain %_ptr_StorageBuffer_float %94 %uint_0
+               OpCooperativeMatrixStoreKHR %95 %57 %uint_0 %uint_64 NonPrivatePointer
+               OpBranch %64
+         %64 = OpLabel
+         %66 = OpAccessChain %_ptr_Function_9 %m_struct_root %uint_0
+         %67 = OpLoad %9 %66 None
+         %68 = OpAccessChain %_ptr_StorageBuffer__runtimearr_float %1 %uint_0
+         %69 = OpArrayLength %uint %1 0
+         %70 = OpIMul %uint %uint_64 %uint_7
+         %71 = OpIAdd %uint %uint_0 %70
+         %72 = OpIAdd %uint %71 %uint_8
+         %73 = OpULessThanEqual %bool %72 %69
+               OpSelectionMerge %74 None
+               OpBranchConditional %73 %75 %74
+         %75 = OpLabel
+         %97 = OpAccessChain %_ptr_StorageBuffer__runtimearr_float %1 %uint_0
+         %98 = OpAccessChain %_ptr_StorageBuffer_float %97 %uint_0
+               OpCooperativeMatrixStoreKHR %98 %67 %uint_0 %uint_64 NonPrivatePointer
+               OpBranch %74
+         %74 = OpLabel
+         %76 = OpAccessChain %_ptr_Function_24 %m_nested_struct_root %uint_0 %uint_1
+         %78 = OpLoad %24 %76 None
+         %79 = OpAccessChain %_ptr_StorageBuffer__runtimearr_float %1 %uint_0
+         %80 = OpArrayLength %uint %1 0
+         %81 = OpIMul %uint %uint_64 %uint_7
+         %82 = OpIAdd %uint %uint_0 %81
+         %83 = OpIAdd %uint %82 %uint_8
+         %84 = OpULessThanEqual %bool %83 %80
+               OpSelectionMerge %85 None
+               OpBranchConditional %84 %86 %85
+         %86 = OpLabel
+        %100 = OpAccessChain %_ptr_StorageBuffer__runtimearr_float %1 %uint_0
+        %101 = OpAccessChain %_ptr_StorageBuffer_float %100 %uint_0
+               OpCooperativeMatrixStoreKHR %101 %78 %uint_0 %uint_64 NonPrivatePointer
+               OpBranch %85
+         %85 = OpLabel
                OpReturn
                OpFunctionEnd
-       %main = OpFunction %void None %62
-         %63 = OpLabel
-          %m = OpVariable %_ptr_Function_9 Function %65
-    %m_array = OpVariable %_ptr_Function__arr_9_uint_4 Function %68
-%m_nested_array = OpVariable %_ptr_Function__arr__arr_9_uint_4_uint_4 Function %70
-   %m_struct = OpVariable %_ptr_Function_S Function %72
-%m_nested_struct = OpVariable %_ptr_Function_S_Nested Function %75
-         %76 = OpFunctionCall %void %foo %m %m_array %m_nested_array %m_struct %m_nested_struct
+       %main = OpFunction %void None %104
+        %105 = OpLabel
+          %m = OpVariable %_ptr_Function_9 Function %107
+    %m_array = OpVariable %_ptr_Function__arr_9_uint_4 Function %110
+%m_nested_array = OpVariable %_ptr_Function__arr__arr_9_uint_4_uint_4 Function %112
+   %m_struct = OpVariable %_ptr_Function_S Function %114
+%m_nested_struct = OpVariable %_ptr_Function_S_Nested Function %117
+        %118 = OpFunctionCall %void %foo %m %m_array %m_nested_array %m_struct %m_nested_struct
                OpReturn
                OpFunctionEnd