tint: Add and use new Std140 transform

This transform breaks up matNx2<f32> matrices used in uniform buffers
into column vectors, so that they satisfy std140 layout rules.

Used by the SPIR-V and GLSL backends.

Re-enable tests that were disabled for these cases.

Bug: tint:1632
Change-Id: I596d016582b4189a0b413d762b3e7eabd3504b22
Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/100907
Kokoro: Kokoro <noreply+kokoro@google.com>
Reviewed-by: Dan Sinclair <dsinclair@chromium.org>
Commit-Queue: Ben Clayton <bclayton@chromium.org>
diff --git a/src/dawn/tests/end2end/ComputeLayoutMemoryBufferTests.cpp b/src/dawn/tests/end2end/ComputeLayoutMemoryBufferTests.cpp
index 9fa3af9..b75c9ba 100644
--- a/src/dawn/tests/end2end/ComputeLayoutMemoryBufferTests.cpp
+++ b/src/dawn/tests/end2end/ComputeLayoutMemoryBufferTests.cpp
@@ -137,9 +137,6 @@
 };
 
 TEST_P(ComputeLayoutMemoryBufferTests, Fields) {
-    // TODO(tint:1632) Re-enable after the bug is fixed for Vulkan.
-    DAWN_SUPPRESS_TEST_IF(IsVulkan());
-
     // Sentinel value markers codes used to check that the start and end of
     // structures are correctly aligned. Each of these codes are distinct and
     // are not likely to be confused with data.
diff --git a/src/tint/BUILD.gn b/src/tint/BUILD.gn
index 61bdea6..c1e84d1 100644
--- a/src/tint/BUILD.gn
+++ b/src/tint/BUILD.gn
@@ -533,6 +533,8 @@
     "transform/single_entry_point.h",
     "transform/spirv_atomic.cc",
     "transform/spirv_atomic.h",
+    "transform/std140.cc",
+    "transform/std140.h",
     "transform/substitute_override.cc",
     "transform/substitute_override.h",
     "transform/transform.cc",
@@ -1213,6 +1215,7 @@
       "transform/simplify_pointers_test.cc",
       "transform/single_entry_point_test.cc",
       "transform/spirv_atomic_test.cc",
+      "transform/std140_test.cc",
       "transform/substitute_override_test.cc",
       "transform/test_helper.h",
       "transform/transform_test.cc",
diff --git a/src/tint/CMakeLists.txt b/src/tint/CMakeLists.txt
index 62809ab..e8f875b 100644
--- a/src/tint/CMakeLists.txt
+++ b/src/tint/CMakeLists.txt
@@ -445,6 +445,8 @@
   transform/single_entry_point.h
   transform/spirv_atomic.cc
   transform/spirv_atomic.h
+  transform/std140.cc
+  transform/std140.h
   transform/substitute_override.cc
   transform/substitute_override.h
   transform/transform.cc
@@ -1128,6 +1130,7 @@
       transform/simplify_pointers_test.cc
       transform/single_entry_point_test.cc
       transform/spirv_atomic_test.cc
+      transform/std140_test.cc
       transform/substitute_override_test.cc
       transform/test_helper.h
       transform/unshadow_test.cc
diff --git a/src/tint/transform/std140.cc b/src/tint/transform/std140.cc
new file mode 100644
index 0000000..57da4da
--- /dev/null
+++ b/src/tint/transform/std140.cc
@@ -0,0 +1,950 @@
+// Copyright 2022 The Tint Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "src/tint/transform/std140.h"
+
+#include <algorithm>
+#include <string>
+#include <utility>
+#include <variant>
+
+#include "src/tint/program_builder.h"
+#include "src/tint/sem/index_accessor_expression.h"
+#include "src/tint/sem/member_accessor_expression.h"
+#include "src/tint/sem/module.h"
+#include "src/tint/sem/struct.h"
+#include "src/tint/sem/variable.h"
+#include "src/tint/utils/hashmap.h"
+#include "src/tint/utils/transform.h"
+
+TINT_INSTANTIATE_TYPEINFO(tint::transform::Std140);
+
+using namespace tint::number_suffixes;  // NOLINT
+
+namespace {
+
+/// DynamicIndex is used by Std140::State::AccessIndex to indicate a runtime-expression index
+struct DynamicIndex {
+    size_t slot;  // The index of the expression in Std140::State::AccessChain::dynamic_indices
+};
+
+/// Inequality operator for DynamicIndex
+bool operator!=(const DynamicIndex& a, const DynamicIndex& b) {
+    return a.slot != b.slot;
+}
+
+}  // namespace
+
+namespace tint::utils {
+
+/// Hasher specialization for DynamicIndex
+template <>
+struct Hasher<DynamicIndex> {
+    /// The hash function for the DynamicIndex
+    /// @param d the DynamicIndex to hash
+    /// @return the hash for the given DynamicIndex
+    uint64_t operator()(const DynamicIndex& d) const { return utils::Hash(d.slot); }
+};
+
+}  // namespace tint::utils
+
+namespace tint::transform {
+
+/// The PIMPL state for the Std140 transform
+struct Std140::State {
+    /// Constructor
+    /// @param c the CloneContext
+    explicit State(CloneContext& c) : ctx(c) {}
+
+    /// Runs the transform
+    void Run() {
+        // Begin by creating forked structures for any struct that is used as a uniform buffer, that
+        // either directly or transitively contains a matrix that needs splitting for std140 layout.
+        ForkStructs();
+
+        // Next, replace all the uniform variables to use the forked types.
+        ReplaceUniformVarTypes();
+
+        // Finally, replace all expression chains that used the authored types with those that
+        // correctly use the forked types.
+        ctx.ReplaceAll([&](const ast::Expression* expr) -> const ast::Expression* {
+            if (auto access = AccessChainFor(expr)) {
+                if (!access->std140_mat_idx.has_value()) {
+                    // loading a std140 type, which is not a whole or partial decomposed matrix
+                    return LoadWithConvert(access.value());
+                }
+                if (!access->IsMatrixSubset() ||  // loading a whole matrix
+                    std::holds_alternative<DynamicIndex>(
+                        access->indices[*access->std140_mat_idx + 1])) {
+                    // Whole object or matrix is loaded, or the matrix column is indexed with a
+                    // non-constant index. Build a helper function to load the expression chain.
+                    return LoadMatrixWithFn(access.value());
+                }
+                // Matrix column is statically indexed. Can be emitted as an inline expression.
+                return LoadSubMatrixInline(access.value());
+            }
+            // Expression isn't an access to a std140-layout uniform buffer.
+            // Just clone.
+            return nullptr;
+        });
+
+        ctx.Clone();
+    }
+
+    /// @returns true if this transform should be run for the given program
+    /// @param program the program to inspect
+    static bool ShouldRun(const Program* program) {
+        for (auto* ty : program->Types()) {
+            if (auto* str = ty->As<sem::Struct>()) {
+                if (str->UsedAs(ast::StorageClass::kUniform)) {
+                    for (auto* member : str->Members()) {
+                        if (auto* mat = member->Type()->As<sem::Matrix>()) {
+                            if (MatrixNeedsDecomposing(mat)) {
+                                return true;
+                            }
+                        }
+                    }
+                }
+            }
+        }
+        return false;
+    }
+
+  private:
+    /// Swizzle describes a vector swizzle
+    using Swizzle = utils::Vector<uint32_t, 4>;
+
+    /// AccessIndex describes a single access in an access chain.
+    /// The access is one of:
+    /// u32          - a static member index on a struct, static array index, static matrix column
+    ///                index, static vector element index.
+    /// DynamicIndex - a runtime-expression index on an array, matrix column selection, or vector
+    ///                element index.
+    /// Swizzle      - a static vector swizzle.
+    using AccessIndex = std::variant<u32, DynamicIndex, Swizzle>;
+
+    /// A vector of AccessIndex.
+    using AccessIndices = utils::Vector<AccessIndex, 8>;
+
+    /// A key used to cache load functions for an access chain.
+    struct LoadFnKey {
+        /// The root uniform buffer variable for the access chain.
+        const sem::GlobalVariable* var;
+
+        /// The chain of access indices.
+        AccessIndices indices;
+
+        /// Hash function for LoadFnKey.
+        struct Hasher {
+            /// @param fn the LoadFnKey to hash
+            /// @return the hash for the given LoadFnKey
+            uint64_t operator()(const LoadFnKey& fn) const {
+                return utils::Hash(fn.var, fn.indices);
+            }
+        };
+
+        /// Equality operator
+        bool operator==(const LoadFnKey& other) const {
+            return var == other.var && indices == other.indices;
+        }
+    };
+
+    /// The clone context
+    CloneContext& ctx;
+    /// Alias to the semantic info in ctx.src
+    const sem::Info& sem = ctx.src->Sem();
+    /// Alias to the symbols in ctx.src
+    const SymbolTable& sym = ctx.src->Symbols();
+    /// Alias to the ctx.dst program builder
+    ProgramBuilder& b = *ctx.dst;
+
+    /// Map of load function signature, to the generated function
+    utils::Hashmap<LoadFnKey, Symbol, 8, LoadFnKey::Hasher> load_fns;
+
+    /// Map of std140-forked type to converter function name
+    utils::Hashmap<const sem::Type*, Symbol, 8> conv_fns;
+
+    // Uniform variables that have been modified to use a std140 type
+    utils::Hashset<const sem::Variable*, 8> std140_uniforms;
+
+    // Map of original structure to 'std140' forked structure
+    utils::Hashmap<const sem::Struct*, Symbol, 8> std140_structs;
+
+    // Map of structure member in ctx.src of a matrix type, to list of decomposed column
+    // members in ctx.dst.
+    utils::Hashmap<const sem::StructMember*, utils::Vector<const ast::StructMember*, 4>, 8>
+        std140_mats;
+
+    /// AccessChain describes a chain of access expressions to a uniform buffer variable.
+    struct AccessChain {
+        /// The uniform buffer variable.
+        const sem::GlobalVariable* var;
+        /// The chain of access indices, starting with the first access on #var.
+        AccessIndices indices;
+        /// The runtime-evaluated expressions. This vector is indexed by the DynamicIndex::slot
+        utils::Vector<const sem::Expression*, 8> dynamic_indices;
+        /// The type of the std140-decomposed matrix being accessed.
+        /// May be nullptr if the chain does not pass through a std140-decomposed matrix.
+        const sem::Matrix* std140_mat_ty = nullptr;
+        /// The index in #indices of the access that resolves to the std140-decomposed matrix.
+        /// May hold no value if the chain does not pass through a std140-decomposed matrix.
+        std::optional<size_t> std140_mat_idx;
+
+        /// @returns true if the access chain is to part of (not the whole) std140-decomposed matrix
+        bool IsMatrixSubset() const {
+            return std140_mat_idx.has_value() && (std140_mat_idx.value() + 1 != indices.Length());
+        }
+    };
+
+    /// @returns true if the given matrix needs decomposing to column vectors for std140 layout.
+    /// TODO(crbug.com/tint/1502): This may need adjusting for `f16` matrices.
+    static bool MatrixNeedsDecomposing(const sem::Matrix* mat) { return mat->ColumnStride() == 8; }
+
+    /// ForkStructs walks the structures in dependency order, forking structures that are used as
+    /// uniform buffers which (transitively) use matrices that need std140 decomposition to column
+    /// vectors.
+    /// Populates the #std140_mats and #std140_structs maps.
+    void ForkStructs() {
+        // For each module scope declaration...
+        for (auto* global : ctx.src->Sem().Module()->DependencyOrderedDeclarations()) {
+            // Check to see if this is a structure used by a uniform buffer...
+            auto* str = sem.Get<sem::Struct>(global);
+            if (str && str->UsedAs(ast::StorageClass::kUniform)) {
+                // Should this uniform buffer be forked for std140 usage?
+                bool fork_std140 = false;
+                utils::Vector<const ast::StructMember*, 8> members;
+                for (auto* member : str->Members()) {
+                    if (auto* mat = member->Type()->As<sem::Matrix>()) {
+                        // Is this member a matrix that needs decomposition for std140-layout?
+                        if (MatrixNeedsDecomposing(mat)) {
+                            // Structure member of matrix type needs decomposition.
+                            fork_std140 = true;
+                            // Replace the member with column vectors.
+                            const auto num_columns = mat->columns();
+                            const auto name_prefix = PrefixForUniqueNames(
+                                str->Declaration(), member->Name(), num_columns);
+                            // Build a struct member for each column of the matrix
+                            utils::Vector<const ast::StructMember*, 4> column_members;
+                            for (uint32_t i = 0; i < num_columns; i++) {
+                                utils::Vector<const ast::Attribute*, 1> attributes;
+                                if ((i == 0) && mat->Align() != member->Align()) {
+                                    // The matrix was @align() annotated with a larger alignment
+                                    // than the natural alignment for the matrix. This extra padding
+                                    // needs to be applied to the first column vector.
+                                    attributes.Push(b.MemberAlign(u32(member->Align())));
+                                }
+                                if ((i == num_columns - 1) && mat->Size() != member->Size()) {
+                                    // The matrix was @size() annotated with a larger size than the
+                                    // natural size for the matrix. This extra padding needs to be
+                                    // applied to the last column vector.
+                                    attributes.Push(
+                                        b.MemberSize(member->Size() - mat->ColumnType()->Size() *
+                                                                          (num_columns - 1)));
+                                }
+
+                                // Build the member
+                                const auto col_name = name_prefix + std::to_string(i);
+                                const auto* col_ty = CreateASTTypeFor(ctx, mat->ColumnType());
+                                const auto* col_member =
+                                    ctx.dst->Member(col_name, col_ty, std::move(attributes));
+                                // Add the member to the forked structure
+                                members.Push(col_member);
+                                // Record the member for std140_mats
+                                column_members.Push(col_member);
+                            }
+                            std140_mats.Add(member, std::move(column_members));
+                            continue;
+                        }
+                    }
+
+                    // Is the member part of a struct that has been forked for std140-layout?
+                    if (auto* std140_ty = Std140Type(member->Type())) {
+                        // Yes - use this type for the forked structure member.
+                        fork_std140 = true;
+                        auto attrs = ctx.Clone(member->Declaration()->attributes);
+                        members.Push(
+                            b.Member(sym.NameFor(member->Name()), std140_ty, std::move(attrs)));
+                        continue;
+                    }
+
+                    // Nothing special about this member.
+                    // Push the member in src to members without first cloning. We'll replace this
+                    // with a cloned member once we know whether we need to fork the structure or
+                    // not.
+                    members.Push(member->Declaration());
+                }
+
+                // Did any of the members require forking the structure?
+                if (fork_std140) {
+                    // Clone any members that have not already been cloned.
+                    for (auto& member : members) {
+                        if (member->program_id == ctx.src->ID()) {
+                            member = ctx.Clone(member);
+                        }
+                    }
+                    // Create a new forked structure, and insert it just under the original
+                    // structure.
+                    auto name = b.Symbols().New(sym.NameFor(str->Name()) + "_std140");
+                    auto* std140 = b.create<ast::Struct>(name, std::move(members),
+                                                         ctx.Clone(str->Declaration()->attributes));
+                    ctx.InsertAfter(ctx.src->AST().GlobalDeclarations(), global, std140);
+                    std140_structs.Add(str, name);
+                }
+            }
+        }
+    }
+
+    /// Walks the global variables, replacing the type of those that are a uniform buffer with a
+    /// type that has been forked for std140-layout.
+    /// Populates the #std140_uniforms set.
+    void ReplaceUniformVarTypes() {
+        for (auto* global : ctx.src->AST().GlobalVariables()) {
+            if (auto* var = global->As<ast::Var>()) {
+                if (var->declared_storage_class == ast::StorageClass::kUniform) {
+                    auto* v = sem.Get(var);
+                    if (auto* std140_ty = Std140Type(v->Type()->UnwrapRef())) {
+                        ctx.Replace(global->type, std140_ty);
+                        std140_uniforms.Add(v);
+                    }
+                }
+            }
+        }
+    }
+
+    /// @returns a unique structure member prefix for the splitting of a matrix member into @p count
+    /// column vector members. The new members must be suffixed with a zero-based index ranging from
+    /// `[0..count)`.
+    /// @param str the structure that will hold the uniquely named member.
+    /// @param unsuffixed the common name prefix to use for the new members.
+    /// @param count the number of members that need to be created.
+    std::string PrefixForUniqueNames(const ast::Struct* str,
+                                     Symbol unsuffixed,
+                                     uint32_t count) const {
+        auto prefix = sym.NameFor(unsuffixed);
+        // Keep on inserting '_' between the unsuffixed name and the suffix numbers until the name
+        // is unique.
+        while (true) {
+            prefix += "_";
+
+            utils::Hashset<std::string, 4> strings;
+            for (uint32_t i = 0; i < count; i++) {
+                strings.Add(prefix + std::to_string(i));
+            }
+
+            bool unique = true;
+            for (auto* member : str->members) {
+                // The member name must be unique over the entire set of `count` suffixed names.
+                if (strings.Contains(sym.NameFor(member->symbol))) {
+                    unique = false;
+                    break;
+                }
+            }
+
+            if (unique) {
+                return prefix;
+            }
+        }
+    }
+
+    /// @returns a new, forked std140 AST type for the corresponding non-forked semantic type.
+    ///          If the semantic type is not split for std140-layout, then nullptr is returned.
+    /// @param ty the non-forked semantic type to find the std140 type for.
+    const ast::Type* Std140Type(const sem::Type* ty) const {
+        return Switch(
+            ty,  //
+            [&](const sem::Struct* str) -> const ast::Type* {
+                if (auto* std140 = std140_structs.Find(str)) {
+                    return b.create<ast::TypeName>(*std140);
+                }
+                return nullptr;
+            },
+            [&](const sem::Array* arr) -> const ast::Type* {
+                if (auto* std140 = Std140Type(arr->ElemType())) {
+                    utils::Vector<const ast::Attribute*, 1> attrs;
+                    if (!arr->IsStrideImplicit()) {
+                        attrs.Push(ctx.dst->create<ast::StrideAttribute>(arr->Stride()));
+                    }
+                    return b.create<ast::Array>(std140, b.Expr(u32(arr->Count())),
+                                                std::move(attrs));
+                }
+                return nullptr;
+            });
+    }
+
+    /// Walks the @p ast_expr, constructing and returning an AccessChain.
+    /// @returns an AccessChain if the expression is an access to a std140-forked uniform buffer,
+    ///          otherwise returns a std::nullopt.
+    std::optional<AccessChain> AccessChainFor(const ast::Expression* ast_expr) {
+        auto* expr = sem.Get(ast_expr);
+        if (!expr) {
+            return std::nullopt;
+        }
+
+        AccessChain access;
+
+        // Start by looking at the source variable. This must be a std140-forked uniform buffer.
+        access.var = tint::As<sem::GlobalVariable>(expr->SourceVariable());
+        if (!access.var || !std140_uniforms.Contains(access.var)) {
+            // Not a std140-forked uniform buffer access chain.
+            return std::nullopt;
+        }
+
+        // Walk from the outer-most expression, inwards towards the source variable.
+        while (true) {
+            enum class Action { kStop, kContinue, kError };
+            Action action = Switch(
+                expr,  //
+                [&](const sem::VariableUser* user) {
+                    if (user->Variable() == access.var) {
+                        // Walked all the way to the source variable. We're done traversing.
+                        return Action::kStop;
+                    }
+                    if (user->Variable()->Type()->Is<sem::Pointer>()) {
+                        // Found a pointer. As the source variable is a uniform buffer variable,
+                        // this must be a pointer-let. Continue traversing from the let initializer.
+                        expr = user->Variable()->Constructor();
+                        return Action::kContinue;
+                    }
+                    TINT_ICE(Transform, b.Diagnostics())
+                        << "unexpected variable found walking access chain: "
+                        << sym.NameFor(user->Variable()->Declaration()->symbol);
+                    return Action::kError;
+                },
+                [&](const sem::StructMemberAccess* a) {
+                    // Is this a std140 decomposed matrix?
+                    if (!access.std140_mat_ty && std140_mats.Contains(a->Member())) {
+                        // Record this on the access.
+                        access.std140_mat_idx = access.indices.Length();
+                        access.std140_mat_ty = expr->Type()->UnwrapRef()->As<sem::Matrix>();
+                    }
+                    // Structure member accesses are always statically indexed
+                    access.indices.Push(u32(a->Member()->Index()));
+                    expr = a->Object();
+                    return Action::kContinue;
+                },
+                [&](const sem::IndexAccessorExpression* a) {
+                    // Array, matrix or vector index.
+                    if (auto* val = a->Index()->ConstantValue()) {
+                        access.indices.Push(val->As<u32>());
+                    } else {
+                        access.indices.Push(DynamicIndex{access.dynamic_indices.Length()});
+                        access.dynamic_indices.Push(a->Index());
+                    }
+                    expr = a->Object();
+                    return Action::kContinue;
+                },
+                [&](const sem::Swizzle* s) {
+                    // Vector swizzle.
+                    if (s->Indices().Length() == 1) {
+                        access.indices.Push(u32(s->Indices()[0]));
+                    } else {
+                        access.indices.Push(s->Indices());
+                    }
+                    expr = s->Object();
+                    return Action::kContinue;
+                },
+                [&](const sem::Expression* e) {
+                    // Walk past indirection and address-of unary ops.
+                    return Switch(e->Declaration(),  //
+                                  [&](const ast::UnaryOpExpression* u) {
+                                      switch (u->op) {
+                                          case ast::UnaryOp::kAddressOf:
+                                          case ast::UnaryOp::kIndirection:
+                                              expr = sem.Get(u->expr);
+                                              return Action::kContinue;
+                                          default:
+                                              TINT_ICE(Transform, b.Diagnostics())
+                                                  << "unhandled unary op for access chain: "
+                                                  << u->op;
+                                              return Action::kError;
+                                      }
+                                  });
+                },
+                [&](Default) {
+                    TINT_ICE(Transform, b.Diagnostics())
+                        << "unhandled expression type for access chain\n"
+                        << "AST: " << expr->Declaration()->TypeInfo().name << "\n"
+                        << "SEM: " << expr->TypeInfo().name;
+                    return Action::kError;
+                });
+
+            switch (action) {
+                case Action::kContinue:
+                    continue;
+                case Action::kStop:
+                    break;
+                case Action::kError:
+                    return std::nullopt;
+            }
+
+            break;
+        }
+
+        // As the access walked from RHS to LHS, the last index operation applies to the source
+        // variable. We want this the other way around, so reverse the arrays and fix indices.
+        std::reverse(access.indices.begin(), access.indices.end());
+        std::reverse(access.dynamic_indices.begin(), access.dynamic_indices.end());
+        if (access.std140_mat_idx.has_value()) {
+            access.std140_mat_idx = access.indices.Length() - *access.std140_mat_idx - 1;
+        }
+        for (auto& index : access.indices) {
+            if (auto* dyn_idx = std::get_if<DynamicIndex>(&index)) {
+                dyn_idx->slot = access.dynamic_indices.Length() - dyn_idx->slot - 1;
+            }
+        }
+
+        return access;
+    }
+
+    /// @returns a name suffix for a std140 -> non-std140 conversion function based on the type
+    ///          being converted.
+    const std::string ConvertSuffix(const sem::Type* ty) const {
+        return Switch(
+            ty,  //
+            [&](const sem::Struct* str) { return sym.NameFor(str->Name()); },
+            [&](const sem::Array* arr) {
+                return "arr_" + std::to_string(arr->Count()) + "_" + ConvertSuffix(arr->ElemType());
+            },
+            [&](Default) {
+                TINT_ICE(Transform, b.Diagnostics())
+                    << "unhandled type for conversion name: " << b.FriendlyName(ty);
+                return "";
+            });
+    }
+
+    /// Generates and returns an expression that loads the value from a std140 uniform buffer,
+    /// converting the final result to a non-std140 type.
+    /// @param access the access chain from a uniform buffer to the value to load.
+    const ast::Expression* LoadWithConvert(const AccessChain& access) {
+        const ast::Expression* expr = b.Expr(sym.NameFor(access.var->Declaration()->symbol));
+        const sem::Type* ty = access.var->Type()->UnwrapRef();
+        auto dynamic_index = [&](size_t idx) {
+            return ctx.Clone(access.dynamic_indices[idx]->Declaration());
+        };
+        for (auto index : access.indices) {
+            auto [new_expr, new_ty, _] = BuildAccessExpr(expr, ty, index, dynamic_index);
+            expr = new_expr;
+            ty = new_ty;
+        }
+        return Convert(ty, expr);
+    }
+
+    /// Generates and returns an expression that converts the expression @p expr of the
+    /// std140-forked type to the type @p ty. If @p expr is not a std140-forked type, then Convert()
+    /// will simply return @p expr.
+    /// @returns the converted value expression.
+    const ast::Expression* Convert(const sem::Type* ty, const ast::Expression* expr) {
+        // Get an existing, or create a new function for converting the std140 type to ty.
+        auto fn = conv_fns.GetOrCreate(ty, [&] {
+            auto std140_ty = Std140Type(ty);
+            if (!std140_ty) {
+                // ty was not forked for std140.
+                return Symbol{};
+            }
+
+            // The converter function takes a single argument of the std140 type.
+            auto* param = b.Param("val", std140_ty);
+
+            utils::Vector<const ast::Statement*, 3> stmts;
+
+            Switch(
+                ty,  //
+                [&](const sem::Struct* str) {
+                    // Convert each of the structure members using either a converter function call,
+                    // or by reassembling a std140 matrix from column vector members.
+                    utils::Vector<const ast::Expression*, 8> args;
+                    for (auto* member : str->Members()) {
+                        if (auto* col_members = std140_mats.Find(member)) {
+                            // std140 decomposed matrix. Reassemble.
+                            auto* mat_ty = CreateASTTypeFor(ctx, member->Type());
+                            auto mat_args =
+                                utils::Transform(*col_members, [&](const ast::StructMember* m) {
+                                    return b.MemberAccessor(param, m->symbol);
+                                });
+                            args.Push(b.Construct(mat_ty, std::move(mat_args)));
+                        } else {
+                            // Convert the member
+                            args.Push(
+                                Convert(member->Type(),
+                                        b.MemberAccessor(param, sym.NameFor(member->Name()))));
+                        }
+                    }
+                    auto* converted = b.Construct(CreateASTTypeFor(ctx, ty), std::move(args));
+                    stmts.Push(b.Return(converted));
+                },  //
+                [&](const sem::Array* arr) {
+                    // Converting an array. Create a function var for the converted array, and loop
+                    // over the input elements, converting each and assigning the result to the
+                    // local array.
+                    auto* var = b.Var("arr", CreateASTTypeFor(ctx, ty));
+                    auto* i = b.Var("i", b.ty.u32());
+                    auto* dst_el = b.IndexAccessor(var, i);
+                    auto* src_el = Convert(arr->ElemType(), b.IndexAccessor(param, i));
+                    stmts.Push(b.Decl(var));
+                    stmts.Push(b.For(b.Decl(i),                         //
+                                     b.LessThan(i, u32(arr->Count())),  //
+                                     b.Assign(i, b.Add(i, 1_a)),        //
+                                     b.Block(b.Assign(dst_el, src_el))));
+                    stmts.Push(b.Return(var));
+                },
+                [&](Default) {
+                    TINT_ICE(Transform, b.Diagnostics())
+                        << "unhandled type for conversion: " << b.FriendlyName(ty);
+                });
+
+            // Generate the function
+            auto* ret_ty = CreateASTTypeFor(ctx, ty);
+            auto fn_sym = b.Symbols().New("conv_" + ConvertSuffix(ty));
+            b.Func(fn_sym, utils::Vector{param}, ret_ty, std::move(stmts));
+            return fn_sym;
+        });
+
+        if (!fn.IsValid()) {
+            // Not a std140 type, nothing to convert.
+            return expr;
+        }
+
+        // Call the helper
+        return b.Call(fn, utils::Vector{expr});
+    }
+
+    /// Emits a call to a helper function that loads either a whole std140-decomposed matrix, or
+    /// a part of one, from a uniform buffer. The helper is generated if it does not exist yet.
+    /// @param access the access chain from the uniform buffer to either the whole matrix or part of
+    ///        the matrix (column, column-swizzle, or element).
+    /// @returns the loaded value expression.
+    const ast::Expression* LoadMatrixWithFn(const AccessChain& access) {
+        // Helpers are looked up by the uniform buffer variable together with the access chain.
+        // If no helper is registered for this key yet, the callback below builds one.
+        const LoadFnKey key{access.var, access.indices};
+        auto helper = load_fns.GetOrCreate(key, [&] {
+            // A chain that does not end partway through the matrix loads the whole matrix.
+            // Otherwise the chain passes through the matrix and ends at a column vector, column
+            // swizzle, or element.
+            const bool whole_matrix = !access.IsMatrixSubset();
+            return whole_matrix ? BuildLoadWholeMatrixFn(access)
+                                : BuildLoadPartialMatrixFn(access);
+        });
+
+        // Each dynamic index of the access chain is passed to the helper as a u32 argument.
+        auto fn_args = utils::Transform(access.dynamic_indices, [&](const sem::Expression* idx) {
+            return b.Construct(b.ty.u32(), ctx.Clone(idx->Declaration()));
+        });
+
+        // Invoke the helper
+        return b.Call(helper, std::move(fn_args));
+    }
+
+    /// Builds an inline expression (no helper function is called) that loads part of a
+    /// std140-decomposed matrix from a uniform buffer.
+    /// @param access the access chain from the uniform buffer to part of the matrix (column,
+    ///               column-swizzle, or element).
+    /// @note The matrix column must be statically indexed to use this method.
+    /// @returns the loaded value expression.
+    const ast::Expression* LoadSubMatrixInline(const AccessChain& access) {
+        const ast::Expression* result = b.Expr(ctx.Clone(access.var->Declaration()->symbol));
+        const sem::Type* cur_ty = access.var->Type()->UnwrapRef();
+        // The expression is emitted in place, so a dynamic index is obtained by cloning the
+        // original index expression.
+        auto clone_index = [&](size_t slot) {
+            return ctx.Clone(access.dynamic_indices[slot]->Declaration());
+        };
+        for (size_t i = 0; i < access.indices.Length(); i++) {
+            if (i != access.std140_mat_idx) {
+                // Access is to something that is not a decomposed matrix.
+                auto step = BuildAccessExpr(result, cur_ty, access.indices[i], clone_index);
+                result = step.expr;
+                cur_ty = step.type;
+                continue;
+            }
+            // This index selects the std140-decomposed matrix member and the following index
+            // selects the column. As only part of the matrix is accessed, the pair is replaced by
+            // an access of the matching column vector member.
+            auto member_idx = std::get<u32>(access.indices[i]);
+            auto* member = cur_ty->As<sem::Struct>()->Members()[member_idx];
+            auto column_members = *std140_mats.Get(member);
+            auto column = std::get<u32>(access.indices[i + 1]);
+            result = b.MemberAccessor(result, column_members[column]->symbol);
+            cur_ty = member->Type()->As<sem::Matrix>()->ColumnType();
+            // Both the matrix member access and the column access have been consumed, so step
+            // over the column index as well.
+            i++;
+        }
+        return result;
+    }
+
+    /// Generates a function to load part of a std140-decomposed matrix from a uniform buffer.
+    /// The generated function will have a parameter per dynamic (runtime-evaluated) index in the
+    /// access chain.
+    /// The generated function uses a WGSL switch statement to dynamically select the decomposed
+    /// matrix column. An out-of-range column index yields a zero value of the return type.
+    /// @param access the access chain from the uniform buffer to part of the matrix (column,
+    ///               column-swizzle, or element).
+    /// @note The matrix column must be dynamically indexed to use this method.
+    /// @returns the generated function name.
+    Symbol BuildLoadPartialMatrixFn(const AccessChain& access) {
+        // Build the dynamic index parameters: one u32 parameter per dynamic index.
+        auto dynamic_index_params = utils::Transform(access.dynamic_indices, [&](auto*, size_t i) {
+            return b.Param("p" + std::to_string(i), b.ty.u32());
+        });
+        // Method for generating dynamic index expressions.
+        // These are passed in as arguments to the function.
+        auto dynamic_index = [&](size_t idx) { return b.Expr(dynamic_index_params[idx]->symbol); };
+
+        // Fetch the access chain indices of the matrix access and the parameter index that holds
+        // the matrix column index (the access immediately following the matrix member).
+        auto std140_mat_idx = *access.std140_mat_idx;
+        auto column_param_idx = std::get<DynamicIndex>(access.indices[std140_mat_idx + 1]).slot;
+
+        // Begin building the function name. Every column shares the same access chain, so the
+        // name is only extended while building the first column (column_idx == 0).
+        std::string name = "load_" + sym.NameFor(access.var->Declaration()->symbol);
+
+        // The switch cases
+        utils::Vector<const ast::CaseStatement*, 4> cases;
+
+        // The function return type. Assigned while building the first column.
+        const sem::Type* ret_ty = nullptr;
+
+        // Build switch() cases for each column of the matrix
+        auto num_columns = access.std140_mat_ty->columns();
+        for (uint32_t column_idx = 0; column_idx < num_columns; column_idx++) {
+            const ast::Expression* expr = b.Expr(ctx.Clone(access.var->Declaration()->symbol));
+            const sem::Type* ty = access.var->Type()->UnwrapRef();
+            // Build the expression up to, but not including the matrix member
+            for (size_t i = 0; i < std140_mat_idx; i++) {
+                auto [new_expr, new_ty, access_name] =
+                    BuildAccessExpr(expr, ty, access.indices[i], dynamic_index);
+                expr = new_expr;
+                ty = new_ty;
+                if (column_idx == 0) {
+                    name = name + "_" + access_name;
+                }
+            }
+
+            // Get the matrix member that was dynamically accessed.
+            auto mat_member_idx = std::get<u32>(access.indices[std140_mat_idx]);
+            auto* mat_member = ty->As<sem::Struct>()->Members()[mat_member_idx];
+            auto mat_columns = *std140_mats.Get(mat_member);
+            if (column_idx == 0) {
+                name = name + "_" + sym.NameFor(mat_member->Name()) + "_p" +
+                       std::to_string(column_param_idx);
+            }
+
+            // Build the expression to the column vector member.
+            expr = b.MemberAccessor(expr, mat_columns[column_idx]->symbol);
+            ty = mat_member->Type()->As<sem::Matrix>()->ColumnType();
+            // Build the rest of the expression, skipping over the column index.
+            for (size_t i = std140_mat_idx + 2; i < access.indices.Length(); i++) {
+                auto [new_expr, new_ty, access_name] =
+                    BuildAccessExpr(expr, ty, access.indices[i], dynamic_index);
+                expr = new_expr;
+                ty = new_ty;
+                if (column_idx == 0) {
+                    name = name + "_" + access_name;
+                }
+            }
+
+            if (column_idx == 0) {
+                ret_ty = ty;
+            }
+
+            auto* case_sel = b.Expr(u32(column_idx));
+            auto* case_body = b.Block(utils::Vector{b.Return(expr)});
+            cases.Push(b.Case(case_sel, case_body));
+        }
+
+        // Build the default case (required in WGSL).
+        // This just returns a zero value of the return type, as the index must be out of bounds.
+        cases.Push(b.DefaultCase(b.Block(b.Return(b.Construct(CreateASTTypeFor(ctx, ret_ty))))));
+
+        auto* column_selector = dynamic_index(column_param_idx);
+        auto* stmt = b.Switch(column_selector, std::move(cases));
+
+        auto fn_sym = b.Symbols().New(name);
+        b.Func(fn_sym, std::move(dynamic_index_params), CreateASTTypeFor(ctx, ret_ty),
+               utils::Vector{stmt});
+        return fn_sym;
+    }
+
+    /// Generates a helper function that loads an entire std140-decomposed matrix from a uniform
+    /// buffer, rebuilding the matrix from its column vector members.
+    /// The generated function has one parameter per dynamic (runtime-evaluated) index in the
+    /// access chain.
+    /// @param access the access chain from the uniform buffer to the whole std140-decomposed matrix.
+    /// @returns the generated function name.
+    Symbol BuildLoadWholeMatrixFn(const AccessChain& access) {
+        // Declare a u32 parameter for each dynamic index
+        auto params = utils::Transform(access.dynamic_indices, [&](auto*, size_t slot) {
+            return b.Param("p" + std::to_string(slot), b.ty.u32());
+        });
+        // Inside the helper a dynamic index is read from the parameter holding it, so the
+        // index callback just builds an expression naming that parameter.
+        auto param_index = [&](size_t slot) { return b.Expr(params[slot]->symbol); };
+
+        const ast::Expression* chain = b.Expr(ctx.Clone(access.var->Declaration()->symbol));
+        std::string fn_name = sym.NameFor(access.var->Declaration()->symbol);
+        const sem::Type* chain_ty = access.var->Type()->UnwrapRef();
+
+        // Walk the access chain up to, but not including, the matrix member
+        auto mat_pos = *access.std140_mat_idx;
+        for (size_t i = 0; i < mat_pos; i++) {
+            // Apply one access, and extend the helper name with the segment it reports.
+            auto step = BuildAccessExpr(chain, chain_ty, access.indices[i], param_index);
+            chain = step.expr;
+            chain_ty = step.type;
+            fn_name += "_" + step.name;
+        }
+
+        utils::Vector<const ast::Statement*, 2> body;
+
+        // Take a pointer to the structure that holds the matrix columns, bound to a `let`
+        auto* struct_ptr = b.Let("s", b.AddressOf(chain));
+        body.Push(b.Decl(struct_ptr));
+
+        // Build a member access, through the pointer, for each decomposed column
+        auto member_idx = std::get<u32>(access.indices[mat_pos]);
+        auto* member = chain_ty->As<sem::Struct>()->Members()[member_idx];
+        auto column_members = *std140_mats.Get(member);
+        auto column_exprs = utils::Transform(column_members, [&](auto* column_member) {
+            return b.MemberAccessor(b.Deref(struct_ptr), column_member->symbol);
+        });
+
+        // Reassemble the matrix by constructing it from the column accesses
+        chain = b.Construct(CreateASTTypeFor(ctx, access.std140_mat_ty), std::move(column_exprs));
+        chain_ty = member->Type();
+        fn_name += "_" + sym.NameFor(member->Name());
+
+        // The helper returns the reassembled matrix
+        body.Push(b.Return(chain));
+
+        // Declare the helper function
+        auto* return_ty = CreateASTTypeFor(ctx, chain_ty);
+        auto helper_sym = b.Symbols().New("load_" + fn_name);
+        b.Func(helper_sym, std::move(params), return_ty, std::move(body));
+        return helper_sym;
+    }
+
+    /// Return type of BuildAccessExpr(): the outcome of applying a single access to an expression.
+    struct ExprTypeName {
+        /// The new, post-access expression (the input expression with the access applied)
+        const ast::Expression* expr;
+        /// The type of #expr, i.e. the element, column or member type selected by the access
+        const sem::Type* type;
+        /// A name segment which can be used to build sensible names for helper functions
+        std::string name;
+    };
+
+    /// Applies one step of an access chain to an expression.
+    /// @param lhs the expression to index using @p access
+    /// @param ty the type of the expression @p lhs
+    /// @param access the access index to perform on @p lhs
+    /// @param dynamic_index a function that obtains the i'th dynamic index
+    /// @returns a ExprTypeName which holds the new expression, new type and a name segment which
+    ///          can be used for creating helper function names.
+    ExprTypeName BuildAccessExpr(const ast::Expression* lhs,
+                                 const sem::Type* ty,
+                                 AccessIndex access,
+                                 std::function<const ast::Expression*(size_t)> dynamic_index) {
+        if (auto* dynamic = std::get_if<DynamicIndex>(&access)) {
+            // Dynamic (runtime-expression) index: the name segment is "p" followed by the slot.
+            auto segment = "p" + std::to_string(dynamic->slot);
+            return Switch(
+                ty,  //
+                [&](const sem::Array* array_ty) -> ExprTypeName {
+                    auto* index = dynamic_index(dynamic->slot);
+                    auto* element = b.IndexAccessor(lhs, index);
+                    return {element, array_ty->ElemType(), segment};
+                },  //
+                [&](const sem::Matrix* matrix_ty) -> ExprTypeName {
+                    auto* index = dynamic_index(dynamic->slot);
+                    auto* column = b.IndexAccessor(lhs, index);
+                    return {column, matrix_ty->ColumnType(), segment};
+                },  //
+                [&](const sem::Vector* vector_ty) -> ExprTypeName {
+                    auto* index = dynamic_index(dynamic->slot);
+                    auto* scalar = b.IndexAccessor(lhs, index);
+                    return {scalar, vector_ty->type(), segment};
+                },  //
+                [&](Default) -> ExprTypeName {
+                    TINT_ICE(Transform, b.Diagnostics())
+                        << "unhandled type for access chain: " << b.FriendlyName(ty);
+                    return {};
+                });
+        }
+        if (auto* components = std::get_if<Swizzle>(&access)) {
+            // Vector swizzle: the member name is spelled out from the component indices.
+            return Switch(
+                ty,  //
+                [&](const sem::Vector* vector_ty) -> ExprTypeName {
+                    static const char xyzw[] = {'x', 'y', 'z', 'w'};
+                    std::string letters;
+                    for (auto component : *components) {
+                        letters += xyzw[component];
+                    }
+                    auto result_ty = ctx.src->Types().Find<sem::Vector>(
+                        vector_ty->type(), static_cast<uint32_t>(components->Length()));
+                    auto* swizzled = b.MemberAccessor(lhs, letters);
+                    return {swizzled, result_ty, letters};
+                },  //
+                [&](Default) -> ExprTypeName {
+                    TINT_ICE(Transform, b.Diagnostics())
+                        << "unhandled type for access chain: " << b.FriendlyName(ty);
+                    return {};
+                });
+        }
+        // Anything else is a static index.
+        auto index = std::get<u32>(access);
+        return Switch(
+            ty,  //
+            [&](const sem::Struct* struct_ty) -> ExprTypeName {
+                auto* member = struct_ty->Members()[index];
+                auto member_name = sym.NameFor(member->Name());
+                auto* accessed = b.MemberAccessor(lhs, member_name);
+                // For structures the index picks a member, which is accessed by its name.
+                return {accessed, member->Type(), member_name};
+            },  //
+            [&](const sem::Array* array_ty) -> ExprTypeName {
+                auto* element = b.IndexAccessor(lhs, index);
+                return {element, array_ty->ElemType(), std::to_string(index)};
+            },  //
+            [&](const sem::Matrix* matrix_ty) -> ExprTypeName {
+                auto* column = b.IndexAccessor(lhs, index);
+                return {column, matrix_ty->ColumnType(), std::to_string(index)};
+            },  //
+            [&](const sem::Vector* vector_ty) -> ExprTypeName {
+                auto* scalar = b.IndexAccessor(lhs, index);
+                return {scalar, vector_ty->type(), std::to_string(index)};
+            },  //
+            [&](Default) -> ExprTypeName {
+                TINT_ICE(Transform, b.Diagnostics())
+                    << "unhandled type for access chain: " << b.FriendlyName(ty);
+                return {};
+            });
+    }
+};
+
+Std140::Std140() = default;
+
+Std140::~Std140() = default;
+
+bool Std140::ShouldRun(const Program* program, const DataMap&) const {
+    return State::ShouldRun(program);
+}
+
+void Std140::Run(CloneContext& ctx, const DataMap&, DataMap&) const {
+    State(ctx).Run();
+}
+
+}  // namespace tint::transform
diff --git a/src/tint/transform/std140.h b/src/tint/transform/std140.h
new file mode 100644
index 0000000..f987805
--- /dev/null
+++ b/src/tint/transform/std140.h
@@ -0,0 +1,57 @@
+// Copyright 2022 The Tint Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef SRC_TINT_TRANSFORM_STD140_H_
+#define SRC_TINT_TRANSFORM_STD140_H_
+
+#include "src/tint/transform/transform.h"
+
+namespace tint::transform {
+
+/// Std140 is a transform that forks structures used in the uniform storage class that contain
+/// `matNx2<f32>` matrices, replacing each such matrix with `N` `vec2<f32>` column vectors.
+/// Structure types that transitively use these forked structures as members are also forked.
+/// `var<uniform>` variables will use these forked structures, and expressions loading from these
+/// variables will do appropriate conversions to the regular WGSL types. As `matNx2<f32>` matrices
+/// are the only types that violate std140-layout, this transformation is sufficient to have any
+/// WGSL structure be std140-layout conformant.
+///
+/// @note This transform requires the PromoteSideEffectsToDecl transform to have been run first.
+class Std140 final : public Castable<Std140, Transform> {
+  public:
+    /// Constructor
+    Std140();
+    /// Destructor
+    ~Std140() override;
+
+    /// @param program the program to inspect
+    /// @param data optional extra transform-specific input data
+    /// @returns true if this transform should be run for the given program
+    bool ShouldRun(const Program* program, const DataMap& data = {}) const override;
+
+  private:
+    struct State;
+
+    /// Runs the transform using the CloneContext built for transforming a
+    /// program. Run() is responsible for calling Clone() on the CloneContext.
+    /// @param ctx the CloneContext primed with the input program and
+    /// ProgramBuilder
+    /// @param inputs optional extra transform-specific input data
+    /// @param outputs optional extra transform-specific output data
+    void Run(CloneContext& ctx, const DataMap& inputs, DataMap& outputs) const override;
+};
+
+}  // namespace tint::transform
+
+#endif  // SRC_TINT_TRANSFORM_STD140_H_
diff --git a/src/tint/transform/std140_test.cc b/src/tint/transform/std140_test.cc
new file mode 100644
index 0000000..4681cdf
--- /dev/null
+++ b/src/tint/transform/std140_test.cc
@@ -0,0 +1,2082 @@
+// Copyright 2022 The Tint Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "src/tint/transform/std140.h"
+
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "src/tint/transform/test_helper.h"
+#include "src/tint/utils/string.h"
+
+namespace tint::transform {
+namespace {
+
+using Std140Test = TransformTest;
+
+TEST_F(Std140Test, ShouldRunEmptyModule) {
+    auto* src = R"()";
+
+    EXPECT_FALSE(ShouldRun<Std140>(src));
+}
+
+TEST_F(Std140Test, ShouldRunStructMat2x2Unused) {
+    auto* src = R"(
+struct Unused {
+  m : mat2x2<f32>,
+}
+)";
+
+    EXPECT_FALSE(ShouldRun<Std140>(src));
+}
+
+struct ShouldRunCase {
+    uint32_t columns;
+    uint32_t rows;
+    bool should_run;
+    /// @returns the matrix type name without its element type, e.g. "mat3x2"
+    std::string Mat() const { return "mat" + std::to_string(columns) + "x" + std::to_string(rows); }
+};
+
+inline std::ostream& operator<<(std::ostream& os, const ShouldRunCase& c) {
+    return os << c.Mat();
+}
+
+using Std140TestShouldRun = TransformTestWithParam<ShouldRunCase>;
+
+TEST_P(Std140TestShouldRun, StructStorage) {
+    std::string src = R"(
+struct S {
+  m : ${mat}<f32>,
+}
+
+@group(0) @binding(0) var<storage> s : S;
+)";
+
+    src = utils::ReplaceAll(src, "${mat}", GetParam().Mat());
+
+    EXPECT_FALSE(ShouldRun<Std140>(src));
+}
+
+TEST_P(Std140TestShouldRun, StructUniform) {
+    std::string src = R"(
+struct S {
+  m : ${mat}<f32>,
+}
+
+@group(0) @binding(0) var<uniform> s : S;
+)";
+
+    src = utils::ReplaceAll(src, "${mat}", GetParam().Mat());
+
+    EXPECT_EQ(ShouldRun<Std140>(src), GetParam().should_run);
+}
+
+TEST_P(Std140TestShouldRun, ArrayStorage) {
+    std::string src = R"(
+@group(0) @binding(0) var<storage> s : array<${mat}<f32>, 2>;
+)";
+
+    src = utils::ReplaceAll(src, "${mat}", GetParam().Mat());
+
+    EXPECT_FALSE(ShouldRun<Std140>(src));
+}
+
+TEST_P(Std140TestShouldRun, ArrayUniform) {
+    if (GetParam().columns == 3u && GetParam().rows == 2u) {
+        // This permutation is invalid. Skip the test:
+        // error: uniform storage requires that array elements be aligned to 16 bytes, but array
+        // element alignment is currently 24. Consider wrapping the element type in a struct and
+        // using the @size attribute.
+        return;
+    }
+
+    std::string src = R"(
+@group(0) @binding(0) var<uniform> s : array<${mat}<f32>, 2>;
+)";
+
+    src = utils::ReplaceAll(src, "${mat}", GetParam().Mat());
+
+    EXPECT_FALSE(ShouldRun<Std140>(src));
+}
+
+INSTANTIATE_TEST_SUITE_P(Std140TestShouldRun,
+                         Std140TestShouldRun,
+                         ::testing::ValuesIn(std::vector<ShouldRunCase>{
+                             {2, 2, true},
+                             {2, 3, false},
+                             {2, 4, false},
+                             {3, 2, true},
+                             {3, 3, false},
+                             {3, 4, false},
+                             {4, 2, true},
+                             {4, 3, false},
+                             {4, 4, false},
+                         }));
+
+TEST_F(Std140Test, EmptyModule) {
+    auto* src = R"()";
+
+    auto* expect = R"()";
+
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test, SingleStructMat4x4Uniform) {
+    auto* src = R"(
+struct S {
+  m : mat4x4<f32>,
+}
+
+@group(0) @binding(0) var<uniform> s : S;
+)";
+
+    auto* expect = src;  // Nothing violates std140 layout
+
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test, SingleStructMat2x2Uniform) {
+    auto* src = R"(
+struct S {
+  m : mat2x2<f32>,
+}
+
+@group(0) @binding(0) var<uniform> s : S;
+)";
+
+    auto* expect = R"(
+struct S {
+  m : mat2x2<f32>,
+}
+
+struct S_std140 {
+  m_0 : vec2<f32>,
+  m_1 : vec2<f32>,
+}
+
+@group(0) @binding(0) var<uniform> s : S_std140;
+)";
+
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test, CustomAlignMat3x2) {
+    auto* src = R"(
+struct S {
+  before : i32,
+  @align(128) m : mat3x2<f32>,
+  after : i32,
+}
+
+@group(0) @binding(0) var<uniform> s : S;
+)";
+
+    auto* expect = R"(
+struct S {
+  before : i32,
+  @align(128)
+  m : mat3x2<f32>,
+  after : i32,
+}
+
+struct S_std140 {
+  before : i32,
+  @align(128u)
+  m_0 : vec2<f32>,
+  m_1 : vec2<f32>,
+  m_2 : vec2<f32>,
+  after : i32,
+}
+
+@group(0) @binding(0) var<uniform> s : S_std140;
+)";
+
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test, CustomSizeMat3x2) {
+    auto* src = R"(
+struct S {
+  before : i32,
+  @size(128) m : mat3x2<f32>,
+  after : i32,
+}
+
+@group(0) @binding(0) var<uniform> s : S;
+)";
+
+    auto* expect = R"(
+struct S {
+  before : i32,
+  @size(128)
+  m : mat3x2<f32>,
+  after : i32,
+}
+
+struct S_std140 {
+  before : i32,
+  m_0 : vec2<f32>,
+  m_1 : vec2<f32>,
+  @size(112)
+  m_2 : vec2<f32>,
+  after : i32,
+}
+
+@group(0) @binding(0) var<uniform> s : S_std140;
+)";
+
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test, CustomAlignAndSizeMat3x2) {
+    auto* src = R"(
+struct S {
+  before : i32,
+  @align(128) @size(128) m : mat3x2<f32>,
+  after : i32,
+}
+
+@group(0) @binding(0) var<uniform> s : S;
+)";
+
+    auto* expect = R"(
+struct S {
+  before : i32,
+  @align(128) @size(128)
+  m : mat3x2<f32>,
+  after : i32,
+}
+
+struct S_std140 {
+  before : i32,
+  @align(128u)
+  m_0 : vec2<f32>,
+  m_1 : vec2<f32>,
+  @size(112)
+  m_2 : vec2<f32>,
+  after : i32,
+}
+
+@group(0) @binding(0) var<uniform> s : S_std140;
+)";
+
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test, StructMatricesUniform) {
+    auto* src = R"(
+struct S2x2 {
+  m : mat2x2<f32>,
+}
+struct S3x2 {
+  m : mat3x2<f32>,
+}
+struct S4x2 {
+  m : mat4x2<f32>,
+}
+struct S2x3 {
+  m : mat2x3<f32>,
+}
+struct S3x3 {
+  m : mat3x3<f32>,
+}
+struct S4x3 {
+  m : mat4x3<f32>,
+}
+struct S2x4 {
+  m : mat2x4<f32>,
+}
+struct S3x4 {
+  m : mat3x4<f32>,
+}
+struct S4x4 {
+  m : mat4x4<f32>,
+}
+
+@group(2) @binding(2) var<uniform> s2x2 : S2x2;
+@group(3) @binding(2) var<uniform> s3x2 : S3x2;
+@group(4) @binding(2) var<uniform> s4x2 : S4x2;
+@group(2) @binding(3) var<uniform> s2x3 : S2x3;
+@group(3) @binding(3) var<uniform> s3x3 : S3x3;
+@group(4) @binding(3) var<uniform> s4x3 : S4x3;
+@group(2) @binding(4) var<uniform> s2x4 : S2x4;
+@group(3) @binding(4) var<uniform> s3x4 : S3x4;
+@group(4) @binding(4) var<uniform> s4x4 : S4x4;
+)";
+
+    auto* expect = R"(
+struct S2x2 {
+  m : mat2x2<f32>,
+}
+
+struct S2x2_std140 {
+  m_0 : vec2<f32>,
+  m_1 : vec2<f32>,
+}
+
+struct S3x2 {
+  m : mat3x2<f32>,
+}
+
+struct S3x2_std140 {
+  m_0 : vec2<f32>,
+  m_1 : vec2<f32>,
+  m_2 : vec2<f32>,
+}
+
+struct S4x2 {
+  m : mat4x2<f32>,
+}
+
+struct S4x2_std140 {
+  m_0 : vec2<f32>,
+  m_1 : vec2<f32>,
+  m_2 : vec2<f32>,
+  m_3 : vec2<f32>,
+}
+
+struct S2x3 {
+  m : mat2x3<f32>,
+}
+
+struct S3x3 {
+  m : mat3x3<f32>,
+}
+
+struct S4x3 {
+  m : mat4x3<f32>,
+}
+
+struct S2x4 {
+  m : mat2x4<f32>,
+}
+
+struct S3x4 {
+  m : mat3x4<f32>,
+}
+
+struct S4x4 {
+  m : mat4x4<f32>,
+}
+
+@group(2) @binding(2) var<uniform> s2x2 : S2x2_std140;
+
+@group(3) @binding(2) var<uniform> s3x2 : S3x2_std140;
+
+@group(4) @binding(2) var<uniform> s4x2 : S4x2_std140;
+
+@group(2) @binding(3) var<uniform> s2x3 : S2x3;
+
+@group(3) @binding(3) var<uniform> s3x3 : S3x3;
+
+@group(4) @binding(3) var<uniform> s4x3 : S4x3;
+
+@group(2) @binding(4) var<uniform> s2x4 : S2x4;
+
+@group(3) @binding(4) var<uniform> s3x4 : S3x4;
+
+@group(4) @binding(4) var<uniform> s4x4 : S4x4;
+)";
+
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test, StructMat2x2Uniform_NameCollision) {
+    auto* src = R"(
+struct S {
+  m_1 : i32,
+  m : mat2x2<f32>,
+}
+
+@group(0) @binding(0) var<uniform> s : S;
+)";
+
+    auto* expect = R"(
+struct S {
+  m_1 : i32,
+  m : mat2x2<f32>,
+}
+
+struct S_std140 {
+  m_1 : i32,
+  m__0 : vec2<f32>,
+  m__1 : vec2<f32>,
+}
+
+@group(0) @binding(0) var<uniform> s : S_std140;
+)";
+
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test, StructMat2x2Uniform_LoadStruct) {
+    auto* src = R"(
+struct S {
+  m : mat2x2<f32>,
+}
+
+@group(0) @binding(0) var<uniform> s : S;
+
+fn f() {
+  let l = s;
+}
+)";
+
+    auto* expect = R"(
+struct S {
+  m : mat2x2<f32>,
+}
+
+struct S_std140 {
+  m_0 : vec2<f32>,
+  m_1 : vec2<f32>,
+}
+
+@group(0) @binding(0) var<uniform> s : S_std140;
+
+fn conv_S(val : S_std140) -> S {
+  return S(mat2x2<f32>(val.m_0, val.m_1));
+}
+
+fn f() {
+  let l = conv_S(s);
+}
+)";
+
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test, StructMat2x2Uniform_LoadMatrix) {
+    auto* src = R"(
+struct S {
+  m : mat2x2<f32>,
+}
+
+@group(0) @binding(0) var<uniform> s : S;
+
+fn f() {
+  let l = s.m;
+}
+)";
+
+    auto* expect = R"(
+struct S {
+  m : mat2x2<f32>,
+}
+
+struct S_std140 {
+  m_0 : vec2<f32>,
+  m_1 : vec2<f32>,
+}
+
+@group(0) @binding(0) var<uniform> s : S_std140;
+
+fn load_s_m() -> mat2x2<f32> {
+  let s = &(s);
+  return mat2x2<f32>((*(s)).m_0, (*(s)).m_1);
+}
+
+fn f() {
+  let l = load_s_m();
+}
+)";
+
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test, StructMat2x2Uniform_LoadColumn0) {
+    auto* src = R"(
+struct S {
+  m : mat2x2<f32>,
+}
+
+@group(0) @binding(0) var<uniform> s : S;
+
+fn f() {
+  let l = s.m[0];
+}
+)";
+
+    auto* expect = R"(
+struct S {
+  m : mat2x2<f32>,
+}
+
+struct S_std140 {
+  m_0 : vec2<f32>,
+  m_1 : vec2<f32>,
+}
+
+@group(0) @binding(0) var<uniform> s : S_std140;
+
+fn f() {
+  let l = s.m_0;
+}
+)";
+
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test, StructMat2x2Uniform_LoadColumn1) {
+    auto* src = R"(
+struct S {
+  m : mat2x2<f32>,
+}
+
+@group(0) @binding(0) var<uniform> s : S;
+
+fn f() {
+  let l = s.m[1];
+}
+)";
+
+    auto* expect = R"(
+struct S {
+  m : mat2x2<f32>,
+}
+
+struct S_std140 {
+  m_0 : vec2<f32>,
+  m_1 : vec2<f32>,
+}
+
+@group(0) @binding(0) var<uniform> s : S_std140;
+
+fn f() {
+  let l = s.m_1;
+}
+)";
+
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test, StructMat2x2Uniform_LoadColumnI) {
+    auto* src = R"(
+struct S {
+  m : mat2x2<f32>,
+}
+
+@group(0) @binding(0) var<uniform> s : S;
+
+fn f() {
+  let I = 0;
+  let l = s.m[I];
+}
+)";
+
+    auto* expect = R"(
+struct S {
+  m : mat2x2<f32>,
+}
+
+struct S_std140 {
+  m_0 : vec2<f32>,
+  m_1 : vec2<f32>,
+}
+
+@group(0) @binding(0) var<uniform> s : S_std140;
+
+fn load_s_m_p0(p0 : u32) -> vec2<f32> {
+  switch(p0) {
+    case 0u: {
+      return s.m_0;
+    }
+    case 1u: {
+      return s.m_1;
+    }
+    default: {
+      return vec2<f32>();
+    }
+  }
+}
+
+fn f() {
+  let I = 0;
+  let l = load_s_m_p0(u32(I));
+}
+)";
+
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test, StructMat2x2Uniform_LoadScalar00) {
+    auto* src = R"(
+struct S {
+  m : mat2x2<f32>,
+}
+
+@group(0) @binding(0) var<uniform> s : S;
+
+fn f() {
+  let l = s.m[0][0];
+}
+)";
+
+    auto* expect = R"(
+struct S {
+  m : mat2x2<f32>,
+}
+
+struct S_std140 {
+  m_0 : vec2<f32>,
+  m_1 : vec2<f32>,
+}
+
+@group(0) @binding(0) var<uniform> s : S_std140;
+
+fn f() {
+  let l = s.m_0[0u];
+}
+)";
+
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test, StructMat2x2Uniform_LoadScalar10) {
+    auto* src = R"(
+struct S {
+  m : mat2x2<f32>,
+}
+
+@group(0) @binding(0) var<uniform> s : S;
+
+fn f() {
+  let l = s.m[1][0];
+}
+)";
+    // Constant indices [1][0]: expect a direct access to the split column member, s.m_1[0u].
+    auto* expect = R"(
+struct S {
+  m : mat2x2<f32>,
+}
+
+struct S_std140 {
+  m_0 : vec2<f32>,
+  m_1 : vec2<f32>,
+}
+
+@group(0) @binding(0) var<uniform> s : S_std140;
+
+fn f() {
+  let l = s.m_1[0u];
+}
+)";
+
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test, StructMat2x2Uniform_LoadScalarI0) {
+    auto* src = R"(
+struct S {
+  m : mat2x2<f32>,
+}
+
+@group(0) @binding(0) var<uniform> s : S;
+
+fn f() {
+  let I = 0;
+  let l = s.m[I][0];
+}
+)";
+    // Column indexed by the let `I`, element 0: expect a load_s_m_p0_0() helper switching on p0.
+    auto* expect = R"(
+struct S {
+  m : mat2x2<f32>,
+}
+
+struct S_std140 {
+  m_0 : vec2<f32>,
+  m_1 : vec2<f32>,
+}
+
+@group(0) @binding(0) var<uniform> s : S_std140;
+
+fn load_s_m_p0_0(p0 : u32) -> f32 {
+  switch(p0) {
+    case 0u: {
+      return s.m_0[0u];
+    }
+    case 1u: {
+      return s.m_1[0u];
+    }
+    default: {
+      return f32();
+    }
+  }
+}
+
+fn f() {
+  let I = 0;
+  let l = load_s_m_p0_0(u32(I));
+}
+)";
+
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test, StructMat2x2Uniform_LoadScalar01) {
+    auto* src = R"(
+struct S {
+  m : mat2x2<f32>,
+}
+
+@group(0) @binding(0) var<uniform> s : S;
+
+fn f() {
+  let l = s.m[0][1];
+}
+)";
+    // Constant indices [0][1]: expect a direct access to the split column member, s.m_0[1u].
+    auto* expect = R"(
+struct S {
+  m : mat2x2<f32>,
+}
+
+struct S_std140 {
+  m_0 : vec2<f32>,
+  m_1 : vec2<f32>,
+}
+
+@group(0) @binding(0) var<uniform> s : S_std140;
+
+fn f() {
+  let l = s.m_0[1u];
+}
+)";
+
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test, StructMat2x2Uniform_LoadScalar11) {
+    auto* src = R"(
+struct S {
+  m : mat2x2<f32>,
+}
+
+@group(0) @binding(0) var<uniform> s : S;
+
+fn f() {
+  let l = s.m[1][1];
+}
+)";
+    // Constant indices [1][1]: expect a direct access to the split column member, s.m_1[1u].
+    auto* expect = R"(
+struct S {
+  m : mat2x2<f32>,
+}
+
+struct S_std140 {
+  m_0 : vec2<f32>,
+  m_1 : vec2<f32>,
+}
+
+@group(0) @binding(0) var<uniform> s : S_std140;
+
+fn f() {
+  let l = s.m_1[1u];
+}
+)";
+
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test, StructMat2x2Uniform_LoadScalarI1) {
+    auto* src = R"(
+struct S {
+  m : mat2x2<f32>,
+}
+
+@group(0) @binding(0) var<uniform> s : S;
+
+fn f() {
+  let I = 0;
+  let l = s.m[I][1];
+}
+)";
+    // Column indexed by the let `I`, element 1: expect a load_s_m_p0_1() helper switching on p0.
+    auto* expect = R"(
+struct S {
+  m : mat2x2<f32>,
+}
+
+struct S_std140 {
+  m_0 : vec2<f32>,
+  m_1 : vec2<f32>,
+}
+
+@group(0) @binding(0) var<uniform> s : S_std140;
+
+fn load_s_m_p0_1(p0 : u32) -> f32 {
+  switch(p0) {
+    case 0u: {
+      return s.m_0[1u];
+    }
+    case 1u: {
+      return s.m_1[1u];
+    }
+    default: {
+      return f32();
+    }
+  }
+}
+
+fn f() {
+  let I = 0;
+  let l = load_s_m_p0_1(u32(I));
+}
+)";
+
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test, StructMat2x2Uniform_LoadScalar0I) {
+    auto* src = R"(
+struct S {
+  m : mat2x2<f32>,
+}
+
+@group(0) @binding(0) var<uniform> s : S;
+
+fn f() {
+  let I = 0;
+  let l = s.m[0][I];
+}
+)";
+    // Constant column 0, element indexed by the let `I`: expect s.m_0[I] and no helper function.
+    auto* expect = R"(
+struct S {
+  m : mat2x2<f32>,
+}
+
+struct S_std140 {
+  m_0 : vec2<f32>,
+  m_1 : vec2<f32>,
+}
+
+@group(0) @binding(0) var<uniform> s : S_std140;
+
+fn f() {
+  let I = 0;
+  let l = s.m_0[I];
+}
+)";
+
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test, StructMat2x2Uniform_LoadScalar1I) {
+    auto* src = R"(
+struct S {
+  m : mat2x2<f32>,
+}
+
+@group(0) @binding(0) var<uniform> s : S;
+
+fn f() {
+  let I = 0;
+  let l = s.m[1][I];
+}
+)";
+    // Constant column 1, element indexed by the let `I`: expect s.m_1[I] and no helper function.
+    auto* expect = R"(
+struct S {
+  m : mat2x2<f32>,
+}
+
+struct S_std140 {
+  m_0 : vec2<f32>,
+  m_1 : vec2<f32>,
+}
+
+@group(0) @binding(0) var<uniform> s : S_std140;
+
+fn f() {
+  let I = 0;
+  let l = s.m_1[I];
+}
+)";
+
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test, StructMat2x2Uniform_LoadScalarII) {
+    auto* src = R"(
+struct S {
+  m : mat2x2<f32>,
+}
+
+@group(0) @binding(0) var<uniform> s : S;
+
+fn f() {
+  let I = 0;
+  let l = s.m[I][I];
+}
+)";
+    // Both indices use the let `I`: expect load_s_m_p0_p1(), switching on p0 and indexing by p1.
+    auto* expect = R"(
+struct S {
+  m : mat2x2<f32>,
+}
+
+struct S_std140 {
+  m_0 : vec2<f32>,
+  m_1 : vec2<f32>,
+}
+
+@group(0) @binding(0) var<uniform> s : S_std140;
+
+fn load_s_m_p0_p1(p0 : u32, p1 : u32) -> f32 {
+  switch(p0) {
+    case 0u: {
+      return s.m_0[p1];
+    }
+    case 1u: {
+      return s.m_1[p1];
+    }
+    default: {
+      return f32();
+    }
+  }
+}
+
+fn f() {
+  let I = 0;
+  let l = load_s_m_p0_p1(u32(I), u32(I));
+}
+)";
+
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test, ArrayStructMat3x2Uniform_LoadArray) {
+    auto* src = R"(
+struct S {
+  @size(64) m : mat3x2<f32>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<S, 3>;
+
+fn f() {
+  let l = a;
+}
+)";
+    // Whole-array load: expect conv_arr_3_S(), which converts each element through conv_S().
+    auto* expect = R"(
+struct S {
+  @size(64)
+  m : mat3x2<f32>,
+}
+
+struct S_std140 {
+  m_0 : vec2<f32>,
+  m_1 : vec2<f32>,
+  @size(48)
+  m_2 : vec2<f32>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<S_std140, 3u>;
+
+fn conv_S(val : S_std140) -> S {
+  return S(mat3x2<f32>(val.m_0, val.m_1, val.m_2));
+}
+
+fn conv_arr_3_S(val : array<S_std140, 3u>) -> array<S, 3u> {
+  var arr : array<S, 3u>;
+  for(var i : u32; (i < 3u); i = (i + 1)) {
+    arr[i] = conv_S(val[i]);
+  }
+  return arr;
+}
+
+fn f() {
+  let l = conv_arr_3_S(a);
+}
+)";
+
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test, ArrayStructMat3x2Uniform_LoadStruct0) {
+    auto* src = R"(
+struct S {
+  @size(64) m : mat3x2<f32>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<S, 3>;
+
+fn f() {
+  let l = a[0];
+}
+)";
+    // Struct load at constant index 0: expect conv_S(a[0u]), rebuilding S from S_std140.
+    auto* expect = R"(
+struct S {
+  @size(64)
+  m : mat3x2<f32>,
+}
+
+struct S_std140 {
+  m_0 : vec2<f32>,
+  m_1 : vec2<f32>,
+  @size(48)
+  m_2 : vec2<f32>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<S_std140, 3u>;
+
+fn conv_S(val : S_std140) -> S {
+  return S(mat3x2<f32>(val.m_0, val.m_1, val.m_2));
+}
+
+fn f() {
+  let l = conv_S(a[0u]);
+}
+)";
+
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test, ArrayStructMat3x2Uniform_LoadStruct1) {
+    auto* src = R"(
+struct S {
+  @size(64) m : mat3x2<f32>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<S, 3>;
+
+fn f() {
+  let l = a[1];
+}
+)";
+    // Struct load at constant index 1: expect conv_S(a[1u]), rebuilding S from S_std140.
+    auto* expect = R"(
+struct S {
+  @size(64)
+  m : mat3x2<f32>,
+}
+
+struct S_std140 {
+  m_0 : vec2<f32>,
+  m_1 : vec2<f32>,
+  @size(48)
+  m_2 : vec2<f32>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<S_std140, 3u>;
+
+fn conv_S(val : S_std140) -> S {
+  return S(mat3x2<f32>(val.m_0, val.m_1, val.m_2));
+}
+
+fn f() {
+  let l = conv_S(a[1u]);
+}
+)";
+
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test, ArrayStructMat3x2Uniform_LoadStructI) {
+    auto* src = R"(
+struct S {
+  @size(64) m : mat3x2<f32>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<S, 3>;
+
+fn f() {
+  let I = 1;
+  let l = a[I];
+}
+)";
+    // Struct load indexed by the let `I`: expect conv_S(a[I]).
+    auto* expect = R"(
+struct S {
+  @size(64)
+  m : mat3x2<f32>,
+}
+
+struct S_std140 {
+  m_0 : vec2<f32>,
+  m_1 : vec2<f32>,
+  @size(48)
+  m_2 : vec2<f32>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<S_std140, 3u>;
+
+fn conv_S(val : S_std140) -> S {
+  return S(mat3x2<f32>(val.m_0, val.m_1, val.m_2));
+}
+
+fn f() {
+  let I = 1;
+  let l = conv_S(a[I]);
+}
+)";
+
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test, ArrayStructMat3x2Uniform_LoadMatrix0) {
+    auto* src = R"(
+struct S {
+  @size(64) m : mat3x2<f32>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<S, 3>;
+
+fn f() {
+  let l = a[0].m;
+}
+)";
+    // Matrix load from a[0]: expect load_a_0_m(), rebuilding mat3x2<f32> from m_0, m_1 and m_2.
+    auto* expect = R"(
+struct S {
+  @size(64)
+  m : mat3x2<f32>,
+}
+
+struct S_std140 {
+  m_0 : vec2<f32>,
+  m_1 : vec2<f32>,
+  @size(48)
+  m_2 : vec2<f32>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<S_std140, 3u>;
+
+fn load_a_0_m() -> mat3x2<f32> {
+  let s = &(a[0u]);
+  return mat3x2<f32>((*(s)).m_0, (*(s)).m_1, (*(s)).m_2);
+}
+
+fn f() {
+  let l = load_a_0_m();
+}
+)";
+
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test, ArrayStructMat3x2Uniform_LoadMatrix1) {
+    auto* src = R"(
+struct S {
+  @size(64) m : mat3x2<f32>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<S, 3>;
+
+fn f() {
+  let l = a[1].m;
+}
+)";
+    // Matrix load from a[1]: expect load_a_1_m(), rebuilding mat3x2<f32> from m_0, m_1 and m_2.
+    auto* expect = R"(
+struct S {
+  @size(64)
+  m : mat3x2<f32>,
+}
+
+struct S_std140 {
+  m_0 : vec2<f32>,
+  m_1 : vec2<f32>,
+  @size(48)
+  m_2 : vec2<f32>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<S_std140, 3u>;
+
+fn load_a_1_m() -> mat3x2<f32> {
+  let s = &(a[1u]);
+  return mat3x2<f32>((*(s)).m_0, (*(s)).m_1, (*(s)).m_2);
+}
+
+fn f() {
+  let l = load_a_1_m();
+}
+)";
+
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test, ArrayStructMat3x2Uniform_LoadMatrixI) {
+    auto* src = R"(
+struct S {
+  @size(64) m : mat3x2<f32>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<S, 3>;
+
+fn f() {
+  let I = 1;
+  let l = a[I].m;
+}
+)";
+    // Matrix load from a[I]: expect load_a_p0_m(p0), called with the index converted by u32(I).
+    auto* expect = R"(
+struct S {
+  @size(64)
+  m : mat3x2<f32>,
+}
+
+struct S_std140 {
+  m_0 : vec2<f32>,
+  m_1 : vec2<f32>,
+  @size(48)
+  m_2 : vec2<f32>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<S_std140, 3u>;
+
+fn load_a_p0_m(p0 : u32) -> mat3x2<f32> {
+  let s = &(a[p0]);
+  return mat3x2<f32>((*(s)).m_0, (*(s)).m_1, (*(s)).m_2);
+}
+
+fn f() {
+  let I = 1;
+  let l = load_a_p0_m(u32(I));
+}
+)";
+
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test, ArrayStructMat3x2Uniform_LoadMatrix0Column0) {
+    auto* src = R"(
+struct S {
+  @size(64) m : mat3x2<f32>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<S, 3>;
+
+fn f() {
+  let l = a[0].m[0];
+}
+)";
+    // Constant array and column indices: expect the direct member access a[0u].m_0.
+    auto* expect = R"(
+struct S {
+  @size(64)
+  m : mat3x2<f32>,
+}
+
+struct S_std140 {
+  m_0 : vec2<f32>,
+  m_1 : vec2<f32>,
+  @size(48)
+  m_2 : vec2<f32>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<S_std140, 3u>;
+
+fn f() {
+  let l = a[0u].m_0;
+}
+)";
+
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test, ArrayStructMat3x2Uniform_LoadMatrix1Column0) {
+    auto* src = R"(
+struct S {
+  @size(64) m : mat3x2<f32>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<S, 3>;
+
+fn f() {
+  let l = a[1].m[0];
+}
+)";
+    // Constant array and column indices: expect the direct member access a[1u].m_0.
+    auto* expect = R"(
+struct S {
+  @size(64)
+  m : mat3x2<f32>,
+}
+
+struct S_std140 {
+  m_0 : vec2<f32>,
+  m_1 : vec2<f32>,
+  @size(48)
+  m_2 : vec2<f32>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<S_std140, 3u>;
+
+fn f() {
+  let l = a[1u].m_0;
+}
+)";
+
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test, ArrayStructMat3x2Uniform_LoadMatrixIColumn0) {
+    auto* src = R"(
+struct S {
+  @size(64) m : mat3x2<f32>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<S, 3>;
+
+fn f() {
+  let I = 1;
+  let l = a[I].m[0];
+}
+)";
+    // Let-indexed array element, constant column 0: expect a[I].m_0 and no helper function.
+    auto* expect = R"(
+struct S {
+  @size(64)
+  m : mat3x2<f32>,
+}
+
+struct S_std140 {
+  m_0 : vec2<f32>,
+  m_1 : vec2<f32>,
+  @size(48)
+  m_2 : vec2<f32>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<S_std140, 3u>;
+
+fn f() {
+  let I = 1;
+  let l = a[I].m_0;
+}
+)";
+
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test, ArrayStructMat3x2Uniform_LoadMatrix0Column1) {
+    auto* src = R"(
+struct S {
+  @size(64) m : mat3x2<f32>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<S, 3>;
+
+fn f() {
+  let l = a[0].m[1];
+}
+)";
+    // Constant array and column indices: expect the direct member access a[0u].m_1.
+    auto* expect = R"(
+struct S {
+  @size(64)
+  m : mat3x2<f32>,
+}
+
+struct S_std140 {
+  m_0 : vec2<f32>,
+  m_1 : vec2<f32>,
+  @size(48)
+  m_2 : vec2<f32>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<S_std140, 3u>;
+
+fn f() {
+  let l = a[0u].m_1;
+}
+)";
+
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test, ArrayStructMat3x2Uniform_LoadMatrix1Column1) {
+    auto* src = R"(
+struct S {
+  @size(64) m : mat3x2<f32>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<S, 3>;
+
+fn f() {
+  let l = a[1].m[1];
+}
+)";
+    // Constant array and column indices: expect the direct member access a[1u].m_1.
+    auto* expect = R"(
+struct S {
+  @size(64)
+  m : mat3x2<f32>,
+}
+
+struct S_std140 {
+  m_0 : vec2<f32>,
+  m_1 : vec2<f32>,
+  @size(48)
+  m_2 : vec2<f32>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<S_std140, 3u>;
+
+fn f() {
+  let l = a[1u].m_1;
+}
+)";
+
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test, ArrayStructMat3x2Uniform_LoadMatrixIColumnI) {
+    auto* src = R"(
+struct S {
+  @size(64) m : mat3x2<f32>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<S, 3>;
+
+fn f() {
+  let I = 1;
+  let l = a[I].m[I];
+}
+)";
+    // Array and column both indexed by `I`: expect load_a_p0_m_p1(), switching on column p1.
+    auto* expect = R"(
+struct S {
+  @size(64)
+  m : mat3x2<f32>,
+}
+
+struct S_std140 {
+  m_0 : vec2<f32>,
+  m_1 : vec2<f32>,
+  @size(48)
+  m_2 : vec2<f32>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<S_std140, 3u>;
+
+fn load_a_p0_m_p1(p0 : u32, p1 : u32) -> vec2<f32> {
+  switch(p1) {
+    case 0u: {
+      return a[p0].m_0;
+    }
+    case 1u: {
+      return a[p0].m_1;
+    }
+    case 2u: {
+      return a[p0].m_2;
+    }
+    default: {
+      return vec2<f32>();
+    }
+  }
+}
+
+fn f() {
+  let I = 1;
+  let l = load_a_p0_m_p1(u32(I), u32(I));
+}
+)";
+
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test, ArrayStructArrayStructMat4x2Uniform_Loads) {
+    auto* src = R"(
+struct Inner {
+  m : mat4x2<f32>,
+}
+
+struct Outer {
+  a : array<Inner, 4>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<Outer, 4>;
+
+fn f() {
+  let I = 1;
+
+  let l_a             : array<Outer, 4>  = a;
+  let l_a_1           : Outer            = a[1];
+  let l_a_2_a         : array<Inner, 4>  = a[2].a;
+  let l_a_3_a_1       : Inner            = a[3].a[1];
+  let l_a_0_a_2_m     : mat4x2<f32>      = a[0].a[2].m;
+  let l_a_1_a_3_m_0   : vec2<f32>        = a[1].a[3].m[0];
+  let l_a_2_a_0_m_1_0 : f32              = a[2].a[0].m[1][0];
+}
+)";
+    // Direct loads at every nesting level; m's columns are vec2<f32>, so element indices stay < 2.
+    auto* expect = R"(
+struct Inner {
+  m : mat4x2<f32>,
+}
+
+struct Inner_std140 {
+  m_0 : vec2<f32>,
+  m_1 : vec2<f32>,
+  m_2 : vec2<f32>,
+  m_3 : vec2<f32>,
+}
+
+struct Outer {
+  a : array<Inner, 4>,
+}
+
+struct Outer_std140 {
+  a : array<Inner_std140, 4u>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<Outer_std140, 4u>;
+
+fn conv_Inner(val : Inner_std140) -> Inner {
+  return Inner(mat4x2<f32>(val.m_0, val.m_1, val.m_2, val.m_3));
+}
+
+fn conv_arr_4_Inner(val : array<Inner_std140, 4u>) -> array<Inner, 4u> {
+  var arr : array<Inner, 4u>;
+  for(var i : u32; (i < 4u); i = (i + 1)) {
+    arr[i] = conv_Inner(val[i]);
+  }
+  return arr;
+}
+
+fn conv_Outer(val : Outer_std140) -> Outer {
+  return Outer(conv_arr_4_Inner(val.a));
+}
+
+fn conv_arr_4_Outer(val : array<Outer_std140, 4u>) -> array<Outer, 4u> {
+  var arr : array<Outer, 4u>;
+  for(var i : u32; (i < 4u); i = (i + 1)) {
+    arr[i] = conv_Outer(val[i]);
+  }
+  return arr;
+}
+
+fn load_a_0_a_2_m() -> mat4x2<f32> {
+  let s = &(a[0u].a[2u]);
+  return mat4x2<f32>((*(s)).m_0, (*(s)).m_1, (*(s)).m_2, (*(s)).m_3);
+}
+
+fn f() {
+  let I = 1;
+  let l_a : array<Outer, 4> = conv_arr_4_Outer(a);
+  let l_a_1 : Outer = conv_Outer(a[1u]);
+  let l_a_2_a : array<Inner, 4> = conv_arr_4_Inner(a[2u].a);
+  let l_a_3_a_1 : Inner = conv_Inner(a[3u].a[1u]);
+  let l_a_0_a_2_m : mat4x2<f32> = load_a_0_a_2_m();
+  let l_a_1_a_3_m_0 : vec2<f32> = a[1u].a[3u].m_0;
+  let l_a_2_a_0_m_1_0 : f32 = a[2u].a[0u].m_1[0u];
+}
+)";
+
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test, ArrayStructArrayStructMat4x2Uniform_LoadsViaPtrs) {
+    // Note: Std140Test requires the PromoteSideEffectsToDecl transform to have been run first, so
+    // side-effects in the let-chain will not be a problem.
+    auto* src = R"(
+struct Inner {
+  m : mat4x2<f32>,
+}
+
+struct Outer {
+  a : array<Inner, 4>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<Outer, 4>;
+
+fn f() {
+  let I = 1;
+
+  let p_a = &a;
+  let p_a_3 = &((*p_a)[3]);
+  let p_a_3_a = &((*p_a_3).a);
+  let p_a_3_a_2 = &((*p_a_3_a)[2]);
+  let p_a_3_a_2_m = &((*p_a_3_a_2).m);
+  let p_a_3_a_2_m_1 = &((*p_a_3_a_2_m)[1]);
+
+
+  let l_a             : array<Outer, 4> = *p_a;
+  let l_a_3           : Outer           = *p_a_3;
+  let l_a_3_a         : array<Inner, 4> = *p_a_3_a;
+  let l_a_3_a_2       : Inner           = *p_a_3_a_2;
+  let l_a_3_a_2_m     : mat4x2<f32>     = *p_a_3_a_2_m;
+  let l_a_3_a_2_m_1   : vec2<f32>       = *p_a_3_a_2_m_1;
+  let l_a_2_a_0_m_1_0 : f32             = (*p_a_3_a_2_m_1)[0];
+}
+)";
+    // Expect pointer lets and dereferences alike rewritten to conv_*/load_* calls or m_1 reads.
+    auto* expect = R"(
+struct Inner {
+  m : mat4x2<f32>,
+}
+
+struct Inner_std140 {
+  m_0 : vec2<f32>,
+  m_1 : vec2<f32>,
+  m_2 : vec2<f32>,
+  m_3 : vec2<f32>,
+}
+
+struct Outer {
+  a : array<Inner, 4>,
+}
+
+struct Outer_std140 {
+  a : array<Inner_std140, 4u>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<Outer_std140, 4u>;
+
+fn conv_Inner(val : Inner_std140) -> Inner {
+  return Inner(mat4x2<f32>(val.m_0, val.m_1, val.m_2, val.m_3));
+}
+
+fn conv_arr_4_Inner(val : array<Inner_std140, 4u>) -> array<Inner, 4u> {
+  var arr : array<Inner, 4u>;
+  for(var i : u32; (i < 4u); i = (i + 1)) {
+    arr[i] = conv_Inner(val[i]);
+  }
+  return arr;
+}
+
+fn conv_Outer(val : Outer_std140) -> Outer {
+  return Outer(conv_arr_4_Inner(val.a));
+}
+
+fn conv_arr_4_Outer(val : array<Outer_std140, 4u>) -> array<Outer, 4u> {
+  var arr : array<Outer, 4u>;
+  for(var i : u32; (i < 4u); i = (i + 1)) {
+    arr[i] = conv_Outer(val[i]);
+  }
+  return arr;
+}
+
+fn load_a_3_a_2_m() -> mat4x2<f32> {
+  let s = &(a[3u].a[2u]);
+  return mat4x2<f32>((*(s)).m_0, (*(s)).m_1, (*(s)).m_2, (*(s)).m_3);
+}
+
+fn f() {
+  let I = 1;
+  let p_a = conv_arr_4_Outer(a);
+  let p_a_3 = conv_Outer(a[3u]);
+  let p_a_3_a = conv_arr_4_Inner(a[3u].a);
+  let p_a_3_a_2 = conv_Inner(a[3u].a[2u]);
+  let p_a_3_a_2_m = load_a_3_a_2_m();
+  let p_a_3_a_2_m_1 = a[3u].a[2u].m_1;
+  let l_a : array<Outer, 4> = conv_arr_4_Outer(a);
+  let l_a_3 : Outer = conv_Outer(a[3u]);
+  let l_a_3_a : array<Inner, 4> = conv_arr_4_Inner(a[3u].a);
+  let l_a_3_a_2 : Inner = conv_Inner(a[3u].a[2u]);
+  let l_a_3_a_2_m : mat4x2<f32> = load_a_3_a_2_m();
+  let l_a_3_a_2_m_1 : vec2<f32> = a[3u].a[2u].m_1;
+  let l_a_2_a_0_m_1_0 : f32 = a[3u].a[2u].m_1[0u];
+}
+)";
+
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test, ArrayStructMat3x2Uniform_CopyArray_UniformToStorage) {
+    auto* src = R"(
+struct S {
+  @size(64) m : mat3x2<f32>,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S, 4>;
+@group(0) @binding(1) var<storage, read_write> s : array<S, 4>;
+
+fn f() {
+    s = u;
+}
+)";
+    // Only the uniform var is retyped; storage `s` keeps S and is assigned conv_arr_4_S(u).
+    auto* expect =
+        R"(
+struct S {
+  @size(64)
+  m : mat3x2<f32>,
+}
+
+struct S_std140 {
+  m_0 : vec2<f32>,
+  m_1 : vec2<f32>,
+  @size(48)
+  m_2 : vec2<f32>,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S_std140, 4u>;
+
+@group(0) @binding(1) var<storage, read_write> s : array<S, 4>;
+
+fn conv_S(val : S_std140) -> S {
+  return S(mat3x2<f32>(val.m_0, val.m_1, val.m_2));
+}
+
+fn conv_arr_4_S(val : array<S_std140, 4u>) -> array<S, 4u> {
+  var arr : array<S, 4u>;
+  for(var i : u32; (i < 4u); i = (i + 1)) {
+    arr[i] = conv_S(val[i]);
+  }
+  return arr;
+}
+
+fn f() {
+  s = conv_arr_4_S(u);
+}
+)";
+
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test, ArrayStructMat3x2Uniform_CopyStruct_UniformToWorkgroup) {
+    // Copies one struct element from the uniform array into a workgroup array.
+    // The workgroup variable carries no @group/@binding attributes: those are only valid on
+    // resource variables, and with them the input is rejected with an error before the struct
+    // copy is ever reached.
+    auto* src = R"(
+struct S {
+  v : vec4<i32>,
+  @size(64) m : mat3x2<f32>,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S, 4>;
+var<workgroup> w : array<S, 4>;
+
+fn f() {
+    w[0] = u[1];
+}
+)";
+
+    // Only the uniform side is retyped; the loaded element is converted back to S via conv_S(),
+    // which forwards the non-matrix member `v` and rebuilds `m` from its split columns.
+    auto* expect =
+        R"(
+struct S {
+  v : vec4<i32>,
+  @size(64)
+  m : mat3x2<f32>,
+}
+
+struct S_std140 {
+  v : vec4<i32>,
+  m_0 : vec2<f32>,
+  m_1 : vec2<f32>,
+  @size(48)
+  m_2 : vec2<f32>,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S_std140, 4u>;
+
+var<workgroup> w : array<S, 4>;
+
+fn conv_S(val : S_std140) -> S {
+  return S(val.v, mat3x2<f32>(val.m_0, val.m_1, val.m_2));
+}
+
+fn f() {
+  w[0] = conv_S(u[1u]);
+}
+)";
+
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test, ArrayStructMat3x2Uniform_CopyMatrix_UniformToPrivate) {
+    auto* src = R"(
+struct S {
+  v : vec4<i32>,
+  @size(64) m : mat3x2<f32>,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S, 4>;
+var<private> p : array<S, 4>;
+
+fn f() {
+    p[2].m = u[1].m;
+}
+)";
+    // Matrix copy into a private var: expect the right-hand side to become load_u_1_m().
+    auto* expect = R"(
+struct S {
+  v : vec4<i32>,
+  @size(64)
+  m : mat3x2<f32>,
+}
+
+struct S_std140 {
+  v : vec4<i32>,
+  m_0 : vec2<f32>,
+  m_1 : vec2<f32>,
+  @size(48)
+  m_2 : vec2<f32>,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S_std140, 4u>;
+
+var<private> p : array<S, 4>;
+
+fn load_u_1_m() -> mat3x2<f32> {
+  let s = &(u[1u]);
+  return mat3x2<f32>((*(s)).m_0, (*(s)).m_1, (*(s)).m_2);
+}
+
+fn f() {
+  p[2].m = load_u_1_m();
+}
+)";
+
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test, ArrayStructMat3x2Uniform_CopyColumn_UniformToStorage) {
+    auto* src = R"(
+struct S {
+  @size(64) m : mat3x2<f32>,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S, 4>;
+@group(0) @binding(1) var<storage, read_write> s : array<S, 4>;
+
+fn f() {
+    s[3].m[1] = u[2].m[0];
+}
+)";
+    // Column copy: only the uniform read changes (u[2u].m_0); the store target is left as is.
+    auto* expect =
+        R"(
+struct S {
+  @size(64)
+  m : mat3x2<f32>,
+}
+
+struct S_std140 {
+  m_0 : vec2<f32>,
+  m_1 : vec2<f32>,
+  @size(48)
+  m_2 : vec2<f32>,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S_std140, 4u>;
+
+@group(0) @binding(1) var<storage, read_write> s : array<S, 4>;
+
+fn f() {
+  s[3].m[1] = u[2u].m_0;
+}
+)";
+
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test, ArrayStructMat3x2Uniform_CopySwizzle_UniformToWorkgroup) {
+    auto* src = R"(
+struct S {
+  @size(64) m : mat3x2<f32>,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S, 4>;
+var<workgroup> w : array<S, 4>;
+
+fn f() {
+    w[3].m[1] = u[2].m[0].yx.xy;
+}
+)";
+    // Swizzled column copy: expect .yx.xy kept, applied to the split column u[2u].m_0.
+    auto* expect =
+        R"(
+struct S {
+  @size(64)
+  m : mat3x2<f32>,
+}
+
+struct S_std140 {
+  m_0 : vec2<f32>,
+  m_1 : vec2<f32>,
+  @size(48)
+  m_2 : vec2<f32>,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S_std140, 4u>;
+
+var<workgroup> w : array<S, 4>;
+
+fn f() {
+  w[3].m[1] = u[2u].m_0.yx.xy;
+}
+)";
+
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test, ArrayStructMat3x2Uniform_CopyScalar_UniformToPrivate) {
+    auto* src = R"(
+struct S {
+  @size(64) m : mat3x2<f32>,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S, 4>;
+var<private> w : array<S, 4>;
+
+fn f() {
+    w[3].m[1].x = u[2].m[0].y;
+}
+)";
+    // Scalar copy: the .y read of column 0 is expected to become the index access m_0[1u].
+    auto* expect =
+        R"(
+struct S {
+  @size(64)
+  m : mat3x2<f32>,
+}
+
+struct S_std140 {
+  m_0 : vec2<f32>,
+  m_1 : vec2<f32>,
+  @size(48)
+  m_2 : vec2<f32>,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S_std140, 4u>;
+
+var<private> w : array<S, 4>;
+
+fn f() {
+  w[3].m[1].x = u[2u].m_0[1u];
+}
+)";
+
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test, MatrixUsageInForLoop) {
+    auto* src = R"(
+struct S {
+  @size(64) m : mat3x2<f32>,
+}
+
+@group(0) @binding(0) var<uniform> u : S;
+
+fn f() {
+    for (var i = u32(u.m[0][1]); i < u32(u.m[i][1]); i += u32(u.m[1][i])) {
+    }
+}
+)";
+    // Matrix accesses in all three for-loop clauses; element indices are valid for vec2 columns.
+    auto* expect =
+        R"(
+struct S {
+  @size(64)
+  m : mat3x2<f32>,
+}
+
+struct S_std140 {
+  m_0 : vec2<f32>,
+  m_1 : vec2<f32>,
+  @size(48)
+  m_2 : vec2<f32>,
+}
+
+@group(0) @binding(0) var<uniform> u : S_std140;
+
+fn load_u_m_p0_1(p0 : u32) -> f32 {
+  switch(p0) {
+    case 0u: {
+      return u.m_0[1u];
+    }
+    case 1u: {
+      return u.m_1[1u];
+    }
+    case 2u: {
+      return u.m_2[1u];
+    }
+    default: {
+      return f32();
+    }
+  }
+}
+
+fn f() {
+  for(var i = u32(u.m_0[1u]); (i < u32(load_u_m_p0_1(u32(i)))); i += u32(u.m_1[i])) {
+  }
+}
+)";
+
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+}  // namespace
+}  // namespace tint::transform
diff --git a/src/tint/utils/string.h b/src/tint/utils/string.h
index f10e258..de1f4d6 100644
--- a/src/tint/utils/string.h
+++ b/src/tint/utils/string.h
@@ -24,9 +24,9 @@
 /// @param substr the string to search for
 /// @param replacement the replacement string to use instead of `substr`
 /// @returns `str` with all occurrences of `substr` replaced with `replacement`
-inline std::string ReplaceAll(std::string str,
-                              const std::string& substr,
-                              const std::string& replacement) {
+[[nodiscard]] inline std::string ReplaceAll(std::string str,
+                                            const std::string& substr,
+                                            const std::string& replacement) {
     size_t pos = 0;
     while ((pos = str.find(substr, pos)) != std::string::npos) {
         str.replace(pos, substr.length(), replacement);
diff --git a/src/tint/writer/glsl/generator_impl.cc b/src/tint/writer/glsl/generator_impl.cc
index 31f749f..e783b0b 100644
--- a/src/tint/writer/glsl/generator_impl.cc
+++ b/src/tint/writer/glsl/generator_impl.cc
@@ -64,6 +64,7 @@
 #include "src/tint/transform/renamer.h"
 #include "src/tint/transform/simplify_pointers.h"
 #include "src/tint/transform/single_entry_point.h"
+#include "src/tint/transform/std140.h"
 #include "src/tint/transform/unshadow.h"
 #include "src/tint/transform/unwind_discard_functions.h"
 #include "src/tint/transform/zero_init_workgroup_memory.h"
@@ -221,6 +222,7 @@
     manager.Add<transform::CanonicalizeEntryPointIO>();
     manager.Add<transform::ExpandCompoundAssignment>();
     manager.Add<transform::PromoteSideEffectsToDecl>();
+    manager.Add<transform::Std140>();  // Must come after PromoteSideEffectsToDecl
     manager.Add<transform::UnwindDiscardFunctions>();
     manager.Add<transform::SimplifyPointers>();
 
diff --git a/src/tint/writer/spirv/generator_impl.cc b/src/tint/writer/spirv/generator_impl.cc
index ace5209..d7a2b80 100644
--- a/src/tint/writer/spirv/generator_impl.cc
+++ b/src/tint/writer/spirv/generator_impl.cc
@@ -29,6 +29,7 @@
 #include "src/tint/transform/remove_phonies.h"
 #include "src/tint/transform/remove_unreachable_statements.h"
 #include "src/tint/transform/simplify_pointers.h"
+#include "src/tint/transform/std140.h"
 #include "src/tint/transform/unshadow.h"
 #include "src/tint/transform/unwind_discard_functions.h"
 #include "src/tint/transform/var_for_dynamic_index.h"
@@ -75,6 +76,7 @@
     manager.Add<transform::RemoveUnreachableStatements>();
     manager.Add<transform::ExpandCompoundAssignment>();
     manager.Add<transform::PromoteSideEffectsToDecl>();
+    manager.Add<transform::Std140>();  // Must come after PromoteSideEffectsToDecl
     manager.Add<transform::UnwindDiscardFunctions>();
     manager.Add<transform::SimplifyPointers>();  // Required for arrayLength()
     manager.Add<transform::RemovePhonies>();
diff --git a/test/tint/buffer/uniform/dynamic_index/read.wgsl.expected.glsl b/test/tint/buffer/uniform/dynamic_index/read.wgsl.expected.glsl
index bc2f943..f9e6f8c 100644
--- a/test/tint/buffer/uniform/dynamic_index/read.wgsl.expected.glsl
+++ b/test/tint/buffer/uniform/dynamic_index/read.wgsl.expected.glsl
@@ -14,14 +14,39 @@
   ivec4 k[4];
 };
 
+struct Inner_std140 {
+  ivec3 a;
+  int b;
+  uvec3 c;
+  uint d;
+  vec3 e;
+  float f;
+  ivec2 g;
+  ivec2 h;
+  mat2x3 i;
+  vec2 j_0;
+  vec2 j_1;
+  vec2 j_2;
+  ivec4 k[4];
+};
+
 struct S {
   Inner arr[8];
 };
 
-layout(binding = 0) uniform S_1 {
-  Inner arr[8];
+struct S_std140 {
+  Inner_std140 arr[8];
+};
+
+layout(binding = 0) uniform S_std140_1 {
+  Inner_std140 arr[8];
 } s;
 
+mat3x2 load_s_arr_p0_j(uint p0) {
+  uint s_save = p0;
+  return mat3x2(s.arr[s_save].j_0, s.arr[s_save].j_1, s.arr[s_save].j_2);
+}
+
 void tint_symbol(uint idx) {
   ivec3 a = s.arr[idx].a;
   int b = s.arr[idx].b;
@@ -32,7 +57,7 @@
   ivec2 g = s.arr[idx].g;
   ivec2 h = s.arr[idx].h;
   mat2x3 i = s.arr[idx].i;
-  mat3x2 j = s.arr[idx].j;
+  mat3x2 j = load_s_arr_p0_j(uint(idx));
   ivec4 k[4] = s.arr[idx].k;
 }
 
diff --git a/test/tint/buffer/uniform/dynamic_index/read.wgsl.expected.spvasm b/test/tint/buffer/uniform/dynamic_index/read.wgsl.expected.spvasm
index 8837281..42889ab 100644
--- a/test/tint/buffer/uniform/dynamic_index/read.wgsl.expected.spvasm
+++ b/test/tint/buffer/uniform/dynamic_index/read.wgsl.expected.spvasm
@@ -1,53 +1,55 @@
-SKIP: FAILED
-
 ; SPIR-V
 ; Version: 1.3
 ; Generator: Google Tint Compiler; 0
-; Bound: 73
+; Bound: 86
 ; Schema: 0
                OpCapability Shader
                OpMemoryModel Logical GLSL450
                OpEntryPoint GLCompute %main "main" %idx_1
                OpExecutionMode %main LocalSize 1 1 1
                OpName %idx_1 "idx_1"
-               OpName %S "S"
-               OpMemberName %S 0 "arr"
-               OpName %Inner "Inner"
-               OpMemberName %Inner 0 "a"
-               OpMemberName %Inner 1 "b"
-               OpMemberName %Inner 2 "c"
-               OpMemberName %Inner 3 "d"
-               OpMemberName %Inner 4 "e"
-               OpMemberName %Inner 5 "f"
-               OpMemberName %Inner 6 "g"
-               OpMemberName %Inner 7 "h"
-               OpMemberName %Inner 8 "i"
-               OpMemberName %Inner 9 "j"
-               OpMemberName %Inner 10 "k"
+               OpName %S_std140 "S_std140"
+               OpMemberName %S_std140 0 "arr"
+               OpName %Inner_std140 "Inner_std140"
+               OpMemberName %Inner_std140 0 "a"
+               OpMemberName %Inner_std140 1 "b"
+               OpMemberName %Inner_std140 2 "c"
+               OpMemberName %Inner_std140 3 "d"
+               OpMemberName %Inner_std140 4 "e"
+               OpMemberName %Inner_std140 5 "f"
+               OpMemberName %Inner_std140 6 "g"
+               OpMemberName %Inner_std140 7 "h"
+               OpMemberName %Inner_std140 8 "i"
+               OpMemberName %Inner_std140 9 "j_0"
+               OpMemberName %Inner_std140 10 "j_1"
+               OpMemberName %Inner_std140 11 "j_2"
+               OpMemberName %Inner_std140 12 "k"
                OpName %s "s"
+               OpName %load_s_arr_p0_j "load_s_arr_p0_j"
+               OpName %p0 "p0"
                OpName %main_inner "main_inner"
                OpName %idx "idx"
                OpName %main "main"
                OpDecorate %idx_1 BuiltIn LocalInvocationIndex
-               OpDecorate %S Block
-               OpMemberDecorate %S 0 Offset 0
-               OpMemberDecorate %Inner 0 Offset 0
-               OpMemberDecorate %Inner 1 Offset 12
-               OpMemberDecorate %Inner 2 Offset 16
-               OpMemberDecorate %Inner 3 Offset 28
-               OpMemberDecorate %Inner 4 Offset 32
-               OpMemberDecorate %Inner 5 Offset 44
-               OpMemberDecorate %Inner 6 Offset 48
-               OpMemberDecorate %Inner 7 Offset 56
-               OpMemberDecorate %Inner 8 Offset 64
-               OpMemberDecorate %Inner 8 ColMajor
-               OpMemberDecorate %Inner 8 MatrixStride 16
-               OpMemberDecorate %Inner 9 Offset 96
-               OpMemberDecorate %Inner 9 ColMajor
-               OpMemberDecorate %Inner 9 MatrixStride 8
-               OpMemberDecorate %Inner 10 Offset 128
+               OpDecorate %S_std140 Block
+               OpMemberDecorate %S_std140 0 Offset 0
+               OpMemberDecorate %Inner_std140 0 Offset 0
+               OpMemberDecorate %Inner_std140 1 Offset 12
+               OpMemberDecorate %Inner_std140 2 Offset 16
+               OpMemberDecorate %Inner_std140 3 Offset 28
+               OpMemberDecorate %Inner_std140 4 Offset 32
+               OpMemberDecorate %Inner_std140 5 Offset 44
+               OpMemberDecorate %Inner_std140 6 Offset 48
+               OpMemberDecorate %Inner_std140 7 Offset 56
+               OpMemberDecorate %Inner_std140 8 Offset 64
+               OpMemberDecorate %Inner_std140 8 ColMajor
+               OpMemberDecorate %Inner_std140 8 MatrixStride 16
+               OpMemberDecorate %Inner_std140 9 Offset 96
+               OpMemberDecorate %Inner_std140 10 Offset 104
+               OpMemberDecorate %Inner_std140 11 Offset 112
+               OpMemberDecorate %Inner_std140 12 Offset 128
                OpDecorate %_arr_v4int_uint_4 ArrayStride 16
-               OpDecorate %_arr_Inner_uint_8 ArrayStride 192
+               OpDecorate %_arr_Inner_std140_uint_8 ArrayStride 192
                OpDecorate %s NonWritable
                OpDecorate %s Binding 0
                OpDecorate %s DescriptorSet 0
@@ -62,19 +64,24 @@
       %v2int = OpTypeVector %int 2
 %mat2v3float = OpTypeMatrix %v3float 2
     %v2float = OpTypeVector %float 2
-%mat3v2float = OpTypeMatrix %v2float 3
       %v4int = OpTypeVector %int 4
      %uint_4 = OpConstant %uint 4
 %_arr_v4int_uint_4 = OpTypeArray %v4int %uint_4
-      %Inner = OpTypeStruct %v3int %int %v3uint %uint %v3float %float %v2int %v2int %mat2v3float %mat3v2float %_arr_v4int_uint_4
+%Inner_std140 = OpTypeStruct %v3int %int %v3uint %uint %v3float %float %v2int %v2int %mat2v3float %v2float %v2float %v2float %_arr_v4int_uint_4
      %uint_8 = OpConstant %uint 8
-%_arr_Inner_uint_8 = OpTypeArray %Inner %uint_8
-          %S = OpTypeStruct %_arr_Inner_uint_8
-%_ptr_Uniform_S = OpTypePointer Uniform %S
-          %s = OpVariable %_ptr_Uniform_S Uniform
-       %void = OpTypeVoid
-         %22 = OpTypeFunction %void %uint
+%_arr_Inner_std140_uint_8 = OpTypeArray %Inner_std140 %uint_8
+   %S_std140 = OpTypeStruct %_arr_Inner_std140_uint_8
+%_ptr_Uniform_S_std140 = OpTypePointer Uniform %S_std140
+          %s = OpVariable %_ptr_Uniform_S_std140 Uniform
+%mat3v2float = OpTypeMatrix %v2float 3
+         %21 = OpTypeFunction %mat3v2float %uint
      %uint_0 = OpConstant %uint 0
+     %uint_9 = OpConstant %uint 9
+%_ptr_Uniform_v2float = OpTypePointer Uniform %v2float
+    %uint_10 = OpConstant %uint 10
+    %uint_11 = OpConstant %uint 11
+       %void = OpTypeVoid
+         %38 = OpTypeFunction %void %uint
 %_ptr_Uniform_v3int = OpTypePointer Uniform %v3int
      %uint_1 = OpConstant %uint 1
 %_ptr_Uniform_int = OpTypePointer Uniform %int
@@ -89,44 +96,50 @@
 %_ptr_Uniform_v2int = OpTypePointer Uniform %v2int
      %uint_7 = OpConstant %uint 7
 %_ptr_Uniform_mat2v3float = OpTypePointer Uniform %mat2v3float
-     %uint_9 = OpConstant %uint 9
-%_ptr_Uniform_mat3v2float = OpTypePointer Uniform %mat3v2float
-    %uint_10 = OpConstant %uint 10
+    %uint_12 = OpConstant %uint 12
 %_ptr_Uniform__arr_v4int_uint_4 = OpTypePointer Uniform %_arr_v4int_uint_4
-         %68 = OpTypeFunction %void
- %main_inner = OpFunction %void None %22
+         %81 = OpTypeFunction %void
+%load_s_arr_p0_j = OpFunction %mat3v2float None %21
+         %p0 = OpFunctionParameter %uint
+         %25 = OpLabel
+         %29 = OpAccessChain %_ptr_Uniform_v2float %s %uint_0 %p0 %uint_9
+         %30 = OpLoad %v2float %29
+         %32 = OpAccessChain %_ptr_Uniform_v2float %s %uint_0 %p0 %uint_10
+         %33 = OpLoad %v2float %32
+         %35 = OpAccessChain %_ptr_Uniform_v2float %s %uint_0 %p0 %uint_11
+         %36 = OpLoad %v2float %35
+         %37 = OpCompositeConstruct %mat3v2float %30 %33 %36
+               OpReturnValue %37
+               OpFunctionEnd
+ %main_inner = OpFunction %void None %38
         %idx = OpFunctionParameter %uint
-         %26 = OpLabel
-         %29 = OpAccessChain %_ptr_Uniform_v3int %s %uint_0 %idx %uint_0
-         %30 = OpLoad %v3int %29
-         %33 = OpAccessChain %_ptr_Uniform_int %s %uint_0 %idx %uint_1
-         %34 = OpLoad %int %33
-         %37 = OpAccessChain %_ptr_Uniform_v3uint %s %uint_0 %idx %uint_2
-         %38 = OpLoad %v3uint %37
-         %41 = OpAccessChain %_ptr_Uniform_uint %s %uint_0 %idx %uint_3
-         %42 = OpLoad %uint %41
-         %44 = OpAccessChain %_ptr_Uniform_v3float %s %uint_0 %idx %uint_4
-         %45 = OpLoad %v3float %44
-         %48 = OpAccessChain %_ptr_Uniform_float %s %uint_0 %idx %uint_5
-         %49 = OpLoad %float %48
-         %52 = OpAccessChain %_ptr_Uniform_v2int %s %uint_0 %idx %uint_6
-         %53 = OpLoad %v2int %52
-         %55 = OpAccessChain %_ptr_Uniform_v2int %s %uint_0 %idx %uint_7
-         %56 = OpLoad %v2int %55
-         %58 = OpAccessChain %_ptr_Uniform_mat2v3float %s %uint_0 %idx %uint_8
-         %59 = OpLoad %mat2v3float %58
-         %62 = OpAccessChain %_ptr_Uniform_mat3v2float %s %uint_0 %idx %uint_9
-         %63 = OpLoad %mat3v2float %62
-         %66 = OpAccessChain %_ptr_Uniform__arr_v4int_uint_4 %s %uint_0 %idx %uint_10
-         %67 = OpLoad %_arr_v4int_uint_4 %66
+         %42 = OpLabel
+         %44 = OpAccessChain %_ptr_Uniform_v3int %s %uint_0 %idx %uint_0
+         %45 = OpLoad %v3int %44
+         %48 = OpAccessChain %_ptr_Uniform_int %s %uint_0 %idx %uint_1
+         %49 = OpLoad %int %48
+         %52 = OpAccessChain %_ptr_Uniform_v3uint %s %uint_0 %idx %uint_2
+         %53 = OpLoad %v3uint %52
+         %56 = OpAccessChain %_ptr_Uniform_uint %s %uint_0 %idx %uint_3
+         %57 = OpLoad %uint %56
+         %59 = OpAccessChain %_ptr_Uniform_v3float %s %uint_0 %idx %uint_4
+         %60 = OpLoad %v3float %59
+         %63 = OpAccessChain %_ptr_Uniform_float %s %uint_0 %idx %uint_5
+         %64 = OpLoad %float %63
+         %67 = OpAccessChain %_ptr_Uniform_v2int %s %uint_0 %idx %uint_6
+         %68 = OpLoad %v2int %67
+         %70 = OpAccessChain %_ptr_Uniform_v2int %s %uint_0 %idx %uint_7
+         %71 = OpLoad %v2int %70
+         %73 = OpAccessChain %_ptr_Uniform_mat2v3float %s %uint_0 %idx %uint_8
+         %74 = OpLoad %mat2v3float %73
+         %75 = OpFunctionCall %mat3v2float %load_s_arr_p0_j %idx
+         %79 = OpAccessChain %_ptr_Uniform__arr_v4int_uint_4 %s %uint_0 %idx %uint_12
+         %80 = OpLoad %_arr_v4int_uint_4 %79
                OpReturn
                OpFunctionEnd
-       %main = OpFunction %void None %68
-         %70 = OpLabel
-         %72 = OpLoad %uint %idx_1
-         %71 = OpFunctionCall %void %main_inner %72
+       %main = OpFunction %void None %81
+         %83 = OpLabel
+         %85 = OpLoad %uint %idx_1
+         %84 = OpFunctionCall %void %main_inner %85
                OpReturn
                OpFunctionEnd
-1:1: Structure id 8 decorated as Block for variable in Uniform storage class must follow relaxed uniform buffer layout rules: member 9 is a matrix with stride 8 not satisfying alignment to 16
-  %Inner = OpTypeStruct %v3int %int %v3uint %uint %v3float %float %v2int %v2int %mat2v3float %mat3v2float %_arr_v4int_uint_4
-
diff --git a/test/tint/buffer/uniform/static_index/read.wgsl.expected.glsl b/test/tint/buffer/uniform/static_index/read.wgsl.expected.glsl
index e43607a..20d5414 100644
--- a/test/tint/buffer/uniform/static_index/read.wgsl.expected.glsl
+++ b/test/tint/buffer/uniform/static_index/read.wgsl.expected.glsl
@@ -19,7 +19,7 @@
   Inner l[4];
 };
 
-layout(binding = 0) uniform S_1 {
+struct S_std140 {
   ivec3 a;
   int b;
   uvec3 c;
@@ -29,11 +29,34 @@
   ivec2 g;
   ivec2 h;
   mat2x3 i;
-  mat3x2 j;
+  vec2 j_0;
+  vec2 j_1;
+  vec2 j_2;
+  Inner k;
+  Inner l[4];
+};
+
+layout(binding = 0) uniform S_std140_1 {
+  ivec3 a;
+  int b;
+  uvec3 c;
+  uint d;
+  vec3 e;
+  float f;
+  ivec2 g;
+  ivec2 h;
+  mat2x3 i;
+  vec2 j_0;
+  vec2 j_1;
+  vec2 j_2;
   Inner k;
   Inner l[4];
 } s;
 
+mat3x2 load_s_j() {
+  return mat3x2(s.j_0, s.j_1, s.j_2);
+}
+
 void tint_symbol() {
   ivec3 a = s.a;
   int b = s.b;
@@ -44,7 +67,7 @@
   ivec2 g = s.g;
   ivec2 h = s.h;
   mat2x3 i = s.i;
-  mat3x2 j = s.j;
+  mat3x2 j = load_s_j();
   Inner k = s.k;
   Inner l[4] = s.l;
 }
diff --git a/test/tint/buffer/uniform/static_index/read.wgsl.expected.spvasm b/test/tint/buffer/uniform/static_index/read.wgsl.expected.spvasm
index 88be5be..7134d5b 100644
--- a/test/tint/buffer/uniform/static_index/read.wgsl.expected.spvasm
+++ b/test/tint/buffer/uniform/static_index/read.wgsl.expected.spvasm
@@ -1,49 +1,50 @@
-SKIP: FAILED
-
 ; SPIR-V
 ; Version: 1.3
 ; Generator: Google Tint Compiler; 0
-; Bound: 67
+; Bound: 78
 ; Schema: 0
                OpCapability Shader
                OpMemoryModel Logical GLSL450
                OpEntryPoint GLCompute %main "main"
                OpExecutionMode %main LocalSize 1 1 1
-               OpName %S "S"
-               OpMemberName %S 0 "a"
-               OpMemberName %S 1 "b"
-               OpMemberName %S 2 "c"
-               OpMemberName %S 3 "d"
-               OpMemberName %S 4 "e"
-               OpMemberName %S 5 "f"
-               OpMemberName %S 6 "g"
-               OpMemberName %S 7 "h"
-               OpMemberName %S 8 "i"
-               OpMemberName %S 9 "j"
-               OpMemberName %S 10 "k"
+               OpName %S_std140 "S_std140"
+               OpMemberName %S_std140 0 "a"
+               OpMemberName %S_std140 1 "b"
+               OpMemberName %S_std140 2 "c"
+               OpMemberName %S_std140 3 "d"
+               OpMemberName %S_std140 4 "e"
+               OpMemberName %S_std140 5 "f"
+               OpMemberName %S_std140 6 "g"
+               OpMemberName %S_std140 7 "h"
+               OpMemberName %S_std140 8 "i"
+               OpMemberName %S_std140 9 "j_0"
+               OpMemberName %S_std140 10 "j_1"
+               OpMemberName %S_std140 11 "j_2"
+               OpMemberName %S_std140 12 "k"
                OpName %Inner "Inner"
                OpMemberName %Inner 0 "x"
-               OpMemberName %S 11 "l"
+               OpMemberName %S_std140 13 "l"
                OpName %s "s"
+               OpName %load_s_j "load_s_j"
                OpName %main "main"
-               OpDecorate %S Block
-               OpMemberDecorate %S 0 Offset 0
-               OpMemberDecorate %S 1 Offset 12
-               OpMemberDecorate %S 2 Offset 16
-               OpMemberDecorate %S 3 Offset 28
-               OpMemberDecorate %S 4 Offset 32
-               OpMemberDecorate %S 5 Offset 44
-               OpMemberDecorate %S 6 Offset 48
-               OpMemberDecorate %S 7 Offset 56
-               OpMemberDecorate %S 8 Offset 64
-               OpMemberDecorate %S 8 ColMajor
-               OpMemberDecorate %S 8 MatrixStride 16
-               OpMemberDecorate %S 9 Offset 96
-               OpMemberDecorate %S 9 ColMajor
-               OpMemberDecorate %S 9 MatrixStride 8
-               OpMemberDecorate %S 10 Offset 128
+               OpDecorate %S_std140 Block
+               OpMemberDecorate %S_std140 0 Offset 0
+               OpMemberDecorate %S_std140 1 Offset 12
+               OpMemberDecorate %S_std140 2 Offset 16
+               OpMemberDecorate %S_std140 3 Offset 28
+               OpMemberDecorate %S_std140 4 Offset 32
+               OpMemberDecorate %S_std140 5 Offset 44
+               OpMemberDecorate %S_std140 6 Offset 48
+               OpMemberDecorate %S_std140 7 Offset 56
+               OpMemberDecorate %S_std140 8 Offset 64
+               OpMemberDecorate %S_std140 8 ColMajor
+               OpMemberDecorate %S_std140 8 MatrixStride 16
+               OpMemberDecorate %S_std140 9 Offset 96
+               OpMemberDecorate %S_std140 10 Offset 104
+               OpMemberDecorate %S_std140 11 Offset 112
+               OpMemberDecorate %S_std140 12 Offset 128
                OpMemberDecorate %Inner 0 Offset 0
-               OpMemberDecorate %S 11 Offset 144
+               OpMemberDecorate %S_std140 13 Offset 144
                OpDecorate %_arr_Inner_uint_4 ArrayStride 16
                OpDecorate %s NonWritable
                OpDecorate %s Binding 0
@@ -57,15 +58,20 @@
       %v2int = OpTypeVector %int 2
 %mat2v3float = OpTypeMatrix %v3float 2
     %v2float = OpTypeVector %float 2
-%mat3v2float = OpTypeMatrix %v2float 3
       %Inner = OpTypeStruct %int
      %uint_4 = OpConstant %uint 4
 %_arr_Inner_uint_4 = OpTypeArray %Inner %uint_4
-          %S = OpTypeStruct %v3int %int %v3uint %uint %v3float %float %v2int %v2int %mat2v3float %mat3v2float %Inner %_arr_Inner_uint_4
-%_ptr_Uniform_S = OpTypePointer Uniform %S
-          %s = OpVariable %_ptr_Uniform_S Uniform
+   %S_std140 = OpTypeStruct %v3int %int %v3uint %uint %v3float %float %v2int %v2int %mat2v3float %v2float %v2float %v2float %Inner %_arr_Inner_uint_4
+%_ptr_Uniform_S_std140 = OpTypePointer Uniform %S_std140
+          %s = OpVariable %_ptr_Uniform_S_std140 Uniform
+%mat3v2float = OpTypeMatrix %v2float 3
+         %16 = OpTypeFunction %mat3v2float
+     %uint_9 = OpConstant %uint 9
+%_ptr_Uniform_v2float = OpTypePointer Uniform %v2float
+    %uint_10 = OpConstant %uint 10
+    %uint_11 = OpConstant %uint 11
        %void = OpTypeVoid
-         %17 = OpTypeFunction %void
+         %31 = OpTypeFunction %void
      %uint_0 = OpConstant %uint 0
 %_ptr_Uniform_v3int = OpTypePointer Uniform %v3int
      %uint_1 = OpConstant %uint 1
@@ -82,40 +88,45 @@
      %uint_7 = OpConstant %uint 7
      %uint_8 = OpConstant %uint 8
 %_ptr_Uniform_mat2v3float = OpTypePointer Uniform %mat2v3float
-     %uint_9 = OpConstant %uint 9
-%_ptr_Uniform_mat3v2float = OpTypePointer Uniform %mat3v2float
-    %uint_10 = OpConstant %uint 10
+    %uint_12 = OpConstant %uint 12
 %_ptr_Uniform_Inner = OpTypePointer Uniform %Inner
-    %uint_11 = OpConstant %uint 11
+    %uint_13 = OpConstant %uint 13
 %_ptr_Uniform__arr_Inner_uint_4 = OpTypePointer Uniform %_arr_Inner_uint_4
-       %main = OpFunction %void None %17
-         %20 = OpLabel
-         %23 = OpAccessChain %_ptr_Uniform_v3int %s %uint_0
-         %24 = OpLoad %v3int %23
-         %27 = OpAccessChain %_ptr_Uniform_int %s %uint_1
-         %28 = OpLoad %int %27
-         %31 = OpAccessChain %_ptr_Uniform_v3uint %s %uint_2
-         %32 = OpLoad %v3uint %31
-         %35 = OpAccessChain %_ptr_Uniform_uint %s %uint_3
-         %36 = OpLoad %uint %35
-         %38 = OpAccessChain %_ptr_Uniform_v3float %s %uint_4
-         %39 = OpLoad %v3float %38
-         %42 = OpAccessChain %_ptr_Uniform_float %s %uint_5
-         %43 = OpLoad %float %42
-         %46 = OpAccessChain %_ptr_Uniform_v2int %s %uint_6
-         %47 = OpLoad %v2int %46
-         %49 = OpAccessChain %_ptr_Uniform_v2int %s %uint_7
-         %50 = OpLoad %v2int %49
-         %53 = OpAccessChain %_ptr_Uniform_mat2v3float %s %uint_8
-         %54 = OpLoad %mat2v3float %53
-         %57 = OpAccessChain %_ptr_Uniform_mat3v2float %s %uint_9
-         %58 = OpLoad %mat3v2float %57
-         %61 = OpAccessChain %_ptr_Uniform_Inner %s %uint_10
-         %62 = OpLoad %Inner %61
-         %65 = OpAccessChain %_ptr_Uniform__arr_Inner_uint_4 %s %uint_11
-         %66 = OpLoad %_arr_Inner_uint_4 %65
+   %load_s_j = OpFunction %mat3v2float None %16
+         %19 = OpLabel
+         %22 = OpAccessChain %_ptr_Uniform_v2float %s %uint_9
+         %23 = OpLoad %v2float %22
+         %25 = OpAccessChain %_ptr_Uniform_v2float %s %uint_10
+         %26 = OpLoad %v2float %25
+         %28 = OpAccessChain %_ptr_Uniform_v2float %s %uint_11
+         %29 = OpLoad %v2float %28
+         %30 = OpCompositeConstruct %mat3v2float %23 %26 %29
+               OpReturnValue %30
+               OpFunctionEnd
+       %main = OpFunction %void None %31
+         %34 = OpLabel
+         %37 = OpAccessChain %_ptr_Uniform_v3int %s %uint_0
+         %38 = OpLoad %v3int %37
+         %41 = OpAccessChain %_ptr_Uniform_int %s %uint_1
+         %42 = OpLoad %int %41
+         %45 = OpAccessChain %_ptr_Uniform_v3uint %s %uint_2
+         %46 = OpLoad %v3uint %45
+         %49 = OpAccessChain %_ptr_Uniform_uint %s %uint_3
+         %50 = OpLoad %uint %49
+         %52 = OpAccessChain %_ptr_Uniform_v3float %s %uint_4
+         %53 = OpLoad %v3float %52
+         %56 = OpAccessChain %_ptr_Uniform_float %s %uint_5
+         %57 = OpLoad %float %56
+         %60 = OpAccessChain %_ptr_Uniform_v2int %s %uint_6
+         %61 = OpLoad %v2int %60
+         %63 = OpAccessChain %_ptr_Uniform_v2int %s %uint_7
+         %64 = OpLoad %v2int %63
+         %67 = OpAccessChain %_ptr_Uniform_mat2v3float %s %uint_8
+         %68 = OpLoad %mat2v3float %67
+         %69 = OpFunctionCall %mat3v2float %load_s_j
+         %72 = OpAccessChain %_ptr_Uniform_Inner %s %uint_12
+         %73 = OpLoad %Inner %72
+         %76 = OpAccessChain %_ptr_Uniform__arr_Inner_uint_4 %s %uint_13
+         %77 = OpLoad %_arr_Inner_uint_4 %76
                OpReturn
                OpFunctionEnd
-1:1: Structure id 3 decorated as Block for variable in Uniform storage class must follow relaxed uniform buffer layout rules: member 9 is a matrix with stride 8 not satisfying alignment to 16
-  %S = OpTypeStruct %v3int %int %v3uint %uint %v3float %float %v2int %v2int %mat2v3float %mat3v2float %Inner %_arr_Inner_uint_4
-
diff --git a/test/tint/buffer/uniform/std140/mat2x2/dynamic_index_via_ptr.wgsl b/test/tint/buffer/uniform/std140/mat2x2/dynamic_index_via_ptr.wgsl
new file mode 100644
index 0000000..2d14c16
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/mat2x2/dynamic_index_via_ptr.wgsl
@@ -0,0 +1,33 @@
+struct Inner {
+  m : mat2x2<f32>,
+}
+
+struct Outer {
+  a : array<Inner, 4>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<Outer, 4>;
+
+var<private> counter = 0;
+fn i() -> i32 { counter++; return counter; }
+
+@compute @workgroup_size(1)
+fn f() {
+  let I = 1;
+
+  let p_a           = &a;
+  let p_a_i         = &((*p_a)[i()]);
+  let p_a_i_a       = &((*p_a_i).a);
+  let p_a_i_a_i     = &((*p_a_i_a)[i()]);
+  let p_a_i_a_i_m   = &((*p_a_i_a_i).m);
+  let p_a_i_a_i_m_i = &((*p_a_i_a_i_m)[i()]);
+
+
+  let l_a             : array<Outer, 4> =  *p_a;
+  let l_a_i           : Outer           =  *p_a_i;
+  let l_a_i_a         : array<Inner, 4> =  *p_a_i_a;
+  let l_a_i_a_i       : Inner           =  *p_a_i_a_i;
+  let l_a_i_a_i_m     : mat2x2<f32>     =  *p_a_i_a_i_m;
+  let l_a_i_a_i_m_i   : vec2<f32>       =  *p_a_i_a_i_m_i;
+  let l_a_i_a_i_m_i_i : f32             = (*p_a_i_a_i_m_i)[i()];
+}
diff --git a/test/tint/buffer/uniform/std140/mat2x2/dynamic_index_via_ptr.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/mat2x2/dynamic_index_via_ptr.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..7a2acd8
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/mat2x2/dynamic_index_via_ptr.wgsl.expected.dxc.hlsl
@@ -0,0 +1,79 @@
+struct Inner {
+  float2x2 m;
+};
+struct Outer {
+  Inner a[4];
+};
+
+cbuffer cbuffer_a : register(b0, space0) {
+  uint4 a[16];
+};
+static int counter = 0;
+
+int i() {
+  counter = (counter + 1);
+  return counter;
+}
+
+float2x2 tint_symbol_8(uint4 buffer[16], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load = buffer[scalar_offset / 4];
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset_1 / 4];
+  return float2x2(asfloat(((scalar_offset & 2) ? ubo_load.zw : ubo_load.xy)), asfloat(((scalar_offset_1 & 2) ? ubo_load_1.zw : ubo_load_1.xy)));
+}
+
+Inner tint_symbol_7(uint4 buffer[16], uint offset) {
+  const Inner tint_symbol_11 = {tint_symbol_8(buffer, (offset + 0u))};
+  return tint_symbol_11;
+}
+
+typedef Inner tint_symbol_6_ret[4];
+tint_symbol_6_ret tint_symbol_6(uint4 buffer[16], uint offset) {
+  Inner arr[4] = (Inner[4])0;
+  {
+    [loop] for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
+      arr[i_1] = tint_symbol_7(buffer, (offset + (i_1 * 16u)));
+    }
+  }
+  return arr;
+}
+
+Outer tint_symbol_5(uint4 buffer[16], uint offset) {
+  const Outer tint_symbol_12 = {tint_symbol_6(buffer, (offset + 0u))};
+  return tint_symbol_12;
+}
+
+typedef Outer tint_symbol_4_ret[4];
+tint_symbol_4_ret tint_symbol_4(uint4 buffer[16], uint offset) {
+  Outer arr_1[4] = (Outer[4])0;
+  {
+    [loop] for(uint i_2 = 0u; (i_2 < 4u); i_2 = (i_2 + 1u)) {
+      arr_1[i_2] = tint_symbol_5(buffer, (offset + (i_2 * 64u)));
+    }
+  }
+  return arr_1;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const int I = 1;
+  const int p_a_i_save = i();
+  const int p_a_i_a_i_save = i();
+  const int p_a_i_a_i_m_i_save = i();
+  const Outer l_a[4] = tint_symbol_4(a, 0u);
+  const Outer l_a_i = tint_symbol_5(a, (64u * uint(p_a_i_save)));
+  const Inner l_a_i_a[4] = tint_symbol_6(a, (64u * uint(p_a_i_save)));
+  const Inner l_a_i_a_i = tint_symbol_7(a, ((64u * uint(p_a_i_save)) + (16u * uint(p_a_i_a_i_save))));
+  const float2x2 l_a_i_a_i_m = tint_symbol_8(a, ((64u * uint(p_a_i_save)) + (16u * uint(p_a_i_a_i_save))));
+  const uint scalar_offset_2 = ((((64u * uint(p_a_i_save)) + (16u * uint(p_a_i_a_i_save))) + (8u * uint(p_a_i_a_i_m_i_save)))) / 4;
+  uint4 ubo_load_2 = a[scalar_offset_2 / 4];
+  const float2 l_a_i_a_i_m_i = asfloat(((scalar_offset_2 & 2) ? ubo_load_2.zw : ubo_load_2.xy));
+  const int tint_symbol = p_a_i_save;
+  const int tint_symbol_1 = p_a_i_a_i_save;
+  const int tint_symbol_2 = p_a_i_a_i_m_i_save;
+  const int tint_symbol_3 = i();
+  const uint scalar_offset_3 = (((((64u * uint(tint_symbol)) + (16u * uint(tint_symbol_1))) + (8u * uint(tint_symbol_2))) + (4u * uint(tint_symbol_3)))) / 4;
+  const float l_a_i_a_i_m_i_i = asfloat(a[scalar_offset_3 / 4][scalar_offset_3 % 4]);
+  return;
+}
diff --git a/test/tint/buffer/uniform/std140/mat2x2/dynamic_index_via_ptr.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/mat2x2/dynamic_index_via_ptr.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..7a2acd8
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/mat2x2/dynamic_index_via_ptr.wgsl.expected.fxc.hlsl
@@ -0,0 +1,79 @@
+struct Inner {
+  float2x2 m;
+};
+struct Outer {
+  Inner a[4];
+};
+
+cbuffer cbuffer_a : register(b0, space0) {
+  uint4 a[16];
+};
+static int counter = 0;
+
+int i() {
+  counter = (counter + 1);
+  return counter;
+}
+
+float2x2 tint_symbol_8(uint4 buffer[16], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load = buffer[scalar_offset / 4];
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset_1 / 4];
+  return float2x2(asfloat(((scalar_offset & 2) ? ubo_load.zw : ubo_load.xy)), asfloat(((scalar_offset_1 & 2) ? ubo_load_1.zw : ubo_load_1.xy)));
+}
+
+Inner tint_symbol_7(uint4 buffer[16], uint offset) {
+  const Inner tint_symbol_11 = {tint_symbol_8(buffer, (offset + 0u))};
+  return tint_symbol_11;
+}
+
+typedef Inner tint_symbol_6_ret[4];
+tint_symbol_6_ret tint_symbol_6(uint4 buffer[16], uint offset) {
+  Inner arr[4] = (Inner[4])0;
+  {
+    [loop] for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
+      arr[i_1] = tint_symbol_7(buffer, (offset + (i_1 * 16u)));
+    }
+  }
+  return arr;
+}
+
+Outer tint_symbol_5(uint4 buffer[16], uint offset) {
+  const Outer tint_symbol_12 = {tint_symbol_6(buffer, (offset + 0u))};
+  return tint_symbol_12;
+}
+
+typedef Outer tint_symbol_4_ret[4];
+tint_symbol_4_ret tint_symbol_4(uint4 buffer[16], uint offset) {
+  Outer arr_1[4] = (Outer[4])0;
+  {
+    [loop] for(uint i_2 = 0u; (i_2 < 4u); i_2 = (i_2 + 1u)) {
+      arr_1[i_2] = tint_symbol_5(buffer, (offset + (i_2 * 64u)));
+    }
+  }
+  return arr_1;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const int I = 1;
+  const int p_a_i_save = i();
+  const int p_a_i_a_i_save = i();
+  const int p_a_i_a_i_m_i_save = i();
+  const Outer l_a[4] = tint_symbol_4(a, 0u);
+  const Outer l_a_i = tint_symbol_5(a, (64u * uint(p_a_i_save)));
+  const Inner l_a_i_a[4] = tint_symbol_6(a, (64u * uint(p_a_i_save)));
+  const Inner l_a_i_a_i = tint_symbol_7(a, ((64u * uint(p_a_i_save)) + (16u * uint(p_a_i_a_i_save))));
+  const float2x2 l_a_i_a_i_m = tint_symbol_8(a, ((64u * uint(p_a_i_save)) + (16u * uint(p_a_i_a_i_save))));
+  const uint scalar_offset_2 = ((((64u * uint(p_a_i_save)) + (16u * uint(p_a_i_a_i_save))) + (8u * uint(p_a_i_a_i_m_i_save)))) / 4;
+  uint4 ubo_load_2 = a[scalar_offset_2 / 4];
+  const float2 l_a_i_a_i_m_i = asfloat(((scalar_offset_2 & 2) ? ubo_load_2.zw : ubo_load_2.xy));
+  const int tint_symbol = p_a_i_save;
+  const int tint_symbol_1 = p_a_i_a_i_save;
+  const int tint_symbol_2 = p_a_i_a_i_m_i_save;
+  const int tint_symbol_3 = i();
+  const uint scalar_offset_3 = (((((64u * uint(tint_symbol)) + (16u * uint(tint_symbol_1))) + (8u * uint(tint_symbol_2))) + (4u * uint(tint_symbol_3)))) / 4;
+  const float l_a_i_a_i_m_i_i = asfloat(a[scalar_offset_3 / 4][scalar_offset_3 % 4]);
+  return;
+}
diff --git a/test/tint/buffer/uniform/std140/mat2x2/dynamic_index_via_ptr.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/mat2x2/dynamic_index_via_ptr.wgsl.expected.glsl
new file mode 100644
index 0000000..9f698b3
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/mat2x2/dynamic_index_via_ptr.wgsl.expected.glsl
@@ -0,0 +1,129 @@
+#version 310 es
+
+struct Inner {
+  mat2 m;
+};
+
+struct Inner_std140 {
+  vec2 m_0;
+  vec2 m_1;
+};
+
+struct Outer {
+  Inner a[4];
+};
+
+struct Outer_std140 {
+  Inner_std140 a[4];
+};
+
+struct a_block {
+  Outer_std140 inner[4];
+};
+
+layout(binding = 0) uniform a_block_1 {
+  Outer_std140 inner[4];
+} a;
+
+int counter = 0;
+int i() {
+  counter = (counter + 1);
+  return counter;
+}
+
+Inner conv_Inner(Inner_std140 val) {
+  Inner tint_symbol_4 = Inner(mat2(val.m_0, val.m_1));
+  return tint_symbol_4;
+}
+
+Inner[4] conv_arr_4_Inner(Inner_std140 val[4]) {
+  Inner arr[4] = Inner[4](Inner(mat2(0.0f, 0.0f, 0.0f, 0.0f)), Inner(mat2(0.0f, 0.0f, 0.0f, 0.0f)), Inner(mat2(0.0f, 0.0f, 0.0f, 0.0f)), Inner(mat2(0.0f, 0.0f, 0.0f, 0.0f)));
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = conv_Inner(val[i]);
+    }
+  }
+  return arr;
+}
+
+Outer conv_Outer(Outer_std140 val) {
+  Outer tint_symbol_5 = Outer(conv_arr_4_Inner(val.a));
+  return tint_symbol_5;
+}
+
+Outer[4] conv_arr_4_Outer(Outer_std140 val[4]) {
+  Outer arr[4] = Outer[4](Outer(Inner[4](Inner(mat2(0.0f, 0.0f, 0.0f, 0.0f)), Inner(mat2(0.0f, 0.0f, 0.0f, 0.0f)), Inner(mat2(0.0f, 0.0f, 0.0f, 0.0f)), Inner(mat2(0.0f, 0.0f, 0.0f, 0.0f)))), Outer(Inner[4](Inner(mat2(0.0f, 0.0f, 0.0f, 0.0f)), Inner(mat2(0.0f, 0.0f, 0.0f, 0.0f)), Inner(mat2(0.0f, 0.0f, 0.0f, 0.0f)), Inner(mat2(0.0f, 0.0f, 0.0f, 0.0f)))), Outer(Inner[4](Inner(mat2(0.0f, 0.0f, 0.0f, 0.0f)), Inner(mat2(0.0f, 0.0f, 0.0f, 0.0f)), Inner(mat2(0.0f, 0.0f, 0.0f, 0.0f)), Inner(mat2(0.0f, 0.0f, 0.0f, 0.0f)))), Outer(Inner[4](Inner(mat2(0.0f, 0.0f, 0.0f, 0.0f)), Inner(mat2(0.0f, 0.0f, 0.0f, 0.0f)), Inner(mat2(0.0f, 0.0f, 0.0f, 0.0f)), Inner(mat2(0.0f, 0.0f, 0.0f, 0.0f)))));
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = conv_Outer(val[i]);
+    }
+  }
+  return arr;
+}
+
+mat2 load_a_p0_a_p1_m(uint p0, uint p1) {
+  uint s_save = p0;
+  uint s_save_1 = p1;
+  return mat2(a.inner[s_save].a[s_save_1].m_0, a.inner[s_save].a[s_save_1].m_1);
+}
+
+vec2 load_a_p0_a_p1_m_p2(uint p0, uint p1, uint p2) {
+  switch(p2) {
+    case 0u: {
+      return a.inner[p0].a[p1].m_0;
+      break;
+    }
+    case 1u: {
+      return a.inner[p0].a[p1].m_1;
+      break;
+    }
+    default: {
+      return vec2(0.0f);
+      break;
+    }
+  }
+}
+
+float load_a_p0_a_p1_m_p2_p3(uint p0, uint p1, uint p2, uint p3) {
+  switch(p2) {
+    case 0u: {
+      return a.inner[p0].a[p1].m_0[p3];
+      break;
+    }
+    case 1u: {
+      return a.inner[p0].a[p1].m_1[p3];
+      break;
+    }
+    default: {
+      return 0.0f;
+      break;
+    }
+  }
+}
+
+void f() {
+  int I = 1;
+  Outer p_a[4] = conv_arr_4_Outer(a.inner);
+  int tint_symbol = i();
+  Outer p_a_i = conv_Outer(a.inner[tint_symbol]);
+  Inner p_a_i_a[4] = conv_arr_4_Inner(a.inner[tint_symbol].a);
+  int tint_symbol_1 = i();
+  Inner p_a_i_a_i = conv_Inner(a.inner[tint_symbol].a[tint_symbol_1]);
+  mat2 p_a_i_a_i_m = load_a_p0_a_p1_m(uint(tint_symbol), uint(tint_symbol_1));
+  int tint_symbol_2 = i();
+  vec2 p_a_i_a_i_m_i = load_a_p0_a_p1_m_p2(uint(tint_symbol), uint(tint_symbol_1), uint(tint_symbol_2));
+  Outer l_a[4] = conv_arr_4_Outer(a.inner);
+  Outer l_a_i = conv_Outer(a.inner[tint_symbol]);
+  Inner l_a_i_a[4] = conv_arr_4_Inner(a.inner[tint_symbol].a);
+  Inner l_a_i_a_i = conv_Inner(a.inner[tint_symbol].a[tint_symbol_1]);
+  mat2 l_a_i_a_i_m = load_a_p0_a_p1_m(uint(tint_symbol), uint(tint_symbol_1));
+  vec2 l_a_i_a_i_m_i = load_a_p0_a_p1_m_p2(uint(tint_symbol), uint(tint_symbol_1), uint(tint_symbol_2));
+  int tint_symbol_3 = i();
+  float l_a_i_a_i_m_i_i = load_a_p0_a_p1_m_p2_p3(uint(tint_symbol), uint(tint_symbol_1), uint(tint_symbol_2), uint(tint_symbol_3));
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}
diff --git a/test/tint/buffer/uniform/std140/mat2x2/dynamic_index_via_ptr.wgsl.expected.msl b/test/tint/buffer/uniform/std140/mat2x2/dynamic_index_via_ptr.wgsl.expected.msl
new file mode 100644
index 0000000..b362cb8
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/mat2x2/dynamic_index_via_ptr.wgsl.expected.msl
@@ -0,0 +1,49 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+struct Inner {
+  /* 0x0000 */ float2x2 m;
+};
+
+struct Outer {
+  /* 0x0000 */ tint_array<Inner, 4> a;
+};
+
+int i() {
+  thread int tint_symbol_4 = 0;
+  tint_symbol_4 = as_type<int>((as_type<uint>(tint_symbol_4) + as_type<uint>(1)));
+  return tint_symbol_4;
+}
+
+kernel void f(const constant tint_array<Outer, 4>* tint_symbol_5 [[buffer(0)]]) {
+  int const I = 1;
+  int const tint_symbol = i();
+  int const p_a_i_save = tint_symbol;
+  int const tint_symbol_1 = i();
+  int const p_a_i_a_i_save = tint_symbol_1;
+  int const tint_symbol_2 = i();
+  int const p_a_i_a_i_m_i_save = tint_symbol_2;
+  tint_array<Outer, 4> const l_a = *(tint_symbol_5);
+  Outer const l_a_i = (*(tint_symbol_5))[p_a_i_save];
+  tint_array<Inner, 4> const l_a_i_a = (*(tint_symbol_5))[p_a_i_save].a;
+  Inner const l_a_i_a_i = (*(tint_symbol_5))[p_a_i_save].a[p_a_i_a_i_save];
+  float2x2 const l_a_i_a_i_m = (*(tint_symbol_5))[p_a_i_save].a[p_a_i_a_i_save].m;
+  float2 const l_a_i_a_i_m_i = (*(tint_symbol_5))[p_a_i_save].a[p_a_i_a_i_save].m[p_a_i_a_i_m_i_save];
+  int const tint_symbol_3 = i();
+  float const l_a_i_a_i_m_i_i = (*(tint_symbol_5))[p_a_i_save].a[p_a_i_a_i_save].m[p_a_i_a_i_m_i_save][tint_symbol_3];
+  return;
+}
+
diff --git a/test/tint/buffer/uniform/std140/mat2x2/dynamic_index_via_ptr.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/mat2x2/dynamic_index_via_ptr.wgsl.expected.spvasm
new file mode 100644
index 0000000..218f94c
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/mat2x2/dynamic_index_via_ptr.wgsl.expected.spvasm
@@ -0,0 +1,324 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 208
+; Schema: 0
+               OpCapability Shader
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %a_block "a_block"
+               OpMemberName %a_block 0 "inner"
+               OpName %Outer_std140 "Outer_std140"
+               OpMemberName %Outer_std140 0 "a"
+               OpName %Inner_std140 "Inner_std140"
+               OpMemberName %Inner_std140 0 "m_0"
+               OpMemberName %Inner_std140 1 "m_1"
+               OpName %a "a"
+               OpName %counter "counter"
+               OpName %i "i"
+               OpName %Inner "Inner"
+               OpMemberName %Inner 0 "m"
+               OpName %conv_Inner "conv_Inner"
+               OpName %val "val"
+               OpName %conv_arr_4_Inner "conv_arr_4_Inner"
+               OpName %val_0 "val"
+               OpName %arr "arr"
+               OpName %i_0 "i"
+               OpName %var_for_index "var_for_index"
+               OpName %Outer "Outer"
+               OpMemberName %Outer 0 "a"
+               OpName %conv_Outer "conv_Outer"
+               OpName %val_1 "val"
+               OpName %conv_arr_4_Outer "conv_arr_4_Outer"
+               OpName %val_2 "val"
+               OpName %arr_0 "arr"
+               OpName %i_1 "i"
+               OpName %var_for_index_1 "var_for_index_1"
+               OpName %load_a_p0_a_p1_m "load_a_p0_a_p1_m"
+               OpName %p0 "p0"
+               OpName %p1 "p1"
+               OpName %load_a_p0_a_p1_m_p2 "load_a_p0_a_p1_m_p2"
+               OpName %p0_0 "p0"
+               OpName %p1_0 "p1"
+               OpName %p2 "p2"
+               OpName %load_a_p0_a_p1_m_p2_p3 "load_a_p0_a_p1_m_p2_p3"
+               OpName %p0_1 "p0"
+               OpName %p1_1 "p1"
+               OpName %p2_0 "p2"
+               OpName %p3 "p3"
+               OpName %f "f"
+               OpDecorate %a_block Block
+               OpMemberDecorate %a_block 0 Offset 0
+               OpMemberDecorate %Outer_std140 0 Offset 0
+               OpMemberDecorate %Inner_std140 0 Offset 0
+               OpMemberDecorate %Inner_std140 1 Offset 8
+               OpDecorate %_arr_Inner_std140_uint_4 ArrayStride 16
+               OpDecorate %_arr_Outer_std140_uint_4 ArrayStride 64
+               OpDecorate %a NonWritable
+               OpDecorate %a DescriptorSet 0
+               OpDecorate %a Binding 0
+               OpMemberDecorate %Inner 0 Offset 0
+               OpMemberDecorate %Inner 0 ColMajor
+               OpMemberDecorate %Inner 0 MatrixStride 8
+               OpDecorate %_arr_Inner_uint_4 ArrayStride 16
+               OpMemberDecorate %Outer 0 Offset 0
+               OpDecorate %_arr_Outer_uint_4 ArrayStride 64
+      %float = OpTypeFloat 32
+    %v2float = OpTypeVector %float 2
+%Inner_std140 = OpTypeStruct %v2float %v2float
+       %uint = OpTypeInt 32 0
+     %uint_4 = OpConstant %uint 4
+%_arr_Inner_std140_uint_4 = OpTypeArray %Inner_std140 %uint_4
+%Outer_std140 = OpTypeStruct %_arr_Inner_std140_uint_4
+%_arr_Outer_std140_uint_4 = OpTypeArray %Outer_std140 %uint_4
+    %a_block = OpTypeStruct %_arr_Outer_std140_uint_4
+%_ptr_Uniform_a_block = OpTypePointer Uniform %a_block
+          %a = OpVariable %_ptr_Uniform_a_block Uniform
+        %int = OpTypeInt 32 1
+         %13 = OpConstantNull %int
+%_ptr_Private_int = OpTypePointer Private %int
+    %counter = OpVariable %_ptr_Private_int Private %13
+         %16 = OpTypeFunction %int
+      %int_1 = OpConstant %int 1
+%mat2v2float = OpTypeMatrix %v2float 2
+      %Inner = OpTypeStruct %mat2v2float
+         %23 = OpTypeFunction %Inner %Inner_std140
+%_arr_Inner_uint_4 = OpTypeArray %Inner %uint_4
+         %33 = OpTypeFunction %_arr_Inner_uint_4 %_arr_Inner_std140_uint_4
+%_ptr_Function__arr_Inner_uint_4 = OpTypePointer Function %_arr_Inner_uint_4
+         %40 = OpConstantNull %_arr_Inner_uint_4
+%_ptr_Function_uint = OpTypePointer Function %uint
+         %43 = OpConstantNull %uint
+       %bool = OpTypeBool
+%_ptr_Function__arr_Inner_std140_uint_4 = OpTypePointer Function %_arr_Inner_std140_uint_4
+         %56 = OpConstantNull %_arr_Inner_std140_uint_4
+%_ptr_Function_Inner = OpTypePointer Function %Inner
+%_ptr_Function_Inner_std140 = OpTypePointer Function %Inner_std140
+     %uint_1 = OpConstant %uint 1
+      %Outer = OpTypeStruct %_arr_Inner_uint_4
+         %69 = OpTypeFunction %Outer %Outer_std140
+%_arr_Outer_uint_4 = OpTypeArray %Outer %uint_4
+         %77 = OpTypeFunction %_arr_Outer_uint_4 %_arr_Outer_std140_uint_4
+%_ptr_Function__arr_Outer_uint_4 = OpTypePointer Function %_arr_Outer_uint_4
+         %84 = OpConstantNull %_arr_Outer_uint_4
+%_ptr_Function__arr_Outer_std140_uint_4 = OpTypePointer Function %_arr_Outer_std140_uint_4
+         %97 = OpConstantNull %_arr_Outer_std140_uint_4
+%_ptr_Function_Outer = OpTypePointer Function %Outer
+%_ptr_Function_Outer_std140 = OpTypePointer Function %Outer_std140
+        %109 = OpTypeFunction %mat2v2float %uint %uint
+     %uint_0 = OpConstant %uint 0
+%_ptr_Uniform_v2float = OpTypePointer Uniform %v2float
+        %121 = OpTypeFunction %v2float %uint %uint %uint
+        %135 = OpConstantNull %v2float
+        %136 = OpTypeFunction %float %uint %uint %uint %uint
+%_ptr_Uniform_float = OpTypePointer Uniform %float
+        %152 = OpConstantNull %float
+       %void = OpTypeVoid
+        %153 = OpTypeFunction %void
+%_ptr_Uniform__arr_Outer_std140_uint_4 = OpTypePointer Uniform %_arr_Outer_std140_uint_4
+%_ptr_Uniform_Outer_std140 = OpTypePointer Uniform %Outer_std140
+%_ptr_Uniform__arr_Inner_std140_uint_4 = OpTypePointer Uniform %_arr_Inner_std140_uint_4
+%_ptr_Uniform_Inner_std140 = OpTypePointer Uniform %Inner_std140
+          %i = OpFunction %int None %16
+         %18 = OpLabel
+         %19 = OpLoad %int %counter
+         %21 = OpIAdd %int %19 %int_1
+               OpStore %counter %21
+         %22 = OpLoad %int %counter
+               OpReturnValue %22
+               OpFunctionEnd
+ %conv_Inner = OpFunction %Inner None %23
+        %val = OpFunctionParameter %Inner_std140
+         %28 = OpLabel
+         %29 = OpCompositeExtract %v2float %val 0
+         %30 = OpCompositeExtract %v2float %val 1
+         %31 = OpCompositeConstruct %mat2v2float %29 %30
+         %32 = OpCompositeConstruct %Inner %31
+               OpReturnValue %32
+               OpFunctionEnd
+%conv_arr_4_Inner = OpFunction %_arr_Inner_uint_4 None %33
+      %val_0 = OpFunctionParameter %_arr_Inner_std140_uint_4
+         %37 = OpLabel
+        %arr = OpVariable %_ptr_Function__arr_Inner_uint_4 Function %40
+        %i_0 = OpVariable %_ptr_Function_uint Function %43
+%var_for_index = OpVariable %_ptr_Function__arr_Inner_std140_uint_4 Function %56
+               OpBranch %44
+         %44 = OpLabel
+               OpLoopMerge %45 %46 None
+               OpBranch %47
+         %47 = OpLabel
+         %49 = OpLoad %uint %i_0
+         %50 = OpULessThan %bool %49 %uint_4
+         %48 = OpLogicalNot %bool %50
+               OpSelectionMerge %52 None
+               OpBranchConditional %48 %53 %52
+         %53 = OpLabel
+               OpBranch %45
+         %52 = OpLabel
+               OpStore %var_for_index %val_0
+         %57 = OpLoad %uint %i_0
+         %59 = OpAccessChain %_ptr_Function_Inner %arr %57
+         %61 = OpLoad %uint %i_0
+         %63 = OpAccessChain %_ptr_Function_Inner_std140 %var_for_index %61
+         %64 = OpLoad %Inner_std140 %63
+         %60 = OpFunctionCall %Inner %conv_Inner %64
+               OpStore %59 %60
+               OpBranch %46
+         %46 = OpLabel
+         %65 = OpLoad %uint %i_0
+         %67 = OpIAdd %uint %65 %uint_1
+               OpStore %i_0 %67
+               OpBranch %44
+         %45 = OpLabel
+         %68 = OpLoad %_arr_Inner_uint_4 %arr
+               OpReturnValue %68
+               OpFunctionEnd
+ %conv_Outer = OpFunction %Outer None %69
+      %val_1 = OpFunctionParameter %Outer_std140
+         %73 = OpLabel
+         %75 = OpCompositeExtract %_arr_Inner_std140_uint_4 %val_1 0
+         %74 = OpFunctionCall %_arr_Inner_uint_4 %conv_arr_4_Inner %75
+         %76 = OpCompositeConstruct %Outer %74
+               OpReturnValue %76
+               OpFunctionEnd
+%conv_arr_4_Outer = OpFunction %_arr_Outer_uint_4 None %77
+      %val_2 = OpFunctionParameter %_arr_Outer_std140_uint_4
+         %81 = OpLabel
+      %arr_0 = OpVariable %_ptr_Function__arr_Outer_uint_4 Function %84
+        %i_1 = OpVariable %_ptr_Function_uint Function %43
+%var_for_index_1 = OpVariable %_ptr_Function__arr_Outer_std140_uint_4 Function %97
+               OpBranch %86
+         %86 = OpLabel
+               OpLoopMerge %87 %88 None
+               OpBranch %89
+         %89 = OpLabel
+         %91 = OpLoad %uint %i_1
+         %92 = OpULessThan %bool %91 %uint_4
+         %90 = OpLogicalNot %bool %92
+               OpSelectionMerge %93 None
+               OpBranchConditional %90 %94 %93
+         %94 = OpLabel
+               OpBranch %87
+         %93 = OpLabel
+               OpStore %var_for_index_1 %val_2
+         %98 = OpLoad %uint %i_1
+        %100 = OpAccessChain %_ptr_Function_Outer %arr_0 %98
+        %102 = OpLoad %uint %i_1
+        %104 = OpAccessChain %_ptr_Function_Outer_std140 %var_for_index_1 %102
+        %105 = OpLoad %Outer_std140 %104
+        %101 = OpFunctionCall %Outer %conv_Outer %105
+               OpStore %100 %101
+               OpBranch %88
+         %88 = OpLabel
+        %106 = OpLoad %uint %i_1
+        %107 = OpIAdd %uint %106 %uint_1
+               OpStore %i_1 %107
+               OpBranch %86
+         %87 = OpLabel
+        %108 = OpLoad %_arr_Outer_uint_4 %arr_0
+               OpReturnValue %108
+               OpFunctionEnd
+%load_a_p0_a_p1_m = OpFunction %mat2v2float None %109
+         %p0 = OpFunctionParameter %uint
+         %p1 = OpFunctionParameter %uint
+        %113 = OpLabel
+        %116 = OpAccessChain %_ptr_Uniform_v2float %a %uint_0 %p0 %uint_0 %p1 %uint_0
+        %117 = OpLoad %v2float %116
+        %118 = OpAccessChain %_ptr_Uniform_v2float %a %uint_0 %p0 %uint_0 %p1 %uint_1
+        %119 = OpLoad %v2float %118
+        %120 = OpCompositeConstruct %mat2v2float %117 %119
+               OpReturnValue %120
+               OpFunctionEnd
+%load_a_p0_a_p1_m_p2 = OpFunction %v2float None %121
+       %p0_0 = OpFunctionParameter %uint
+       %p1_0 = OpFunctionParameter %uint
+         %p2 = OpFunctionParameter %uint
+        %126 = OpLabel
+               OpSelectionMerge %127 None
+               OpSwitch %p2 %128 0 %129 1 %130
+        %129 = OpLabel
+        %131 = OpAccessChain %_ptr_Uniform_v2float %a %uint_0 %p0_0 %uint_0 %p1_0 %uint_0
+        %132 = OpLoad %v2float %131
+               OpReturnValue %132
+        %130 = OpLabel
+        %133 = OpAccessChain %_ptr_Uniform_v2float %a %uint_0 %p0_0 %uint_0 %p1_0 %uint_1
+        %134 = OpLoad %v2float %133
+               OpReturnValue %134
+        %128 = OpLabel
+               OpReturnValue %135
+        %127 = OpLabel
+               OpReturnValue %135
+               OpFunctionEnd
+%load_a_p0_a_p1_m_p2_p3 = OpFunction %float None %136
+       %p0_1 = OpFunctionParameter %uint
+       %p1_1 = OpFunctionParameter %uint
+       %p2_0 = OpFunctionParameter %uint
+         %p3 = OpFunctionParameter %uint
+        %142 = OpLabel
+               OpSelectionMerge %143 None
+               OpSwitch %p2_0 %144 0 %145 1 %146
+        %145 = OpLabel
+        %148 = OpAccessChain %_ptr_Uniform_float %a %uint_0 %p0_1 %uint_0 %p1_1 %uint_0 %p3
+        %149 = OpLoad %float %148
+               OpReturnValue %149
+        %146 = OpLabel
+        %150 = OpAccessChain %_ptr_Uniform_float %a %uint_0 %p0_1 %uint_0 %p1_1 %uint_1 %p3
+        %151 = OpLoad %float %150
+               OpReturnValue %151
+        %144 = OpLabel
+               OpReturnValue %152
+        %143 = OpLabel
+               OpReturnValue %152
+               OpFunctionEnd
+          %f = OpFunction %void None %153
+        %156 = OpLabel
+        %159 = OpAccessChain %_ptr_Uniform__arr_Outer_std140_uint_4 %a %uint_0
+        %160 = OpLoad %_arr_Outer_std140_uint_4 %159
+        %157 = OpFunctionCall %_arr_Outer_uint_4 %conv_arr_4_Outer %160
+        %161 = OpFunctionCall %int %i
+        %164 = OpAccessChain %_ptr_Uniform_Outer_std140 %a %uint_0 %161
+        %165 = OpLoad %Outer_std140 %164
+        %162 = OpFunctionCall %Outer %conv_Outer %165
+        %168 = OpAccessChain %_ptr_Uniform__arr_Inner_std140_uint_4 %a %uint_0 %161 %uint_0
+        %169 = OpLoad %_arr_Inner_std140_uint_4 %168
+        %166 = OpFunctionCall %_arr_Inner_uint_4 %conv_arr_4_Inner %169
+        %170 = OpFunctionCall %int %i
+        %173 = OpAccessChain %_ptr_Uniform_Inner_std140 %a %uint_0 %161 %uint_0 %170
+        %174 = OpLoad %Inner_std140 %173
+        %171 = OpFunctionCall %Inner %conv_Inner %174
+        %176 = OpBitcast %uint %161
+        %177 = OpBitcast %uint %170
+        %175 = OpFunctionCall %mat2v2float %load_a_p0_a_p1_m %176 %177
+        %178 = OpFunctionCall %int %i
+        %180 = OpBitcast %uint %161
+        %181 = OpBitcast %uint %170
+        %182 = OpBitcast %uint %178
+        %179 = OpFunctionCall %v2float %load_a_p0_a_p1_m_p2 %180 %181 %182
+        %184 = OpAccessChain %_ptr_Uniform__arr_Outer_std140_uint_4 %a %uint_0
+        %185 = OpLoad %_arr_Outer_std140_uint_4 %184
+        %183 = OpFunctionCall %_arr_Outer_uint_4 %conv_arr_4_Outer %185
+        %187 = OpAccessChain %_ptr_Uniform_Outer_std140 %a %uint_0 %161
+        %188 = OpLoad %Outer_std140 %187
+        %186 = OpFunctionCall %Outer %conv_Outer %188
+        %190 = OpAccessChain %_ptr_Uniform__arr_Inner_std140_uint_4 %a %uint_0 %161 %uint_0
+        %191 = OpLoad %_arr_Inner_std140_uint_4 %190
+        %189 = OpFunctionCall %_arr_Inner_uint_4 %conv_arr_4_Inner %191
+        %193 = OpAccessChain %_ptr_Uniform_Inner_std140 %a %uint_0 %161 %uint_0 %170
+        %194 = OpLoad %Inner_std140 %193
+        %192 = OpFunctionCall %Inner %conv_Inner %194
+        %196 = OpBitcast %uint %161
+        %197 = OpBitcast %uint %170
+        %195 = OpFunctionCall %mat2v2float %load_a_p0_a_p1_m %196 %197
+        %199 = OpBitcast %uint %161
+        %200 = OpBitcast %uint %170
+        %201 = OpBitcast %uint %178
+        %198 = OpFunctionCall %v2float %load_a_p0_a_p1_m_p2 %199 %200 %201
+        %202 = OpFunctionCall %int %i
+        %204 = OpBitcast %uint %161
+        %205 = OpBitcast %uint %170
+        %206 = OpBitcast %uint %178
+        %207 = OpBitcast %uint %202
+        %203 = OpFunctionCall %float %load_a_p0_a_p1_m_p2_p3 %204 %205 %206 %207
+               OpReturn
+               OpFunctionEnd
diff --git a/test/tint/buffer/uniform/std140/mat2x2/dynamic_index_via_ptr.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/mat2x2/dynamic_index_via_ptr.wgsl.expected.wgsl
new file mode 100644
index 0000000..f998978
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/mat2x2/dynamic_index_via_ptr.wgsl.expected.wgsl
@@ -0,0 +1,34 @@
+struct Inner {
+  m : mat2x2<f32>,
+}
+
+struct Outer {
+  a : array<Inner, 4>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<Outer, 4>;
+
+var<private> counter = 0;
+
+fn i() -> i32 {
+  counter++;
+  return counter;
+}
+
+@compute @workgroup_size(1)
+fn f() {
+  let I = 1;
+  let p_a = &(a);
+  let p_a_i = &((*(p_a))[i()]);
+  let p_a_i_a = &((*(p_a_i)).a);
+  let p_a_i_a_i = &((*(p_a_i_a))[i()]);
+  let p_a_i_a_i_m = &((*(p_a_i_a_i)).m);
+  let p_a_i_a_i_m_i = &((*(p_a_i_a_i_m))[i()]);
+  let l_a : array<Outer, 4> = *(p_a);
+  let l_a_i : Outer = *(p_a_i);
+  let l_a_i_a : array<Inner, 4> = *(p_a_i_a);
+  let l_a_i_a_i : Inner = *(p_a_i_a_i);
+  let l_a_i_a_i_m : mat2x2<f32> = *(p_a_i_a_i_m);
+  let l_a_i_a_i_m_i : vec2<f32> = *(p_a_i_a_i_m_i);
+  let l_a_i_a_i_m_i_i : f32 = (*(p_a_i_a_i_m_i))[i()];
+}
diff --git a/test/tint/buffer/uniform/std140/mat2x2/static_index_via_ptr.wgsl b/test/tint/buffer/uniform/std140/mat2x2/static_index_via_ptr.wgsl
new file mode 100644
index 0000000..1e620f4
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/mat2x2/static_index_via_ptr.wgsl
@@ -0,0 +1,30 @@
+struct Inner {
+  m : mat2x2<f32>,
+}
+
+struct Outer {
+  a : array<Inner, 4>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<Outer, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+  let I = 1;
+
+  let p_a = &a;
+  let p_a_3 = &((*p_a)[3]);
+  let p_a_3_a = &((*p_a_3).a);
+  let p_a_3_a_2 = &((*p_a_3_a)[2]);
+  let p_a_3_a_2_m = &((*p_a_3_a_2).m);
+  let p_a_3_a_2_m_1 = &((*p_a_3_a_2_m)[1]);
+
+
+  let l_a             : array<Outer, 4> = *p_a;
+  let l_a_3           : Outer           = *p_a_3;
+  let l_a_3_a         : array<Inner, 4> = *p_a_3_a;
+  let l_a_3_a_2       : Inner           = *p_a_3_a_2;
+  let l_a_3_a_2_m     : mat2x2<f32>     = *p_a_3_a_2_m;
+  let l_a_3_a_2_m_1   : vec2<f32>       = *p_a_3_a_2_m_1;
+  let l_a_3_a_2_m_1_0 : f32             = (*p_a_3_a_2_m_1)[0];
+}
diff --git a/test/tint/buffer/uniform/std140/mat2x2/static_index_via_ptr.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/mat2x2/static_index_via_ptr.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..d27e6df
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/mat2x2/static_index_via_ptr.wgsl.expected.dxc.hlsl
@@ -0,0 +1,63 @@
+struct Inner {
+  float2x2 m;
+};
+struct Outer {
+  Inner a[4];
+};
+
+cbuffer cbuffer_a : register(b0, space0) {
+  uint4 a[16];
+};
+
+float2x2 tint_symbol_4(uint4 buffer[16], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load = buffer[scalar_offset / 4];
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset_1 / 4];
+  return float2x2(asfloat(((scalar_offset & 2) ? ubo_load.zw : ubo_load.xy)), asfloat(((scalar_offset_1 & 2) ? ubo_load_1.zw : ubo_load_1.xy)));
+}
+
+Inner tint_symbol_3(uint4 buffer[16], uint offset) {
+  const Inner tint_symbol_7 = {tint_symbol_4(buffer, (offset + 0u))};
+  return tint_symbol_7;
+}
+
+typedef Inner tint_symbol_2_ret[4];
+tint_symbol_2_ret tint_symbol_2(uint4 buffer[16], uint offset) {
+  Inner arr[4] = (Inner[4])0;
+  {
+    [loop] for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = tint_symbol_3(buffer, (offset + (i * 16u)));
+    }
+  }
+  return arr;
+}
+
+Outer tint_symbol_1(uint4 buffer[16], uint offset) {
+  const Outer tint_symbol_8 = {tint_symbol_2(buffer, (offset + 0u))};
+  return tint_symbol_8;
+}
+
+typedef Outer tint_symbol_ret[4];
+tint_symbol_ret tint_symbol(uint4 buffer[16], uint offset) {
+  Outer arr_1[4] = (Outer[4])0;
+  {
+    [loop] for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
+      arr_1[i_1] = tint_symbol_1(buffer, (offset + (i_1 * 64u)));
+    }
+  }
+  return arr_1;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const int I = 1;
+  const Outer l_a[4] = tint_symbol(a, 0u);
+  const Outer l_a_3 = tint_symbol_1(a, 192u);
+  const Inner l_a_3_a[4] = tint_symbol_2(a, 192u);
+  const Inner l_a_3_a_2 = tint_symbol_3(a, 224u);
+  const float2x2 l_a_3_a_2_m = tint_symbol_4(a, 224u);
+  const float2 l_a_3_a_2_m_1 = asfloat(a[14].zw);
+  const float l_a_3_a_2_m_1_0 = asfloat(a[14].z);
+  return;
+}
diff --git a/test/tint/buffer/uniform/std140/mat2x2/static_index_via_ptr.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/mat2x2/static_index_via_ptr.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..d27e6df
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/mat2x2/static_index_via_ptr.wgsl.expected.fxc.hlsl
@@ -0,0 +1,63 @@
+struct Inner {
+  float2x2 m;
+};
+struct Outer {
+  Inner a[4];
+};
+
+cbuffer cbuffer_a : register(b0, space0) {
+  uint4 a[16];
+};
+
+float2x2 tint_symbol_4(uint4 buffer[16], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load = buffer[scalar_offset / 4];
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset_1 / 4];
+  return float2x2(asfloat(((scalar_offset & 2) ? ubo_load.zw : ubo_load.xy)), asfloat(((scalar_offset_1 & 2) ? ubo_load_1.zw : ubo_load_1.xy)));
+}
+
+Inner tint_symbol_3(uint4 buffer[16], uint offset) {
+  const Inner tint_symbol_7 = {tint_symbol_4(buffer, (offset + 0u))};
+  return tint_symbol_7;
+}
+
+typedef Inner tint_symbol_2_ret[4];
+tint_symbol_2_ret tint_symbol_2(uint4 buffer[16], uint offset) {
+  Inner arr[4] = (Inner[4])0;
+  {
+    [loop] for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = tint_symbol_3(buffer, (offset + (i * 16u)));
+    }
+  }
+  return arr;
+}
+
+Outer tint_symbol_1(uint4 buffer[16], uint offset) {
+  const Outer tint_symbol_8 = {tint_symbol_2(buffer, (offset + 0u))};
+  return tint_symbol_8;
+}
+
+typedef Outer tint_symbol_ret[4];
+tint_symbol_ret tint_symbol(uint4 buffer[16], uint offset) {
+  Outer arr_1[4] = (Outer[4])0;
+  {
+    [loop] for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
+      arr_1[i_1] = tint_symbol_1(buffer, (offset + (i_1 * 64u)));
+    }
+  }
+  return arr_1;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const int I = 1;
+  const Outer l_a[4] = tint_symbol(a, 0u);
+  const Outer l_a_3 = tint_symbol_1(a, 192u);
+  const Inner l_a_3_a[4] = tint_symbol_2(a, 192u);
+  const Inner l_a_3_a_2 = tint_symbol_3(a, 224u);
+  const float2x2 l_a_3_a_2_m = tint_symbol_4(a, 224u);
+  const float2 l_a_3_a_2_m_1 = asfloat(a[14].zw);
+  const float l_a_3_a_2_m_1_0 = asfloat(a[14].z);
+  return;
+}
diff --git a/test/tint/buffer/uniform/std140/mat2x2/static_index_via_ptr.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/mat2x2/static_index_via_ptr.wgsl.expected.glsl
new file mode 100644
index 0000000..38280cd
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/mat2x2/static_index_via_ptr.wgsl.expected.glsl
@@ -0,0 +1,83 @@
+#version 310 es
+
+struct Inner {
+  mat2 m;
+};
+
+struct Inner_std140 {
+  vec2 m_0;
+  vec2 m_1;
+};
+
+struct Outer {
+  Inner a[4];
+};
+
+struct Outer_std140 {
+  Inner_std140 a[4];
+};
+
+struct a_block {
+  Outer_std140 inner[4];
+};
+
+layout(binding = 0) uniform a_block_1 {
+  Outer_std140 inner[4];
+} a;
+
+Inner conv_Inner(Inner_std140 val) {
+  Inner tint_symbol = Inner(mat2(val.m_0, val.m_1));
+  return tint_symbol;
+}
+
+Inner[4] conv_arr_4_Inner(Inner_std140 val[4]) {
+  Inner arr[4] = Inner[4](Inner(mat2(0.0f, 0.0f, 0.0f, 0.0f)), Inner(mat2(0.0f, 0.0f, 0.0f, 0.0f)), Inner(mat2(0.0f, 0.0f, 0.0f, 0.0f)), Inner(mat2(0.0f, 0.0f, 0.0f, 0.0f)));
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = conv_Inner(val[i]);
+    }
+  }
+  return arr;
+}
+
+Outer conv_Outer(Outer_std140 val) {
+  Outer tint_symbol_1 = Outer(conv_arr_4_Inner(val.a));
+  return tint_symbol_1;
+}
+
+Outer[4] conv_arr_4_Outer(Outer_std140 val[4]) {
+  Outer arr[4] = Outer[4](Outer(Inner[4](Inner(mat2(0.0f, 0.0f, 0.0f, 0.0f)), Inner(mat2(0.0f, 0.0f, 0.0f, 0.0f)), Inner(mat2(0.0f, 0.0f, 0.0f, 0.0f)), Inner(mat2(0.0f, 0.0f, 0.0f, 0.0f)))), Outer(Inner[4](Inner(mat2(0.0f, 0.0f, 0.0f, 0.0f)), Inner(mat2(0.0f, 0.0f, 0.0f, 0.0f)), Inner(mat2(0.0f, 0.0f, 0.0f, 0.0f)), Inner(mat2(0.0f, 0.0f, 0.0f, 0.0f)))), Outer(Inner[4](Inner(mat2(0.0f, 0.0f, 0.0f, 0.0f)), Inner(mat2(0.0f, 0.0f, 0.0f, 0.0f)), Inner(mat2(0.0f, 0.0f, 0.0f, 0.0f)), Inner(mat2(0.0f, 0.0f, 0.0f, 0.0f)))), Outer(Inner[4](Inner(mat2(0.0f, 0.0f, 0.0f, 0.0f)), Inner(mat2(0.0f, 0.0f, 0.0f, 0.0f)), Inner(mat2(0.0f, 0.0f, 0.0f, 0.0f)), Inner(mat2(0.0f, 0.0f, 0.0f, 0.0f)))));
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = conv_Outer(val[i]);
+    }
+  }
+  return arr;
+}
+
+mat2 load_a_3_a_2_m() {
+  return mat2(a.inner[3u].a[2u].m_0, a.inner[3u].a[2u].m_1);
+}
+
+void f() {
+  int I = 1;
+  Outer p_a[4] = conv_arr_4_Outer(a.inner);
+  Outer p_a_3 = conv_Outer(a.inner[3u]);
+  Inner p_a_3_a[4] = conv_arr_4_Inner(a.inner[3u].a);
+  Inner p_a_3_a_2 = conv_Inner(a.inner[3u].a[2u]);
+  mat2 p_a_3_a_2_m = load_a_3_a_2_m();
+  vec2 p_a_3_a_2_m_1 = a.inner[3u].a[2u].m_1;
+  Outer l_a[4] = conv_arr_4_Outer(a.inner);
+  Outer l_a_3 = conv_Outer(a.inner[3u]);
+  Inner l_a_3_a[4] = conv_arr_4_Inner(a.inner[3u].a);
+  Inner l_a_3_a_2 = conv_Inner(a.inner[3u].a[2u]);
+  mat2 l_a_3_a_2_m = load_a_3_a_2_m();
+  vec2 l_a_3_a_2_m_1 = a.inner[3u].a[2u].m_1;
+  float l_a_3_a_2_m_1_0 = a.inner[3u].a[2u].m_1[0u];
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}
diff --git a/test/tint/buffer/uniform/std140/mat2x2/static_index_via_ptr.wgsl.expected.msl b/test/tint/buffer/uniform/std140/mat2x2/static_index_via_ptr.wgsl.expected.msl
new file mode 100644
index 0000000..a731312
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/mat2x2/static_index_via_ptr.wgsl.expected.msl
@@ -0,0 +1,36 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+struct Inner {
+  /* 0x0000 */ float2x2 m;
+};
+
+struct Outer {
+  /* 0x0000 */ tint_array<Inner, 4> a;
+};
+
+kernel void f(const constant tint_array<Outer, 4>* tint_symbol [[buffer(0)]]) {
+  int const I = 1;
+  tint_array<Outer, 4> const l_a = *(tint_symbol);
+  Outer const l_a_3 = (*(tint_symbol))[3];
+  tint_array<Inner, 4> const l_a_3_a = (*(tint_symbol))[3].a;
+  Inner const l_a_3_a_2 = (*(tint_symbol))[3].a[2];
+  float2x2 const l_a_3_a_2_m = (*(tint_symbol))[3].a[2].m;
+  float2 const l_a_3_a_2_m_1 = (*(tint_symbol))[3].a[2].m[1];
+  float const l_a_3_a_2_m_1_0 = (*(tint_symbol))[3].a[2].m[1][0];
+  return;
+}
+
diff --git a/test/tint/buffer/uniform/std140/mat2x2/static_index_via_ptr.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/mat2x2/static_index_via_ptr.wgsl.expected.spvasm
new file mode 100644
index 0000000..92ff1cb
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/mat2x2/static_index_via_ptr.wgsl.expected.spvasm
@@ -0,0 +1,239 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 153
+; Schema: 0
+               OpCapability Shader
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %a_block "a_block"
+               OpMemberName %a_block 0 "inner"
+               OpName %Outer_std140 "Outer_std140"
+               OpMemberName %Outer_std140 0 "a"
+               OpName %Inner_std140 "Inner_std140"
+               OpMemberName %Inner_std140 0 "m_0"
+               OpMemberName %Inner_std140 1 "m_1"
+               OpName %a "a"
+               OpName %Inner "Inner"
+               OpMemberName %Inner 0 "m"
+               OpName %conv_Inner "conv_Inner"
+               OpName %val "val"
+               OpName %conv_arr_4_Inner "conv_arr_4_Inner"
+               OpName %val_0 "val"
+               OpName %arr "arr"
+               OpName %i "i"
+               OpName %var_for_index "var_for_index"
+               OpName %Outer "Outer"
+               OpMemberName %Outer 0 "a"
+               OpName %conv_Outer "conv_Outer"
+               OpName %val_1 "val"
+               OpName %conv_arr_4_Outer "conv_arr_4_Outer"
+               OpName %val_2 "val"
+               OpName %arr_0 "arr"
+               OpName %i_0 "i"
+               OpName %var_for_index_1 "var_for_index_1"
+               OpName %load_a_3_a_2_m "load_a_3_a_2_m"
+               OpName %f "f"
+               OpDecorate %a_block Block
+               OpMemberDecorate %a_block 0 Offset 0
+               OpMemberDecorate %Outer_std140 0 Offset 0
+               OpMemberDecorate %Inner_std140 0 Offset 0
+               OpMemberDecorate %Inner_std140 1 Offset 8
+               OpDecorate %_arr_Inner_std140_uint_4 ArrayStride 16
+               OpDecorate %_arr_Outer_std140_uint_4 ArrayStride 64
+               OpDecorate %a NonWritable
+               OpDecorate %a DescriptorSet 0
+               OpDecorate %a Binding 0
+               OpMemberDecorate %Inner 0 Offset 0
+               OpMemberDecorate %Inner 0 ColMajor
+               OpMemberDecorate %Inner 0 MatrixStride 8
+               OpDecorate %_arr_Inner_uint_4 ArrayStride 16
+               OpMemberDecorate %Outer 0 Offset 0
+               OpDecorate %_arr_Outer_uint_4 ArrayStride 64
+      %float = OpTypeFloat 32
+    %v2float = OpTypeVector %float 2
+%Inner_std140 = OpTypeStruct %v2float %v2float
+       %uint = OpTypeInt 32 0
+     %uint_4 = OpConstant %uint 4
+%_arr_Inner_std140_uint_4 = OpTypeArray %Inner_std140 %uint_4
+%Outer_std140 = OpTypeStruct %_arr_Inner_std140_uint_4
+%_arr_Outer_std140_uint_4 = OpTypeArray %Outer_std140 %uint_4
+    %a_block = OpTypeStruct %_arr_Outer_std140_uint_4
+%_ptr_Uniform_a_block = OpTypePointer Uniform %a_block
+          %a = OpVariable %_ptr_Uniform_a_block Uniform
+%mat2v2float = OpTypeMatrix %v2float 2
+      %Inner = OpTypeStruct %mat2v2float
+         %12 = OpTypeFunction %Inner %Inner_std140
+%_arr_Inner_uint_4 = OpTypeArray %Inner %uint_4
+         %22 = OpTypeFunction %_arr_Inner_uint_4 %_arr_Inner_std140_uint_4
+%_ptr_Function__arr_Inner_uint_4 = OpTypePointer Function %_arr_Inner_uint_4
+         %29 = OpConstantNull %_arr_Inner_uint_4
+%_ptr_Function_uint = OpTypePointer Function %uint
+         %32 = OpConstantNull %uint
+       %bool = OpTypeBool
+%_ptr_Function__arr_Inner_std140_uint_4 = OpTypePointer Function %_arr_Inner_std140_uint_4
+         %45 = OpConstantNull %_arr_Inner_std140_uint_4
+%_ptr_Function_Inner = OpTypePointer Function %Inner
+%_ptr_Function_Inner_std140 = OpTypePointer Function %Inner_std140
+     %uint_1 = OpConstant %uint 1
+      %Outer = OpTypeStruct %_arr_Inner_uint_4
+         %58 = OpTypeFunction %Outer %Outer_std140
+%_arr_Outer_uint_4 = OpTypeArray %Outer %uint_4
+         %66 = OpTypeFunction %_arr_Outer_uint_4 %_arr_Outer_std140_uint_4
+%_ptr_Function__arr_Outer_uint_4 = OpTypePointer Function %_arr_Outer_uint_4
+         %73 = OpConstantNull %_arr_Outer_uint_4
+%_ptr_Function__arr_Outer_std140_uint_4 = OpTypePointer Function %_arr_Outer_std140_uint_4
+         %86 = OpConstantNull %_arr_Outer_std140_uint_4
+%_ptr_Function_Outer = OpTypePointer Function %Outer
+%_ptr_Function_Outer_std140 = OpTypePointer Function %Outer_std140
+         %98 = OpTypeFunction %mat2v2float
+     %uint_0 = OpConstant %uint 0
+     %uint_3 = OpConstant %uint 3
+     %uint_2 = OpConstant %uint 2
+%_ptr_Uniform_v2float = OpTypePointer Uniform %v2float
+       %void = OpTypeVoid
+        %110 = OpTypeFunction %void
+        %int = OpTypeInt 32 1
+      %int_1 = OpConstant %int 1
+%_ptr_Uniform__arr_Outer_std140_uint_4 = OpTypePointer Uniform %_arr_Outer_std140_uint_4
+%_ptr_Uniform_Outer_std140 = OpTypePointer Uniform %Outer_std140
+%_ptr_Uniform__arr_Inner_std140_uint_4 = OpTypePointer Uniform %_arr_Inner_std140_uint_4
+%_ptr_Uniform_Inner_std140 = OpTypePointer Uniform %Inner_std140
+%_ptr_Uniform_float = OpTypePointer Uniform %float
+ %conv_Inner = OpFunction %Inner None %12
+        %val = OpFunctionParameter %Inner_std140
+         %17 = OpLabel
+         %18 = OpCompositeExtract %v2float %val 0
+         %19 = OpCompositeExtract %v2float %val 1
+         %20 = OpCompositeConstruct %mat2v2float %18 %19
+         %21 = OpCompositeConstruct %Inner %20
+               OpReturnValue %21
+               OpFunctionEnd
+%conv_arr_4_Inner = OpFunction %_arr_Inner_uint_4 None %22
+      %val_0 = OpFunctionParameter %_arr_Inner_std140_uint_4
+         %26 = OpLabel
+        %arr = OpVariable %_ptr_Function__arr_Inner_uint_4 Function %29
+          %i = OpVariable %_ptr_Function_uint Function %32
+%var_for_index = OpVariable %_ptr_Function__arr_Inner_std140_uint_4 Function %45
+               OpBranch %33
+         %33 = OpLabel
+               OpLoopMerge %34 %35 None
+               OpBranch %36
+         %36 = OpLabel
+         %38 = OpLoad %uint %i
+         %39 = OpULessThan %bool %38 %uint_4
+         %37 = OpLogicalNot %bool %39
+               OpSelectionMerge %41 None
+               OpBranchConditional %37 %42 %41
+         %42 = OpLabel
+               OpBranch %34
+         %41 = OpLabel
+               OpStore %var_for_index %val_0
+         %46 = OpLoad %uint %i
+         %48 = OpAccessChain %_ptr_Function_Inner %arr %46
+         %50 = OpLoad %uint %i
+         %52 = OpAccessChain %_ptr_Function_Inner_std140 %var_for_index %50
+         %53 = OpLoad %Inner_std140 %52
+         %49 = OpFunctionCall %Inner %conv_Inner %53
+               OpStore %48 %49
+               OpBranch %35
+         %35 = OpLabel
+         %54 = OpLoad %uint %i
+         %56 = OpIAdd %uint %54 %uint_1
+               OpStore %i %56
+               OpBranch %33
+         %34 = OpLabel
+         %57 = OpLoad %_arr_Inner_uint_4 %arr
+               OpReturnValue %57
+               OpFunctionEnd
+ %conv_Outer = OpFunction %Outer None %58
+      %val_1 = OpFunctionParameter %Outer_std140
+         %62 = OpLabel
+         %64 = OpCompositeExtract %_arr_Inner_std140_uint_4 %val_1 0
+         %63 = OpFunctionCall %_arr_Inner_uint_4 %conv_arr_4_Inner %64
+         %65 = OpCompositeConstruct %Outer %63
+               OpReturnValue %65
+               OpFunctionEnd
+%conv_arr_4_Outer = OpFunction %_arr_Outer_uint_4 None %66
+      %val_2 = OpFunctionParameter %_arr_Outer_std140_uint_4
+         %70 = OpLabel
+      %arr_0 = OpVariable %_ptr_Function__arr_Outer_uint_4 Function %73
+        %i_0 = OpVariable %_ptr_Function_uint Function %32
+%var_for_index_1 = OpVariable %_ptr_Function__arr_Outer_std140_uint_4 Function %86
+               OpBranch %75
+         %75 = OpLabel
+               OpLoopMerge %76 %77 None
+               OpBranch %78
+         %78 = OpLabel
+         %80 = OpLoad %uint %i_0
+         %81 = OpULessThan %bool %80 %uint_4
+         %79 = OpLogicalNot %bool %81
+               OpSelectionMerge %82 None
+               OpBranchConditional %79 %83 %82
+         %83 = OpLabel
+               OpBranch %76
+         %82 = OpLabel
+               OpStore %var_for_index_1 %val_2
+         %87 = OpLoad %uint %i_0
+         %89 = OpAccessChain %_ptr_Function_Outer %arr_0 %87
+         %91 = OpLoad %uint %i_0
+         %93 = OpAccessChain %_ptr_Function_Outer_std140 %var_for_index_1 %91
+         %94 = OpLoad %Outer_std140 %93
+         %90 = OpFunctionCall %Outer %conv_Outer %94
+               OpStore %89 %90
+               OpBranch %77
+         %77 = OpLabel
+         %95 = OpLoad %uint %i_0
+         %96 = OpIAdd %uint %95 %uint_1
+               OpStore %i_0 %96
+               OpBranch %75
+         %76 = OpLabel
+         %97 = OpLoad %_arr_Outer_uint_4 %arr_0
+               OpReturnValue %97
+               OpFunctionEnd
+%load_a_3_a_2_m = OpFunction %mat2v2float None %98
+        %100 = OpLabel
+        %105 = OpAccessChain %_ptr_Uniform_v2float %a %uint_0 %uint_3 %uint_0 %uint_2 %uint_0
+        %106 = OpLoad %v2float %105
+        %107 = OpAccessChain %_ptr_Uniform_v2float %a %uint_0 %uint_3 %uint_0 %uint_2 %uint_1
+        %108 = OpLoad %v2float %107
+        %109 = OpCompositeConstruct %mat2v2float %106 %108
+               OpReturnValue %109
+               OpFunctionEnd
+          %f = OpFunction %void None %110
+        %113 = OpLabel
+        %118 = OpAccessChain %_ptr_Uniform__arr_Outer_std140_uint_4 %a %uint_0
+        %119 = OpLoad %_arr_Outer_std140_uint_4 %118
+        %116 = OpFunctionCall %_arr_Outer_uint_4 %conv_arr_4_Outer %119
+        %122 = OpAccessChain %_ptr_Uniform_Outer_std140 %a %uint_0 %uint_3
+        %123 = OpLoad %Outer_std140 %122
+        %120 = OpFunctionCall %Outer %conv_Outer %123
+        %126 = OpAccessChain %_ptr_Uniform__arr_Inner_std140_uint_4 %a %uint_0 %uint_3 %uint_0
+        %127 = OpLoad %_arr_Inner_std140_uint_4 %126
+        %124 = OpFunctionCall %_arr_Inner_uint_4 %conv_arr_4_Inner %127
+        %130 = OpAccessChain %_ptr_Uniform_Inner_std140 %a %uint_0 %uint_3 %uint_0 %uint_2
+        %131 = OpLoad %Inner_std140 %130
+        %128 = OpFunctionCall %Inner %conv_Inner %131
+        %132 = OpFunctionCall %mat2v2float %load_a_3_a_2_m
+        %133 = OpAccessChain %_ptr_Uniform_v2float %a %uint_0 %uint_3 %uint_0 %uint_2 %uint_1
+        %134 = OpLoad %v2float %133
+        %136 = OpAccessChain %_ptr_Uniform__arr_Outer_std140_uint_4 %a %uint_0
+        %137 = OpLoad %_arr_Outer_std140_uint_4 %136
+        %135 = OpFunctionCall %_arr_Outer_uint_4 %conv_arr_4_Outer %137
+        %139 = OpAccessChain %_ptr_Uniform_Outer_std140 %a %uint_0 %uint_3
+        %140 = OpLoad %Outer_std140 %139
+        %138 = OpFunctionCall %Outer %conv_Outer %140
+        %142 = OpAccessChain %_ptr_Uniform__arr_Inner_std140_uint_4 %a %uint_0 %uint_3 %uint_0
+        %143 = OpLoad %_arr_Inner_std140_uint_4 %142
+        %141 = OpFunctionCall %_arr_Inner_uint_4 %conv_arr_4_Inner %143
+        %145 = OpAccessChain %_ptr_Uniform_Inner_std140 %a %uint_0 %uint_3 %uint_0 %uint_2
+        %146 = OpLoad %Inner_std140 %145
+        %144 = OpFunctionCall %Inner %conv_Inner %146
+        %147 = OpFunctionCall %mat2v2float %load_a_3_a_2_m
+        %148 = OpAccessChain %_ptr_Uniform_v2float %a %uint_0 %uint_3 %uint_0 %uint_2 %uint_1
+        %149 = OpLoad %v2float %148
+        %151 = OpAccessChain %_ptr_Uniform_float %a %uint_0 %uint_3 %uint_0 %uint_2 %uint_1 %32
+        %152 = OpLoad %float %151
+               OpReturn
+               OpFunctionEnd
diff --git a/test/tint/buffer/uniform/std140/mat2x2/static_index_via_ptr.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/mat2x2/static_index_via_ptr.wgsl.expected.wgsl
new file mode 100644
index 0000000..67b881f
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/mat2x2/static_index_via_ptr.wgsl.expected.wgsl
@@ -0,0 +1,27 @@
+struct Inner {
+  m : mat2x2<f32>,
+}
+
+struct Outer {
+  a : array<Inner, 4>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<Outer, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+  let I = 1;
+  let p_a = &(a);
+  let p_a_3 = &((*(p_a))[3]);
+  let p_a_3_a = &((*(p_a_3)).a);
+  let p_a_3_a_2 = &((*(p_a_3_a))[2]);
+  let p_a_3_a_2_m = &((*(p_a_3_a_2)).m);
+  let p_a_3_a_2_m_1 = &((*(p_a_3_a_2_m))[1]);
+  let l_a : array<Outer, 4> = *(p_a);
+  let l_a_3 : Outer = *(p_a_3);
+  let l_a_3_a : array<Inner, 4> = *(p_a_3_a);
+  let l_a_3_a_2 : Inner = *(p_a_3_a_2);
+  let l_a_3_a_2_m : mat2x2<f32> = *(p_a_3_a_2_m);
+  let l_a_3_a_2_m_1 : vec2<f32> = *(p_a_3_a_2_m_1);
+  let l_a_3_a_2_m_1_0 : f32 = (*(p_a_3_a_2_m_1))[0];
+}
diff --git a/test/tint/buffer/uniform/std140/mat2x2/to_builtin.wgsl b/test/tint/buffer/uniform/std140/mat2x2/to_builtin.wgsl
new file mode 100644
index 0000000..b712b62
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/mat2x2/to_builtin.wgsl
@@ -0,0 +1,14 @@
+struct S {
+  before : i32,
+  m : mat2x2<f32>,
+  after : i32,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+    let t = transpose(u[2].m);
+    let l = length(u[0].m[1].yx);
+    let a = abs(u[0].m[1].yx.x);
+}
diff --git a/test/tint/buffer/uniform/std140/mat2x2/to_builtin.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/mat2x2/to_builtin.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..a6bdd17
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/mat2x2/to_builtin.wgsl.expected.dxc.hlsl
@@ -0,0 +1,19 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[8];
+};
+
+float2x2 tint_symbol(uint4 buffer[8], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load = buffer[scalar_offset / 4];
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset_1 / 4];
+  return float2x2(asfloat(((scalar_offset & 2) ? ubo_load.zw : ubo_load.xy)), asfloat(((scalar_offset_1 & 2) ? ubo_load_1.zw : ubo_load_1.xy)));
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const float2x2 t = transpose(tint_symbol(u, 72u));
+  const float l = length(asfloat(u[1].xy).yx);
+  const float a = abs(asfloat(u[1].xy).yx.x);
+  return;
+}
diff --git a/test/tint/buffer/uniform/std140/mat2x2/to_builtin.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/mat2x2/to_builtin.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..a6bdd17
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/mat2x2/to_builtin.wgsl.expected.fxc.hlsl
@@ -0,0 +1,19 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[8];
+};
+
+float2x2 tint_symbol(uint4 buffer[8], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load = buffer[scalar_offset / 4];
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset_1 / 4];
+  return float2x2(asfloat(((scalar_offset & 2) ? ubo_load.zw : ubo_load.xy)), asfloat(((scalar_offset_1 & 2) ? ubo_load_1.zw : ubo_load_1.xy)));
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const float2x2 t = transpose(tint_symbol(u, 72u));
+  const float l = length(asfloat(u[1].xy).yx);
+  const float a = abs(asfloat(u[1].xy).yx.x);
+  return;
+}
diff --git a/test/tint/buffer/uniform/std140/mat2x2/to_builtin.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/mat2x2/to_builtin.wgsl.expected.glsl
new file mode 100644
index 0000000..91beaa7
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/mat2x2/to_builtin.wgsl.expected.glsl
@@ -0,0 +1,38 @@
+#version 310 es
+
+struct S {
+  int before;
+  mat2 m;
+  int after;
+};
+
+struct S_std140 {
+  int before;
+  vec2 m_0;
+  vec2 m_1;
+  int after;
+};
+
+struct u_block {
+  S_std140 inner[4];
+};
+
+layout(binding = 0) uniform u_block_1 {
+  S_std140 inner[4];
+} u;
+
+mat2 load_u_2_m() {
+  return mat2(u.inner[2u].m_0, u.inner[2u].m_1);
+}
+
+void f() {
+  mat2 t = transpose(load_u_2_m());
+  float l = length(u.inner[0u].m_1.yx);
+  float a = abs(u.inner[0u].m_1.yx[0u]);
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}
diff --git a/test/tint/buffer/uniform/std140/mat2x2/to_builtin.wgsl.expected.msl b/test/tint/buffer/uniform/std140/mat2x2/to_builtin.wgsl.expected.msl
new file mode 100644
index 0000000..108d689
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/mat2x2/to_builtin.wgsl.expected.msl
@@ -0,0 +1,31 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+struct S {
+  /* 0x0000 */ int before;
+  /* 0x0004 */ tint_array<int8_t, 4> tint_pad;
+  /* 0x0008 */ float2x2 m;
+  /* 0x0018 */ int after;
+  /* 0x001c */ tint_array<int8_t, 4> tint_pad_1;
+};
+
+kernel void f(const constant tint_array<S, 4>* tint_symbol [[buffer(0)]]) {
+  float2x2 const t = transpose((*(tint_symbol))[2].m);
+  float const l = length(float2((*(tint_symbol))[0].m[1]).yx);
+  float const a = fabs(float2((*(tint_symbol))[0].m[1]).yx[0]);
+  return;
+}
+
diff --git a/test/tint/buffer/uniform/std140/mat2x2/to_builtin.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/mat2x2/to_builtin.wgsl.expected.spvasm
new file mode 100644
index 0000000..641ded1
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/mat2x2/to_builtin.wgsl.expected.spvasm
@@ -0,0 +1,73 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 41
+; Schema: 0
+               OpCapability Shader
+         %31 = OpExtInstImport "GLSL.std.450"
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %u_block "u_block"
+               OpMemberName %u_block 0 "inner"
+               OpName %S_std140 "S_std140"
+               OpMemberName %S_std140 0 "before"
+               OpMemberName %S_std140 1 "m_0"
+               OpMemberName %S_std140 2 "m_1"
+               OpMemberName %S_std140 3 "after"
+               OpName %u "u"
+               OpName %load_u_2_m "load_u_2_m"
+               OpName %f "f"
+               OpDecorate %u_block Block
+               OpMemberDecorate %u_block 0 Offset 0
+               OpMemberDecorate %S_std140 0 Offset 0
+               OpMemberDecorate %S_std140 1 Offset 8
+               OpMemberDecorate %S_std140 2 Offset 16
+               OpMemberDecorate %S_std140 3 Offset 24
+               OpDecorate %_arr_S_std140_uint_4 ArrayStride 32
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+        %int = OpTypeInt 32 1
+      %float = OpTypeFloat 32
+    %v2float = OpTypeVector %float 2
+   %S_std140 = OpTypeStruct %int %v2float %v2float %int
+       %uint = OpTypeInt 32 0
+     %uint_4 = OpConstant %uint 4
+%_arr_S_std140_uint_4 = OpTypeArray %S_std140 %uint_4
+    %u_block = OpTypeStruct %_arr_S_std140_uint_4
+%_ptr_Uniform_u_block = OpTypePointer Uniform %u_block
+          %u = OpVariable %_ptr_Uniform_u_block Uniform
+%mat2v2float = OpTypeMatrix %v2float 2
+         %11 = OpTypeFunction %mat2v2float
+     %uint_0 = OpConstant %uint 0
+     %uint_2 = OpConstant %uint 2
+     %uint_1 = OpConstant %uint 1
+%_ptr_Uniform_v2float = OpTypePointer Uniform %v2float
+       %void = OpTypeVoid
+         %24 = OpTypeFunction %void
+         %32 = OpConstantNull %uint
+ %load_u_2_m = OpFunction %mat2v2float None %11
+         %14 = OpLabel
+         %19 = OpAccessChain %_ptr_Uniform_v2float %u %uint_0 %uint_2 %uint_1
+         %20 = OpLoad %v2float %19
+         %21 = OpAccessChain %_ptr_Uniform_v2float %u %uint_0 %uint_2 %uint_2
+         %22 = OpLoad %v2float %21
+         %23 = OpCompositeConstruct %mat2v2float %20 %22
+               OpReturnValue %23
+               OpFunctionEnd
+          %f = OpFunction %void None %24
+         %27 = OpLabel
+         %29 = OpFunctionCall %mat2v2float %load_u_2_m
+         %28 = OpTranspose %mat2v2float %29
+         %33 = OpAccessChain %_ptr_Uniform_v2float %u %uint_0 %32 %uint_2
+         %34 = OpLoad %v2float %33
+         %35 = OpVectorShuffle %v2float %34 %34 1 0
+         %30 = OpExtInst %float %31 Length %35
+         %37 = OpAccessChain %_ptr_Uniform_v2float %u %uint_0 %32 %uint_2
+         %38 = OpLoad %v2float %37
+         %39 = OpVectorShuffle %v2float %38 %38 1 0
+         %40 = OpCompositeExtract %float %39 0
+         %36 = OpExtInst %float %31 FAbs %40
+               OpReturn
+               OpFunctionEnd
diff --git a/test/tint/buffer/uniform/std140/mat2x2/to_builtin.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/mat2x2/to_builtin.wgsl.expected.wgsl
new file mode 100644
index 0000000..2e27c88
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/mat2x2/to_builtin.wgsl.expected.wgsl
@@ -0,0 +1,14 @@
+struct S {
+  before : i32,
+  m : mat2x2<f32>,
+  after : i32,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+  let t = transpose(u[2].m);
+  let l = length(u[0].m[1].yx);
+  let a = abs(u[0].m[1].yx.x);
+}
diff --git a/test/tint/buffer/uniform/std140/mat2x2/to_fn.wgsl b/test/tint/buffer/uniform/std140/mat2x2/to_fn.wgsl
new file mode 100644
index 0000000..3ec4eda
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/mat2x2/to_fn.wgsl
@@ -0,0 +1,22 @@
+struct S {
+  before : i32,
+  m : mat2x2<f32>,
+  after : i32,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S, 4>;
+
+fn a(a : array<S, 4>) {}
+fn b(s : S) {}
+fn c(m : mat2x2<f32>) {}
+fn d(v : vec2<f32>) {}
+fn e(f : f32) {}
+
+@compute @workgroup_size(1)
+fn f() {
+    a(u);
+    b(u[2]);
+    c(u[2].m);
+    d(u[0].m[1].yx);
+    e(u[0].m[1].yx.x);
+}
diff --git a/test/tint/buffer/uniform/std140/mat2x2/to_fn.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/mat2x2/to_fn.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..41ccfc6
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/mat2x2/to_fn.wgsl.expected.dxc.hlsl
@@ -0,0 +1,60 @@
+struct S {
+  int before;
+  float2x2 m;
+  int after;
+};
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[8];
+};
+
+void a(S a_1[4]) {
+}
+
+void b(S s) {
+}
+
+void c(float2x2 m) {
+}
+
+void d(float2 v) {
+}
+
+void e(float f_1) {
+}
+
+float2x2 tint_symbol_3(uint4 buffer[8], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load = buffer[scalar_offset / 4];
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset_1 / 4];
+  return float2x2(asfloat(((scalar_offset & 2) ? ubo_load.zw : ubo_load.xy)), asfloat(((scalar_offset_1 & 2) ? ubo_load_1.zw : ubo_load_1.xy)));
+}
+
+S tint_symbol_1(uint4 buffer[8], uint offset) {
+  const uint scalar_offset_2 = ((offset + 0u)) / 4;
+  const uint scalar_offset_3 = ((offset + 24u)) / 4;
+  const S tint_symbol_5 = {asint(buffer[scalar_offset_2 / 4][scalar_offset_2 % 4]), tint_symbol_3(buffer, (offset + 8u)), asint(buffer[scalar_offset_3 / 4][scalar_offset_3 % 4])};
+  return tint_symbol_5;
+}
+
+typedef S tint_symbol_ret[4];
+tint_symbol_ret tint_symbol(uint4 buffer[8], uint offset) {
+  S arr[4] = (S[4])0;
+  {
+    [loop] for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = tint_symbol_1(buffer, (offset + (i * 32u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  a(tint_symbol(u, 0u));
+  b(tint_symbol_1(u, 64u));
+  c(tint_symbol_3(u, 72u));
+  d(asfloat(u[1].xy).yx);
+  e(asfloat(u[1].xy).yx.x);
+  return;
+}
diff --git a/test/tint/buffer/uniform/std140/mat2x2/to_fn.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/mat2x2/to_fn.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..41ccfc6
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/mat2x2/to_fn.wgsl.expected.fxc.hlsl
@@ -0,0 +1,60 @@
+struct S {
+  int before;
+  float2x2 m;
+  int after;
+};
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[8];
+};
+
+void a(S a_1[4]) {
+}
+
+void b(S s) {
+}
+
+void c(float2x2 m) {
+}
+
+void d(float2 v) {
+}
+
+void e(float f_1) {
+}
+
+float2x2 tint_symbol_3(uint4 buffer[8], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load = buffer[scalar_offset / 4];
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset_1 / 4];
+  return float2x2(asfloat(((scalar_offset & 2) ? ubo_load.zw : ubo_load.xy)), asfloat(((scalar_offset_1 & 2) ? ubo_load_1.zw : ubo_load_1.xy)));
+}
+
+S tint_symbol_1(uint4 buffer[8], uint offset) {
+  const uint scalar_offset_2 = ((offset + 0u)) / 4;
+  const uint scalar_offset_3 = ((offset + 24u)) / 4;
+  const S tint_symbol_5 = {asint(buffer[scalar_offset_2 / 4][scalar_offset_2 % 4]), tint_symbol_3(buffer, (offset + 8u)), asint(buffer[scalar_offset_3 / 4][scalar_offset_3 % 4])};
+  return tint_symbol_5;
+}
+
+typedef S tint_symbol_ret[4];
+tint_symbol_ret tint_symbol(uint4 buffer[8], uint offset) {
+  S arr[4] = (S[4])0;
+  {
+    [loop] for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = tint_symbol_1(buffer, (offset + (i * 32u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  a(tint_symbol(u, 0u));
+  b(tint_symbol_1(u, 64u));
+  c(tint_symbol_3(u, 72u));
+  d(asfloat(u[1].xy).yx);
+  e(asfloat(u[1].xy).yx.x);
+  return;
+}
diff --git a/test/tint/buffer/uniform/std140/mat2x2/to_fn.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/mat2x2/to_fn.wgsl.expected.glsl
new file mode 100644
index 0000000..b2fc7d8
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/mat2x2/to_fn.wgsl.expected.glsl
@@ -0,0 +1,70 @@
+#version 310 es
+
+struct S {
+  int before;
+  mat2 m;
+  int after;
+};
+
+struct S_std140 {
+  int before;
+  vec2 m_0;
+  vec2 m_1;
+  int after;
+};
+
+struct u_block {
+  S_std140 inner[4];
+};
+
+layout(binding = 0) uniform u_block_1 {
+  S_std140 inner[4];
+} u;
+
+void a(S a_1[4]) {
+}
+
+void b(S s) {
+}
+
+void c(mat2 m) {
+}
+
+void d(vec2 v) {
+}
+
+void e(float f_1) {
+}
+
+S conv_S(S_std140 val) {
+  S tint_symbol = S(val.before, mat2(val.m_0, val.m_1), val.after);
+  return tint_symbol;
+}
+
+S[4] conv_arr_4_S(S_std140 val[4]) {
+  S arr[4] = S[4](S(0, mat2(0.0f, 0.0f, 0.0f, 0.0f), 0), S(0, mat2(0.0f, 0.0f, 0.0f, 0.0f), 0), S(0, mat2(0.0f, 0.0f, 0.0f, 0.0f), 0), S(0, mat2(0.0f, 0.0f, 0.0f, 0.0f), 0));
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = conv_S(val[i]);
+    }
+  }
+  return arr;
+}
+
+mat2 load_u_2_m() {
+  return mat2(u.inner[2u].m_0, u.inner[2u].m_1);
+}
+
+void f() {
+  a(conv_arr_4_S(u.inner));
+  b(conv_S(u.inner[2u]));
+  c(load_u_2_m());
+  d(u.inner[0u].m_1.yx);
+  e(u.inner[0u].m_1.yx[0u]);
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}
diff --git a/test/tint/buffer/uniform/std140/mat2x2/to_fn.wgsl.expected.msl b/test/tint/buffer/uniform/std140/mat2x2/to_fn.wgsl.expected.msl
new file mode 100644
index 0000000..eda83c5
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/mat2x2/to_fn.wgsl.expected.msl
@@ -0,0 +1,48 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+struct S {
+  /* 0x0000 */ int before;
+  /* 0x0004 */ tint_array<int8_t, 4> tint_pad;
+  /* 0x0008 */ float2x2 m;
+  /* 0x0018 */ int after;
+  /* 0x001c */ tint_array<int8_t, 4> tint_pad_1;
+};
+
+void a(tint_array<S, 4> a_1) {
+}
+
+void b(S s) {
+}
+
+void c(float2x2 m) {
+}
+
+void d(float2 v) {
+}
+
+void e(float f_1) {
+}
+
+kernel void f(const constant tint_array<S, 4>* tint_symbol [[buffer(0)]]) {
+  a(*(tint_symbol));
+  b((*(tint_symbol))[2]);
+  c((*(tint_symbol))[2].m);
+  d(float2((*(tint_symbol))[0].m[1]).yx);
+  e(float2((*(tint_symbol))[0].m[1]).yx[0]);
+  return;
+}
+
diff --git a/test/tint/buffer/uniform/std140/mat2x2/to_fn.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/mat2x2/to_fn.wgsl.expected.spvasm
new file mode 100644
index 0000000..52b17ad
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/mat2x2/to_fn.wgsl.expected.spvasm
@@ -0,0 +1,199 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 115
+; Schema: 0
+               OpCapability Shader
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %u_block "u_block"
+               OpMemberName %u_block 0 "inner"
+               OpName %S_std140 "S_std140"
+               OpMemberName %S_std140 0 "before"
+               OpMemberName %S_std140 1 "m_0"
+               OpMemberName %S_std140 2 "m_1"
+               OpMemberName %S_std140 3 "after"
+               OpName %u "u"
+               OpName %S "S"
+               OpMemberName %S 0 "before"
+               OpMemberName %S 1 "m"
+               OpMemberName %S 2 "after"
+               OpName %a "a"
+               OpName %a_1 "a_1"
+               OpName %b "b"
+               OpName %s "s"
+               OpName %c "c"
+               OpName %m "m"
+               OpName %d "d"
+               OpName %v "v"
+               OpName %e "e"
+               OpName %f_1 "f_1"
+               OpName %conv_S "conv_S"
+               OpName %val "val"
+               OpName %conv_arr_4_S "conv_arr_4_S"
+               OpName %val_0 "val"
+               OpName %arr "arr"
+               OpName %i "i"
+               OpName %var_for_index "var_for_index"
+               OpName %load_u_2_m "load_u_2_m"
+               OpName %f "f"
+               OpDecorate %u_block Block
+               OpMemberDecorate %u_block 0 Offset 0
+               OpMemberDecorate %S_std140 0 Offset 0
+               OpMemberDecorate %S_std140 1 Offset 8
+               OpMemberDecorate %S_std140 2 Offset 16
+               OpMemberDecorate %S_std140 3 Offset 24
+               OpDecorate %_arr_S_std140_uint_4 ArrayStride 32
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+               OpMemberDecorate %S 0 Offset 0
+               OpMemberDecorate %S 1 Offset 8
+               OpMemberDecorate %S 1 ColMajor
+               OpMemberDecorate %S 1 MatrixStride 8
+               OpMemberDecorate %S 2 Offset 24
+               OpDecorate %_arr_S_uint_4 ArrayStride 32
+        %int = OpTypeInt 32 1
+      %float = OpTypeFloat 32
+    %v2float = OpTypeVector %float 2
+   %S_std140 = OpTypeStruct %int %v2float %v2float %int
+       %uint = OpTypeInt 32 0
+     %uint_4 = OpConstant %uint 4
+%_arr_S_std140_uint_4 = OpTypeArray %S_std140 %uint_4
+    %u_block = OpTypeStruct %_arr_S_std140_uint_4
+%_ptr_Uniform_u_block = OpTypePointer Uniform %u_block
+          %u = OpVariable %_ptr_Uniform_u_block Uniform
+       %void = OpTypeVoid
+%mat2v2float = OpTypeMatrix %v2float 2
+          %S = OpTypeStruct %int %mat2v2float %int
+%_arr_S_uint_4 = OpTypeArray %S %uint_4
+         %11 = OpTypeFunction %void %_arr_S_uint_4
+         %19 = OpTypeFunction %void %S
+         %23 = OpTypeFunction %void %mat2v2float
+         %27 = OpTypeFunction %void %v2float
+         %31 = OpTypeFunction %void %float
+         %35 = OpTypeFunction %S %S_std140
+         %45 = OpTypeFunction %_arr_S_uint_4 %_arr_S_std140_uint_4
+%_ptr_Function__arr_S_uint_4 = OpTypePointer Function %_arr_S_uint_4
+         %51 = OpConstantNull %_arr_S_uint_4
+%_ptr_Function_uint = OpTypePointer Function %uint
+         %54 = OpConstantNull %uint
+       %bool = OpTypeBool
+%_ptr_Function__arr_S_std140_uint_4 = OpTypePointer Function %_arr_S_std140_uint_4
+         %67 = OpConstantNull %_arr_S_std140_uint_4
+%_ptr_Function_S = OpTypePointer Function %S
+%_ptr_Function_S_std140 = OpTypePointer Function %S_std140
+     %uint_1 = OpConstant %uint 1
+         %80 = OpTypeFunction %mat2v2float
+     %uint_0 = OpConstant %uint 0
+     %uint_2 = OpConstant %uint 2
+%_ptr_Uniform_v2float = OpTypePointer Uniform %v2float
+         %91 = OpTypeFunction %void
+%_ptr_Uniform__arr_S_std140_uint_4 = OpTypePointer Uniform %_arr_S_std140_uint_4
+%_ptr_Uniform_S_std140 = OpTypePointer Uniform %S_std140
+          %a = OpFunction %void None %11
+        %a_1 = OpFunctionParameter %_arr_S_uint_4
+         %18 = OpLabel
+               OpReturn
+               OpFunctionEnd
+          %b = OpFunction %void None %19
+          %s = OpFunctionParameter %S
+         %22 = OpLabel
+               OpReturn
+               OpFunctionEnd
+          %c = OpFunction %void None %23
+          %m = OpFunctionParameter %mat2v2float
+         %26 = OpLabel
+               OpReturn
+               OpFunctionEnd
+          %d = OpFunction %void None %27
+          %v = OpFunctionParameter %v2float
+         %30 = OpLabel
+               OpReturn
+               OpFunctionEnd
+          %e = OpFunction %void None %31
+        %f_1 = OpFunctionParameter %float
+         %34 = OpLabel
+               OpReturn
+               OpFunctionEnd
+     %conv_S = OpFunction %S None %35
+        %val = OpFunctionParameter %S_std140
+         %38 = OpLabel
+         %39 = OpCompositeExtract %int %val 0
+         %40 = OpCompositeExtract %v2float %val 1
+         %41 = OpCompositeExtract %v2float %val 2
+         %42 = OpCompositeConstruct %mat2v2float %40 %41
+         %43 = OpCompositeExtract %int %val 3
+         %44 = OpCompositeConstruct %S %39 %42 %43
+               OpReturnValue %44
+               OpFunctionEnd
+%conv_arr_4_S = OpFunction %_arr_S_uint_4 None %45
+      %val_0 = OpFunctionParameter %_arr_S_std140_uint_4
+         %48 = OpLabel
+        %arr = OpVariable %_ptr_Function__arr_S_uint_4 Function %51
+          %i = OpVariable %_ptr_Function_uint Function %54
+%var_for_index = OpVariable %_ptr_Function__arr_S_std140_uint_4 Function %67
+               OpBranch %55
+         %55 = OpLabel
+               OpLoopMerge %56 %57 None
+               OpBranch %58
+         %58 = OpLabel
+         %60 = OpLoad %uint %i
+         %61 = OpULessThan %bool %60 %uint_4
+         %59 = OpLogicalNot %bool %61
+               OpSelectionMerge %63 None
+               OpBranchConditional %59 %64 %63
+         %64 = OpLabel
+               OpBranch %56
+         %63 = OpLabel
+               OpStore %var_for_index %val_0
+         %68 = OpLoad %uint %i
+         %70 = OpAccessChain %_ptr_Function_S %arr %68
+         %72 = OpLoad %uint %i
+         %74 = OpAccessChain %_ptr_Function_S_std140 %var_for_index %72
+         %75 = OpLoad %S_std140 %74
+         %71 = OpFunctionCall %S %conv_S %75
+               OpStore %70 %71
+               OpBranch %57
+         %57 = OpLabel
+         %76 = OpLoad %uint %i
+         %78 = OpIAdd %uint %76 %uint_1
+               OpStore %i %78
+               OpBranch %55
+         %56 = OpLabel
+         %79 = OpLoad %_arr_S_uint_4 %arr
+               OpReturnValue %79
+               OpFunctionEnd
+ %load_u_2_m = OpFunction %mat2v2float None %80
+         %82 = OpLabel
+         %86 = OpAccessChain %_ptr_Uniform_v2float %u %uint_0 %uint_2 %uint_1
+         %87 = OpLoad %v2float %86
+         %88 = OpAccessChain %_ptr_Uniform_v2float %u %uint_0 %uint_2 %uint_2
+         %89 = OpLoad %v2float %88
+         %90 = OpCompositeConstruct %mat2v2float %87 %89
+               OpReturnValue %90
+               OpFunctionEnd
+          %f = OpFunction %void None %91
+         %93 = OpLabel
+         %97 = OpAccessChain %_ptr_Uniform__arr_S_std140_uint_4 %u %uint_0
+         %98 = OpLoad %_arr_S_std140_uint_4 %97
+         %95 = OpFunctionCall %_arr_S_uint_4 %conv_arr_4_S %98
+         %94 = OpFunctionCall %void %a %95
+        %102 = OpAccessChain %_ptr_Uniform_S_std140 %u %uint_0 %uint_2
+        %103 = OpLoad %S_std140 %102
+        %100 = OpFunctionCall %S %conv_S %103
+         %99 = OpFunctionCall %void %b %100
+        %105 = OpFunctionCall %mat2v2float %load_u_2_m
+        %104 = OpFunctionCall %void %c %105
+        %107 = OpAccessChain %_ptr_Uniform_v2float %u %uint_0 %54 %uint_2
+        %108 = OpLoad %v2float %107
+        %109 = OpVectorShuffle %v2float %108 %108 1 0
+        %106 = OpFunctionCall %void %d %109
+        %111 = OpAccessChain %_ptr_Uniform_v2float %u %uint_0 %54 %uint_2
+        %112 = OpLoad %v2float %111
+        %113 = OpVectorShuffle %v2float %112 %112 1 0
+        %114 = OpCompositeExtract %float %113 0
+        %110 = OpFunctionCall %void %e %114
+               OpReturn
+               OpFunctionEnd
diff --git a/test/tint/buffer/uniform/std140/mat2x2/to_fn.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/mat2x2/to_fn.wgsl.expected.wgsl
new file mode 100644
index 0000000..f8ed037
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/mat2x2/to_fn.wgsl.expected.wgsl
@@ -0,0 +1,31 @@
+struct S {
+  before : i32,
+  m : mat2x2<f32>,
+  after : i32,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S, 4>;
+
+fn a(a : array<S, 4>) {
+}
+
+fn b(s : S) {
+}
+
+fn c(m : mat2x2<f32>) {
+}
+
+fn d(v : vec2<f32>) {
+}
+
+fn e(f : f32) {
+}
+
+@compute @workgroup_size(1)
+fn f() {
+  a(u);
+  b(u[2]);
+  c(u[2].m);
+  d(u[0].m[1].yx);
+  e(u[0].m[1].yx.x);
+}
diff --git a/test/tint/buffer/uniform/std140/mat2x2/to_private.wgsl b/test/tint/buffer/uniform/std140/mat2x2/to_private.wgsl
new file mode 100644
index 0000000..4cf6d16
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/mat2x2/to_private.wgsl
@@ -0,0 +1,16 @@
+struct S {
+  before : i32,
+  m : mat2x2<f32>,
+  after : i32,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S, 4>;
+var<private> p : array<S, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+    p = u;
+    p[1] = u[2];
+    p[3].m = u[2].m;
+    p[1].m[0] = u[0].m[1].yx;
+}
diff --git a/test/tint/buffer/uniform/std140/mat2x2/to_private.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/mat2x2/to_private.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..46ec4d3
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/mat2x2/to_private.wgsl.expected.dxc.hlsl
@@ -0,0 +1,45 @@
+struct S {
+  int before;
+  float2x2 m;
+  int after;
+};
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[8];
+};
+static S p[4] = (S[4])0;
+
+float2x2 tint_symbol_3(uint4 buffer[8], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load = buffer[scalar_offset / 4];
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset_1 / 4];
+  return float2x2(asfloat(((scalar_offset & 2) ? ubo_load.zw : ubo_load.xy)), asfloat(((scalar_offset_1 & 2) ? ubo_load_1.zw : ubo_load_1.xy)));
+}
+
+S tint_symbol_1(uint4 buffer[8], uint offset) {
+  const uint scalar_offset_2 = ((offset + 0u)) / 4;
+  const uint scalar_offset_3 = ((offset + 24u)) / 4;
+  const S tint_symbol_5 = {asint(buffer[scalar_offset_2 / 4][scalar_offset_2 % 4]), tint_symbol_3(buffer, (offset + 8u)), asint(buffer[scalar_offset_3 / 4][scalar_offset_3 % 4])};
+  return tint_symbol_5;
+}
+
+typedef S tint_symbol_ret[4];
+tint_symbol_ret tint_symbol(uint4 buffer[8], uint offset) {
+  S arr[4] = (S[4])0;
+  {
+    [loop] for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = tint_symbol_1(buffer, (offset + (i * 32u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  p = tint_symbol(u, 0u);
+  p[1] = tint_symbol_1(u, 64u);
+  p[3].m = tint_symbol_3(u, 72u);
+  p[1].m[0] = asfloat(u[1].xy).yx;
+  return;
+}
diff --git a/test/tint/buffer/uniform/std140/mat2x2/to_private.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/mat2x2/to_private.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..46ec4d3
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/mat2x2/to_private.wgsl.expected.fxc.hlsl
@@ -0,0 +1,45 @@
+struct S {
+  int before;
+  float2x2 m;
+  int after;
+};
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[8];
+};
+static S p[4] = (S[4])0;
+
+float2x2 tint_symbol_3(uint4 buffer[8], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load = buffer[scalar_offset / 4];
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset_1 / 4];
+  return float2x2(asfloat(((scalar_offset & 2) ? ubo_load.zw : ubo_load.xy)), asfloat(((scalar_offset_1 & 2) ? ubo_load_1.zw : ubo_load_1.xy)));
+}
+
+S tint_symbol_1(uint4 buffer[8], uint offset) {
+  const uint scalar_offset_2 = ((offset + 0u)) / 4;
+  const uint scalar_offset_3 = ((offset + 24u)) / 4;
+  const S tint_symbol_5 = {asint(buffer[scalar_offset_2 / 4][scalar_offset_2 % 4]), tint_symbol_3(buffer, (offset + 8u)), asint(buffer[scalar_offset_3 / 4][scalar_offset_3 % 4])};
+  return tint_symbol_5;
+}
+
+typedef S tint_symbol_ret[4];
+tint_symbol_ret tint_symbol(uint4 buffer[8], uint offset) {
+  S arr[4] = (S[4])0;
+  {
+    [loop] for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = tint_symbol_1(buffer, (offset + (i * 32u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  p = tint_symbol(u, 0u);
+  p[1] = tint_symbol_1(u, 64u);
+  p[3].m = tint_symbol_3(u, 72u);
+  p[1].m[0] = asfloat(u[1].xy).yx;
+  return;
+}
diff --git a/test/tint/buffer/uniform/std140/mat2x2/to_private.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/mat2x2/to_private.wgsl.expected.glsl
new file mode 100644
index 0000000..7d8afca
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/mat2x2/to_private.wgsl.expected.glsl
@@ -0,0 +1,55 @@
+#version 310 es
+
+struct S {
+  int before;
+  mat2 m;
+  int after;
+};
+
+struct S_std140 {
+  int before;
+  vec2 m_0;
+  vec2 m_1;
+  int after;
+};
+
+struct u_block {
+  S_std140 inner[4];
+};
+
+layout(binding = 0) uniform u_block_1 {
+  S_std140 inner[4];
+} u;
+
+S p[4] = S[4](S(0, mat2(0.0f, 0.0f, 0.0f, 0.0f), 0), S(0, mat2(0.0f, 0.0f, 0.0f, 0.0f), 0), S(0, mat2(0.0f, 0.0f, 0.0f, 0.0f), 0), S(0, mat2(0.0f, 0.0f, 0.0f, 0.0f), 0));
+S conv_S(S_std140 val) {
+  S tint_symbol = S(val.before, mat2(val.m_0, val.m_1), val.after);
+  return tint_symbol;
+}
+
+S[4] conv_arr_4_S(S_std140 val[4]) {
+  S arr[4] = S[4](S(0, mat2(0.0f, 0.0f, 0.0f, 0.0f), 0), S(0, mat2(0.0f, 0.0f, 0.0f, 0.0f), 0), S(0, mat2(0.0f, 0.0f, 0.0f, 0.0f), 0), S(0, mat2(0.0f, 0.0f, 0.0f, 0.0f), 0));
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = conv_S(val[i]);
+    }
+  }
+  return arr;
+}
+
+mat2 load_u_2_m() {
+  return mat2(u.inner[2u].m_0, u.inner[2u].m_1);
+}
+
+void f() {
+  p = conv_arr_4_S(u.inner);
+  p[1] = conv_S(u.inner[2u]);
+  p[3].m = load_u_2_m();
+  p[1].m[0] = u.inner[0u].m_1.yx;
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}
diff --git a/test/tint/buffer/uniform/std140/mat2x2/to_private.wgsl.expected.msl b/test/tint/buffer/uniform/std140/mat2x2/to_private.wgsl.expected.msl
new file mode 100644
index 0000000..87743fb
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/mat2x2/to_private.wgsl.expected.msl
@@ -0,0 +1,33 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+struct S {
+  /* 0x0000 */ int before;
+  /* 0x0004 */ tint_array<int8_t, 4> tint_pad;
+  /* 0x0008 */ float2x2 m;
+  /* 0x0018 */ int after;
+  /* 0x001c */ tint_array<int8_t, 4> tint_pad_1;
+};
+
+kernel void f(const constant tint_array<S, 4>* tint_symbol_1 [[buffer(0)]]) {
+  thread tint_array<S, 4> tint_symbol = {};
+  tint_symbol = *(tint_symbol_1);
+  tint_symbol[1] = (*(tint_symbol_1))[2];
+  tint_symbol[3].m = (*(tint_symbol_1))[2].m;
+  tint_symbol[1].m[0] = float2((*(tint_symbol_1))[0].m[1]).yx;
+  return;
+}
+
diff --git a/test/tint/buffer/uniform/std140/mat2x2/to_private.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/mat2x2/to_private.wgsl.expected.spvasm
new file mode 100644
index 0000000..ecca4ee
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/mat2x2/to_private.wgsl.expected.spvasm
@@ -0,0 +1,166 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 97
+; Schema: 0
+               OpCapability Shader
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %u_block "u_block"
+               OpMemberName %u_block 0 "inner"
+               OpName %S_std140 "S_std140"
+               OpMemberName %S_std140 0 "before"
+               OpMemberName %S_std140 1 "m_0"
+               OpMemberName %S_std140 2 "m_1"
+               OpMemberName %S_std140 3 "after"
+               OpName %u "u"
+               OpName %S "S"
+               OpMemberName %S 0 "before"
+               OpMemberName %S 1 "m"
+               OpMemberName %S 2 "after"
+               OpName %p "p"
+               OpName %conv_S "conv_S"
+               OpName %val "val"
+               OpName %conv_arr_4_S "conv_arr_4_S"
+               OpName %val_0 "val"
+               OpName %arr "arr"
+               OpName %i "i"
+               OpName %var_for_index "var_for_index"
+               OpName %load_u_2_m "load_u_2_m"
+               OpName %f "f"
+               OpDecorate %u_block Block
+               OpMemberDecorate %u_block 0 Offset 0
+               OpMemberDecorate %S_std140 0 Offset 0
+               OpMemberDecorate %S_std140 1 Offset 8
+               OpMemberDecorate %S_std140 2 Offset 16
+               OpMemberDecorate %S_std140 3 Offset 24
+               OpDecorate %_arr_S_std140_uint_4 ArrayStride 32
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+               OpMemberDecorate %S 0 Offset 0
+               OpMemberDecorate %S 1 Offset 8
+               OpMemberDecorate %S 1 ColMajor
+               OpMemberDecorate %S 1 MatrixStride 8
+               OpMemberDecorate %S 2 Offset 24
+               OpDecorate %_arr_S_uint_4 ArrayStride 32
+        %int = OpTypeInt 32 1
+      %float = OpTypeFloat 32
+    %v2float = OpTypeVector %float 2
+   %S_std140 = OpTypeStruct %int %v2float %v2float %int
+       %uint = OpTypeInt 32 0
+     %uint_4 = OpConstant %uint 4
+%_arr_S_std140_uint_4 = OpTypeArray %S_std140 %uint_4
+    %u_block = OpTypeStruct %_arr_S_std140_uint_4
+%_ptr_Uniform_u_block = OpTypePointer Uniform %u_block
+          %u = OpVariable %_ptr_Uniform_u_block Uniform
+%mat2v2float = OpTypeMatrix %v2float 2
+          %S = OpTypeStruct %int %mat2v2float %int
+%_arr_S_uint_4 = OpTypeArray %S %uint_4
+%_ptr_Private__arr_S_uint_4 = OpTypePointer Private %_arr_S_uint_4
+         %16 = OpConstantNull %_arr_S_uint_4
+          %p = OpVariable %_ptr_Private__arr_S_uint_4 Private %16
+         %17 = OpTypeFunction %S %S_std140
+         %27 = OpTypeFunction %_arr_S_uint_4 %_arr_S_std140_uint_4
+%_ptr_Function__arr_S_uint_4 = OpTypePointer Function %_arr_S_uint_4
+%_ptr_Function_uint = OpTypePointer Function %uint
+         %35 = OpConstantNull %uint
+       %bool = OpTypeBool
+%_ptr_Function__arr_S_std140_uint_4 = OpTypePointer Function %_arr_S_std140_uint_4
+         %48 = OpConstantNull %_arr_S_std140_uint_4
+%_ptr_Function_S = OpTypePointer Function %S
+%_ptr_Function_S_std140 = OpTypePointer Function %S_std140
+     %uint_1 = OpConstant %uint 1
+         %61 = OpTypeFunction %mat2v2float
+     %uint_0 = OpConstant %uint 0
+     %uint_2 = OpConstant %uint 2
+%_ptr_Uniform_v2float = OpTypePointer Uniform %v2float
+       %void = OpTypeVoid
+         %72 = OpTypeFunction %void
+%_ptr_Uniform__arr_S_std140_uint_4 = OpTypePointer Uniform %_arr_S_std140_uint_4
+      %int_1 = OpConstant %int 1
+%_ptr_Private_S = OpTypePointer Private %S
+%_ptr_Uniform_S_std140 = OpTypePointer Uniform %S_std140
+      %int_3 = OpConstant %int 3
+%_ptr_Private_mat2v2float = OpTypePointer Private %mat2v2float
+         %91 = OpConstantNull %int
+%_ptr_Private_v2float = OpTypePointer Private %v2float
+     %conv_S = OpFunction %S None %17
+        %val = OpFunctionParameter %S_std140
+         %20 = OpLabel
+         %21 = OpCompositeExtract %int %val 0
+         %22 = OpCompositeExtract %v2float %val 1
+         %23 = OpCompositeExtract %v2float %val 2
+         %24 = OpCompositeConstruct %mat2v2float %22 %23
+         %25 = OpCompositeExtract %int %val 3
+         %26 = OpCompositeConstruct %S %21 %24 %25
+               OpReturnValue %26
+               OpFunctionEnd
+%conv_arr_4_S = OpFunction %_arr_S_uint_4 None %27
+      %val_0 = OpFunctionParameter %_arr_S_std140_uint_4
+         %30 = OpLabel
+        %arr = OpVariable %_ptr_Function__arr_S_uint_4 Function %16
+          %i = OpVariable %_ptr_Function_uint Function %35
+%var_for_index = OpVariable %_ptr_Function__arr_S_std140_uint_4 Function %48
+               OpBranch %36
+         %36 = OpLabel
+               OpLoopMerge %37 %38 None
+               OpBranch %39
+         %39 = OpLabel
+         %41 = OpLoad %uint %i
+         %42 = OpULessThan %bool %41 %uint_4
+         %40 = OpLogicalNot %bool %42
+               OpSelectionMerge %44 None
+               OpBranchConditional %40 %45 %44
+         %45 = OpLabel
+               OpBranch %37
+         %44 = OpLabel
+               OpStore %var_for_index %val_0
+         %49 = OpLoad %uint %i
+         %51 = OpAccessChain %_ptr_Function_S %arr %49
+         %53 = OpLoad %uint %i
+         %55 = OpAccessChain %_ptr_Function_S_std140 %var_for_index %53
+         %56 = OpLoad %S_std140 %55
+         %52 = OpFunctionCall %S %conv_S %56
+               OpStore %51 %52
+               OpBranch %38
+         %38 = OpLabel
+         %57 = OpLoad %uint %i
+         %59 = OpIAdd %uint %57 %uint_1
+               OpStore %i %59
+               OpBranch %36
+         %37 = OpLabel
+         %60 = OpLoad %_arr_S_uint_4 %arr
+               OpReturnValue %60
+               OpFunctionEnd
+ %load_u_2_m = OpFunction %mat2v2float None %61
+         %63 = OpLabel
+         %67 = OpAccessChain %_ptr_Uniform_v2float %u %uint_0 %uint_2 %uint_1
+         %68 = OpLoad %v2float %67
+         %69 = OpAccessChain %_ptr_Uniform_v2float %u %uint_0 %uint_2 %uint_2
+         %70 = OpLoad %v2float %69
+         %71 = OpCompositeConstruct %mat2v2float %68 %70
+               OpReturnValue %71
+               OpFunctionEnd
+          %f = OpFunction %void None %72
+         %75 = OpLabel
+         %78 = OpAccessChain %_ptr_Uniform__arr_S_std140_uint_4 %u %uint_0
+         %79 = OpLoad %_arr_S_std140_uint_4 %78
+         %76 = OpFunctionCall %_arr_S_uint_4 %conv_arr_4_S %79
+               OpStore %p %76
+         %82 = OpAccessChain %_ptr_Private_S %p %int_1
+         %85 = OpAccessChain %_ptr_Uniform_S_std140 %u %uint_0 %uint_2
+         %86 = OpLoad %S_std140 %85
+         %83 = OpFunctionCall %S %conv_S %86
+               OpStore %82 %83
+         %89 = OpAccessChain %_ptr_Private_mat2v2float %p %int_3 %uint_1
+         %90 = OpFunctionCall %mat2v2float %load_u_2_m
+               OpStore %89 %90
+         %93 = OpAccessChain %_ptr_Private_v2float %p %int_1 %uint_1 %91
+         %94 = OpAccessChain %_ptr_Uniform_v2float %u %uint_0 %35 %uint_2
+         %95 = OpLoad %v2float %94
+         %96 = OpVectorShuffle %v2float %95 %95 1 0
+               OpStore %93 %96
+               OpReturn
+               OpFunctionEnd
diff --git a/test/tint/buffer/uniform/std140/mat2x2/to_private.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/mat2x2/to_private.wgsl.expected.wgsl
new file mode 100644
index 0000000..9d4358a
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/mat2x2/to_private.wgsl.expected.wgsl
@@ -0,0 +1,17 @@
+struct S {
+  before : i32,
+  m : mat2x2<f32>,
+  after : i32,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S, 4>;
+
+var<private> p : array<S, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+  p = u;
+  p[1] = u[2];
+  p[3].m = u[2].m;
+  p[1].m[0] = u[0].m[1].yx;
+}
diff --git a/test/tint/buffer/uniform/std140/mat2x2/to_storage.wgsl b/test/tint/buffer/uniform/std140/mat2x2/to_storage.wgsl
new file mode 100644
index 0000000..8d2687b
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/mat2x2/to_storage.wgsl
@@ -0,0 +1,16 @@
+struct S {
+  before : i32,
+  m : mat2x2<f32>,
+  after : i32,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S, 4>;
+@group(0) @binding(1) var<storage, read_write> s : array<S, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+    s = u;
+    s[1] = u[2];
+    s[3].m = u[2].m;
+    s[1].m[0] = u[0].m[1].yx;
+}
diff --git a/test/tint/buffer/uniform/std140/mat2x2/to_storage.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/mat2x2/to_storage.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..857e5f2
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/mat2x2/to_storage.wgsl.expected.dxc.hlsl
@@ -0,0 +1,65 @@
+struct S {
+  int before;
+  float2x2 m;
+  int after;
+};
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[8];
+};
+RWByteAddressBuffer s : register(u1, space0);
+
+void tint_symbol_3(RWByteAddressBuffer buffer, uint offset, float2x2 value) {
+  buffer.Store2((offset + 0u), asuint(value[0u]));
+  buffer.Store2((offset + 8u), asuint(value[1u]));
+}
+
+void tint_symbol_1(RWByteAddressBuffer buffer, uint offset, S value) {
+  buffer.Store((offset + 0u), asuint(value.before));
+  tint_symbol_3(buffer, (offset + 8u), value.m);
+  buffer.Store((offset + 24u), asuint(value.after));
+}
+
+void tint_symbol(RWByteAddressBuffer buffer, uint offset, S value[4]) {
+  S array[4] = value;
+  {
+    [loop] for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      tint_symbol_1(buffer, (offset + (i * 32u)), array[i]);
+    }
+  }
+}
+
+float2x2 tint_symbol_8(uint4 buffer[8], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load = buffer[scalar_offset / 4];
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset_1 / 4];
+  return float2x2(asfloat(((scalar_offset & 2) ? ubo_load.zw : ubo_load.xy)), asfloat(((scalar_offset_1 & 2) ? ubo_load_1.zw : ubo_load_1.xy)));
+}
+
+S tint_symbol_6(uint4 buffer[8], uint offset) {
+  const uint scalar_offset_2 = ((offset + 0u)) / 4;
+  const uint scalar_offset_3 = ((offset + 24u)) / 4;
+  const S tint_symbol_10 = {asint(buffer[scalar_offset_2 / 4][scalar_offset_2 % 4]), tint_symbol_8(buffer, (offset + 8u)), asint(buffer[scalar_offset_3 / 4][scalar_offset_3 % 4])};
+  return tint_symbol_10;
+}
+
+typedef S tint_symbol_5_ret[4];
+tint_symbol_5_ret tint_symbol_5(uint4 buffer[8], uint offset) {
+  S arr[4] = (S[4])0;
+  {
+    [loop] for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
+      arr[i_1] = tint_symbol_6(buffer, (offset + (i_1 * 32u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  tint_symbol(s, 0u, tint_symbol_5(u, 0u));
+  tint_symbol_1(s, 32u, tint_symbol_6(u, 64u));
+  tint_symbol_3(s, 104u, tint_symbol_8(u, 72u));
+  s.Store2(40u, asuint(asfloat(u[1].xy).yx));
+  return;
+}
diff --git a/test/tint/buffer/uniform/std140/mat2x2/to_storage.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/mat2x2/to_storage.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..857e5f2
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/mat2x2/to_storage.wgsl.expected.fxc.hlsl
@@ -0,0 +1,65 @@
+struct S {
+  int before;
+  float2x2 m;
+  int after;
+};
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[8];
+};
+RWByteAddressBuffer s : register(u1, space0);
+
+void tint_symbol_3(RWByteAddressBuffer buffer, uint offset, float2x2 value) {
+  buffer.Store2((offset + 0u), asuint(value[0u]));
+  buffer.Store2((offset + 8u), asuint(value[1u]));
+}
+
+void tint_symbol_1(RWByteAddressBuffer buffer, uint offset, S value) {
+  buffer.Store((offset + 0u), asuint(value.before));
+  tint_symbol_3(buffer, (offset + 8u), value.m);
+  buffer.Store((offset + 24u), asuint(value.after));
+}
+
+void tint_symbol(RWByteAddressBuffer buffer, uint offset, S value[4]) {
+  S array[4] = value;
+  {
+    [loop] for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      tint_symbol_1(buffer, (offset + (i * 32u)), array[i]);
+    }
+  }
+}
+
+float2x2 tint_symbol_8(uint4 buffer[8], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load = buffer[scalar_offset / 4];
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset_1 / 4];
+  return float2x2(asfloat(((scalar_offset & 2) ? ubo_load.zw : ubo_load.xy)), asfloat(((scalar_offset_1 & 2) ? ubo_load_1.zw : ubo_load_1.xy)));
+}
+
+S tint_symbol_6(uint4 buffer[8], uint offset) {
+  const uint scalar_offset_2 = ((offset + 0u)) / 4;
+  const uint scalar_offset_3 = ((offset + 24u)) / 4;
+  const S tint_symbol_10 = {asint(buffer[scalar_offset_2 / 4][scalar_offset_2 % 4]), tint_symbol_8(buffer, (offset + 8u)), asint(buffer[scalar_offset_3 / 4][scalar_offset_3 % 4])};
+  return tint_symbol_10;
+}
+
+typedef S tint_symbol_5_ret[4];
+tint_symbol_5_ret tint_symbol_5(uint4 buffer[8], uint offset) {
+  S arr[4] = (S[4])0;
+  {
+    [loop] for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
+      arr[i_1] = tint_symbol_6(buffer, (offset + (i_1 * 32u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  tint_symbol(s, 0u, tint_symbol_5(u, 0u));
+  tint_symbol_1(s, 32u, tint_symbol_6(u, 64u));
+  tint_symbol_3(s, 104u, tint_symbol_8(u, 72u));
+  s.Store2(40u, asuint(asfloat(u[1].xy).yx));
+  return;
+}
diff --git a/test/tint/buffer/uniform/std140/mat2x2/to_storage.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/mat2x2/to_storage.wgsl.expected.glsl
new file mode 100644
index 0000000..6961e35
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/mat2x2/to_storage.wgsl.expected.glsl
@@ -0,0 +1,61 @@
+#version 310 es
+
+struct S {
+  int before;
+  mat2 m;
+  int after;
+};
+
+struct S_std140 {
+  int before;
+  vec2 m_0;
+  vec2 m_1;
+  int after;
+};
+
+struct u_block {
+  S_std140 inner[4];
+};
+
+layout(binding = 0) uniform u_block_1 {
+  S_std140 inner[4];
+} u;
+
+struct s_block {
+  S inner[4];
+};
+
+layout(binding = 1, std430) buffer s_block_1 {
+  S inner[4];
+} s;
+S conv_S(S_std140 val) {
+  S tint_symbol = S(val.before, mat2(val.m_0, val.m_1), val.after);
+  return tint_symbol;
+}
+
+S[4] conv_arr_4_S(S_std140 val[4]) {
+  S arr[4] = S[4](S(0, mat2(0.0f, 0.0f, 0.0f, 0.0f), 0), S(0, mat2(0.0f, 0.0f, 0.0f, 0.0f), 0), S(0, mat2(0.0f, 0.0f, 0.0f, 0.0f), 0), S(0, mat2(0.0f, 0.0f, 0.0f, 0.0f), 0));
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = conv_S(val[i]);
+    }
+  }
+  return arr;
+}
+
+mat2 load_u_2_m() {
+  return mat2(u.inner[2u].m_0, u.inner[2u].m_1);
+}
+
+void f() {
+  s.inner = conv_arr_4_S(u.inner);
+  s.inner[1] = conv_S(u.inner[2u]);
+  s.inner[3].m = load_u_2_m();
+  s.inner[1].m[0] = u.inner[0u].m_1.yx;
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}
diff --git a/test/tint/buffer/uniform/std140/mat2x2/to_storage.wgsl.expected.msl b/test/tint/buffer/uniform/std140/mat2x2/to_storage.wgsl.expected.msl
new file mode 100644
index 0000000..b3b4048
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/mat2x2/to_storage.wgsl.expected.msl
@@ -0,0 +1,32 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+struct S {
+  /* 0x0000 */ int before;
+  /* 0x0004 */ tint_array<int8_t, 4> tint_pad;
+  /* 0x0008 */ float2x2 m;
+  /* 0x0018 */ int after;
+  /* 0x001c */ tint_array<int8_t, 4> tint_pad_1;
+};
+
+kernel void f(device tint_array<S, 4>* tint_symbol [[buffer(1)]], const constant tint_array<S, 4>* tint_symbol_1 [[buffer(0)]]) {
+  *(tint_symbol) = *(tint_symbol_1);
+  (*(tint_symbol))[1] = (*(tint_symbol_1))[2];
+  (*(tint_symbol))[3].m = (*(tint_symbol_1))[2].m;
+  (*(tint_symbol))[1].m[0] = float2((*(tint_symbol_1))[0].m[1]).yx;
+  return;
+}
+
diff --git a/test/tint/buffer/uniform/std140/mat2x2/to_storage.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/mat2x2/to_storage.wgsl.expected.spvasm
new file mode 100644
index 0000000..03f28a7
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/mat2x2/to_storage.wgsl.expected.spvasm
@@ -0,0 +1,175 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 100
+; Schema: 0
+               OpCapability Shader
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %u_block "u_block"
+               OpMemberName %u_block 0 "inner"
+               OpName %S_std140 "S_std140"
+               OpMemberName %S_std140 0 "before"
+               OpMemberName %S_std140 1 "m_0"
+               OpMemberName %S_std140 2 "m_1"
+               OpMemberName %S_std140 3 "after"
+               OpName %u "u"
+               OpName %s_block "s_block"
+               OpMemberName %s_block 0 "inner"
+               OpName %S "S"
+               OpMemberName %S 0 "before"
+               OpMemberName %S 1 "m"
+               OpMemberName %S 2 "after"
+               OpName %s "s"
+               OpName %conv_S "conv_S"
+               OpName %val "val"
+               OpName %conv_arr_4_S "conv_arr_4_S"
+               OpName %val_0 "val"
+               OpName %arr "arr"
+               OpName %i "i"
+               OpName %var_for_index "var_for_index"
+               OpName %load_u_2_m "load_u_2_m"
+               OpName %f "f"
+               OpDecorate %u_block Block
+               OpMemberDecorate %u_block 0 Offset 0
+               OpMemberDecorate %S_std140 0 Offset 0
+               OpMemberDecorate %S_std140 1 Offset 8
+               OpMemberDecorate %S_std140 2 Offset 16
+               OpMemberDecorate %S_std140 3 Offset 24
+               OpDecorate %_arr_S_std140_uint_4 ArrayStride 32
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+               OpDecorate %s_block Block
+               OpMemberDecorate %s_block 0 Offset 0
+               OpMemberDecorate %S 0 Offset 0
+               OpMemberDecorate %S 1 Offset 8
+               OpMemberDecorate %S 1 ColMajor
+               OpMemberDecorate %S 1 MatrixStride 8
+               OpMemberDecorate %S 2 Offset 24
+               OpDecorate %_arr_S_uint_4 ArrayStride 32
+               OpDecorate %s DescriptorSet 0
+               OpDecorate %s Binding 1
+        %int = OpTypeInt 32 1
+      %float = OpTypeFloat 32
+    %v2float = OpTypeVector %float 2
+   %S_std140 = OpTypeStruct %int %v2float %v2float %int
+       %uint = OpTypeInt 32 0
+     %uint_4 = OpConstant %uint 4
+%_arr_S_std140_uint_4 = OpTypeArray %S_std140 %uint_4
+    %u_block = OpTypeStruct %_arr_S_std140_uint_4
+%_ptr_Uniform_u_block = OpTypePointer Uniform %u_block
+          %u = OpVariable %_ptr_Uniform_u_block Uniform
+%mat2v2float = OpTypeMatrix %v2float 2
+          %S = OpTypeStruct %int %mat2v2float %int
+%_arr_S_uint_4 = OpTypeArray %S %uint_4
+    %s_block = OpTypeStruct %_arr_S_uint_4
+%_ptr_StorageBuffer_s_block = OpTypePointer StorageBuffer %s_block
+          %s = OpVariable %_ptr_StorageBuffer_s_block StorageBuffer
+         %17 = OpTypeFunction %S %S_std140
+         %27 = OpTypeFunction %_arr_S_uint_4 %_arr_S_std140_uint_4
+%_ptr_Function__arr_S_uint_4 = OpTypePointer Function %_arr_S_uint_4
+         %33 = OpConstantNull %_arr_S_uint_4
+%_ptr_Function_uint = OpTypePointer Function %uint
+         %36 = OpConstantNull %uint
+       %bool = OpTypeBool
+%_ptr_Function__arr_S_std140_uint_4 = OpTypePointer Function %_arr_S_std140_uint_4
+         %49 = OpConstantNull %_arr_S_std140_uint_4
+%_ptr_Function_S = OpTypePointer Function %S
+%_ptr_Function_S_std140 = OpTypePointer Function %S_std140
+     %uint_1 = OpConstant %uint 1
+         %62 = OpTypeFunction %mat2v2float
+     %uint_0 = OpConstant %uint 0
+     %uint_2 = OpConstant %uint 2
+%_ptr_Uniform_v2float = OpTypePointer Uniform %v2float
+       %void = OpTypeVoid
+         %73 = OpTypeFunction %void
+%_ptr_StorageBuffer__arr_S_uint_4 = OpTypePointer StorageBuffer %_arr_S_uint_4
+%_ptr_Uniform__arr_S_std140_uint_4 = OpTypePointer Uniform %_arr_S_std140_uint_4
+      %int_1 = OpConstant %int 1
+%_ptr_StorageBuffer_S = OpTypePointer StorageBuffer %S
+%_ptr_Uniform_S_std140 = OpTypePointer Uniform %S_std140
+      %int_3 = OpConstant %int 3
+%_ptr_StorageBuffer_mat2v2float = OpTypePointer StorageBuffer %mat2v2float
+         %94 = OpConstantNull %int
+%_ptr_StorageBuffer_v2float = OpTypePointer StorageBuffer %v2float
+     %conv_S = OpFunction %S None %17
+        %val = OpFunctionParameter %S_std140
+         %20 = OpLabel
+         %21 = OpCompositeExtract %int %val 0
+         %22 = OpCompositeExtract %v2float %val 1
+         %23 = OpCompositeExtract %v2float %val 2
+         %24 = OpCompositeConstruct %mat2v2float %22 %23
+         %25 = OpCompositeExtract %int %val 3
+         %26 = OpCompositeConstruct %S %21 %24 %25
+               OpReturnValue %26
+               OpFunctionEnd
+%conv_arr_4_S = OpFunction %_arr_S_uint_4 None %27
+      %val_0 = OpFunctionParameter %_arr_S_std140_uint_4
+         %30 = OpLabel
+        %arr = OpVariable %_ptr_Function__arr_S_uint_4 Function %33
+          %i = OpVariable %_ptr_Function_uint Function %36
+%var_for_index = OpVariable %_ptr_Function__arr_S_std140_uint_4 Function %49
+               OpBranch %37
+         %37 = OpLabel
+               OpLoopMerge %38 %39 None
+               OpBranch %40
+         %40 = OpLabel
+         %42 = OpLoad %uint %i
+         %43 = OpULessThan %bool %42 %uint_4
+         %41 = OpLogicalNot %bool %43
+               OpSelectionMerge %45 None
+               OpBranchConditional %41 %46 %45
+         %46 = OpLabel
+               OpBranch %38
+         %45 = OpLabel
+               OpStore %var_for_index %val_0
+         %50 = OpLoad %uint %i
+         %52 = OpAccessChain %_ptr_Function_S %arr %50
+         %54 = OpLoad %uint %i
+         %56 = OpAccessChain %_ptr_Function_S_std140 %var_for_index %54
+         %57 = OpLoad %S_std140 %56
+         %53 = OpFunctionCall %S %conv_S %57
+               OpStore %52 %53
+               OpBranch %39
+         %39 = OpLabel
+         %58 = OpLoad %uint %i
+         %60 = OpIAdd %uint %58 %uint_1
+               OpStore %i %60
+               OpBranch %37
+         %38 = OpLabel
+         %61 = OpLoad %_arr_S_uint_4 %arr
+               OpReturnValue %61
+               OpFunctionEnd
+ %load_u_2_m = OpFunction %mat2v2float None %62
+         %64 = OpLabel
+         %68 = OpAccessChain %_ptr_Uniform_v2float %u %uint_0 %uint_2 %uint_1
+         %69 = OpLoad %v2float %68
+         %70 = OpAccessChain %_ptr_Uniform_v2float %u %uint_0 %uint_2 %uint_2
+         %71 = OpLoad %v2float %70
+         %72 = OpCompositeConstruct %mat2v2float %69 %71
+               OpReturnValue %72
+               OpFunctionEnd
+          %f = OpFunction %void None %73
+         %76 = OpLabel
+         %78 = OpAccessChain %_ptr_StorageBuffer__arr_S_uint_4 %s %uint_0
+         %81 = OpAccessChain %_ptr_Uniform__arr_S_std140_uint_4 %u %uint_0
+         %82 = OpLoad %_arr_S_std140_uint_4 %81
+         %79 = OpFunctionCall %_arr_S_uint_4 %conv_arr_4_S %82
+               OpStore %78 %79
+         %85 = OpAccessChain %_ptr_StorageBuffer_S %s %uint_0 %int_1
+         %88 = OpAccessChain %_ptr_Uniform_S_std140 %u %uint_0 %uint_2
+         %89 = OpLoad %S_std140 %88
+         %86 = OpFunctionCall %S %conv_S %89
+               OpStore %85 %86
+         %92 = OpAccessChain %_ptr_StorageBuffer_mat2v2float %s %uint_0 %int_3 %uint_1
+         %93 = OpFunctionCall %mat2v2float %load_u_2_m
+               OpStore %92 %93
+         %96 = OpAccessChain %_ptr_StorageBuffer_v2float %s %uint_0 %int_1 %uint_1 %94
+         %97 = OpAccessChain %_ptr_Uniform_v2float %u %uint_0 %36 %uint_2
+         %98 = OpLoad %v2float %97
+         %99 = OpVectorShuffle %v2float %98 %98 1 0
+               OpStore %96 %99
+               OpReturn
+               OpFunctionEnd
diff --git a/test/tint/buffer/uniform/std140/mat2x2/to_storage.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/mat2x2/to_storage.wgsl.expected.wgsl
new file mode 100644
index 0000000..e9ed5d0
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/mat2x2/to_storage.wgsl.expected.wgsl
@@ -0,0 +1,17 @@
+struct S {
+  before : i32,
+  m : mat2x2<f32>,
+  after : i32,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S, 4>;
+
+@group(0) @binding(1) var<storage, read_write> s : array<S, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+  s = u;
+  s[1] = u[2];
+  s[3].m = u[2].m;
+  s[1].m[0] = u[0].m[1].yx;
+}
diff --git a/test/tint/buffer/uniform/std140/mat2x2/to_workgroup.wgsl b/test/tint/buffer/uniform/std140/mat2x2/to_workgroup.wgsl
new file mode 100644
index 0000000..714b643
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/mat2x2/to_workgroup.wgsl
@@ -0,0 +1,16 @@
+struct S {
+  before : i32,
+  m : mat2x2<f32>,
+  after : i32,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S, 4>;
+var<workgroup> w : array<S, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+    w = u;
+    w[1] = u[2];
+    w[3].m = u[2].m;
+    w[1].m[0] = u[0].m[1].yx;
+}
diff --git a/test/tint/buffer/uniform/std140/mat2x2/to_workgroup.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/mat2x2/to_workgroup.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..ae60a93
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/mat2x2/to_workgroup.wgsl.expected.dxc.hlsl
@@ -0,0 +1,61 @@
+struct S {
+  int before;
+  float2x2 m;
+  int after;
+};
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[8];
+};
+groupshared S w[4];
+
+struct tint_symbol_1 {
+  uint local_invocation_index : SV_GroupIndex;
+};
+
+float2x2 tint_symbol_5(uint4 buffer[8], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load = buffer[scalar_offset / 4];
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset_1 / 4];
+  return float2x2(asfloat(((scalar_offset & 2) ? ubo_load.zw : ubo_load.xy)), asfloat(((scalar_offset_1 & 2) ? ubo_load_1.zw : ubo_load_1.xy)));
+}
+
+S tint_symbol_3(uint4 buffer[8], uint offset) {
+  const uint scalar_offset_2 = ((offset + 0u)) / 4;
+  const uint scalar_offset_3 = ((offset + 24u)) / 4;
+  const S tint_symbol_8 = {asint(buffer[scalar_offset_2 / 4][scalar_offset_2 % 4]), tint_symbol_5(buffer, (offset + 8u)), asint(buffer[scalar_offset_3 / 4][scalar_offset_3 % 4])};
+  return tint_symbol_8;
+}
+
+typedef S tint_symbol_2_ret[4];
+tint_symbol_2_ret tint_symbol_2(uint4 buffer[8], uint offset) {
+  S arr[4] = (S[4])0;
+  {
+    [loop] for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
+      arr[i_1] = tint_symbol_3(buffer, (offset + (i_1 * 32u)));
+    }
+  }
+  return arr;
+}
+
+void f_inner(uint local_invocation_index) {
+  {
+    [loop] for(uint idx = local_invocation_index; (idx < 4u); idx = (idx + 1u)) {
+      const uint i = idx;
+      const S tint_symbol_7 = (S)0;
+      w[i] = tint_symbol_7;
+    }
+  }
+  GroupMemoryBarrierWithGroupSync();
+  w = tint_symbol_2(u, 0u);
+  w[1] = tint_symbol_3(u, 64u);
+  w[3].m = tint_symbol_5(u, 72u);
+  w[1].m[0] = asfloat(u[1].xy).yx;
+}
+
+[numthreads(1, 1, 1)]
+void f(tint_symbol_1 tint_symbol) {
+  f_inner(tint_symbol.local_invocation_index);
+  return;
+}
diff --git a/test/tint/buffer/uniform/std140/mat2x2/to_workgroup.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/mat2x2/to_workgroup.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..ae60a93
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/mat2x2/to_workgroup.wgsl.expected.fxc.hlsl
@@ -0,0 +1,61 @@
+struct S {
+  int before;
+  float2x2 m;
+  int after;
+};
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[8];
+};
+groupshared S w[4];
+
+struct tint_symbol_1 {
+  uint local_invocation_index : SV_GroupIndex;
+};
+
+float2x2 tint_symbol_5(uint4 buffer[8], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load = buffer[scalar_offset / 4];
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset_1 / 4];
+  return float2x2(asfloat(((scalar_offset & 2) ? ubo_load.zw : ubo_load.xy)), asfloat(((scalar_offset_1 & 2) ? ubo_load_1.zw : ubo_load_1.xy)));
+}
+
+S tint_symbol_3(uint4 buffer[8], uint offset) {
+  const uint scalar_offset_2 = ((offset + 0u)) / 4;
+  const uint scalar_offset_3 = ((offset + 24u)) / 4;
+  const S tint_symbol_8 = {asint(buffer[scalar_offset_2 / 4][scalar_offset_2 % 4]), tint_symbol_5(buffer, (offset + 8u)), asint(buffer[scalar_offset_3 / 4][scalar_offset_3 % 4])};
+  return tint_symbol_8;
+}
+
+typedef S tint_symbol_2_ret[4];
+tint_symbol_2_ret tint_symbol_2(uint4 buffer[8], uint offset) {
+  S arr[4] = (S[4])0;
+  {
+    [loop] for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
+      arr[i_1] = tint_symbol_3(buffer, (offset + (i_1 * 32u)));
+    }
+  }
+  return arr;
+}
+
+void f_inner(uint local_invocation_index) {
+  {
+    [loop] for(uint idx = local_invocation_index; (idx < 4u); idx = (idx + 1u)) {
+      const uint i = idx;
+      const S tint_symbol_7 = (S)0;
+      w[i] = tint_symbol_7;
+    }
+  }
+  GroupMemoryBarrierWithGroupSync();
+  w = tint_symbol_2(u, 0u);
+  w[1] = tint_symbol_3(u, 64u);
+  w[3].m = tint_symbol_5(u, 72u);
+  w[1].m[0] = asfloat(u[1].xy).yx;
+}
+
+[numthreads(1, 1, 1)]
+void f(tint_symbol_1 tint_symbol) {
+  f_inner(tint_symbol.local_invocation_index);
+  return;
+}
diff --git a/test/tint/buffer/uniform/std140/mat2x2/to_workgroup.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/mat2x2/to_workgroup.wgsl.expected.glsl
new file mode 100644
index 0000000..0f49e3f
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/mat2x2/to_workgroup.wgsl.expected.glsl
@@ -0,0 +1,63 @@
+#version 310 es
+
+struct S {
+  int before;
+  mat2 m;
+  int after;
+};
+
+struct S_std140 {
+  int before;
+  vec2 m_0;
+  vec2 m_1;
+  int after;
+};
+
+struct u_block {
+  S_std140 inner[4];
+};
+
+layout(binding = 0) uniform u_block_1 {
+  S_std140 inner[4];
+} u;
+
+shared S w[4];
+S conv_S(S_std140 val) {
+  S tint_symbol = S(val.before, mat2(val.m_0, val.m_1), val.after);
+  return tint_symbol;
+}
+
+S[4] conv_arr_4_S(S_std140 val[4]) {
+  S arr[4] = S[4](S(0, mat2(0.0f, 0.0f, 0.0f, 0.0f), 0), S(0, mat2(0.0f, 0.0f, 0.0f, 0.0f), 0), S(0, mat2(0.0f, 0.0f, 0.0f, 0.0f), 0), S(0, mat2(0.0f, 0.0f, 0.0f, 0.0f), 0));
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = conv_S(val[i]);
+    }
+  }
+  return arr;
+}
+
+mat2 load_u_2_m() {
+  return mat2(u.inner[2u].m_0, u.inner[2u].m_1);
+}
+
+void f(uint local_invocation_index) {
+  {
+    for(uint idx = local_invocation_index; (idx < 4u); idx = (idx + 1u)) {
+      uint i = idx;
+      S tint_symbol_1 = S(0, mat2(vec2(0.0f), vec2(0.0f)), 0);
+      w[i] = tint_symbol_1;
+    }
+  }
+  barrier();
+  w = conv_arr_4_S(u.inner);
+  w[1] = conv_S(u.inner[2u]);
+  w[3].m = load_u_2_m();
+  w[1].m[0] = u.inner[0u].m_1.yx;
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f(gl_LocalInvocationIndex);
+  return;
+}
diff --git a/test/tint/buffer/uniform/std140/mat2x2/to_workgroup.wgsl.expected.msl b/test/tint/buffer/uniform/std140/mat2x2/to_workgroup.wgsl.expected.msl
new file mode 100644
index 0000000..85861c7
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/mat2x2/to_workgroup.wgsl.expected.msl
@@ -0,0 +1,47 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+struct S {
+  /* 0x0000 */ int before;
+  /* 0x0004 */ tint_array<int8_t, 4> tint_pad;
+  /* 0x0008 */ float2x2 m;
+  /* 0x0018 */ int after;
+  /* 0x001c */ tint_array<int8_t, 4> tint_pad_1;
+};
+
+struct tint_symbol_6 {
+  tint_array<S, 4> w;
+};
+
+void f_inner(uint local_invocation_index, threadgroup tint_array<S, 4>* const tint_symbol_1, const constant tint_array<S, 4>* const tint_symbol_2) {
+  for(uint idx = local_invocation_index; (idx < 4u); idx = (idx + 1u)) {
+    uint const i = idx;
+    S const tint_symbol = S{};
+    (*(tint_symbol_1))[i] = tint_symbol;
+  }
+  threadgroup_barrier(mem_flags::mem_threadgroup);
+  *(tint_symbol_1) = *(tint_symbol_2);
+  (*(tint_symbol_1))[1] = (*(tint_symbol_2))[2];
+  (*(tint_symbol_1))[3].m = (*(tint_symbol_2))[2].m;
+  (*(tint_symbol_1))[1].m[0] = float2((*(tint_symbol_2))[0].m[1]).yx;
+}
+
+kernel void f(const constant tint_array<S, 4>* tint_symbol_5 [[buffer(0)]], threadgroup tint_symbol_6* tint_symbol_4 [[threadgroup(0)]], uint local_invocation_index [[thread_index_in_threadgroup]]) {
+  threadgroup tint_array<S, 4>* const tint_symbol_3 = &((*(tint_symbol_4)).w);
+  f_inner(local_invocation_index, tint_symbol_3, tint_symbol_5);
+  return;
+}
+
diff --git a/test/tint/buffer/uniform/std140/mat2x2/to_workgroup.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/mat2x2/to_workgroup.wgsl.expected.spvasm
new file mode 100644
index 0000000..305e172
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/mat2x2/to_workgroup.wgsl.expected.spvasm
@@ -0,0 +1,209 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 122
+; Schema: 0
+               OpCapability Shader
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f" %local_invocation_index_1
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %local_invocation_index_1 "local_invocation_index_1"
+               OpName %u_block "u_block"
+               OpMemberName %u_block 0 "inner"
+               OpName %S_std140 "S_std140"
+               OpMemberName %S_std140 0 "before"
+               OpMemberName %S_std140 1 "m_0"
+               OpMemberName %S_std140 2 "m_1"
+               OpMemberName %S_std140 3 "after"
+               OpName %u "u"
+               OpName %S "S"
+               OpMemberName %S 0 "before"
+               OpMemberName %S 1 "m"
+               OpMemberName %S 2 "after"
+               OpName %w "w"
+               OpName %conv_S "conv_S"
+               OpName %val "val"
+               OpName %conv_arr_4_S "conv_arr_4_S"
+               OpName %val_0 "val"
+               OpName %arr "arr"
+               OpName %i "i"
+               OpName %var_for_index "var_for_index"
+               OpName %load_u_2_m "load_u_2_m"
+               OpName %f_inner "f_inner"
+               OpName %local_invocation_index "local_invocation_index"
+               OpName %idx "idx"
+               OpName %f "f"
+               OpDecorate %local_invocation_index_1 BuiltIn LocalInvocationIndex
+               OpDecorate %u_block Block
+               OpMemberDecorate %u_block 0 Offset 0
+               OpMemberDecorate %S_std140 0 Offset 0
+               OpMemberDecorate %S_std140 1 Offset 8
+               OpMemberDecorate %S_std140 2 Offset 16
+               OpMemberDecorate %S_std140 3 Offset 24
+               OpDecorate %_arr_S_std140_uint_4 ArrayStride 32
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+               OpMemberDecorate %S 0 Offset 0
+               OpMemberDecorate %S 1 Offset 8
+               OpMemberDecorate %S 1 ColMajor
+               OpMemberDecorate %S 1 MatrixStride 8
+               OpMemberDecorate %S 2 Offset 24
+               OpDecorate %_arr_S_uint_4 ArrayStride 32
+       %uint = OpTypeInt 32 0
+%_ptr_Input_uint = OpTypePointer Input %uint
+%local_invocation_index_1 = OpVariable %_ptr_Input_uint Input
+        %int = OpTypeInt 32 1
+      %float = OpTypeFloat 32
+    %v2float = OpTypeVector %float 2
+   %S_std140 = OpTypeStruct %int %v2float %v2float %int
+     %uint_4 = OpConstant %uint 4
+%_arr_S_std140_uint_4 = OpTypeArray %S_std140 %uint_4
+    %u_block = OpTypeStruct %_arr_S_std140_uint_4
+%_ptr_Uniform_u_block = OpTypePointer Uniform %u_block
+          %u = OpVariable %_ptr_Uniform_u_block Uniform
+%mat2v2float = OpTypeMatrix %v2float 2
+          %S = OpTypeStruct %int %mat2v2float %int
+%_arr_S_uint_4 = OpTypeArray %S %uint_4
+%_ptr_Workgroup__arr_S_uint_4 = OpTypePointer Workgroup %_arr_S_uint_4
+          %w = OpVariable %_ptr_Workgroup__arr_S_uint_4 Workgroup
+         %18 = OpTypeFunction %S %S_std140
+         %28 = OpTypeFunction %_arr_S_uint_4 %_arr_S_std140_uint_4
+%_ptr_Function__arr_S_uint_4 = OpTypePointer Function %_arr_S_uint_4
+         %34 = OpConstantNull %_arr_S_uint_4
+%_ptr_Function_uint = OpTypePointer Function %uint
+         %37 = OpConstantNull %uint
+       %bool = OpTypeBool
+%_ptr_Function__arr_S_std140_uint_4 = OpTypePointer Function %_arr_S_std140_uint_4
+         %50 = OpConstantNull %_arr_S_std140_uint_4
+%_ptr_Function_S = OpTypePointer Function %S
+%_ptr_Function_S_std140 = OpTypePointer Function %S_std140
+     %uint_1 = OpConstant %uint 1
+         %63 = OpTypeFunction %mat2v2float
+     %uint_0 = OpConstant %uint 0
+     %uint_2 = OpConstant %uint 2
+%_ptr_Uniform_v2float = OpTypePointer Uniform %v2float
+       %void = OpTypeVoid
+         %74 = OpTypeFunction %void %uint
+%_ptr_Workgroup_S = OpTypePointer Workgroup %S
+         %92 = OpConstantNull %S
+   %uint_264 = OpConstant %uint 264
+%_ptr_Uniform__arr_S_std140_uint_4 = OpTypePointer Uniform %_arr_S_std140_uint_4
+      %int_1 = OpConstant %int 1
+%_ptr_Uniform_S_std140 = OpTypePointer Uniform %S_std140
+      %int_3 = OpConstant %int 3
+%_ptr_Workgroup_mat2v2float = OpTypePointer Workgroup %mat2v2float
+        %111 = OpConstantNull %int
+%_ptr_Workgroup_v2float = OpTypePointer Workgroup %v2float
+        %117 = OpTypeFunction %void
+     %conv_S = OpFunction %S None %18
+        %val = OpFunctionParameter %S_std140
+         %21 = OpLabel
+         %22 = OpCompositeExtract %int %val 0
+         %23 = OpCompositeExtract %v2float %val 1
+         %24 = OpCompositeExtract %v2float %val 2
+         %25 = OpCompositeConstruct %mat2v2float %23 %24
+         %26 = OpCompositeExtract %int %val 3
+         %27 = OpCompositeConstruct %S %22 %25 %26
+               OpReturnValue %27
+               OpFunctionEnd
+%conv_arr_4_S = OpFunction %_arr_S_uint_4 None %28
+      %val_0 = OpFunctionParameter %_arr_S_std140_uint_4
+         %31 = OpLabel
+        %arr = OpVariable %_ptr_Function__arr_S_uint_4 Function %34
+          %i = OpVariable %_ptr_Function_uint Function %37
+%var_for_index = OpVariable %_ptr_Function__arr_S_std140_uint_4 Function %50
+               OpBranch %38
+         %38 = OpLabel
+               OpLoopMerge %39 %40 None
+               OpBranch %41
+         %41 = OpLabel
+         %43 = OpLoad %uint %i
+         %44 = OpULessThan %bool %43 %uint_4
+         %42 = OpLogicalNot %bool %44
+               OpSelectionMerge %46 None
+               OpBranchConditional %42 %47 %46
+         %47 = OpLabel
+               OpBranch %39
+         %46 = OpLabel
+               OpStore %var_for_index %val_0
+         %51 = OpLoad %uint %i
+         %53 = OpAccessChain %_ptr_Function_S %arr %51
+         %55 = OpLoad %uint %i
+         %57 = OpAccessChain %_ptr_Function_S_std140 %var_for_index %55
+         %58 = OpLoad %S_std140 %57
+         %54 = OpFunctionCall %S %conv_S %58
+               OpStore %53 %54
+               OpBranch %40
+         %40 = OpLabel
+         %59 = OpLoad %uint %i
+         %61 = OpIAdd %uint %59 %uint_1
+               OpStore %i %61
+               OpBranch %38
+         %39 = OpLabel
+         %62 = OpLoad %_arr_S_uint_4 %arr
+               OpReturnValue %62
+               OpFunctionEnd
+ %load_u_2_m = OpFunction %mat2v2float None %63
+         %65 = OpLabel
+         %69 = OpAccessChain %_ptr_Uniform_v2float %u %uint_0 %uint_2 %uint_1
+         %70 = OpLoad %v2float %69
+         %71 = OpAccessChain %_ptr_Uniform_v2float %u %uint_0 %uint_2 %uint_2
+         %72 = OpLoad %v2float %71
+         %73 = OpCompositeConstruct %mat2v2float %70 %72
+               OpReturnValue %73
+               OpFunctionEnd
+    %f_inner = OpFunction %void None %74
+%local_invocation_index = OpFunctionParameter %uint
+         %78 = OpLabel
+        %idx = OpVariable %_ptr_Function_uint Function %37
+               OpStore %idx %local_invocation_index
+               OpBranch %80
+         %80 = OpLabel
+               OpLoopMerge %81 %82 None
+               OpBranch %83
+         %83 = OpLabel
+         %85 = OpLoad %uint %idx
+         %86 = OpULessThan %bool %85 %uint_4
+         %84 = OpLogicalNot %bool %86
+               OpSelectionMerge %87 None
+               OpBranchConditional %84 %88 %87
+         %88 = OpLabel
+               OpBranch %81
+         %87 = OpLabel
+         %89 = OpLoad %uint %idx
+         %91 = OpAccessChain %_ptr_Workgroup_S %w %89
+               OpStore %91 %92
+               OpBranch %82
+         %82 = OpLabel
+         %93 = OpLoad %uint %idx
+         %94 = OpIAdd %uint %93 %uint_1
+               OpStore %idx %94
+               OpBranch %80
+         %81 = OpLabel
+               OpControlBarrier %uint_2 %uint_2 %uint_264
+         %99 = OpAccessChain %_ptr_Uniform__arr_S_std140_uint_4 %u %uint_0
+        %100 = OpLoad %_arr_S_std140_uint_4 %99
+         %97 = OpFunctionCall %_arr_S_uint_4 %conv_arr_4_S %100
+               OpStore %w %97
+        %102 = OpAccessChain %_ptr_Workgroup_S %w %int_1
+        %105 = OpAccessChain %_ptr_Uniform_S_std140 %u %uint_0 %uint_2
+        %106 = OpLoad %S_std140 %105
+        %103 = OpFunctionCall %S %conv_S %106
+               OpStore %102 %103
+        %109 = OpAccessChain %_ptr_Workgroup_mat2v2float %w %int_3 %uint_1
+        %110 = OpFunctionCall %mat2v2float %load_u_2_m
+               OpStore %109 %110
+        %113 = OpAccessChain %_ptr_Workgroup_v2float %w %int_1 %uint_1 %111
+        %114 = OpAccessChain %_ptr_Uniform_v2float %u %uint_0 %37 %uint_2
+        %115 = OpLoad %v2float %114
+        %116 = OpVectorShuffle %v2float %115 %115 1 0
+               OpStore %113 %116
+               OpReturn
+               OpFunctionEnd
+          %f = OpFunction %void None %117
+        %119 = OpLabel
+        %121 = OpLoad %uint %local_invocation_index_1
+        %120 = OpFunctionCall %void %f_inner %121
+               OpReturn
+               OpFunctionEnd
diff --git a/test/tint/buffer/uniform/std140/mat2x2/to_workgroup.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/mat2x2/to_workgroup.wgsl.expected.wgsl
new file mode 100644
index 0000000..14ff361
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/mat2x2/to_workgroup.wgsl.expected.wgsl
@@ -0,0 +1,17 @@
+struct S {
+  before : i32,
+  m : mat2x2<f32>,
+  after : i32,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S, 4>;
+
+var<workgroup> w : array<S, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+  w = u;
+  w[1] = u[2];
+  w[3].m = u[2].m;
+  w[1].m[0] = u[0].m[1].yx;
+}
diff --git a/test/tint/buffer/uniform/std140/mat3x2/dynamic_index_via_ptr.wgsl b/test/tint/buffer/uniform/std140/mat3x2/dynamic_index_via_ptr.wgsl
new file mode 100644
index 0000000..86bae97
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/mat3x2/dynamic_index_via_ptr.wgsl
@@ -0,0 +1,33 @@
+struct Inner {
+  @size(64) m : mat3x2<f32>,
+}
+
+struct Outer {
+  a : array<Inner, 4>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<Outer, 4>;
+
+var<private> counter = 0;
+fn i() -> i32 { counter++; return counter; }
+
+@compute @workgroup_size(1)
+fn f() {
+  let I = 1;
+
+  let p_a           = &a;
+  let p_a_i         = &((*p_a)[i()]);
+  let p_a_i_a       = &((*p_a_i).a);
+  let p_a_i_a_i     = &((*p_a_i_a)[i()]);
+  let p_a_i_a_i_m   = &((*p_a_i_a_i).m);
+  let p_a_i_a_i_m_i = &((*p_a_i_a_i_m)[i()]);
+
+
+  let l_a             : array<Outer, 4> =  *p_a;
+  let l_a_i           : Outer           =  *p_a_i;
+  let l_a_i_a         : array<Inner, 4> =  *p_a_i_a;
+  let l_a_i_a_i       : Inner           =  *p_a_i_a_i;
+  let l_a_i_a_i_m     : mat3x2<f32>     =  *p_a_i_a_i_m;
+  let l_a_i_a_i_m_i   : vec2<f32>       =  *p_a_i_a_i_m_i;
+  let l_a_i_a_i_m_i_i : f32             = (*p_a_i_a_i_m_i)[i()];
+}
diff --git a/test/tint/buffer/uniform/std140/mat3x2/dynamic_index_via_ptr.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/mat3x2/dynamic_index_via_ptr.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..014b25d
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/mat3x2/dynamic_index_via_ptr.wgsl.expected.dxc.hlsl
@@ -0,0 +1,81 @@
+struct Inner {
+  float3x2 m;
+};
+struct Outer {
+  Inner a[4];
+};
+
+cbuffer cbuffer_a : register(b0, space0) {
+  uint4 a[64];
+};
+static int counter = 0;
+
+int i() {
+  counter = (counter + 1);
+  return counter;
+}
+
+float3x2 tint_symbol_8(uint4 buffer[64], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load = buffer[scalar_offset / 4];
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset_1 / 4];
+  const uint scalar_offset_2 = ((offset + 16u)) / 4;
+  uint4 ubo_load_2 = buffer[scalar_offset_2 / 4];
+  return float3x2(asfloat(((scalar_offset & 2) ? ubo_load.zw : ubo_load.xy)), asfloat(((scalar_offset_1 & 2) ? ubo_load_1.zw : ubo_load_1.xy)), asfloat(((scalar_offset_2 & 2) ? ubo_load_2.zw : ubo_load_2.xy)));
+}
+
+Inner tint_symbol_7(uint4 buffer[64], uint offset) {
+  const Inner tint_symbol_11 = {tint_symbol_8(buffer, (offset + 0u))};
+  return tint_symbol_11;
+}
+
+typedef Inner tint_symbol_6_ret[4];
+tint_symbol_6_ret tint_symbol_6(uint4 buffer[64], uint offset) {
+  Inner arr[4] = (Inner[4])0;
+  {
+    [loop] for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
+      arr[i_1] = tint_symbol_7(buffer, (offset + (i_1 * 64u)));
+    }
+  }
+  return arr;
+}
+
+Outer tint_symbol_5(uint4 buffer[64], uint offset) {
+  const Outer tint_symbol_12 = {tint_symbol_6(buffer, (offset + 0u))};
+  return tint_symbol_12;
+}
+
+typedef Outer tint_symbol_4_ret[4];
+tint_symbol_4_ret tint_symbol_4(uint4 buffer[64], uint offset) {
+  Outer arr_1[4] = (Outer[4])0;
+  {
+    [loop] for(uint i_2 = 0u; (i_2 < 4u); i_2 = (i_2 + 1u)) {
+      arr_1[i_2] = tint_symbol_5(buffer, (offset + (i_2 * 256u)));
+    }
+  }
+  return arr_1;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const int I = 1;
+  const int p_a_i_save = i();
+  const int p_a_i_a_i_save = i();
+  const int p_a_i_a_i_m_i_save = i();
+  const Outer l_a[4] = tint_symbol_4(a, 0u);
+  const Outer l_a_i = tint_symbol_5(a, (256u * uint(p_a_i_save)));
+  const Inner l_a_i_a[4] = tint_symbol_6(a, (256u * uint(p_a_i_save)));
+  const Inner l_a_i_a_i = tint_symbol_7(a, ((256u * uint(p_a_i_save)) + (64u * uint(p_a_i_a_i_save))));
+  const float3x2 l_a_i_a_i_m = tint_symbol_8(a, ((256u * uint(p_a_i_save)) + (64u * uint(p_a_i_a_i_save))));
+  const uint scalar_offset_3 = ((((256u * uint(p_a_i_save)) + (64u * uint(p_a_i_a_i_save))) + (8u * uint(p_a_i_a_i_m_i_save)))) / 4;
+  uint4 ubo_load_3 = a[scalar_offset_3 / 4];
+  const float2 l_a_i_a_i_m_i = asfloat(((scalar_offset_3 & 2) ? ubo_load_3.zw : ubo_load_3.xy));
+  const int tint_symbol = p_a_i_save;
+  const int tint_symbol_1 = p_a_i_a_i_save;
+  const int tint_symbol_2 = p_a_i_a_i_m_i_save;
+  const int tint_symbol_3 = i();
+  const uint scalar_offset_4 = (((((256u * uint(tint_symbol)) + (64u * uint(tint_symbol_1))) + (8u * uint(tint_symbol_2))) + (4u * uint(tint_symbol_3)))) / 4;
+  const float l_a_i_a_i_m_i_i = asfloat(a[scalar_offset_4 / 4][scalar_offset_4 % 4]);
+  return;
+}
diff --git a/test/tint/buffer/uniform/std140/mat3x2/dynamic_index_via_ptr.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/mat3x2/dynamic_index_via_ptr.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..014b25d
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/mat3x2/dynamic_index_via_ptr.wgsl.expected.fxc.hlsl
@@ -0,0 +1,81 @@
+struct Inner {
+  float3x2 m;
+};
+struct Outer {
+  Inner a[4];
+};
+
+cbuffer cbuffer_a : register(b0, space0) {
+  uint4 a[64];
+};
+static int counter = 0;
+
+int i() {
+  counter = (counter + 1);
+  return counter;
+}
+
+float3x2 tint_symbol_8(uint4 buffer[64], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load = buffer[scalar_offset / 4];
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset_1 / 4];
+  const uint scalar_offset_2 = ((offset + 16u)) / 4;
+  uint4 ubo_load_2 = buffer[scalar_offset_2 / 4];
+  return float3x2(asfloat(((scalar_offset & 2) ? ubo_load.zw : ubo_load.xy)), asfloat(((scalar_offset_1 & 2) ? ubo_load_1.zw : ubo_load_1.xy)), asfloat(((scalar_offset_2 & 2) ? ubo_load_2.zw : ubo_load_2.xy)));
+}
+
+Inner tint_symbol_7(uint4 buffer[64], uint offset) {
+  const Inner tint_symbol_11 = {tint_symbol_8(buffer, (offset + 0u))};
+  return tint_symbol_11;
+}
+
+typedef Inner tint_symbol_6_ret[4];
+tint_symbol_6_ret tint_symbol_6(uint4 buffer[64], uint offset) {
+  Inner arr[4] = (Inner[4])0;
+  {
+    [loop] for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
+      arr[i_1] = tint_symbol_7(buffer, (offset + (i_1 * 64u)));
+    }
+  }
+  return arr;
+}
+
+Outer tint_symbol_5(uint4 buffer[64], uint offset) {
+  const Outer tint_symbol_12 = {tint_symbol_6(buffer, (offset + 0u))};
+  return tint_symbol_12;
+}
+
+typedef Outer tint_symbol_4_ret[4];
+tint_symbol_4_ret tint_symbol_4(uint4 buffer[64], uint offset) {
+  Outer arr_1[4] = (Outer[4])0;
+  {
+    [loop] for(uint i_2 = 0u; (i_2 < 4u); i_2 = (i_2 + 1u)) {
+      arr_1[i_2] = tint_symbol_5(buffer, (offset + (i_2 * 256u)));
+    }
+  }
+  return arr_1;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const int I = 1;
+  const int p_a_i_save = i();
+  const int p_a_i_a_i_save = i();
+  const int p_a_i_a_i_m_i_save = i();
+  const Outer l_a[4] = tint_symbol_4(a, 0u);
+  const Outer l_a_i = tint_symbol_5(a, (256u * uint(p_a_i_save)));
+  const Inner l_a_i_a[4] = tint_symbol_6(a, (256u * uint(p_a_i_save)));
+  const Inner l_a_i_a_i = tint_symbol_7(a, ((256u * uint(p_a_i_save)) + (64u * uint(p_a_i_a_i_save))));
+  const float3x2 l_a_i_a_i_m = tint_symbol_8(a, ((256u * uint(p_a_i_save)) + (64u * uint(p_a_i_a_i_save))));
+  const uint scalar_offset_3 = ((((256u * uint(p_a_i_save)) + (64u * uint(p_a_i_a_i_save))) + (8u * uint(p_a_i_a_i_m_i_save)))) / 4;
+  uint4 ubo_load_3 = a[scalar_offset_3 / 4];
+  const float2 l_a_i_a_i_m_i = asfloat(((scalar_offset_3 & 2) ? ubo_load_3.zw : ubo_load_3.xy));
+  const int tint_symbol = p_a_i_save;
+  const int tint_symbol_1 = p_a_i_a_i_save;
+  const int tint_symbol_2 = p_a_i_a_i_m_i_save;
+  const int tint_symbol_3 = i();
+  const uint scalar_offset_4 = (((((256u * uint(tint_symbol)) + (64u * uint(tint_symbol_1))) + (8u * uint(tint_symbol_2))) + (4u * uint(tint_symbol_3)))) / 4;
+  const float l_a_i_a_i_m_i_i = asfloat(a[scalar_offset_4 / 4][scalar_offset_4 % 4]);
+  return;
+}
diff --git a/test/tint/buffer/uniform/std140/mat3x2/dynamic_index_via_ptr.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/mat3x2/dynamic_index_via_ptr.wgsl.expected.glsl
new file mode 100644
index 0000000..77e3585
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/mat3x2/dynamic_index_via_ptr.wgsl.expected.glsl
@@ -0,0 +1,138 @@
+#version 310 es
+
+struct Inner {
+  mat3x2 m;
+};
+
+struct Inner_std140 {
+  vec2 m_0;
+  vec2 m_1;
+  vec2 m_2;
+};
+
+struct Outer {
+  Inner a[4];
+};
+
+struct Outer_std140 {
+  Inner_std140 a[4];
+};
+
+struct a_block {
+  Outer_std140 inner[4];
+};
+
+layout(binding = 0) uniform a_block_1 {
+  Outer_std140 inner[4];
+} a;
+
+int counter = 0;
+int i() {
+  counter = (counter + 1);
+  return counter;
+}
+
+Inner conv_Inner(Inner_std140 val) {
+  Inner tint_symbol_4 = Inner(mat3x2(val.m_0, val.m_1, val.m_2));
+  return tint_symbol_4;
+}
+
+Inner[4] conv_arr_4_Inner(Inner_std140 val[4]) {
+  Inner arr[4] = Inner[4](Inner(mat3x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f)), Inner(mat3x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f)), Inner(mat3x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f)), Inner(mat3x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f)));
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = conv_Inner(val[i]);
+    }
+  }
+  return arr;
+}
+
+Outer conv_Outer(Outer_std140 val) {
+  Outer tint_symbol_5 = Outer(conv_arr_4_Inner(val.a));
+  return tint_symbol_5;
+}
+
+Outer[4] conv_arr_4_Outer(Outer_std140 val[4]) {
+  Outer arr[4] = Outer[4](Outer(Inner[4](Inner(mat3x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f)), Inner(mat3x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f)), Inner(mat3x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f)), Inner(mat3x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f)))), Outer(Inner[4](Inner(mat3x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f)), Inner(mat3x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f)), Inner(mat3x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f)), Inner(mat3x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f)))), Outer(Inner[4](Inner(mat3x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f)), Inner(mat3x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f)), Inner(mat3x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f)), Inner(mat3x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f)))), Outer(Inner[4](Inner(mat3x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f)), Inner(mat3x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f)), Inner(mat3x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f)), Inner(mat3x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f)))));
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = conv_Outer(val[i]);
+    }
+  }
+  return arr;
+}
+
+mat3x2 load_a_p0_a_p1_m(uint p0, uint p1) {
+  uint s_save = p0;
+  uint s_save_1 = p1;
+  return mat3x2(a.inner[s_save].a[s_save_1].m_0, a.inner[s_save].a[s_save_1].m_1, a.inner[s_save].a[s_save_1].m_2);
+}
+
+vec2 load_a_p0_a_p1_m_p2(uint p0, uint p1, uint p2) {
+  switch(p2) {
+    case 0u: {
+      return a.inner[p0].a[p1].m_0;
+      break;
+    }
+    case 1u: {
+      return a.inner[p0].a[p1].m_1;
+      break;
+    }
+    case 2u: {
+      return a.inner[p0].a[p1].m_2;
+      break;
+    }
+    default: {
+      return vec2(0.0f);
+      break;
+    }
+  }
+}
+
+float load_a_p0_a_p1_m_p2_p3(uint p0, uint p1, uint p2, uint p3) {
+  switch(p2) {
+    case 0u: {
+      return a.inner[p0].a[p1].m_0[p3];
+      break;
+    }
+    case 1u: {
+      return a.inner[p0].a[p1].m_1[p3];
+      break;
+    }
+    case 2u: {
+      return a.inner[p0].a[p1].m_2[p3];
+      break;
+    }
+    default: {
+      return 0.0f;
+      break;
+    }
+  }
+}
+
+void f() {
+  int I = 1;
+  Outer p_a[4] = conv_arr_4_Outer(a.inner);
+  int tint_symbol = i();
+  Outer p_a_i = conv_Outer(a.inner[tint_symbol]);
+  Inner p_a_i_a[4] = conv_arr_4_Inner(a.inner[tint_symbol].a);
+  int tint_symbol_1 = i();
+  Inner p_a_i_a_i = conv_Inner(a.inner[tint_symbol].a[tint_symbol_1]);
+  mat3x2 p_a_i_a_i_m = load_a_p0_a_p1_m(uint(tint_symbol), uint(tint_symbol_1));
+  int tint_symbol_2 = i();
+  vec2 p_a_i_a_i_m_i = load_a_p0_a_p1_m_p2(uint(tint_symbol), uint(tint_symbol_1), uint(tint_symbol_2));
+  Outer l_a[4] = conv_arr_4_Outer(a.inner);
+  Outer l_a_i = conv_Outer(a.inner[tint_symbol]);
+  Inner l_a_i_a[4] = conv_arr_4_Inner(a.inner[tint_symbol].a);
+  Inner l_a_i_a_i = conv_Inner(a.inner[tint_symbol].a[tint_symbol_1]);
+  mat3x2 l_a_i_a_i_m = load_a_p0_a_p1_m(uint(tint_symbol), uint(tint_symbol_1));
+  vec2 l_a_i_a_i_m_i = load_a_p0_a_p1_m_p2(uint(tint_symbol), uint(tint_symbol_1), uint(tint_symbol_2));
+  int tint_symbol_3 = i();
+  float l_a_i_a_i_m_i_i = load_a_p0_a_p1_m_p2_p3(uint(tint_symbol), uint(tint_symbol_1), uint(tint_symbol_2), uint(tint_symbol_3));
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}
diff --git a/test/tint/buffer/uniform/std140/mat3x2/dynamic_index_via_ptr.wgsl.expected.msl b/test/tint/buffer/uniform/std140/mat3x2/dynamic_index_via_ptr.wgsl.expected.msl
new file mode 100644
index 0000000..572eb1c
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/mat3x2/dynamic_index_via_ptr.wgsl.expected.msl
@@ -0,0 +1,50 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+struct Inner {
+  /* 0x0000 */ float3x2 m;
+  /* 0x0018 */ tint_array<int8_t, 40> tint_pad;
+};
+
+struct Outer {
+  /* 0x0000 */ tint_array<Inner, 4> a;
+};
+
+int i() {
+  thread int tint_symbol_4 = 0;
+  tint_symbol_4 = as_type<int>((as_type<uint>(tint_symbol_4) + as_type<uint>(1)));
+  return tint_symbol_4;
+}
+
+kernel void f(const constant tint_array<Outer, 4>* tint_symbol_5 [[buffer(0)]]) {
+  int const I = 1;
+  int const tint_symbol = i();
+  int const p_a_i_save = tint_symbol;
+  int const tint_symbol_1 = i();
+  int const p_a_i_a_i_save = tint_symbol_1;
+  int const tint_symbol_2 = i();
+  int const p_a_i_a_i_m_i_save = tint_symbol_2;
+  tint_array<Outer, 4> const l_a = *(tint_symbol_5);
+  Outer const l_a_i = (*(tint_symbol_5))[p_a_i_save];
+  tint_array<Inner, 4> const l_a_i_a = (*(tint_symbol_5))[p_a_i_save].a;
+  Inner const l_a_i_a_i = (*(tint_symbol_5))[p_a_i_save].a[p_a_i_a_i_save];
+  float3x2 const l_a_i_a_i_m = (*(tint_symbol_5))[p_a_i_save].a[p_a_i_a_i_save].m;
+  float2 const l_a_i_a_i_m_i = (*(tint_symbol_5))[p_a_i_save].a[p_a_i_a_i_save].m[p_a_i_a_i_m_i_save];
+  int const tint_symbol_3 = i();
+  float const l_a_i_a_i_m_i_i = (*(tint_symbol_5))[p_a_i_save].a[p_a_i_a_i_save].m[p_a_i_a_i_m_i_save][tint_symbol_3];
+  return;
+}
+
diff --git a/test/tint/buffer/uniform/std140/mat3x2/dynamic_index_via_ptr.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/mat3x2/dynamic_index_via_ptr.wgsl.expected.spvasm
new file mode 100644
index 0000000..726683f
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/mat3x2/dynamic_index_via_ptr.wgsl.expected.spvasm
@@ -0,0 +1,338 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 218
+; Schema: 0
+               OpCapability Shader
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %a_block "a_block"
+               OpMemberName %a_block 0 "inner"
+               OpName %Outer_std140 "Outer_std140"
+               OpMemberName %Outer_std140 0 "a"
+               OpName %Inner_std140 "Inner_std140"
+               OpMemberName %Inner_std140 0 "m_0"
+               OpMemberName %Inner_std140 1 "m_1"
+               OpMemberName %Inner_std140 2 "m_2"
+               OpName %a "a"
+               OpName %counter "counter"
+               OpName %i "i"
+               OpName %Inner "Inner"
+               OpMemberName %Inner 0 "m"
+               OpName %conv_Inner "conv_Inner"
+               OpName %val "val"
+               OpName %conv_arr_4_Inner "conv_arr_4_Inner"
+               OpName %val_0 "val"
+               OpName %arr "arr"
+               OpName %i_0 "i"
+               OpName %var_for_index "var_for_index"
+               OpName %Outer "Outer"
+               OpMemberName %Outer 0 "a"
+               OpName %conv_Outer "conv_Outer"
+               OpName %val_1 "val"
+               OpName %conv_arr_4_Outer "conv_arr_4_Outer"
+               OpName %val_2 "val"
+               OpName %arr_0 "arr"
+               OpName %i_1 "i"
+               OpName %var_for_index_1 "var_for_index_1"
+               OpName %load_a_p0_a_p1_m "load_a_p0_a_p1_m"
+               OpName %p0 "p0"
+               OpName %p1 "p1"
+               OpName %load_a_p0_a_p1_m_p2 "load_a_p0_a_p1_m_p2"
+               OpName %p0_0 "p0"
+               OpName %p1_0 "p1"
+               OpName %p2 "p2"
+               OpName %load_a_p0_a_p1_m_p2_p3 "load_a_p0_a_p1_m_p2_p3"
+               OpName %p0_1 "p0"
+               OpName %p1_1 "p1"
+               OpName %p2_0 "p2"
+               OpName %p3 "p3"
+               OpName %f "f"
+               OpDecorate %a_block Block
+               OpMemberDecorate %a_block 0 Offset 0
+               OpMemberDecorate %Outer_std140 0 Offset 0
+               OpMemberDecorate %Inner_std140 0 Offset 0
+               OpMemberDecorate %Inner_std140 1 Offset 8
+               OpMemberDecorate %Inner_std140 2 Offset 16
+               OpDecorate %_arr_Inner_std140_uint_4 ArrayStride 64
+               OpDecorate %_arr_Outer_std140_uint_4 ArrayStride 256
+               OpDecorate %a NonWritable
+               OpDecorate %a DescriptorSet 0
+               OpDecorate %a Binding 0
+               OpMemberDecorate %Inner 0 Offset 0
+               OpMemberDecorate %Inner 0 ColMajor
+               OpMemberDecorate %Inner 0 MatrixStride 8
+               OpDecorate %_arr_Inner_uint_4 ArrayStride 64
+               OpMemberDecorate %Outer 0 Offset 0
+               OpDecorate %_arr_Outer_uint_4 ArrayStride 256
+      %float = OpTypeFloat 32
+    %v2float = OpTypeVector %float 2
+%Inner_std140 = OpTypeStruct %v2float %v2float %v2float
+       %uint = OpTypeInt 32 0
+     %uint_4 = OpConstant %uint 4
+%_arr_Inner_std140_uint_4 = OpTypeArray %Inner_std140 %uint_4
+%Outer_std140 = OpTypeStruct %_arr_Inner_std140_uint_4
+%_arr_Outer_std140_uint_4 = OpTypeArray %Outer_std140 %uint_4
+    %a_block = OpTypeStruct %_arr_Outer_std140_uint_4
+%_ptr_Uniform_a_block = OpTypePointer Uniform %a_block
+          %a = OpVariable %_ptr_Uniform_a_block Uniform
+        %int = OpTypeInt 32 1
+         %13 = OpConstantNull %int
+%_ptr_Private_int = OpTypePointer Private %int
+    %counter = OpVariable %_ptr_Private_int Private %13
+         %16 = OpTypeFunction %int
+      %int_1 = OpConstant %int 1
+%mat3v2float = OpTypeMatrix %v2float 3
+      %Inner = OpTypeStruct %mat3v2float
+         %23 = OpTypeFunction %Inner %Inner_std140
+%_arr_Inner_uint_4 = OpTypeArray %Inner %uint_4
+         %34 = OpTypeFunction %_arr_Inner_uint_4 %_arr_Inner_std140_uint_4
+%_ptr_Function__arr_Inner_uint_4 = OpTypePointer Function %_arr_Inner_uint_4
+         %41 = OpConstantNull %_arr_Inner_uint_4
+%_ptr_Function_uint = OpTypePointer Function %uint
+         %44 = OpConstantNull %uint
+       %bool = OpTypeBool
+%_ptr_Function__arr_Inner_std140_uint_4 = OpTypePointer Function %_arr_Inner_std140_uint_4
+         %57 = OpConstantNull %_arr_Inner_std140_uint_4
+%_ptr_Function_Inner = OpTypePointer Function %Inner
+%_ptr_Function_Inner_std140 = OpTypePointer Function %Inner_std140
+     %uint_1 = OpConstant %uint 1
+      %Outer = OpTypeStruct %_arr_Inner_uint_4
+         %70 = OpTypeFunction %Outer %Outer_std140
+%_arr_Outer_uint_4 = OpTypeArray %Outer %uint_4
+         %78 = OpTypeFunction %_arr_Outer_uint_4 %_arr_Outer_std140_uint_4
+%_ptr_Function__arr_Outer_uint_4 = OpTypePointer Function %_arr_Outer_uint_4
+         %85 = OpConstantNull %_arr_Outer_uint_4
+%_ptr_Function__arr_Outer_std140_uint_4 = OpTypePointer Function %_arr_Outer_std140_uint_4
+         %98 = OpConstantNull %_arr_Outer_std140_uint_4
+%_ptr_Function_Outer = OpTypePointer Function %Outer
+%_ptr_Function_Outer_std140 = OpTypePointer Function %Outer_std140
+        %110 = OpTypeFunction %mat3v2float %uint %uint
+     %uint_0 = OpConstant %uint 0
+%_ptr_Uniform_v2float = OpTypePointer Uniform %v2float
+     %uint_2 = OpConstant %uint 2
+        %125 = OpTypeFunction %v2float %uint %uint %uint
+        %142 = OpConstantNull %v2float
+        %143 = OpTypeFunction %float %uint %uint %uint %uint
+%_ptr_Uniform_float = OpTypePointer Uniform %float
+        %162 = OpConstantNull %float
+       %void = OpTypeVoid
+        %163 = OpTypeFunction %void
+%_ptr_Uniform__arr_Outer_std140_uint_4 = OpTypePointer Uniform %_arr_Outer_std140_uint_4
+%_ptr_Uniform_Outer_std140 = OpTypePointer Uniform %Outer_std140
+%_ptr_Uniform__arr_Inner_std140_uint_4 = OpTypePointer Uniform %_arr_Inner_std140_uint_4
+%_ptr_Uniform_Inner_std140 = OpTypePointer Uniform %Inner_std140
+          %i = OpFunction %int None %16
+         %18 = OpLabel
+         %19 = OpLoad %int %counter
+         %21 = OpIAdd %int %19 %int_1
+               OpStore %counter %21
+         %22 = OpLoad %int %counter
+               OpReturnValue %22
+               OpFunctionEnd
+ %conv_Inner = OpFunction %Inner None %23
+        %val = OpFunctionParameter %Inner_std140
+         %28 = OpLabel
+         %29 = OpCompositeExtract %v2float %val 0
+         %30 = OpCompositeExtract %v2float %val 1
+         %31 = OpCompositeExtract %v2float %val 2
+         %32 = OpCompositeConstruct %mat3v2float %29 %30 %31
+         %33 = OpCompositeConstruct %Inner %32
+               OpReturnValue %33
+               OpFunctionEnd
+%conv_arr_4_Inner = OpFunction %_arr_Inner_uint_4 None %34
+      %val_0 = OpFunctionParameter %_arr_Inner_std140_uint_4
+         %38 = OpLabel
+        %arr = OpVariable %_ptr_Function__arr_Inner_uint_4 Function %41
+        %i_0 = OpVariable %_ptr_Function_uint Function %44
+%var_for_index = OpVariable %_ptr_Function__arr_Inner_std140_uint_4 Function %57
+               OpBranch %45
+         %45 = OpLabel
+               OpLoopMerge %46 %47 None
+               OpBranch %48
+         %48 = OpLabel
+         %50 = OpLoad %uint %i_0
+         %51 = OpULessThan %bool %50 %uint_4
+         %49 = OpLogicalNot %bool %51
+               OpSelectionMerge %53 None
+               OpBranchConditional %49 %54 %53
+         %54 = OpLabel
+               OpBranch %46
+         %53 = OpLabel
+               OpStore %var_for_index %val_0
+         %58 = OpLoad %uint %i_0
+         %60 = OpAccessChain %_ptr_Function_Inner %arr %58
+         %62 = OpLoad %uint %i_0
+         %64 = OpAccessChain %_ptr_Function_Inner_std140 %var_for_index %62
+         %65 = OpLoad %Inner_std140 %64
+         %61 = OpFunctionCall %Inner %conv_Inner %65
+               OpStore %60 %61
+               OpBranch %47
+         %47 = OpLabel
+         %66 = OpLoad %uint %i_0
+         %68 = OpIAdd %uint %66 %uint_1
+               OpStore %i_0 %68
+               OpBranch %45
+         %46 = OpLabel
+         %69 = OpLoad %_arr_Inner_uint_4 %arr
+               OpReturnValue %69
+               OpFunctionEnd
+ %conv_Outer = OpFunction %Outer None %70
+      %val_1 = OpFunctionParameter %Outer_std140
+         %74 = OpLabel
+         %76 = OpCompositeExtract %_arr_Inner_std140_uint_4 %val_1 0
+         %75 = OpFunctionCall %_arr_Inner_uint_4 %conv_arr_4_Inner %76
+         %77 = OpCompositeConstruct %Outer %75
+               OpReturnValue %77
+               OpFunctionEnd
+%conv_arr_4_Outer = OpFunction %_arr_Outer_uint_4 None %78
+      %val_2 = OpFunctionParameter %_arr_Outer_std140_uint_4
+         %82 = OpLabel
+      %arr_0 = OpVariable %_ptr_Function__arr_Outer_uint_4 Function %85
+        %i_1 = OpVariable %_ptr_Function_uint Function %44
+%var_for_index_1 = OpVariable %_ptr_Function__arr_Outer_std140_uint_4 Function %98
+               OpBranch %87
+         %87 = OpLabel
+               OpLoopMerge %88 %89 None
+               OpBranch %90
+         %90 = OpLabel
+         %92 = OpLoad %uint %i_1
+         %93 = OpULessThan %bool %92 %uint_4
+         %91 = OpLogicalNot %bool %93
+               OpSelectionMerge %94 None
+               OpBranchConditional %91 %95 %94
+         %95 = OpLabel
+               OpBranch %88
+         %94 = OpLabel
+               OpStore %var_for_index_1 %val_2
+         %99 = OpLoad %uint %i_1
+        %101 = OpAccessChain %_ptr_Function_Outer %arr_0 %99
+        %103 = OpLoad %uint %i_1
+        %105 = OpAccessChain %_ptr_Function_Outer_std140 %var_for_index_1 %103
+        %106 = OpLoad %Outer_std140 %105
+        %102 = OpFunctionCall %Outer %conv_Outer %106
+               OpStore %101 %102
+               OpBranch %89
+         %89 = OpLabel
+        %107 = OpLoad %uint %i_1
+        %108 = OpIAdd %uint %107 %uint_1
+               OpStore %i_1 %108
+               OpBranch %87
+         %88 = OpLabel
+        %109 = OpLoad %_arr_Outer_uint_4 %arr_0
+               OpReturnValue %109
+               OpFunctionEnd
+%load_a_p0_a_p1_m = OpFunction %mat3v2float None %110
+         %p0 = OpFunctionParameter %uint
+         %p1 = OpFunctionParameter %uint
+        %114 = OpLabel
+        %117 = OpAccessChain %_ptr_Uniform_v2float %a %uint_0 %p0 %uint_0 %p1 %uint_0
+        %118 = OpLoad %v2float %117
+        %119 = OpAccessChain %_ptr_Uniform_v2float %a %uint_0 %p0 %uint_0 %p1 %uint_1
+        %120 = OpLoad %v2float %119
+        %122 = OpAccessChain %_ptr_Uniform_v2float %a %uint_0 %p0 %uint_0 %p1 %uint_2
+        %123 = OpLoad %v2float %122
+        %124 = OpCompositeConstruct %mat3v2float %118 %120 %123
+               OpReturnValue %124
+               OpFunctionEnd
+%load_a_p0_a_p1_m_p2 = OpFunction %v2float None %125
+       %p0_0 = OpFunctionParameter %uint
+       %p1_0 = OpFunctionParameter %uint
+         %p2 = OpFunctionParameter %uint
+        %130 = OpLabel
+               OpSelectionMerge %131 None
+               OpSwitch %p2 %132 0 %133 1 %134 2 %135
+        %133 = OpLabel
+        %136 = OpAccessChain %_ptr_Uniform_v2float %a %uint_0 %p0_0 %uint_0 %p1_0 %uint_0
+        %137 = OpLoad %v2float %136
+               OpReturnValue %137
+        %134 = OpLabel
+        %138 = OpAccessChain %_ptr_Uniform_v2float %a %uint_0 %p0_0 %uint_0 %p1_0 %uint_1
+        %139 = OpLoad %v2float %138
+               OpReturnValue %139
+        %135 = OpLabel
+        %140 = OpAccessChain %_ptr_Uniform_v2float %a %uint_0 %p0_0 %uint_0 %p1_0 %uint_2
+        %141 = OpLoad %v2float %140
+               OpReturnValue %141
+        %132 = OpLabel
+               OpReturnValue %142
+        %131 = OpLabel
+               OpReturnValue %142
+               OpFunctionEnd
+%load_a_p0_a_p1_m_p2_p3 = OpFunction %float None %143
+       %p0_1 = OpFunctionParameter %uint
+       %p1_1 = OpFunctionParameter %uint
+       %p2_0 = OpFunctionParameter %uint
+         %p3 = OpFunctionParameter %uint
+        %149 = OpLabel
+               OpSelectionMerge %150 None
+               OpSwitch %p2_0 %151 0 %152 1 %153 2 %154
+        %152 = OpLabel
+        %156 = OpAccessChain %_ptr_Uniform_float %a %uint_0 %p0_1 %uint_0 %p1_1 %uint_0 %p3
+        %157 = OpLoad %float %156
+               OpReturnValue %157
+        %153 = OpLabel
+        %158 = OpAccessChain %_ptr_Uniform_float %a %uint_0 %p0_1 %uint_0 %p1_1 %uint_1 %p3
+        %159 = OpLoad %float %158
+               OpReturnValue %159
+        %154 = OpLabel
+        %160 = OpAccessChain %_ptr_Uniform_float %a %uint_0 %p0_1 %uint_0 %p1_1 %uint_2 %p3
+        %161 = OpLoad %float %160
+               OpReturnValue %161
+        %151 = OpLabel
+               OpReturnValue %162
+        %150 = OpLabel
+               OpReturnValue %162
+               OpFunctionEnd
+          %f = OpFunction %void None %163
+        %166 = OpLabel
+        %169 = OpAccessChain %_ptr_Uniform__arr_Outer_std140_uint_4 %a %uint_0
+        %170 = OpLoad %_arr_Outer_std140_uint_4 %169
+        %167 = OpFunctionCall %_arr_Outer_uint_4 %conv_arr_4_Outer %170
+        %171 = OpFunctionCall %int %i
+        %174 = OpAccessChain %_ptr_Uniform_Outer_std140 %a %uint_0 %171
+        %175 = OpLoad %Outer_std140 %174
+        %172 = OpFunctionCall %Outer %conv_Outer %175
+        %178 = OpAccessChain %_ptr_Uniform__arr_Inner_std140_uint_4 %a %uint_0 %171 %uint_0
+        %179 = OpLoad %_arr_Inner_std140_uint_4 %178
+        %176 = OpFunctionCall %_arr_Inner_uint_4 %conv_arr_4_Inner %179
+        %180 = OpFunctionCall %int %i
+        %183 = OpAccessChain %_ptr_Uniform_Inner_std140 %a %uint_0 %171 %uint_0 %180
+        %184 = OpLoad %Inner_std140 %183
+        %181 = OpFunctionCall %Inner %conv_Inner %184
+        %186 = OpBitcast %uint %171
+        %187 = OpBitcast %uint %180
+        %185 = OpFunctionCall %mat3v2float %load_a_p0_a_p1_m %186 %187
+        %188 = OpFunctionCall %int %i
+        %190 = OpBitcast %uint %171
+        %191 = OpBitcast %uint %180
+        %192 = OpBitcast %uint %188
+        %189 = OpFunctionCall %v2float %load_a_p0_a_p1_m_p2 %190 %191 %192
+        %194 = OpAccessChain %_ptr_Uniform__arr_Outer_std140_uint_4 %a %uint_0
+        %195 = OpLoad %_arr_Outer_std140_uint_4 %194
+        %193 = OpFunctionCall %_arr_Outer_uint_4 %conv_arr_4_Outer %195
+        %197 = OpAccessChain %_ptr_Uniform_Outer_std140 %a %uint_0 %171
+        %198 = OpLoad %Outer_std140 %197
+        %196 = OpFunctionCall %Outer %conv_Outer %198
+        %200 = OpAccessChain %_ptr_Uniform__arr_Inner_std140_uint_4 %a %uint_0 %171 %uint_0
+        %201 = OpLoad %_arr_Inner_std140_uint_4 %200
+        %199 = OpFunctionCall %_arr_Inner_uint_4 %conv_arr_4_Inner %201
+        %203 = OpAccessChain %_ptr_Uniform_Inner_std140 %a %uint_0 %171 %uint_0 %180
+        %204 = OpLoad %Inner_std140 %203
+        %202 = OpFunctionCall %Inner %conv_Inner %204
+        %206 = OpBitcast %uint %171
+        %207 = OpBitcast %uint %180
+        %205 = OpFunctionCall %mat3v2float %load_a_p0_a_p1_m %206 %207
+        %209 = OpBitcast %uint %171
+        %210 = OpBitcast %uint %180
+        %211 = OpBitcast %uint %188
+        %208 = OpFunctionCall %v2float %load_a_p0_a_p1_m_p2 %209 %210 %211
+        %212 = OpFunctionCall %int %i
+        %214 = OpBitcast %uint %171
+        %215 = OpBitcast %uint %180
+        %216 = OpBitcast %uint %188
+        %217 = OpBitcast %uint %212
+        %213 = OpFunctionCall %float %load_a_p0_a_p1_m_p2_p3 %214 %215 %216 %217
+               OpReturn
+               OpFunctionEnd
diff --git a/test/tint/buffer/uniform/std140/mat3x2/dynamic_index_via_ptr.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/mat3x2/dynamic_index_via_ptr.wgsl.expected.wgsl
new file mode 100644
index 0000000..aea4e87
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/mat3x2/dynamic_index_via_ptr.wgsl.expected.wgsl
@@ -0,0 +1,35 @@
+struct Inner {
+  @size(64)
+  m : mat3x2<f32>,
+}
+
+struct Outer {
+  a : array<Inner, 4>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<Outer, 4>;
+
+var<private> counter = 0;
+
+fn i() -> i32 {
+  counter++;
+  return counter;
+}
+
+@compute @workgroup_size(1)
+fn f() {
+  let I = 1;
+  let p_a = &(a);
+  let p_a_i = &((*(p_a))[i()]);
+  let p_a_i_a = &((*(p_a_i)).a);
+  let p_a_i_a_i = &((*(p_a_i_a))[i()]);
+  let p_a_i_a_i_m = &((*(p_a_i_a_i)).m);
+  let p_a_i_a_i_m_i = &((*(p_a_i_a_i_m))[i()]);
+  let l_a : array<Outer, 4> = *(p_a);
+  let l_a_i : Outer = *(p_a_i);
+  let l_a_i_a : array<Inner, 4> = *(p_a_i_a);
+  let l_a_i_a_i : Inner = *(p_a_i_a_i);
+  let l_a_i_a_i_m : mat3x2<f32> = *(p_a_i_a_i_m);
+  let l_a_i_a_i_m_i : vec2<f32> = *(p_a_i_a_i_m_i);
+  let l_a_i_a_i_m_i_i : f32 = (*(p_a_i_a_i_m_i))[i()];
+}
diff --git a/test/tint/buffer/uniform/std140/mat3x2/static_index_via_ptr.wgsl b/test/tint/buffer/uniform/std140/mat3x2/static_index_via_ptr.wgsl
new file mode 100644
index 0000000..f52dbc0
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/mat3x2/static_index_via_ptr.wgsl
@@ -0,0 +1,30 @@
+struct Inner {
+  @size(64) m : mat3x2<f32>,
+}
+
+struct Outer {
+  a : array<Inner, 4>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<Outer, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+  let I = 1;
+
+  let p_a = &a;
+  let p_a_3 = &((*p_a)[3]);
+  let p_a_3_a = &((*p_a_3).a);
+  let p_a_3_a_2 = &((*p_a_3_a)[2]);
+  let p_a_3_a_2_m = &((*p_a_3_a_2).m);
+  let p_a_3_a_2_m_1 = &((*p_a_3_a_2_m)[1]);
+
+
+  let l_a             : array<Outer, 4> = *p_a;
+  let l_a_3           : Outer           = *p_a_3;
+  let l_a_3_a         : array<Inner, 4> = *p_a_3_a;
+  let l_a_3_a_2       : Inner           = *p_a_3_a_2;
+  let l_a_3_a_2_m     : mat3x2<f32>     = *p_a_3_a_2_m;
+  let l_a_3_a_2_m_1   : vec2<f32>       = *p_a_3_a_2_m_1;
+  let l_a_3_a_2_m_1_0 : f32             = (*p_a_3_a_2_m_1)[0];
+}
diff --git a/test/tint/buffer/uniform/std140/mat3x2/static_index_via_ptr.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/mat3x2/static_index_via_ptr.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..5c62062
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/mat3x2/static_index_via_ptr.wgsl.expected.dxc.hlsl
@@ -0,0 +1,65 @@
+struct Inner {
+  float3x2 m;
+};
+struct Outer {
+  Inner a[4];
+};
+
+cbuffer cbuffer_a : register(b0, space0) {
+  uint4 a[64];
+};
+
+float3x2 tint_symbol_4(uint4 buffer[64], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load = buffer[scalar_offset / 4];
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset_1 / 4];
+  const uint scalar_offset_2 = ((offset + 16u)) / 4;
+  uint4 ubo_load_2 = buffer[scalar_offset_2 / 4];
+  return float3x2(asfloat(((scalar_offset & 2) ? ubo_load.zw : ubo_load.xy)), asfloat(((scalar_offset_1 & 2) ? ubo_load_1.zw : ubo_load_1.xy)), asfloat(((scalar_offset_2 & 2) ? ubo_load_2.zw : ubo_load_2.xy)));
+}
+
+Inner tint_symbol_3(uint4 buffer[64], uint offset) {
+  const Inner tint_symbol_7 = {tint_symbol_4(buffer, (offset + 0u))};
+  return tint_symbol_7;
+}
+
+typedef Inner tint_symbol_2_ret[4];
+tint_symbol_2_ret tint_symbol_2(uint4 buffer[64], uint offset) {
+  Inner arr[4] = (Inner[4])0;
+  {
+    [loop] for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = tint_symbol_3(buffer, (offset + (i * 64u)));
+    }
+  }
+  return arr;
+}
+
+Outer tint_symbol_1(uint4 buffer[64], uint offset) {
+  const Outer tint_symbol_8 = {tint_symbol_2(buffer, (offset + 0u))};
+  return tint_symbol_8;
+}
+
+typedef Outer tint_symbol_ret[4];
+tint_symbol_ret tint_symbol(uint4 buffer[64], uint offset) {
+  Outer arr_1[4] = (Outer[4])0;
+  {
+    [loop] for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
+      arr_1[i_1] = tint_symbol_1(buffer, (offset + (i_1 * 256u)));
+    }
+  }
+  return arr_1;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const int I = 1;
+  const Outer l_a[4] = tint_symbol(a, 0u);
+  const Outer l_a_3 = tint_symbol_1(a, 768u);
+  const Inner l_a_3_a[4] = tint_symbol_2(a, 768u);
+  const Inner l_a_3_a_2 = tint_symbol_3(a, 896u);
+  const float3x2 l_a_3_a_2_m = tint_symbol_4(a, 896u);
+  const float2 l_a_3_a_2_m_1 = asfloat(a[56].zw);
+  const float l_a_3_a_2_m_1_0 = asfloat(a[56].z);
+  return;
+}
diff --git a/test/tint/buffer/uniform/std140/mat3x2/static_index_via_ptr.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/mat3x2/static_index_via_ptr.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..5c62062
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/mat3x2/static_index_via_ptr.wgsl.expected.fxc.hlsl
@@ -0,0 +1,65 @@
+struct Inner {
+  float3x2 m;
+};
+struct Outer {
+  Inner a[4];
+};
+
+cbuffer cbuffer_a : register(b0, space0) {
+  uint4 a[64];
+};
+
+float3x2 tint_symbol_4(uint4 buffer[64], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load = buffer[scalar_offset / 4];
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset_1 / 4];
+  const uint scalar_offset_2 = ((offset + 16u)) / 4;
+  uint4 ubo_load_2 = buffer[scalar_offset_2 / 4];
+  return float3x2(asfloat(((scalar_offset & 2) ? ubo_load.zw : ubo_load.xy)), asfloat(((scalar_offset_1 & 2) ? ubo_load_1.zw : ubo_load_1.xy)), asfloat(((scalar_offset_2 & 2) ? ubo_load_2.zw : ubo_load_2.xy)));
+}
+
+Inner tint_symbol_3(uint4 buffer[64], uint offset) {
+  const Inner tint_symbol_7 = {tint_symbol_4(buffer, (offset + 0u))};
+  return tint_symbol_7;
+}
+
+typedef Inner tint_symbol_2_ret[4];
+tint_symbol_2_ret tint_symbol_2(uint4 buffer[64], uint offset) {
+  Inner arr[4] = (Inner[4])0;
+  {
+    [loop] for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = tint_symbol_3(buffer, (offset + (i * 64u)));
+    }
+  }
+  return arr;
+}
+
+Outer tint_symbol_1(uint4 buffer[64], uint offset) {
+  const Outer tint_symbol_8 = {tint_symbol_2(buffer, (offset + 0u))};
+  return tint_symbol_8;
+}
+
+typedef Outer tint_symbol_ret[4];
+tint_symbol_ret tint_symbol(uint4 buffer[64], uint offset) {
+  Outer arr_1[4] = (Outer[4])0;
+  {
+    [loop] for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
+      arr_1[i_1] = tint_symbol_1(buffer, (offset + (i_1 * 256u)));
+    }
+  }
+  return arr_1;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const int I = 1;
+  const Outer l_a[4] = tint_symbol(a, 0u);
+  const Outer l_a_3 = tint_symbol_1(a, 768u);
+  const Inner l_a_3_a[4] = tint_symbol_2(a, 768u);
+  const Inner l_a_3_a_2 = tint_symbol_3(a, 896u);
+  const float3x2 l_a_3_a_2_m = tint_symbol_4(a, 896u);
+  const float2 l_a_3_a_2_m_1 = asfloat(a[56].zw);
+  const float l_a_3_a_2_m_1_0 = asfloat(a[56].z);
+  return;
+}
diff --git a/test/tint/buffer/uniform/std140/mat3x2/static_index_via_ptr.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/mat3x2/static_index_via_ptr.wgsl.expected.glsl
new file mode 100644
index 0000000..3df60e5
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/mat3x2/static_index_via_ptr.wgsl.expected.glsl
@@ -0,0 +1,84 @@
+#version 310 es
+
+struct Inner {
+  mat3x2 m;
+};
+
+struct Inner_std140 {
+  vec2 m_0;
+  vec2 m_1;
+  vec2 m_2;
+};
+
+struct Outer {
+  Inner a[4];
+};
+
+struct Outer_std140 {
+  Inner_std140 a[4];
+};
+
+struct a_block {
+  Outer_std140 inner[4];
+};
+
+layout(binding = 0) uniform a_block_1 {
+  Outer_std140 inner[4];
+} a;
+
+Inner conv_Inner(Inner_std140 val) {
+  Inner tint_symbol = Inner(mat3x2(val.m_0, val.m_1, val.m_2));
+  return tint_symbol;
+}
+
+Inner[4] conv_arr_4_Inner(Inner_std140 val[4]) {
+  Inner arr[4] = Inner[4](Inner(mat3x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f)), Inner(mat3x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f)), Inner(mat3x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f)), Inner(mat3x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f)));
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = conv_Inner(val[i]);
+    }
+  }
+  return arr;
+}
+
+Outer conv_Outer(Outer_std140 val) {
+  Outer tint_symbol_1 = Outer(conv_arr_4_Inner(val.a));
+  return tint_symbol_1;
+}
+
+Outer[4] conv_arr_4_Outer(Outer_std140 val[4]) {
+  Outer arr[4] = Outer[4](Outer(Inner[4](Inner(mat3x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f)), Inner(mat3x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f)), Inner(mat3x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f)), Inner(mat3x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f)))), Outer(Inner[4](Inner(mat3x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f)), Inner(mat3x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f)), Inner(mat3x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f)), Inner(mat3x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f)))), Outer(Inner[4](Inner(mat3x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f)), Inner(mat3x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f)), Inner(mat3x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f)), Inner(mat3x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f)))), Outer(Inner[4](Inner(mat3x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f)), Inner(mat3x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f)), Inner(mat3x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f)), Inner(mat3x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f)))));
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = conv_Outer(val[i]);
+    }
+  }
+  return arr;
+}
+
+mat3x2 load_a_3_a_2_m() {
+  return mat3x2(a.inner[3u].a[2u].m_0, a.inner[3u].a[2u].m_1, a.inner[3u].a[2u].m_2);
+}
+
+void f() {
+  int I = 1;
+  Outer p_a[4] = conv_arr_4_Outer(a.inner);
+  Outer p_a_3 = conv_Outer(a.inner[3u]);
+  Inner p_a_3_a[4] = conv_arr_4_Inner(a.inner[3u].a);
+  Inner p_a_3_a_2 = conv_Inner(a.inner[3u].a[2u]);
+  mat3x2 p_a_3_a_2_m = load_a_3_a_2_m();
+  vec2 p_a_3_a_2_m_1 = a.inner[3u].a[2u].m_1;
+  Outer l_a[4] = conv_arr_4_Outer(a.inner);
+  Outer l_a_3 = conv_Outer(a.inner[3u]);
+  Inner l_a_3_a[4] = conv_arr_4_Inner(a.inner[3u].a);
+  Inner l_a_3_a_2 = conv_Inner(a.inner[3u].a[2u]);
+  mat3x2 l_a_3_a_2_m = load_a_3_a_2_m();
+  vec2 l_a_3_a_2_m_1 = a.inner[3u].a[2u].m_1;
+  float l_a_3_a_2_m_1_0 = a.inner[3u].a[2u].m_1[0u];
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}
diff --git a/test/tint/buffer/uniform/std140/mat3x2/static_index_via_ptr.wgsl.expected.msl b/test/tint/buffer/uniform/std140/mat3x2/static_index_via_ptr.wgsl.expected.msl
new file mode 100644
index 0000000..e9a1217
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/mat3x2/static_index_via_ptr.wgsl.expected.msl
@@ -0,0 +1,37 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+struct Inner {
+  /* 0x0000 */ float3x2 m;
+  /* 0x0018 */ tint_array<int8_t, 40> tint_pad;
+};
+
+struct Outer {
+  /* 0x0000 */ tint_array<Inner, 4> a;
+};
+
+kernel void f(const constant tint_array<Outer, 4>* tint_symbol [[buffer(0)]]) {
+  int const I = 1;
+  tint_array<Outer, 4> const l_a = *(tint_symbol);
+  Outer const l_a_3 = (*(tint_symbol))[3];
+  tint_array<Inner, 4> const l_a_3_a = (*(tint_symbol))[3].a;
+  Inner const l_a_3_a_2 = (*(tint_symbol))[3].a[2];
+  float3x2 const l_a_3_a_2_m = (*(tint_symbol))[3].a[2].m;
+  float2 const l_a_3_a_2_m_1 = (*(tint_symbol))[3].a[2].m[1];
+  float const l_a_3_a_2_m_1_0 = (*(tint_symbol))[3].a[2].m[1][0];
+  return;
+}
+
diff --git a/test/tint/buffer/uniform/std140/mat3x2/static_index_via_ptr.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/mat3x2/static_index_via_ptr.wgsl.expected.spvasm
new file mode 100644
index 0000000..63a09e6
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/mat3x2/static_index_via_ptr.wgsl.expected.spvasm
@@ -0,0 +1,244 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 156
+; Schema: 0
+               OpCapability Shader
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %a_block "a_block"
+               OpMemberName %a_block 0 "inner"
+               OpName %Outer_std140 "Outer_std140"
+               OpMemberName %Outer_std140 0 "a"
+               OpName %Inner_std140 "Inner_std140"
+               OpMemberName %Inner_std140 0 "m_0"
+               OpMemberName %Inner_std140 1 "m_1"
+               OpMemberName %Inner_std140 2 "m_2"
+               OpName %a "a"
+               OpName %Inner "Inner"
+               OpMemberName %Inner 0 "m"
+               OpName %conv_Inner "conv_Inner"
+               OpName %val "val"
+               OpName %conv_arr_4_Inner "conv_arr_4_Inner"
+               OpName %val_0 "val"
+               OpName %arr "arr"
+               OpName %i "i"
+               OpName %var_for_index "var_for_index"
+               OpName %Outer "Outer"
+               OpMemberName %Outer 0 "a"
+               OpName %conv_Outer "conv_Outer"
+               OpName %val_1 "val"
+               OpName %conv_arr_4_Outer "conv_arr_4_Outer"
+               OpName %val_2 "val"
+               OpName %arr_0 "arr"
+               OpName %i_0 "i"
+               OpName %var_for_index_1 "var_for_index_1"
+               OpName %load_a_3_a_2_m "load_a_3_a_2_m"
+               OpName %f "f"
+               OpDecorate %a_block Block
+               OpMemberDecorate %a_block 0 Offset 0
+               OpMemberDecorate %Outer_std140 0 Offset 0
+               OpMemberDecorate %Inner_std140 0 Offset 0
+               OpMemberDecorate %Inner_std140 1 Offset 8
+               OpMemberDecorate %Inner_std140 2 Offset 16
+               OpDecorate %_arr_Inner_std140_uint_4 ArrayStride 64
+               OpDecorate %_arr_Outer_std140_uint_4 ArrayStride 256
+               OpDecorate %a NonWritable
+               OpDecorate %a DescriptorSet 0
+               OpDecorate %a Binding 0
+               OpMemberDecorate %Inner 0 Offset 0
+               OpMemberDecorate %Inner 0 ColMajor
+               OpMemberDecorate %Inner 0 MatrixStride 8
+               OpDecorate %_arr_Inner_uint_4 ArrayStride 64
+               OpMemberDecorate %Outer 0 Offset 0
+               OpDecorate %_arr_Outer_uint_4 ArrayStride 256
+      %float = OpTypeFloat 32
+    %v2float = OpTypeVector %float 2
+%Inner_std140 = OpTypeStruct %v2float %v2float %v2float
+       %uint = OpTypeInt 32 0
+     %uint_4 = OpConstant %uint 4
+%_arr_Inner_std140_uint_4 = OpTypeArray %Inner_std140 %uint_4
+%Outer_std140 = OpTypeStruct %_arr_Inner_std140_uint_4
+%_arr_Outer_std140_uint_4 = OpTypeArray %Outer_std140 %uint_4
+    %a_block = OpTypeStruct %_arr_Outer_std140_uint_4
+%_ptr_Uniform_a_block = OpTypePointer Uniform %a_block
+          %a = OpVariable %_ptr_Uniform_a_block Uniform
+%mat3v2float = OpTypeMatrix %v2float 3
+      %Inner = OpTypeStruct %mat3v2float
+         %12 = OpTypeFunction %Inner %Inner_std140
+%_arr_Inner_uint_4 = OpTypeArray %Inner %uint_4
+         %23 = OpTypeFunction %_arr_Inner_uint_4 %_arr_Inner_std140_uint_4
+%_ptr_Function__arr_Inner_uint_4 = OpTypePointer Function %_arr_Inner_uint_4
+         %30 = OpConstantNull %_arr_Inner_uint_4
+%_ptr_Function_uint = OpTypePointer Function %uint
+         %33 = OpConstantNull %uint
+       %bool = OpTypeBool
+%_ptr_Function__arr_Inner_std140_uint_4 = OpTypePointer Function %_arr_Inner_std140_uint_4
+         %46 = OpConstantNull %_arr_Inner_std140_uint_4
+%_ptr_Function_Inner = OpTypePointer Function %Inner
+%_ptr_Function_Inner_std140 = OpTypePointer Function %Inner_std140
+     %uint_1 = OpConstant %uint 1
+      %Outer = OpTypeStruct %_arr_Inner_uint_4
+         %59 = OpTypeFunction %Outer %Outer_std140
+%_arr_Outer_uint_4 = OpTypeArray %Outer %uint_4
+         %67 = OpTypeFunction %_arr_Outer_uint_4 %_arr_Outer_std140_uint_4
+%_ptr_Function__arr_Outer_uint_4 = OpTypePointer Function %_arr_Outer_uint_4
+         %74 = OpConstantNull %_arr_Outer_uint_4
+%_ptr_Function__arr_Outer_std140_uint_4 = OpTypePointer Function %_arr_Outer_std140_uint_4
+         %87 = OpConstantNull %_arr_Outer_std140_uint_4
+%_ptr_Function_Outer = OpTypePointer Function %Outer
+%_ptr_Function_Outer_std140 = OpTypePointer Function %Outer_std140
+         %99 = OpTypeFunction %mat3v2float
+     %uint_0 = OpConstant %uint 0
+     %uint_3 = OpConstant %uint 3
+     %uint_2 = OpConstant %uint 2
+%_ptr_Uniform_v2float = OpTypePointer Uniform %v2float
+       %void = OpTypeVoid
+        %113 = OpTypeFunction %void
+        %int = OpTypeInt 32 1
+      %int_1 = OpConstant %int 1
+%_ptr_Uniform__arr_Outer_std140_uint_4 = OpTypePointer Uniform %_arr_Outer_std140_uint_4
+%_ptr_Uniform_Outer_std140 = OpTypePointer Uniform %Outer_std140
+%_ptr_Uniform__arr_Inner_std140_uint_4 = OpTypePointer Uniform %_arr_Inner_std140_uint_4
+%_ptr_Uniform_Inner_std140 = OpTypePointer Uniform %Inner_std140
+%_ptr_Uniform_float = OpTypePointer Uniform %float
+ %conv_Inner = OpFunction %Inner None %12
+        %val = OpFunctionParameter %Inner_std140
+         %17 = OpLabel
+         %18 = OpCompositeExtract %v2float %val 0
+         %19 = OpCompositeExtract %v2float %val 1
+         %20 = OpCompositeExtract %v2float %val 2
+         %21 = OpCompositeConstruct %mat3v2float %18 %19 %20
+         %22 = OpCompositeConstruct %Inner %21
+               OpReturnValue %22
+               OpFunctionEnd
+%conv_arr_4_Inner = OpFunction %_arr_Inner_uint_4 None %23
+      %val_0 = OpFunctionParameter %_arr_Inner_std140_uint_4
+         %27 = OpLabel
+        %arr = OpVariable %_ptr_Function__arr_Inner_uint_4 Function %30
+          %i = OpVariable %_ptr_Function_uint Function %33
+%var_for_index = OpVariable %_ptr_Function__arr_Inner_std140_uint_4 Function %46
+               OpBranch %34
+         %34 = OpLabel
+               OpLoopMerge %35 %36 None
+               OpBranch %37
+         %37 = OpLabel
+         %39 = OpLoad %uint %i
+         %40 = OpULessThan %bool %39 %uint_4
+         %38 = OpLogicalNot %bool %40
+               OpSelectionMerge %42 None
+               OpBranchConditional %38 %43 %42
+         %43 = OpLabel
+               OpBranch %35
+         %42 = OpLabel
+               OpStore %var_for_index %val_0
+         %47 = OpLoad %uint %i
+         %49 = OpAccessChain %_ptr_Function_Inner %arr %47
+         %51 = OpLoad %uint %i
+         %53 = OpAccessChain %_ptr_Function_Inner_std140 %var_for_index %51
+         %54 = OpLoad %Inner_std140 %53
+         %50 = OpFunctionCall %Inner %conv_Inner %54
+               OpStore %49 %50
+               OpBranch %36
+         %36 = OpLabel
+         %55 = OpLoad %uint %i
+         %57 = OpIAdd %uint %55 %uint_1
+               OpStore %i %57
+               OpBranch %34
+         %35 = OpLabel
+         %58 = OpLoad %_arr_Inner_uint_4 %arr
+               OpReturnValue %58
+               OpFunctionEnd
+ %conv_Outer = OpFunction %Outer None %59
+      %val_1 = OpFunctionParameter %Outer_std140
+         %63 = OpLabel
+         %65 = OpCompositeExtract %_arr_Inner_std140_uint_4 %val_1 0
+         %64 = OpFunctionCall %_arr_Inner_uint_4 %conv_arr_4_Inner %65
+         %66 = OpCompositeConstruct %Outer %64
+               OpReturnValue %66
+               OpFunctionEnd
+%conv_arr_4_Outer = OpFunction %_arr_Outer_uint_4 None %67
+      %val_2 = OpFunctionParameter %_arr_Outer_std140_uint_4
+         %71 = OpLabel
+      %arr_0 = OpVariable %_ptr_Function__arr_Outer_uint_4 Function %74
+        %i_0 = OpVariable %_ptr_Function_uint Function %33
+%var_for_index_1 = OpVariable %_ptr_Function__arr_Outer_std140_uint_4 Function %87
+               OpBranch %76
+         %76 = OpLabel
+               OpLoopMerge %77 %78 None
+               OpBranch %79
+         %79 = OpLabel
+         %81 = OpLoad %uint %i_0
+         %82 = OpULessThan %bool %81 %uint_4
+         %80 = OpLogicalNot %bool %82
+               OpSelectionMerge %83 None
+               OpBranchConditional %80 %84 %83
+         %84 = OpLabel
+               OpBranch %77
+         %83 = OpLabel
+               OpStore %var_for_index_1 %val_2
+         %88 = OpLoad %uint %i_0
+         %90 = OpAccessChain %_ptr_Function_Outer %arr_0 %88
+         %92 = OpLoad %uint %i_0
+         %94 = OpAccessChain %_ptr_Function_Outer_std140 %var_for_index_1 %92
+         %95 = OpLoad %Outer_std140 %94
+         %91 = OpFunctionCall %Outer %conv_Outer %95
+               OpStore %90 %91
+               OpBranch %78
+         %78 = OpLabel
+         %96 = OpLoad %uint %i_0
+         %97 = OpIAdd %uint %96 %uint_1
+               OpStore %i_0 %97
+               OpBranch %76
+         %77 = OpLabel
+         %98 = OpLoad %_arr_Outer_uint_4 %arr_0
+               OpReturnValue %98
+               OpFunctionEnd
+%load_a_3_a_2_m = OpFunction %mat3v2float None %99
+        %101 = OpLabel
+        %106 = OpAccessChain %_ptr_Uniform_v2float %a %uint_0 %uint_3 %uint_0 %uint_2 %uint_0
+        %107 = OpLoad %v2float %106
+        %108 = OpAccessChain %_ptr_Uniform_v2float %a %uint_0 %uint_3 %uint_0 %uint_2 %uint_1
+        %109 = OpLoad %v2float %108
+        %110 = OpAccessChain %_ptr_Uniform_v2float %a %uint_0 %uint_3 %uint_0 %uint_2 %uint_2
+        %111 = OpLoad %v2float %110
+        %112 = OpCompositeConstruct %mat3v2float %107 %109 %111
+               OpReturnValue %112
+               OpFunctionEnd
+          %f = OpFunction %void None %113
+        %116 = OpLabel
+        %121 = OpAccessChain %_ptr_Uniform__arr_Outer_std140_uint_4 %a %uint_0
+        %122 = OpLoad %_arr_Outer_std140_uint_4 %121
+        %119 = OpFunctionCall %_arr_Outer_uint_4 %conv_arr_4_Outer %122
+        %125 = OpAccessChain %_ptr_Uniform_Outer_std140 %a %uint_0 %uint_3
+        %126 = OpLoad %Outer_std140 %125
+        %123 = OpFunctionCall %Outer %conv_Outer %126
+        %129 = OpAccessChain %_ptr_Uniform__arr_Inner_std140_uint_4 %a %uint_0 %uint_3 %uint_0
+        %130 = OpLoad %_arr_Inner_std140_uint_4 %129
+        %127 = OpFunctionCall %_arr_Inner_uint_4 %conv_arr_4_Inner %130
+        %133 = OpAccessChain %_ptr_Uniform_Inner_std140 %a %uint_0 %uint_3 %uint_0 %uint_2
+        %134 = OpLoad %Inner_std140 %133
+        %131 = OpFunctionCall %Inner %conv_Inner %134
+        %135 = OpFunctionCall %mat3v2float %load_a_3_a_2_m
+        %136 = OpAccessChain %_ptr_Uniform_v2float %a %uint_0 %uint_3 %uint_0 %uint_2 %uint_1
+        %137 = OpLoad %v2float %136
+        %139 = OpAccessChain %_ptr_Uniform__arr_Outer_std140_uint_4 %a %uint_0
+        %140 = OpLoad %_arr_Outer_std140_uint_4 %139
+        %138 = OpFunctionCall %_arr_Outer_uint_4 %conv_arr_4_Outer %140
+        %142 = OpAccessChain %_ptr_Uniform_Outer_std140 %a %uint_0 %uint_3
+        %143 = OpLoad %Outer_std140 %142
+        %141 = OpFunctionCall %Outer %conv_Outer %143
+        %145 = OpAccessChain %_ptr_Uniform__arr_Inner_std140_uint_4 %a %uint_0 %uint_3 %uint_0
+        %146 = OpLoad %_arr_Inner_std140_uint_4 %145
+        %144 = OpFunctionCall %_arr_Inner_uint_4 %conv_arr_4_Inner %146
+        %148 = OpAccessChain %_ptr_Uniform_Inner_std140 %a %uint_0 %uint_3 %uint_0 %uint_2
+        %149 = OpLoad %Inner_std140 %148
+        %147 = OpFunctionCall %Inner %conv_Inner %149
+        %150 = OpFunctionCall %mat3v2float %load_a_3_a_2_m
+        %151 = OpAccessChain %_ptr_Uniform_v2float %a %uint_0 %uint_3 %uint_0 %uint_2 %uint_1
+        %152 = OpLoad %v2float %151
+        %154 = OpAccessChain %_ptr_Uniform_float %a %uint_0 %uint_3 %uint_0 %uint_2 %uint_1 %33
+        %155 = OpLoad %float %154
+               OpReturn
+               OpFunctionEnd
diff --git a/test/tint/buffer/uniform/std140/mat3x2/static_index_via_ptr.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/mat3x2/static_index_via_ptr.wgsl.expected.wgsl
new file mode 100644
index 0000000..bea0f53
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/mat3x2/static_index_via_ptr.wgsl.expected.wgsl
@@ -0,0 +1,28 @@
+struct Inner {
+  @size(64)
+  m : mat3x2<f32>,
+}
+
+struct Outer {
+  a : array<Inner, 4>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<Outer, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+  let I = 1;
+  let p_a = &(a);
+  let p_a_3 = &((*(p_a))[3]);
+  let p_a_3_a = &((*(p_a_3)).a);
+  let p_a_3_a_2 = &((*(p_a_3_a))[2]);
+  let p_a_3_a_2_m = &((*(p_a_3_a_2)).m);
+  let p_a_3_a_2_m_1 = &((*(p_a_3_a_2_m))[1]);
+  let l_a : array<Outer, 4> = *(p_a);
+  let l_a_3 : Outer = *(p_a_3);
+  let l_a_3_a : array<Inner, 4> = *(p_a_3_a);
+  let l_a_3_a_2 : Inner = *(p_a_3_a_2);
+  let l_a_3_a_2_m : mat3x2<f32> = *(p_a_3_a_2_m);
+  let l_a_3_a_2_m_1 : vec2<f32> = *(p_a_3_a_2_m_1);
+  let l_a_3_a_2_m_1_0 : f32 = (*(p_a_3_a_2_m_1))[0];
+}
diff --git a/test/tint/buffer/uniform/std140/mat3x2/to_builtin.wgsl b/test/tint/buffer/uniform/std140/mat3x2/to_builtin.wgsl
new file mode 100644
index 0000000..48cde20
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/mat3x2/to_builtin.wgsl
@@ -0,0 +1,14 @@
+struct S {
+  before : i32,
+  @size(32) m : mat3x2<f32>,
+  after : i32,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+    let t = transpose(u[2].m);
+    let l = length(u[0].m[1].yx);
+    let a = abs(u[0].m[1].yx.x);
+}
diff --git a/test/tint/buffer/uniform/std140/mat3x2/to_builtin.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/mat3x2/to_builtin.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..9f083f6
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/mat3x2/to_builtin.wgsl.expected.dxc.hlsl
@@ -0,0 +1,21 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[12];
+};
+
+float3x2 tint_symbol(uint4 buffer[12], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load = buffer[scalar_offset / 4];
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset_1 / 4];
+  const uint scalar_offset_2 = ((offset + 16u)) / 4;
+  uint4 ubo_load_2 = buffer[scalar_offset_2 / 4];
+  return float3x2(asfloat(((scalar_offset & 2) ? ubo_load.zw : ubo_load.xy)), asfloat(((scalar_offset_1 & 2) ? ubo_load_1.zw : ubo_load_1.xy)), asfloat(((scalar_offset_2 & 2) ? ubo_load_2.zw : ubo_load_2.xy)));
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const float2x3 t = transpose(tint_symbol(u, 104u));
+  const float l = length(asfloat(u[1].xy).yx);
+  const float a = abs(asfloat(u[1].xy).yx.x);
+  return;
+}
diff --git a/test/tint/buffer/uniform/std140/mat3x2/to_builtin.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/mat3x2/to_builtin.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..9f083f6
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/mat3x2/to_builtin.wgsl.expected.fxc.hlsl
@@ -0,0 +1,21 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[12];
+};
+
+float3x2 tint_symbol(uint4 buffer[12], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load = buffer[scalar_offset / 4];
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset_1 / 4];
+  const uint scalar_offset_2 = ((offset + 16u)) / 4;
+  uint4 ubo_load_2 = buffer[scalar_offset_2 / 4];
+  return float3x2(asfloat(((scalar_offset & 2) ? ubo_load.zw : ubo_load.xy)), asfloat(((scalar_offset_1 & 2) ? ubo_load_1.zw : ubo_load_1.xy)), asfloat(((scalar_offset_2 & 2) ? ubo_load_2.zw : ubo_load_2.xy)));
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const float2x3 t = transpose(tint_symbol(u, 104u));
+  const float l = length(asfloat(u[1].xy).yx);
+  const float a = abs(asfloat(u[1].xy).yx.x);
+  return;
+}
diff --git a/test/tint/buffer/uniform/std140/mat3x2/to_builtin.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/mat3x2/to_builtin.wgsl.expected.glsl
new file mode 100644
index 0000000..9ffd3ac
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/mat3x2/to_builtin.wgsl.expected.glsl
@@ -0,0 +1,39 @@
+#version 310 es
+
+struct S {
+  int before;
+  mat3x2 m;
+  int after;
+};
+
+struct S_std140 {
+  int before;
+  vec2 m_0;
+  vec2 m_1;
+  vec2 m_2;
+  int after;
+};
+
+struct u_block {
+  S_std140 inner[4];
+};
+
+layout(binding = 0) uniform u_block_1 {
+  S_std140 inner[4];
+} u;
+
+mat3x2 load_u_2_m() {
+  return mat3x2(u.inner[2u].m_0, u.inner[2u].m_1, u.inner[2u].m_2);
+}
+
+void f() {
+  mat2x3 t = transpose(load_u_2_m());
+  float l = length(u.inner[0u].m_1.yx);
+  float a = abs(u.inner[0u].m_1.yx[0u]);
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}
diff --git a/test/tint/buffer/uniform/std140/mat3x2/to_builtin.wgsl.expected.msl b/test/tint/buffer/uniform/std140/mat3x2/to_builtin.wgsl.expected.msl
new file mode 100644
index 0000000..a65b7f0
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/mat3x2/to_builtin.wgsl.expected.msl
@@ -0,0 +1,32 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+struct S {
+  /* 0x0000 */ int before;
+  /* 0x0004 */ tint_array<int8_t, 4> tint_pad;
+  /* 0x0008 */ float3x2 m;
+  /* 0x0020 */ tint_array<int8_t, 8> tint_pad_1;
+  /* 0x0028 */ int after;
+  /* 0x002c */ tint_array<int8_t, 4> tint_pad_2;
+};
+
+kernel void f(const constant tint_array<S, 4>* tint_symbol [[buffer(0)]]) {
+  float2x3 const t = transpose((*(tint_symbol))[2].m);
+  float const l = length(float2((*(tint_symbol))[0].m[1]).yx);
+  float const a = fabs(float2((*(tint_symbol))[0].m[1]).yx[0]);
+  return;
+}
+
diff --git a/test/tint/buffer/uniform/std140/mat3x2/to_builtin.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/mat3x2/to_builtin.wgsl.expected.spvasm
new file mode 100644
index 0000000..df0b5fd
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/mat3x2/to_builtin.wgsl.expected.spvasm
@@ -0,0 +1,80 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 46
+; Schema: 0
+               OpCapability Shader
+         %36 = OpExtInstImport "GLSL.std.450"
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %u_block "u_block"
+               OpMemberName %u_block 0 "inner"
+               OpName %S_std140 "S_std140"
+               OpMemberName %S_std140 0 "before"
+               OpMemberName %S_std140 1 "m_0"
+               OpMemberName %S_std140 2 "m_1"
+               OpMemberName %S_std140 3 "m_2"
+               OpMemberName %S_std140 4 "after"
+               OpName %u "u"
+               OpName %load_u_2_m "load_u_2_m"
+               OpName %f "f"
+               OpDecorate %u_block Block
+               OpMemberDecorate %u_block 0 Offset 0
+               OpMemberDecorate %S_std140 0 Offset 0
+               OpMemberDecorate %S_std140 1 Offset 8
+               OpMemberDecorate %S_std140 2 Offset 16
+               OpMemberDecorate %S_std140 3 Offset 24
+               OpMemberDecorate %S_std140 4 Offset 40
+               OpDecorate %_arr_S_std140_uint_4 ArrayStride 48
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+        %int = OpTypeInt 32 1
+      %float = OpTypeFloat 32
+    %v2float = OpTypeVector %float 2
+   %S_std140 = OpTypeStruct %int %v2float %v2float %v2float %int
+       %uint = OpTypeInt 32 0
+     %uint_4 = OpConstant %uint 4
+%_arr_S_std140_uint_4 = OpTypeArray %S_std140 %uint_4
+    %u_block = OpTypeStruct %_arr_S_std140_uint_4
+%_ptr_Uniform_u_block = OpTypePointer Uniform %u_block
+          %u = OpVariable %_ptr_Uniform_u_block Uniform
+%mat3v2float = OpTypeMatrix %v2float 3
+         %11 = OpTypeFunction %mat3v2float
+     %uint_0 = OpConstant %uint 0
+     %uint_2 = OpConstant %uint 2
+     %uint_1 = OpConstant %uint 1
+%_ptr_Uniform_v2float = OpTypePointer Uniform %v2float
+     %uint_3 = OpConstant %uint 3
+       %void = OpTypeVoid
+         %27 = OpTypeFunction %void
+    %v3float = OpTypeVector %float 3
+%mat2v3float = OpTypeMatrix %v3float 2
+         %37 = OpConstantNull %uint
+ %load_u_2_m = OpFunction %mat3v2float None %11
+         %14 = OpLabel
+         %19 = OpAccessChain %_ptr_Uniform_v2float %u %uint_0 %uint_2 %uint_1
+         %20 = OpLoad %v2float %19
+         %21 = OpAccessChain %_ptr_Uniform_v2float %u %uint_0 %uint_2 %uint_2
+         %22 = OpLoad %v2float %21
+         %24 = OpAccessChain %_ptr_Uniform_v2float %u %uint_0 %uint_2 %uint_3
+         %25 = OpLoad %v2float %24
+         %26 = OpCompositeConstruct %mat3v2float %20 %22 %25
+               OpReturnValue %26
+               OpFunctionEnd
+          %f = OpFunction %void None %27
+         %30 = OpLabel
+         %34 = OpFunctionCall %mat3v2float %load_u_2_m
+         %31 = OpTranspose %mat2v3float %34
+         %38 = OpAccessChain %_ptr_Uniform_v2float %u %uint_0 %37 %uint_2
+         %39 = OpLoad %v2float %38
+         %40 = OpVectorShuffle %v2float %39 %39 1 0
+         %35 = OpExtInst %float %36 Length %40
+         %42 = OpAccessChain %_ptr_Uniform_v2float %u %uint_0 %37 %uint_2
+         %43 = OpLoad %v2float %42
+         %44 = OpVectorShuffle %v2float %43 %43 1 0
+         %45 = OpCompositeExtract %float %44 0
+         %41 = OpExtInst %float %36 FAbs %45
+               OpReturn
+               OpFunctionEnd
diff --git a/test/tint/buffer/uniform/std140/mat3x2/to_builtin.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/mat3x2/to_builtin.wgsl.expected.wgsl
new file mode 100644
index 0000000..5f718b3
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/mat3x2/to_builtin.wgsl.expected.wgsl
@@ -0,0 +1,15 @@
+struct S {
+  before : i32,
+  @size(32)
+  m : mat3x2<f32>,
+  after : i32,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+  let t = transpose(u[2].m);
+  let l = length(u[0].m[1].yx);
+  let a = abs(u[0].m[1].yx.x);
+}
diff --git a/test/tint/buffer/uniform/std140/mat3x2/to_fn.wgsl b/test/tint/buffer/uniform/std140/mat3x2/to_fn.wgsl
new file mode 100644
index 0000000..2248b39
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/mat3x2/to_fn.wgsl
@@ -0,0 +1,22 @@
+struct S {
+  before : i32,
+  @size(64) m : mat3x2<f32>,
+  after : i32,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S, 4>;
+
+fn a(a : array<S, 4>) {}
+fn b(s : S) {}
+fn c(m : mat3x2<f32>) {}
+fn d(v : vec2<f32>) {}
+fn e(f : f32) {}
+
+@compute @workgroup_size(1)
+fn f() {
+    a(u);
+    b(u[2]);
+    c(u[2].m);
+    d(u[0].m[1].yx);
+    e(u[0].m[1].yx.x);
+}
diff --git a/test/tint/buffer/uniform/std140/mat3x2/to_fn.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/mat3x2/to_fn.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..f7a8444
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/mat3x2/to_fn.wgsl.expected.dxc.hlsl
@@ -0,0 +1,62 @@
+struct S {
+  int before;
+  float3x2 m;
+  int after;
+};
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[20];
+};
+
+void a(S a_1[4]) {
+}
+
+void b(S s) {
+}
+
+void c(float3x2 m) {
+}
+
+void d(float2 v) {
+}
+
+void e(float f_1) {
+}
+
+float3x2 tint_symbol_3(uint4 buffer[20], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load = buffer[scalar_offset / 4];
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset_1 / 4];
+  const uint scalar_offset_2 = ((offset + 16u)) / 4;
+  uint4 ubo_load_2 = buffer[scalar_offset_2 / 4];
+  return float3x2(asfloat(((scalar_offset & 2) ? ubo_load.zw : ubo_load.xy)), asfloat(((scalar_offset_1 & 2) ? ubo_load_1.zw : ubo_load_1.xy)), asfloat(((scalar_offset_2 & 2) ? ubo_load_2.zw : ubo_load_2.xy)));
+}
+
+S tint_symbol_1(uint4 buffer[20], uint offset) {
+  const uint scalar_offset_3 = ((offset + 0u)) / 4;
+  const uint scalar_offset_4 = ((offset + 72u)) / 4;
+  const S tint_symbol_5 = {asint(buffer[scalar_offset_3 / 4][scalar_offset_3 % 4]), tint_symbol_3(buffer, (offset + 8u)), asint(buffer[scalar_offset_4 / 4][scalar_offset_4 % 4])};
+  return tint_symbol_5;
+}
+
+typedef S tint_symbol_ret[4];
+tint_symbol_ret tint_symbol(uint4 buffer[20], uint offset) {
+  S arr[4] = (S[4])0;
+  {
+    [loop] for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = tint_symbol_1(buffer, (offset + (i * 80u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  a(tint_symbol(u, 0u));
+  b(tint_symbol_1(u, 160u));
+  c(tint_symbol_3(u, 168u));
+  d(asfloat(u[1].xy).yx);
+  e(asfloat(u[1].xy).yx.x);
+  return;
+}
diff --git a/test/tint/buffer/uniform/std140/mat3x2/to_fn.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/mat3x2/to_fn.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..f7a8444
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/mat3x2/to_fn.wgsl.expected.fxc.hlsl
@@ -0,0 +1,62 @@
+struct S {
+  int before;
+  float3x2 m;
+  int after;
+};
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[20];
+};
+
+void a(S a_1[4]) {
+}
+
+void b(S s) {
+}
+
+void c(float3x2 m) {
+}
+
+void d(float2 v) {
+}
+
+void e(float f_1) {
+}
+
+float3x2 tint_symbol_3(uint4 buffer[20], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load = buffer[scalar_offset / 4];
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset_1 / 4];
+  const uint scalar_offset_2 = ((offset + 16u)) / 4;
+  uint4 ubo_load_2 = buffer[scalar_offset_2 / 4];
+  return float3x2(asfloat(((scalar_offset & 2) ? ubo_load.zw : ubo_load.xy)), asfloat(((scalar_offset_1 & 2) ? ubo_load_1.zw : ubo_load_1.xy)), asfloat(((scalar_offset_2 & 2) ? ubo_load_2.zw : ubo_load_2.xy)));
+}
+
+S tint_symbol_1(uint4 buffer[20], uint offset) {
+  const uint scalar_offset_3 = ((offset + 0u)) / 4;
+  const uint scalar_offset_4 = ((offset + 72u)) / 4;
+  const S tint_symbol_5 = {asint(buffer[scalar_offset_3 / 4][scalar_offset_3 % 4]), tint_symbol_3(buffer, (offset + 8u)), asint(buffer[scalar_offset_4 / 4][scalar_offset_4 % 4])};
+  return tint_symbol_5;
+}
+
+typedef S tint_symbol_ret[4];
+tint_symbol_ret tint_symbol(uint4 buffer[20], uint offset) {
+  S arr[4] = (S[4])0;
+  {
+    [loop] for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = tint_symbol_1(buffer, (offset + (i * 80u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  a(tint_symbol(u, 0u));
+  b(tint_symbol_1(u, 160u));
+  c(tint_symbol_3(u, 168u));
+  d(asfloat(u[1].xy).yx);
+  e(asfloat(u[1].xy).yx.x);
+  return;
+}
diff --git a/test/tint/buffer/uniform/std140/mat3x2/to_fn.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/mat3x2/to_fn.wgsl.expected.glsl
new file mode 100644
index 0000000..04a65eb
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/mat3x2/to_fn.wgsl.expected.glsl
@@ -0,0 +1,71 @@
+#version 310 es
+
+struct S {
+  int before;
+  mat3x2 m;
+  int after;
+};
+
+struct S_std140 {
+  int before;
+  vec2 m_0;
+  vec2 m_1;
+  vec2 m_2;
+  int after;
+};
+
+struct u_block {
+  S_std140 inner[4];
+};
+
+layout(binding = 0) uniform u_block_1 {
+  S_std140 inner[4];
+} u;
+
+void a(S a_1[4]) {
+}
+
+void b(S s) {
+}
+
+void c(mat3x2 m) {
+}
+
+void d(vec2 v) {
+}
+
+void e(float f_1) {
+}
+
+S conv_S(S_std140 val) {
+  S tint_symbol = S(val.before, mat3x2(val.m_0, val.m_1, val.m_2), val.after);
+  return tint_symbol;
+}
+
+S[4] conv_arr_4_S(S_std140 val[4]) {
+  S arr[4] = S[4](S(0, mat3x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f), 0), S(0, mat3x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f), 0), S(0, mat3x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f), 0), S(0, mat3x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f), 0));
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = conv_S(val[i]);
+    }
+  }
+  return arr;
+}
+
+mat3x2 load_u_2_m() {
+  return mat3x2(u.inner[2u].m_0, u.inner[2u].m_1, u.inner[2u].m_2);
+}
+
+void f() {
+  a(conv_arr_4_S(u.inner));
+  b(conv_S(u.inner[2u]));
+  c(load_u_2_m());
+  d(u.inner[0u].m_1.yx);
+  e(u.inner[0u].m_1.yx[0u]);
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}
diff --git a/test/tint/buffer/uniform/std140/mat3x2/to_fn.wgsl.expected.msl b/test/tint/buffer/uniform/std140/mat3x2/to_fn.wgsl.expected.msl
new file mode 100644
index 0000000..9247913
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/mat3x2/to_fn.wgsl.expected.msl
@@ -0,0 +1,49 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+struct S {
+  /* 0x0000 */ int before;
+  /* 0x0004 */ tint_array<int8_t, 4> tint_pad;
+  /* 0x0008 */ float3x2 m;
+  /* 0x0020 */ tint_array<int8_t, 40> tint_pad_1;
+  /* 0x0048 */ int after;
+  /* 0x004c */ tint_array<int8_t, 4> tint_pad_2;
+};
+
+void a(tint_array<S, 4> a_1) {
+}
+
+void b(S s) {
+}
+
+void c(float3x2 m) {
+}
+
+void d(float2 v) {
+}
+
+void e(float f_1) {
+}
+
+kernel void f(const constant tint_array<S, 4>* tint_symbol [[buffer(0)]]) {
+  a(*(tint_symbol));
+  b((*(tint_symbol))[2]);
+  c((*(tint_symbol))[2].m);
+  d(float2((*(tint_symbol))[0].m[1]).yx);
+  e(float2((*(tint_symbol))[0].m[1]).yx[0]);
+  return;
+}
+
diff --git a/test/tint/buffer/uniform/std140/mat3x2/to_fn.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/mat3x2/to_fn.wgsl.expected.spvasm
new file mode 100644
index 0000000..b6ae5ab
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/mat3x2/to_fn.wgsl.expected.spvasm
@@ -0,0 +1,205 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 119
+; Schema: 0
+               OpCapability Shader
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %u_block "u_block"
+               OpMemberName %u_block 0 "inner"
+               OpName %S_std140 "S_std140"
+               OpMemberName %S_std140 0 "before"
+               OpMemberName %S_std140 1 "m_0"
+               OpMemberName %S_std140 2 "m_1"
+               OpMemberName %S_std140 3 "m_2"
+               OpMemberName %S_std140 4 "after"
+               OpName %u "u"
+               OpName %S "S"
+               OpMemberName %S 0 "before"
+               OpMemberName %S 1 "m"
+               OpMemberName %S 2 "after"
+               OpName %a "a"
+               OpName %a_1 "a_1"
+               OpName %b "b"
+               OpName %s "s"
+               OpName %c "c"
+               OpName %m "m"
+               OpName %d "d"
+               OpName %v "v"
+               OpName %e "e"
+               OpName %f_1 "f_1"
+               OpName %conv_S "conv_S"
+               OpName %val "val"
+               OpName %conv_arr_4_S "conv_arr_4_S"
+               OpName %val_0 "val"
+               OpName %arr "arr"
+               OpName %i "i"
+               OpName %var_for_index "var_for_index"
+               OpName %load_u_2_m "load_u_2_m"
+               OpName %f "f"
+               OpDecorate %u_block Block
+               OpMemberDecorate %u_block 0 Offset 0
+               OpMemberDecorate %S_std140 0 Offset 0
+               OpMemberDecorate %S_std140 1 Offset 8
+               OpMemberDecorate %S_std140 2 Offset 16
+               OpMemberDecorate %S_std140 3 Offset 24
+               OpMemberDecorate %S_std140 4 Offset 72
+               OpDecorate %_arr_S_std140_uint_4 ArrayStride 80
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+               OpMemberDecorate %S 0 Offset 0
+               OpMemberDecorate %S 1 Offset 8
+               OpMemberDecorate %S 1 ColMajor
+               OpMemberDecorate %S 1 MatrixStride 8
+               OpMemberDecorate %S 2 Offset 72
+               OpDecorate %_arr_S_uint_4 ArrayStride 80
+        %int = OpTypeInt 32 1
+      %float = OpTypeFloat 32
+    %v2float = OpTypeVector %float 2
+   %S_std140 = OpTypeStruct %int %v2float %v2float %v2float %int
+       %uint = OpTypeInt 32 0
+     %uint_4 = OpConstant %uint 4
+%_arr_S_std140_uint_4 = OpTypeArray %S_std140 %uint_4
+    %u_block = OpTypeStruct %_arr_S_std140_uint_4
+%_ptr_Uniform_u_block = OpTypePointer Uniform %u_block
+          %u = OpVariable %_ptr_Uniform_u_block Uniform
+       %void = OpTypeVoid
+%mat3v2float = OpTypeMatrix %v2float 3
+          %S = OpTypeStruct %int %mat3v2float %int
+%_arr_S_uint_4 = OpTypeArray %S %uint_4
+         %11 = OpTypeFunction %void %_arr_S_uint_4
+         %19 = OpTypeFunction %void %S
+         %23 = OpTypeFunction %void %mat3v2float
+         %27 = OpTypeFunction %void %v2float
+         %31 = OpTypeFunction %void %float
+         %35 = OpTypeFunction %S %S_std140
+         %46 = OpTypeFunction %_arr_S_uint_4 %_arr_S_std140_uint_4
+%_ptr_Function__arr_S_uint_4 = OpTypePointer Function %_arr_S_uint_4
+         %52 = OpConstantNull %_arr_S_uint_4
+%_ptr_Function_uint = OpTypePointer Function %uint
+         %55 = OpConstantNull %uint
+       %bool = OpTypeBool
+%_ptr_Function__arr_S_std140_uint_4 = OpTypePointer Function %_arr_S_std140_uint_4
+         %68 = OpConstantNull %_arr_S_std140_uint_4
+%_ptr_Function_S = OpTypePointer Function %S
+%_ptr_Function_S_std140 = OpTypePointer Function %S_std140
+     %uint_1 = OpConstant %uint 1
+         %81 = OpTypeFunction %mat3v2float
+     %uint_0 = OpConstant %uint 0
+     %uint_2 = OpConstant %uint 2
+%_ptr_Uniform_v2float = OpTypePointer Uniform %v2float
+     %uint_3 = OpConstant %uint 3
+         %95 = OpTypeFunction %void
+%_ptr_Uniform__arr_S_std140_uint_4 = OpTypePointer Uniform %_arr_S_std140_uint_4
+%_ptr_Uniform_S_std140 = OpTypePointer Uniform %S_std140
+          %a = OpFunction %void None %11
+        %a_1 = OpFunctionParameter %_arr_S_uint_4
+         %18 = OpLabel
+               OpReturn
+               OpFunctionEnd
+          %b = OpFunction %void None %19
+          %s = OpFunctionParameter %S
+         %22 = OpLabel
+               OpReturn
+               OpFunctionEnd
+          %c = OpFunction %void None %23
+          %m = OpFunctionParameter %mat3v2float
+         %26 = OpLabel
+               OpReturn
+               OpFunctionEnd
+          %d = OpFunction %void None %27
+          %v = OpFunctionParameter %v2float
+         %30 = OpLabel
+               OpReturn
+               OpFunctionEnd
+          %e = OpFunction %void None %31
+        %f_1 = OpFunctionParameter %float
+         %34 = OpLabel
+               OpReturn
+               OpFunctionEnd
+     %conv_S = OpFunction %S None %35
+        %val = OpFunctionParameter %S_std140
+         %38 = OpLabel
+         %39 = OpCompositeExtract %int %val 0
+         %40 = OpCompositeExtract %v2float %val 1
+         %41 = OpCompositeExtract %v2float %val 2
+         %42 = OpCompositeExtract %v2float %val 3
+         %43 = OpCompositeConstruct %mat3v2float %40 %41 %42
+         %44 = OpCompositeExtract %int %val 4
+         %45 = OpCompositeConstruct %S %39 %43 %44
+               OpReturnValue %45
+               OpFunctionEnd
+%conv_arr_4_S = OpFunction %_arr_S_uint_4 None %46
+      %val_0 = OpFunctionParameter %_arr_S_std140_uint_4
+         %49 = OpLabel
+        %arr = OpVariable %_ptr_Function__arr_S_uint_4 Function %52
+          %i = OpVariable %_ptr_Function_uint Function %55
+%var_for_index = OpVariable %_ptr_Function__arr_S_std140_uint_4 Function %68
+               OpBranch %56
+         %56 = OpLabel
+               OpLoopMerge %57 %58 None
+               OpBranch %59
+         %59 = OpLabel
+         %61 = OpLoad %uint %i
+         %62 = OpULessThan %bool %61 %uint_4
+         %60 = OpLogicalNot %bool %62
+               OpSelectionMerge %64 None
+               OpBranchConditional %60 %65 %64
+         %65 = OpLabel
+               OpBranch %57
+         %64 = OpLabel
+               OpStore %var_for_index %val_0
+         %69 = OpLoad %uint %i
+         %71 = OpAccessChain %_ptr_Function_S %arr %69
+         %73 = OpLoad %uint %i
+         %75 = OpAccessChain %_ptr_Function_S_std140 %var_for_index %73
+         %76 = OpLoad %S_std140 %75
+         %72 = OpFunctionCall %S %conv_S %76
+               OpStore %71 %72
+               OpBranch %58
+         %58 = OpLabel
+         %77 = OpLoad %uint %i
+         %79 = OpIAdd %uint %77 %uint_1
+               OpStore %i %79
+               OpBranch %56
+         %57 = OpLabel
+         %80 = OpLoad %_arr_S_uint_4 %arr
+               OpReturnValue %80
+               OpFunctionEnd
+ %load_u_2_m = OpFunction %mat3v2float None %81
+         %83 = OpLabel
+         %87 = OpAccessChain %_ptr_Uniform_v2float %u %uint_0 %uint_2 %uint_1
+         %88 = OpLoad %v2float %87
+         %89 = OpAccessChain %_ptr_Uniform_v2float %u %uint_0 %uint_2 %uint_2
+         %90 = OpLoad %v2float %89
+         %92 = OpAccessChain %_ptr_Uniform_v2float %u %uint_0 %uint_2 %uint_3
+         %93 = OpLoad %v2float %92
+         %94 = OpCompositeConstruct %mat3v2float %88 %90 %93
+               OpReturnValue %94
+               OpFunctionEnd
+          %f = OpFunction %void None %95
+         %97 = OpLabel
+        %101 = OpAccessChain %_ptr_Uniform__arr_S_std140_uint_4 %u %uint_0
+        %102 = OpLoad %_arr_S_std140_uint_4 %101
+         %99 = OpFunctionCall %_arr_S_uint_4 %conv_arr_4_S %102
+         %98 = OpFunctionCall %void %a %99
+        %106 = OpAccessChain %_ptr_Uniform_S_std140 %u %uint_0 %uint_2
+        %107 = OpLoad %S_std140 %106
+        %104 = OpFunctionCall %S %conv_S %107
+        %103 = OpFunctionCall %void %b %104
+        %109 = OpFunctionCall %mat3v2float %load_u_2_m
+        %108 = OpFunctionCall %void %c %109
+        %111 = OpAccessChain %_ptr_Uniform_v2float %u %uint_0 %55 %uint_2
+        %112 = OpLoad %v2float %111
+        %113 = OpVectorShuffle %v2float %112 %112 1 0
+        %110 = OpFunctionCall %void %d %113
+        %115 = OpAccessChain %_ptr_Uniform_v2float %u %uint_0 %55 %uint_2
+        %116 = OpLoad %v2float %115
+        %117 = OpVectorShuffle %v2float %116 %116 1 0
+        %118 = OpCompositeExtract %float %117 0
+        %114 = OpFunctionCall %void %e %118
+               OpReturn
+               OpFunctionEnd
diff --git a/test/tint/buffer/uniform/std140/mat3x2/to_fn.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/mat3x2/to_fn.wgsl.expected.wgsl
new file mode 100644
index 0000000..b7ca757
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/mat3x2/to_fn.wgsl.expected.wgsl
@@ -0,0 +1,32 @@
+struct S {
+  before : i32,
+  @size(64)
+  m : mat3x2<f32>,
+  after : i32,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S, 4>;
+
+fn a(a : array<S, 4>) {
+}
+
+fn b(s : S) {
+}
+
+fn c(m : mat3x2<f32>) {
+}
+
+fn d(v : vec2<f32>) {
+}
+
+fn e(f : f32) {
+}
+
+@compute @workgroup_size(1)
+fn f() {
+  a(u);
+  b(u[2]);
+  c(u[2].m);
+  d(u[0].m[1].yx);
+  e(u[0].m[1].yx.x);
+}
diff --git a/test/tint/buffer/uniform/std140/mat3x2/to_private.wgsl b/test/tint/buffer/uniform/std140/mat3x2/to_private.wgsl
new file mode 100644
index 0000000..be13ffc
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/mat3x2/to_private.wgsl
@@ -0,0 +1,16 @@
+struct S {
+  before : i32,
+  @size(64) m : mat3x2<f32>,
+  after : i32,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S, 4>;
+var<private> p : array<S, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+    p = u;
+    p[1] = u[2];
+    p[3].m = u[2].m;
+    p[1].m[0] = u[0].m[1].yx;
+}
diff --git a/test/tint/buffer/uniform/std140/mat3x2/to_private.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/mat3x2/to_private.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..31c7a57
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/mat3x2/to_private.wgsl.expected.dxc.hlsl
@@ -0,0 +1,47 @@
+struct S {
+  int before;
+  float3x2 m;
+  int after;
+};
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[20];
+};
+static S p[4] = (S[4])0;
+
+float3x2 tint_symbol_3(uint4 buffer[20], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load = buffer[scalar_offset / 4];
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset_1 / 4];
+  const uint scalar_offset_2 = ((offset + 16u)) / 4;
+  uint4 ubo_load_2 = buffer[scalar_offset_2 / 4];
+  return float3x2(asfloat(((scalar_offset & 2) ? ubo_load.zw : ubo_load.xy)), asfloat(((scalar_offset_1 & 2) ? ubo_load_1.zw : ubo_load_1.xy)), asfloat(((scalar_offset_2 & 2) ? ubo_load_2.zw : ubo_load_2.xy)));
+}
+
+S tint_symbol_1(uint4 buffer[20], uint offset) {
+  const uint scalar_offset_3 = ((offset + 0u)) / 4;
+  const uint scalar_offset_4 = ((offset + 72u)) / 4;
+  const S tint_symbol_5 = {asint(buffer[scalar_offset_3 / 4][scalar_offset_3 % 4]), tint_symbol_3(buffer, (offset + 8u)), asint(buffer[scalar_offset_4 / 4][scalar_offset_4 % 4])};
+  return tint_symbol_5;
+}
+
+typedef S tint_symbol_ret[4];
+tint_symbol_ret tint_symbol(uint4 buffer[20], uint offset) {
+  S arr[4] = (S[4])0;
+  {
+    [loop] for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = tint_symbol_1(buffer, (offset + (i * 80u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  p = tint_symbol(u, 0u);
+  p[1] = tint_symbol_1(u, 160u);
+  p[3].m = tint_symbol_3(u, 168u);
+  p[1].m[0] = asfloat(u[1].xy).yx;
+  return;
+}
diff --git a/test/tint/buffer/uniform/std140/mat3x2/to_private.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/mat3x2/to_private.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..31c7a57
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/mat3x2/to_private.wgsl.expected.fxc.hlsl
@@ -0,0 +1,47 @@
+struct S {
+  int before;
+  float3x2 m;
+  int after;
+};
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[20];
+};
+static S p[4] = (S[4])0;
+
+float3x2 tint_symbol_3(uint4 buffer[20], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load = buffer[scalar_offset / 4];
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset_1 / 4];
+  const uint scalar_offset_2 = ((offset + 16u)) / 4;
+  uint4 ubo_load_2 = buffer[scalar_offset_2 / 4];
+  return float3x2(asfloat(((scalar_offset & 2) ? ubo_load.zw : ubo_load.xy)), asfloat(((scalar_offset_1 & 2) ? ubo_load_1.zw : ubo_load_1.xy)), asfloat(((scalar_offset_2 & 2) ? ubo_load_2.zw : ubo_load_2.xy)));
+}
+
+S tint_symbol_1(uint4 buffer[20], uint offset) {
+  const uint scalar_offset_3 = ((offset + 0u)) / 4;
+  const uint scalar_offset_4 = ((offset + 72u)) / 4;
+  const S tint_symbol_5 = {asint(buffer[scalar_offset_3 / 4][scalar_offset_3 % 4]), tint_symbol_3(buffer, (offset + 8u)), asint(buffer[scalar_offset_4 / 4][scalar_offset_4 % 4])};
+  return tint_symbol_5;
+}
+
+typedef S tint_symbol_ret[4];
+tint_symbol_ret tint_symbol(uint4 buffer[20], uint offset) {
+  S arr[4] = (S[4])0;
+  {
+    [loop] for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = tint_symbol_1(buffer, (offset + (i * 80u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  p = tint_symbol(u, 0u);
+  p[1] = tint_symbol_1(u, 160u);
+  p[3].m = tint_symbol_3(u, 168u);
+  p[1].m[0] = asfloat(u[1].xy).yx;
+  return;
+}
diff --git a/test/tint/buffer/uniform/std140/mat3x2/to_private.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/mat3x2/to_private.wgsl.expected.glsl
new file mode 100644
index 0000000..f9eeb07
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/mat3x2/to_private.wgsl.expected.glsl
@@ -0,0 +1,56 @@
+#version 310 es
+
+struct S {
+  int before;
+  mat3x2 m;
+  int after;
+};
+
+struct S_std140 {
+  int before;
+  vec2 m_0;
+  vec2 m_1;
+  vec2 m_2;
+  int after;
+};
+
+struct u_block {
+  S_std140 inner[4];
+};
+
+layout(binding = 0) uniform u_block_1 {
+  S_std140 inner[4];
+} u;
+
+S p[4] = S[4](S(0, mat3x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f), 0), S(0, mat3x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f), 0), S(0, mat3x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f), 0), S(0, mat3x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f), 0));
+S conv_S(S_std140 val) {
+  S tint_symbol = S(val.before, mat3x2(val.m_0, val.m_1, val.m_2), val.after);
+  return tint_symbol;
+}
+
+S[4] conv_arr_4_S(S_std140 val[4]) {
+  S arr[4] = S[4](S(0, mat3x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f), 0), S(0, mat3x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f), 0), S(0, mat3x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f), 0), S(0, mat3x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f), 0));
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = conv_S(val[i]);
+    }
+  }
+  return arr;
+}
+
+mat3x2 load_u_2_m() {
+  return mat3x2(u.inner[2u].m_0, u.inner[2u].m_1, u.inner[2u].m_2);
+}
+
+void f() {
+  p = conv_arr_4_S(u.inner);
+  p[1] = conv_S(u.inner[2u]);
+  p[3].m = load_u_2_m();
+  p[1].m[0] = u.inner[0u].m_1.yx;
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}
diff --git a/test/tint/buffer/uniform/std140/mat3x2/to_private.wgsl.expected.msl b/test/tint/buffer/uniform/std140/mat3x2/to_private.wgsl.expected.msl
new file mode 100644
index 0000000..2e60d0f
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/mat3x2/to_private.wgsl.expected.msl
@@ -0,0 +1,34 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+struct S {
+  /* 0x0000 */ int before;
+  /* 0x0004 */ tint_array<int8_t, 4> tint_pad;
+  /* 0x0008 */ float3x2 m;
+  /* 0x0020 */ tint_array<int8_t, 40> tint_pad_1;
+  /* 0x0048 */ int after;
+  /* 0x004c */ tint_array<int8_t, 4> tint_pad_2;
+};
+
+kernel void f(const constant tint_array<S, 4>* tint_symbol_1 [[buffer(0)]]) {
+  thread tint_array<S, 4> tint_symbol = {};
+  tint_symbol = *(tint_symbol_1);
+  tint_symbol[1] = (*(tint_symbol_1))[2];
+  tint_symbol[3].m = (*(tint_symbol_1))[2].m;
+  tint_symbol[1].m[0] = float2((*(tint_symbol_1))[0].m[1]).yx;
+  return;
+}
+
diff --git a/test/tint/buffer/uniform/std140/mat3x2/to_private.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/mat3x2/to_private.wgsl.expected.spvasm
new file mode 100644
index 0000000..9156099
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/mat3x2/to_private.wgsl.expected.spvasm
@@ -0,0 +1,172 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 101
+; Schema: 0
+               OpCapability Shader
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %u_block "u_block"
+               OpMemberName %u_block 0 "inner"
+               OpName %S_std140 "S_std140"
+               OpMemberName %S_std140 0 "before"
+               OpMemberName %S_std140 1 "m_0"
+               OpMemberName %S_std140 2 "m_1"
+               OpMemberName %S_std140 3 "m_2"
+               OpMemberName %S_std140 4 "after"
+               OpName %u "u"
+               OpName %S "S"
+               OpMemberName %S 0 "before"
+               OpMemberName %S 1 "m"
+               OpMemberName %S 2 "after"
+               OpName %p "p"
+               OpName %conv_S "conv_S"
+               OpName %val "val"
+               OpName %conv_arr_4_S "conv_arr_4_S"
+               OpName %val_0 "val"
+               OpName %arr "arr"
+               OpName %i "i"
+               OpName %var_for_index "var_for_index"
+               OpName %load_u_2_m "load_u_2_m"
+               OpName %f "f"
+               OpDecorate %u_block Block
+               OpMemberDecorate %u_block 0 Offset 0
+               OpMemberDecorate %S_std140 0 Offset 0
+               OpMemberDecorate %S_std140 1 Offset 8
+               OpMemberDecorate %S_std140 2 Offset 16
+               OpMemberDecorate %S_std140 3 Offset 24
+               OpMemberDecorate %S_std140 4 Offset 72
+               OpDecorate %_arr_S_std140_uint_4 ArrayStride 80
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+               OpMemberDecorate %S 0 Offset 0
+               OpMemberDecorate %S 1 Offset 8
+               OpMemberDecorate %S 1 ColMajor
+               OpMemberDecorate %S 1 MatrixStride 8
+               OpMemberDecorate %S 2 Offset 72
+               OpDecorate %_arr_S_uint_4 ArrayStride 80
+        %int = OpTypeInt 32 1
+      %float = OpTypeFloat 32
+    %v2float = OpTypeVector %float 2
+   %S_std140 = OpTypeStruct %int %v2float %v2float %v2float %int
+       %uint = OpTypeInt 32 0
+     %uint_4 = OpConstant %uint 4
+%_arr_S_std140_uint_4 = OpTypeArray %S_std140 %uint_4
+    %u_block = OpTypeStruct %_arr_S_std140_uint_4
+%_ptr_Uniform_u_block = OpTypePointer Uniform %u_block
+          %u = OpVariable %_ptr_Uniform_u_block Uniform
+%mat3v2float = OpTypeMatrix %v2float 3
+          %S = OpTypeStruct %int %mat3v2float %int
+%_arr_S_uint_4 = OpTypeArray %S %uint_4
+%_ptr_Private__arr_S_uint_4 = OpTypePointer Private %_arr_S_uint_4
+         %16 = OpConstantNull %_arr_S_uint_4
+          %p = OpVariable %_ptr_Private__arr_S_uint_4 Private %16
+         %17 = OpTypeFunction %S %S_std140
+         %28 = OpTypeFunction %_arr_S_uint_4 %_arr_S_std140_uint_4
+%_ptr_Function__arr_S_uint_4 = OpTypePointer Function %_arr_S_uint_4
+%_ptr_Function_uint = OpTypePointer Function %uint
+         %36 = OpConstantNull %uint
+       %bool = OpTypeBool
+%_ptr_Function__arr_S_std140_uint_4 = OpTypePointer Function %_arr_S_std140_uint_4
+         %49 = OpConstantNull %_arr_S_std140_uint_4
+%_ptr_Function_S = OpTypePointer Function %S
+%_ptr_Function_S_std140 = OpTypePointer Function %S_std140
+     %uint_1 = OpConstant %uint 1
+         %62 = OpTypeFunction %mat3v2float
+     %uint_0 = OpConstant %uint 0
+     %uint_2 = OpConstant %uint 2
+%_ptr_Uniform_v2float = OpTypePointer Uniform %v2float
+     %uint_3 = OpConstant %uint 3
+       %void = OpTypeVoid
+         %76 = OpTypeFunction %void
+%_ptr_Uniform__arr_S_std140_uint_4 = OpTypePointer Uniform %_arr_S_std140_uint_4
+      %int_1 = OpConstant %int 1
+%_ptr_Private_S = OpTypePointer Private %S
+%_ptr_Uniform_S_std140 = OpTypePointer Uniform %S_std140
+      %int_3 = OpConstant %int 3
+%_ptr_Private_mat3v2float = OpTypePointer Private %mat3v2float
+         %95 = OpConstantNull %int
+%_ptr_Private_v2float = OpTypePointer Private %v2float
+     %conv_S = OpFunction %S None %17
+        %val = OpFunctionParameter %S_std140
+         %20 = OpLabel
+         %21 = OpCompositeExtract %int %val 0
+         %22 = OpCompositeExtract %v2float %val 1
+         %23 = OpCompositeExtract %v2float %val 2
+         %24 = OpCompositeExtract %v2float %val 3
+         %25 = OpCompositeConstruct %mat3v2float %22 %23 %24
+         %26 = OpCompositeExtract %int %val 4
+         %27 = OpCompositeConstruct %S %21 %25 %26
+               OpReturnValue %27
+               OpFunctionEnd
+%conv_arr_4_S = OpFunction %_arr_S_uint_4 None %28
+      %val_0 = OpFunctionParameter %_arr_S_std140_uint_4
+         %31 = OpLabel
+        %arr = OpVariable %_ptr_Function__arr_S_uint_4 Function %16
+          %i = OpVariable %_ptr_Function_uint Function %36
+%var_for_index = OpVariable %_ptr_Function__arr_S_std140_uint_4 Function %49
+               OpBranch %37
+         %37 = OpLabel
+               OpLoopMerge %38 %39 None
+               OpBranch %40
+         %40 = OpLabel
+         %42 = OpLoad %uint %i
+         %43 = OpULessThan %bool %42 %uint_4
+         %41 = OpLogicalNot %bool %43
+               OpSelectionMerge %45 None
+               OpBranchConditional %41 %46 %45
+         %46 = OpLabel
+               OpBranch %38
+         %45 = OpLabel
+               OpStore %var_for_index %val_0
+         %50 = OpLoad %uint %i
+         %52 = OpAccessChain %_ptr_Function_S %arr %50
+         %54 = OpLoad %uint %i
+         %56 = OpAccessChain %_ptr_Function_S_std140 %var_for_index %54
+         %57 = OpLoad %S_std140 %56
+         %53 = OpFunctionCall %S %conv_S %57
+               OpStore %52 %53
+               OpBranch %39
+         %39 = OpLabel
+         %58 = OpLoad %uint %i
+         %60 = OpIAdd %uint %58 %uint_1
+               OpStore %i %60
+               OpBranch %37
+         %38 = OpLabel
+         %61 = OpLoad %_arr_S_uint_4 %arr
+               OpReturnValue %61
+               OpFunctionEnd
+ %load_u_2_m = OpFunction %mat3v2float None %62
+         %64 = OpLabel
+         %68 = OpAccessChain %_ptr_Uniform_v2float %u %uint_0 %uint_2 %uint_1
+         %69 = OpLoad %v2float %68
+         %70 = OpAccessChain %_ptr_Uniform_v2float %u %uint_0 %uint_2 %uint_2
+         %71 = OpLoad %v2float %70
+         %73 = OpAccessChain %_ptr_Uniform_v2float %u %uint_0 %uint_2 %uint_3
+         %74 = OpLoad %v2float %73
+         %75 = OpCompositeConstruct %mat3v2float %69 %71 %74
+               OpReturnValue %75
+               OpFunctionEnd
+          %f = OpFunction %void None %76
+         %79 = OpLabel
+         %82 = OpAccessChain %_ptr_Uniform__arr_S_std140_uint_4 %u %uint_0
+         %83 = OpLoad %_arr_S_std140_uint_4 %82
+         %80 = OpFunctionCall %_arr_S_uint_4 %conv_arr_4_S %83
+               OpStore %p %80
+         %86 = OpAccessChain %_ptr_Private_S %p %int_1
+         %89 = OpAccessChain %_ptr_Uniform_S_std140 %u %uint_0 %uint_2
+         %90 = OpLoad %S_std140 %89
+         %87 = OpFunctionCall %S %conv_S %90
+               OpStore %86 %87
+         %93 = OpAccessChain %_ptr_Private_mat3v2float %p %int_3 %uint_1
+         %94 = OpFunctionCall %mat3v2float %load_u_2_m
+               OpStore %93 %94
+         %97 = OpAccessChain %_ptr_Private_v2float %p %int_1 %uint_1 %95
+         %98 = OpAccessChain %_ptr_Uniform_v2float %u %uint_0 %36 %uint_2
+         %99 = OpLoad %v2float %98
+        %100 = OpVectorShuffle %v2float %99 %99 1 0
+               OpStore %97 %100
+               OpReturn
+               OpFunctionEnd
diff --git a/test/tint/buffer/uniform/std140/mat3x2/to_private.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/mat3x2/to_private.wgsl.expected.wgsl
new file mode 100644
index 0000000..b0ff478
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/mat3x2/to_private.wgsl.expected.wgsl
@@ -0,0 +1,18 @@
+struct S {
+  before : i32,
+  @size(64)
+  m : mat3x2<f32>,
+  after : i32,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S, 4>;
+
+var<private> p : array<S, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+  p = u;
+  p[1] = u[2];
+  p[3].m = u[2].m;
+  p[1].m[0] = u[0].m[1].yx;
+}
diff --git a/test/tint/buffer/uniform/std140/mat3x2/to_storage.wgsl b/test/tint/buffer/uniform/std140/mat3x2/to_storage.wgsl
new file mode 100644
index 0000000..4848e4a
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/mat3x2/to_storage.wgsl
@@ -0,0 +1,16 @@
+struct S {
+  before : i32,
+  @size(64) m : mat3x2<f32>,
+  after : i32,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S, 4>;
+@group(0) @binding(1) var<storage, read_write> s : array<S, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+    s = u;
+    s[1] = u[2];
+    s[3].m = u[2].m;
+    s[1].m[0] = u[0].m[1].yx;
+}
diff --git a/test/tint/buffer/uniform/std140/mat3x2/to_storage.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/mat3x2/to_storage.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..7badae1
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/mat3x2/to_storage.wgsl.expected.dxc.hlsl
@@ -0,0 +1,68 @@
+struct S {
+  int before;
+  float3x2 m;
+  int after;
+};
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[20];
+};
+RWByteAddressBuffer s : register(u1, space0);
+
+void tint_symbol_3(RWByteAddressBuffer buffer, uint offset, float3x2 value) {
+  buffer.Store2((offset + 0u), asuint(value[0u]));
+  buffer.Store2((offset + 8u), asuint(value[1u]));
+  buffer.Store2((offset + 16u), asuint(value[2u]));
+}
+
+void tint_symbol_1(RWByteAddressBuffer buffer, uint offset, S value) {
+  buffer.Store((offset + 0u), asuint(value.before));
+  tint_symbol_3(buffer, (offset + 8u), value.m);
+  buffer.Store((offset + 72u), asuint(value.after));
+}
+
+void tint_symbol(RWByteAddressBuffer buffer, uint offset, S value[4]) {
+  S array[4] = value;
+  {
+    [loop] for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      tint_symbol_1(buffer, (offset + (i * 80u)), array[i]);
+    }
+  }
+}
+
+float3x2 tint_symbol_8(uint4 buffer[20], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load = buffer[scalar_offset / 4];
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset_1 / 4];
+  const uint scalar_offset_2 = ((offset + 16u)) / 4;
+  uint4 ubo_load_2 = buffer[scalar_offset_2 / 4];
+  return float3x2(asfloat(((scalar_offset & 2) ? ubo_load.zw : ubo_load.xy)), asfloat(((scalar_offset_1 & 2) ? ubo_load_1.zw : ubo_load_1.xy)), asfloat(((scalar_offset_2 & 2) ? ubo_load_2.zw : ubo_load_2.xy)));
+}
+
+S tint_symbol_6(uint4 buffer[20], uint offset) {
+  const uint scalar_offset_3 = ((offset + 0u)) / 4;
+  const uint scalar_offset_4 = ((offset + 72u)) / 4;
+  const S tint_symbol_10 = {asint(buffer[scalar_offset_3 / 4][scalar_offset_3 % 4]), tint_symbol_8(buffer, (offset + 8u)), asint(buffer[scalar_offset_4 / 4][scalar_offset_4 % 4])};
+  return tint_symbol_10;
+}
+
+typedef S tint_symbol_5_ret[4];
+tint_symbol_5_ret tint_symbol_5(uint4 buffer[20], uint offset) {
+  S arr[4] = (S[4])0;
+  {
+    [loop] for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
+      arr[i_1] = tint_symbol_6(buffer, (offset + (i_1 * 80u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  tint_symbol(s, 0u, tint_symbol_5(u, 0u));
+  tint_symbol_1(s, 80u, tint_symbol_6(u, 160u));
+  tint_symbol_3(s, 248u, tint_symbol_8(u, 168u));
+  s.Store2(88u, asuint(asfloat(u[1].xy).yx));
+  return;
+}
diff --git a/test/tint/buffer/uniform/std140/mat3x2/to_storage.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/mat3x2/to_storage.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..7badae1
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/mat3x2/to_storage.wgsl.expected.fxc.hlsl
@@ -0,0 +1,68 @@
+struct S {
+  int before;
+  float3x2 m;
+  int after;
+};
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[20];
+};
+RWByteAddressBuffer s : register(u1, space0);
+
+void tint_symbol_3(RWByteAddressBuffer buffer, uint offset, float3x2 value) {
+  buffer.Store2((offset + 0u), asuint(value[0u]));
+  buffer.Store2((offset + 8u), asuint(value[1u]));
+  buffer.Store2((offset + 16u), asuint(value[2u]));
+}
+
+void tint_symbol_1(RWByteAddressBuffer buffer, uint offset, S value) {
+  buffer.Store((offset + 0u), asuint(value.before));
+  tint_symbol_3(buffer, (offset + 8u), value.m);
+  buffer.Store((offset + 72u), asuint(value.after));
+}
+
+void tint_symbol(RWByteAddressBuffer buffer, uint offset, S value[4]) {
+  S array[4] = value;
+  {
+    [loop] for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      tint_symbol_1(buffer, (offset + (i * 80u)), array[i]);
+    }
+  }
+}
+
+float3x2 tint_symbol_8(uint4 buffer[20], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load = buffer[scalar_offset / 4];
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset_1 / 4];
+  const uint scalar_offset_2 = ((offset + 16u)) / 4;
+  uint4 ubo_load_2 = buffer[scalar_offset_2 / 4];
+  return float3x2(asfloat(((scalar_offset & 2) ? ubo_load.zw : ubo_load.xy)), asfloat(((scalar_offset_1 & 2) ? ubo_load_1.zw : ubo_load_1.xy)), asfloat(((scalar_offset_2 & 2) ? ubo_load_2.zw : ubo_load_2.xy)));
+}
+
+S tint_symbol_6(uint4 buffer[20], uint offset) {
+  const uint scalar_offset_3 = ((offset + 0u)) / 4;
+  const uint scalar_offset_4 = ((offset + 72u)) / 4;
+  const S tint_symbol_10 = {asint(buffer[scalar_offset_3 / 4][scalar_offset_3 % 4]), tint_symbol_8(buffer, (offset + 8u)), asint(buffer[scalar_offset_4 / 4][scalar_offset_4 % 4])};
+  return tint_symbol_10;
+}
+
+typedef S tint_symbol_5_ret[4];
+tint_symbol_5_ret tint_symbol_5(uint4 buffer[20], uint offset) {
+  S arr[4] = (S[4])0;
+  {
+    [loop] for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
+      arr[i_1] = tint_symbol_6(buffer, (offset + (i_1 * 80u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  tint_symbol(s, 0u, tint_symbol_5(u, 0u));
+  tint_symbol_1(s, 80u, tint_symbol_6(u, 160u));
+  tint_symbol_3(s, 248u, tint_symbol_8(u, 168u));
+  s.Store2(88u, asuint(asfloat(u[1].xy).yx));
+  return;
+}
diff --git a/test/tint/buffer/uniform/std140/mat3x2/to_storage.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/mat3x2/to_storage.wgsl.expected.glsl
new file mode 100644
index 0000000..3a1cb49
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/mat3x2/to_storage.wgsl.expected.glsl
@@ -0,0 +1,62 @@
+#version 310 es
+
+struct S {
+  int before;
+  mat3x2 m;
+  int after;
+};
+
+struct S_std140 {
+  int before;
+  vec2 m_0;
+  vec2 m_1;
+  vec2 m_2;
+  int after;
+};
+
+struct u_block {
+  S_std140 inner[4];
+};
+
+layout(binding = 0) uniform u_block_1 {
+  S_std140 inner[4];
+} u;
+
+struct s_block {
+  S inner[4];
+};
+
+layout(binding = 1, std430) buffer s_block_1 {
+  S inner[4];
+} s;
+S conv_S(S_std140 val) {
+  S tint_symbol = S(val.before, mat3x2(val.m_0, val.m_1, val.m_2), val.after);
+  return tint_symbol;
+}
+
+S[4] conv_arr_4_S(S_std140 val[4]) {
+  S arr[4] = S[4](S(0, mat3x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f), 0), S(0, mat3x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f), 0), S(0, mat3x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f), 0), S(0, mat3x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f), 0));
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = conv_S(val[i]);
+    }
+  }
+  return arr;
+}
+
+mat3x2 load_u_2_m() {
+  return mat3x2(u.inner[2u].m_0, u.inner[2u].m_1, u.inner[2u].m_2);
+}
+
+void f() {
+  s.inner = conv_arr_4_S(u.inner);
+  s.inner[1] = conv_S(u.inner[2u]);
+  s.inner[3].m = load_u_2_m();
+  s.inner[1].m[0] = u.inner[0u].m_1.yx;
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}
diff --git a/test/tint/buffer/uniform/std140/mat3x2/to_storage.wgsl.expected.msl b/test/tint/buffer/uniform/std140/mat3x2/to_storage.wgsl.expected.msl
new file mode 100644
index 0000000..0ee4e4c
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/mat3x2/to_storage.wgsl.expected.msl
@@ -0,0 +1,33 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+struct S {
+  /* 0x0000 */ int before;
+  /* 0x0004 */ tint_array<int8_t, 4> tint_pad;
+  /* 0x0008 */ float3x2 m;
+  /* 0x0020 */ tint_array<int8_t, 40> tint_pad_1;
+  /* 0x0048 */ int after;
+  /* 0x004c */ tint_array<int8_t, 4> tint_pad_2;
+};
+
+kernel void f(device tint_array<S, 4>* tint_symbol [[buffer(1)]], const constant tint_array<S, 4>* tint_symbol_1 [[buffer(0)]]) {
+  *(tint_symbol) = *(tint_symbol_1);
+  (*(tint_symbol))[1] = (*(tint_symbol_1))[2];
+  (*(tint_symbol))[3].m = (*(tint_symbol_1))[2].m;
+  (*(tint_symbol))[1].m[0] = float2((*(tint_symbol_1))[0].m[1]).yx;
+  return;
+}
+
diff --git a/test/tint/buffer/uniform/std140/mat3x2/to_storage.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/mat3x2/to_storage.wgsl.expected.spvasm
new file mode 100644
index 0000000..ec518fd
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/mat3x2/to_storage.wgsl.expected.spvasm
@@ -0,0 +1,181 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 104
+; Schema: 0
+               OpCapability Shader
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %u_block "u_block"
+               OpMemberName %u_block 0 "inner"
+               OpName %S_std140 "S_std140"
+               OpMemberName %S_std140 0 "before"
+               OpMemberName %S_std140 1 "m_0"
+               OpMemberName %S_std140 2 "m_1"
+               OpMemberName %S_std140 3 "m_2"
+               OpMemberName %S_std140 4 "after"
+               OpName %u "u"
+               OpName %s_block "s_block"
+               OpMemberName %s_block 0 "inner"
+               OpName %S "S"
+               OpMemberName %S 0 "before"
+               OpMemberName %S 1 "m"
+               OpMemberName %S 2 "after"
+               OpName %s "s"
+               OpName %conv_S "conv_S"
+               OpName %val "val"
+               OpName %conv_arr_4_S "conv_arr_4_S"
+               OpName %val_0 "val"
+               OpName %arr "arr"
+               OpName %i "i"
+               OpName %var_for_index "var_for_index"
+               OpName %load_u_2_m "load_u_2_m"
+               OpName %f "f"
+               OpDecorate %u_block Block
+               OpMemberDecorate %u_block 0 Offset 0
+               OpMemberDecorate %S_std140 0 Offset 0
+               OpMemberDecorate %S_std140 1 Offset 8
+               OpMemberDecorate %S_std140 2 Offset 16
+               OpMemberDecorate %S_std140 3 Offset 24
+               OpMemberDecorate %S_std140 4 Offset 72
+               OpDecorate %_arr_S_std140_uint_4 ArrayStride 80
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+               OpDecorate %s_block Block
+               OpMemberDecorate %s_block 0 Offset 0
+               OpMemberDecorate %S 0 Offset 0
+               OpMemberDecorate %S 1 Offset 8
+               OpMemberDecorate %S 1 ColMajor
+               OpMemberDecorate %S 1 MatrixStride 8
+               OpMemberDecorate %S 2 Offset 72
+               OpDecorate %_arr_S_uint_4 ArrayStride 80
+               OpDecorate %s DescriptorSet 0
+               OpDecorate %s Binding 1
+        %int = OpTypeInt 32 1
+      %float = OpTypeFloat 32
+    %v2float = OpTypeVector %float 2
+   %S_std140 = OpTypeStruct %int %v2float %v2float %v2float %int
+       %uint = OpTypeInt 32 0
+     %uint_4 = OpConstant %uint 4
+%_arr_S_std140_uint_4 = OpTypeArray %S_std140 %uint_4
+    %u_block = OpTypeStruct %_arr_S_std140_uint_4
+%_ptr_Uniform_u_block = OpTypePointer Uniform %u_block
+          %u = OpVariable %_ptr_Uniform_u_block Uniform
+%mat3v2float = OpTypeMatrix %v2float 3
+          %S = OpTypeStruct %int %mat3v2float %int
+%_arr_S_uint_4 = OpTypeArray %S %uint_4
+    %s_block = OpTypeStruct %_arr_S_uint_4
+%_ptr_StorageBuffer_s_block = OpTypePointer StorageBuffer %s_block
+          %s = OpVariable %_ptr_StorageBuffer_s_block StorageBuffer
+         %17 = OpTypeFunction %S %S_std140
+         %28 = OpTypeFunction %_arr_S_uint_4 %_arr_S_std140_uint_4
+%_ptr_Function__arr_S_uint_4 = OpTypePointer Function %_arr_S_uint_4
+         %34 = OpConstantNull %_arr_S_uint_4
+%_ptr_Function_uint = OpTypePointer Function %uint
+         %37 = OpConstantNull %uint
+       %bool = OpTypeBool
+%_ptr_Function__arr_S_std140_uint_4 = OpTypePointer Function %_arr_S_std140_uint_4
+         %50 = OpConstantNull %_arr_S_std140_uint_4
+%_ptr_Function_S = OpTypePointer Function %S
+%_ptr_Function_S_std140 = OpTypePointer Function %S_std140
+     %uint_1 = OpConstant %uint 1
+         %63 = OpTypeFunction %mat3v2float
+     %uint_0 = OpConstant %uint 0
+     %uint_2 = OpConstant %uint 2
+%_ptr_Uniform_v2float = OpTypePointer Uniform %v2float
+     %uint_3 = OpConstant %uint 3
+       %void = OpTypeVoid
+         %77 = OpTypeFunction %void
+%_ptr_StorageBuffer__arr_S_uint_4 = OpTypePointer StorageBuffer %_arr_S_uint_4
+%_ptr_Uniform__arr_S_std140_uint_4 = OpTypePointer Uniform %_arr_S_std140_uint_4
+      %int_1 = OpConstant %int 1
+%_ptr_StorageBuffer_S = OpTypePointer StorageBuffer %S
+%_ptr_Uniform_S_std140 = OpTypePointer Uniform %S_std140
+      %int_3 = OpConstant %int 3
+%_ptr_StorageBuffer_mat3v2float = OpTypePointer StorageBuffer %mat3v2float
+         %98 = OpConstantNull %int
+%_ptr_StorageBuffer_v2float = OpTypePointer StorageBuffer %v2float
+     %conv_S = OpFunction %S None %17
+        %val = OpFunctionParameter %S_std140
+         %20 = OpLabel
+         %21 = OpCompositeExtract %int %val 0
+         %22 = OpCompositeExtract %v2float %val 1
+         %23 = OpCompositeExtract %v2float %val 2
+         %24 = OpCompositeExtract %v2float %val 3
+         %25 = OpCompositeConstruct %mat3v2float %22 %23 %24
+         %26 = OpCompositeExtract %int %val 4
+         %27 = OpCompositeConstruct %S %21 %25 %26
+               OpReturnValue %27
+               OpFunctionEnd
+%conv_arr_4_S = OpFunction %_arr_S_uint_4 None %28
+      %val_0 = OpFunctionParameter %_arr_S_std140_uint_4
+         %31 = OpLabel
+        %arr = OpVariable %_ptr_Function__arr_S_uint_4 Function %34
+          %i = OpVariable %_ptr_Function_uint Function %37
+%var_for_index = OpVariable %_ptr_Function__arr_S_std140_uint_4 Function %50
+               OpBranch %38
+         %38 = OpLabel
+               OpLoopMerge %39 %40 None
+               OpBranch %41
+         %41 = OpLabel
+         %43 = OpLoad %uint %i
+         %44 = OpULessThan %bool %43 %uint_4
+         %42 = OpLogicalNot %bool %44
+               OpSelectionMerge %46 None
+               OpBranchConditional %42 %47 %46
+         %47 = OpLabel
+               OpBranch %39
+         %46 = OpLabel
+               OpStore %var_for_index %val_0
+         %51 = OpLoad %uint %i
+         %53 = OpAccessChain %_ptr_Function_S %arr %51
+         %55 = OpLoad %uint %i
+         %57 = OpAccessChain %_ptr_Function_S_std140 %var_for_index %55
+         %58 = OpLoad %S_std140 %57
+         %54 = OpFunctionCall %S %conv_S %58
+               OpStore %53 %54
+               OpBranch %40
+         %40 = OpLabel
+         %59 = OpLoad %uint %i
+         %61 = OpIAdd %uint %59 %uint_1
+               OpStore %i %61
+               OpBranch %38
+         %39 = OpLabel
+         %62 = OpLoad %_arr_S_uint_4 %arr
+               OpReturnValue %62
+               OpFunctionEnd
+ %load_u_2_m = OpFunction %mat3v2float None %63
+         %65 = OpLabel
+         %69 = OpAccessChain %_ptr_Uniform_v2float %u %uint_0 %uint_2 %uint_1
+         %70 = OpLoad %v2float %69
+         %71 = OpAccessChain %_ptr_Uniform_v2float %u %uint_0 %uint_2 %uint_2
+         %72 = OpLoad %v2float %71
+         %74 = OpAccessChain %_ptr_Uniform_v2float %u %uint_0 %uint_2 %uint_3
+         %75 = OpLoad %v2float %74
+         %76 = OpCompositeConstruct %mat3v2float %70 %72 %75
+               OpReturnValue %76
+               OpFunctionEnd
+          %f = OpFunction %void None %77
+         %80 = OpLabel
+         %82 = OpAccessChain %_ptr_StorageBuffer__arr_S_uint_4 %s %uint_0
+         %85 = OpAccessChain %_ptr_Uniform__arr_S_std140_uint_4 %u %uint_0
+         %86 = OpLoad %_arr_S_std140_uint_4 %85
+         %83 = OpFunctionCall %_arr_S_uint_4 %conv_arr_4_S %86
+               OpStore %82 %83
+         %89 = OpAccessChain %_ptr_StorageBuffer_S %s %uint_0 %int_1
+         %92 = OpAccessChain %_ptr_Uniform_S_std140 %u %uint_0 %uint_2
+         %93 = OpLoad %S_std140 %92
+         %90 = OpFunctionCall %S %conv_S %93
+               OpStore %89 %90
+         %96 = OpAccessChain %_ptr_StorageBuffer_mat3v2float %s %uint_0 %int_3 %uint_1
+         %97 = OpFunctionCall %mat3v2float %load_u_2_m
+               OpStore %96 %97
+        %100 = OpAccessChain %_ptr_StorageBuffer_v2float %s %uint_0 %int_1 %uint_1 %98
+        %101 = OpAccessChain %_ptr_Uniform_v2float %u %uint_0 %37 %uint_2
+        %102 = OpLoad %v2float %101
+        %103 = OpVectorShuffle %v2float %102 %102 1 0
+               OpStore %100 %103
+               OpReturn
+               OpFunctionEnd
diff --git a/test/tint/buffer/uniform/std140/mat3x2/to_storage.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/mat3x2/to_storage.wgsl.expected.wgsl
new file mode 100644
index 0000000..56b0f84
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/mat3x2/to_storage.wgsl.expected.wgsl
@@ -0,0 +1,18 @@
+struct S {
+  before : i32,
+  @size(64)
+  m : mat3x2<f32>,
+  after : i32,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S, 4>;
+
+@group(0) @binding(1) var<storage, read_write> s : array<S, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+  s = u;
+  s[1] = u[2];
+  s[3].m = u[2].m;
+  s[1].m[0] = u[0].m[1].yx;
+}
diff --git a/test/tint/buffer/uniform/std140/mat3x2/to_workgroup.wgsl b/test/tint/buffer/uniform/std140/mat3x2/to_workgroup.wgsl
new file mode 100644
index 0000000..282f12d
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/mat3x2/to_workgroup.wgsl
@@ -0,0 +1,16 @@
+struct S {
+  before : i32,
+  @size(64) m : mat3x2<f32>,
+  after : i32,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S, 4>;
+var<workgroup> w : array<S, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+    w = u;
+    w[1] = u[2];
+    w[3].m = u[2].m;
+    w[1].m[0] = u[0].m[1].yx;
+}
diff --git a/test/tint/buffer/uniform/std140/mat3x2/to_workgroup.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/mat3x2/to_workgroup.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..98991bb
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/mat3x2/to_workgroup.wgsl.expected.dxc.hlsl
@@ -0,0 +1,63 @@
+struct S {
+  int before;
+  float3x2 m;
+  int after;
+};
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[20];
+};
+groupshared S w[4];
+
+struct tint_symbol_1 {
+  uint local_invocation_index : SV_GroupIndex;
+};
+
+float3x2 tint_symbol_5(uint4 buffer[20], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load = buffer[scalar_offset / 4];
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset_1 / 4];
+  const uint scalar_offset_2 = ((offset + 16u)) / 4;
+  uint4 ubo_load_2 = buffer[scalar_offset_2 / 4];
+  return float3x2(asfloat(((scalar_offset & 2) ? ubo_load.zw : ubo_load.xy)), asfloat(((scalar_offset_1 & 2) ? ubo_load_1.zw : ubo_load_1.xy)), asfloat(((scalar_offset_2 & 2) ? ubo_load_2.zw : ubo_load_2.xy)));
+}
+
+S tint_symbol_3(uint4 buffer[20], uint offset) {
+  const uint scalar_offset_3 = ((offset + 0u)) / 4;
+  const uint scalar_offset_4 = ((offset + 72u)) / 4;
+  const S tint_symbol_8 = {asint(buffer[scalar_offset_3 / 4][scalar_offset_3 % 4]), tint_symbol_5(buffer, (offset + 8u)), asint(buffer[scalar_offset_4 / 4][scalar_offset_4 % 4])};
+  return tint_symbol_8;
+}
+
+typedef S tint_symbol_2_ret[4];
+tint_symbol_2_ret tint_symbol_2(uint4 buffer[20], uint offset) {
+  S arr[4] = (S[4])0;
+  {
+    [loop] for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
+      arr[i_1] = tint_symbol_3(buffer, (offset + (i_1 * 80u)));
+    }
+  }
+  return arr;
+}
+
+void f_inner(uint local_invocation_index) {
+  {
+    [loop] for(uint idx = local_invocation_index; (idx < 4u); idx = (idx + 1u)) {
+      const uint i = idx;
+      const S tint_symbol_7 = (S)0;
+      w[i] = tint_symbol_7;
+    }
+  }
+  GroupMemoryBarrierWithGroupSync();
+  w = tint_symbol_2(u, 0u);
+  w[1] = tint_symbol_3(u, 160u);
+  w[3].m = tint_symbol_5(u, 168u);
+  w[1].m[0] = asfloat(u[1].xy).yx;
+}
+
+[numthreads(1, 1, 1)]
+void f(tint_symbol_1 tint_symbol) {
+  f_inner(tint_symbol.local_invocation_index);
+  return;
+}
diff --git a/test/tint/buffer/uniform/std140/mat3x2/to_workgroup.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/mat3x2/to_workgroup.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..98991bb
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/mat3x2/to_workgroup.wgsl.expected.fxc.hlsl
@@ -0,0 +1,63 @@
+struct S {
+  int before;
+  float3x2 m;
+  int after;
+};
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[20];
+};
+groupshared S w[4];
+
+struct tint_symbol_1 {
+  uint local_invocation_index : SV_GroupIndex;
+};
+
+float3x2 tint_symbol_5(uint4 buffer[20], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load = buffer[scalar_offset / 4];
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset_1 / 4];
+  const uint scalar_offset_2 = ((offset + 16u)) / 4;
+  uint4 ubo_load_2 = buffer[scalar_offset_2 / 4];
+  return float3x2(asfloat(((scalar_offset & 2) ? ubo_load.zw : ubo_load.xy)), asfloat(((scalar_offset_1 & 2) ? ubo_load_1.zw : ubo_load_1.xy)), asfloat(((scalar_offset_2 & 2) ? ubo_load_2.zw : ubo_load_2.xy)));
+}
+
+S tint_symbol_3(uint4 buffer[20], uint offset) {
+  const uint scalar_offset_3 = ((offset + 0u)) / 4;
+  const uint scalar_offset_4 = ((offset + 72u)) / 4;
+  const S tint_symbol_8 = {asint(buffer[scalar_offset_3 / 4][scalar_offset_3 % 4]), tint_symbol_5(buffer, (offset + 8u)), asint(buffer[scalar_offset_4 / 4][scalar_offset_4 % 4])};
+  return tint_symbol_8;
+}
+
+typedef S tint_symbol_2_ret[4];
+tint_symbol_2_ret tint_symbol_2(uint4 buffer[20], uint offset) {
+  S arr[4] = (S[4])0;
+  {
+    [loop] for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
+      arr[i_1] = tint_symbol_3(buffer, (offset + (i_1 * 80u)));
+    }
+  }
+  return arr;
+}
+
+void f_inner(uint local_invocation_index) {
+  {
+    [loop] for(uint idx = local_invocation_index; (idx < 4u); idx = (idx + 1u)) {
+      const uint i = idx;
+      const S tint_symbol_7 = (S)0;
+      w[i] = tint_symbol_7;
+    }
+  }
+  GroupMemoryBarrierWithGroupSync();
+  w = tint_symbol_2(u, 0u);
+  w[1] = tint_symbol_3(u, 160u);
+  w[3].m = tint_symbol_5(u, 168u);
+  w[1].m[0] = asfloat(u[1].xy).yx;
+}
+
+[numthreads(1, 1, 1)]
+void f(tint_symbol_1 tint_symbol) {
+  f_inner(tint_symbol.local_invocation_index);
+  return;
+}
diff --git a/test/tint/buffer/uniform/std140/mat3x2/to_workgroup.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/mat3x2/to_workgroup.wgsl.expected.glsl
new file mode 100644
index 0000000..1c5b65e
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/mat3x2/to_workgroup.wgsl.expected.glsl
@@ -0,0 +1,64 @@
+#version 310 es
+
+struct S {
+  int before;
+  mat3x2 m;
+  int after;
+};
+
+struct S_std140 {
+  int before;
+  vec2 m_0;
+  vec2 m_1;
+  vec2 m_2;
+  int after;
+};
+
+struct u_block {
+  S_std140 inner[4];
+};
+
+layout(binding = 0) uniform u_block_1 {
+  S_std140 inner[4];
+} u;
+
+shared S w[4];
+S conv_S(S_std140 val) {
+  S tint_symbol = S(val.before, mat3x2(val.m_0, val.m_1, val.m_2), val.after);
+  return tint_symbol;
+}
+
+S[4] conv_arr_4_S(S_std140 val[4]) {
+  S arr[4] = S[4](S(0, mat3x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f), 0), S(0, mat3x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f), 0), S(0, mat3x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f), 0), S(0, mat3x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f), 0));
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = conv_S(val[i]);
+    }
+  }
+  return arr;
+}
+
+mat3x2 load_u_2_m() {
+  return mat3x2(u.inner[2u].m_0, u.inner[2u].m_1, u.inner[2u].m_2);
+}
+
+void f(uint local_invocation_index) {
+  {
+    for(uint idx = local_invocation_index; (idx < 4u); idx = (idx + 1u)) {
+      uint i = idx;
+      S tint_symbol_1 = S(0, mat3x2(vec2(0.0f), vec2(0.0f), vec2(0.0f)), 0);
+      w[i] = tint_symbol_1;
+    }
+  }
+  barrier();
+  w = conv_arr_4_S(u.inner);
+  w[1] = conv_S(u.inner[2u]);
+  w[3].m = load_u_2_m();
+  w[1].m[0] = u.inner[0u].m_1.yx;
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f(gl_LocalInvocationIndex);
+  return;
+}
diff --git a/test/tint/buffer/uniform/std140/mat3x2/to_workgroup.wgsl.expected.msl b/test/tint/buffer/uniform/std140/mat3x2/to_workgroup.wgsl.expected.msl
new file mode 100644
index 0000000..97de8a8
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/mat3x2/to_workgroup.wgsl.expected.msl
@@ -0,0 +1,48 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+struct S {
+  /* 0x0000 */ int before;
+  /* 0x0004 */ tint_array<int8_t, 4> tint_pad;
+  /* 0x0008 */ float3x2 m;
+  /* 0x0020 */ tint_array<int8_t, 40> tint_pad_1;
+  /* 0x0048 */ int after;
+  /* 0x004c */ tint_array<int8_t, 4> tint_pad_2;
+};
+
+struct tint_symbol_6 {
+  tint_array<S, 4> w;
+};
+
+void f_inner(uint local_invocation_index, threadgroup tint_array<S, 4>* const tint_symbol_1, const constant tint_array<S, 4>* const tint_symbol_2) {
+  for(uint idx = local_invocation_index; (idx < 4u); idx = (idx + 1u)) {
+    uint const i = idx;
+    S const tint_symbol = S{};
+    (*(tint_symbol_1))[i] = tint_symbol;
+  }
+  threadgroup_barrier(mem_flags::mem_threadgroup);
+  *(tint_symbol_1) = *(tint_symbol_2);
+  (*(tint_symbol_1))[1] = (*(tint_symbol_2))[2];
+  (*(tint_symbol_1))[3].m = (*(tint_symbol_2))[2].m;
+  (*(tint_symbol_1))[1].m[0] = float2((*(tint_symbol_2))[0].m[1]).yx;
+}
+
+kernel void f(const constant tint_array<S, 4>* tint_symbol_5 [[buffer(0)]], threadgroup tint_symbol_6* tint_symbol_4 [[threadgroup(0)]], uint local_invocation_index [[thread_index_in_threadgroup]]) {
+  threadgroup tint_array<S, 4>* const tint_symbol_3 = &((*(tint_symbol_4)).w);
+  f_inner(local_invocation_index, tint_symbol_3, tint_symbol_5);
+  return;
+}
+
diff --git a/test/tint/buffer/uniform/std140/mat3x2/to_workgroup.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/mat3x2/to_workgroup.wgsl.expected.spvasm
new file mode 100644
index 0000000..325aa7e
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/mat3x2/to_workgroup.wgsl.expected.spvasm
@@ -0,0 +1,215 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 126
+; Schema: 0
+               OpCapability Shader
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f" %local_invocation_index_1
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %local_invocation_index_1 "local_invocation_index_1"
+               OpName %u_block "u_block"
+               OpMemberName %u_block 0 "inner"
+               OpName %S_std140 "S_std140"
+               OpMemberName %S_std140 0 "before"
+               OpMemberName %S_std140 1 "m_0"
+               OpMemberName %S_std140 2 "m_1"
+               OpMemberName %S_std140 3 "m_2"
+               OpMemberName %S_std140 4 "after"
+               OpName %u "u"
+               OpName %S "S"
+               OpMemberName %S 0 "before"
+               OpMemberName %S 1 "m"
+               OpMemberName %S 2 "after"
+               OpName %w "w"
+               OpName %conv_S "conv_S"
+               OpName %val "val"
+               OpName %conv_arr_4_S "conv_arr_4_S"
+               OpName %val_0 "val"
+               OpName %arr "arr"
+               OpName %i "i"
+               OpName %var_for_index "var_for_index"
+               OpName %load_u_2_m "load_u_2_m"
+               OpName %f_inner "f_inner"
+               OpName %local_invocation_index "local_invocation_index"
+               OpName %idx "idx"
+               OpName %f "f"
+               OpDecorate %local_invocation_index_1 BuiltIn LocalInvocationIndex
+               OpDecorate %u_block Block
+               OpMemberDecorate %u_block 0 Offset 0
+               OpMemberDecorate %S_std140 0 Offset 0
+               OpMemberDecorate %S_std140 1 Offset 8
+               OpMemberDecorate %S_std140 2 Offset 16
+               OpMemberDecorate %S_std140 3 Offset 24
+               OpMemberDecorate %S_std140 4 Offset 72
+               OpDecorate %_arr_S_std140_uint_4 ArrayStride 80
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+               OpMemberDecorate %S 0 Offset 0
+               OpMemberDecorate %S 1 Offset 8
+               OpMemberDecorate %S 1 ColMajor
+               OpMemberDecorate %S 1 MatrixStride 8
+               OpMemberDecorate %S 2 Offset 72
+               OpDecorate %_arr_S_uint_4 ArrayStride 80
+       %uint = OpTypeInt 32 0
+%_ptr_Input_uint = OpTypePointer Input %uint
+%local_invocation_index_1 = OpVariable %_ptr_Input_uint Input
+        %int = OpTypeInt 32 1
+      %float = OpTypeFloat 32
+    %v2float = OpTypeVector %float 2
+   %S_std140 = OpTypeStruct %int %v2float %v2float %v2float %int
+     %uint_4 = OpConstant %uint 4
+%_arr_S_std140_uint_4 = OpTypeArray %S_std140 %uint_4
+    %u_block = OpTypeStruct %_arr_S_std140_uint_4
+%_ptr_Uniform_u_block = OpTypePointer Uniform %u_block
+          %u = OpVariable %_ptr_Uniform_u_block Uniform
+%mat3v2float = OpTypeMatrix %v2float 3
+          %S = OpTypeStruct %int %mat3v2float %int
+%_arr_S_uint_4 = OpTypeArray %S %uint_4
+%_ptr_Workgroup__arr_S_uint_4 = OpTypePointer Workgroup %_arr_S_uint_4
+          %w = OpVariable %_ptr_Workgroup__arr_S_uint_4 Workgroup
+         %18 = OpTypeFunction %S %S_std140
+         %29 = OpTypeFunction %_arr_S_uint_4 %_arr_S_std140_uint_4
+%_ptr_Function__arr_S_uint_4 = OpTypePointer Function %_arr_S_uint_4
+         %35 = OpConstantNull %_arr_S_uint_4
+%_ptr_Function_uint = OpTypePointer Function %uint
+         %38 = OpConstantNull %uint
+       %bool = OpTypeBool
+%_ptr_Function__arr_S_std140_uint_4 = OpTypePointer Function %_arr_S_std140_uint_4
+         %51 = OpConstantNull %_arr_S_std140_uint_4
+%_ptr_Function_S = OpTypePointer Function %S
+%_ptr_Function_S_std140 = OpTypePointer Function %S_std140
+     %uint_1 = OpConstant %uint 1
+         %64 = OpTypeFunction %mat3v2float
+     %uint_0 = OpConstant %uint 0
+     %uint_2 = OpConstant %uint 2
+%_ptr_Uniform_v2float = OpTypePointer Uniform %v2float
+     %uint_3 = OpConstant %uint 3
+       %void = OpTypeVoid
+         %78 = OpTypeFunction %void %uint
+%_ptr_Workgroup_S = OpTypePointer Workgroup %S
+         %96 = OpConstantNull %S
+   %uint_264 = OpConstant %uint 264
+%_ptr_Uniform__arr_S_std140_uint_4 = OpTypePointer Uniform %_arr_S_std140_uint_4
+      %int_1 = OpConstant %int 1
+%_ptr_Uniform_S_std140 = OpTypePointer Uniform %S_std140
+      %int_3 = OpConstant %int 3
+%_ptr_Workgroup_mat3v2float = OpTypePointer Workgroup %mat3v2float
+        %115 = OpConstantNull %int
+%_ptr_Workgroup_v2float = OpTypePointer Workgroup %v2float
+        %121 = OpTypeFunction %void
+     %conv_S = OpFunction %S None %18
+        %val = OpFunctionParameter %S_std140
+         %21 = OpLabel
+         %22 = OpCompositeExtract %int %val 0
+         %23 = OpCompositeExtract %v2float %val 1
+         %24 = OpCompositeExtract %v2float %val 2
+         %25 = OpCompositeExtract %v2float %val 3
+         %26 = OpCompositeConstruct %mat3v2float %23 %24 %25
+         %27 = OpCompositeExtract %int %val 4
+         %28 = OpCompositeConstruct %S %22 %26 %27
+               OpReturnValue %28
+               OpFunctionEnd
+%conv_arr_4_S = OpFunction %_arr_S_uint_4 None %29
+      %val_0 = OpFunctionParameter %_arr_S_std140_uint_4
+         %32 = OpLabel
+        %arr = OpVariable %_ptr_Function__arr_S_uint_4 Function %35
+          %i = OpVariable %_ptr_Function_uint Function %38
+%var_for_index = OpVariable %_ptr_Function__arr_S_std140_uint_4 Function %51
+               OpBranch %39
+         %39 = OpLabel
+               OpLoopMerge %40 %41 None
+               OpBranch %42
+         %42 = OpLabel
+         %44 = OpLoad %uint %i
+         %45 = OpULessThan %bool %44 %uint_4
+         %43 = OpLogicalNot %bool %45
+               OpSelectionMerge %47 None
+               OpBranchConditional %43 %48 %47
+         %48 = OpLabel
+               OpBranch %40
+         %47 = OpLabel
+               OpStore %var_for_index %val_0
+         %52 = OpLoad %uint %i
+         %54 = OpAccessChain %_ptr_Function_S %arr %52
+         %56 = OpLoad %uint %i
+         %58 = OpAccessChain %_ptr_Function_S_std140 %var_for_index %56
+         %59 = OpLoad %S_std140 %58
+         %55 = OpFunctionCall %S %conv_S %59
+               OpStore %54 %55
+               OpBranch %41
+         %41 = OpLabel
+         %60 = OpLoad %uint %i
+         %62 = OpIAdd %uint %60 %uint_1
+               OpStore %i %62
+               OpBranch %39
+         %40 = OpLabel
+         %63 = OpLoad %_arr_S_uint_4 %arr
+               OpReturnValue %63
+               OpFunctionEnd
+ %load_u_2_m = OpFunction %mat3v2float None %64
+         %66 = OpLabel
+         %70 = OpAccessChain %_ptr_Uniform_v2float %u %uint_0 %uint_2 %uint_1
+         %71 = OpLoad %v2float %70
+         %72 = OpAccessChain %_ptr_Uniform_v2float %u %uint_0 %uint_2 %uint_2
+         %73 = OpLoad %v2float %72
+         %75 = OpAccessChain %_ptr_Uniform_v2float %u %uint_0 %uint_2 %uint_3
+         %76 = OpLoad %v2float %75
+         %77 = OpCompositeConstruct %mat3v2float %71 %73 %76
+               OpReturnValue %77
+               OpFunctionEnd
+    %f_inner = OpFunction %void None %78
+%local_invocation_index = OpFunctionParameter %uint
+         %82 = OpLabel
+        %idx = OpVariable %_ptr_Function_uint Function %38
+               OpStore %idx %local_invocation_index
+               OpBranch %84
+         %84 = OpLabel
+               OpLoopMerge %85 %86 None
+               OpBranch %87
+         %87 = OpLabel
+         %89 = OpLoad %uint %idx
+         %90 = OpULessThan %bool %89 %uint_4
+         %88 = OpLogicalNot %bool %90
+               OpSelectionMerge %91 None
+               OpBranchConditional %88 %92 %91
+         %92 = OpLabel
+               OpBranch %85
+         %91 = OpLabel
+         %93 = OpLoad %uint %idx
+         %95 = OpAccessChain %_ptr_Workgroup_S %w %93
+               OpStore %95 %96
+               OpBranch %86
+         %86 = OpLabel
+         %97 = OpLoad %uint %idx
+         %98 = OpIAdd %uint %97 %uint_1
+               OpStore %idx %98
+               OpBranch %84
+         %85 = OpLabel
+               OpControlBarrier %uint_2 %uint_2 %uint_264
+        %103 = OpAccessChain %_ptr_Uniform__arr_S_std140_uint_4 %u %uint_0
+        %104 = OpLoad %_arr_S_std140_uint_4 %103
+        %101 = OpFunctionCall %_arr_S_uint_4 %conv_arr_4_S %104
+               OpStore %w %101
+        %106 = OpAccessChain %_ptr_Workgroup_S %w %int_1
+        %109 = OpAccessChain %_ptr_Uniform_S_std140 %u %uint_0 %uint_2
+        %110 = OpLoad %S_std140 %109
+        %107 = OpFunctionCall %S %conv_S %110
+               OpStore %106 %107
+        %113 = OpAccessChain %_ptr_Workgroup_mat3v2float %w %int_3 %uint_1
+        %114 = OpFunctionCall %mat3v2float %load_u_2_m
+               OpStore %113 %114
+        %117 = OpAccessChain %_ptr_Workgroup_v2float %w %int_1 %uint_1 %115
+        %118 = OpAccessChain %_ptr_Uniform_v2float %u %uint_0 %38 %uint_2
+        %119 = OpLoad %v2float %118
+        %120 = OpVectorShuffle %v2float %119 %119 1 0
+               OpStore %117 %120
+               OpReturn
+               OpFunctionEnd
+          %f = OpFunction %void None %121
+        %123 = OpLabel
+        %125 = OpLoad %uint %local_invocation_index_1
+        %124 = OpFunctionCall %void %f_inner %125
+               OpReturn
+               OpFunctionEnd
diff --git a/test/tint/buffer/uniform/std140/mat3x2/to_workgroup.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/mat3x2/to_workgroup.wgsl.expected.wgsl
new file mode 100644
index 0000000..286d523
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/mat3x2/to_workgroup.wgsl.expected.wgsl
@@ -0,0 +1,18 @@
+struct S {
+  before : i32,
+  @size(64)
+  m : mat3x2<f32>,
+  after : i32,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S, 4>;
+
+var<workgroup> w : array<S, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+  w = u;
+  w[1] = u[2];
+  w[3].m = u[2].m;
+  w[1].m[0] = u[0].m[1].yx;
+}
diff --git a/test/tint/buffer/uniform/std140/mat4x2/dynamic_index_via_ptr.wgsl b/test/tint/buffer/uniform/std140/mat4x2/dynamic_index_via_ptr.wgsl
new file mode 100644
index 0000000..ea019e1
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/mat4x2/dynamic_index_via_ptr.wgsl
@@ -0,0 +1,33 @@
+struct Inner {  // Innermost struct: holds the mat4x2<f32> that this shader reads from the uniform buffer.
+  m : mat4x2<f32>,
+}
+
+struct Outer {  // Wraps a fixed-size array of Inner, adding one more nesting level.
+  a : array<Inner, 4>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<Outer, 4>;  // Uniform buffer: 4 Outer elements, each holding 4 Inner.
+
+var<private> counter = 0;  // Only modified by i() below.
+fn i() -> i32 { counter++; return counter; }  // Increments counter, then returns the new value: successive calls yield 1, 2, 3, ...
+
+@compute @workgroup_size(1)
+fn f() {
+  let I = 1;  // Not referenced anywhere else in this shader.
+  // Build a pointer chain into the uniform buffer one access at a time; each i() call supplies a runtime index.
+  let p_a           = &a;
+  let p_a_i         = &((*p_a)[i()]);  // 1st i() call: index into array<Outer, 4>.
+  let p_a_i_a       = &((*p_a_i).a);
+  let p_a_i_a_i     = &((*p_a_i_a)[i()]);  // 2nd i() call: index into array<Inner, 4>.
+  let p_a_i_a_i_m   = &((*p_a_i_a_i).m);
+  let p_a_i_a_i_m_i = &((*p_a_i_a_i_m)[i()]);  // 3rd i() call: selects a column of the mat4x2.
+
+  // Load through every pointer in the chain, from the whole array down to a single f32.
+  let l_a             : array<Outer, 4> =  *p_a;
+  let l_a_i           : Outer           =  *p_a_i;
+  let l_a_i_a         : array<Inner, 4> =  *p_a_i_a;
+  let l_a_i_a_i       : Inner           =  *p_a_i_a_i;
+  let l_a_i_a_i_m     : mat4x2<f32>     =  *p_a_i_a_i_m;
+  let l_a_i_a_i_m_i   : vec2<f32>       =  *p_a_i_a_i_m_i;
+  let l_a_i_a_i_m_i_i : f32             = (*p_a_i_a_i_m_i)[i()];  // 4th i() call. NOTE(review): counter is 4 here, past the end of a vec2<f32> - confirm this is intentional.
+}
diff --git a/test/tint/buffer/uniform/std140/mat4x2/dynamic_index_via_ptr.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/mat4x2/dynamic_index_via_ptr.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..e6bf304
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/mat4x2/dynamic_index_via_ptr.wgsl.expected.dxc.hlsl
@@ -0,0 +1,83 @@
+struct Inner {
+  float4x2 m;
+};
+struct Outer {
+  Inner a[4];
+};
+
+cbuffer cbuffer_a : register(b0, space0) {
+  uint4 a[32];
+};
+static int counter = 0;
+
+int i() {
+  counter = (counter + 1);
+  return counter;
+}
+
+float4x2 tint_symbol_8(uint4 buffer[32], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load = buffer[scalar_offset / 4];
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset_1 / 4];
+  const uint scalar_offset_2 = ((offset + 16u)) / 4;
+  uint4 ubo_load_2 = buffer[scalar_offset_2 / 4];
+  const uint scalar_offset_3 = ((offset + 24u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_3 / 4];
+  return float4x2(asfloat(((scalar_offset & 2) ? ubo_load.zw : ubo_load.xy)), asfloat(((scalar_offset_1 & 2) ? ubo_load_1.zw : ubo_load_1.xy)), asfloat(((scalar_offset_2 & 2) ? ubo_load_2.zw : ubo_load_2.xy)), asfloat(((scalar_offset_3 & 2) ? ubo_load_3.zw : ubo_load_3.xy)));
+}
+
+Inner tint_symbol_7(uint4 buffer[32], uint offset) {
+  const Inner tint_symbol_11 = {tint_symbol_8(buffer, (offset + 0u))};
+  return tint_symbol_11;
+}
+
+typedef Inner tint_symbol_6_ret[4];
+tint_symbol_6_ret tint_symbol_6(uint4 buffer[32], uint offset) {
+  Inner arr[4] = (Inner[4])0;
+  {
+    [loop] for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
+      arr[i_1] = tint_symbol_7(buffer, (offset + (i_1 * 32u)));
+    }
+  }
+  return arr;
+}
+
+Outer tint_symbol_5(uint4 buffer[32], uint offset) {
+  const Outer tint_symbol_12 = {tint_symbol_6(buffer, (offset + 0u))};
+  return tint_symbol_12;
+}
+
+typedef Outer tint_symbol_4_ret[4];
+tint_symbol_4_ret tint_symbol_4(uint4 buffer[32], uint offset) {
+  Outer arr_1[4] = (Outer[4])0;
+  {
+    [loop] for(uint i_2 = 0u; (i_2 < 4u); i_2 = (i_2 + 1u)) {
+      arr_1[i_2] = tint_symbol_5(buffer, (offset + (i_2 * 128u)));
+    }
+  }
+  return arr_1;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const int I = 1;
+  const int p_a_i_save = i();
+  const int p_a_i_a_i_save = i();
+  const int p_a_i_a_i_m_i_save = i();
+  const Outer l_a[4] = tint_symbol_4(a, 0u);
+  const Outer l_a_i = tint_symbol_5(a, (128u * uint(p_a_i_save)));
+  const Inner l_a_i_a[4] = tint_symbol_6(a, (128u * uint(p_a_i_save)));
+  const Inner l_a_i_a_i = tint_symbol_7(a, ((128u * uint(p_a_i_save)) + (32u * uint(p_a_i_a_i_save))));
+  const float4x2 l_a_i_a_i_m = tint_symbol_8(a, ((128u * uint(p_a_i_save)) + (32u * uint(p_a_i_a_i_save))));
+  const uint scalar_offset_4 = ((((128u * uint(p_a_i_save)) + (32u * uint(p_a_i_a_i_save))) + (8u * uint(p_a_i_a_i_m_i_save)))) / 4;
+  uint4 ubo_load_4 = a[scalar_offset_4 / 4];
+  const float2 l_a_i_a_i_m_i = asfloat(((scalar_offset_4 & 2) ? ubo_load_4.zw : ubo_load_4.xy));
+  const int tint_symbol = p_a_i_save;
+  const int tint_symbol_1 = p_a_i_a_i_save;
+  const int tint_symbol_2 = p_a_i_a_i_m_i_save;
+  const int tint_symbol_3 = i();
+  const uint scalar_offset_5 = (((((128u * uint(tint_symbol)) + (32u * uint(tint_symbol_1))) + (8u * uint(tint_symbol_2))) + (4u * uint(tint_symbol_3)))) / 4;
+  const float l_a_i_a_i_m_i_i = asfloat(a[scalar_offset_5 / 4][scalar_offset_5 % 4]);
+  return;
+}
diff --git a/test/tint/buffer/uniform/std140/mat4x2/dynamic_index_via_ptr.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/mat4x2/dynamic_index_via_ptr.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..e6bf304
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/mat4x2/dynamic_index_via_ptr.wgsl.expected.fxc.hlsl
@@ -0,0 +1,83 @@
+struct Inner {
+  float4x2 m;
+};
+struct Outer {
+  Inner a[4];
+};
+
+cbuffer cbuffer_a : register(b0, space0) {
+  uint4 a[32];
+};
+static int counter = 0;
+
+int i() {
+  counter = (counter + 1);
+  return counter;
+}
+
+float4x2 tint_symbol_8(uint4 buffer[32], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load = buffer[scalar_offset / 4];
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset_1 / 4];
+  const uint scalar_offset_2 = ((offset + 16u)) / 4;
+  uint4 ubo_load_2 = buffer[scalar_offset_2 / 4];
+  const uint scalar_offset_3 = ((offset + 24u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_3 / 4];
+  return float4x2(asfloat(((scalar_offset & 2) ? ubo_load.zw : ubo_load.xy)), asfloat(((scalar_offset_1 & 2) ? ubo_load_1.zw : ubo_load_1.xy)), asfloat(((scalar_offset_2 & 2) ? ubo_load_2.zw : ubo_load_2.xy)), asfloat(((scalar_offset_3 & 2) ? ubo_load_3.zw : ubo_load_3.xy)));
+}
+
+Inner tint_symbol_7(uint4 buffer[32], uint offset) {
+  const Inner tint_symbol_11 = {tint_symbol_8(buffer, (offset + 0u))};
+  return tint_symbol_11;
+}
+
+typedef Inner tint_symbol_6_ret[4];
+tint_symbol_6_ret tint_symbol_6(uint4 buffer[32], uint offset) {
+  Inner arr[4] = (Inner[4])0;
+  {
+    [loop] for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
+      arr[i_1] = tint_symbol_7(buffer, (offset + (i_1 * 32u)));
+    }
+  }
+  return arr;
+}
+
+Outer tint_symbol_5(uint4 buffer[32], uint offset) {
+  const Outer tint_symbol_12 = {tint_symbol_6(buffer, (offset + 0u))};
+  return tint_symbol_12;
+}
+
+typedef Outer tint_symbol_4_ret[4];
+tint_symbol_4_ret tint_symbol_4(uint4 buffer[32], uint offset) {
+  Outer arr_1[4] = (Outer[4])0;
+  {
+    [loop] for(uint i_2 = 0u; (i_2 < 4u); i_2 = (i_2 + 1u)) {
+      arr_1[i_2] = tint_symbol_5(buffer, (offset + (i_2 * 128u)));
+    }
+  }
+  return arr_1;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const int I = 1;
+  const int p_a_i_save = i();
+  const int p_a_i_a_i_save = i();
+  const int p_a_i_a_i_m_i_save = i();
+  const Outer l_a[4] = tint_symbol_4(a, 0u);
+  const Outer l_a_i = tint_symbol_5(a, (128u * uint(p_a_i_save)));
+  const Inner l_a_i_a[4] = tint_symbol_6(a, (128u * uint(p_a_i_save)));
+  const Inner l_a_i_a_i = tint_symbol_7(a, ((128u * uint(p_a_i_save)) + (32u * uint(p_a_i_a_i_save))));
+  const float4x2 l_a_i_a_i_m = tint_symbol_8(a, ((128u * uint(p_a_i_save)) + (32u * uint(p_a_i_a_i_save))));
+  const uint scalar_offset_4 = ((((128u * uint(p_a_i_save)) + (32u * uint(p_a_i_a_i_save))) + (8u * uint(p_a_i_a_i_m_i_save)))) / 4;
+  uint4 ubo_load_4 = a[scalar_offset_4 / 4];
+  const float2 l_a_i_a_i_m_i = asfloat(((scalar_offset_4 & 2) ? ubo_load_4.zw : ubo_load_4.xy));
+  const int tint_symbol = p_a_i_save;
+  const int tint_symbol_1 = p_a_i_a_i_save;
+  const int tint_symbol_2 = p_a_i_a_i_m_i_save;
+  const int tint_symbol_3 = i();
+  const uint scalar_offset_5 = (((((128u * uint(tint_symbol)) + (32u * uint(tint_symbol_1))) + (8u * uint(tint_symbol_2))) + (4u * uint(tint_symbol_3)))) / 4;
+  const float l_a_i_a_i_m_i_i = asfloat(a[scalar_offset_5 / 4][scalar_offset_5 % 4]);
+  return;
+}
diff --git a/test/tint/buffer/uniform/std140/mat4x2/dynamic_index_via_ptr.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/mat4x2/dynamic_index_via_ptr.wgsl.expected.glsl
new file mode 100644
index 0000000..f495bbd
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/mat4x2/dynamic_index_via_ptr.wgsl.expected.glsl
@@ -0,0 +1,147 @@
+#version 310 es
+
+struct Inner {
+  mat4x2 m;
+};
+
+struct Inner_std140 {
+  vec2 m_0;
+  vec2 m_1;
+  vec2 m_2;
+  vec2 m_3;
+};
+
+struct Outer {
+  Inner a[4];
+};
+
+struct Outer_std140 {
+  Inner_std140 a[4];
+};
+
+struct a_block {
+  Outer_std140 inner[4];
+};
+
+layout(binding = 0) uniform a_block_1 {
+  Outer_std140 inner[4];
+} a;
+
+int counter = 0;
+int i() {
+  counter = (counter + 1);
+  return counter;
+}
+
+Inner conv_Inner(Inner_std140 val) {
+  Inner tint_symbol_4 = Inner(mat4x2(val.m_0, val.m_1, val.m_2, val.m_3));
+  return tint_symbol_4;
+}
+
+Inner[4] conv_arr_4_Inner(Inner_std140 val[4]) {
+  Inner arr[4] = Inner[4](Inner(mat4x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f)), Inner(mat4x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f)), Inner(mat4x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f)), Inner(mat4x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f)));
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = conv_Inner(val[i]);
+    }
+  }
+  return arr;
+}
+
+Outer conv_Outer(Outer_std140 val) {
+  Outer tint_symbol_5 = Outer(conv_arr_4_Inner(val.a));
+  return tint_symbol_5;
+}
+
+Outer[4] conv_arr_4_Outer(Outer_std140 val[4]) {
+  Outer arr[4] = Outer[4](Outer(Inner[4](Inner(mat4x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f)), Inner(mat4x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f)), Inner(mat4x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f)), Inner(mat4x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f)))), Outer(Inner[4](Inner(mat4x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f)), Inner(mat4x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f)), Inner(mat4x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f)), Inner(mat4x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f)))), Outer(Inner[4](Inner(mat4x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f)), Inner(mat4x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f)), Inner(mat4x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f)), Inner(mat4x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f)))), Outer(Inner[4](Inner(mat4x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f)), Inner(mat4x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f)), Inner(mat4x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f)), Inner(mat4x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f)))));
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = conv_Outer(val[i]);
+    }
+  }
+  return arr;
+}
+
+mat4x2 load_a_p0_a_p1_m(uint p0, uint p1) {
+  uint s_save = p0;
+  uint s_save_1 = p1;
+  return mat4x2(a.inner[s_save].a[s_save_1].m_0, a.inner[s_save].a[s_save_1].m_1, a.inner[s_save].a[s_save_1].m_2, a.inner[s_save].a[s_save_1].m_3);
+}
+
+vec2 load_a_p0_a_p1_m_p2(uint p0, uint p1, uint p2) {
+  switch(p2) {
+    case 0u: {
+      return a.inner[p0].a[p1].m_0;
+      break;
+    }
+    case 1u: {
+      return a.inner[p0].a[p1].m_1;
+      break;
+    }
+    case 2u: {
+      return a.inner[p0].a[p1].m_2;
+      break;
+    }
+    case 3u: {
+      return a.inner[p0].a[p1].m_3;
+      break;
+    }
+    default: {
+      return vec2(0.0f);
+      break;
+    }
+  }
+}
+
+float load_a_p0_a_p1_m_p2_p3(uint p0, uint p1, uint p2, uint p3) {
+  switch(p2) {
+    case 0u: {
+      return a.inner[p0].a[p1].m_0[p3];
+      break;
+    }
+    case 1u: {
+      return a.inner[p0].a[p1].m_1[p3];
+      break;
+    }
+    case 2u: {
+      return a.inner[p0].a[p1].m_2[p3];
+      break;
+    }
+    case 3u: {
+      return a.inner[p0].a[p1].m_3[p3];
+      break;
+    }
+    default: {
+      return 0.0f;
+      break;
+    }
+  }
+}
+
+void f() {
+  int I = 1;
+  Outer p_a[4] = conv_arr_4_Outer(a.inner);
+  int tint_symbol = i();
+  Outer p_a_i = conv_Outer(a.inner[tint_symbol]);
+  Inner p_a_i_a[4] = conv_arr_4_Inner(a.inner[tint_symbol].a);
+  int tint_symbol_1 = i();
+  Inner p_a_i_a_i = conv_Inner(a.inner[tint_symbol].a[tint_symbol_1]);
+  mat4x2 p_a_i_a_i_m = load_a_p0_a_p1_m(uint(tint_symbol), uint(tint_symbol_1));
+  int tint_symbol_2 = i();
+  vec2 p_a_i_a_i_m_i = load_a_p0_a_p1_m_p2(uint(tint_symbol), uint(tint_symbol_1), uint(tint_symbol_2));
+  Outer l_a[4] = conv_arr_4_Outer(a.inner);
+  Outer l_a_i = conv_Outer(a.inner[tint_symbol]);
+  Inner l_a_i_a[4] = conv_arr_4_Inner(a.inner[tint_symbol].a);
+  Inner l_a_i_a_i = conv_Inner(a.inner[tint_symbol].a[tint_symbol_1]);
+  mat4x2 l_a_i_a_i_m = load_a_p0_a_p1_m(uint(tint_symbol), uint(tint_symbol_1));
+  vec2 l_a_i_a_i_m_i = load_a_p0_a_p1_m_p2(uint(tint_symbol), uint(tint_symbol_1), uint(tint_symbol_2));
+  int tint_symbol_3 = i();
+  float l_a_i_a_i_m_i_i = load_a_p0_a_p1_m_p2_p3(uint(tint_symbol), uint(tint_symbol_1), uint(tint_symbol_2), uint(tint_symbol_3));
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}
diff --git a/test/tint/buffer/uniform/std140/mat4x2/dynamic_index_via_ptr.wgsl.expected.msl b/test/tint/buffer/uniform/std140/mat4x2/dynamic_index_via_ptr.wgsl.expected.msl
new file mode 100644
index 0000000..017e1e8
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/mat4x2/dynamic_index_via_ptr.wgsl.expected.msl
@@ -0,0 +1,49 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+struct Inner {
+  /* 0x0000 */ float4x2 m;
+};
+
+struct Outer {
+  /* 0x0000 */ tint_array<Inner, 4> a;
+};
+
+int i() {
+  thread int tint_symbol_4 = 0;
+  tint_symbol_4 = as_type<int>((as_type<uint>(tint_symbol_4) + as_type<uint>(1)));
+  return tint_symbol_4;
+}
+
+kernel void f(const constant tint_array<Outer, 4>* tint_symbol_5 [[buffer(0)]]) {
+  int const I = 1;
+  int const tint_symbol = i();
+  int const p_a_i_save = tint_symbol;
+  int const tint_symbol_1 = i();
+  int const p_a_i_a_i_save = tint_symbol_1;
+  int const tint_symbol_2 = i();
+  int const p_a_i_a_i_m_i_save = tint_symbol_2;
+  tint_array<Outer, 4> const l_a = *(tint_symbol_5);
+  Outer const l_a_i = (*(tint_symbol_5))[p_a_i_save];
+  tint_array<Inner, 4> const l_a_i_a = (*(tint_symbol_5))[p_a_i_save].a;
+  Inner const l_a_i_a_i = (*(tint_symbol_5))[p_a_i_save].a[p_a_i_a_i_save];
+  float4x2 const l_a_i_a_i_m = (*(tint_symbol_5))[p_a_i_save].a[p_a_i_a_i_save].m;
+  float2 const l_a_i_a_i_m_i = (*(tint_symbol_5))[p_a_i_save].a[p_a_i_a_i_save].m[p_a_i_a_i_m_i_save];
+  int const tint_symbol_3 = i();
+  float const l_a_i_a_i_m_i_i = (*(tint_symbol_5))[p_a_i_save].a[p_a_i_a_i_save].m[p_a_i_a_i_m_i_save][tint_symbol_3];
+  return;
+}
+
diff --git a/test/tint/buffer/uniform/std140/mat4x2/dynamic_index_via_ptr.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/mat4x2/dynamic_index_via_ptr.wgsl.expected.spvasm
new file mode 100644
index 0000000..5de20f5
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/mat4x2/dynamic_index_via_ptr.wgsl.expected.spvasm
@@ -0,0 +1,352 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 228
+; Schema: 0
+               OpCapability Shader
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %a_block "a_block"
+               OpMemberName %a_block 0 "inner"
+               OpName %Outer_std140 "Outer_std140"
+               OpMemberName %Outer_std140 0 "a"
+               OpName %Inner_std140 "Inner_std140"
+               OpMemberName %Inner_std140 0 "m_0"
+               OpMemberName %Inner_std140 1 "m_1"
+               OpMemberName %Inner_std140 2 "m_2"
+               OpMemberName %Inner_std140 3 "m_3"
+               OpName %a "a"
+               OpName %counter "counter"
+               OpName %i "i"
+               OpName %Inner "Inner"
+               OpMemberName %Inner 0 "m"
+               OpName %conv_Inner "conv_Inner"
+               OpName %val "val"
+               OpName %conv_arr_4_Inner "conv_arr_4_Inner"
+               OpName %val_0 "val"
+               OpName %arr "arr"
+               OpName %i_0 "i"
+               OpName %var_for_index "var_for_index"
+               OpName %Outer "Outer"
+               OpMemberName %Outer 0 "a"
+               OpName %conv_Outer "conv_Outer"
+               OpName %val_1 "val"
+               OpName %conv_arr_4_Outer "conv_arr_4_Outer"
+               OpName %val_2 "val"
+               OpName %arr_0 "arr"
+               OpName %i_1 "i"
+               OpName %var_for_index_1 "var_for_index_1"
+               OpName %load_a_p0_a_p1_m "load_a_p0_a_p1_m"
+               OpName %p0 "p0"
+               OpName %p1 "p1"
+               OpName %load_a_p0_a_p1_m_p2 "load_a_p0_a_p1_m_p2"
+               OpName %p0_0 "p0"
+               OpName %p1_0 "p1"
+               OpName %p2 "p2"
+               OpName %load_a_p0_a_p1_m_p2_p3 "load_a_p0_a_p1_m_p2_p3"
+               OpName %p0_1 "p0"
+               OpName %p1_1 "p1"
+               OpName %p2_0 "p2"
+               OpName %p3 "p3"
+               OpName %f "f"
+               OpDecorate %a_block Block
+               OpMemberDecorate %a_block 0 Offset 0
+               OpMemberDecorate %Outer_std140 0 Offset 0
+               OpMemberDecorate %Inner_std140 0 Offset 0
+               OpMemberDecorate %Inner_std140 1 Offset 8
+               OpMemberDecorate %Inner_std140 2 Offset 16
+               OpMemberDecorate %Inner_std140 3 Offset 24
+               OpDecorate %_arr_Inner_std140_uint_4 ArrayStride 32
+               OpDecorate %_arr_Outer_std140_uint_4 ArrayStride 128
+               OpDecorate %a NonWritable
+               OpDecorate %a DescriptorSet 0
+               OpDecorate %a Binding 0
+               OpMemberDecorate %Inner 0 Offset 0
+               OpMemberDecorate %Inner 0 ColMajor
+               OpMemberDecorate %Inner 0 MatrixStride 8
+               OpDecorate %_arr_Inner_uint_4 ArrayStride 32
+               OpMemberDecorate %Outer 0 Offset 0
+               OpDecorate %_arr_Outer_uint_4 ArrayStride 128
+      %float = OpTypeFloat 32
+    %v2float = OpTypeVector %float 2
+%Inner_std140 = OpTypeStruct %v2float %v2float %v2float %v2float
+       %uint = OpTypeInt 32 0
+     %uint_4 = OpConstant %uint 4
+%_arr_Inner_std140_uint_4 = OpTypeArray %Inner_std140 %uint_4
+%Outer_std140 = OpTypeStruct %_arr_Inner_std140_uint_4
+%_arr_Outer_std140_uint_4 = OpTypeArray %Outer_std140 %uint_4
+    %a_block = OpTypeStruct %_arr_Outer_std140_uint_4
+%_ptr_Uniform_a_block = OpTypePointer Uniform %a_block
+          %a = OpVariable %_ptr_Uniform_a_block Uniform
+        %int = OpTypeInt 32 1
+         %13 = OpConstantNull %int
+%_ptr_Private_int = OpTypePointer Private %int
+    %counter = OpVariable %_ptr_Private_int Private %13
+         %16 = OpTypeFunction %int
+      %int_1 = OpConstant %int 1
+%mat4v2float = OpTypeMatrix %v2float 4
+      %Inner = OpTypeStruct %mat4v2float
+         %23 = OpTypeFunction %Inner %Inner_std140
+%_arr_Inner_uint_4 = OpTypeArray %Inner %uint_4
+         %35 = OpTypeFunction %_arr_Inner_uint_4 %_arr_Inner_std140_uint_4
+%_ptr_Function__arr_Inner_uint_4 = OpTypePointer Function %_arr_Inner_uint_4
+         %42 = OpConstantNull %_arr_Inner_uint_4
+%_ptr_Function_uint = OpTypePointer Function %uint
+         %45 = OpConstantNull %uint
+       %bool = OpTypeBool
+%_ptr_Function__arr_Inner_std140_uint_4 = OpTypePointer Function %_arr_Inner_std140_uint_4
+         %58 = OpConstantNull %_arr_Inner_std140_uint_4
+%_ptr_Function_Inner = OpTypePointer Function %Inner
+%_ptr_Function_Inner_std140 = OpTypePointer Function %Inner_std140
+     %uint_1 = OpConstant %uint 1
+      %Outer = OpTypeStruct %_arr_Inner_uint_4
+         %71 = OpTypeFunction %Outer %Outer_std140
+%_arr_Outer_uint_4 = OpTypeArray %Outer %uint_4
+         %79 = OpTypeFunction %_arr_Outer_uint_4 %_arr_Outer_std140_uint_4
+%_ptr_Function__arr_Outer_uint_4 = OpTypePointer Function %_arr_Outer_uint_4
+         %86 = OpConstantNull %_arr_Outer_uint_4
+%_ptr_Function__arr_Outer_std140_uint_4 = OpTypePointer Function %_arr_Outer_std140_uint_4
+         %99 = OpConstantNull %_arr_Outer_std140_uint_4
+%_ptr_Function_Outer = OpTypePointer Function %Outer
+%_ptr_Function_Outer_std140 = OpTypePointer Function %Outer_std140
+        %111 = OpTypeFunction %mat4v2float %uint %uint
+     %uint_0 = OpConstant %uint 0
+%_ptr_Uniform_v2float = OpTypePointer Uniform %v2float
+     %uint_2 = OpConstant %uint 2
+     %uint_3 = OpConstant %uint 3
+        %129 = OpTypeFunction %v2float %uint %uint %uint
+        %149 = OpConstantNull %v2float
+        %150 = OpTypeFunction %float %uint %uint %uint %uint
+%_ptr_Uniform_float = OpTypePointer Uniform %float
+        %172 = OpConstantNull %float
+       %void = OpTypeVoid
+        %173 = OpTypeFunction %void
+%_ptr_Uniform__arr_Outer_std140_uint_4 = OpTypePointer Uniform %_arr_Outer_std140_uint_4
+%_ptr_Uniform_Outer_std140 = OpTypePointer Uniform %Outer_std140
+%_ptr_Uniform__arr_Inner_std140_uint_4 = OpTypePointer Uniform %_arr_Inner_std140_uint_4
+%_ptr_Uniform_Inner_std140 = OpTypePointer Uniform %Inner_std140
+          %i = OpFunction %int None %16
+         %18 = OpLabel
+         %19 = OpLoad %int %counter
+         %21 = OpIAdd %int %19 %int_1
+               OpStore %counter %21
+         %22 = OpLoad %int %counter
+               OpReturnValue %22
+               OpFunctionEnd
+ %conv_Inner = OpFunction %Inner None %23
+        %val = OpFunctionParameter %Inner_std140
+         %28 = OpLabel
+         %29 = OpCompositeExtract %v2float %val 0
+         %30 = OpCompositeExtract %v2float %val 1
+         %31 = OpCompositeExtract %v2float %val 2
+         %32 = OpCompositeExtract %v2float %val 3
+         %33 = OpCompositeConstruct %mat4v2float %29 %30 %31 %32
+         %34 = OpCompositeConstruct %Inner %33
+               OpReturnValue %34
+               OpFunctionEnd
+%conv_arr_4_Inner = OpFunction %_arr_Inner_uint_4 None %35
+      %val_0 = OpFunctionParameter %_arr_Inner_std140_uint_4
+         %39 = OpLabel
+        %arr = OpVariable %_ptr_Function__arr_Inner_uint_4 Function %42
+        %i_0 = OpVariable %_ptr_Function_uint Function %45
+%var_for_index = OpVariable %_ptr_Function__arr_Inner_std140_uint_4 Function %58
+               OpBranch %46
+         %46 = OpLabel
+               OpLoopMerge %47 %48 None
+               OpBranch %49
+         %49 = OpLabel
+         %51 = OpLoad %uint %i_0
+         %52 = OpULessThan %bool %51 %uint_4
+         %50 = OpLogicalNot %bool %52
+               OpSelectionMerge %54 None
+               OpBranchConditional %50 %55 %54
+         %55 = OpLabel
+               OpBranch %47
+         %54 = OpLabel
+               OpStore %var_for_index %val_0
+         %59 = OpLoad %uint %i_0
+         %61 = OpAccessChain %_ptr_Function_Inner %arr %59
+         %63 = OpLoad %uint %i_0
+         %65 = OpAccessChain %_ptr_Function_Inner_std140 %var_for_index %63
+         %66 = OpLoad %Inner_std140 %65
+         %62 = OpFunctionCall %Inner %conv_Inner %66
+               OpStore %61 %62
+               OpBranch %48
+         %48 = OpLabel
+         %67 = OpLoad %uint %i_0
+         %69 = OpIAdd %uint %67 %uint_1
+               OpStore %i_0 %69
+               OpBranch %46
+         %47 = OpLabel
+         %70 = OpLoad %_arr_Inner_uint_4 %arr
+               OpReturnValue %70
+               OpFunctionEnd
+ %conv_Outer = OpFunction %Outer None %71
+      %val_1 = OpFunctionParameter %Outer_std140
+         %75 = OpLabel
+         %77 = OpCompositeExtract %_arr_Inner_std140_uint_4 %val_1 0
+         %76 = OpFunctionCall %_arr_Inner_uint_4 %conv_arr_4_Inner %77
+         %78 = OpCompositeConstruct %Outer %76
+               OpReturnValue %78
+               OpFunctionEnd
+%conv_arr_4_Outer = OpFunction %_arr_Outer_uint_4 None %79
+      %val_2 = OpFunctionParameter %_arr_Outer_std140_uint_4
+         %83 = OpLabel
+      %arr_0 = OpVariable %_ptr_Function__arr_Outer_uint_4 Function %86
+        %i_1 = OpVariable %_ptr_Function_uint Function %45
+%var_for_index_1 = OpVariable %_ptr_Function__arr_Outer_std140_uint_4 Function %99
+               OpBranch %88
+         %88 = OpLabel
+               OpLoopMerge %89 %90 None
+               OpBranch %91
+         %91 = OpLabel
+         %93 = OpLoad %uint %i_1
+         %94 = OpULessThan %bool %93 %uint_4
+         %92 = OpLogicalNot %bool %94
+               OpSelectionMerge %95 None
+               OpBranchConditional %92 %96 %95
+         %96 = OpLabel
+               OpBranch %89
+         %95 = OpLabel
+               OpStore %var_for_index_1 %val_2
+        %100 = OpLoad %uint %i_1
+        %102 = OpAccessChain %_ptr_Function_Outer %arr_0 %100
+        %104 = OpLoad %uint %i_1
+        %106 = OpAccessChain %_ptr_Function_Outer_std140 %var_for_index_1 %104
+        %107 = OpLoad %Outer_std140 %106
+        %103 = OpFunctionCall %Outer %conv_Outer %107
+               OpStore %102 %103
+               OpBranch %90
+         %90 = OpLabel
+        %108 = OpLoad %uint %i_1
+        %109 = OpIAdd %uint %108 %uint_1
+               OpStore %i_1 %109
+               OpBranch %88
+         %89 = OpLabel
+        %110 = OpLoad %_arr_Outer_uint_4 %arr_0
+               OpReturnValue %110
+               OpFunctionEnd
+%load_a_p0_a_p1_m = OpFunction %mat4v2float None %111
+         %p0 = OpFunctionParameter %uint
+         %p1 = OpFunctionParameter %uint
+        %115 = OpLabel
+        %118 = OpAccessChain %_ptr_Uniform_v2float %a %uint_0 %p0 %uint_0 %p1 %uint_0
+        %119 = OpLoad %v2float %118
+        %120 = OpAccessChain %_ptr_Uniform_v2float %a %uint_0 %p0 %uint_0 %p1 %uint_1
+        %121 = OpLoad %v2float %120
+        %123 = OpAccessChain %_ptr_Uniform_v2float %a %uint_0 %p0 %uint_0 %p1 %uint_2
+        %124 = OpLoad %v2float %123
+        %126 = OpAccessChain %_ptr_Uniform_v2float %a %uint_0 %p0 %uint_0 %p1 %uint_3
+        %127 = OpLoad %v2float %126
+        %128 = OpCompositeConstruct %mat4v2float %119 %121 %124 %127
+               OpReturnValue %128
+               OpFunctionEnd
+%load_a_p0_a_p1_m_p2 = OpFunction %v2float None %129
+       %p0_0 = OpFunctionParameter %uint
+       %p1_0 = OpFunctionParameter %uint
+         %p2 = OpFunctionParameter %uint
+        %134 = OpLabel
+               OpSelectionMerge %135 None
+               OpSwitch %p2 %136 0 %137 1 %138 2 %139 3 %140
+        %137 = OpLabel
+        %141 = OpAccessChain %_ptr_Uniform_v2float %a %uint_0 %p0_0 %uint_0 %p1_0 %uint_0
+        %142 = OpLoad %v2float %141
+               OpReturnValue %142
+        %138 = OpLabel
+        %143 = OpAccessChain %_ptr_Uniform_v2float %a %uint_0 %p0_0 %uint_0 %p1_0 %uint_1
+        %144 = OpLoad %v2float %143
+               OpReturnValue %144
+        %139 = OpLabel
+        %145 = OpAccessChain %_ptr_Uniform_v2float %a %uint_0 %p0_0 %uint_0 %p1_0 %uint_2
+        %146 = OpLoad %v2float %145
+               OpReturnValue %146
+        %140 = OpLabel
+        %147 = OpAccessChain %_ptr_Uniform_v2float %a %uint_0 %p0_0 %uint_0 %p1_0 %uint_3
+        %148 = OpLoad %v2float %147
+               OpReturnValue %148
+        %136 = OpLabel
+               OpReturnValue %149
+        %135 = OpLabel
+               OpReturnValue %149
+               OpFunctionEnd
+%load_a_p0_a_p1_m_p2_p3 = OpFunction %float None %150
+       %p0_1 = OpFunctionParameter %uint
+       %p1_1 = OpFunctionParameter %uint
+       %p2_0 = OpFunctionParameter %uint
+         %p3 = OpFunctionParameter %uint
+        %156 = OpLabel
+               OpSelectionMerge %157 None
+               OpSwitch %p2_0 %158 0 %159 1 %160 2 %161 3 %162
+        %159 = OpLabel
+        %164 = OpAccessChain %_ptr_Uniform_float %a %uint_0 %p0_1 %uint_0 %p1_1 %uint_0 %p3
+        %165 = OpLoad %float %164
+               OpReturnValue %165
+        %160 = OpLabel
+        %166 = OpAccessChain %_ptr_Uniform_float %a %uint_0 %p0_1 %uint_0 %p1_1 %uint_1 %p3
+        %167 = OpLoad %float %166
+               OpReturnValue %167
+        %161 = OpLabel
+        %168 = OpAccessChain %_ptr_Uniform_float %a %uint_0 %p0_1 %uint_0 %p1_1 %uint_2 %p3
+        %169 = OpLoad %float %168
+               OpReturnValue %169
+        %162 = OpLabel
+        %170 = OpAccessChain %_ptr_Uniform_float %a %uint_0 %p0_1 %uint_0 %p1_1 %uint_3 %p3
+        %171 = OpLoad %float %170
+               OpReturnValue %171
+        %158 = OpLabel
+               OpReturnValue %172
+        %157 = OpLabel
+               OpReturnValue %172
+               OpFunctionEnd
+          %f = OpFunction %void None %173
+        %176 = OpLabel
+        %179 = OpAccessChain %_ptr_Uniform__arr_Outer_std140_uint_4 %a %uint_0
+        %180 = OpLoad %_arr_Outer_std140_uint_4 %179
+        %177 = OpFunctionCall %_arr_Outer_uint_4 %conv_arr_4_Outer %180
+        %181 = OpFunctionCall %int %i
+        %184 = OpAccessChain %_ptr_Uniform_Outer_std140 %a %uint_0 %181
+        %185 = OpLoad %Outer_std140 %184
+        %182 = OpFunctionCall %Outer %conv_Outer %185
+        %188 = OpAccessChain %_ptr_Uniform__arr_Inner_std140_uint_4 %a %uint_0 %181 %uint_0
+        %189 = OpLoad %_arr_Inner_std140_uint_4 %188
+        %186 = OpFunctionCall %_arr_Inner_uint_4 %conv_arr_4_Inner %189
+        %190 = OpFunctionCall %int %i
+        %193 = OpAccessChain %_ptr_Uniform_Inner_std140 %a %uint_0 %181 %uint_0 %190
+        %194 = OpLoad %Inner_std140 %193
+        %191 = OpFunctionCall %Inner %conv_Inner %194
+        %196 = OpBitcast %uint %181
+        %197 = OpBitcast %uint %190
+        %195 = OpFunctionCall %mat4v2float %load_a_p0_a_p1_m %196 %197
+        %198 = OpFunctionCall %int %i
+        %200 = OpBitcast %uint %181
+        %201 = OpBitcast %uint %190
+        %202 = OpBitcast %uint %198
+        %199 = OpFunctionCall %v2float %load_a_p0_a_p1_m_p2 %200 %201 %202
+        %204 = OpAccessChain %_ptr_Uniform__arr_Outer_std140_uint_4 %a %uint_0
+        %205 = OpLoad %_arr_Outer_std140_uint_4 %204
+        %203 = OpFunctionCall %_arr_Outer_uint_4 %conv_arr_4_Outer %205
+        %207 = OpAccessChain %_ptr_Uniform_Outer_std140 %a %uint_0 %181
+        %208 = OpLoad %Outer_std140 %207
+        %206 = OpFunctionCall %Outer %conv_Outer %208
+        %210 = OpAccessChain %_ptr_Uniform__arr_Inner_std140_uint_4 %a %uint_0 %181 %uint_0
+        %211 = OpLoad %_arr_Inner_std140_uint_4 %210
+        %209 = OpFunctionCall %_arr_Inner_uint_4 %conv_arr_4_Inner %211
+        %213 = OpAccessChain %_ptr_Uniform_Inner_std140 %a %uint_0 %181 %uint_0 %190
+        %214 = OpLoad %Inner_std140 %213
+        %212 = OpFunctionCall %Inner %conv_Inner %214
+        %216 = OpBitcast %uint %181
+        %217 = OpBitcast %uint %190
+        %215 = OpFunctionCall %mat4v2float %load_a_p0_a_p1_m %216 %217
+        %219 = OpBitcast %uint %181
+        %220 = OpBitcast %uint %190
+        %221 = OpBitcast %uint %198
+        %218 = OpFunctionCall %v2float %load_a_p0_a_p1_m_p2 %219 %220 %221
+        %222 = OpFunctionCall %int %i
+        %224 = OpBitcast %uint %181
+        %225 = OpBitcast %uint %190
+        %226 = OpBitcast %uint %198
+        %227 = OpBitcast %uint %222
+        %223 = OpFunctionCall %float %load_a_p0_a_p1_m_p2_p3 %224 %225 %226 %227
+               OpReturn
+               OpFunctionEnd
diff --git a/test/tint/buffer/uniform/std140/mat4x2/dynamic_index_via_ptr.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/mat4x2/dynamic_index_via_ptr.wgsl.expected.wgsl
new file mode 100644
index 0000000..8417933
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/mat4x2/dynamic_index_via_ptr.wgsl.expected.wgsl
@@ -0,0 +1,34 @@
+struct Inner {
+  m : mat4x2<f32>,
+}
+
+struct Outer {
+  a : array<Inner, 4>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<Outer, 4>;
+
+var<private> counter = 0;
+
+fn i() -> i32 {
+  counter++;
+  return counter;
+}
+
+@compute @workgroup_size(1)
+fn f() {
+  let I = 1;
+  let p_a = &(a);
+  let p_a_i = &((*(p_a))[i()]);
+  let p_a_i_a = &((*(p_a_i)).a);
+  let p_a_i_a_i = &((*(p_a_i_a))[i()]);
+  let p_a_i_a_i_m = &((*(p_a_i_a_i)).m);
+  let p_a_i_a_i_m_i = &((*(p_a_i_a_i_m))[i()]);
+  let l_a : array<Outer, 4> = *(p_a);
+  let l_a_i : Outer = *(p_a_i);
+  let l_a_i_a : array<Inner, 4> = *(p_a_i_a);
+  let l_a_i_a_i : Inner = *(p_a_i_a_i);
+  let l_a_i_a_i_m : mat4x2<f32> = *(p_a_i_a_i_m);
+  let l_a_i_a_i_m_i : vec2<f32> = *(p_a_i_a_i_m_i);
+  let l_a_i_a_i_m_i_i : f32 = (*(p_a_i_a_i_m_i))[i()];
+}
diff --git a/test/tint/buffer/uniform/std140/mat4x2/static_index_via_ptr.wgsl b/test/tint/buffer/uniform/std140/mat4x2/static_index_via_ptr.wgsl
new file mode 100644
index 0000000..8731181
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/mat4x2/static_index_via_ptr.wgsl
@@ -0,0 +1,30 @@
+struct Inner {
+  m : mat4x2<f32>,
+}
+
+struct Outer {
+  a : array<Inner, 4>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<Outer, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+  let I = 1;
+
+  let p_a = &a;
+  let p_a_3 = &((*p_a)[3]);
+  let p_a_3_a = &((*p_a_3).a);
+  let p_a_3_a_2 = &((*p_a_3_a)[2]);
+  let p_a_3_a_2_m = &((*p_a_3_a_2).m);
+  let p_a_3_a_2_m_1 = &((*p_a_3_a_2_m)[1]);
+
+
+  let l_a             : array<Outer, 4> = *p_a;
+  let l_a_3           : Outer           = *p_a_3;
+  let l_a_3_a         : array<Inner, 4> = *p_a_3_a;
+  let l_a_3_a_2       : Inner           = *p_a_3_a_2;
+  let l_a_3_a_2_m     : mat4x2<f32>     = *p_a_3_a_2_m;
+  let l_a_3_a_2_m_1   : vec2<f32>       = *p_a_3_a_2_m_1;
+  let l_a_3_a_2_m_1_0 : f32             = (*p_a_3_a_2_m_1)[0];
+}
diff --git a/test/tint/buffer/uniform/std140/mat4x2/static_index_via_ptr.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/mat4x2/static_index_via_ptr.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..597afef
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/mat4x2/static_index_via_ptr.wgsl.expected.dxc.hlsl
@@ -0,0 +1,67 @@
+struct Inner {
+  float4x2 m;
+};
+struct Outer {
+  Inner a[4];
+};
+
+cbuffer cbuffer_a : register(b0, space0) {
+  uint4 a[32];
+};
+
+float4x2 tint_symbol_4(uint4 buffer[32], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load = buffer[scalar_offset / 4];
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset_1 / 4];
+  const uint scalar_offset_2 = ((offset + 16u)) / 4;
+  uint4 ubo_load_2 = buffer[scalar_offset_2 / 4];
+  const uint scalar_offset_3 = ((offset + 24u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_3 / 4];
+  return float4x2(asfloat(((scalar_offset & 2) ? ubo_load.zw : ubo_load.xy)), asfloat(((scalar_offset_1 & 2) ? ubo_load_1.zw : ubo_load_1.xy)), asfloat(((scalar_offset_2 & 2) ? ubo_load_2.zw : ubo_load_2.xy)), asfloat(((scalar_offset_3 & 2) ? ubo_load_3.zw : ubo_load_3.xy)));
+}
+
+Inner tint_symbol_3(uint4 buffer[32], uint offset) {
+  const Inner tint_symbol_7 = {tint_symbol_4(buffer, (offset + 0u))};
+  return tint_symbol_7;
+}
+
+typedef Inner tint_symbol_2_ret[4];
+tint_symbol_2_ret tint_symbol_2(uint4 buffer[32], uint offset) {
+  Inner arr[4] = (Inner[4])0;
+  {
+    [loop] for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = tint_symbol_3(buffer, (offset + (i * 32u)));
+    }
+  }
+  return arr;
+}
+
+Outer tint_symbol_1(uint4 buffer[32], uint offset) {
+  const Outer tint_symbol_8 = {tint_symbol_2(buffer, (offset + 0u))};
+  return tint_symbol_8;
+}
+
+typedef Outer tint_symbol_ret[4];
+tint_symbol_ret tint_symbol(uint4 buffer[32], uint offset) {
+  Outer arr_1[4] = (Outer[4])0;
+  {
+    [loop] for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
+      arr_1[i_1] = tint_symbol_1(buffer, (offset + (i_1 * 128u)));
+    }
+  }
+  return arr_1;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const int I = 1;
+  const Outer l_a[4] = tint_symbol(a, 0u);
+  const Outer l_a_3 = tint_symbol_1(a, 384u);
+  const Inner l_a_3_a[4] = tint_symbol_2(a, 384u);
+  const Inner l_a_3_a_2 = tint_symbol_3(a, 448u);
+  const float4x2 l_a_3_a_2_m = tint_symbol_4(a, 448u);
+  const float2 l_a_3_a_2_m_1 = asfloat(a[28].zw);
+  const float l_a_3_a_2_m_1_0 = asfloat(a[28].z);
+  return;
+}
diff --git a/test/tint/buffer/uniform/std140/mat4x2/static_index_via_ptr.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/mat4x2/static_index_via_ptr.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..597afef
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/mat4x2/static_index_via_ptr.wgsl.expected.fxc.hlsl
@@ -0,0 +1,67 @@
+struct Inner {
+  float4x2 m;
+};
+struct Outer {
+  Inner a[4];
+};
+
+cbuffer cbuffer_a : register(b0, space0) {
+  uint4 a[32];
+};
+
+float4x2 tint_symbol_4(uint4 buffer[32], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load = buffer[scalar_offset / 4];
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset_1 / 4];
+  const uint scalar_offset_2 = ((offset + 16u)) / 4;
+  uint4 ubo_load_2 = buffer[scalar_offset_2 / 4];
+  const uint scalar_offset_3 = ((offset + 24u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_3 / 4];
+  return float4x2(asfloat(((scalar_offset & 2) ? ubo_load.zw : ubo_load.xy)), asfloat(((scalar_offset_1 & 2) ? ubo_load_1.zw : ubo_load_1.xy)), asfloat(((scalar_offset_2 & 2) ? ubo_load_2.zw : ubo_load_2.xy)), asfloat(((scalar_offset_3 & 2) ? ubo_load_3.zw : ubo_load_3.xy)));
+}
+
+Inner tint_symbol_3(uint4 buffer[32], uint offset) {
+  const Inner tint_symbol_7 = {tint_symbol_4(buffer, (offset + 0u))};
+  return tint_symbol_7;
+}
+
+typedef Inner tint_symbol_2_ret[4];
+tint_symbol_2_ret tint_symbol_2(uint4 buffer[32], uint offset) {
+  Inner arr[4] = (Inner[4])0;
+  {
+    [loop] for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = tint_symbol_3(buffer, (offset + (i * 32u)));
+    }
+  }
+  return arr;
+}
+
+Outer tint_symbol_1(uint4 buffer[32], uint offset) {
+  const Outer tint_symbol_8 = {tint_symbol_2(buffer, (offset + 0u))};
+  return tint_symbol_8;
+}
+
+typedef Outer tint_symbol_ret[4];
+tint_symbol_ret tint_symbol(uint4 buffer[32], uint offset) {
+  Outer arr_1[4] = (Outer[4])0;
+  {
+    [loop] for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
+      arr_1[i_1] = tint_symbol_1(buffer, (offset + (i_1 * 128u)));
+    }
+  }
+  return arr_1;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const int I = 1;
+  const Outer l_a[4] = tint_symbol(a, 0u);
+  const Outer l_a_3 = tint_symbol_1(a, 384u);
+  const Inner l_a_3_a[4] = tint_symbol_2(a, 384u);
+  const Inner l_a_3_a_2 = tint_symbol_3(a, 448u);
+  const float4x2 l_a_3_a_2_m = tint_symbol_4(a, 448u);
+  const float2 l_a_3_a_2_m_1 = asfloat(a[28].zw);
+  const float l_a_3_a_2_m_1_0 = asfloat(a[28].z);
+  return;
+}
diff --git a/test/tint/buffer/uniform/std140/mat4x2/static_index_via_ptr.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/mat4x2/static_index_via_ptr.wgsl.expected.glsl
new file mode 100644
index 0000000..c1a89f6
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/mat4x2/static_index_via_ptr.wgsl.expected.glsl
@@ -0,0 +1,85 @@
+#version 310 es
+
+struct Inner {
+  mat4x2 m;
+};
+
+struct Inner_std140 {
+  vec2 m_0;
+  vec2 m_1;
+  vec2 m_2;
+  vec2 m_3;
+};
+
+struct Outer {
+  Inner a[4];
+};
+
+struct Outer_std140 {
+  Inner_std140 a[4];
+};
+
+struct a_block {
+  Outer_std140 inner[4];
+};
+
+layout(binding = 0) uniform a_block_1 {
+  Outer_std140 inner[4];
+} a;
+
+Inner conv_Inner(Inner_std140 val) {
+  Inner tint_symbol = Inner(mat4x2(val.m_0, val.m_1, val.m_2, val.m_3));
+  return tint_symbol;
+}
+
+Inner[4] conv_arr_4_Inner(Inner_std140 val[4]) {
+  Inner arr[4] = Inner[4](Inner(mat4x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f)), Inner(mat4x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f)), Inner(mat4x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f)), Inner(mat4x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f)));
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = conv_Inner(val[i]);
+    }
+  }
+  return arr;
+}
+
+Outer conv_Outer(Outer_std140 val) {
+  Outer tint_symbol_1 = Outer(conv_arr_4_Inner(val.a));
+  return tint_symbol_1;
+}
+
+Outer[4] conv_arr_4_Outer(Outer_std140 val[4]) {
+  Outer arr[4] = Outer[4](Outer(Inner[4](Inner(mat4x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f)), Inner(mat4x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f)), Inner(mat4x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f)), Inner(mat4x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f)))), Outer(Inner[4](Inner(mat4x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f)), Inner(mat4x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f)), Inner(mat4x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f)), Inner(mat4x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f)))), Outer(Inner[4](Inner(mat4x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f)), Inner(mat4x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f)), Inner(mat4x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f)), Inner(mat4x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f)))), Outer(Inner[4](Inner(mat4x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f)), Inner(mat4x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f)), Inner(mat4x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f)), Inner(mat4x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f)))));
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = conv_Outer(val[i]);
+    }
+  }
+  return arr;
+}
+
+mat4x2 load_a_3_a_2_m() {
+  return mat4x2(a.inner[3u].a[2u].m_0, a.inner[3u].a[2u].m_1, a.inner[3u].a[2u].m_2, a.inner[3u].a[2u].m_3);
+}
+
+void f() {
+  int I = 1;
+  Outer p_a[4] = conv_arr_4_Outer(a.inner);
+  Outer p_a_3 = conv_Outer(a.inner[3u]);
+  Inner p_a_3_a[4] = conv_arr_4_Inner(a.inner[3u].a);
+  Inner p_a_3_a_2 = conv_Inner(a.inner[3u].a[2u]);
+  mat4x2 p_a_3_a_2_m = load_a_3_a_2_m();
+  vec2 p_a_3_a_2_m_1 = a.inner[3u].a[2u].m_1;
+  Outer l_a[4] = conv_arr_4_Outer(a.inner);
+  Outer l_a_3 = conv_Outer(a.inner[3u]);
+  Inner l_a_3_a[4] = conv_arr_4_Inner(a.inner[3u].a);
+  Inner l_a_3_a_2 = conv_Inner(a.inner[3u].a[2u]);
+  mat4x2 l_a_3_a_2_m = load_a_3_a_2_m();
+  vec2 l_a_3_a_2_m_1 = a.inner[3u].a[2u].m_1;
+  float l_a_3_a_2_m_1_0 = a.inner[3u].a[2u].m_1[0u];
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}
diff --git a/test/tint/buffer/uniform/std140/mat4x2/static_index_via_ptr.wgsl.expected.msl b/test/tint/buffer/uniform/std140/mat4x2/static_index_via_ptr.wgsl.expected.msl
new file mode 100644
index 0000000..ef14c73
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/mat4x2/static_index_via_ptr.wgsl.expected.msl
@@ -0,0 +1,36 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+struct Inner {
+  /* 0x0000 */ float4x2 m;
+};
+
+struct Outer {
+  /* 0x0000 */ tint_array<Inner, 4> a;
+};
+
+kernel void f(const constant tint_array<Outer, 4>* tint_symbol [[buffer(0)]]) {
+  int const I = 1;
+  tint_array<Outer, 4> const l_a = *(tint_symbol);
+  Outer const l_a_3 = (*(tint_symbol))[3];
+  tint_array<Inner, 4> const l_a_3_a = (*(tint_symbol))[3].a;
+  Inner const l_a_3_a_2 = (*(tint_symbol))[3].a[2];
+  float4x2 const l_a_3_a_2_m = (*(tint_symbol))[3].a[2].m;
+  float2 const l_a_3_a_2_m_1 = (*(tint_symbol))[3].a[2].m[1];
+  float const l_a_3_a_2_m_1_0 = (*(tint_symbol))[3].a[2].m[1][0];
+  return;
+}
+
diff --git a/test/tint/buffer/uniform/std140/mat4x2/static_index_via_ptr.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/mat4x2/static_index_via_ptr.wgsl.expected.spvasm
new file mode 100644
index 0000000..f752945
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/mat4x2/static_index_via_ptr.wgsl.expected.spvasm
@@ -0,0 +1,249 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 159
+; Schema: 0
+               OpCapability Shader
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %a_block "a_block"
+               OpMemberName %a_block 0 "inner"
+               OpName %Outer_std140 "Outer_std140"
+               OpMemberName %Outer_std140 0 "a"
+               OpName %Inner_std140 "Inner_std140"
+               OpMemberName %Inner_std140 0 "m_0"
+               OpMemberName %Inner_std140 1 "m_1"
+               OpMemberName %Inner_std140 2 "m_2"
+               OpMemberName %Inner_std140 3 "m_3"
+               OpName %a "a"
+               OpName %Inner "Inner"
+               OpMemberName %Inner 0 "m"
+               OpName %conv_Inner "conv_Inner"
+               OpName %val "val"
+               OpName %conv_arr_4_Inner "conv_arr_4_Inner"
+               OpName %val_0 "val"
+               OpName %arr "arr"
+               OpName %i "i"
+               OpName %var_for_index "var_for_index"
+               OpName %Outer "Outer"
+               OpMemberName %Outer 0 "a"
+               OpName %conv_Outer "conv_Outer"
+               OpName %val_1 "val"
+               OpName %conv_arr_4_Outer "conv_arr_4_Outer"
+               OpName %val_2 "val"
+               OpName %arr_0 "arr"
+               OpName %i_0 "i"
+               OpName %var_for_index_1 "var_for_index_1"
+               OpName %load_a_3_a_2_m "load_a_3_a_2_m"
+               OpName %f "f"
+               OpDecorate %a_block Block
+               OpMemberDecorate %a_block 0 Offset 0
+               OpMemberDecorate %Outer_std140 0 Offset 0
+               OpMemberDecorate %Inner_std140 0 Offset 0
+               OpMemberDecorate %Inner_std140 1 Offset 8
+               OpMemberDecorate %Inner_std140 2 Offset 16
+               OpMemberDecorate %Inner_std140 3 Offset 24
+               OpDecorate %_arr_Inner_std140_uint_4 ArrayStride 32
+               OpDecorate %_arr_Outer_std140_uint_4 ArrayStride 128
+               OpDecorate %a NonWritable
+               OpDecorate %a DescriptorSet 0
+               OpDecorate %a Binding 0
+               OpMemberDecorate %Inner 0 Offset 0
+               OpMemberDecorate %Inner 0 ColMajor
+               OpMemberDecorate %Inner 0 MatrixStride 8
+               OpDecorate %_arr_Inner_uint_4 ArrayStride 32
+               OpMemberDecorate %Outer 0 Offset 0
+               OpDecorate %_arr_Outer_uint_4 ArrayStride 128
+      %float = OpTypeFloat 32
+    %v2float = OpTypeVector %float 2
+%Inner_std140 = OpTypeStruct %v2float %v2float %v2float %v2float
+       %uint = OpTypeInt 32 0
+     %uint_4 = OpConstant %uint 4
+%_arr_Inner_std140_uint_4 = OpTypeArray %Inner_std140 %uint_4
+%Outer_std140 = OpTypeStruct %_arr_Inner_std140_uint_4
+%_arr_Outer_std140_uint_4 = OpTypeArray %Outer_std140 %uint_4
+    %a_block = OpTypeStruct %_arr_Outer_std140_uint_4
+%_ptr_Uniform_a_block = OpTypePointer Uniform %a_block
+          %a = OpVariable %_ptr_Uniform_a_block Uniform
+%mat4v2float = OpTypeMatrix %v2float 4
+      %Inner = OpTypeStruct %mat4v2float
+         %12 = OpTypeFunction %Inner %Inner_std140
+%_arr_Inner_uint_4 = OpTypeArray %Inner %uint_4
+         %24 = OpTypeFunction %_arr_Inner_uint_4 %_arr_Inner_std140_uint_4
+%_ptr_Function__arr_Inner_uint_4 = OpTypePointer Function %_arr_Inner_uint_4
+         %31 = OpConstantNull %_arr_Inner_uint_4
+%_ptr_Function_uint = OpTypePointer Function %uint
+         %34 = OpConstantNull %uint
+       %bool = OpTypeBool
+%_ptr_Function__arr_Inner_std140_uint_4 = OpTypePointer Function %_arr_Inner_std140_uint_4
+         %47 = OpConstantNull %_arr_Inner_std140_uint_4
+%_ptr_Function_Inner = OpTypePointer Function %Inner
+%_ptr_Function_Inner_std140 = OpTypePointer Function %Inner_std140
+     %uint_1 = OpConstant %uint 1
+      %Outer = OpTypeStruct %_arr_Inner_uint_4
+         %60 = OpTypeFunction %Outer %Outer_std140
+%_arr_Outer_uint_4 = OpTypeArray %Outer %uint_4
+         %68 = OpTypeFunction %_arr_Outer_uint_4 %_arr_Outer_std140_uint_4
+%_ptr_Function__arr_Outer_uint_4 = OpTypePointer Function %_arr_Outer_uint_4
+         %75 = OpConstantNull %_arr_Outer_uint_4
+%_ptr_Function__arr_Outer_std140_uint_4 = OpTypePointer Function %_arr_Outer_std140_uint_4
+         %88 = OpConstantNull %_arr_Outer_std140_uint_4
+%_ptr_Function_Outer = OpTypePointer Function %Outer
+%_ptr_Function_Outer_std140 = OpTypePointer Function %Outer_std140
+        %100 = OpTypeFunction %mat4v2float
+     %uint_0 = OpConstant %uint 0
+     %uint_3 = OpConstant %uint 3
+     %uint_2 = OpConstant %uint 2
+%_ptr_Uniform_v2float = OpTypePointer Uniform %v2float
+       %void = OpTypeVoid
+        %116 = OpTypeFunction %void
+        %int = OpTypeInt 32 1
+      %int_1 = OpConstant %int 1
+%_ptr_Uniform__arr_Outer_std140_uint_4 = OpTypePointer Uniform %_arr_Outer_std140_uint_4
+%_ptr_Uniform_Outer_std140 = OpTypePointer Uniform %Outer_std140
+%_ptr_Uniform__arr_Inner_std140_uint_4 = OpTypePointer Uniform %_arr_Inner_std140_uint_4
+%_ptr_Uniform_Inner_std140 = OpTypePointer Uniform %Inner_std140
+%_ptr_Uniform_float = OpTypePointer Uniform %float
+ %conv_Inner = OpFunction %Inner None %12
+        %val = OpFunctionParameter %Inner_std140
+         %17 = OpLabel
+         %18 = OpCompositeExtract %v2float %val 0
+         %19 = OpCompositeExtract %v2float %val 1
+         %20 = OpCompositeExtract %v2float %val 2
+         %21 = OpCompositeExtract %v2float %val 3
+         %22 = OpCompositeConstruct %mat4v2float %18 %19 %20 %21
+         %23 = OpCompositeConstruct %Inner %22
+               OpReturnValue %23
+               OpFunctionEnd
+%conv_arr_4_Inner = OpFunction %_arr_Inner_uint_4 None %24
+      %val_0 = OpFunctionParameter %_arr_Inner_std140_uint_4
+         %28 = OpLabel
+        %arr = OpVariable %_ptr_Function__arr_Inner_uint_4 Function %31
+          %i = OpVariable %_ptr_Function_uint Function %34
+%var_for_index = OpVariable %_ptr_Function__arr_Inner_std140_uint_4 Function %47
+               OpBranch %35
+         %35 = OpLabel
+               OpLoopMerge %36 %37 None
+               OpBranch %38
+         %38 = OpLabel
+         %40 = OpLoad %uint %i
+         %41 = OpULessThan %bool %40 %uint_4
+         %39 = OpLogicalNot %bool %41
+               OpSelectionMerge %43 None
+               OpBranchConditional %39 %44 %43
+         %44 = OpLabel
+               OpBranch %36
+         %43 = OpLabel
+               OpStore %var_for_index %val_0
+         %48 = OpLoad %uint %i
+         %50 = OpAccessChain %_ptr_Function_Inner %arr %48
+         %52 = OpLoad %uint %i
+         %54 = OpAccessChain %_ptr_Function_Inner_std140 %var_for_index %52
+         %55 = OpLoad %Inner_std140 %54
+         %51 = OpFunctionCall %Inner %conv_Inner %55
+               OpStore %50 %51
+               OpBranch %37
+         %37 = OpLabel
+         %56 = OpLoad %uint %i
+         %58 = OpIAdd %uint %56 %uint_1
+               OpStore %i %58
+               OpBranch %35
+         %36 = OpLabel
+         %59 = OpLoad %_arr_Inner_uint_4 %arr
+               OpReturnValue %59
+               OpFunctionEnd
+ %conv_Outer = OpFunction %Outer None %60
+      %val_1 = OpFunctionParameter %Outer_std140
+         %64 = OpLabel
+         %66 = OpCompositeExtract %_arr_Inner_std140_uint_4 %val_1 0
+         %65 = OpFunctionCall %_arr_Inner_uint_4 %conv_arr_4_Inner %66
+         %67 = OpCompositeConstruct %Outer %65
+               OpReturnValue %67
+               OpFunctionEnd
+%conv_arr_4_Outer = OpFunction %_arr_Outer_uint_4 None %68
+      %val_2 = OpFunctionParameter %_arr_Outer_std140_uint_4
+         %72 = OpLabel
+      %arr_0 = OpVariable %_ptr_Function__arr_Outer_uint_4 Function %75
+        %i_0 = OpVariable %_ptr_Function_uint Function %34
+%var_for_index_1 = OpVariable %_ptr_Function__arr_Outer_std140_uint_4 Function %88
+               OpBranch %77
+         %77 = OpLabel
+               OpLoopMerge %78 %79 None
+               OpBranch %80
+         %80 = OpLabel
+         %82 = OpLoad %uint %i_0
+         %83 = OpULessThan %bool %82 %uint_4
+         %81 = OpLogicalNot %bool %83
+               OpSelectionMerge %84 None
+               OpBranchConditional %81 %85 %84
+         %85 = OpLabel
+               OpBranch %78
+         %84 = OpLabel
+               OpStore %var_for_index_1 %val_2
+         %89 = OpLoad %uint %i_0
+         %91 = OpAccessChain %_ptr_Function_Outer %arr_0 %89
+         %93 = OpLoad %uint %i_0
+         %95 = OpAccessChain %_ptr_Function_Outer_std140 %var_for_index_1 %93
+         %96 = OpLoad %Outer_std140 %95
+         %92 = OpFunctionCall %Outer %conv_Outer %96
+               OpStore %91 %92
+               OpBranch %79
+         %79 = OpLabel
+         %97 = OpLoad %uint %i_0
+         %98 = OpIAdd %uint %97 %uint_1
+               OpStore %i_0 %98
+               OpBranch %77
+         %78 = OpLabel
+         %99 = OpLoad %_arr_Outer_uint_4 %arr_0
+               OpReturnValue %99
+               OpFunctionEnd
+%load_a_3_a_2_m = OpFunction %mat4v2float None %100
+        %102 = OpLabel
+        %107 = OpAccessChain %_ptr_Uniform_v2float %a %uint_0 %uint_3 %uint_0 %uint_2 %uint_0
+        %108 = OpLoad %v2float %107
+        %109 = OpAccessChain %_ptr_Uniform_v2float %a %uint_0 %uint_3 %uint_0 %uint_2 %uint_1
+        %110 = OpLoad %v2float %109
+        %111 = OpAccessChain %_ptr_Uniform_v2float %a %uint_0 %uint_3 %uint_0 %uint_2 %uint_2
+        %112 = OpLoad %v2float %111
+        %113 = OpAccessChain %_ptr_Uniform_v2float %a %uint_0 %uint_3 %uint_0 %uint_2 %uint_3
+        %114 = OpLoad %v2float %113
+        %115 = OpCompositeConstruct %mat4v2float %108 %110 %112 %114
+               OpReturnValue %115
+               OpFunctionEnd
+          %f = OpFunction %void None %116
+        %119 = OpLabel
+        %124 = OpAccessChain %_ptr_Uniform__arr_Outer_std140_uint_4 %a %uint_0
+        %125 = OpLoad %_arr_Outer_std140_uint_4 %124
+        %122 = OpFunctionCall %_arr_Outer_uint_4 %conv_arr_4_Outer %125
+        %128 = OpAccessChain %_ptr_Uniform_Outer_std140 %a %uint_0 %uint_3
+        %129 = OpLoad %Outer_std140 %128
+        %126 = OpFunctionCall %Outer %conv_Outer %129
+        %132 = OpAccessChain %_ptr_Uniform__arr_Inner_std140_uint_4 %a %uint_0 %uint_3 %uint_0
+        %133 = OpLoad %_arr_Inner_std140_uint_4 %132
+        %130 = OpFunctionCall %_arr_Inner_uint_4 %conv_arr_4_Inner %133
+        %136 = OpAccessChain %_ptr_Uniform_Inner_std140 %a %uint_0 %uint_3 %uint_0 %uint_2
+        %137 = OpLoad %Inner_std140 %136
+        %134 = OpFunctionCall %Inner %conv_Inner %137
+        %138 = OpFunctionCall %mat4v2float %load_a_3_a_2_m
+        %139 = OpAccessChain %_ptr_Uniform_v2float %a %uint_0 %uint_3 %uint_0 %uint_2 %uint_1
+        %140 = OpLoad %v2float %139
+        %142 = OpAccessChain %_ptr_Uniform__arr_Outer_std140_uint_4 %a %uint_0
+        %143 = OpLoad %_arr_Outer_std140_uint_4 %142
+        %141 = OpFunctionCall %_arr_Outer_uint_4 %conv_arr_4_Outer %143
+        %145 = OpAccessChain %_ptr_Uniform_Outer_std140 %a %uint_0 %uint_3
+        %146 = OpLoad %Outer_std140 %145
+        %144 = OpFunctionCall %Outer %conv_Outer %146
+        %148 = OpAccessChain %_ptr_Uniform__arr_Inner_std140_uint_4 %a %uint_0 %uint_3 %uint_0
+        %149 = OpLoad %_arr_Inner_std140_uint_4 %148
+        %147 = OpFunctionCall %_arr_Inner_uint_4 %conv_arr_4_Inner %149
+        %151 = OpAccessChain %_ptr_Uniform_Inner_std140 %a %uint_0 %uint_3 %uint_0 %uint_2
+        %152 = OpLoad %Inner_std140 %151
+        %150 = OpFunctionCall %Inner %conv_Inner %152
+        %153 = OpFunctionCall %mat4v2float %load_a_3_a_2_m
+        %154 = OpAccessChain %_ptr_Uniform_v2float %a %uint_0 %uint_3 %uint_0 %uint_2 %uint_1
+        %155 = OpLoad %v2float %154
+        %157 = OpAccessChain %_ptr_Uniform_float %a %uint_0 %uint_3 %uint_0 %uint_2 %uint_1 %34
+        %158 = OpLoad %float %157
+               OpReturn
+               OpFunctionEnd
diff --git a/test/tint/buffer/uniform/std140/mat4x2/static_index_via_ptr.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/mat4x2/static_index_via_ptr.wgsl.expected.wgsl
new file mode 100644
index 0000000..ff8de06
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/mat4x2/static_index_via_ptr.wgsl.expected.wgsl
@@ -0,0 +1,27 @@
+struct Inner {
+  m : mat4x2<f32>,
+}
+
+struct Outer {
+  a : array<Inner, 4>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<Outer, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+  let I = 1;
+  let p_a = &(a);
+  let p_a_3 = &((*(p_a))[3]);
+  let p_a_3_a = &((*(p_a_3)).a);
+  let p_a_3_a_2 = &((*(p_a_3_a))[2]);
+  let p_a_3_a_2_m = &((*(p_a_3_a_2)).m);
+  let p_a_3_a_2_m_1 = &((*(p_a_3_a_2_m))[1]);
+  let l_a : array<Outer, 4> = *(p_a);
+  let l_a_3 : Outer = *(p_a_3);
+  let l_a_3_a : array<Inner, 4> = *(p_a_3_a);
+  let l_a_3_a_2 : Inner = *(p_a_3_a_2);
+  let l_a_3_a_2_m : mat4x2<f32> = *(p_a_3_a_2_m);
+  let l_a_3_a_2_m_1 : vec2<f32> = *(p_a_3_a_2_m_1);
+  let l_a_3_a_2_m_1_0 : f32 = (*(p_a_3_a_2_m_1))[0];
+}
diff --git a/test/tint/buffer/uniform/std140/mat4x2/to_builtin.wgsl b/test/tint/buffer/uniform/std140/mat4x2/to_builtin.wgsl
new file mode 100644
index 0000000..7aa8fa0
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/mat4x2/to_builtin.wgsl
@@ -0,0 +1,14 @@
+struct S {
+  before : i32,
+  m : mat4x2<f32>,
+  after : i32,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+    let t = transpose(u[2].m);
+    let l = length(u[0].m[1].yx);
+    let a = abs(u[0].m[1].yx.x);
+}
diff --git a/test/tint/buffer/uniform/std140/mat4x2/to_builtin.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/mat4x2/to_builtin.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..f1fc60e
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/mat4x2/to_builtin.wgsl.expected.dxc.hlsl
@@ -0,0 +1,23 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[12];
+};
+
+float4x2 tint_symbol(uint4 buffer[12], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load = buffer[scalar_offset / 4];
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset_1 / 4];
+  const uint scalar_offset_2 = ((offset + 16u)) / 4;
+  uint4 ubo_load_2 = buffer[scalar_offset_2 / 4];
+  const uint scalar_offset_3 = ((offset + 24u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_3 / 4];
+  return float4x2(asfloat(((scalar_offset & 2) ? ubo_load.zw : ubo_load.xy)), asfloat(((scalar_offset_1 & 2) ? ubo_load_1.zw : ubo_load_1.xy)), asfloat(((scalar_offset_2 & 2) ? ubo_load_2.zw : ubo_load_2.xy)), asfloat(((scalar_offset_3 & 2) ? ubo_load_3.zw : ubo_load_3.xy)));
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const float2x4 t = transpose(tint_symbol(u, 104u));
+  const float l = length(asfloat(u[1].xy).yx);
+  const float a = abs(asfloat(u[1].xy).yx.x);
+  return;
+}
diff --git a/test/tint/buffer/uniform/std140/mat4x2/to_builtin.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/mat4x2/to_builtin.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..f1fc60e
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/mat4x2/to_builtin.wgsl.expected.fxc.hlsl
@@ -0,0 +1,23 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[12];
+};
+
+float4x2 tint_symbol(uint4 buffer[12], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load = buffer[scalar_offset / 4];
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset_1 / 4];
+  const uint scalar_offset_2 = ((offset + 16u)) / 4;
+  uint4 ubo_load_2 = buffer[scalar_offset_2 / 4];
+  const uint scalar_offset_3 = ((offset + 24u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_3 / 4];
+  return float4x2(asfloat(((scalar_offset & 2) ? ubo_load.zw : ubo_load.xy)), asfloat(((scalar_offset_1 & 2) ? ubo_load_1.zw : ubo_load_1.xy)), asfloat(((scalar_offset_2 & 2) ? ubo_load_2.zw : ubo_load_2.xy)), asfloat(((scalar_offset_3 & 2) ? ubo_load_3.zw : ubo_load_3.xy)));
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const float2x4 t = transpose(tint_symbol(u, 104u));
+  const float l = length(asfloat(u[1].xy).yx);
+  const float a = abs(asfloat(u[1].xy).yx.x);
+  return;
+}
diff --git a/test/tint/buffer/uniform/std140/mat4x2/to_builtin.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/mat4x2/to_builtin.wgsl.expected.glsl
new file mode 100644
index 0000000..110d8e5
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/mat4x2/to_builtin.wgsl.expected.glsl
@@ -0,0 +1,40 @@
+#version 310 es
+
+struct S {
+  int before;
+  mat4x2 m;
+  int after;
+};
+
+struct S_std140 {
+  int before;
+  vec2 m_0;
+  vec2 m_1;
+  vec2 m_2;
+  vec2 m_3;
+  int after;
+};
+
+struct u_block {
+  S_std140 inner[4];
+};
+
+layout(binding = 0) uniform u_block_1 {
+  S_std140 inner[4];
+} u;
+
+mat4x2 load_u_2_m() {
+  return mat4x2(u.inner[2u].m_0, u.inner[2u].m_1, u.inner[2u].m_2, u.inner[2u].m_3);
+}
+
+void f() {
+  mat2x4 t = transpose(load_u_2_m());
+  float l = length(u.inner[0u].m_1.yx);
+  float a = abs(u.inner[0u].m_1.yx[0u]);
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}
diff --git a/test/tint/buffer/uniform/std140/mat4x2/to_builtin.wgsl.expected.msl b/test/tint/buffer/uniform/std140/mat4x2/to_builtin.wgsl.expected.msl
new file mode 100644
index 0000000..d66b53e
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/mat4x2/to_builtin.wgsl.expected.msl
@@ -0,0 +1,31 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+struct S {
+  /* 0x0000 */ int before;
+  /* 0x0004 */ tint_array<int8_t, 4> tint_pad;
+  /* 0x0008 */ float4x2 m;
+  /* 0x0028 */ int after;
+  /* 0x002c */ tint_array<int8_t, 4> tint_pad_1;
+};
+
+kernel void f(const constant tint_array<S, 4>* tint_symbol [[buffer(0)]]) {
+  float2x4 const t = transpose((*(tint_symbol))[2].m);
+  float const l = length(float2((*(tint_symbol))[0].m[1]).yx);
+  float const a = fabs(float2((*(tint_symbol))[0].m[1]).yx[0]);
+  return;
+}
+
diff --git a/test/tint/buffer/uniform/std140/mat4x2/to_builtin.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/mat4x2/to_builtin.wgsl.expected.spvasm
new file mode 100644
index 0000000..a9f5647
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/mat4x2/to_builtin.wgsl.expected.spvasm
@@ -0,0 +1,84 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 48
+; Schema: 0
+               OpCapability Shader
+         %38 = OpExtInstImport "GLSL.std.450"
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %u_block "u_block"
+               OpMemberName %u_block 0 "inner"
+               OpName %S_std140 "S_std140"
+               OpMemberName %S_std140 0 "before"
+               OpMemberName %S_std140 1 "m_0"
+               OpMemberName %S_std140 2 "m_1"
+               OpMemberName %S_std140 3 "m_2"
+               OpMemberName %S_std140 4 "m_3"
+               OpMemberName %S_std140 5 "after"
+               OpName %u "u"
+               OpName %load_u_2_m "load_u_2_m"
+               OpName %f "f"
+               OpDecorate %u_block Block
+               OpMemberDecorate %u_block 0 Offset 0
+               OpMemberDecorate %S_std140 0 Offset 0
+               OpMemberDecorate %S_std140 1 Offset 8
+               OpMemberDecorate %S_std140 2 Offset 16
+               OpMemberDecorate %S_std140 3 Offset 24
+               OpMemberDecorate %S_std140 4 Offset 32
+               OpMemberDecorate %S_std140 5 Offset 40
+               OpDecorate %_arr_S_std140_uint_4 ArrayStride 48
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+        %int = OpTypeInt 32 1
+      %float = OpTypeFloat 32
+    %v2float = OpTypeVector %float 2
+   %S_std140 = OpTypeStruct %int %v2float %v2float %v2float %v2float %int
+       %uint = OpTypeInt 32 0
+     %uint_4 = OpConstant %uint 4
+%_arr_S_std140_uint_4 = OpTypeArray %S_std140 %uint_4
+    %u_block = OpTypeStruct %_arr_S_std140_uint_4
+%_ptr_Uniform_u_block = OpTypePointer Uniform %u_block
+          %u = OpVariable %_ptr_Uniform_u_block Uniform
+%mat4v2float = OpTypeMatrix %v2float 4
+         %11 = OpTypeFunction %mat4v2float
+     %uint_0 = OpConstant %uint 0
+     %uint_2 = OpConstant %uint 2
+     %uint_1 = OpConstant %uint 1
+%_ptr_Uniform_v2float = OpTypePointer Uniform %v2float
+     %uint_3 = OpConstant %uint 3
+       %void = OpTypeVoid
+         %29 = OpTypeFunction %void
+    %v4float = OpTypeVector %float 4
+%mat2v4float = OpTypeMatrix %v4float 2
+         %39 = OpConstantNull %uint
+ %load_u_2_m = OpFunction %mat4v2float None %11
+         %14 = OpLabel
+         %19 = OpAccessChain %_ptr_Uniform_v2float %u %uint_0 %uint_2 %uint_1
+         %20 = OpLoad %v2float %19
+         %21 = OpAccessChain %_ptr_Uniform_v2float %u %uint_0 %uint_2 %uint_2
+         %22 = OpLoad %v2float %21
+         %24 = OpAccessChain %_ptr_Uniform_v2float %u %uint_0 %uint_2 %uint_3
+         %25 = OpLoad %v2float %24
+         %26 = OpAccessChain %_ptr_Uniform_v2float %u %uint_0 %uint_2 %uint_4
+         %27 = OpLoad %v2float %26
+         %28 = OpCompositeConstruct %mat4v2float %20 %22 %25 %27
+               OpReturnValue %28
+               OpFunctionEnd
+          %f = OpFunction %void None %29
+         %32 = OpLabel
+         %36 = OpFunctionCall %mat4v2float %load_u_2_m
+         %33 = OpTranspose %mat2v4float %36
+         %40 = OpAccessChain %_ptr_Uniform_v2float %u %uint_0 %39 %uint_2
+         %41 = OpLoad %v2float %40
+         %42 = OpVectorShuffle %v2float %41 %41 1 0
+         %37 = OpExtInst %float %38 Length %42
+         %44 = OpAccessChain %_ptr_Uniform_v2float %u %uint_0 %39 %uint_2
+         %45 = OpLoad %v2float %44
+         %46 = OpVectorShuffle %v2float %45 %45 1 0
+         %47 = OpCompositeExtract %float %46 0
+         %43 = OpExtInst %float %38 FAbs %47
+               OpReturn
+               OpFunctionEnd
diff --git a/test/tint/buffer/uniform/std140/mat4x2/to_builtin.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/mat4x2/to_builtin.wgsl.expected.wgsl
new file mode 100644
index 0000000..59bf43d
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/mat4x2/to_builtin.wgsl.expected.wgsl
@@ -0,0 +1,14 @@
+struct S {
+  before : i32,
+  m : mat4x2<f32>,
+  after : i32,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+  let t = transpose(u[2].m);
+  let l = length(u[0].m[1].yx);
+  let a = abs(u[0].m[1].yx.x);
+}
diff --git a/test/tint/buffer/uniform/std140/mat4x2/to_fn.wgsl b/test/tint/buffer/uniform/std140/mat4x2/to_fn.wgsl
new file mode 100644
index 0000000..41ca70c
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/mat4x2/to_fn.wgsl
@@ -0,0 +1,22 @@
+struct S {
+  before : i32,
+  m : mat4x2<f32>,
+  after : i32,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S, 4>;
+
+fn a(a : array<S, 4>) {}
+fn b(s : S) {}
+fn c(m : mat4x2<f32>) {}
+fn d(v : vec2<f32>) {}
+fn e(f : f32) {}
+
+@compute @workgroup_size(1)
+fn f() {
+    a(u);
+    b(u[2]);
+    c(u[2].m);
+    d(u[0].m[1].yx);
+    e(u[0].m[1].yx.x);
+}
diff --git a/test/tint/buffer/uniform/std140/mat4x2/to_fn.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/mat4x2/to_fn.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..7d6906e
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/mat4x2/to_fn.wgsl.expected.dxc.hlsl
@@ -0,0 +1,64 @@
+struct S {
+  int before;
+  float4x2 m;
+  int after;
+};
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[12];
+};
+
+void a(S a_1[4]) {
+}
+
+void b(S s) {
+}
+
+void c(float4x2 m) {
+}
+
+void d(float2 v) {
+}
+
+void e(float f_1) {
+}
+
+float4x2 tint_symbol_3(uint4 buffer[12], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load = buffer[scalar_offset / 4];
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset_1 / 4];
+  const uint scalar_offset_2 = ((offset + 16u)) / 4;
+  uint4 ubo_load_2 = buffer[scalar_offset_2 / 4];
+  const uint scalar_offset_3 = ((offset + 24u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_3 / 4];
+  return float4x2(asfloat(((scalar_offset & 2) ? ubo_load.zw : ubo_load.xy)), asfloat(((scalar_offset_1 & 2) ? ubo_load_1.zw : ubo_load_1.xy)), asfloat(((scalar_offset_2 & 2) ? ubo_load_2.zw : ubo_load_2.xy)), asfloat(((scalar_offset_3 & 2) ? ubo_load_3.zw : ubo_load_3.xy)));
+}
+
+S tint_symbol_1(uint4 buffer[12], uint offset) {
+  const uint scalar_offset_4 = ((offset + 0u)) / 4;
+  const uint scalar_offset_5 = ((offset + 40u)) / 4;
+  const S tint_symbol_5 = {asint(buffer[scalar_offset_4 / 4][scalar_offset_4 % 4]), tint_symbol_3(buffer, (offset + 8u)), asint(buffer[scalar_offset_5 / 4][scalar_offset_5 % 4])};
+  return tint_symbol_5;
+}
+
+typedef S tint_symbol_ret[4];
+tint_symbol_ret tint_symbol(uint4 buffer[12], uint offset) {
+  S arr[4] = (S[4])0;
+  {
+    [loop] for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = tint_symbol_1(buffer, (offset + (i * 48u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  a(tint_symbol(u, 0u));
+  b(tint_symbol_1(u, 96u));
+  c(tint_symbol_3(u, 104u));
+  d(asfloat(u[1].xy).yx);
+  e(asfloat(u[1].xy).yx.x);
+  return;
+}
diff --git a/test/tint/buffer/uniform/std140/mat4x2/to_fn.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/mat4x2/to_fn.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..7d6906e
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/mat4x2/to_fn.wgsl.expected.fxc.hlsl
@@ -0,0 +1,64 @@
+struct S {
+  int before;
+  float4x2 m;
+  int after;
+};
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[12];
+};
+
+void a(S a_1[4]) {
+}
+
+void b(S s) {
+}
+
+void c(float4x2 m) {
+}
+
+void d(float2 v) {
+}
+
+void e(float f_1) {
+}
+
+float4x2 tint_symbol_3(uint4 buffer[12], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load = buffer[scalar_offset / 4];
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset_1 / 4];
+  const uint scalar_offset_2 = ((offset + 16u)) / 4;
+  uint4 ubo_load_2 = buffer[scalar_offset_2 / 4];
+  const uint scalar_offset_3 = ((offset + 24u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_3 / 4];
+  return float4x2(asfloat(((scalar_offset & 2) ? ubo_load.zw : ubo_load.xy)), asfloat(((scalar_offset_1 & 2) ? ubo_load_1.zw : ubo_load_1.xy)), asfloat(((scalar_offset_2 & 2) ? ubo_load_2.zw : ubo_load_2.xy)), asfloat(((scalar_offset_3 & 2) ? ubo_load_3.zw : ubo_load_3.xy)));
+}
+
+S tint_symbol_1(uint4 buffer[12], uint offset) {
+  const uint scalar_offset_4 = ((offset + 0u)) / 4;
+  const uint scalar_offset_5 = ((offset + 40u)) / 4;
+  const S tint_symbol_5 = {asint(buffer[scalar_offset_4 / 4][scalar_offset_4 % 4]), tint_symbol_3(buffer, (offset + 8u)), asint(buffer[scalar_offset_5 / 4][scalar_offset_5 % 4])};
+  return tint_symbol_5;
+}
+
+typedef S tint_symbol_ret[4];
+tint_symbol_ret tint_symbol(uint4 buffer[12], uint offset) {
+  S arr[4] = (S[4])0;
+  {
+    [loop] for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = tint_symbol_1(buffer, (offset + (i * 48u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  a(tint_symbol(u, 0u));
+  b(tint_symbol_1(u, 96u));
+  c(tint_symbol_3(u, 104u));
+  d(asfloat(u[1].xy).yx);
+  e(asfloat(u[1].xy).yx.x);
+  return;
+}
diff --git a/test/tint/buffer/uniform/std140/mat4x2/to_fn.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/mat4x2/to_fn.wgsl.expected.glsl
new file mode 100644
index 0000000..a73babb
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/mat4x2/to_fn.wgsl.expected.glsl
@@ -0,0 +1,72 @@
+#version 310 es
+
+struct S {
+  int before;
+  mat4x2 m;
+  int after;
+};
+
+struct S_std140 {
+  int before;
+  vec2 m_0;
+  vec2 m_1;
+  vec2 m_2;
+  vec2 m_3;
+  int after;
+};
+
+struct u_block {
+  S_std140 inner[4];
+};
+
+layout(binding = 0) uniform u_block_1 {
+  S_std140 inner[4];
+} u;
+
+void a(S a_1[4]) {
+}
+
+void b(S s) {
+}
+
+void c(mat4x2 m) {
+}
+
+void d(vec2 v) {
+}
+
+void e(float f_1) {
+}
+
+S conv_S(S_std140 val) {
+  S tint_symbol = S(val.before, mat4x2(val.m_0, val.m_1, val.m_2, val.m_3), val.after);
+  return tint_symbol;
+}
+
+S[4] conv_arr_4_S(S_std140 val[4]) {
+  S arr[4] = S[4](S(0, mat4x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f), 0), S(0, mat4x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f), 0), S(0, mat4x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f), 0), S(0, mat4x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f), 0));
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = conv_S(val[i]);
+    }
+  }
+  return arr;
+}
+
+mat4x2 load_u_2_m() {
+  return mat4x2(u.inner[2u].m_0, u.inner[2u].m_1, u.inner[2u].m_2, u.inner[2u].m_3);
+}
+
+void f() {
+  a(conv_arr_4_S(u.inner));
+  b(conv_S(u.inner[2u]));
+  c(load_u_2_m());
+  d(u.inner[0u].m_1.yx);
+  e(u.inner[0u].m_1.yx[0u]);
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}
diff --git a/test/tint/buffer/uniform/std140/mat4x2/to_fn.wgsl.expected.msl b/test/tint/buffer/uniform/std140/mat4x2/to_fn.wgsl.expected.msl
new file mode 100644
index 0000000..61f3150
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/mat4x2/to_fn.wgsl.expected.msl
@@ -0,0 +1,48 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+struct S {
+  /* 0x0000 */ int before;
+  /* 0x0004 */ tint_array<int8_t, 4> tint_pad;
+  /* 0x0008 */ float4x2 m;
+  /* 0x0028 */ int after;
+  /* 0x002c */ tint_array<int8_t, 4> tint_pad_1;
+};
+
+void a(tint_array<S, 4> a_1) {
+}
+
+void b(S s) {
+}
+
+void c(float4x2 m) {
+}
+
+void d(float2 v) {
+}
+
+void e(float f_1) {
+}
+
+kernel void f(const constant tint_array<S, 4>* tint_symbol [[buffer(0)]]) {
+  a(*(tint_symbol));
+  b((*(tint_symbol))[2]);
+  c((*(tint_symbol))[2].m);
+  d(float2((*(tint_symbol))[0].m[1]).yx);
+  e(float2((*(tint_symbol))[0].m[1]).yx[0]);
+  return;
+}
+
diff --git a/test/tint/buffer/uniform/std140/mat4x2/to_fn.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/mat4x2/to_fn.wgsl.expected.spvasm
new file mode 100644
index 0000000..9993de97
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/mat4x2/to_fn.wgsl.expected.spvasm
@@ -0,0 +1,210 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 122
+; Schema: 0
+               OpCapability Shader
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %u_block "u_block"
+               OpMemberName %u_block 0 "inner"
+               OpName %S_std140 "S_std140"
+               OpMemberName %S_std140 0 "before"
+               OpMemberName %S_std140 1 "m_0"
+               OpMemberName %S_std140 2 "m_1"
+               OpMemberName %S_std140 3 "m_2"
+               OpMemberName %S_std140 4 "m_3"
+               OpMemberName %S_std140 5 "after"
+               OpName %u "u"
+               OpName %S "S"
+               OpMemberName %S 0 "before"
+               OpMemberName %S 1 "m"
+               OpMemberName %S 2 "after"
+               OpName %a "a"
+               OpName %a_1 "a_1"
+               OpName %b "b"
+               OpName %s "s"
+               OpName %c "c"
+               OpName %m "m"
+               OpName %d "d"
+               OpName %v "v"
+               OpName %e "e"
+               OpName %f_1 "f_1"
+               OpName %conv_S "conv_S"
+               OpName %val "val"
+               OpName %conv_arr_4_S "conv_arr_4_S"
+               OpName %val_0 "val"
+               OpName %arr "arr"
+               OpName %i "i"
+               OpName %var_for_index "var_for_index"
+               OpName %load_u_2_m "load_u_2_m"
+               OpName %f "f"
+               OpDecorate %u_block Block
+               OpMemberDecorate %u_block 0 Offset 0
+               OpMemberDecorate %S_std140 0 Offset 0
+               OpMemberDecorate %S_std140 1 Offset 8
+               OpMemberDecorate %S_std140 2 Offset 16
+               OpMemberDecorate %S_std140 3 Offset 24
+               OpMemberDecorate %S_std140 4 Offset 32
+               OpMemberDecorate %S_std140 5 Offset 40
+               OpDecorate %_arr_S_std140_uint_4 ArrayStride 48
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+               OpMemberDecorate %S 0 Offset 0
+               OpMemberDecorate %S 1 Offset 8
+               OpMemberDecorate %S 1 ColMajor
+               OpMemberDecorate %S 1 MatrixStride 8
+               OpMemberDecorate %S 2 Offset 40
+               OpDecorate %_arr_S_uint_4 ArrayStride 48
+        %int = OpTypeInt 32 1
+      %float = OpTypeFloat 32
+    %v2float = OpTypeVector %float 2
+   %S_std140 = OpTypeStruct %int %v2float %v2float %v2float %v2float %int
+       %uint = OpTypeInt 32 0
+     %uint_4 = OpConstant %uint 4
+%_arr_S_std140_uint_4 = OpTypeArray %S_std140 %uint_4
+    %u_block = OpTypeStruct %_arr_S_std140_uint_4
+%_ptr_Uniform_u_block = OpTypePointer Uniform %u_block
+          %u = OpVariable %_ptr_Uniform_u_block Uniform
+       %void = OpTypeVoid
+%mat4v2float = OpTypeMatrix %v2float 4
+          %S = OpTypeStruct %int %mat4v2float %int
+%_arr_S_uint_4 = OpTypeArray %S %uint_4
+         %11 = OpTypeFunction %void %_arr_S_uint_4
+         %19 = OpTypeFunction %void %S
+         %23 = OpTypeFunction %void %mat4v2float
+         %27 = OpTypeFunction %void %v2float
+         %31 = OpTypeFunction %void %float
+         %35 = OpTypeFunction %S %S_std140
+         %47 = OpTypeFunction %_arr_S_uint_4 %_arr_S_std140_uint_4
+%_ptr_Function__arr_S_uint_4 = OpTypePointer Function %_arr_S_uint_4
+         %53 = OpConstantNull %_arr_S_uint_4
+%_ptr_Function_uint = OpTypePointer Function %uint
+         %56 = OpConstantNull %uint
+       %bool = OpTypeBool
+%_ptr_Function__arr_S_std140_uint_4 = OpTypePointer Function %_arr_S_std140_uint_4
+         %69 = OpConstantNull %_arr_S_std140_uint_4
+%_ptr_Function_S = OpTypePointer Function %S
+%_ptr_Function_S_std140 = OpTypePointer Function %S_std140
+     %uint_1 = OpConstant %uint 1
+         %82 = OpTypeFunction %mat4v2float
+     %uint_0 = OpConstant %uint 0
+     %uint_2 = OpConstant %uint 2
+%_ptr_Uniform_v2float = OpTypePointer Uniform %v2float
+     %uint_3 = OpConstant %uint 3
+         %98 = OpTypeFunction %void
+%_ptr_Uniform__arr_S_std140_uint_4 = OpTypePointer Uniform %_arr_S_std140_uint_4
+%_ptr_Uniform_S_std140 = OpTypePointer Uniform %S_std140
+          %a = OpFunction %void None %11
+        %a_1 = OpFunctionParameter %_arr_S_uint_4
+         %18 = OpLabel
+               OpReturn
+               OpFunctionEnd
+          %b = OpFunction %void None %19
+          %s = OpFunctionParameter %S
+         %22 = OpLabel
+               OpReturn
+               OpFunctionEnd
+          %c = OpFunction %void None %23
+          %m = OpFunctionParameter %mat4v2float
+         %26 = OpLabel
+               OpReturn
+               OpFunctionEnd
+          %d = OpFunction %void None %27
+          %v = OpFunctionParameter %v2float
+         %30 = OpLabel
+               OpReturn
+               OpFunctionEnd
+          %e = OpFunction %void None %31
+        %f_1 = OpFunctionParameter %float
+         %34 = OpLabel
+               OpReturn
+               OpFunctionEnd
+     %conv_S = OpFunction %S None %35
+        %val = OpFunctionParameter %S_std140
+         %38 = OpLabel
+         %39 = OpCompositeExtract %int %val 0
+         %40 = OpCompositeExtract %v2float %val 1
+         %41 = OpCompositeExtract %v2float %val 2
+         %42 = OpCompositeExtract %v2float %val 3
+         %43 = OpCompositeExtract %v2float %val 4
+         %44 = OpCompositeConstruct %mat4v2float %40 %41 %42 %43
+         %45 = OpCompositeExtract %int %val 5
+         %46 = OpCompositeConstruct %S %39 %44 %45
+               OpReturnValue %46
+               OpFunctionEnd
+%conv_arr_4_S = OpFunction %_arr_S_uint_4 None %47
+      %val_0 = OpFunctionParameter %_arr_S_std140_uint_4
+         %50 = OpLabel
+        %arr = OpVariable %_ptr_Function__arr_S_uint_4 Function %53
+          %i = OpVariable %_ptr_Function_uint Function %56
+%var_for_index = OpVariable %_ptr_Function__arr_S_std140_uint_4 Function %69
+               OpBranch %57
+         %57 = OpLabel
+               OpLoopMerge %58 %59 None
+               OpBranch %60
+         %60 = OpLabel
+         %62 = OpLoad %uint %i
+         %63 = OpULessThan %bool %62 %uint_4
+         %61 = OpLogicalNot %bool %63
+               OpSelectionMerge %65 None
+               OpBranchConditional %61 %66 %65
+         %66 = OpLabel
+               OpBranch %58
+         %65 = OpLabel
+               OpStore %var_for_index %val_0
+         %70 = OpLoad %uint %i
+         %72 = OpAccessChain %_ptr_Function_S %arr %70
+         %74 = OpLoad %uint %i
+         %76 = OpAccessChain %_ptr_Function_S_std140 %var_for_index %74
+         %77 = OpLoad %S_std140 %76
+         %73 = OpFunctionCall %S %conv_S %77
+               OpStore %72 %73
+               OpBranch %59
+         %59 = OpLabel
+         %78 = OpLoad %uint %i
+         %80 = OpIAdd %uint %78 %uint_1
+               OpStore %i %80
+               OpBranch %57
+         %58 = OpLabel
+         %81 = OpLoad %_arr_S_uint_4 %arr
+               OpReturnValue %81
+               OpFunctionEnd
+ %load_u_2_m = OpFunction %mat4v2float None %82
+         %84 = OpLabel
+         %88 = OpAccessChain %_ptr_Uniform_v2float %u %uint_0 %uint_2 %uint_1
+         %89 = OpLoad %v2float %88
+         %90 = OpAccessChain %_ptr_Uniform_v2float %u %uint_0 %uint_2 %uint_2
+         %91 = OpLoad %v2float %90
+         %93 = OpAccessChain %_ptr_Uniform_v2float %u %uint_0 %uint_2 %uint_3
+         %94 = OpLoad %v2float %93
+         %95 = OpAccessChain %_ptr_Uniform_v2float %u %uint_0 %uint_2 %uint_4
+         %96 = OpLoad %v2float %95
+         %97 = OpCompositeConstruct %mat4v2float %89 %91 %94 %96
+               OpReturnValue %97
+               OpFunctionEnd
+          %f = OpFunction %void None %98
+        %100 = OpLabel
+        %104 = OpAccessChain %_ptr_Uniform__arr_S_std140_uint_4 %u %uint_0
+        %105 = OpLoad %_arr_S_std140_uint_4 %104
+        %102 = OpFunctionCall %_arr_S_uint_4 %conv_arr_4_S %105
+        %101 = OpFunctionCall %void %a %102
+        %109 = OpAccessChain %_ptr_Uniform_S_std140 %u %uint_0 %uint_2
+        %110 = OpLoad %S_std140 %109
+        %107 = OpFunctionCall %S %conv_S %110
+        %106 = OpFunctionCall %void %b %107
+        %112 = OpFunctionCall %mat4v2float %load_u_2_m
+        %111 = OpFunctionCall %void %c %112
+        %114 = OpAccessChain %_ptr_Uniform_v2float %u %uint_0 %56 %uint_2
+        %115 = OpLoad %v2float %114
+        %116 = OpVectorShuffle %v2float %115 %115 1 0
+        %113 = OpFunctionCall %void %d %116
+        %118 = OpAccessChain %_ptr_Uniform_v2float %u %uint_0 %56 %uint_2
+        %119 = OpLoad %v2float %118
+        %120 = OpVectorShuffle %v2float %119 %119 1 0
+        %121 = OpCompositeExtract %float %120 0
+        %117 = OpFunctionCall %void %e %121
+               OpReturn
+               OpFunctionEnd
diff --git a/test/tint/buffer/uniform/std140/mat4x2/to_fn.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/mat4x2/to_fn.wgsl.expected.wgsl
new file mode 100644
index 0000000..29258cc
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/mat4x2/to_fn.wgsl.expected.wgsl
@@ -0,0 +1,31 @@
+struct S {
+  before : i32,
+  m : mat4x2<f32>,
+  after : i32,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S, 4>;
+
+fn a(a : array<S, 4>) {
+}
+
+fn b(s : S) {
+}
+
+fn c(m : mat4x2<f32>) {
+}
+
+fn d(v : vec2<f32>) {
+}
+
+fn e(f : f32) {
+}
+
+@compute @workgroup_size(1)
+fn f() {
+  a(u);
+  b(u[2]);
+  c(u[2].m);
+  d(u[0].m[1].yx);
+  e(u[0].m[1].yx.x);
+}
diff --git a/test/tint/buffer/uniform/std140/mat4x2/to_private.wgsl b/test/tint/buffer/uniform/std140/mat4x2/to_private.wgsl
new file mode 100644
index 0000000..4d8b759
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/mat4x2/to_private.wgsl
@@ -0,0 +1,16 @@
+struct S {
+  before : i32,
+  m : mat4x2<f32>,
+  after : i32,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S, 4>;
+var<private> p : array<S, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+    p = u;
+    p[1] = u[2];
+    p[3].m = u[2].m;
+    p[1].m[0] = u[0].m[1].yx;
+}
diff --git a/test/tint/buffer/uniform/std140/mat4x2/to_private.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/mat4x2/to_private.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..be0af4a
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/mat4x2/to_private.wgsl.expected.dxc.hlsl
@@ -0,0 +1,49 @@
+struct S {
+  int before;
+  float4x2 m;
+  int after;
+};
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[12];
+};
+static S p[4] = (S[4])0;
+
+float4x2 tint_symbol_3(uint4 buffer[12], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load = buffer[scalar_offset / 4];
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset_1 / 4];
+  const uint scalar_offset_2 = ((offset + 16u)) / 4;
+  uint4 ubo_load_2 = buffer[scalar_offset_2 / 4];
+  const uint scalar_offset_3 = ((offset + 24u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_3 / 4];
+  return float4x2(asfloat(((scalar_offset & 2) ? ubo_load.zw : ubo_load.xy)), asfloat(((scalar_offset_1 & 2) ? ubo_load_1.zw : ubo_load_1.xy)), asfloat(((scalar_offset_2 & 2) ? ubo_load_2.zw : ubo_load_2.xy)), asfloat(((scalar_offset_3 & 2) ? ubo_load_3.zw : ubo_load_3.xy)));
+}
+
+S tint_symbol_1(uint4 buffer[12], uint offset) {
+  const uint scalar_offset_4 = ((offset + 0u)) / 4;
+  const uint scalar_offset_5 = ((offset + 40u)) / 4;
+  const S tint_symbol_5 = {asint(buffer[scalar_offset_4 / 4][scalar_offset_4 % 4]), tint_symbol_3(buffer, (offset + 8u)), asint(buffer[scalar_offset_5 / 4][scalar_offset_5 % 4])};
+  return tint_symbol_5;
+}
+
+typedef S tint_symbol_ret[4];
+tint_symbol_ret tint_symbol(uint4 buffer[12], uint offset) {
+  S arr[4] = (S[4])0;
+  {
+    [loop] for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = tint_symbol_1(buffer, (offset + (i * 48u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  p = tint_symbol(u, 0u);
+  p[1] = tint_symbol_1(u, 96u);
+  p[3].m = tint_symbol_3(u, 104u);
+  p[1].m[0] = asfloat(u[1].xy).yx;
+  return;
+}
diff --git a/test/tint/buffer/uniform/std140/mat4x2/to_private.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/mat4x2/to_private.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..be0af4a
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/mat4x2/to_private.wgsl.expected.fxc.hlsl
@@ -0,0 +1,49 @@
+struct S {
+  int before;
+  float4x2 m;
+  int after;
+};
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[12];
+};
+static S p[4] = (S[4])0;
+
+float4x2 tint_symbol_3(uint4 buffer[12], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load = buffer[scalar_offset / 4];
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset_1 / 4];
+  const uint scalar_offset_2 = ((offset + 16u)) / 4;
+  uint4 ubo_load_2 = buffer[scalar_offset_2 / 4];
+  const uint scalar_offset_3 = ((offset + 24u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_3 / 4];
+  return float4x2(asfloat(((scalar_offset & 2) ? ubo_load.zw : ubo_load.xy)), asfloat(((scalar_offset_1 & 2) ? ubo_load_1.zw : ubo_load_1.xy)), asfloat(((scalar_offset_2 & 2) ? ubo_load_2.zw : ubo_load_2.xy)), asfloat(((scalar_offset_3 & 2) ? ubo_load_3.zw : ubo_load_3.xy)));
+}
+
+S tint_symbol_1(uint4 buffer[12], uint offset) {
+  const uint scalar_offset_4 = ((offset + 0u)) / 4;
+  const uint scalar_offset_5 = ((offset + 40u)) / 4;
+  const S tint_symbol_5 = {asint(buffer[scalar_offset_4 / 4][scalar_offset_4 % 4]), tint_symbol_3(buffer, (offset + 8u)), asint(buffer[scalar_offset_5 / 4][scalar_offset_5 % 4])};
+  return tint_symbol_5;
+}
+
+typedef S tint_symbol_ret[4];
+tint_symbol_ret tint_symbol(uint4 buffer[12], uint offset) {
+  S arr[4] = (S[4])0;
+  {
+    [loop] for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = tint_symbol_1(buffer, (offset + (i * 48u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  p = tint_symbol(u, 0u);
+  p[1] = tint_symbol_1(u, 96u);
+  p[3].m = tint_symbol_3(u, 104u);
+  p[1].m[0] = asfloat(u[1].xy).yx;
+  return;
+}
diff --git a/test/tint/buffer/uniform/std140/mat4x2/to_private.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/mat4x2/to_private.wgsl.expected.glsl
new file mode 100644
index 0000000..9bf0359
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/mat4x2/to_private.wgsl.expected.glsl
@@ -0,0 +1,57 @@
+#version 310 es
+
+struct S {
+  int before;
+  mat4x2 m;
+  int after;
+};
+
+struct S_std140 {
+  int before;
+  vec2 m_0;
+  vec2 m_1;
+  vec2 m_2;
+  vec2 m_3;
+  int after;
+};
+
+struct u_block {
+  S_std140 inner[4];
+};
+
+layout(binding = 0) uniform u_block_1 {
+  S_std140 inner[4];
+} u;
+
+S p[4] = S[4](S(0, mat4x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f), 0), S(0, mat4x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f), 0), S(0, mat4x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f), 0), S(0, mat4x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f), 0));
+S conv_S(S_std140 val) {
+  S tint_symbol = S(val.before, mat4x2(val.m_0, val.m_1, val.m_2, val.m_3), val.after);
+  return tint_symbol;
+}
+
+S[4] conv_arr_4_S(S_std140 val[4]) {
+  S arr[4] = S[4](S(0, mat4x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f), 0), S(0, mat4x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f), 0), S(0, mat4x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f), 0), S(0, mat4x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f), 0));
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = conv_S(val[i]);
+    }
+  }
+  return arr;
+}
+
+mat4x2 load_u_2_m() {
+  return mat4x2(u.inner[2u].m_0, u.inner[2u].m_1, u.inner[2u].m_2, u.inner[2u].m_3);
+}
+
+void f() {
+  p = conv_arr_4_S(u.inner);
+  p[1] = conv_S(u.inner[2u]);
+  p[3].m = load_u_2_m();
+  p[1].m[0] = u.inner[0u].m_1.yx;
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}
diff --git a/test/tint/buffer/uniform/std140/mat4x2/to_private.wgsl.expected.msl b/test/tint/buffer/uniform/std140/mat4x2/to_private.wgsl.expected.msl
new file mode 100644
index 0000000..c5f914c
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/mat4x2/to_private.wgsl.expected.msl
@@ -0,0 +1,33 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+struct S {
+  /* 0x0000 */ int before;
+  /* 0x0004 */ tint_array<int8_t, 4> tint_pad;
+  /* 0x0008 */ float4x2 m;
+  /* 0x0028 */ int after;
+  /* 0x002c */ tint_array<int8_t, 4> tint_pad_1;
+};
+
+kernel void f(const constant tint_array<S, 4>* tint_symbol_1 [[buffer(0)]]) {
+  thread tint_array<S, 4> tint_symbol = {};
+  tint_symbol = *(tint_symbol_1);
+  tint_symbol[1] = (*(tint_symbol_1))[2];
+  tint_symbol[3].m = (*(tint_symbol_1))[2].m;
+  tint_symbol[1].m[0] = float2((*(tint_symbol_1))[0].m[1]).yx;
+  return;
+}
+
diff --git a/test/tint/buffer/uniform/std140/mat4x2/to_private.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/mat4x2/to_private.wgsl.expected.spvasm
new file mode 100644
index 0000000..a5b0ac5
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/mat4x2/to_private.wgsl.expected.spvasm
@@ -0,0 +1,177 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 104
+; Schema: 0
+               OpCapability Shader
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %u_block "u_block"
+               OpMemberName %u_block 0 "inner"
+               OpName %S_std140 "S_std140"
+               OpMemberName %S_std140 0 "before"
+               OpMemberName %S_std140 1 "m_0"
+               OpMemberName %S_std140 2 "m_1"
+               OpMemberName %S_std140 3 "m_2"
+               OpMemberName %S_std140 4 "m_3"
+               OpMemberName %S_std140 5 "after"
+               OpName %u "u"
+               OpName %S "S"
+               OpMemberName %S 0 "before"
+               OpMemberName %S 1 "m"
+               OpMemberName %S 2 "after"
+               OpName %p "p"
+               OpName %conv_S "conv_S"
+               OpName %val "val"
+               OpName %conv_arr_4_S "conv_arr_4_S"
+               OpName %val_0 "val"
+               OpName %arr "arr"
+               OpName %i "i"
+               OpName %var_for_index "var_for_index"
+               OpName %load_u_2_m "load_u_2_m"
+               OpName %f "f"
+               OpDecorate %u_block Block
+               OpMemberDecorate %u_block 0 Offset 0
+               OpMemberDecorate %S_std140 0 Offset 0
+               OpMemberDecorate %S_std140 1 Offset 8
+               OpMemberDecorate %S_std140 2 Offset 16
+               OpMemberDecorate %S_std140 3 Offset 24
+               OpMemberDecorate %S_std140 4 Offset 32
+               OpMemberDecorate %S_std140 5 Offset 40
+               OpDecorate %_arr_S_std140_uint_4 ArrayStride 48
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+               OpMemberDecorate %S 0 Offset 0
+               OpMemberDecorate %S 1 Offset 8
+               OpMemberDecorate %S 1 ColMajor
+               OpMemberDecorate %S 1 MatrixStride 8
+               OpMemberDecorate %S 2 Offset 40
+               OpDecorate %_arr_S_uint_4 ArrayStride 48
+        %int = OpTypeInt 32 1
+      %float = OpTypeFloat 32
+    %v2float = OpTypeVector %float 2
+   %S_std140 = OpTypeStruct %int %v2float %v2float %v2float %v2float %int
+       %uint = OpTypeInt 32 0
+     %uint_4 = OpConstant %uint 4
+%_arr_S_std140_uint_4 = OpTypeArray %S_std140 %uint_4
+    %u_block = OpTypeStruct %_arr_S_std140_uint_4
+%_ptr_Uniform_u_block = OpTypePointer Uniform %u_block
+          %u = OpVariable %_ptr_Uniform_u_block Uniform
+%mat4v2float = OpTypeMatrix %v2float 4
+          %S = OpTypeStruct %int %mat4v2float %int
+%_arr_S_uint_4 = OpTypeArray %S %uint_4
+%_ptr_Private__arr_S_uint_4 = OpTypePointer Private %_arr_S_uint_4
+         %16 = OpConstantNull %_arr_S_uint_4
+          %p = OpVariable %_ptr_Private__arr_S_uint_4 Private %16
+         %17 = OpTypeFunction %S %S_std140
+         %29 = OpTypeFunction %_arr_S_uint_4 %_arr_S_std140_uint_4
+%_ptr_Function__arr_S_uint_4 = OpTypePointer Function %_arr_S_uint_4
+%_ptr_Function_uint = OpTypePointer Function %uint
+         %37 = OpConstantNull %uint
+       %bool = OpTypeBool
+%_ptr_Function__arr_S_std140_uint_4 = OpTypePointer Function %_arr_S_std140_uint_4
+         %50 = OpConstantNull %_arr_S_std140_uint_4
+%_ptr_Function_S = OpTypePointer Function %S
+%_ptr_Function_S_std140 = OpTypePointer Function %S_std140
+     %uint_1 = OpConstant %uint 1
+         %63 = OpTypeFunction %mat4v2float
+     %uint_0 = OpConstant %uint 0
+     %uint_2 = OpConstant %uint 2
+%_ptr_Uniform_v2float = OpTypePointer Uniform %v2float
+     %uint_3 = OpConstant %uint 3
+       %void = OpTypeVoid
+         %79 = OpTypeFunction %void
+%_ptr_Uniform__arr_S_std140_uint_4 = OpTypePointer Uniform %_arr_S_std140_uint_4
+      %int_1 = OpConstant %int 1
+%_ptr_Private_S = OpTypePointer Private %S
+%_ptr_Uniform_S_std140 = OpTypePointer Uniform %S_std140
+      %int_3 = OpConstant %int 3
+%_ptr_Private_mat4v2float = OpTypePointer Private %mat4v2float
+         %98 = OpConstantNull %int
+%_ptr_Private_v2float = OpTypePointer Private %v2float
+     %conv_S = OpFunction %S None %17
+        %val = OpFunctionParameter %S_std140
+         %20 = OpLabel
+         %21 = OpCompositeExtract %int %val 0
+         %22 = OpCompositeExtract %v2float %val 1
+         %23 = OpCompositeExtract %v2float %val 2
+         %24 = OpCompositeExtract %v2float %val 3
+         %25 = OpCompositeExtract %v2float %val 4
+         %26 = OpCompositeConstruct %mat4v2float %22 %23 %24 %25
+         %27 = OpCompositeExtract %int %val 5
+         %28 = OpCompositeConstruct %S %21 %26 %27
+               OpReturnValue %28
+               OpFunctionEnd
+%conv_arr_4_S = OpFunction %_arr_S_uint_4 None %29
+      %val_0 = OpFunctionParameter %_arr_S_std140_uint_4
+         %32 = OpLabel
+        %arr = OpVariable %_ptr_Function__arr_S_uint_4 Function %16
+          %i = OpVariable %_ptr_Function_uint Function %37
+%var_for_index = OpVariable %_ptr_Function__arr_S_std140_uint_4 Function %50
+               OpBranch %38
+         %38 = OpLabel
+               OpLoopMerge %39 %40 None
+               OpBranch %41
+         %41 = OpLabel
+         %43 = OpLoad %uint %i
+         %44 = OpULessThan %bool %43 %uint_4
+         %42 = OpLogicalNot %bool %44
+               OpSelectionMerge %46 None
+               OpBranchConditional %42 %47 %46
+         %47 = OpLabel
+               OpBranch %39
+         %46 = OpLabel
+               OpStore %var_for_index %val_0
+         %51 = OpLoad %uint %i
+         %53 = OpAccessChain %_ptr_Function_S %arr %51
+         %55 = OpLoad %uint %i
+         %57 = OpAccessChain %_ptr_Function_S_std140 %var_for_index %55
+         %58 = OpLoad %S_std140 %57
+         %54 = OpFunctionCall %S %conv_S %58
+               OpStore %53 %54
+               OpBranch %40
+         %40 = OpLabel
+         %59 = OpLoad %uint %i
+         %61 = OpIAdd %uint %59 %uint_1
+               OpStore %i %61
+               OpBranch %38
+         %39 = OpLabel
+         %62 = OpLoad %_arr_S_uint_4 %arr
+               OpReturnValue %62
+               OpFunctionEnd
+ %load_u_2_m = OpFunction %mat4v2float None %63
+         %65 = OpLabel
+         %69 = OpAccessChain %_ptr_Uniform_v2float %u %uint_0 %uint_2 %uint_1
+         %70 = OpLoad %v2float %69
+         %71 = OpAccessChain %_ptr_Uniform_v2float %u %uint_0 %uint_2 %uint_2
+         %72 = OpLoad %v2float %71
+         %74 = OpAccessChain %_ptr_Uniform_v2float %u %uint_0 %uint_2 %uint_3
+         %75 = OpLoad %v2float %74
+         %76 = OpAccessChain %_ptr_Uniform_v2float %u %uint_0 %uint_2 %uint_4
+         %77 = OpLoad %v2float %76
+         %78 = OpCompositeConstruct %mat4v2float %70 %72 %75 %77
+               OpReturnValue %78
+               OpFunctionEnd
+          %f = OpFunction %void None %79
+         %82 = OpLabel
+         %85 = OpAccessChain %_ptr_Uniform__arr_S_std140_uint_4 %u %uint_0
+         %86 = OpLoad %_arr_S_std140_uint_4 %85
+         %83 = OpFunctionCall %_arr_S_uint_4 %conv_arr_4_S %86
+               OpStore %p %83
+         %89 = OpAccessChain %_ptr_Private_S %p %int_1
+         %92 = OpAccessChain %_ptr_Uniform_S_std140 %u %uint_0 %uint_2
+         %93 = OpLoad %S_std140 %92
+         %90 = OpFunctionCall %S %conv_S %93
+               OpStore %89 %90
+         %96 = OpAccessChain %_ptr_Private_mat4v2float %p %int_3 %uint_1
+         %97 = OpFunctionCall %mat4v2float %load_u_2_m
+               OpStore %96 %97
+        %100 = OpAccessChain %_ptr_Private_v2float %p %int_1 %uint_1 %98
+        %101 = OpAccessChain %_ptr_Uniform_v2float %u %uint_0 %37 %uint_2
+        %102 = OpLoad %v2float %101
+        %103 = OpVectorShuffle %v2float %102 %102 1 0
+               OpStore %100 %103
+               OpReturn
+               OpFunctionEnd
diff --git a/test/tint/buffer/uniform/std140/mat4x2/to_private.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/mat4x2/to_private.wgsl.expected.wgsl
new file mode 100644
index 0000000..32b45e1
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/mat4x2/to_private.wgsl.expected.wgsl
@@ -0,0 +1,17 @@
+struct S {
+  before : i32,
+  m : mat4x2<f32>,
+  after : i32,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S, 4>;
+
+var<private> p : array<S, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+  p = u;
+  p[1] = u[2];
+  p[3].m = u[2].m;
+  p[1].m[0] = u[0].m[1].yx;
+}
diff --git a/test/tint/buffer/uniform/std140/mat4x2/to_storage.wgsl b/test/tint/buffer/uniform/std140/mat4x2/to_storage.wgsl
new file mode 100644
index 0000000..692ff38
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/mat4x2/to_storage.wgsl
@@ -0,0 +1,16 @@
+struct S {
+  before : i32,
+  m : mat4x2<f32>,
+  after : i32,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S, 4>;
+@group(0) @binding(1) var<storage, read_write> s : array<S, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+    s = u;
+    s[1] = u[2];
+    s[3].m = u[2].m;
+    s[1].m[0] = u[0].m[1].yx;
+}
diff --git a/test/tint/buffer/uniform/std140/mat4x2/to_storage.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/mat4x2/to_storage.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..847a0eb
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/mat4x2/to_storage.wgsl.expected.dxc.hlsl
@@ -0,0 +1,71 @@
+struct S {
+  int before;
+  float4x2 m;
+  int after;
+};
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[12];
+};
+RWByteAddressBuffer s : register(u1, space0);
+
+void tint_symbol_3(RWByteAddressBuffer buffer, uint offset, float4x2 value) {
+  buffer.Store2((offset + 0u), asuint(value[0u]));
+  buffer.Store2((offset + 8u), asuint(value[1u]));
+  buffer.Store2((offset + 16u), asuint(value[2u]));
+  buffer.Store2((offset + 24u), asuint(value[3u]));
+}
+
+void tint_symbol_1(RWByteAddressBuffer buffer, uint offset, S value) {
+  buffer.Store((offset + 0u), asuint(value.before));
+  tint_symbol_3(buffer, (offset + 8u), value.m);
+  buffer.Store((offset + 40u), asuint(value.after));
+}
+
+void tint_symbol(RWByteAddressBuffer buffer, uint offset, S value[4]) {
+  S array[4] = value;
+  {
+    [loop] for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      tint_symbol_1(buffer, (offset + (i * 48u)), array[i]);
+    }
+  }
+}
+
+float4x2 tint_symbol_8(uint4 buffer[12], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load = buffer[scalar_offset / 4];
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset_1 / 4];
+  const uint scalar_offset_2 = ((offset + 16u)) / 4;
+  uint4 ubo_load_2 = buffer[scalar_offset_2 / 4];
+  const uint scalar_offset_3 = ((offset + 24u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_3 / 4];
+  return float4x2(asfloat(((scalar_offset & 2) ? ubo_load.zw : ubo_load.xy)), asfloat(((scalar_offset_1 & 2) ? ubo_load_1.zw : ubo_load_1.xy)), asfloat(((scalar_offset_2 & 2) ? ubo_load_2.zw : ubo_load_2.xy)), asfloat(((scalar_offset_3 & 2) ? ubo_load_3.zw : ubo_load_3.xy)));
+}
+
+S tint_symbol_6(uint4 buffer[12], uint offset) {
+  const uint scalar_offset_4 = ((offset + 0u)) / 4;
+  const uint scalar_offset_5 = ((offset + 40u)) / 4;
+  const S tint_symbol_10 = {asint(buffer[scalar_offset_4 / 4][scalar_offset_4 % 4]), tint_symbol_8(buffer, (offset + 8u)), asint(buffer[scalar_offset_5 / 4][scalar_offset_5 % 4])};
+  return tint_symbol_10;
+}
+
+typedef S tint_symbol_5_ret[4];
+tint_symbol_5_ret tint_symbol_5(uint4 buffer[12], uint offset) {
+  S arr[4] = (S[4])0;
+  {
+    [loop] for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
+      arr[i_1] = tint_symbol_6(buffer, (offset + (i_1 * 48u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  tint_symbol(s, 0u, tint_symbol_5(u, 0u));
+  tint_symbol_1(s, 48u, tint_symbol_6(u, 96u));
+  tint_symbol_3(s, 152u, tint_symbol_8(u, 104u));
+  s.Store2(56u, asuint(asfloat(u[1].xy).yx));
+  return;
+}
diff --git a/test/tint/buffer/uniform/std140/mat4x2/to_storage.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/mat4x2/to_storage.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..847a0eb
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/mat4x2/to_storage.wgsl.expected.fxc.hlsl
@@ -0,0 +1,71 @@
+struct S {
+  int before;
+  float4x2 m;
+  int after;
+};
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[12];
+};
+RWByteAddressBuffer s : register(u1, space0);
+
+void tint_symbol_3(RWByteAddressBuffer buffer, uint offset, float4x2 value) {
+  buffer.Store2((offset + 0u), asuint(value[0u]));
+  buffer.Store2((offset + 8u), asuint(value[1u]));
+  buffer.Store2((offset + 16u), asuint(value[2u]));
+  buffer.Store2((offset + 24u), asuint(value[3u]));
+}
+
+void tint_symbol_1(RWByteAddressBuffer buffer, uint offset, S value) {
+  buffer.Store((offset + 0u), asuint(value.before));
+  tint_symbol_3(buffer, (offset + 8u), value.m);
+  buffer.Store((offset + 40u), asuint(value.after));
+}
+
+void tint_symbol(RWByteAddressBuffer buffer, uint offset, S value[4]) {
+  S array[4] = value;
+  {
+    [loop] for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      tint_symbol_1(buffer, (offset + (i * 48u)), array[i]);
+    }
+  }
+}
+
+float4x2 tint_symbol_8(uint4 buffer[12], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load = buffer[scalar_offset / 4];
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset_1 / 4];
+  const uint scalar_offset_2 = ((offset + 16u)) / 4;
+  uint4 ubo_load_2 = buffer[scalar_offset_2 / 4];
+  const uint scalar_offset_3 = ((offset + 24u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_3 / 4];
+  return float4x2(asfloat(((scalar_offset & 2) ? ubo_load.zw : ubo_load.xy)), asfloat(((scalar_offset_1 & 2) ? ubo_load_1.zw : ubo_load_1.xy)), asfloat(((scalar_offset_2 & 2) ? ubo_load_2.zw : ubo_load_2.xy)), asfloat(((scalar_offset_3 & 2) ? ubo_load_3.zw : ubo_load_3.xy)));
+}
+
+S tint_symbol_6(uint4 buffer[12], uint offset) {
+  const uint scalar_offset_4 = ((offset + 0u)) / 4;
+  const uint scalar_offset_5 = ((offset + 40u)) / 4;
+  const S tint_symbol_10 = {asint(buffer[scalar_offset_4 / 4][scalar_offset_4 % 4]), tint_symbol_8(buffer, (offset + 8u)), asint(buffer[scalar_offset_5 / 4][scalar_offset_5 % 4])};
+  return tint_symbol_10;
+}
+
+typedef S tint_symbol_5_ret[4];
+tint_symbol_5_ret tint_symbol_5(uint4 buffer[12], uint offset) {
+  S arr[4] = (S[4])0;
+  {
+    [loop] for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
+      arr[i_1] = tint_symbol_6(buffer, (offset + (i_1 * 48u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  tint_symbol(s, 0u, tint_symbol_5(u, 0u));
+  tint_symbol_1(s, 48u, tint_symbol_6(u, 96u));
+  tint_symbol_3(s, 152u, tint_symbol_8(u, 104u));
+  s.Store2(56u, asuint(asfloat(u[1].xy).yx));
+  return;
+}
diff --git a/test/tint/buffer/uniform/std140/mat4x2/to_storage.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/mat4x2/to_storage.wgsl.expected.glsl
new file mode 100644
index 0000000..fa43041
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/mat4x2/to_storage.wgsl.expected.glsl
@@ -0,0 +1,63 @@
+#version 310 es
+
+struct S {
+  int before;
+  mat4x2 m;
+  int after;
+};
+
+struct S_std140 {
+  int before;
+  vec2 m_0;
+  vec2 m_1;
+  vec2 m_2;
+  vec2 m_3;
+  int after;
+};
+
+struct u_block {
+  S_std140 inner[4];
+};
+
+layout(binding = 0) uniform u_block_1 {
+  S_std140 inner[4];
+} u;
+
+struct s_block {
+  S inner[4];
+};
+
+layout(binding = 1, std430) buffer s_block_1 {
+  S inner[4];
+} s;
+S conv_S(S_std140 val) {
+  S tint_symbol = S(val.before, mat4x2(val.m_0, val.m_1, val.m_2, val.m_3), val.after);
+  return tint_symbol;
+}
+
+S[4] conv_arr_4_S(S_std140 val[4]) {
+  S arr[4] = S[4](S(0, mat4x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f), 0), S(0, mat4x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f), 0), S(0, mat4x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f), 0), S(0, mat4x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f), 0));
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = conv_S(val[i]);
+    }
+  }
+  return arr;
+}
+
+mat4x2 load_u_2_m() {
+  return mat4x2(u.inner[2u].m_0, u.inner[2u].m_1, u.inner[2u].m_2, u.inner[2u].m_3);
+}
+
+void f() {
+  s.inner = conv_arr_4_S(u.inner);
+  s.inner[1] = conv_S(u.inner[2u]);
+  s.inner[3].m = load_u_2_m();
+  s.inner[1].m[0] = u.inner[0u].m_1.yx;
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}
diff --git a/test/tint/buffer/uniform/std140/mat4x2/to_storage.wgsl.expected.msl b/test/tint/buffer/uniform/std140/mat4x2/to_storage.wgsl.expected.msl
new file mode 100644
index 0000000..9ef8ea3
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/mat4x2/to_storage.wgsl.expected.msl
@@ -0,0 +1,32 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+struct S {
+  /* 0x0000 */ int before;
+  /* 0x0004 */ tint_array<int8_t, 4> tint_pad;
+  /* 0x0008 */ float4x2 m;
+  /* 0x0028 */ int after;
+  /* 0x002c */ tint_array<int8_t, 4> tint_pad_1;
+};
+
+kernel void f(device tint_array<S, 4>* tint_symbol [[buffer(1)]], const constant tint_array<S, 4>* tint_symbol_1 [[buffer(0)]]) {
+  *(tint_symbol) = *(tint_symbol_1);
+  (*(tint_symbol))[1] = (*(tint_symbol_1))[2];
+  (*(tint_symbol))[3].m = (*(tint_symbol_1))[2].m;
+  (*(tint_symbol))[1].m[0] = float2((*(tint_symbol_1))[0].m[1]).yx;
+  return;
+}
+
diff --git a/test/tint/buffer/uniform/std140/mat4x2/to_storage.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/mat4x2/to_storage.wgsl.expected.spvasm
new file mode 100644
index 0000000..eec2559
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/mat4x2/to_storage.wgsl.expected.spvasm
@@ -0,0 +1,186 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 107
+; Schema: 0
+               OpCapability Shader
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %u_block "u_block"
+               OpMemberName %u_block 0 "inner"
+               OpName %S_std140 "S_std140"
+               OpMemberName %S_std140 0 "before"
+               OpMemberName %S_std140 1 "m_0"
+               OpMemberName %S_std140 2 "m_1"
+               OpMemberName %S_std140 3 "m_2"
+               OpMemberName %S_std140 4 "m_3"
+               OpMemberName %S_std140 5 "after"
+               OpName %u "u"
+               OpName %s_block "s_block"
+               OpMemberName %s_block 0 "inner"
+               OpName %S "S"
+               OpMemberName %S 0 "before"
+               OpMemberName %S 1 "m"
+               OpMemberName %S 2 "after"
+               OpName %s "s"
+               OpName %conv_S "conv_S"
+               OpName %val "val"
+               OpName %conv_arr_4_S "conv_arr_4_S"
+               OpName %val_0 "val"
+               OpName %arr "arr"
+               OpName %i "i"
+               OpName %var_for_index "var_for_index"
+               OpName %load_u_2_m "load_u_2_m"
+               OpName %f "f"
+               OpDecorate %u_block Block
+               OpMemberDecorate %u_block 0 Offset 0
+               OpMemberDecorate %S_std140 0 Offset 0
+               OpMemberDecorate %S_std140 1 Offset 8
+               OpMemberDecorate %S_std140 2 Offset 16
+               OpMemberDecorate %S_std140 3 Offset 24
+               OpMemberDecorate %S_std140 4 Offset 32
+               OpMemberDecorate %S_std140 5 Offset 40
+               OpDecorate %_arr_S_std140_uint_4 ArrayStride 48
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+               OpDecorate %s_block Block
+               OpMemberDecorate %s_block 0 Offset 0
+               OpMemberDecorate %S 0 Offset 0
+               OpMemberDecorate %S 1 Offset 8
+               OpMemberDecorate %S 1 ColMajor
+               OpMemberDecorate %S 1 MatrixStride 8
+               OpMemberDecorate %S 2 Offset 40
+               OpDecorate %_arr_S_uint_4 ArrayStride 48
+               OpDecorate %s DescriptorSet 0
+               OpDecorate %s Binding 1
+        %int = OpTypeInt 32 1
+      %float = OpTypeFloat 32
+    %v2float = OpTypeVector %float 2
+   %S_std140 = OpTypeStruct %int %v2float %v2float %v2float %v2float %int
+       %uint = OpTypeInt 32 0
+     %uint_4 = OpConstant %uint 4
+%_arr_S_std140_uint_4 = OpTypeArray %S_std140 %uint_4
+    %u_block = OpTypeStruct %_arr_S_std140_uint_4
+%_ptr_Uniform_u_block = OpTypePointer Uniform %u_block
+          %u = OpVariable %_ptr_Uniform_u_block Uniform
+%mat4v2float = OpTypeMatrix %v2float 4
+          %S = OpTypeStruct %int %mat4v2float %int
+%_arr_S_uint_4 = OpTypeArray %S %uint_4
+    %s_block = OpTypeStruct %_arr_S_uint_4
+%_ptr_StorageBuffer_s_block = OpTypePointer StorageBuffer %s_block
+          %s = OpVariable %_ptr_StorageBuffer_s_block StorageBuffer
+         %17 = OpTypeFunction %S %S_std140
+         %29 = OpTypeFunction %_arr_S_uint_4 %_arr_S_std140_uint_4
+%_ptr_Function__arr_S_uint_4 = OpTypePointer Function %_arr_S_uint_4
+         %35 = OpConstantNull %_arr_S_uint_4
+%_ptr_Function_uint = OpTypePointer Function %uint
+         %38 = OpConstantNull %uint
+       %bool = OpTypeBool
+%_ptr_Function__arr_S_std140_uint_4 = OpTypePointer Function %_arr_S_std140_uint_4
+         %51 = OpConstantNull %_arr_S_std140_uint_4
+%_ptr_Function_S = OpTypePointer Function %S
+%_ptr_Function_S_std140 = OpTypePointer Function %S_std140
+     %uint_1 = OpConstant %uint 1
+         %64 = OpTypeFunction %mat4v2float
+     %uint_0 = OpConstant %uint 0
+     %uint_2 = OpConstant %uint 2
+%_ptr_Uniform_v2float = OpTypePointer Uniform %v2float
+     %uint_3 = OpConstant %uint 3
+       %void = OpTypeVoid
+         %80 = OpTypeFunction %void
+%_ptr_StorageBuffer__arr_S_uint_4 = OpTypePointer StorageBuffer %_arr_S_uint_4
+%_ptr_Uniform__arr_S_std140_uint_4 = OpTypePointer Uniform %_arr_S_std140_uint_4
+      %int_1 = OpConstant %int 1
+%_ptr_StorageBuffer_S = OpTypePointer StorageBuffer %S
+%_ptr_Uniform_S_std140 = OpTypePointer Uniform %S_std140
+      %int_3 = OpConstant %int 3
+%_ptr_StorageBuffer_mat4v2float = OpTypePointer StorageBuffer %mat4v2float
+        %101 = OpConstantNull %int
+%_ptr_StorageBuffer_v2float = OpTypePointer StorageBuffer %v2float
+     %conv_S = OpFunction %S None %17
+        %val = OpFunctionParameter %S_std140
+         %20 = OpLabel
+         %21 = OpCompositeExtract %int %val 0
+         %22 = OpCompositeExtract %v2float %val 1
+         %23 = OpCompositeExtract %v2float %val 2
+         %24 = OpCompositeExtract %v2float %val 3
+         %25 = OpCompositeExtract %v2float %val 4
+         %26 = OpCompositeConstruct %mat4v2float %22 %23 %24 %25
+         %27 = OpCompositeExtract %int %val 5
+         %28 = OpCompositeConstruct %S %21 %26 %27
+               OpReturnValue %28
+               OpFunctionEnd
+%conv_arr_4_S = OpFunction %_arr_S_uint_4 None %29
+      %val_0 = OpFunctionParameter %_arr_S_std140_uint_4
+         %32 = OpLabel
+        %arr = OpVariable %_ptr_Function__arr_S_uint_4 Function %35
+          %i = OpVariable %_ptr_Function_uint Function %38
+%var_for_index = OpVariable %_ptr_Function__arr_S_std140_uint_4 Function %51
+               OpBranch %39
+         %39 = OpLabel
+               OpLoopMerge %40 %41 None
+               OpBranch %42
+         %42 = OpLabel
+         %44 = OpLoad %uint %i
+         %45 = OpULessThan %bool %44 %uint_4
+         %43 = OpLogicalNot %bool %45
+               OpSelectionMerge %47 None
+               OpBranchConditional %43 %48 %47
+         %48 = OpLabel
+               OpBranch %40
+         %47 = OpLabel
+               OpStore %var_for_index %val_0
+         %52 = OpLoad %uint %i
+         %54 = OpAccessChain %_ptr_Function_S %arr %52
+         %56 = OpLoad %uint %i
+         %58 = OpAccessChain %_ptr_Function_S_std140 %var_for_index %56
+         %59 = OpLoad %S_std140 %58
+         %55 = OpFunctionCall %S %conv_S %59
+               OpStore %54 %55
+               OpBranch %41
+         %41 = OpLabel
+         %60 = OpLoad %uint %i
+         %62 = OpIAdd %uint %60 %uint_1
+               OpStore %i %62
+               OpBranch %39
+         %40 = OpLabel
+         %63 = OpLoad %_arr_S_uint_4 %arr
+               OpReturnValue %63
+               OpFunctionEnd
+ %load_u_2_m = OpFunction %mat4v2float None %64
+         %66 = OpLabel
+         %70 = OpAccessChain %_ptr_Uniform_v2float %u %uint_0 %uint_2 %uint_1
+         %71 = OpLoad %v2float %70
+         %72 = OpAccessChain %_ptr_Uniform_v2float %u %uint_0 %uint_2 %uint_2
+         %73 = OpLoad %v2float %72
+         %75 = OpAccessChain %_ptr_Uniform_v2float %u %uint_0 %uint_2 %uint_3
+         %76 = OpLoad %v2float %75
+         %77 = OpAccessChain %_ptr_Uniform_v2float %u %uint_0 %uint_2 %uint_4
+         %78 = OpLoad %v2float %77
+         %79 = OpCompositeConstruct %mat4v2float %71 %73 %76 %78
+               OpReturnValue %79
+               OpFunctionEnd
+          %f = OpFunction %void None %80
+         %83 = OpLabel
+         %85 = OpAccessChain %_ptr_StorageBuffer__arr_S_uint_4 %s %uint_0
+         %88 = OpAccessChain %_ptr_Uniform__arr_S_std140_uint_4 %u %uint_0
+         %89 = OpLoad %_arr_S_std140_uint_4 %88
+         %86 = OpFunctionCall %_arr_S_uint_4 %conv_arr_4_S %89
+               OpStore %85 %86
+         %92 = OpAccessChain %_ptr_StorageBuffer_S %s %uint_0 %int_1
+         %95 = OpAccessChain %_ptr_Uniform_S_std140 %u %uint_0 %uint_2
+         %96 = OpLoad %S_std140 %95
+         %93 = OpFunctionCall %S %conv_S %96
+               OpStore %92 %93
+         %99 = OpAccessChain %_ptr_StorageBuffer_mat4v2float %s %uint_0 %int_3 %uint_1
+        %100 = OpFunctionCall %mat4v2float %load_u_2_m
+               OpStore %99 %100
+        %103 = OpAccessChain %_ptr_StorageBuffer_v2float %s %uint_0 %int_1 %uint_1 %101
+        %104 = OpAccessChain %_ptr_Uniform_v2float %u %uint_0 %38 %uint_2
+        %105 = OpLoad %v2float %104
+        %106 = OpVectorShuffle %v2float %105 %105 1 0
+               OpStore %103 %106
+               OpReturn
+               OpFunctionEnd
diff --git a/test/tint/buffer/uniform/std140/mat4x2/to_storage.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/mat4x2/to_storage.wgsl.expected.wgsl
new file mode 100644
index 0000000..2c43651
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/mat4x2/to_storage.wgsl.expected.wgsl
@@ -0,0 +1,17 @@
+struct S {
+  before : i32,
+  m : mat4x2<f32>,
+  after : i32,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S, 4>;
+
+@group(0) @binding(1) var<storage, read_write> s : array<S, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+  s = u;
+  s[1] = u[2];
+  s[3].m = u[2].m;
+  s[1].m[0] = u[0].m[1].yx;
+}
diff --git a/test/tint/buffer/uniform/std140/mat4x2/to_workgroup.wgsl b/test/tint/buffer/uniform/std140/mat4x2/to_workgroup.wgsl
new file mode 100644
index 0000000..63c1a5b
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/mat4x2/to_workgroup.wgsl
@@ -0,0 +1,16 @@
+struct S {
+  before : i32,
+  m : mat4x2<f32>,
+  after : i32,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S, 4>;
+var<workgroup> w : array<S, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+    w = u;
+    w[1] = u[2];
+    w[3].m = u[2].m;
+    w[1].m[0] = u[0].m[1].yx;
+}
diff --git a/test/tint/buffer/uniform/std140/mat4x2/to_workgroup.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/mat4x2/to_workgroup.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..0f87d0a
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/mat4x2/to_workgroup.wgsl.expected.dxc.hlsl
@@ -0,0 +1,65 @@
+struct S {
+  int before;
+  float4x2 m;
+  int after;
+};
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[12];
+};
+groupshared S w[4];
+
+struct tint_symbol_1 {
+  uint local_invocation_index : SV_GroupIndex;
+};
+
+float4x2 tint_symbol_5(uint4 buffer[12], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load = buffer[scalar_offset / 4];
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset_1 / 4];
+  const uint scalar_offset_2 = ((offset + 16u)) / 4;
+  uint4 ubo_load_2 = buffer[scalar_offset_2 / 4];
+  const uint scalar_offset_3 = ((offset + 24u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_3 / 4];
+  return float4x2(asfloat(((scalar_offset & 2) ? ubo_load.zw : ubo_load.xy)), asfloat(((scalar_offset_1 & 2) ? ubo_load_1.zw : ubo_load_1.xy)), asfloat(((scalar_offset_2 & 2) ? ubo_load_2.zw : ubo_load_2.xy)), asfloat(((scalar_offset_3 & 2) ? ubo_load_3.zw : ubo_load_3.xy)));
+}
+
+S tint_symbol_3(uint4 buffer[12], uint offset) {
+  const uint scalar_offset_4 = ((offset + 0u)) / 4;
+  const uint scalar_offset_5 = ((offset + 40u)) / 4;
+  const S tint_symbol_8 = {asint(buffer[scalar_offset_4 / 4][scalar_offset_4 % 4]), tint_symbol_5(buffer, (offset + 8u)), asint(buffer[scalar_offset_5 / 4][scalar_offset_5 % 4])};
+  return tint_symbol_8;
+}
+
+typedef S tint_symbol_2_ret[4];
+tint_symbol_2_ret tint_symbol_2(uint4 buffer[12], uint offset) {
+  S arr[4] = (S[4])0;
+  {
+    [loop] for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
+      arr[i_1] = tint_symbol_3(buffer, (offset + (i_1 * 48u)));
+    }
+  }
+  return arr;
+}
+
+void f_inner(uint local_invocation_index) {
+  {
+    [loop] for(uint idx = local_invocation_index; (idx < 4u); idx = (idx + 1u)) {
+      const uint i = idx;
+      const S tint_symbol_7 = (S)0;
+      w[i] = tint_symbol_7;
+    }
+  }
+  GroupMemoryBarrierWithGroupSync();
+  w = tint_symbol_2(u, 0u);
+  w[1] = tint_symbol_3(u, 96u);
+  w[3].m = tint_symbol_5(u, 104u);
+  w[1].m[0] = asfloat(u[1].xy).yx;
+}
+
+[numthreads(1, 1, 1)]
+void f(tint_symbol_1 tint_symbol) {
+  f_inner(tint_symbol.local_invocation_index);
+  return;
+}
diff --git a/test/tint/buffer/uniform/std140/mat4x2/to_workgroup.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/mat4x2/to_workgroup.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..0f87d0a
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/mat4x2/to_workgroup.wgsl.expected.fxc.hlsl
@@ -0,0 +1,65 @@
+struct S {
+  int before;
+  float4x2 m;
+  int after;
+};
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[12];
+};
+groupshared S w[4];
+
+struct tint_symbol_1 {
+  uint local_invocation_index : SV_GroupIndex;
+};
+
+float4x2 tint_symbol_5(uint4 buffer[12], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load = buffer[scalar_offset / 4];
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset_1 / 4];
+  const uint scalar_offset_2 = ((offset + 16u)) / 4;
+  uint4 ubo_load_2 = buffer[scalar_offset_2 / 4];
+  const uint scalar_offset_3 = ((offset + 24u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_3 / 4];
+  return float4x2(asfloat(((scalar_offset & 2) ? ubo_load.zw : ubo_load.xy)), asfloat(((scalar_offset_1 & 2) ? ubo_load_1.zw : ubo_load_1.xy)), asfloat(((scalar_offset_2 & 2) ? ubo_load_2.zw : ubo_load_2.xy)), asfloat(((scalar_offset_3 & 2) ? ubo_load_3.zw : ubo_load_3.xy)));
+}
+
+S tint_symbol_3(uint4 buffer[12], uint offset) {
+  const uint scalar_offset_4 = ((offset + 0u)) / 4;
+  const uint scalar_offset_5 = ((offset + 40u)) / 4;
+  const S tint_symbol_8 = {asint(buffer[scalar_offset_4 / 4][scalar_offset_4 % 4]), tint_symbol_5(buffer, (offset + 8u)), asint(buffer[scalar_offset_5 / 4][scalar_offset_5 % 4])};
+  return tint_symbol_8;
+}
+
+typedef S tint_symbol_2_ret[4];
+tint_symbol_2_ret tint_symbol_2(uint4 buffer[12], uint offset) {
+  S arr[4] = (S[4])0;
+  {
+    [loop] for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
+      arr[i_1] = tint_symbol_3(buffer, (offset + (i_1 * 48u)));
+    }
+  }
+  return arr;
+}
+
+void f_inner(uint local_invocation_index) {
+  {
+    [loop] for(uint idx = local_invocation_index; (idx < 4u); idx = (idx + 1u)) {
+      const uint i = idx;
+      const S tint_symbol_7 = (S)0;
+      w[i] = tint_symbol_7;
+    }
+  }
+  GroupMemoryBarrierWithGroupSync();
+  w = tint_symbol_2(u, 0u);
+  w[1] = tint_symbol_3(u, 96u);
+  w[3].m = tint_symbol_5(u, 104u);
+  w[1].m[0] = asfloat(u[1].xy).yx;
+}
+
+[numthreads(1, 1, 1)]
+void f(tint_symbol_1 tint_symbol) {
+  f_inner(tint_symbol.local_invocation_index);
+  return;
+}
diff --git a/test/tint/buffer/uniform/std140/mat4x2/to_workgroup.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/mat4x2/to_workgroup.wgsl.expected.glsl
new file mode 100644
index 0000000..945cbcf
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/mat4x2/to_workgroup.wgsl.expected.glsl
@@ -0,0 +1,65 @@
+#version 310 es
+
+struct S {
+  int before;
+  mat4x2 m;
+  int after;
+};
+
+struct S_std140 {
+  int before;
+  vec2 m_0;
+  vec2 m_1;
+  vec2 m_2;
+  vec2 m_3;
+  int after;
+};
+
+struct u_block {
+  S_std140 inner[4];
+};
+
+layout(binding = 0) uniform u_block_1 {
+  S_std140 inner[4];
+} u;
+
+shared S w[4];
+S conv_S(S_std140 val) {
+  S tint_symbol = S(val.before, mat4x2(val.m_0, val.m_1, val.m_2, val.m_3), val.after);
+  return tint_symbol;
+}
+
+S[4] conv_arr_4_S(S_std140 val[4]) {
+  S arr[4] = S[4](S(0, mat4x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f), 0), S(0, mat4x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f), 0), S(0, mat4x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f), 0), S(0, mat4x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f), 0));
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = conv_S(val[i]);
+    }
+  }
+  return arr;
+}
+
+mat4x2 load_u_2_m() {
+  return mat4x2(u.inner[2u].m_0, u.inner[2u].m_1, u.inner[2u].m_2, u.inner[2u].m_3);
+}
+
+void f(uint local_invocation_index) {
+  {
+    for(uint idx = local_invocation_index; (idx < 4u); idx = (idx + 1u)) {
+      uint i = idx;
+      S tint_symbol_1 = S(0, mat4x2(vec2(0.0f), vec2(0.0f), vec2(0.0f), vec2(0.0f)), 0);
+      w[i] = tint_symbol_1;
+    }
+  }
+  barrier();
+  w = conv_arr_4_S(u.inner);
+  w[1] = conv_S(u.inner[2u]);
+  w[3].m = load_u_2_m();
+  w[1].m[0] = u.inner[0u].m_1.yx;
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f(gl_LocalInvocationIndex);
+  return;
+}
diff --git a/test/tint/buffer/uniform/std140/mat4x2/to_workgroup.wgsl.expected.msl b/test/tint/buffer/uniform/std140/mat4x2/to_workgroup.wgsl.expected.msl
new file mode 100644
index 0000000..1ef4862
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/mat4x2/to_workgroup.wgsl.expected.msl
@@ -0,0 +1,47 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+struct S {
+  /* 0x0000 */ int before;
+  /* 0x0004 */ tint_array<int8_t, 4> tint_pad;
+  /* 0x0008 */ float4x2 m;
+  /* 0x0028 */ int after;
+  /* 0x002c */ tint_array<int8_t, 4> tint_pad_1;
+};
+
+struct tint_symbol_6 {
+  tint_array<S, 4> w;
+};
+
+void f_inner(uint local_invocation_index, threadgroup tint_array<S, 4>* const tint_symbol_1, const constant tint_array<S, 4>* const tint_symbol_2) {
+  for(uint idx = local_invocation_index; (idx < 4u); idx = (idx + 1u)) {
+    uint const i = idx;
+    S const tint_symbol = S{};
+    (*(tint_symbol_1))[i] = tint_symbol;
+  }
+  threadgroup_barrier(mem_flags::mem_threadgroup);
+  *(tint_symbol_1) = *(tint_symbol_2);
+  (*(tint_symbol_1))[1] = (*(tint_symbol_2))[2];
+  (*(tint_symbol_1))[3].m = (*(tint_symbol_2))[2].m;
+  (*(tint_symbol_1))[1].m[0] = float2((*(tint_symbol_2))[0].m[1]).yx;
+}
+
+kernel void f(const constant tint_array<S, 4>* tint_symbol_5 [[buffer(0)]], threadgroup tint_symbol_6* tint_symbol_4 [[threadgroup(0)]], uint local_invocation_index [[thread_index_in_threadgroup]]) {
+  threadgroup tint_array<S, 4>* const tint_symbol_3 = &((*(tint_symbol_4)).w);
+  f_inner(local_invocation_index, tint_symbol_3, tint_symbol_5);
+  return;
+}
+
diff --git a/test/tint/buffer/uniform/std140/mat4x2/to_workgroup.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/mat4x2/to_workgroup.wgsl.expected.spvasm
new file mode 100644
index 0000000..ec0684a
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/mat4x2/to_workgroup.wgsl.expected.spvasm
@@ -0,0 +1,220 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 129
+; Schema: 0
+               OpCapability Shader
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f" %local_invocation_index_1
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %local_invocation_index_1 "local_invocation_index_1"
+               OpName %u_block "u_block"
+               OpMemberName %u_block 0 "inner"
+               OpName %S_std140 "S_std140"
+               OpMemberName %S_std140 0 "before"
+               OpMemberName %S_std140 1 "m_0"
+               OpMemberName %S_std140 2 "m_1"
+               OpMemberName %S_std140 3 "m_2"
+               OpMemberName %S_std140 4 "m_3"
+               OpMemberName %S_std140 5 "after"
+               OpName %u "u"
+               OpName %S "S"
+               OpMemberName %S 0 "before"
+               OpMemberName %S 1 "m"
+               OpMemberName %S 2 "after"
+               OpName %w "w"
+               OpName %conv_S "conv_S"
+               OpName %val "val"
+               OpName %conv_arr_4_S "conv_arr_4_S"
+               OpName %val_0 "val"
+               OpName %arr "arr"
+               OpName %i "i"
+               OpName %var_for_index "var_for_index"
+               OpName %load_u_2_m "load_u_2_m"
+               OpName %f_inner "f_inner"
+               OpName %local_invocation_index "local_invocation_index"
+               OpName %idx "idx"
+               OpName %f "f"
+               OpDecorate %local_invocation_index_1 BuiltIn LocalInvocationIndex
+               OpDecorate %u_block Block
+               OpMemberDecorate %u_block 0 Offset 0
+               OpMemberDecorate %S_std140 0 Offset 0
+               OpMemberDecorate %S_std140 1 Offset 8
+               OpMemberDecorate %S_std140 2 Offset 16
+               OpMemberDecorate %S_std140 3 Offset 24
+               OpMemberDecorate %S_std140 4 Offset 32
+               OpMemberDecorate %S_std140 5 Offset 40
+               OpDecorate %_arr_S_std140_uint_4 ArrayStride 48
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+               OpMemberDecorate %S 0 Offset 0
+               OpMemberDecorate %S 1 Offset 8
+               OpMemberDecorate %S 1 ColMajor
+               OpMemberDecorate %S 1 MatrixStride 8
+               OpMemberDecorate %S 2 Offset 40
+               OpDecorate %_arr_S_uint_4 ArrayStride 48
+       %uint = OpTypeInt 32 0
+%_ptr_Input_uint = OpTypePointer Input %uint
+%local_invocation_index_1 = OpVariable %_ptr_Input_uint Input
+        %int = OpTypeInt 32 1
+      %float = OpTypeFloat 32
+    %v2float = OpTypeVector %float 2
+   %S_std140 = OpTypeStruct %int %v2float %v2float %v2float %v2float %int
+     %uint_4 = OpConstant %uint 4
+%_arr_S_std140_uint_4 = OpTypeArray %S_std140 %uint_4
+    %u_block = OpTypeStruct %_arr_S_std140_uint_4
+%_ptr_Uniform_u_block = OpTypePointer Uniform %u_block
+          %u = OpVariable %_ptr_Uniform_u_block Uniform
+%mat4v2float = OpTypeMatrix %v2float 4
+          %S = OpTypeStruct %int %mat4v2float %int
+%_arr_S_uint_4 = OpTypeArray %S %uint_4
+%_ptr_Workgroup__arr_S_uint_4 = OpTypePointer Workgroup %_arr_S_uint_4
+          %w = OpVariable %_ptr_Workgroup__arr_S_uint_4 Workgroup
+         %18 = OpTypeFunction %S %S_std140
+         %30 = OpTypeFunction %_arr_S_uint_4 %_arr_S_std140_uint_4
+%_ptr_Function__arr_S_uint_4 = OpTypePointer Function %_arr_S_uint_4
+         %36 = OpConstantNull %_arr_S_uint_4
+%_ptr_Function_uint = OpTypePointer Function %uint
+         %39 = OpConstantNull %uint
+       %bool = OpTypeBool
+%_ptr_Function__arr_S_std140_uint_4 = OpTypePointer Function %_arr_S_std140_uint_4
+         %52 = OpConstantNull %_arr_S_std140_uint_4
+%_ptr_Function_S = OpTypePointer Function %S
+%_ptr_Function_S_std140 = OpTypePointer Function %S_std140
+     %uint_1 = OpConstant %uint 1
+         %65 = OpTypeFunction %mat4v2float
+     %uint_0 = OpConstant %uint 0
+     %uint_2 = OpConstant %uint 2
+%_ptr_Uniform_v2float = OpTypePointer Uniform %v2float
+     %uint_3 = OpConstant %uint 3
+       %void = OpTypeVoid
+         %81 = OpTypeFunction %void %uint
+%_ptr_Workgroup_S = OpTypePointer Workgroup %S
+         %99 = OpConstantNull %S
+   %uint_264 = OpConstant %uint 264
+%_ptr_Uniform__arr_S_std140_uint_4 = OpTypePointer Uniform %_arr_S_std140_uint_4
+      %int_1 = OpConstant %int 1
+%_ptr_Uniform_S_std140 = OpTypePointer Uniform %S_std140
+      %int_3 = OpConstant %int 3
+%_ptr_Workgroup_mat4v2float = OpTypePointer Workgroup %mat4v2float
+        %118 = OpConstantNull %int
+%_ptr_Workgroup_v2float = OpTypePointer Workgroup %v2float
+        %124 = OpTypeFunction %void
+     %conv_S = OpFunction %S None %18
+        %val = OpFunctionParameter %S_std140
+         %21 = OpLabel
+         %22 = OpCompositeExtract %int %val 0
+         %23 = OpCompositeExtract %v2float %val 1
+         %24 = OpCompositeExtract %v2float %val 2
+         %25 = OpCompositeExtract %v2float %val 3
+         %26 = OpCompositeExtract %v2float %val 4
+         %27 = OpCompositeConstruct %mat4v2float %23 %24 %25 %26
+         %28 = OpCompositeExtract %int %val 5
+         %29 = OpCompositeConstruct %S %22 %27 %28
+               OpReturnValue %29
+               OpFunctionEnd
+%conv_arr_4_S = OpFunction %_arr_S_uint_4 None %30
+      %val_0 = OpFunctionParameter %_arr_S_std140_uint_4
+         %33 = OpLabel
+        %arr = OpVariable %_ptr_Function__arr_S_uint_4 Function %36
+          %i = OpVariable %_ptr_Function_uint Function %39
+%var_for_index = OpVariable %_ptr_Function__arr_S_std140_uint_4 Function %52
+               OpBranch %40
+         %40 = OpLabel
+               OpLoopMerge %41 %42 None
+               OpBranch %43
+         %43 = OpLabel
+         %45 = OpLoad %uint %i
+         %46 = OpULessThan %bool %45 %uint_4
+         %44 = OpLogicalNot %bool %46
+               OpSelectionMerge %48 None
+               OpBranchConditional %44 %49 %48
+         %49 = OpLabel
+               OpBranch %41
+         %48 = OpLabel
+               OpStore %var_for_index %val_0
+         %53 = OpLoad %uint %i
+         %55 = OpAccessChain %_ptr_Function_S %arr %53
+         %57 = OpLoad %uint %i
+         %59 = OpAccessChain %_ptr_Function_S_std140 %var_for_index %57
+         %60 = OpLoad %S_std140 %59
+         %56 = OpFunctionCall %S %conv_S %60
+               OpStore %55 %56
+               OpBranch %42
+         %42 = OpLabel
+         %61 = OpLoad %uint %i
+         %63 = OpIAdd %uint %61 %uint_1
+               OpStore %i %63
+               OpBranch %40
+         %41 = OpLabel
+         %64 = OpLoad %_arr_S_uint_4 %arr
+               OpReturnValue %64
+               OpFunctionEnd
+ %load_u_2_m = OpFunction %mat4v2float None %65
+         %67 = OpLabel
+         %71 = OpAccessChain %_ptr_Uniform_v2float %u %uint_0 %uint_2 %uint_1
+         %72 = OpLoad %v2float %71
+         %73 = OpAccessChain %_ptr_Uniform_v2float %u %uint_0 %uint_2 %uint_2
+         %74 = OpLoad %v2float %73
+         %76 = OpAccessChain %_ptr_Uniform_v2float %u %uint_0 %uint_2 %uint_3
+         %77 = OpLoad %v2float %76
+         %78 = OpAccessChain %_ptr_Uniform_v2float %u %uint_0 %uint_2 %uint_4
+         %79 = OpLoad %v2float %78
+         %80 = OpCompositeConstruct %mat4v2float %72 %74 %77 %79
+               OpReturnValue %80
+               OpFunctionEnd
+    %f_inner = OpFunction %void None %81
+%local_invocation_index = OpFunctionParameter %uint
+         %85 = OpLabel
+        %idx = OpVariable %_ptr_Function_uint Function %39
+               OpStore %idx %local_invocation_index
+               OpBranch %87
+         %87 = OpLabel
+               OpLoopMerge %88 %89 None
+               OpBranch %90
+         %90 = OpLabel
+         %92 = OpLoad %uint %idx
+         %93 = OpULessThan %bool %92 %uint_4
+         %91 = OpLogicalNot %bool %93
+               OpSelectionMerge %94 None
+               OpBranchConditional %91 %95 %94
+         %95 = OpLabel
+               OpBranch %88
+         %94 = OpLabel
+         %96 = OpLoad %uint %idx
+         %98 = OpAccessChain %_ptr_Workgroup_S %w %96
+               OpStore %98 %99
+               OpBranch %89
+         %89 = OpLabel
+        %100 = OpLoad %uint %idx
+        %101 = OpIAdd %uint %100 %uint_1
+               OpStore %idx %101
+               OpBranch %87
+         %88 = OpLabel
+               OpControlBarrier %uint_2 %uint_2 %uint_264
+        %106 = OpAccessChain %_ptr_Uniform__arr_S_std140_uint_4 %u %uint_0
+        %107 = OpLoad %_arr_S_std140_uint_4 %106
+        %104 = OpFunctionCall %_arr_S_uint_4 %conv_arr_4_S %107
+               OpStore %w %104
+        %109 = OpAccessChain %_ptr_Workgroup_S %w %int_1
+        %112 = OpAccessChain %_ptr_Uniform_S_std140 %u %uint_0 %uint_2
+        %113 = OpLoad %S_std140 %112
+        %110 = OpFunctionCall %S %conv_S %113
+               OpStore %109 %110
+        %116 = OpAccessChain %_ptr_Workgroup_mat4v2float %w %int_3 %uint_1
+        %117 = OpFunctionCall %mat4v2float %load_u_2_m
+               OpStore %116 %117
+        %120 = OpAccessChain %_ptr_Workgroup_v2float %w %int_1 %uint_1 %118
+        %121 = OpAccessChain %_ptr_Uniform_v2float %u %uint_0 %39 %uint_2
+        %122 = OpLoad %v2float %121
+        %123 = OpVectorShuffle %v2float %122 %122 1 0
+               OpStore %120 %123
+               OpReturn
+               OpFunctionEnd
+          %f = OpFunction %void None %124
+        %126 = OpLabel
+        %128 = OpLoad %uint %local_invocation_index_1
+        %127 = OpFunctionCall %void %f_inner %128
+               OpReturn
+               OpFunctionEnd
diff --git a/test/tint/buffer/uniform/std140/mat4x2/to_workgroup.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/mat4x2/to_workgroup.wgsl.expected.wgsl
new file mode 100644
index 0000000..bb53c7e
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/mat4x2/to_workgroup.wgsl.expected.wgsl
@@ -0,0 +1,17 @@
+struct S {
+  before : i32,
+  m : mat4x2<f32>,
+  after : i32,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S, 4>;
+
+var<workgroup> w : array<S, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+  w = u;
+  w[1] = u[2];
+  w[3].m = u[2].m;
+  w[1].m[0] = u[0].m[1].yx;
+}
diff --git a/test/tint/bug/tint/369.wgsl.expected.glsl b/test/tint/bug/tint/369.wgsl.expected.glsl
index 8e6f8e6..aa195e8 100644
--- a/test/tint/bug/tint/369.wgsl.expected.glsl
+++ b/test/tint/bug/tint/369.wgsl.expected.glsl
@@ -8,10 +8,16 @@
   mat2 m;
 };
 
+struct S_std140 {
+  vec2 m_0;
+  vec2 m_1;
+};
+
 layout(binding = 0, std430) buffer S_1 {
   mat2 m;
 } SSBO;
-layout(binding = 0) uniform S_2 {
-  mat2 m;
+layout(binding = 0) uniform S_std140_1 {
+  vec2 m_0;
+  vec2 m_1;
 } UBO;
 
diff --git a/test/tint/bug/tint/369.wgsl.expected.spvasm b/test/tint/bug/tint/369.wgsl.expected.spvasm
index 898a0f3..678e86a 100644
--- a/test/tint/bug/tint/369.wgsl.expected.spvasm
+++ b/test/tint/bug/tint/369.wgsl.expected.spvasm
@@ -1,9 +1,7 @@
-SKIP: FAILED
-
 ; SPIR-V
 ; Version: 1.3
 ; Generator: Google Tint Compiler; 0
-; Bound: 13
+; Bound: 14
 ; Schema: 0
                OpCapability Shader
                OpMemoryModel Logical GLSL450
@@ -12,6 +10,9 @@
                OpName %S "S"
                OpMemberName %S 0 "m"
                OpName %SSBO "SSBO"
+               OpName %S_std140 "S_std140"
+               OpMemberName %S_std140 0 "m_0"
+               OpMemberName %S_std140 1 "m_1"
                OpName %UBO "UBO"
                OpName %unused_entry_point "unused_entry_point"
                OpDecorate %S Block
@@ -21,6 +22,9 @@
                OpDecorate %SSBO NonWritable
                OpDecorate %SSBO DescriptorSet 0
                OpDecorate %SSBO Binding 0
+               OpDecorate %S_std140 Block
+               OpMemberDecorate %S_std140 0 Offset 0
+               OpMemberDecorate %S_std140 1 Offset 8
                OpDecorate %UBO NonWritable
                OpDecorate %UBO DescriptorSet 0
                OpDecorate %UBO Binding 0
@@ -30,14 +34,12 @@
           %S = OpTypeStruct %mat2v2float
 %_ptr_StorageBuffer_S = OpTypePointer StorageBuffer %S
        %SSBO = OpVariable %_ptr_StorageBuffer_S StorageBuffer
-%_ptr_Uniform_S = OpTypePointer Uniform %S
-        %UBO = OpVariable %_ptr_Uniform_S Uniform
+   %S_std140 = OpTypeStruct %v2float %v2float
+%_ptr_Uniform_S_std140 = OpTypePointer Uniform %S_std140
+        %UBO = OpVariable %_ptr_Uniform_S_std140 Uniform
        %void = OpTypeVoid
-          %9 = OpTypeFunction %void
-%unused_entry_point = OpFunction %void None %9
-         %12 = OpLabel
+         %10 = OpTypeFunction %void
+%unused_entry_point = OpFunction %void None %10
+         %13 = OpLabel
                OpReturn
                OpFunctionEnd
-1:1: Structure id 3 decorated as Block for variable in Uniform storage class must follow relaxed uniform buffer layout rules: member 0 is a matrix with stride 8 not satisfying alignment to 16
-  %S = OpTypeStruct %mat2v2float
-
diff --git a/test/tint/bug/tint/403.wgsl.expected.glsl b/test/tint/bug/tint/403.wgsl.expected.glsl
index d82b85d..e89b9ee 100644
--- a/test/tint/bug/tint/403.wgsl.expected.glsl
+++ b/test/tint/bug/tint/403.wgsl.expected.glsl
@@ -4,22 +4,42 @@
   mat2 transform1;
 };
 
+struct vertexUniformBuffer1_std140 {
+  vec2 transform1_0;
+  vec2 transform1_1;
+};
+
 struct vertexUniformBuffer2 {
   mat2 transform2;
 };
 
-layout(binding = 0) uniform vertexUniformBuffer1_1 {
-  mat2 transform1;
+struct vertexUniformBuffer2_std140 {
+  vec2 transform2_0;
+  vec2 transform2_1;
+};
+
+layout(binding = 0) uniform vertexUniformBuffer1_std140_1 {
+  vec2 transform1_0;
+  vec2 transform1_1;
 } x_20;
 
-layout(binding = 0) uniform vertexUniformBuffer2_1 {
-  mat2 transform2;
+layout(binding = 0) uniform vertexUniformBuffer2_std140_1 {
+  vec2 transform2_0;
+  vec2 transform2_1;
 } x_26;
 
+mat2 load_x_20_transform1() {
+  return mat2(x_20.transform1_0, x_20.transform1_1);
+}
+
+mat2 load_x_26_transform2() {
+  return mat2(x_26.transform2_0, x_26.transform2_1);
+}
+
 vec4 tint_symbol(uint tint_symbol_1) {
   vec2 indexable[3] = vec2[3](vec2(0.0f, 0.0f), vec2(0.0f, 0.0f), vec2(0.0f, 0.0f));
-  mat2 x_23 = x_20.transform1;
-  mat2 x_28 = x_26.transform2;
+  mat2 x_23 = load_x_20_transform1();
+  mat2 x_28 = load_x_26_transform2();
   uint x_46 = tint_symbol_1;
   vec2 tint_symbol_2[3] = vec2[3](vec2(-1.0f, 1.0f), vec2(1.0f), vec2(-1.0f));
   indexable = tint_symbol_2;
diff --git a/test/tint/bug/tint/403.wgsl.expected.spvasm b/test/tint/bug/tint/403.wgsl.expected.spvasm
index 3491de4..5e79202 100644
--- a/test/tint/bug/tint/403.wgsl.expected.spvasm
+++ b/test/tint/bug/tint/403.wgsl.expected.spvasm
@@ -1,9 +1,7 @@
-SKIP: FAILED
-
 ; SPIR-V
 ; Version: 1.3
 ; Generator: Google Tint Compiler; 0
-; Bound: 63
+; Bound: 76
 ; Schema: 0
                OpCapability Shader
                OpMemoryModel Logical GLSL450
@@ -11,12 +9,16 @@
                OpName %gl_VertexIndex_1 "gl_VertexIndex_1"
                OpName %value "value"
                OpName %vertex_point_size "vertex_point_size"
-               OpName %vertexUniformBuffer1 "vertexUniformBuffer1"
-               OpMemberName %vertexUniformBuffer1 0 "transform1"
+               OpName %vertexUniformBuffer1_std140 "vertexUniformBuffer1_std140"
+               OpMemberName %vertexUniformBuffer1_std140 0 "transform1_0"
+               OpMemberName %vertexUniformBuffer1_std140 1 "transform1_1"
                OpName %x_20 "x_20"
-               OpName %vertexUniformBuffer2 "vertexUniformBuffer2"
-               OpMemberName %vertexUniformBuffer2 0 "transform2"
+               OpName %vertexUniformBuffer2_std140 "vertexUniformBuffer2_std140"
+               OpMemberName %vertexUniformBuffer2_std140 0 "transform2_0"
+               OpMemberName %vertexUniformBuffer2_std140 1 "transform2_1"
                OpName %x_26 "x_26"
+               OpName %load_x_20_transform1 "load_x_20_transform1"
+               OpName %load_x_26_transform2 "load_x_26_transform2"
                OpName %main_inner "main_inner"
                OpName %gl_VertexIndex "gl_VertexIndex"
                OpName %indexable "indexable"
@@ -24,17 +26,15 @@
                OpDecorate %gl_VertexIndex_1 BuiltIn VertexIndex
                OpDecorate %value BuiltIn Position
                OpDecorate %vertex_point_size BuiltIn PointSize
-               OpDecorate %vertexUniformBuffer1 Block
-               OpMemberDecorate %vertexUniformBuffer1 0 Offset 0
-               OpMemberDecorate %vertexUniformBuffer1 0 ColMajor
-               OpMemberDecorate %vertexUniformBuffer1 0 MatrixStride 8
+               OpDecorate %vertexUniformBuffer1_std140 Block
+               OpMemberDecorate %vertexUniformBuffer1_std140 0 Offset 0
+               OpMemberDecorate %vertexUniformBuffer1_std140 1 Offset 8
                OpDecorate %x_20 NonWritable
                OpDecorate %x_20 DescriptorSet 0
                OpDecorate %x_20 Binding 0
-               OpDecorate %vertexUniformBuffer2 Block
-               OpMemberDecorate %vertexUniformBuffer2 0 Offset 0
-               OpMemberDecorate %vertexUniformBuffer2 0 ColMajor
-               OpMemberDecorate %vertexUniformBuffer2 0 MatrixStride 8
+               OpDecorate %vertexUniformBuffer2_std140 Block
+               OpMemberDecorate %vertexUniformBuffer2_std140 0 Offset 0
+               OpMemberDecorate %vertexUniformBuffer2_std140 1 Offset 8
                OpDecorate %x_26 NonWritable
                OpDecorate %x_26 DescriptorSet 1
                OpDecorate %x_26 Binding 0
@@ -51,63 +51,77 @@
          %11 = OpConstantNull %float
 %vertex_point_size = OpVariable %_ptr_Output_float Output %11
     %v2float = OpTypeVector %float 2
+%vertexUniformBuffer1_std140 = OpTypeStruct %v2float %v2float
+%_ptr_Uniform_vertexUniformBuffer1_std140 = OpTypePointer Uniform %vertexUniformBuffer1_std140
+       %x_20 = OpVariable %_ptr_Uniform_vertexUniformBuffer1_std140 Uniform
+%vertexUniformBuffer2_std140 = OpTypeStruct %v2float %v2float
+%_ptr_Uniform_vertexUniformBuffer2_std140 = OpTypePointer Uniform %vertexUniformBuffer2_std140
+       %x_26 = OpVariable %_ptr_Uniform_vertexUniformBuffer2_std140 Uniform
 %mat2v2float = OpTypeMatrix %v2float 2
-%vertexUniformBuffer1 = OpTypeStruct %mat2v2float
-%_ptr_Uniform_vertexUniformBuffer1 = OpTypePointer Uniform %vertexUniformBuffer1
-       %x_20 = OpVariable %_ptr_Uniform_vertexUniformBuffer1 Uniform
-%vertexUniformBuffer2 = OpTypeStruct %mat2v2float
-%_ptr_Uniform_vertexUniformBuffer2 = OpTypePointer Uniform %vertexUniformBuffer2
-       %x_26 = OpVariable %_ptr_Uniform_vertexUniformBuffer2 Uniform
-         %20 = OpTypeFunction %v4float %uint
+         %19 = OpTypeFunction %mat2v2float
+     %uint_0 = OpConstant %uint 0
+%_ptr_Uniform_v2float = OpTypePointer Uniform %v2float
+     %uint_1 = OpConstant %uint 1
+         %38 = OpTypeFunction %v4float %uint
      %uint_3 = OpConstant %uint 3
 %_arr_v2float_uint_3 = OpTypeArray %v2float %uint_3
 %_ptr_Function__arr_v2float_uint_3 = OpTypePointer Function %_arr_v2float_uint_3
-         %28 = OpConstantNull %_arr_v2float_uint_3
-     %uint_0 = OpConstant %uint 0
-%_ptr_Uniform_mat2v2float = OpTypePointer Uniform %mat2v2float
+         %46 = OpConstantNull %_arr_v2float_uint_3
    %float_n1 = OpConstant %float -1
     %float_1 = OpConstant %float 1
-         %37 = OpConstantComposite %v2float %float_n1 %float_1
-         %38 = OpConstantComposite %v2float %float_1 %float_1
-         %39 = OpConstantComposite %v2float %float_n1 %float_n1
-         %40 = OpConstantComposite %_arr_v2float_uint_3 %37 %38 %39
+         %51 = OpConstantComposite %v2float %float_n1 %float_1
+         %52 = OpConstantComposite %v2float %float_1 %float_1
+         %53 = OpConstantComposite %v2float %float_n1 %float_n1
+         %54 = OpConstantComposite %_arr_v2float_uint_3 %51 %52 %53
 %_ptr_Function_v2float = OpTypePointer Function %v2float
-         %44 = OpConstantNull %uint
-     %uint_1 = OpConstant %uint 1
+         %58 = OpConstantNull %uint
        %void = OpTypeVoid
-         %57 = OpTypeFunction %void
- %main_inner = OpFunction %v4float None %20
-%gl_VertexIndex = OpFunctionParameter %uint
-         %23 = OpLabel
-  %indexable = OpVariable %_ptr_Function__arr_v2float_uint_3 Function %28
-         %31 = OpAccessChain %_ptr_Uniform_mat2v2float %x_20 %uint_0
-         %32 = OpLoad %mat2v2float %31
-         %33 = OpAccessChain %_ptr_Uniform_mat2v2float %x_26 %uint_0
-         %34 = OpLoad %mat2v2float %33
-               OpStore %indexable %40
-         %42 = OpAccessChain %_ptr_Function_v2float %indexable %gl_VertexIndex
-         %43 = OpLoad %v2float %42
-         %45 = OpCompositeExtract %v2float %32 0
-         %46 = OpCompositeExtract %v2float %34 0
-         %47 = OpFAdd %v2float %45 %46
-         %49 = OpCompositeExtract %v2float %32 1
-         %50 = OpCompositeExtract %v2float %34 1
-         %51 = OpFAdd %v2float %49 %50
-         %52 = OpCompositeConstruct %mat2v2float %47 %51
-         %53 = OpMatrixTimesVector %v2float %52 %43
-         %54 = OpCompositeExtract %float %53 0
-         %55 = OpCompositeExtract %float %53 1
-         %56 = OpCompositeConstruct %v4float %54 %55 %11 %float_1
-               OpReturnValue %56
+         %70 = OpTypeFunction %void
+%load_x_20_transform1 = OpFunction %mat2v2float None %19
+         %22 = OpLabel
+         %25 = OpAccessChain %_ptr_Uniform_v2float %x_20 %uint_0
+         %26 = OpLoad %v2float %25
+         %28 = OpAccessChain %_ptr_Uniform_v2float %x_20 %uint_1
+         %29 = OpLoad %v2float %28
+         %30 = OpCompositeConstruct %mat2v2float %26 %29
+               OpReturnValue %30
                OpFunctionEnd
-       %main = OpFunction %void None %57
-         %60 = OpLabel
-         %62 = OpLoad %uint %gl_VertexIndex_1
-         %61 = OpFunctionCall %v4float %main_inner %62
-               OpStore %value %61
+%load_x_26_transform2 = OpFunction %mat2v2float None %19
+         %32 = OpLabel
+         %33 = OpAccessChain %_ptr_Uniform_v2float %x_26 %uint_0
+         %34 = OpLoad %v2float %33
+         %35 = OpAccessChain %_ptr_Uniform_v2float %x_26 %uint_1
+         %36 = OpLoad %v2float %35
+         %37 = OpCompositeConstruct %mat2v2float %34 %36
+               OpReturnValue %37
+               OpFunctionEnd
+ %main_inner = OpFunction %v4float None %38
+%gl_VertexIndex = OpFunctionParameter %uint
+         %41 = OpLabel
+  %indexable = OpVariable %_ptr_Function__arr_v2float_uint_3 Function %46
+         %47 = OpFunctionCall %mat2v2float %load_x_20_transform1
+         %48 = OpFunctionCall %mat2v2float %load_x_26_transform2
+               OpStore %indexable %54
+         %56 = OpAccessChain %_ptr_Function_v2float %indexable %gl_VertexIndex
+         %57 = OpLoad %v2float %56
+         %59 = OpCompositeExtract %v2float %47 0
+         %60 = OpCompositeExtract %v2float %48 0
+         %61 = OpFAdd %v2float %59 %60
+         %62 = OpCompositeExtract %v2float %47 1
+         %63 = OpCompositeExtract %v2float %48 1
+         %64 = OpFAdd %v2float %62 %63
+         %65 = OpCompositeConstruct %mat2v2float %61 %64
+         %66 = OpMatrixTimesVector %v2float %65 %57
+         %67 = OpCompositeExtract %float %66 0
+         %68 = OpCompositeExtract %float %66 1
+         %69 = OpCompositeConstruct %v4float %67 %68 %11 %float_1
+               OpReturnValue %69
+               OpFunctionEnd
+       %main = OpFunction %void None %70
+         %73 = OpLabel
+         %75 = OpLoad %uint %gl_VertexIndex_1
+         %74 = OpFunctionCall %v4float %main_inner %75
+               OpStore %value %74
                OpStore %vertex_point_size %float_1
                OpReturn
                OpFunctionEnd
-1:1: Structure id 14 decorated as Block for variable in Uniform storage class must follow relaxed uniform buffer layout rules: member 0 is a matrix with stride 8 not satisfying alignment to 16
-  %vertexUniformBuffer1 = OpTypeStruct %mat2v2float
-
diff --git a/test/tint/expressions/binary/mul/mat3x2-vec3/f32.wgsl.expected.glsl b/test/tint/expressions/binary/mul/mat3x2-vec3/f32.wgsl.expected.glsl
index c998d8f..086769e 100644
--- a/test/tint/expressions/binary/mul/mat3x2-vec3/f32.wgsl.expected.glsl
+++ b/test/tint/expressions/binary/mul/mat3x2-vec3/f32.wgsl.expected.glsl
@@ -6,13 +6,26 @@
   vec3 vector;
 };
 
-layout(binding = 0) uniform S_1 {
-  mat3x2 matrix;
+struct S_std140 {
+  vec2 matrix_0;
+  vec2 matrix_1;
+  vec2 matrix_2;
+  vec3 vector;
+};
+
+layout(binding = 0) uniform S_std140_1 {
+  vec2 matrix_0;
+  vec2 matrix_1;
+  vec2 matrix_2;
   vec3 vector;
 } data;
 
+mat3x2 load_data_matrix() {
+  return mat3x2(data.matrix_0, data.matrix_1, data.matrix_2);
+}
+
 void tint_symbol() {
-  vec2 x = (data.matrix * data.vector);
+  vec2 x = (load_data_matrix() * data.vector);
 }
 
 void main() {
diff --git a/test/tint/expressions/binary/mul/mat3x2-vec3/f32.wgsl.expected.spvasm b/test/tint/expressions/binary/mul/mat3x2-vec3/f32.wgsl.expected.spvasm
index d0c5266..79f9c49 100644
--- a/test/tint/expressions/binary/mul/mat3x2-vec3/f32.wgsl.expected.spvasm
+++ b/test/tint/expressions/binary/mul/mat3x2-vec3/f32.wgsl.expected.spvasm
@@ -1,50 +1,61 @@
-SKIP: FAILED
-
 ; SPIR-V
 ; Version: 1.3
 ; Generator: Google Tint Compiler; 0
-; Bound: 22
+; Bound: 33
 ; Schema: 0
                OpCapability Shader
                OpMemoryModel Logical GLSL450
                OpEntryPoint Fragment %main "main"
                OpExecutionMode %main OriginUpperLeft
-               OpName %S "S"
-               OpMemberName %S 0 "matrix"
-               OpMemberName %S 1 "vector"
+               OpName %S_std140 "S_std140"
+               OpMemberName %S_std140 0 "matrix_0"
+               OpMemberName %S_std140 1 "matrix_1"
+               OpMemberName %S_std140 2 "matrix_2"
+               OpMemberName %S_std140 3 "vector"
                OpName %data "data"
+               OpName %load_data_matrix "load_data_matrix"
                OpName %main "main"
-               OpDecorate %S Block
-               OpMemberDecorate %S 0 Offset 0
-               OpMemberDecorate %S 0 ColMajor
-               OpMemberDecorate %S 0 MatrixStride 8
-               OpMemberDecorate %S 1 Offset 32
+               OpDecorate %S_std140 Block
+               OpMemberDecorate %S_std140 0 Offset 0
+               OpMemberDecorate %S_std140 1 Offset 8
+               OpMemberDecorate %S_std140 2 Offset 16
+               OpMemberDecorate %S_std140 3 Offset 32
                OpDecorate %data NonWritable
                OpDecorate %data DescriptorSet 0
                OpDecorate %data Binding 0
       %float = OpTypeFloat 32
     %v2float = OpTypeVector %float 2
-%mat3v2float = OpTypeMatrix %v2float 3
     %v3float = OpTypeVector %float 3
-          %S = OpTypeStruct %mat3v2float %v3float
-%_ptr_Uniform_S = OpTypePointer Uniform %S
-       %data = OpVariable %_ptr_Uniform_S Uniform
-       %void = OpTypeVoid
-          %8 = OpTypeFunction %void
+   %S_std140 = OpTypeStruct %v2float %v2float %v2float %v3float
+%_ptr_Uniform_S_std140 = OpTypePointer Uniform %S_std140
+       %data = OpVariable %_ptr_Uniform_S_std140 Uniform
+%mat3v2float = OpTypeMatrix %v2float 3
+          %7 = OpTypeFunction %mat3v2float
        %uint = OpTypeInt 32 0
      %uint_0 = OpConstant %uint 0
-%_ptr_Uniform_mat3v2float = OpTypePointer Uniform %mat3v2float
+%_ptr_Uniform_v2float = OpTypePointer Uniform %v2float
      %uint_1 = OpConstant %uint 1
+     %uint_2 = OpConstant %uint 2
+       %void = OpTypeVoid
+         %23 = OpTypeFunction %void
+     %uint_3 = OpConstant %uint 3
 %_ptr_Uniform_v3float = OpTypePointer Uniform %v3float
-       %main = OpFunction %void None %8
-         %11 = OpLabel
-         %15 = OpAccessChain %_ptr_Uniform_mat3v2float %data %uint_0
-         %16 = OpLoad %mat3v2float %15
-         %19 = OpAccessChain %_ptr_Uniform_v3float %data %uint_1
-         %20 = OpLoad %v3float %19
-         %21 = OpMatrixTimesVector %v2float %16 %20
+%load_data_matrix = OpFunction %mat3v2float None %7
+         %10 = OpLabel
+         %14 = OpAccessChain %_ptr_Uniform_v2float %data %uint_0
+         %15 = OpLoad %v2float %14
+         %17 = OpAccessChain %_ptr_Uniform_v2float %data %uint_1
+         %18 = OpLoad %v2float %17
+         %20 = OpAccessChain %_ptr_Uniform_v2float %data %uint_2
+         %21 = OpLoad %v2float %20
+         %22 = OpCompositeConstruct %mat3v2float %15 %18 %21
+               OpReturnValue %22
+               OpFunctionEnd
+       %main = OpFunction %void None %23
+         %26 = OpLabel
+         %27 = OpFunctionCall %mat3v2float %load_data_matrix
+         %30 = OpAccessChain %_ptr_Uniform_v3float %data %uint_3
+         %31 = OpLoad %v3float %30
+         %32 = OpMatrixTimesVector %v2float %27 %31
                OpReturn
                OpFunctionEnd
-1:1: Structure id 3 decorated as Block for variable in Uniform storage class must follow relaxed uniform buffer layout rules: member 0 is a matrix with stride 8 not satisfying alignment to 16
-  %S = OpTypeStruct %mat3v2float %v3float
-