transform: Fixes for DecomposeMemoryAccess

CloneContext::Replace(T* what, T* with) is bug-prone, as complex transforms may want to clone `what` multiple times, or not at all. In both cases, this will likely result in an ICE as either the replacement will be reachable multiple times, or not at all.

The CTS test: webgpu:shader,execution,robust_access:linear_memory:storageClass="storage";storageMode="read_write";access="read";atomic=true;baseType="i32"
Was triggering this brokenness with DecomposeMemoryAccess's use of CloneContext::Replace(T*, T*).

Switch the usage of CloneContext::Replace(T*, T*) to the new function form.

As std::function is copyable, it cannot hold a captured std::unique_ptr.
This prevented the Replace() lambdas from capturing the necessary `BufferAccess` data, as this held a `std::unique_ptr<Offset>`.
To fix this, use a `BlockAllocator` for Offsets, and use raw pointers instead.

Because the function passed to Replace() is called just before the node is cloned, insertion of new functions will occur just before the currently evaluated module-scope entity.
This allows us to remove the "insert_after" arguments to LoadFunc(), StoreFunc(), and AtomicFunc().
We can also kill the icky InsertGlobal() and TypeDeclOf() helpers.

Bug: tint:993
Change-Id: I60972bc13a2fa819a163ee2671f61e82d0e68d2a
Reviewed-on: https://dawn-review.googlesource.com/c/tint/+/58222
Kokoro: Kokoro <noreply+kokoro@google.com>
Reviewed-by: James Price <jrprice@google.com>
Commit-Queue: Ben Clayton <bclayton@google.com>
diff --git a/src/transform/decompose_memory_access.cc b/src/transform/decompose_memory_access.cc
index 1210c72..a43c4c0 100644
--- a/src/transform/decompose_memory_access.cc
+++ b/src/transform/decompose_memory_access.cc
@@ -26,6 +26,7 @@
 #include "src/ast/scalar_constructor_expression.h"
 #include "src/ast/type_name.h"
 #include "src/ast/unary_op.h"
+#include "src/block_allocator.h"
 #include "src/program_builder.h"
 #include "src/sem/array.h"
 #include "src/sem/atomic_type.h"
@@ -50,7 +51,7 @@
 /// offsets for storage and uniform buffer accesses.
 struct Offset : Castable<Offset> {
   /// @returns builds and returns the ast::Expression in `ctx.dst`
-  virtual ast::Expression* Build(CloneContext& ctx) = 0;
+  virtual ast::Expression* Build(CloneContext& ctx) const = 0;
 };
 
 /// OffsetExpr is an implementation of Offset that clones and casts the given
@@ -60,7 +61,7 @@
 
   explicit OffsetExpr(ast::Expression* e) : expr(e) {}
 
-  ast::Expression* Build(CloneContext& ctx) override {
+  ast::Expression* Build(CloneContext& ctx) const override {
     auto* type = ctx.src->Sem().Get(expr)->Type()->UnwrapRef();
     auto* res = ctx.Clone(expr);
     if (!type->Is<sem::U32>()) {
@@ -77,7 +78,7 @@
 
   explicit OffsetLiteral(uint32_t lit) : literal(lit) {}
 
-  ast::Expression* Build(CloneContext& ctx) override {
+  ast::Expression* Build(CloneContext& ctx) const override {
     return ctx.dst->Expr(literal);
   }
 };
@@ -86,103 +87,20 @@
 /// two Offsets.
 struct OffsetBinOp : Offset {
   ast::BinaryOp op;
-  std::unique_ptr<Offset> lhs;
-  std::unique_ptr<Offset> rhs;
+  Offset const* lhs = nullptr;
+  Offset const* rhs = nullptr;
 
-  ast::Expression* Build(CloneContext& ctx) override {
+  ast::Expression* Build(CloneContext& ctx) const override {
     return ctx.dst->create<ast::BinaryExpression>(op, lhs->Build(ctx),
                                                   rhs->Build(ctx));
   }
 };
 
-/// @returns an Offset for the given literal value
-std::unique_ptr<Offset> ToOffset(uint32_t offset) {
-  return std::make_unique<OffsetLiteral>(offset);
-}
-
-/// @returns an Offset for the given ast::Expression
-std::unique_ptr<Offset> ToOffset(ast::Expression* expr) {
-  if (auto* scalar = expr->As<ast::ScalarConstructorExpression>()) {
-    if (auto* u32 = scalar->literal()->As<ast::UintLiteral>()) {
-      return std::make_unique<OffsetLiteral>(u32->value());
-    } else if (auto* i32 = scalar->literal()->As<ast::SintLiteral>()) {
-      if (i32->value() > 0) {
-        return std::make_unique<OffsetLiteral>(i32->value());
-      }
-    }
-  }
-  return std::make_unique<OffsetExpr>(expr);
-}
-
-/// @returns the given offset (pass-through)
-std::unique_ptr<Offset> ToOffset(std::unique_ptr<Offset> offset) {
-  return offset;
-}
-
-/// @return an Offset that is a sum of lhs and rhs, performing basic constant
-/// folding if possible
-template <typename LHS, typename RHS>
-std::unique_ptr<Offset> Add(LHS&& lhs_, RHS&& rhs_) {
-  std::unique_ptr<Offset> lhs = ToOffset(std::forward<LHS>(lhs_));
-  std::unique_ptr<Offset> rhs = ToOffset(std::forward<RHS>(rhs_));
-  auto* lhs_lit = lhs->As<OffsetLiteral>();
-  auto* rhs_lit = rhs->As<OffsetLiteral>();
-  if (lhs_lit && lhs_lit->literal == 0) {
-    return rhs;
-  }
-  if (rhs_lit && rhs_lit->literal == 0) {
-    return lhs;
-  }
-  if (lhs_lit && rhs_lit) {
-    if (static_cast<uint64_t>(lhs_lit->literal) +
-            static_cast<uint64_t>(rhs_lit->literal) <=
-        0xffffffff) {
-      return std::make_unique<OffsetLiteral>(lhs_lit->literal +
-                                             rhs_lit->literal);
-    }
-  }
-  auto out = std::make_unique<OffsetBinOp>();
-  out->op = ast::BinaryOp::kAdd;
-  out->lhs = std::move(lhs);
-  out->rhs = std::move(rhs);
-  return out;
-}
-
-/// @return an Offset that is the multiplication of lhs and rhs, performing
-/// basic constant folding if possible
-template <typename LHS, typename RHS>
-std::unique_ptr<Offset> Mul(LHS&& lhs_, RHS&& rhs_) {
-  std::unique_ptr<Offset> lhs = ToOffset(std::forward<LHS>(lhs_));
-  std::unique_ptr<Offset> rhs = ToOffset(std::forward<RHS>(rhs_));
-  auto* lhs_lit = lhs->As<OffsetLiteral>();
-  auto* rhs_lit = rhs->As<OffsetLiteral>();
-  if (lhs_lit && lhs_lit->literal == 0) {
-    return std::make_unique<OffsetLiteral>(0);
-  }
-  if (rhs_lit && rhs_lit->literal == 0) {
-    return std::make_unique<OffsetLiteral>(0);
-  }
-  if (lhs_lit && lhs_lit->literal == 1) {
-    return rhs;
-  }
-  if (rhs_lit && rhs_lit->literal == 1) {
-    return lhs;
-  }
-  if (lhs_lit && rhs_lit) {
-    return std::make_unique<OffsetLiteral>(lhs_lit->literal * rhs_lit->literal);
-  }
-  auto out = std::make_unique<OffsetBinOp>();
-  out->op = ast::BinaryOp::kMultiply;
-  out->lhs = std::move(lhs);
-  out->rhs = std::move(rhs);
-  return out;
-}
-
 /// LoadStoreKey is the unordered map key to a load or store intrinsic.
 struct LoadStoreKey {
   ast::StorageClass const storage_class;  // buffer storage class
-  sem::Type const* buf_ty;                // buffer type
-  sem::Type const* el_ty;                 // element type
+  sem::Type const* buf_ty = nullptr;      // buffer type
+  sem::Type const* el_ty = nullptr;       // element type
   bool operator==(const LoadStoreKey& rhs) const {
     return storage_class == rhs.storage_class && buf_ty == rhs.buf_ty &&
            el_ty == rhs.el_ty;
@@ -196,9 +114,9 @@
 
 /// AtomicKey is the unordered map key to an atomic intrinsic.
 struct AtomicKey {
-  sem::Type const* buf_ty;      // buffer type
-  sem::Type const* el_ty;       // element type
-  sem::IntrinsicType const op;  // atomic op
+  sem::Type const* buf_ty = nullptr;  // buffer type
+  sem::Type const* el_ty = nullptr;   // element type
+  sem::IntrinsicType const op;        // atomic op
   bool operator==(const AtomicKey& rhs) const {
     return buf_ty == rhs.buf_ty && el_ty == rhs.el_ty && op == rhs.op;
   }
@@ -367,39 +285,10 @@
       builder->ID(), op, ast::StorageClass::kStorage, type);
 }
 
-/// Inserts `node` before `insert_after` in the global declarations of
-/// `ctx.dst`. If `insert_after` is nullptr, then `node` is inserted at the top
-/// of the module.
-void InsertGlobal(CloneContext& ctx,
-                  const Cloneable* insert_after,
-                  Cloneable* node) {
-  auto& globals = ctx.src->AST().GlobalDeclarations();
-  if (insert_after) {
-    ctx.InsertAfter(globals, insert_after, node);
-  } else {
-    ctx.InsertBefore(globals, *globals.begin(), node);
-  }
-}
-
-/// @returns the unwrapped, user-declared type of ty.
-const ast::TypeDecl* TypeDeclOf(const sem::Type* ty) {
-  while (true) {
-    if (auto* ref = ty->As<sem::Reference>()) {
-      ty = ref->StoreType();
-      continue;
-    }
-    if (auto* str = ty->As<sem::Struct>()) {
-      return str->Declaration();
-    }
-    // Not a declared type
-    return nullptr;
-  }
-}
-
 /// BufferAccess describes a single storage or uniform buffer access
 struct BufferAccess {
   sem::Expression const* var = nullptr;  // Storage buffer variable
-  std::unique_ptr<Offset> offset;        // The byte offset on var
+  Offset const* offset = nullptr;        // The byte offset on var
   sem::Type const* type = nullptr;       // The type of the access
   operator bool() const { return var; }  // Returns true if valid
 };
@@ -430,14 +319,105 @@
   std::unordered_map<AtomicKey, Symbol, AtomicKey::Hasher> atomic_funcs;
   /// List of storage or uniform buffer writes
   std::vector<Store> stores;
+  /// Allocations for offsets
+  BlockAllocator<Offset> offsets_;
+
+  /// @param offset the offset value to wrap in an Offset
+  /// @returns an Offset for the given literal value
+  const Offset* ToOffset(uint32_t offset) {
+    return offsets_.Create<OffsetLiteral>(offset);
+  }
+
+  /// @param expr the expression to convert to an Offset
+  /// @returns an Offset for the given ast::Expression
+  const Offset* ToOffset(ast::Expression* expr) {
+    if (auto* scalar = expr->As<ast::ScalarConstructorExpression>()) {
+      if (auto* u32 = scalar->literal()->As<ast::UintLiteral>()) {
+        return offsets_.Create<OffsetLiteral>(u32->value());
+      } else if (auto* i32 = scalar->literal()->As<ast::SintLiteral>()) {
+        if (i32->value() > 0) {
+          return offsets_.Create<OffsetLiteral>(i32->value());
+        }
+      }
+    }
+    return offsets_.Create<OffsetExpr>(expr);
+  }
+
+  /// @param offset the Offset that is returned
+  /// @returns the given offset (pass-through)
+  const Offset* ToOffset(const Offset* offset) { return offset; }
+
+  /// @param lhs_ the left-hand side of the add expression
+  /// @param rhs_ the right-hand side of the add expression
+  /// @return an Offset that is a sum of lhs and rhs, performing basic constant
+  /// folding if possible
+  template <typename LHS, typename RHS>
+  const Offset* Add(LHS&& lhs_, RHS&& rhs_) {
+    auto* lhs = ToOffset(std::forward<LHS>(lhs_));
+    auto* rhs = ToOffset(std::forward<RHS>(rhs_));
+    auto* lhs_lit = tint::As<OffsetLiteral>(lhs);
+    auto* rhs_lit = tint::As<OffsetLiteral>(rhs);
+    if (lhs_lit && lhs_lit->literal == 0) {
+      return rhs;
+    }
+    if (rhs_lit && rhs_lit->literal == 0) {
+      return lhs;
+    }
+    if (lhs_lit && rhs_lit) {
+      if (static_cast<uint64_t>(lhs_lit->literal) +
+              static_cast<uint64_t>(rhs_lit->literal) <=
+          0xffffffff) {
+        return offsets_.Create<OffsetLiteral>(lhs_lit->literal +
+                                              rhs_lit->literal);
+      }
+    }
+    auto* out = offsets_.Create<OffsetBinOp>();
+    out->op = ast::BinaryOp::kAdd;
+    out->lhs = lhs;
+    out->rhs = rhs;
+    return out;
+  }
+
+  /// @param lhs_ the left-hand side of the multiply expression
+  /// @param rhs_ the right-hand side of the multiply expression
+  /// @return an Offset that is the multiplication of lhs and rhs, performing
+  /// basic constant folding if possible
+  template <typename LHS, typename RHS>
+  const Offset* Mul(LHS&& lhs_, RHS&& rhs_) {
+    auto* lhs = ToOffset(std::forward<LHS>(lhs_));
+    auto* rhs = ToOffset(std::forward<RHS>(rhs_));
+    auto* lhs_lit = tint::As<OffsetLiteral>(lhs);
+    auto* rhs_lit = tint::As<OffsetLiteral>(rhs);
+    if (lhs_lit && lhs_lit->literal == 0) {
+      return offsets_.Create<OffsetLiteral>(0);
+    }
+    if (rhs_lit && rhs_lit->literal == 0) {
+      return offsets_.Create<OffsetLiteral>(0);
+    }
+    if (lhs_lit && lhs_lit->literal == 1) {
+      return rhs;
+    }
+    if (rhs_lit && rhs_lit->literal == 1) {
+      return lhs;
+    }
+    if (lhs_lit && rhs_lit) {
+      return offsets_.Create<OffsetLiteral>(lhs_lit->literal *
+                                            rhs_lit->literal);
+    }
+    auto* out = offsets_.Create<OffsetBinOp>();
+    out->op = ast::BinaryOp::kMultiply;
+    out->lhs = lhs;
+    out->rhs = rhs;
+    return out;
+  }
 
   /// AddAccess() adds the `expr -> access` map item to #accesses, and `expr`
   /// to #expression_order.
   /// @param expr the expression that performs the access
   /// @param access the access
-  void AddAccess(ast::Expression* expr, BufferAccess&& access) {
+  void AddAccess(ast::Expression* expr, const BufferAccess& access) {
     TINT_ASSERT(Transform, access.type);
-    accesses.emplace(expr, std::move(access));
+    accesses.emplace(expr, access);
     expression_order.emplace_back(expr);
   }
 
@@ -451,7 +431,7 @@
     if (lhs_it == accesses.end()) {
       return {};
     }
-    auto access = std::move(lhs_it->second);
+    auto access = lhs_it->second;
     accesses.erase(node);
     return access;
   }
@@ -461,13 +441,11 @@
   /// The emitted function has the signature:
   ///   `fn load(buf : buf_ty, offset : u32) -> el_ty`
   /// @param ctx the CloneContext
-  /// @param insert_after the user-declared type to insert the function after
   /// @param buf_ty the storage or uniform buffer type
   /// @param el_ty the storage or uniform buffer element type
   /// @param var_user the variable user
   /// @return the name of the function that performs the load
   Symbol LoadFunc(CloneContext& ctx,
-                  const ast::TypeDecl* insert_after,
                   const sem::Type* buf_ty,
                   const sem::Type* el_ty,
                   const sem::VariableUser* var_user) {
@@ -509,8 +487,7 @@
             ast::ExpressionList values;
             if (auto* mat_ty = el_ty->As<sem::Matrix>()) {
               auto* vec_ty = mat_ty->ColumnType();
-              Symbol load =
-                  LoadFunc(ctx, insert_after, buf_ty, vec_ty, var_user);
+              Symbol load = LoadFunc(ctx, buf_ty, vec_ty, var_user);
               for (uint32_t i = 0; i < mat_ty->columns(); i++) {
                 auto* offset =
                     ctx.dst->Add("offset", i * MatrixColumnStride(mat_ty));
@@ -519,14 +496,14 @@
             } else if (auto* str = el_ty->As<sem::Struct>()) {
               for (auto* member : str->Members()) {
                 auto* offset = ctx.dst->Add("offset", member->Offset());
-                Symbol load = LoadFunc(ctx, insert_after, buf_ty,
-                                       member->Type()->UnwrapRef(), var_user);
+                Symbol load = LoadFunc(ctx, buf_ty, member->Type()->UnwrapRef(),
+                                       var_user);
                 values.emplace_back(ctx.dst->Call(load, "buffer", offset));
               }
             } else if (auto* arr = el_ty->As<sem::Array>()) {
               for (uint32_t i = 0; i < arr->Count(); i++) {
                 auto* offset = ctx.dst->Add("offset", arr->Stride() * i);
-                Symbol load = LoadFunc(ctx, insert_after, buf_ty,
+                Symbol load = LoadFunc(ctx, buf_ty,
                                        arr->ElemType()->UnwrapRef(), var_user);
                 values.emplace_back(ctx.dst->Call(load, "buffer", offset));
               }
@@ -539,7 +516,7 @@
                         CreateASTTypeFor(&ctx, el_ty), values))),
                 ast::DecorationList{}, ast::DecorationList{});
           }
-          InsertGlobal(ctx, insert_after, func);
+          ctx.dst->AST().AddFunction(func);
           return func->symbol();
         });
   }
@@ -549,13 +526,11 @@
   /// The function has the signature:
   ///   `fn store(buf : buf_ty, offset : u32, value : el_ty)`
   /// @param ctx the CloneContext
-  /// @param insert_after the user-declared type to insert the function after
   /// @param buf_ty the storage buffer type
   /// @param el_ty the storage buffer element type
   /// @param var_user the variable user
   /// @return the name of the function that performs the store
   Symbol StoreFunc(CloneContext& ctx,
-                   const ast::TypeDecl* insert_after,
                    const sem::Type* buf_ty,
                    const sem::Type* el_ty,
                    const sem::VariableUser* var_user) {
@@ -597,8 +572,7 @@
             ast::StatementList body;
             if (auto* mat_ty = el_ty->As<sem::Matrix>()) {
               auto* vec_ty = mat_ty->ColumnType();
-              Symbol store =
-                  StoreFunc(ctx, insert_after, buf_ty, vec_ty, var_user);
+              Symbol store = StoreFunc(ctx, buf_ty, vec_ty, var_user);
               for (uint32_t i = 0; i < mat_ty->columns(); i++) {
                 auto* offset =
                     ctx.dst->Add("offset", i * MatrixColumnStride(mat_ty));
@@ -611,7 +585,7 @@
                 auto* offset = ctx.dst->Add("offset", member->Offset());
                 auto* access = ctx.dst->MemberAccessor(
                     "value", ctx.Clone(member->Declaration()->symbol()));
-                Symbol store = StoreFunc(ctx, insert_after, buf_ty,
+                Symbol store = StoreFunc(ctx, buf_ty,
                                          member->Type()->UnwrapRef(), var_user);
                 auto* call = ctx.dst->Call(store, "buffer", offset, access);
                 body.emplace_back(ctx.dst->create<ast::CallStatement>(call));
@@ -621,9 +595,8 @@
                 auto* offset = ctx.dst->Add("offset", arr->Stride() * i);
                 auto* access =
                     ctx.dst->IndexAccessor("value", ctx.dst->Expr(i));
-                Symbol store =
-                    StoreFunc(ctx, insert_after, buf_ty,
-                              arr->ElemType()->UnwrapRef(), var_user);
+                Symbol store = StoreFunc(
+                    ctx, buf_ty, arr->ElemType()->UnwrapRef(), var_user);
                 auto* call = ctx.dst->Call(store, "buffer", offset, access);
                 body.emplace_back(ctx.dst->create<ast::CallStatement>(call));
               }
@@ -634,7 +607,7 @@
                 ast::DecorationList{});
           }
 
-          InsertGlobal(ctx, insert_after, func);
+          ctx.dst->AST().AddFunction(func);
           return func->symbol();
         });
   }
@@ -644,14 +617,12 @@
   /// the signature:
   // `fn atomic_op(buf : buf_ty, offset : u32, ...) -> T`
   /// @param ctx the CloneContext
-  /// @param insert_after the user-declared type to insert the function after
   /// @param buf_ty the storage buffer type
   /// @param el_ty the storage buffer element type
   /// @param intrinsic the atomic intrinsic
   /// @param var_user the variable user
   /// @return the name of the function that performs the load
   Symbol AtomicFunc(CloneContext& ctx,
-                    const ast::TypeDecl* insert_after,
                     const sem::Type* buf_ty,
                     const sem::Type* el_ty,
                     const sem::Intrinsic* intrinsic,
@@ -700,7 +671,7 @@
           },
           ast::DecorationList{});
 
-      InsertGlobal(ctx, insert_after, func);
+      ctx.dst->AST().AddFunction(func);
       return func->symbol();
     });
   }
@@ -825,7 +796,7 @@
           // Variable to a storage or uniform buffer
           state.AddAccess(ident, {
                                      var,
-                                     ToOffset(0u),
+                                     state.ToOffset(0u),
                                      var->Type()->UnwrapRef(),
                                  });
         }
@@ -840,14 +811,13 @@
         if (swizzle->Indices().size() == 1) {
           if (auto access = state.TakeAccess(accessor->structure())) {
             auto* vec_ty = access.type->As<sem::Vector>();
-            auto offset =
-                Mul(ScalarSize(vec_ty->type()), swizzle->Indices()[0]);
-            state.AddAccess(
-                accessor, {
-                              access.var,
-                              Add(std::move(access.offset), std::move(offset)),
-                              vec_ty->type()->UnwrapRef(),
-                          });
+            auto* offset =
+                state.Mul(ScalarSize(vec_ty->type()), swizzle->Indices()[0]);
+            state.AddAccess(accessor, {
+                                          access.var,
+                                          state.Add(access.offset, offset),
+                                          vec_ty->type()->UnwrapRef(),
+                                      });
           }
         }
       } else {
@@ -855,12 +825,11 @@
           auto* str_ty = access.type->As<sem::Struct>();
           auto* member = str_ty->FindMember(accessor->member()->symbol());
           auto offset = member->Offset();
-          state.AddAccess(accessor,
-                          {
-                              access.var,
-                              Add(std::move(access.offset), std::move(offset)),
-                              member->Type()->UnwrapRef(),
-                          });
+          state.AddAccess(accessor, {
+                                        access.var,
+                                        state.Add(access.offset, offset),
+                                        member->Type()->UnwrapRef(),
+                                    });
         }
       }
       continue;
@@ -870,33 +839,32 @@
       if (auto access = state.TakeAccess(accessor->array())) {
         // X[Y]
         if (auto* arr = access.type->As<sem::Array>()) {
-          auto offset = Mul(arr->Stride(), accessor->idx_expr());
-          state.AddAccess(accessor,
-                          {
-                              access.var,
-                              Add(std::move(access.offset), std::move(offset)),
-                              arr->ElemType()->UnwrapRef(),
-                          });
+          auto* offset = state.Mul(arr->Stride(), accessor->idx_expr());
+          state.AddAccess(accessor, {
+                                        access.var,
+                                        state.Add(access.offset, offset),
+                                        arr->ElemType()->UnwrapRef(),
+                                    });
           continue;
         }
         if (auto* vec_ty = access.type->As<sem::Vector>()) {
-          auto offset = Mul(ScalarSize(vec_ty->type()), accessor->idx_expr());
-          state.AddAccess(accessor,
-                          {
-                              access.var,
-                              Add(std::move(access.offset), std::move(offset)),
-                              vec_ty->type()->UnwrapRef(),
-                          });
+          auto* offset =
+              state.Mul(ScalarSize(vec_ty->type()), accessor->idx_expr());
+          state.AddAccess(accessor, {
+                                        access.var,
+                                        state.Add(access.offset, offset),
+                                        vec_ty->type()->UnwrapRef(),
+                                    });
           continue;
         }
         if (auto* mat_ty = access.type->As<sem::Matrix>()) {
-          auto offset = Mul(MatrixColumnStride(mat_ty), accessor->idx_expr());
-          state.AddAccess(accessor,
-                          {
-                              access.var,
-                              Add(std::move(access.offset), std::move(offset)),
-                              mat_ty->ColumnType(),
-                          });
+          auto* offset =
+              state.Mul(MatrixColumnStride(mat_ty), accessor->idx_expr());
+          state.AddAccess(accessor, {
+                                        access.var,
+                                        state.Add(access.offset, offset),
+                                        mat_ty->ColumnType(),
+                                    });
           continue;
         }
       }
@@ -908,7 +876,7 @@
         if (auto access = state.TakeAccess(op->expr())) {
           // HLSL does not support pointers, so just take the access from the
           // reference and place it on the pointer.
-          state.AddAccess(op, std::move(access));
+          state.AddAccess(op, access);
           continue;
         }
       }
@@ -918,7 +886,7 @@
       // X = Y
       // Move the LHS access to a store.
       if (auto lhs = state.TakeAccess(assign->lhs())) {
-        state.stores.emplace_back(Store{assign, std::move(lhs)});
+        state.stores.emplace_back(Store{assign, lhs});
       }
     }
 
@@ -934,23 +902,22 @@
         if (intrinsic->IsAtomic()) {
           if (auto access = state.TakeAccess(call_expr->params()[0])) {
             // atomic___(X)
+            ctx.Replace(call_expr, [=, &ctx, &state] {
+              auto* buf = access.var->Declaration();
+              auto* offset = access.offset->Build(ctx);
+              auto* buf_ty = access.var->Type()->UnwrapRef();
+              auto* el_ty = access.type->UnwrapRef()->As<sem::Atomic>()->Type();
+              Symbol func =
+                  state.AtomicFunc(ctx, buf_ty, el_ty, intrinsic,
+                                   access.var->As<sem::VariableUser>());
 
-            auto* buf = access.var->Declaration();
-            auto* offset = access.offset->Build(ctx);
-            auto* buf_ty = access.var->Type()->UnwrapRef();
-            auto* el_ty = access.type->UnwrapRef()->As<sem::Atomic>()->Type();
-            auto* insert_after = TypeDeclOf(access.var->Type());
-            Symbol func =
-                state.AtomicFunc(ctx, insert_after, buf_ty, el_ty, intrinsic,
-                                 access.var->As<sem::VariableUser>());
-
-            ast::ExpressionList args{ctx.Clone(buf), offset};
-            for (size_t i = 1; i < call_expr->params().size(); i++) {
-              auto* arg = call_expr->params()[i];
-              args.emplace_back(ctx.Clone(arg));
-            }
-
-            ctx.Replace(call_expr, ctx.dst->Call(func, args));
+              ast::ExpressionList args{ctx.Clone(buf), offset};
+              for (size_t i = 1; i < call_expr->params().size(); i++) {
+                auto* arg = call_expr->params()[i];
+                args.emplace_back(ctx.Clone(arg));
+              }
+              return ctx.dst->Call(func, args);
+            });
           }
         }
       }
@@ -964,36 +931,32 @@
     if (access_it == state.accesses.end()) {
       continue;
     }
-
-    auto access = std::move(access_it->second);
-
-    auto* buf = access.var->Declaration();
-    auto* offset = access.offset->Build(ctx);
-    auto* buf_ty = access.var->Type()->UnwrapRef();
-    auto* el_ty = access.type->UnwrapRef();
-    auto* insert_after = TypeDeclOf(access.var->Type());
-    Symbol func = state.LoadFunc(ctx, insert_after, buf_ty, el_ty,
-                                 access.var->As<sem::VariableUser>());
-
-    auto* load = ctx.dst->Call(func, ctx.Clone(buf), offset);
-
-    ctx.Replace(expr, load);
+    BufferAccess access = access_it->second;
+    ctx.Replace(expr, [=, &ctx, &state] {
+      auto* buf = access.var->Declaration();
+      auto* offset = access.offset->Build(ctx);
+      auto* buf_ty = access.var->Type()->UnwrapRef();
+      auto* el_ty = access.type->UnwrapRef();
+      Symbol func = state.LoadFunc(ctx, buf_ty, el_ty,
+                                   access.var->As<sem::VariableUser>());
+      return ctx.dst->Call(func, ctx.CloneWithoutTransform(buf), offset);
+    });
   }
 
   // And replace all storage and uniform buffer assignments with stores
-  for (auto& store : state.stores) {
-    auto* buf = store.target.var->Declaration();
-    auto* offset = store.target.offset->Build(ctx);
-    auto* buf_ty = store.target.var->Type()->UnwrapRef();
-    auto* el_ty = store.target.type->UnwrapRef();
-    auto* value = store.assignment->rhs();
-    auto* insert_after = TypeDeclOf(store.target.var->Type());
-    Symbol func = state.StoreFunc(ctx, insert_after, buf_ty, el_ty,
-                                  store.target.var->As<sem::VariableUser>());
-
-    auto* call = ctx.dst->Call(func, ctx.Clone(buf), offset, ctx.Clone(value));
-
-    ctx.Replace(store.assignment, ctx.dst->create<ast::CallStatement>(call));
+  for (auto store : state.stores) {
+    ctx.Replace(store.assignment, [=, &ctx, &state] {
+      auto* buf = store.target.var->Declaration();
+      auto* offset = store.target.offset->Build(ctx);
+      auto* buf_ty = store.target.var->Type()->UnwrapRef();
+      auto* el_ty = store.target.type->UnwrapRef();
+      auto* value = store.assignment->rhs();
+      Symbol func = state.StoreFunc(ctx, buf_ty, el_ty,
+                                    store.target.var->As<sem::VariableUser>());
+      auto* call = ctx.dst->Call(func, ctx.CloneWithoutTransform(buf), offset,
+                                 ctx.Clone(value));
+      return ctx.dst->create<ast::CallStatement>(call);
+    });
   }
 
   ctx.Clone();
diff --git a/src/transform/decompose_memory_access_test.cc b/src/transform/decompose_memory_access_test.cc
index 0606962..5e9e60d 100644
--- a/src/transform/decompose_memory_access_test.cc
+++ b/src/transform/decompose_memory_access_test.cc
@@ -106,6 +106,8 @@
   v : array<vec3<f32>, 2>;
 };
 
+[[group(0), binding(0)]] var<storage, read_write> sb : SB;
+
 [[internal(intrinsic_load_storage_i32), internal(disable_validation__function_has_no_body)]]
 fn tint_symbol([[internal(disable_validation__ignore_constructible_function_parameter)]] buffer : SB, offset : u32) -> i32
 
@@ -182,8 +184,6 @@
   return array<vec3<f32>, 2>(tint_symbol_8(buffer, (offset + 0u)), tint_symbol_8(buffer, (offset + 16u)));
 }
 
-[[group(0), binding(0)]] var<storage, read_write> sb : SB;
-
 [[stage(compute), workgroup_size(1)]]
 fn main() {
   var a : i32 = tint_symbol(sb, 0u);
@@ -300,6 +300,8 @@
   v : array<vec3<f32>, 2>;
 };
 
+[[group(0), binding(0)]] var<uniform> ub : UB;
+
 [[internal(intrinsic_load_uniform_i32), internal(disable_validation__function_has_no_body)]]
 fn tint_symbol([[internal(disable_validation__ignore_constructible_function_parameter)]] buffer : UB, offset : u32) -> i32
 
@@ -376,8 +378,6 @@
   return array<vec3<f32>, 2>(tint_symbol_8(buffer, (offset + 0u)), tint_symbol_8(buffer, (offset + 16u)));
 }
 
-[[group(0), binding(0)]] var<uniform> ub : UB;
-
 [[stage(compute), workgroup_size(1)]]
 fn main() {
   var a : i32 = tint_symbol(ub, 0u);
@@ -494,6 +494,8 @@
   v : array<vec3<f32>, 2>;
 };
 
+[[group(0), binding(0)]] var<storage, read_write> sb : SB;
+
 [[internal(intrinsic_store_storage_i32), internal(disable_validation__function_has_no_body)]]
 fn tint_symbol([[internal(disable_validation__ignore_constructible_function_parameter)]] buffer : SB, offset : u32, value : i32)
 
@@ -589,8 +591,6 @@
   tint_symbol_8(buffer, (offset + 16u), value[1u]);
 }
 
-[[group(0), binding(0)]] var<storage, read_write> sb : SB;
-
 [[stage(compute), workgroup_size(1)]]
 fn main() {
   tint_symbol(sb, 0u, i32());
@@ -686,6 +686,8 @@
   v : array<vec3<f32>, 2>;
 };
 
+[[group(0), binding(0)]] var<storage, read_write> sb : SB;
+
 [[internal(intrinsic_load_storage_i32), internal(disable_validation__function_has_no_body)]]
 fn tint_symbol([[internal(disable_validation__ignore_constructible_function_parameter)]] buffer : SB, offset : u32) -> i32
 
@@ -766,8 +768,6 @@
   return SB(tint_symbol(buffer, (offset + 0u)), tint_symbol_1(buffer, (offset + 4u)), tint_symbol_2(buffer, (offset + 8u)), tint_symbol_3(buffer, (offset + 16u)), tint_symbol_4(buffer, (offset + 24u)), tint_symbol_5(buffer, (offset + 32u)), tint_symbol_6(buffer, (offset + 48u)), tint_symbol_7(buffer, (offset + 64u)), tint_symbol_8(buffer, (offset + 80u)), tint_symbol_9(buffer, (offset + 96u)), tint_symbol_10(buffer, (offset + 112u)), tint_symbol_11(buffer, (offset + 128u)), tint_symbol_12(buffer, (offset + 144u)), tint_symbol_13(buffer, (offset + 160u)), tint_symbol_14(buffer, (offset + 192u)), tint_symbol_15(buffer, (offset + 224u)), tint_symbol_16(buffer, (offset + 256u)), tint_symbol_17(buffer, (offset + 304u)), tint_symbol_18(buffer, (offset + 352u)), tint_symbol_19(buffer, (offset + 384u)), tint_symbol_20(buffer, (offset + 448u)), tint_symbol_21(buffer, (offset + 512u)));
 }
 
-[[group(0), binding(0)]] var<storage, read_write> sb : SB;
-
 [[stage(compute), workgroup_size(1)]]
 fn main() {
   var x : SB = tint_symbol_22(sb, 0u);
@@ -842,6 +842,8 @@
   v : array<vec3<f32>, 2>;
 };
 
+[[group(0), binding(0)]] var<storage, read_write> sb : SB;
+
 [[internal(intrinsic_store_storage_i32), internal(disable_validation__function_has_no_body)]]
 fn tint_symbol([[internal(disable_validation__ignore_constructible_function_parameter)]] buffer : SB, offset : u32, value : i32)
 
@@ -962,8 +964,6 @@
   tint_symbol_21(buffer, (offset + 512u), value.v);
 }
 
-[[group(0), binding(0)]] var<storage, read_write> sb : SB;
-
 [[stage(compute), workgroup_size(1)]]
 fn main() {
   tint_symbol_22(sb, 0u, SB());
@@ -1031,11 +1031,11 @@
   b : [[stride(256)]] array<S2>;
 };
 
+[[group(0), binding(0)]] var<storage, read_write> sb : SB;
+
 [[internal(intrinsic_load_storage_f32), internal(disable_validation__function_has_no_body)]]
 fn tint_symbol([[internal(disable_validation__ignore_constructible_function_parameter)]] buffer : SB, offset : u32) -> f32
 
-[[group(0), binding(0)]] var<storage, read_write> sb : SB;
-
 [[stage(compute), workgroup_size(1)]]
 fn main() {
   var x : f32 = tint_symbol(sb, 1224u);
@@ -1099,11 +1099,11 @@
   b : [[stride(256)]] array<S2>;
 };
 
+[[group(0), binding(0)]] var<storage, read_write> sb : SB;
+
 [[internal(intrinsic_load_storage_f32), internal(disable_validation__function_has_no_body)]]
 fn tint_symbol([[internal(disable_validation__ignore_constructible_function_parameter)]] buffer : SB, offset : u32) -> f32
 
-[[group(0), binding(0)]] var<storage, read_write> sb : SB;
-
 [[stage(compute), workgroup_size(1)]]
 fn main() {
   var i : i32 = 4;
@@ -1186,11 +1186,11 @@
   b : A2_Array;
 };
 
+[[group(0), binding(0)]] var<storage, read_write> sb : SB;
+
 [[internal(intrinsic_load_storage_f32), internal(disable_validation__function_has_no_body)]]
 fn tint_symbol([[internal(disable_validation__ignore_constructible_function_parameter)]] buffer : SB, offset : u32) -> f32
 
-[[group(0), binding(0)]] var<storage, read_write> sb : SB;
-
 [[stage(compute), workgroup_size(1)]]
 fn main() {
   var i : i32 = 4;
@@ -1250,6 +1250,8 @@
   b : atomic<u32>;
 };
 
+[[group(0), binding(0)]] var<storage, read_write> sb : SB;
+
 [[internal(intrinsic_atomic_store_storage_i32), internal(disable_validation__function_has_no_body)]]
 fn tint_symbol([[internal(disable_validation__ignore_constructible_function_parameter)]] buffer : SB, offset : u32, param_1 : i32)
 
@@ -1310,8 +1312,6 @@
 [[internal(intrinsic_atomic_compare_exchange_weak_storage_u32), internal(disable_validation__function_has_no_body)]]
 fn tint_symbol_19([[internal(disable_validation__ignore_constructible_function_parameter)]] buffer : SB, offset : u32, param_1 : u32, param_2 : u32) -> vec2<u32>
 
-[[group(0), binding(0)]] var<storage, read_write> sb : SB;
-
 [[stage(compute), workgroup_size(1)]]
 fn main() {
   tint_symbol(sb, 16u, 123);
diff --git a/src/writer/hlsl/generator_impl_member_accessor_test.cc b/src/writer/hlsl/generator_impl_member_accessor_test.cc
index 1205e93..10d7cb9 100644
--- a/src/writer/hlsl/generator_impl_member_accessor_test.cc
+++ b/src/writer/hlsl/generator_impl_member_accessor_test.cc
@@ -341,13 +341,13 @@
 
   ASSERT_TRUE(gen.Generate()) << gen.error();
   auto* expected =
-      R"(void tint_symbol_1(RWByteAddressBuffer buffer, uint offset, float2x3 value) {
+      R"(RWByteAddressBuffer data : register(u0, space1);
+
+void tint_symbol_1(RWByteAddressBuffer buffer, uint offset, float2x3 value) {
   buffer.Store3((offset + 0u), asuint(value[0u]));
   buffer.Store3((offset + 16u), asuint(value[1u]));
 }
 
-RWByteAddressBuffer data : register(u0, space1);
-
 void main() {
   tint_symbol_1(data, 16u, float2x3(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f));
   return;
diff --git a/test/array/assign_to_function_var.wgsl.expected.hlsl b/test/array/assign_to_function_var.wgsl.expected.hlsl
index 7fc6d52..2c7d6be 100644
--- a/test/array/assign_to_function_var.wgsl.expected.hlsl
+++ b/test/array/assign_to_function_var.wgsl.expected.hlsl
@@ -10,22 +10,6 @@
   tint_padded_array_element arr[4];
 };
 
-typedef tint_padded_array_element tint_symbol_2_ret[4];
-tint_symbol_2_ret tint_symbol_2(uint4 buffer[4], uint offset) {
-  const uint scalar_offset = ((offset + 0u)) / 4;
-  const uint scalar_offset_1 = ((offset + 16u)) / 4;
-  const uint scalar_offset_2 = ((offset + 32u)) / 4;
-  const uint scalar_offset_3 = ((offset + 48u)) / 4;
-  const tint_padded_array_element tint_symbol_5[4] = {{asint(buffer[scalar_offset / 4][scalar_offset % 4])}, {asint(buffer[scalar_offset_1 / 4][scalar_offset_1 % 4])}, {asint(buffer[scalar_offset_2 / 4][scalar_offset_2 % 4])}, {asint(buffer[scalar_offset_3 / 4][scalar_offset_3 % 4])}};
-  return tint_symbol_5;
-}
-
-typedef tint_padded_array_element tint_symbol_4_ret[4];
-tint_symbol_4_ret tint_symbol_4(RWByteAddressBuffer buffer, uint offset) {
-  const tint_padded_array_element tint_symbol_6[4] = {{asint(buffer.Load((offset + 0u)))}, {asint(buffer.Load((offset + 16u)))}, {asint(buffer.Load((offset + 32u)))}, {asint(buffer.Load((offset + 48u)))}};
-  return tint_symbol_6;
-}
-
 static tint_padded_array_element src_private[4] = (tint_padded_array_element[4])0;
 groupshared tint_padded_array_element src_workgroup[4];
 cbuffer cbuffer_src_uniform : register(b0, space0) {
@@ -35,12 +19,28 @@
 
 typedef tint_padded_array_element ret_arr_ret[4];
 ret_arr_ret ret_arr() {
-  const tint_padded_array_element tint_symbol_7[4] = (tint_padded_array_element[4])0;
-  return tint_symbol_7;
+  const tint_padded_array_element tint_symbol_5[4] = (tint_padded_array_element[4])0;
+  return tint_symbol_5;
 }
 
 S ret_struct_arr() {
-  const S tint_symbol_8 = (S)0;
+  const S tint_symbol_6 = (S)0;
+  return tint_symbol_6;
+}
+
+typedef tint_padded_array_element tint_symbol_2_ret[4];
+tint_symbol_2_ret tint_symbol_2(uint4 buffer[4], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  const uint scalar_offset_2 = ((offset + 32u)) / 4;
+  const uint scalar_offset_3 = ((offset + 48u)) / 4;
+  const tint_padded_array_element tint_symbol_7[4] = {{asint(buffer[scalar_offset / 4][scalar_offset % 4])}, {asint(buffer[scalar_offset_1 / 4][scalar_offset_1 % 4])}, {asint(buffer[scalar_offset_2 / 4][scalar_offset_2 % 4])}, {asint(buffer[scalar_offset_3 / 4][scalar_offset_3 % 4])}};
+  return tint_symbol_7;
+}
+
+typedef tint_padded_array_element tint_symbol_4_ret[4];
+tint_symbol_4_ret tint_symbol_4(RWByteAddressBuffer buffer, uint offset) {
+  const tint_padded_array_element tint_symbol_8[4] = {{asint(buffer.Load((offset + 0u)))}, {asint(buffer.Load((offset + 16u)))}, {asint(buffer.Load((offset + 32u)))}, {asint(buffer.Load((offset + 48u)))}};
   return tint_symbol_8;
 }
 
diff --git a/test/array/assign_to_private_var.wgsl.expected.hlsl b/test/array/assign_to_private_var.wgsl.expected.hlsl
index 55e7cc6..68ea5c3 100644
--- a/test/array/assign_to_private_var.wgsl.expected.hlsl
+++ b/test/array/assign_to_private_var.wgsl.expected.hlsl
@@ -10,22 +10,6 @@
   tint_padded_array_element arr[4];
 };
 
-typedef tint_padded_array_element tint_symbol_2_ret[4];
-tint_symbol_2_ret tint_symbol_2(uint4 buffer[4], uint offset) {
-  const uint scalar_offset = ((offset + 0u)) / 4;
-  const uint scalar_offset_1 = ((offset + 16u)) / 4;
-  const uint scalar_offset_2 = ((offset + 32u)) / 4;
-  const uint scalar_offset_3 = ((offset + 48u)) / 4;
-  const tint_padded_array_element tint_symbol_5[4] = {{asint(buffer[scalar_offset / 4][scalar_offset % 4])}, {asint(buffer[scalar_offset_1 / 4][scalar_offset_1 % 4])}, {asint(buffer[scalar_offset_2 / 4][scalar_offset_2 % 4])}, {asint(buffer[scalar_offset_3 / 4][scalar_offset_3 % 4])}};
-  return tint_symbol_5;
-}
-
-typedef tint_padded_array_element tint_symbol_4_ret[4];
-tint_symbol_4_ret tint_symbol_4(RWByteAddressBuffer buffer, uint offset) {
-  const tint_padded_array_element tint_symbol_6[4] = {{asint(buffer.Load((offset + 0u)))}, {asint(buffer.Load((offset + 16u)))}, {asint(buffer.Load((offset + 32u)))}, {asint(buffer.Load((offset + 48u)))}};
-  return tint_symbol_6;
-}
-
 static tint_padded_array_element src_private[4] = (tint_padded_array_element[4])0;
 groupshared tint_padded_array_element src_workgroup[4];
 cbuffer cbuffer_src_uniform : register(b0, space0) {
@@ -37,12 +21,28 @@
 
 typedef tint_padded_array_element ret_arr_ret[4];
 ret_arr_ret ret_arr() {
-  const tint_padded_array_element tint_symbol_7[4] = (tint_padded_array_element[4])0;
-  return tint_symbol_7;
+  const tint_padded_array_element tint_symbol_5[4] = (tint_padded_array_element[4])0;
+  return tint_symbol_5;
 }
 
 S ret_struct_arr() {
-  const S tint_symbol_8 = (S)0;
+  const S tint_symbol_6 = (S)0;
+  return tint_symbol_6;
+}
+
+typedef tint_padded_array_element tint_symbol_2_ret[4];
+tint_symbol_2_ret tint_symbol_2(uint4 buffer[4], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  const uint scalar_offset_2 = ((offset + 32u)) / 4;
+  const uint scalar_offset_3 = ((offset + 48u)) / 4;
+  const tint_padded_array_element tint_symbol_7[4] = {{asint(buffer[scalar_offset / 4][scalar_offset % 4])}, {asint(buffer[scalar_offset_1 / 4][scalar_offset_1 % 4])}, {asint(buffer[scalar_offset_2 / 4][scalar_offset_2 % 4])}, {asint(buffer[scalar_offset_3 / 4][scalar_offset_3 % 4])}};
+  return tint_symbol_7;
+}
+
+typedef tint_padded_array_element tint_symbol_4_ret[4];
+tint_symbol_4_ret tint_symbol_4(RWByteAddressBuffer buffer, uint offset) {
+  const tint_padded_array_element tint_symbol_8[4] = {{asint(buffer.Load((offset + 0u)))}, {asint(buffer.Load((offset + 16u)))}, {asint(buffer.Load((offset + 32u)))}, {asint(buffer.Load((offset + 48u)))}};
   return tint_symbol_8;
 }
 
diff --git a/test/array/assign_to_storage_var.wgsl.expected.hlsl b/test/array/assign_to_storage_var.wgsl.expected.hlsl
index bf9232b..ee58066 100644
--- a/test/array/assign_to_storage_var.wgsl.expected.hlsl
+++ b/test/array/assign_to_storage_var.wgsl.expected.hlsl
@@ -10,29 +10,49 @@
   tint_padded_array_element arr[4];
 };
 
-typedef tint_padded_array_element tint_symbol_2_ret[4];
-tint_symbol_2_ret tint_symbol_2(uint4 buffer[4], uint offset) {
-  const uint scalar_offset = ((offset + 0u)) / 4;
-  const uint scalar_offset_1 = ((offset + 16u)) / 4;
-  const uint scalar_offset_2 = ((offset + 32u)) / 4;
-  const uint scalar_offset_3 = ((offset + 48u)) / 4;
-  const tint_padded_array_element tint_symbol_11[4] = {{asint(buffer[scalar_offset / 4][scalar_offset % 4])}, {asint(buffer[scalar_offset_1 / 4][scalar_offset_1 % 4])}, {asint(buffer[scalar_offset_2 / 4][scalar_offset_2 % 4])}, {asint(buffer[scalar_offset_3 / 4][scalar_offset_3 % 4])}};
+static tint_padded_array_element src_private[4] = (tint_padded_array_element[4])0;
+groupshared tint_padded_array_element src_workgroup[4];
+cbuffer cbuffer_src_uniform : register(b0, space0) {
+  uint4 src_uniform[4];
+};
+RWByteAddressBuffer src_storage : register(u1, space0);
+RWByteAddressBuffer tint_symbol : register(u2, space0);
+RWByteAddressBuffer dst_nested : register(u3, space0);
+
+typedef tint_padded_array_element ret_arr_ret[4];
+ret_arr_ret ret_arr() {
+  const tint_padded_array_element tint_symbol_11[4] = (tint_padded_array_element[4])0;
   return tint_symbol_11;
 }
 
-typedef tint_padded_array_element tint_symbol_4_ret[4];
-tint_symbol_4_ret tint_symbol_4(RWByteAddressBuffer buffer, uint offset) {
-  const tint_padded_array_element tint_symbol_12[4] = {{asint(buffer.Load((offset + 0u)))}, {asint(buffer.Load((offset + 16u)))}, {asint(buffer.Load((offset + 32u)))}, {asint(buffer.Load((offset + 48u)))}};
+S ret_struct_arr() {
+  const S tint_symbol_12 = (S)0;
   return tint_symbol_12;
 }
 
-void tint_symbol_6(RWByteAddressBuffer buffer, uint offset, tint_padded_array_element value[4]) {
+void tint_symbol_2(RWByteAddressBuffer buffer, uint offset, tint_padded_array_element value[4]) {
   buffer.Store((offset + 0u), asuint(value[0u].el));
   buffer.Store((offset + 16u), asuint(value[1u].el));
   buffer.Store((offset + 32u), asuint(value[2u].el));
   buffer.Store((offset + 48u), asuint(value[3u].el));
 }
 
+typedef tint_padded_array_element tint_symbol_4_ret[4];
+tint_symbol_4_ret tint_symbol_4(uint4 buffer[4], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  const uint scalar_offset_2 = ((offset + 32u)) / 4;
+  const uint scalar_offset_3 = ((offset + 48u)) / 4;
+  const tint_padded_array_element tint_symbol_13[4] = {{asint(buffer[scalar_offset / 4][scalar_offset % 4])}, {asint(buffer[scalar_offset_1 / 4][scalar_offset_1 % 4])}, {asint(buffer[scalar_offset_2 / 4][scalar_offset_2 % 4])}, {asint(buffer[scalar_offset_3 / 4][scalar_offset_3 % 4])}};
+  return tint_symbol_13;
+}
+
+typedef tint_padded_array_element tint_symbol_6_ret[4];
+tint_symbol_6_ret tint_symbol_6(RWByteAddressBuffer buffer, uint offset) {
+  const tint_padded_array_element tint_symbol_14[4] = {{asint(buffer.Load((offset + 0u)))}, {asint(buffer.Load((offset + 16u)))}, {asint(buffer.Load((offset + 32u)))}, {asint(buffer.Load((offset + 48u)))}};
+  return tint_symbol_14;
+}
+
 void tint_symbol_8(RWByteAddressBuffer buffer, uint offset, int value[2]) {
   buffer.Store((offset + 0u), asuint(value[0u]));
   buffer.Store((offset + 4u), asuint(value[1u]));
@@ -51,40 +71,20 @@
   tint_symbol_9(buffer, (offset + 72u), value[3u]);
 }
 
-static tint_padded_array_element src_private[4] = (tint_padded_array_element[4])0;
-groupshared tint_padded_array_element src_workgroup[4];
-cbuffer cbuffer_src_uniform : register(b0, space0) {
-  uint4 src_uniform[4];
-};
-RWByteAddressBuffer src_storage : register(u1, space0);
-RWByteAddressBuffer tint_symbol : register(u2, space0);
-RWByteAddressBuffer dst_nested : register(u3, space0);
-
-typedef tint_padded_array_element ret_arr_ret[4];
-ret_arr_ret ret_arr() {
-  const tint_padded_array_element tint_symbol_13[4] = (tint_padded_array_element[4])0;
-  return tint_symbol_13;
-}
-
-S ret_struct_arr() {
-  const S tint_symbol_14 = (S)0;
-  return tint_symbol_14;
-}
-
 void foo(tint_padded_array_element src_param[4]) {
   tint_padded_array_element src_function[4] = (tint_padded_array_element[4])0;
   const tint_padded_array_element tint_symbol_15[4] = {{1}, {2}, {3}, {3}};
-  tint_symbol_6(tint_symbol, 0u, tint_symbol_15);
-  tint_symbol_6(tint_symbol, 0u, src_param);
-  tint_symbol_6(tint_symbol, 0u, ret_arr());
+  tint_symbol_2(tint_symbol, 0u, tint_symbol_15);
+  tint_symbol_2(tint_symbol, 0u, src_param);
+  tint_symbol_2(tint_symbol, 0u, ret_arr());
   const tint_padded_array_element src_let[4] = (tint_padded_array_element[4])0;
-  tint_symbol_6(tint_symbol, 0u, src_let);
-  tint_symbol_6(tint_symbol, 0u, src_function);
-  tint_symbol_6(tint_symbol, 0u, src_private);
-  tint_symbol_6(tint_symbol, 0u, src_workgroup);
-  tint_symbol_6(tint_symbol, 0u, ret_struct_arr().arr);
-  tint_symbol_6(tint_symbol, 0u, tint_symbol_2(src_uniform, 0u));
-  tint_symbol_6(tint_symbol, 0u, tint_symbol_4(src_storage, 0u));
+  tint_symbol_2(tint_symbol, 0u, src_let);
+  tint_symbol_2(tint_symbol, 0u, src_function);
+  tint_symbol_2(tint_symbol, 0u, src_private);
+  tint_symbol_2(tint_symbol, 0u, src_workgroup);
+  tint_symbol_2(tint_symbol, 0u, ret_struct_arr().arr);
+  tint_symbol_2(tint_symbol, 0u, tint_symbol_4(src_uniform, 0u));
+  tint_symbol_2(tint_symbol, 0u, tint_symbol_6(src_storage, 0u));
   int src_nested[4][3][2] = (int[4][3][2])0;
   tint_symbol_10(dst_nested, 0u, src_nested);
 }
diff --git a/test/array/assign_to_workgroup_var.wgsl.expected.hlsl b/test/array/assign_to_workgroup_var.wgsl.expected.hlsl
index 65d3fc9..48c5fae 100644
--- a/test/array/assign_to_workgroup_var.wgsl.expected.hlsl
+++ b/test/array/assign_to_workgroup_var.wgsl.expected.hlsl
@@ -10,22 +10,6 @@
   tint_padded_array_element arr[4];
 };
 
-typedef tint_padded_array_element tint_symbol_2_ret[4];
-tint_symbol_2_ret tint_symbol_2(uint4 buffer[4], uint offset) {
-  const uint scalar_offset = ((offset + 0u)) / 4;
-  const uint scalar_offset_1 = ((offset + 16u)) / 4;
-  const uint scalar_offset_2 = ((offset + 32u)) / 4;
-  const uint scalar_offset_3 = ((offset + 48u)) / 4;
-  const tint_padded_array_element tint_symbol_5[4] = {{asint(buffer[scalar_offset / 4][scalar_offset % 4])}, {asint(buffer[scalar_offset_1 / 4][scalar_offset_1 % 4])}, {asint(buffer[scalar_offset_2 / 4][scalar_offset_2 % 4])}, {asint(buffer[scalar_offset_3 / 4][scalar_offset_3 % 4])}};
-  return tint_symbol_5;
-}
-
-typedef tint_padded_array_element tint_symbol_4_ret[4];
-tint_symbol_4_ret tint_symbol_4(RWByteAddressBuffer buffer, uint offset) {
-  const tint_padded_array_element tint_symbol_6[4] = {{asint(buffer.Load((offset + 0u)))}, {asint(buffer.Load((offset + 16u)))}, {asint(buffer.Load((offset + 32u)))}, {asint(buffer.Load((offset + 48u)))}};
-  return tint_symbol_6;
-}
-
 static tint_padded_array_element src_private[4] = (tint_padded_array_element[4])0;
 groupshared tint_padded_array_element src_workgroup[4];
 cbuffer cbuffer_src_uniform : register(b0, space0) {
@@ -37,12 +21,28 @@
 
 typedef tint_padded_array_element ret_arr_ret[4];
 ret_arr_ret ret_arr() {
-  const tint_padded_array_element tint_symbol_7[4] = (tint_padded_array_element[4])0;
-  return tint_symbol_7;
+  const tint_padded_array_element tint_symbol_5[4] = (tint_padded_array_element[4])0;
+  return tint_symbol_5;
 }
 
 S ret_struct_arr() {
-  const S tint_symbol_8 = (S)0;
+  const S tint_symbol_6 = (S)0;
+  return tint_symbol_6;
+}
+
+typedef tint_padded_array_element tint_symbol_2_ret[4];
+tint_symbol_2_ret tint_symbol_2(uint4 buffer[4], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  const uint scalar_offset_2 = ((offset + 32u)) / 4;
+  const uint scalar_offset_3 = ((offset + 48u)) / 4;
+  const tint_padded_array_element tint_symbol_7[4] = {{asint(buffer[scalar_offset / 4][scalar_offset % 4])}, {asint(buffer[scalar_offset_1 / 4][scalar_offset_1 % 4])}, {asint(buffer[scalar_offset_2 / 4][scalar_offset_2 % 4])}, {asint(buffer[scalar_offset_3 / 4][scalar_offset_3 % 4])}};
+  return tint_symbol_7;
+}
+
+typedef tint_padded_array_element tint_symbol_4_ret[4];
+tint_symbol_4_ret tint_symbol_4(RWByteAddressBuffer buffer, uint offset) {
+  const tint_padded_array_element tint_symbol_8[4] = {{asint(buffer.Load((offset + 0u)))}, {asint(buffer.Load((offset + 16u)))}, {asint(buffer.Load((offset + 32u)))}, {asint(buffer.Load((offset + 48u)))}};
   return tint_symbol_8;
 }
 
diff --git a/test/buffer/storage/dynamic_index/read.wgsl.expected.hlsl b/test/buffer/storage/dynamic_index/read.wgsl.expected.hlsl
index 89fc6b9..9b9390a 100644
--- a/test/buffer/storage/dynamic_index/read.wgsl.expected.hlsl
+++ b/test/buffer/storage/dynamic_index/read.wgsl.expected.hlsl
@@ -1,3 +1,9 @@
+ByteAddressBuffer s : register(t0, space0);
+
+struct tint_symbol_1 {
+  uint idx : SV_GroupIndex;
+};
+
 float2x3 tint_symbol_8(ByteAddressBuffer buffer, uint offset) {
   return float2x3(asfloat(buffer.Load3((offset + 0u))), asfloat(buffer.Load3((offset + 16u))));
 }
@@ -12,12 +18,6 @@
   return tint_symbol_13;
 }
 
-ByteAddressBuffer s : register(t0, space0);
-
-struct tint_symbol_1 {
-  uint idx : SV_GroupIndex;
-};
-
 [numthreads(1, 1, 1)]
 void main(tint_symbol_1 tint_symbol) {
   const uint idx = tint_symbol.idx;
diff --git a/test/buffer/storage/dynamic_index/write.wgsl.expected.hlsl b/test/buffer/storage/dynamic_index/write.wgsl.expected.hlsl
index 9d03558..74aeaf2 100644
--- a/test/buffer/storage/dynamic_index/write.wgsl.expected.hlsl
+++ b/test/buffer/storage/dynamic_index/write.wgsl.expected.hlsl
@@ -1,3 +1,9 @@
+RWByteAddressBuffer s : register(u0, space0);
+
+struct tint_symbol_1 {
+  uint idx : SV_GroupIndex;
+};
+
 void tint_symbol_8(RWByteAddressBuffer buffer, uint offset, float2x3 value) {
   buffer.Store3((offset + 0u), asuint(value[0u]));
   buffer.Store3((offset + 16u), asuint(value[1u]));
@@ -16,12 +22,6 @@
   buffer.Store4((offset + 48u), asuint(value[3u]));
 }
 
-RWByteAddressBuffer s : register(u0, space0);
-
-struct tint_symbol_1 {
-  uint idx : SV_GroupIndex;
-};
-
 [numthreads(1, 1, 1)]
 void main(tint_symbol_1 tint_symbol) {
   const uint idx = tint_symbol.idx;
diff --git a/test/buffer/storage/static_index/read.wgsl.expected.hlsl b/test/buffer/storage/static_index/read.wgsl.expected.hlsl
index 95ed272..c14caff 100644
--- a/test/buffer/storage/static_index/read.wgsl.expected.hlsl
+++ b/test/buffer/storage/static_index/read.wgsl.expected.hlsl
@@ -5,6 +5,8 @@
   Inner el;
 };
 
+ByteAddressBuffer s : register(t0, space0);
+
 float2x3 tint_symbol_6(ByteAddressBuffer buffer, uint offset) {
   return float2x3(asfloat(buffer.Load3((offset + 0u))), asfloat(buffer.Load3((offset + 16u))));
 }
@@ -24,8 +26,6 @@
   return tint_symbol_12;
 }
 
-ByteAddressBuffer s : register(t0, space0);
-
 [numthreads(1, 1, 1)]
 void main() {
   const int3 a = asint(s.Load3(0u));
diff --git a/test/buffer/storage/static_index/write.wgsl.expected.hlsl b/test/buffer/storage/static_index/write.wgsl.expected.hlsl
index 52c36c6..7e569cd 100644
--- a/test/buffer/storage/static_index/write.wgsl.expected.hlsl
+++ b/test/buffer/storage/static_index/write.wgsl.expected.hlsl
@@ -5,6 +5,8 @@
   Inner el;
 };
 
+RWByteAddressBuffer s : register(u0, space0);
+
 void tint_symbol_6(RWByteAddressBuffer buffer, uint offset, float2x3 value) {
   buffer.Store3((offset + 0u), asuint(value[0u]));
   buffer.Store3((offset + 16u), asuint(value[1u]));
@@ -27,8 +29,6 @@
   tint_symbol_9(buffer, (offset + 48u), value[3u].el);
 }
 
-RWByteAddressBuffer s : register(u0, space0);
-
 [numthreads(1, 1, 1)]
 void main() {
   s.Store3(0u, asuint(int3(0, 0, 0)));
diff --git a/test/buffer/uniform/dynamic_index/read.wgsl.expected.hlsl b/test/buffer/uniform/dynamic_index/read.wgsl.expected.hlsl
index 378e877..22133cc 100644
--- a/test/buffer/uniform/dynamic_index/read.wgsl.expected.hlsl
+++ b/test/buffer/uniform/dynamic_index/read.wgsl.expected.hlsl
@@ -1,9 +1,3 @@
-float2x3 tint_symbol_9(uint4 buffer[96], uint offset) {
-  const uint scalar_offset = ((offset + 0u)) / 4;
-  const uint scalar_offset_1 = ((offset + 16u)) / 4;
-  return float2x3(asfloat(buffer[scalar_offset / 4].xyz), asfloat(buffer[scalar_offset_1 / 4].xyz));
-}
-
 cbuffer cbuffer_s : register(b0, space0) {
   uint4 s[96];
 };
@@ -12,6 +6,12 @@
   uint idx : SV_GroupIndex;
 };
 
+float2x3 tint_symbol_9(uint4 buffer[96], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  return float2x3(asfloat(buffer[scalar_offset / 4].xyz), asfloat(buffer[scalar_offset_1 / 4].xyz));
+}
+
 [numthreads(1, 1, 1)]
 void main(tint_symbol_1 tint_symbol) {
   const uint idx = tint_symbol.idx;
diff --git a/test/buffer/uniform/static_index/read.wgsl.expected.hlsl b/test/buffer/uniform/static_index/read.wgsl.expected.hlsl
index 7acef2b..fa6d133 100644
--- a/test/buffer/uniform/static_index/read.wgsl.expected.hlsl
+++ b/test/buffer/uniform/static_index/read.wgsl.expected.hlsl
@@ -5,6 +5,10 @@
   Inner el;
 };
 
+cbuffer cbuffer_s : register(b0, space0) {
+  uint4 s[13];
+};
+
 float2x3 tint_symbol_7(uint4 buffer[13], uint offset) {
   const uint scalar_offset = ((offset + 0u)) / 4;
   const uint scalar_offset_1 = ((offset + 16u)) / 4;
@@ -33,10 +37,6 @@
   return tint_symbol_13;
 }
 
-cbuffer cbuffer_s : register(b0, space0) {
-  uint4 s[13];
-};
-
 [numthreads(1, 1, 1)]
 void main() {
   const int3 a = asint(s[0].xyz);
diff --git a/test/bug/tint/403.wgsl.expected.hlsl b/test/bug/tint/403.wgsl.expected.hlsl
index 51b6d4e..ef0848c 100644
--- a/test/bug/tint/403.wgsl.expected.hlsl
+++ b/test/bug/tint/403.wgsl.expected.hlsl
@@ -1,3 +1,17 @@
+cbuffer cbuffer_x_20 : register(b0, space0) {
+  uint4 x_20[1];
+};
+cbuffer cbuffer_x_26 : register(b0, space1) {
+  uint4 x_26[1];
+};
+
+struct tint_symbol_1 {
+  uint gl_VertexIndex : SV_VertexID;
+};
+struct tint_symbol_2 {
+  float4 value : SV_Position;
+};
+
 float2x2 tint_symbol_4(uint4 buffer[1], uint offset) {
   const uint scalar_offset = ((offset + 0u)) / 4;
   uint4 ubo_load = buffer[scalar_offset / 4];
@@ -14,20 +28,6 @@
   return float2x2(asfloat(((scalar_offset_2 & 2) ? ubo_load_2.zw : ubo_load_2.xy)), asfloat(((scalar_offset_3 & 2) ? ubo_load_3.zw : ubo_load_3.xy)));
 }
 
-cbuffer cbuffer_x_20 : register(b0, space0) {
-  uint4 x_20[1];
-};
-cbuffer cbuffer_x_26 : register(b0, space1) {
-  uint4 x_26[1];
-};
-
-struct tint_symbol_1 {
-  uint gl_VertexIndex : SV_VertexID;
-};
-struct tint_symbol_2 {
-  float4 value : SV_Position;
-};
-
 tint_symbol_2 main(tint_symbol_1 tint_symbol) {
   const uint gl_VertexIndex = tint_symbol.gl_VertexIndex;
   float2 indexable[3] = (float2[3])0;
diff --git a/test/bug/tint/870.spvasm.expected.hlsl b/test/bug/tint/870.spvasm.expected.hlsl
index db135e6..b3b5071 100644
--- a/test/bug/tint/870.spvasm.expected.hlsl
+++ b/test/bug/tint/870.spvasm.expected.hlsl
@@ -1,11 +1,11 @@
+ByteAddressBuffer sspp962805860buildInformation : register(t2, space0);
+
 typedef int tint_symbol_1_ret[6];
 tint_symbol_1_ret tint_symbol_1(ByteAddressBuffer buffer, uint offset) {
   const int tint_symbol_2[6] = {asint(buffer.Load((offset + 0u))), asint(buffer.Load((offset + 4u))), asint(buffer.Load((offset + 8u))), asint(buffer.Load((offset + 12u))), asint(buffer.Load((offset + 16u))), asint(buffer.Load((offset + 20u)))};
   return tint_symbol_2;
 }
 
-ByteAddressBuffer sspp962805860buildInformation : register(t2, space0);
-
 void main_1() {
   int orientation[6] = (int[6])0;
   const int x_23[6] = tint_symbol_1(sspp962805860buildInformation, 36u);
diff --git a/test/bug/tint/922.wgsl.expected.hlsl b/test/bug/tint/922.wgsl.expected.hlsl
index 60eb716..e0b2049 100644
--- a/test/bug/tint/922.wgsl.expected.hlsl
+++ b/test/bug/tint/922.wgsl.expected.hlsl
@@ -13,31 +13,6 @@
   float4 mx;
   float4 my;
 };
-
-Mat4x4_ tint_symbol_7(uint4 buffer[4], uint offset) {
-  const uint scalar_offset = ((offset + 0u)) / 4;
-  const uint scalar_offset_1 = ((offset + 16u)) / 4;
-  const uint scalar_offset_2 = ((offset + 32u)) / 4;
-  const uint scalar_offset_3 = ((offset + 48u)) / 4;
-  const Mat4x4_ tint_symbol_10 = {asfloat(buffer[scalar_offset / 4]), asfloat(buffer[scalar_offset_1 / 4]), asfloat(buffer[scalar_offset_2 / 4]), asfloat(buffer[scalar_offset_3 / 4])};
-  return tint_symbol_10;
-}
-
-Mat4x2_ tint_symbol_9(uint4 buffer[3], uint offset) {
-  const uint scalar_offset_4 = ((offset + 0u)) / 4;
-  const uint scalar_offset_5 = ((offset + 16u)) / 4;
-  const Mat4x2_ tint_symbol_11 = {asfloat(buffer[scalar_offset_4 / 4]), asfloat(buffer[scalar_offset_5 / 4])};
-  return tint_symbol_11;
-}
-
-Mat4x3_ tint_symbol_5(uint4 buffer[96], uint offset) {
-  const uint scalar_offset_6 = ((offset + 0u)) / 4;
-  const uint scalar_offset_7 = ((offset + 16u)) / 4;
-  const uint scalar_offset_8 = ((offset + 32u)) / 4;
-  const Mat4x3_ tint_symbol_12 = {asfloat(buffer[scalar_offset_6 / 4]), asfloat(buffer[scalar_offset_7 / 4]), asfloat(buffer[scalar_offset_8 / 4])};
-  return tint_symbol_12;
-}
-
 struct VertexOutput {
   float4 v_Color;
   float2 v_TexCoord;
@@ -179,6 +154,30 @@
   return o4;
 }
 
+Mat4x3_ tint_symbol_5(uint4 buffer[96], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  const uint scalar_offset_2 = ((offset + 32u)) / 4;
+  const Mat4x3_ tint_symbol_10 = {asfloat(buffer[scalar_offset / 4]), asfloat(buffer[scalar_offset_1 / 4]), asfloat(buffer[scalar_offset_2 / 4])};
+  return tint_symbol_10;
+}
+
+Mat4x4_ tint_symbol_7(uint4 buffer[4], uint offset) {
+  const uint scalar_offset_3 = ((offset + 0u)) / 4;
+  const uint scalar_offset_4 = ((offset + 16u)) / 4;
+  const uint scalar_offset_5 = ((offset + 32u)) / 4;
+  const uint scalar_offset_6 = ((offset + 48u)) / 4;
+  const Mat4x4_ tint_symbol_11 = {asfloat(buffer[scalar_offset_3 / 4]), asfloat(buffer[scalar_offset_4 / 4]), asfloat(buffer[scalar_offset_5 / 4]), asfloat(buffer[scalar_offset_6 / 4])};
+  return tint_symbol_11;
+}
+
+Mat4x2_ tint_symbol_9(uint4 buffer[3], uint offset) {
+  const uint scalar_offset_7 = ((offset + 0u)) / 4;
+  const uint scalar_offset_8 = ((offset + 16u)) / 4;
+  const Mat4x2_ tint_symbol_12 = {asfloat(buffer[scalar_offset_7 / 4]), asfloat(buffer[scalar_offset_8 / 4])};
+  return tint_symbol_12;
+}
+
 void main1() {
   Mat4x3_ t_PosMtx = (Mat4x3_)0;
   float2 t_TexSpaceCoord = float2(0.0f, 0.0f);
diff --git a/test/bug/tint/993.wgsl b/test/bug/tint/993.wgsl
new file mode 100644
index 0000000..d9167f8
--- /dev/null
+++ b/test/bug/tint/993.wgsl
@@ -0,0 +1,24 @@
+
+[[block]] struct Constants {
+  zero: u32;
+};
+[[group(1), binding(0)]] var<uniform> constants: Constants;
+
+[[block]] struct Result {
+  value: u32;
+};
+[[group(1), binding(1)]] var<storage, write> result: Result;
+
+[[block]] struct TestData {
+  data: array<atomic<i32>,3>;
+};
+[[group(0), binding(0)]] var<storage, read_write> s: TestData;
+
+fn runTest() -> i32 {
+  return atomicLoad(&s.data[(0u) + u32(constants.zero)]);
+}
+
+[[stage(compute), workgroup_size(1)]]
+fn main() {
+  result.value = u32(runTest());
+}
\ No newline at end of file
diff --git a/test/bug/tint/993.wgsl.expected.hlsl b/test/bug/tint/993.wgsl.expected.hlsl
new file mode 100644
index 0000000..2b16fd2
--- /dev/null
+++ b/test/bug/tint/993.wgsl.expected.hlsl
@@ -0,0 +1,23 @@
+int atomicLoad_1(RWByteAddressBuffer buffer, uint offset) {
+  int value = 0;
+  buffer.InterlockedOr(offset, 0, value);
+  return value;
+}
+
+cbuffer cbuffer_constants : register(b0, space1) {
+  uint4 constants[1];
+};
+
+RWByteAddressBuffer result : register(u1, space1);
+
+RWByteAddressBuffer s : register(u0, space0);
+
+int runTest() {
+  return atomicLoad_1(s, (4u * (0u + uint(constants[0].x))));
+}
+
+[numthreads(1, 1, 1)]
+void main() {
+  result.Store(0u, asuint(uint(runTest())));
+  return;
+}
diff --git a/test/bug/tint/993.wgsl.expected.msl b/test/bug/tint/993.wgsl.expected.msl
new file mode 100644
index 0000000..50f64ad
--- /dev/null
+++ b/test/bug/tint/993.wgsl.expected.msl
@@ -0,0 +1,34 @@
+SKIP: FAILED
+
+#include <metal_stdlib>
+
+using namespace metal;
+struct Constants {
+  /* 0x0000 */ uint zero;
+};
+struct Result {
+  /* 0x0000 */ uint value;
+};
+struct tint_array_wrapper {
+  /* 0x0000 */ atomic_int arr[3];
+};
+struct TestData {
+  /* 0x0000 */ tint_array_wrapper data;
+};
+
+int runTest(constant Constants& constants, device TestData& s) {
+  return atomic_load_explicit(&(s.data.arr[(0u + uint(constants.zero))]), memory_order_relaxed);
+}
+
+kernel void tint_symbol(constant Constants& constants [[buffer(0)]], device Result& result [[buffer(1)]], device TestData& s [[buffer(0)]]) {
+  result.value = uint(runTest(constants, s));
+  return;
+}
+
+Compilation failed: 
+
+program_source:21:124: error: cannot reserve 'buffer' resource location at index 0
+kernel void tint_symbol(constant Constants& constants [[buffer(0)]], device Result& result [[buffer(1)]], device TestData& s [[buffer(0)]]) {
+                                                                                                                           ^
+
+
diff --git a/test/bug/tint/993.wgsl.expected.spvasm b/test/bug/tint/993.wgsl.expected.spvasm
new file mode 100644
index 0000000..31b94d8
--- /dev/null
+++ b/test/bug/tint/993.wgsl.expected.spvasm
@@ -0,0 +1,73 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 36
+; Schema: 0
+               OpCapability Shader
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %main "main"
+               OpExecutionMode %main LocalSize 1 1 1
+               OpName %Constants "Constants"
+               OpMemberName %Constants 0 "zero"
+               OpName %constants "constants"
+               OpName %Result "Result"
+               OpMemberName %Result 0 "value"
+               OpName %result "result"
+               OpName %TestData "TestData"
+               OpMemberName %TestData 0 "data"
+               OpName %s "s"
+               OpName %runTest "runTest"
+               OpName %main "main"
+               OpDecorate %Constants Block
+               OpMemberDecorate %Constants 0 Offset 0
+               OpDecorate %constants NonWritable
+               OpDecorate %constants DescriptorSet 1
+               OpDecorate %constants Binding 0
+               OpDecorate %Result Block
+               OpMemberDecorate %Result 0 Offset 0
+               OpDecorate %result NonReadable
+               OpDecorate %result DescriptorSet 1
+               OpDecorate %result Binding 1
+               OpDecorate %TestData Block
+               OpMemberDecorate %TestData 0 Offset 0
+               OpDecorate %_arr_int_uint_3 ArrayStride 4
+               OpDecorate %s DescriptorSet 0
+               OpDecorate %s Binding 0
+       %uint = OpTypeInt 32 0
+  %Constants = OpTypeStruct %uint
+%_ptr_Uniform_Constants = OpTypePointer Uniform %Constants
+  %constants = OpVariable %_ptr_Uniform_Constants Uniform
+     %Result = OpTypeStruct %uint
+%_ptr_StorageBuffer_Result = OpTypePointer StorageBuffer %Result
+     %result = OpVariable %_ptr_StorageBuffer_Result StorageBuffer
+        %int = OpTypeInt 32 1
+     %uint_3 = OpConstant %uint 3
+%_arr_int_uint_3 = OpTypeArray %int %uint_3
+   %TestData = OpTypeStruct %_arr_int_uint_3
+%_ptr_StorageBuffer_TestData = OpTypePointer StorageBuffer %TestData
+          %s = OpVariable %_ptr_StorageBuffer_TestData StorageBuffer
+         %14 = OpTypeFunction %int
+     %uint_1 = OpConstant %uint 1
+     %uint_0 = OpConstant %uint 0
+%_ptr_Uniform_uint = OpTypePointer Uniform %uint
+%_ptr_StorageBuffer_int = OpTypePointer StorageBuffer %int
+       %void = OpTypeVoid
+         %28 = OpTypeFunction %void
+%_ptr_StorageBuffer_uint = OpTypePointer StorageBuffer %uint
+    %runTest = OpFunction %int None %14
+         %16 = OpLabel
+         %23 = OpAccessChain %_ptr_Uniform_uint %constants %uint_0
+         %24 = OpLoad %uint %23
+         %25 = OpIAdd %uint %uint_0 %24
+         %27 = OpAccessChain %_ptr_StorageBuffer_int %s %uint_0 %25
+         %17 = OpAtomicLoad %int %27 %uint_1 %uint_0
+               OpReturnValue %17
+               OpFunctionEnd
+       %main = OpFunction %void None %28
+         %31 = OpLabel
+         %33 = OpAccessChain %_ptr_StorageBuffer_uint %result %uint_0
+         %35 = OpFunctionCall %int %runTest
+         %34 = OpBitcast %uint %35
+               OpStore %33 %34
+               OpReturn
+               OpFunctionEnd
diff --git a/test/bug/tint/993.wgsl.expected.wgsl b/test/bug/tint/993.wgsl.expected.wgsl
new file mode 100644
index 0000000..b5bf11b
--- /dev/null
+++ b/test/bug/tint/993.wgsl.expected.wgsl
@@ -0,0 +1,29 @@
+[[block]]
+struct Constants {
+  zero : u32;
+};
+
+[[group(1), binding(0)]] var<uniform> constants : Constants;
+
+[[block]]
+struct Result {
+  value : u32;
+};
+
+[[group(1), binding(1)]] var<storage, write> result : Result;
+
+[[block]]
+struct TestData {
+  data : array<atomic<i32>, 3>;
+};
+
+[[group(0), binding(0)]] var<storage, read_write> s : TestData;
+
+fn runTest() -> i32 {
+  return atomicLoad(&(s.data[(0u + u32(constants.zero))]));
+}
+
+[[stage(compute), workgroup_size(1)]]
+fn main() {
+  result.value = u32(runTest());
+}
diff --git a/test/samples/cube.wgsl.expected.hlsl b/test/samples/cube.wgsl.expected.hlsl
index 851f523..9791b42 100644
--- a/test/samples/cube.wgsl.expected.hlsl
+++ b/test/samples/cube.wgsl.expected.hlsl
@@ -1,11 +1,3 @@
-float4x4 tint_symbol_7(uint4 buffer[4], uint offset) {
-  const uint scalar_offset = ((offset + 0u)) / 4;
-  const uint scalar_offset_1 = ((offset + 16u)) / 4;
-  const uint scalar_offset_2 = ((offset + 32u)) / 4;
-  const uint scalar_offset_3 = ((offset + 48u)) / 4;
-  return float4x4(asfloat(buffer[scalar_offset / 4]), asfloat(buffer[scalar_offset_1 / 4]), asfloat(buffer[scalar_offset_2 / 4]), asfloat(buffer[scalar_offset_3 / 4]));
-}
-
 cbuffer cbuffer_uniforms : register(b0, space0) {
   uint4 uniforms[4];
 };
@@ -27,6 +19,14 @@
   float4 Position : SV_Position;
 };
 
+float4x4 tint_symbol_7(uint4 buffer[4], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  const uint scalar_offset_2 = ((offset + 32u)) / 4;
+  const uint scalar_offset_3 = ((offset + 48u)) / 4;
+  return float4x4(asfloat(buffer[scalar_offset / 4]), asfloat(buffer[scalar_offset_1 / 4]), asfloat(buffer[scalar_offset_2 / 4]), asfloat(buffer[scalar_offset_3 / 4]));
+}
+
 tint_symbol_2 vtx_main(tint_symbol_1 tint_symbol) {
   const VertexInput input = {tint_symbol.cur_position, tint_symbol.color};
   VertexOutput output = (VertexOutput)0;
diff --git a/test/shader_io/shared_struct_storage_buffer.wgsl.expected.hlsl b/test/shader_io/shared_struct_storage_buffer.wgsl.expected.hlsl
index 1c8ad7d..af4440e 100644
--- a/test/shader_io/shared_struct_storage_buffer.wgsl.expected.hlsl
+++ b/test/shader_io/shared_struct_storage_buffer.wgsl.expected.hlsl
@@ -4,12 +4,6 @@
   float4 v;
 };
 
-void tint_symbol_5(RWByteAddressBuffer buffer, uint offset, S value) {
-  buffer.Store((offset + 0u), asuint(value.f));
-  buffer.Store((offset + 4u), asuint(value.u));
-  buffer.Store4((offset + 128u), asuint(value.v));
-}
-
 RWByteAddressBuffer output : register(u0, space0);
 
 struct tint_symbol_1 {
@@ -18,6 +12,12 @@
   float4 v : SV_Position;
 };
 
+void tint_symbol_5(RWByteAddressBuffer buffer, uint offset, S value) {
+  buffer.Store((offset + 0u), asuint(value.f));
+  buffer.Store((offset + 4u), asuint(value.u));
+  buffer.Store4((offset + 128u), asuint(value.v));
+}
+
 void frag_main(tint_symbol_1 tint_symbol) {
   const S input = {tint_symbol.f, tint_symbol.u, tint_symbol.v};
   const float f = input.f;