writer/hlsl: Emit helper functions for storage class atomic intrinsics

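By generating a helper function for each of these, the atomic call can be emitted as a plain expression with no pre-statements (the temporary declaration and Interlocked call that were previously emitted on the lines before the expression). This can help prevent for-loops from being transformed into while loops, which can upset FXC.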

We can't do the same for workgroup storage atomics, as the InterlockedXXX() functions take the workgroup-storage expression as their first argument, and I'm not aware of any way to declare a user-defined function parameter as `groupshared`.

Change-Id: I8669127a58dc9cae95ce316523029064b5c9b5fa
Reviewed-on: https://dawn-review.googlesource.com/c/tint/+/57462
Commit-Queue: James Price <jrprice@google.com>
Auto-Submit: Ben Clayton <bclayton@google.com>
Kokoro: Kokoro <noreply+kokoro@google.com>
Reviewed-by: James Price <jrprice@google.com>
diff --git a/src/writer/hlsl/generator_impl.cc b/src/writer/hlsl/generator_impl.cc
index 4cbb6ce..616a3a4 100644
--- a/src/writer/hlsl/generator_impl.cc
+++ b/src/writer/hlsl/generator_impl.cc
@@ -171,6 +171,10 @@
     }
   }
 
+  if (!helpers_.lines.empty()) {
+    current_buffer_->Insert(helpers_, 0, 0);
+  }
+
   return true;
 }
 
@@ -845,7 +849,7 @@
     case Op::kAtomicXor:
     case Op::kAtomicExchange:
     case Op::kAtomicCompareExchangeWeak:
-      return EmitStorageAtomicCall(out, expr, intrinsic->op);
+      return EmitStorageAtomicCall(out, expr, intrinsic);
   }
 
   TINT_UNREACHABLE(Writer, diagnostics_)
@@ -857,188 +861,217 @@
 bool GeneratorImpl::EmitStorageAtomicCall(
     std::ostream& out,
     ast::CallExpression* expr,
-    transform::DecomposeMemoryAccess::Intrinsic::Op op) {
+    const transform::DecomposeMemoryAccess::Intrinsic* intrinsic) {
   using Op = transform::DecomposeMemoryAccess::Intrinsic::Op;
 
-  std::string result = UniqueIdentifier("atomic_result");
-
   auto* result_ty = TypeOf(expr);
-  if (!result_ty->Is<sem::Void>()) {
-    auto pre = line();
-    if (!EmitTypeAndName(pre, TypeOf(expr), ast::StorageClass::kNone,
-                         ast::Access::kUndefined, result)) {
-      return false;
-    }
-    pre << " = ";
-    if (!EmitZeroValue(pre, result_ty)) {
-      return false;
-    }
-    pre << ";";
-  }
 
-  auto* buffer = expr->params()[0];
-  auto* offset = expr->params()[1];
+  auto& buf = helpers_;
 
-  auto call_buffer_method = [&](const char* name) {
-    // First two arguments to the DecomposeMemoryAccess::Intrinsic are the
-    // buffer and offset. The buffer is the moved to the LHS of the '.', and the
-    // offset becomes the first argument. The rest of the method's arguments are
-    // the same.
-    auto pre = line();
-    if (!EmitExpression(pre, buffer)) {
-      return false;
-    }
-    pre << "." << name;
-    {
-      ScopedParen sp(pre);
-      if (!EmitExpression(pre, offset)) {
-        return false;
-      }
-
-      for (size_t i = 2; i < expr->params().size(); i++) {
-        auto* arg = expr->params()[i];
-        pre << ", ";
-        if (!EmitExpression(pre, arg)) {
-          return false;
+  // generate_helper() generates a helper function that translates the
+  // DecomposeMemoryAccess::Intrinsic call into the corresponding HLSL
+  // atomic intrinsic function.
+  auto generate_helper = [&]() -> std::string {
+    auto rmw = [&](const char* wgsl, const char* hlsl) -> std::string {
+      auto name = UniqueIdentifier(wgsl);
+      {
+        auto fn = line(&buf);
+        if (!EmitTypeAndName(fn, result_ty, ast::StorageClass::kNone,
+                             ast::Access::kUndefined, name)) {
+          return "";
         }
+        fn << "(RWByteAddressBuffer buffer, uint offset, ";
+        if (!EmitTypeAndName(fn, result_ty, ast::StorageClass::kNone,
+                             ast::Access::kUndefined, "value")) {
+          return "";
+        }
+        fn << ") {";
       }
 
-      pre << ", " << result;
-    }
-    pre << ";";
+      buf.IncrementIndent();
+      TINT_DEFER({
+        buf.DecrementIndent();
+        line(&buf) << "}";
+        line(&buf);
+      });
 
-    out << result;
-    return true;
+      {
+        auto l = line(&buf);
+        if (!EmitTypeAndName(l, result_ty, ast::StorageClass::kNone,
+                             ast::Access::kUndefined, "original_value")) {
+          return "";
+        }
+        l << " = 0;";
+      }
+      line(&buf) << "buffer." << hlsl << "(offset, value, original_value);";
+      line(&buf) << "return original_value;";
+      return name;
+    };
+
+    switch (intrinsic->op) {
+      case Op::kAtomicAdd:
+        return rmw("atomicAdd", "InterlockedAdd");
+
+      case Op::kAtomicMax:
+        return rmw("atomicMax", "InterlockedMax");
+
+      case Op::kAtomicMin:
+        return rmw("atomicMin", "InterlockedMin");
+
+      case Op::kAtomicAnd:
+        return rmw("atomicAnd", "InterlockedAnd");
+
+      case Op::kAtomicOr:
+        return rmw("atomicOr", "InterlockedOr");
+
+      case Op::kAtomicXor:
+        return rmw("atomicXor", "InterlockedXor");
+
+      case Op::kAtomicExchange:
+        return rmw("atomicExchange", "InterlockedExchange");
+
+      case Op::kAtomicLoad: {
+        // HLSL does not have an InterlockedLoad, so we emulate it with
+        // InterlockedOr using 0 as the OR value
+        auto name = UniqueIdentifier("atomicLoad");
+        {
+          auto fn = line(&buf);
+          if (!EmitTypeAndName(fn, result_ty, ast::StorageClass::kNone,
+                               ast::Access::kUndefined, name)) {
+            return "";
+          }
+          fn << "(RWByteAddressBuffer buffer, uint offset) {";
+        }
+
+        buf.IncrementIndent();
+        TINT_DEFER({
+          buf.DecrementIndent();
+          line(&buf) << "}";
+          line(&buf);
+        });
+
+        {
+          auto l = line(&buf);
+          if (!EmitTypeAndName(l, result_ty, ast::StorageClass::kNone,
+                               ast::Access::kUndefined, "value")) {
+            return "";
+          }
+          l << " = 0;";
+        }
+
+        line(&buf) << "buffer.InterlockedOr(offset, 0, value);";
+        line(&buf) << "return value;";
+        return name;
+      }
+      case Op::kAtomicStore: {
+        // HLSL does not have an InterlockedStore, so we emulate it with
+        // InterlockedExchange and discard the returned value
+        auto* value_ty = TypeOf(expr->params()[2]);
+        auto name = UniqueIdentifier("atomicStore");
+        {
+          auto fn = line(&buf);
+          fn << "void " << name << "(RWByteAddressBuffer buffer, uint offset, ";
+          if (!EmitTypeAndName(fn, value_ty, ast::StorageClass::kNone,
+                               ast::Access::kUndefined, "value")) {
+            return "";
+          }
+          fn << ") {";
+        }
+
+        buf.IncrementIndent();
+        TINT_DEFER({
+          buf.DecrementIndent();
+          line(&buf) << "}";
+          line(&buf);
+        });
+
+        {
+          auto l = line(&buf);
+          if (!EmitTypeAndName(l, value_ty, ast::StorageClass::kNone,
+                               ast::Access::kUndefined, "ignored")) {
+            return "";
+          }
+          l << ";";
+        }
+        line(&buf) << "buffer.InterlockedExchange(offset, value, ignored);";
+        return name;
+      }
+      case Op::kAtomicCompareExchangeWeak: {
+        auto* value_ty = TypeOf(expr->params()[2]);
+
+        auto name = UniqueIdentifier("atomicCompareExchangeWeak");
+        {
+          auto fn = line(&buf);
+          if (!EmitTypeAndName(fn, result_ty, ast::StorageClass::kNone,
+                               ast::Access::kUndefined, name)) {
+            return "";
+          }
+          fn << "(RWByteAddressBuffer buffer, uint offset, ";
+          if (!EmitTypeAndName(fn, value_ty, ast::StorageClass::kNone,
+                               ast::Access::kUndefined, "compare")) {
+            return "";
+          }
+          fn << ", ";
+          if (!EmitTypeAndName(fn, value_ty, ast::StorageClass::kNone,
+                               ast::Access::kUndefined, "value")) {
+            return "";
+          }
+          fn << ") {";
+        }
+
+        buf.IncrementIndent();
+        TINT_DEFER({
+          buf.DecrementIndent();
+          line(&buf) << "}";
+          line(&buf);
+        });
+
+        {  // T result = {0, 0};
+          auto l = line(&buf);
+          if (!EmitTypeAndName(l, result_ty, ast::StorageClass::kNone,
+                               ast::Access::kUndefined, "result")) {
+            return "";
+          }
+          l << " = {0, 0};";
+        }
+        line(&buf) << "buffer.InterlockedCompareExchange(offset, compare, "
+                      "value, result.x);";
+        line(&buf) << "result.y = result.x == compare;";
+        line(&buf) << "return result;";
+        return name;
+      }
+      default:
+        break;
+    }
+    TINT_UNREACHABLE(Writer, diagnostics_)
+        << "unsupported atomic DecomposeMemoryAccess::Intrinsic::Op: "
+        << static_cast<int>(intrinsic->op);
+    return "";
   };
 
-  switch (op) {
-    case Op::kAtomicLoad: {
-      // HLSL does not have an InterlockedLoad, so we emulate it with
-      // InterlockedOr using 0 as the OR value
-      auto pre = line();
-      if (!EmitExpression(pre, buffer)) {
-        return false;
-      }
-      pre << ".InterlockedOr";
-      {
-        ScopedParen sp(pre);
-        if (!EmitExpression(pre, offset)) {
-          return false;
-        }
-        pre << ", 0, " << result;
-      }
-
-      pre << ";";
-      out << result;
-      return true;
-    }
-    case Op::kAtomicStore: {
-      // HLSL does not have an InterlockedStore, so we emulate it with
-      // InterlockedExchange and discard the returned value
-      auto pre = line();
-      auto* value = expr->params()[2];
-      auto* value_ty = TypeOf(value);
-      if (!EmitTypeAndName(pre, value_ty, ast::StorageClass::kNone,
-                           ast::Access::kUndefined, result)) {
-        return false;
-      }
-      pre << " = ";
-      if (!EmitZeroValue(pre, value_ty)) {
-        return false;
-      }
-      pre << ";";
-
-      if (!EmitExpression(out, buffer)) {
-        return false;
-      }
-      out << ".InterlockedExchange";
-      {
-        ScopedParen sp(out);
-        if (!EmitExpression(out, offset)) {
-          return false;
-        }
-        out << ", ";
-        if (!EmitExpression(out, value)) {
-          return false;
-        }
-        out << ", " << result;
-      }
-      return true;
-    }
-    case Op::kAtomicCompareExchangeWeak: {
-      auto* compare_value = expr->params()[2];
-      auto* value = expr->params()[3];
-
-      std::string compare = UniqueIdentifier("atomic_compare_value");
-      {  // T atomic_compare_value = compare_value;
-        auto pre = line();
-        if (!EmitTypeAndName(pre, TypeOf(compare_value),
-                             ast::StorageClass::kNone, ast::Access::kUndefined,
-                             compare)) {
-          return false;
-        }
-        pre << " = ";
-        if (!EmitExpression(pre, compare_value)) {
-          return false;
-        }
-        pre << ";";
-      }
-      {  // buffer.InterlockedCompareExchange(offset, compare, value, result.x);
-        auto pre = line();
-        if (!EmitExpression(pre, buffer)) {
-          return false;
-        }
-        pre << ".InterlockedCompareExchange";
-        {
-          ScopedParen sp(pre);
-          if (!EmitExpression(pre, offset)) {
-            return false;
-          }
-          pre << ", " << compare << ", ";
-          if (!EmitExpression(pre, value)) {
-            return false;
-          }
-          pre << ", " << result << ".x";
-        }
-        pre << ";";
-      }
-      {  // result.y = result.x == compare;
-        line() << result << ".y = " << result << ".x == " << compare << ";";
-      }
-
-      out << result;
-      return true;
-    }
-
-    case Op::kAtomicAdd:
-      return call_buffer_method("InterlockedAdd");
-
-    case Op::kAtomicMax:
-      return call_buffer_method("InterlockedMax");
-
-    case Op::kAtomicMin:
-      return call_buffer_method("InterlockedMin");
-
-    case Op::kAtomicAnd:
-      return call_buffer_method("InterlockedAnd");
-
-    case Op::kAtomicOr:
-      return call_buffer_method("InterlockedOr");
-
-    case Op::kAtomicXor:
-      return call_buffer_method("InterlockedXor");
-
-    case Op::kAtomicExchange:
-      return call_buffer_method("InterlockedExchange");
-
-    default:
-      break;
+  auto func = utils::GetOrCreate(dma_intrinsics_,
+                                 DMAIntrinsic{intrinsic->op, intrinsic->type},
+                                 generate_helper);
+  if (func.empty()) {
+    return false;
   }
 
-  TINT_UNREACHABLE(Writer, diagnostics_)
-      << "unsupported atomic DecomposeMemoryAccess::Intrinsic::Op: "
-      << static_cast<int>(op);
-  return false;
+  out << func;
+  {
+    ScopedParen sp(out);
+    bool first = true;
+    for (auto* arg : expr->params()) {
+      if (!first) {
+        out << ", ";
+      }
+      first = false;
+      if (!EmitExpression(out, arg)) {
+        return false;
+      }
+    }
+  }
+
+  return true;
 }
 
 bool GeneratorImpl::EmitWorkgroupAtomicCall(std::ostream& out,
diff --git a/src/writer/hlsl/generator_impl.h b/src/writer/hlsl/generator_impl.h
index 8da3178..842cc5f 100644
--- a/src/writer/hlsl/generator_impl.h
+++ b/src/writer/hlsl/generator_impl.h
@@ -34,6 +34,7 @@
 #include "src/program_builder.h"
 #include "src/scope_stack.h"
 #include "src/transform/decompose_memory_access.h"
+#include "src/utils/hash.h"
 #include "src/writer/text_generator.h"
 
 namespace tint {
@@ -126,12 +127,12 @@
   /// Handles generating an atomic intrinsic call for a storage buffer variable
   /// @param out the output of the expression stream
   /// @param expr the call expression
-  /// @param op the atomic op
+  /// @param intrinsic the atomic intrinsic
   /// @returns true if the call expression is emitted
   bool EmitStorageAtomicCall(
       std::ostream& out,
       ast::CallExpression* expr,
-      transform::DecomposeMemoryAccess::Intrinsic::Op op);
+      const transform::DecomposeMemoryAccess::Intrinsic* intrinsic);
   /// Handles generating an atomic intrinsic call for a workgroup variable
   /// @param out the output of the expression stream
   /// @param expr the call expression
@@ -389,9 +390,28 @@
     std::string var_name;
   };
 
+  struct DMAIntrinsic {
+    transform::DecomposeMemoryAccess::Intrinsic::Op op;
+    transform::DecomposeMemoryAccess::Intrinsic::DataType type;
+    bool operator==(const DMAIntrinsic& rhs) const {
+      return op == rhs.op && type == rhs.type;
+    }
+    /// Hasher is a std::hash function for DMAIntrinsic
+    struct Hasher {
+      /// @param i the DMAIntrinsic to hash
+      /// @returns the hash of `i`
+      inline std::size_t operator()(const DMAIntrinsic& i) const {
+        return utils::Hash(i.op, i.type);
+      }
+    };
+  };
+
   std::string get_buffer_name(ast::Expression* expr);
 
+  TextBuffer helpers_;  // Helper functions emitted at the top of the output
   std::function<bool()> emit_continuing_;
+  std::unordered_map<DMAIntrinsic, std::string, DMAIntrinsic::Hasher>
+      dma_intrinsics_;
   std::unordered_map<const sem::Struct*, std::string> structure_builders_;
   std::unordered_map<const ast::AssignmentStatement*, const sem::Vector*>
       vector_assignments_in_loops_;
diff --git a/test/bug/tint/926.wgsl.expected.hlsl b/test/bug/tint/926.wgsl.expected.hlsl
index febfdb4..87ebecb 100644
--- a/test/bug/tint/926.wgsl.expected.hlsl
+++ b/test/bug/tint/926.wgsl.expected.hlsl
@@ -1,3 +1,9 @@
+uint atomicAdd_1(RWByteAddressBuffer buffer, uint offset, uint value) {
+  uint original_value = 0;
+  buffer.InterlockedAdd(offset, value, original_value);
+  return original_value;
+}
+
 RWByteAddressBuffer drawOut : register(u5, space0);
 static uint cubeVerts = 0u;
 
@@ -8,8 +14,6 @@
 [numthreads(1, 1, 1)]
 void computeMain(tint_symbol_1 tint_symbol) {
   const uint3 global_id = tint_symbol.global_id;
-  uint atomic_result = 0u;
-  drawOut.InterlockedAdd(0u, cubeVerts, atomic_result);
-  const uint firstVertex = atomic_result;
+  const uint firstVertex = atomicAdd_1(drawOut, 0u, cubeVerts);
   return;
 }
diff --git a/test/intrinsics/gen/atomicAdd/8a199a.wgsl.expected.hlsl b/test/intrinsics/gen/atomicAdd/8a199a.wgsl.expected.hlsl
index d8bcd22..35e6ef3 100644
--- a/test/intrinsics/gen/atomicAdd/8a199a.wgsl.expected.hlsl
+++ b/test/intrinsics/gen/atomicAdd/8a199a.wgsl.expected.hlsl
@@ -1,9 +1,13 @@
+uint atomicAdd_1(RWByteAddressBuffer buffer, uint offset, uint value) {
+  uint original_value = 0;
+  buffer.InterlockedAdd(offset, value, original_value);
+  return original_value;
+}
+
 RWByteAddressBuffer sb_rw : register(u0, space0);
 
 void atomicAdd_8a199a() {
-  uint atomic_result = 0u;
-  sb_rw.InterlockedAdd(0u, 1u, atomic_result);
-  uint res = atomic_result;
+  uint res = atomicAdd_1(sb_rw, 0u, 1u);
 }
 
 void fragment_main() {
diff --git a/test/intrinsics/gen/atomicAdd/d32fe4.wgsl.expected.hlsl b/test/intrinsics/gen/atomicAdd/d32fe4.wgsl.expected.hlsl
index 392d656..f87e491 100644
--- a/test/intrinsics/gen/atomicAdd/d32fe4.wgsl.expected.hlsl
+++ b/test/intrinsics/gen/atomicAdd/d32fe4.wgsl.expected.hlsl
@@ -1,9 +1,13 @@
+int atomicAdd_1(RWByteAddressBuffer buffer, uint offset, int value) {
+  int original_value = 0;
+  buffer.InterlockedAdd(offset, value, original_value);
+  return original_value;
+}
+
 RWByteAddressBuffer sb_rw : register(u0, space0);
 
 void atomicAdd_d32fe4() {
-  int atomic_result = 0;
-  sb_rw.InterlockedAdd(0u, 1, atomic_result);
-  int res = atomic_result;
+  int res = atomicAdd_1(sb_rw, 0u, 1);
 }
 
 void fragment_main() {
diff --git a/test/intrinsics/gen/atomicAnd/152966.wgsl.expected.hlsl b/test/intrinsics/gen/atomicAnd/152966.wgsl.expected.hlsl
index 6080eba..fa6c15f 100644
--- a/test/intrinsics/gen/atomicAnd/152966.wgsl.expected.hlsl
+++ b/test/intrinsics/gen/atomicAnd/152966.wgsl.expected.hlsl
@@ -1,9 +1,13 @@
+int atomicAnd_1(RWByteAddressBuffer buffer, uint offset, int value) {
+  int original_value = 0;
+  buffer.InterlockedAnd(offset, value, original_value);
+  return original_value;
+}
+
 RWByteAddressBuffer sb_rw : register(u0, space0);
 
 void atomicAnd_152966() {
-  int atomic_result = 0;
-  sb_rw.InterlockedAnd(0u, 1, atomic_result);
-  int res = atomic_result;
+  int res = atomicAnd_1(sb_rw, 0u, 1);
 }
 
 void fragment_main() {
diff --git a/test/intrinsics/gen/atomicAnd/85a8d9.wgsl.expected.hlsl b/test/intrinsics/gen/atomicAnd/85a8d9.wgsl.expected.hlsl
index c0d64fc..9d4eb2f 100644
--- a/test/intrinsics/gen/atomicAnd/85a8d9.wgsl.expected.hlsl
+++ b/test/intrinsics/gen/atomicAnd/85a8d9.wgsl.expected.hlsl
@@ -1,9 +1,13 @@
+uint atomicAnd_1(RWByteAddressBuffer buffer, uint offset, uint value) {
+  uint original_value = 0;
+  buffer.InterlockedAnd(offset, value, original_value);
+  return original_value;
+}
+
 RWByteAddressBuffer sb_rw : register(u0, space0);
 
 void atomicAnd_85a8d9() {
-  uint atomic_result = 0u;
-  sb_rw.InterlockedAnd(0u, 1u, atomic_result);
-  uint res = atomic_result;
+  uint res = atomicAnd_1(sb_rw, 0u, 1u);
 }
 
 void fragment_main() {
diff --git a/test/intrinsics/gen/atomicCompareExchangeWeak/12871c.wgsl.expected.hlsl b/test/intrinsics/gen/atomicCompareExchangeWeak/12871c.wgsl.expected.hlsl
index 0f540c2..9bd884c 100644
--- a/test/intrinsics/gen/atomicCompareExchangeWeak/12871c.wgsl.expected.hlsl
+++ b/test/intrinsics/gen/atomicCompareExchangeWeak/12871c.wgsl.expected.hlsl
@@ -1,11 +1,14 @@
+int2 atomicCompareExchangeWeak_1(RWByteAddressBuffer buffer, uint offset, int compare, int value) {
+  int2 result = {0, 0};
+  buffer.InterlockedCompareExchange(offset, compare, value, result.x);
+  result.y = result.x == compare;
+  return result;
+}
+
 RWByteAddressBuffer sb_rw : register(u0, space0);
 
 void atomicCompareExchangeWeak_12871c() {
-  int2 atomic_result = int2(0, 0);
-  int atomic_compare_value = 1;
-  sb_rw.InterlockedCompareExchange(0u, atomic_compare_value, 1, atomic_result.x);
-  atomic_result.y = atomic_result.x == atomic_compare_value;
-  int2 res = atomic_result;
+  int2 res = atomicCompareExchangeWeak_1(sb_rw, 0u, 1, 1);
 }
 
 void fragment_main() {
diff --git a/test/intrinsics/gen/atomicCompareExchangeWeak/6673da.wgsl.expected.hlsl b/test/intrinsics/gen/atomicCompareExchangeWeak/6673da.wgsl.expected.hlsl
index 0011dec..430f132 100644
--- a/test/intrinsics/gen/atomicCompareExchangeWeak/6673da.wgsl.expected.hlsl
+++ b/test/intrinsics/gen/atomicCompareExchangeWeak/6673da.wgsl.expected.hlsl
@@ -1,11 +1,14 @@
+uint2 atomicCompareExchangeWeak_1(RWByteAddressBuffer buffer, uint offset, uint compare, uint value) {
+  uint2 result = {0, 0};
+  buffer.InterlockedCompareExchange(offset, compare, value, result.x);
+  result.y = result.x == compare;
+  return result;
+}
+
 RWByteAddressBuffer sb_rw : register(u0, space0);
 
 void atomicCompareExchangeWeak_6673da() {
-  uint2 atomic_result = uint2(0u, 0u);
-  uint atomic_compare_value = 1u;
-  sb_rw.InterlockedCompareExchange(0u, atomic_compare_value, 1u, atomic_result.x);
-  atomic_result.y = atomic_result.x == atomic_compare_value;
-  uint2 res = atomic_result;
+  uint2 res = atomicCompareExchangeWeak_1(sb_rw, 0u, 1u, 1u);
 }
 
 void fragment_main() {
diff --git a/test/intrinsics/gen/atomicExchange/d59712.wgsl.expected.hlsl b/test/intrinsics/gen/atomicExchange/d59712.wgsl.expected.hlsl
index a54642c..506c6fe 100644
--- a/test/intrinsics/gen/atomicExchange/d59712.wgsl.expected.hlsl
+++ b/test/intrinsics/gen/atomicExchange/d59712.wgsl.expected.hlsl
@@ -1,9 +1,13 @@
+uint atomicExchange_1(RWByteAddressBuffer buffer, uint offset, uint value) {
+  uint original_value = 0;
+  buffer.InterlockedExchange(offset, value, original_value);
+  return original_value;
+}
+
 RWByteAddressBuffer sb_rw : register(u0, space0);
 
 void atomicExchange_d59712() {
-  uint atomic_result = 0u;
-  sb_rw.InterlockedExchange(0u, 1u, atomic_result);
-  uint res = atomic_result;
+  uint res = atomicExchange_1(sb_rw, 0u, 1u);
 }
 
 void fragment_main() {
diff --git a/test/intrinsics/gen/atomicExchange/f2e22f.wgsl.expected.hlsl b/test/intrinsics/gen/atomicExchange/f2e22f.wgsl.expected.hlsl
index e361c93..1ab0d6c 100644
--- a/test/intrinsics/gen/atomicExchange/f2e22f.wgsl.expected.hlsl
+++ b/test/intrinsics/gen/atomicExchange/f2e22f.wgsl.expected.hlsl
@@ -1,9 +1,13 @@
+int atomicExchange_1(RWByteAddressBuffer buffer, uint offset, int value) {
+  int original_value = 0;
+  buffer.InterlockedExchange(offset, value, original_value);
+  return original_value;
+}
+
 RWByteAddressBuffer sb_rw : register(u0, space0);
 
 void atomicExchange_f2e22f() {
-  int atomic_result = 0;
-  sb_rw.InterlockedExchange(0u, 1, atomic_result);
-  int res = atomic_result;
+  int res = atomicExchange_1(sb_rw, 0u, 1);
 }
 
 void fragment_main() {
diff --git a/test/intrinsics/gen/atomicLoad/0806ad.wgsl.expected.hlsl b/test/intrinsics/gen/atomicLoad/0806ad.wgsl.expected.hlsl
index a573e45..8c58350 100644
--- a/test/intrinsics/gen/atomicLoad/0806ad.wgsl.expected.hlsl
+++ b/test/intrinsics/gen/atomicLoad/0806ad.wgsl.expected.hlsl
@@ -1,9 +1,13 @@
+int atomicLoad_1(RWByteAddressBuffer buffer, uint offset) {
+  int value = 0;
+  buffer.InterlockedOr(offset, 0, value);
+  return value;
+}
+
 RWByteAddressBuffer sb_rw : register(u0, space0);
 
 void atomicLoad_0806ad() {
-  int atomic_result = 0;
-  sb_rw.InterlockedOr(0u, 0, atomic_result);
-  int res = atomic_result;
+  int res = atomicLoad_1(sb_rw, 0u);
 }
 
 void fragment_main() {
diff --git a/test/intrinsics/gen/atomicLoad/fe6cc3.wgsl.expected.hlsl b/test/intrinsics/gen/atomicLoad/fe6cc3.wgsl.expected.hlsl
index 86a939e..967d4c0 100644
--- a/test/intrinsics/gen/atomicLoad/fe6cc3.wgsl.expected.hlsl
+++ b/test/intrinsics/gen/atomicLoad/fe6cc3.wgsl.expected.hlsl
@@ -1,9 +1,13 @@
+uint atomicLoad_1(RWByteAddressBuffer buffer, uint offset) {
+  uint value = 0;
+  buffer.InterlockedOr(offset, 0, value);
+  return value;
+}
+
 RWByteAddressBuffer sb_rw : register(u0, space0);
 
 void atomicLoad_fe6cc3() {
-  uint atomic_result = 0u;
-  sb_rw.InterlockedOr(0u, 0, atomic_result);
-  uint res = atomic_result;
+  uint res = atomicLoad_1(sb_rw, 0u);
 }
 
 void fragment_main() {
diff --git a/test/intrinsics/gen/atomicMax/51b9be.wgsl.expected.hlsl b/test/intrinsics/gen/atomicMax/51b9be.wgsl.expected.hlsl
index 41dae87..621944f 100644
--- a/test/intrinsics/gen/atomicMax/51b9be.wgsl.expected.hlsl
+++ b/test/intrinsics/gen/atomicMax/51b9be.wgsl.expected.hlsl
@@ -1,9 +1,13 @@
+uint atomicMax_1(RWByteAddressBuffer buffer, uint offset, uint value) {
+  uint original_value = 0;
+  buffer.InterlockedMax(offset, value, original_value);
+  return original_value;
+}
+
 RWByteAddressBuffer sb_rw : register(u0, space0);
 
 void atomicMax_51b9be() {
-  uint atomic_result = 0u;
-  sb_rw.InterlockedMax(0u, 1u, atomic_result);
-  uint res = atomic_result;
+  uint res = atomicMax_1(sb_rw, 0u, 1u);
 }
 
 void fragment_main() {
diff --git a/test/intrinsics/gen/atomicMax/92aa72.wgsl.expected.hlsl b/test/intrinsics/gen/atomicMax/92aa72.wgsl.expected.hlsl
index e29705e..4ac6cd8 100644
--- a/test/intrinsics/gen/atomicMax/92aa72.wgsl.expected.hlsl
+++ b/test/intrinsics/gen/atomicMax/92aa72.wgsl.expected.hlsl
@@ -1,9 +1,13 @@
+int atomicMax_1(RWByteAddressBuffer buffer, uint offset, int value) {
+  int original_value = 0;
+  buffer.InterlockedMax(offset, value, original_value);
+  return original_value;
+}
+
 RWByteAddressBuffer sb_rw : register(u0, space0);
 
 void atomicMax_92aa72() {
-  int atomic_result = 0;
-  sb_rw.InterlockedMax(0u, 1, atomic_result);
-  int res = atomic_result;
+  int res = atomicMax_1(sb_rw, 0u, 1);
 }
 
 void fragment_main() {
diff --git a/test/intrinsics/gen/atomicMin/8e38dc.wgsl.expected.hlsl b/test/intrinsics/gen/atomicMin/8e38dc.wgsl.expected.hlsl
index ab52cbd..5c55017 100644
--- a/test/intrinsics/gen/atomicMin/8e38dc.wgsl.expected.hlsl
+++ b/test/intrinsics/gen/atomicMin/8e38dc.wgsl.expected.hlsl
@@ -1,9 +1,13 @@
+int atomicMin_1(RWByteAddressBuffer buffer, uint offset, int value) {
+  int original_value = 0;
+  buffer.InterlockedMin(offset, value, original_value);
+  return original_value;
+}
+
 RWByteAddressBuffer sb_rw : register(u0, space0);
 
 void atomicMin_8e38dc() {
-  int atomic_result = 0;
-  sb_rw.InterlockedMin(0u, 1, atomic_result);
-  int res = atomic_result;
+  int res = atomicMin_1(sb_rw, 0u, 1);
 }
 
 void fragment_main() {
diff --git a/test/intrinsics/gen/atomicMin/c67a74.wgsl.expected.hlsl b/test/intrinsics/gen/atomicMin/c67a74.wgsl.expected.hlsl
index ac804d0..4b350c3 100644
--- a/test/intrinsics/gen/atomicMin/c67a74.wgsl.expected.hlsl
+++ b/test/intrinsics/gen/atomicMin/c67a74.wgsl.expected.hlsl
@@ -1,9 +1,13 @@
+uint atomicMin_1(RWByteAddressBuffer buffer, uint offset, uint value) {
+  uint original_value = 0;
+  buffer.InterlockedMin(offset, value, original_value);
+  return original_value;
+}
+
 RWByteAddressBuffer sb_rw : register(u0, space0);
 
 void atomicMin_c67a74() {
-  uint atomic_result = 0u;
-  sb_rw.InterlockedMin(0u, 1u, atomic_result);
-  uint res = atomic_result;
+  uint res = atomicMin_1(sb_rw, 0u, 1u);
 }
 
 void fragment_main() {
diff --git a/test/intrinsics/gen/atomicOr/5e95d4.wgsl.expected.hlsl b/test/intrinsics/gen/atomicOr/5e95d4.wgsl.expected.hlsl
index 63db7ef..21059e6 100644
--- a/test/intrinsics/gen/atomicOr/5e95d4.wgsl.expected.hlsl
+++ b/test/intrinsics/gen/atomicOr/5e95d4.wgsl.expected.hlsl
@@ -1,9 +1,13 @@
+uint atomicOr_1(RWByteAddressBuffer buffer, uint offset, uint value) {
+  uint original_value = 0;
+  buffer.InterlockedOr(offset, value, original_value);
+  return original_value;
+}
+
 RWByteAddressBuffer sb_rw : register(u0, space0);
 
 void atomicOr_5e95d4() {
-  uint atomic_result = 0u;
-  sb_rw.InterlockedOr(0u, 1u, atomic_result);
-  uint res = atomic_result;
+  uint res = atomicOr_1(sb_rw, 0u, 1u);
 }
 
 void fragment_main() {
diff --git a/test/intrinsics/gen/atomicOr/8d96a0.wgsl.expected.hlsl b/test/intrinsics/gen/atomicOr/8d96a0.wgsl.expected.hlsl
index b5de43e..1504dec 100644
--- a/test/intrinsics/gen/atomicOr/8d96a0.wgsl.expected.hlsl
+++ b/test/intrinsics/gen/atomicOr/8d96a0.wgsl.expected.hlsl
@@ -1,9 +1,13 @@
+int atomicOr_1(RWByteAddressBuffer buffer, uint offset, int value) {
+  int original_value = 0;
+  buffer.InterlockedOr(offset, value, original_value);
+  return original_value;
+}
+
 RWByteAddressBuffer sb_rw : register(u0, space0);
 
 void atomicOr_8d96a0() {
-  int atomic_result = 0;
-  sb_rw.InterlockedOr(0u, 1, atomic_result);
-  int res = atomic_result;
+  int res = atomicOr_1(sb_rw, 0u, 1);
 }
 
 void fragment_main() {
diff --git a/test/intrinsics/gen/atomicStore/cdc29e.wgsl.expected.hlsl b/test/intrinsics/gen/atomicStore/cdc29e.wgsl.expected.hlsl
index 031984b..d78cdbe 100644
--- a/test/intrinsics/gen/atomicStore/cdc29e.wgsl.expected.hlsl
+++ b/test/intrinsics/gen/atomicStore/cdc29e.wgsl.expected.hlsl
@@ -1,8 +1,12 @@
+void atomicStore_1(RWByteAddressBuffer buffer, uint offset, uint value) {
+  uint ignored;
+  buffer.InterlockedExchange(offset, value, ignored);
+}
+
 RWByteAddressBuffer sb_rw : register(u0, space0);
 
 void atomicStore_cdc29e() {
-  uint atomic_result = 0u;
-  sb_rw.InterlockedExchange(0u, 1u, atomic_result);
+  atomicStore_1(sb_rw, 0u, 1u);
 }
 
 void fragment_main() {
diff --git a/test/intrinsics/gen/atomicStore/d1e9a6.wgsl.expected.hlsl b/test/intrinsics/gen/atomicStore/d1e9a6.wgsl.expected.hlsl
index 9fa1886..afac632 100644
--- a/test/intrinsics/gen/atomicStore/d1e9a6.wgsl.expected.hlsl
+++ b/test/intrinsics/gen/atomicStore/d1e9a6.wgsl.expected.hlsl
@@ -1,8 +1,12 @@
+void atomicStore_1(RWByteAddressBuffer buffer, uint offset, int value) {
+  int ignored;
+  buffer.InterlockedExchange(offset, value, ignored);
+}
+
 RWByteAddressBuffer sb_rw : register(u0, space0);
 
 void atomicStore_d1e9a6() {
-  int atomic_result = 0;
-  sb_rw.InterlockedExchange(0u, 1, atomic_result);
+  atomicStore_1(sb_rw, 0u, 1);
 }
 
 void fragment_main() {
diff --git a/test/intrinsics/gen/atomicXor/54510e.wgsl.expected.hlsl b/test/intrinsics/gen/atomicXor/54510e.wgsl.expected.hlsl
index 401a107..9cc11db 100644
--- a/test/intrinsics/gen/atomicXor/54510e.wgsl.expected.hlsl
+++ b/test/intrinsics/gen/atomicXor/54510e.wgsl.expected.hlsl
@@ -1,9 +1,13 @@
+uint atomicXor_1(RWByteAddressBuffer buffer, uint offset, uint value) {
+  uint original_value = 0;
+  buffer.InterlockedXor(offset, value, original_value);
+  return original_value;
+}
+
 RWByteAddressBuffer sb_rw : register(u0, space0);
 
 void atomicXor_54510e() {
-  uint atomic_result = 0u;
-  sb_rw.InterlockedXor(0u, 1u, atomic_result);
-  uint res = atomic_result;
+  uint res = atomicXor_1(sb_rw, 0u, 1u);
 }
 
 void fragment_main() {
diff --git a/test/intrinsics/gen/atomicXor/c1b78c.wgsl.expected.hlsl b/test/intrinsics/gen/atomicXor/c1b78c.wgsl.expected.hlsl
index d10eedd..7d483b7 100644
--- a/test/intrinsics/gen/atomicXor/c1b78c.wgsl.expected.hlsl
+++ b/test/intrinsics/gen/atomicXor/c1b78c.wgsl.expected.hlsl
@@ -1,9 +1,13 @@
+int atomicXor_1(RWByteAddressBuffer buffer, uint offset, int value) {
+  int original_value = 0;
+  buffer.InterlockedXor(offset, value, original_value);
+  return original_value;
+}
+
 RWByteAddressBuffer sb_rw : register(u0, space0);
 
 void atomicXor_c1b78c() {
-  int atomic_result = 0;
-  sb_rw.InterlockedXor(0u, 1, atomic_result);
-  int res = atomic_result;
+  int res = atomicXor_1(sb_rw, 0u, 1);
 }
 
 void fragment_main() {