Tint/transform: Fix insertBits polyfill for HLSL

This CL fixes the insertBits polyfill to handle the left-shifting
behavior in HLSL, i.e. `(1u << 32u) == (1u << 0u) == 1u`, where we want
the result to be `0u`.

Fixed: tint:1743
Change-Id: Ibb82abe4ab2f76dbb0fa06057fb19f15f961d969
Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/108166
Commit-Queue: Zhaoming Jiang <zhaoming.jiang@intel.com>
Reviewed-by: Ben Clayton <bclayton@google.com>
Kokoro: Kokoro <noreply+kokoro@google.com>
diff --git a/src/tint/transform/builtin_polyfill.cc b/src/tint/transform/builtin_polyfill.cc
index e80436d..17fcc20 100644
--- a/src/tint/transform/builtin_polyfill.cc
+++ b/src/tint/transform/builtin_polyfill.cc
@@ -462,6 +462,14 @@
         auto name = b.Symbols().New("tint_insert_bits");
         uint32_t width = WidthOf(ty);
 
+        // Currently in WGSL parameters of insertBits must be i32, u32, vecN<i32> or vecN<u32>
+        if (!sem::Type::DeepestElementOf(ty)->IsAnyOf<sem::I32, sem::U32>()) {
+            TINT_ICE(Transform, b.Diagnostics())
+                << "insertBits polyfill only support i32, u32, and vector of i32 or u32, got "
+                << b.FriendlyName(ty);
+            return {};
+        }
+
         constexpr uint32_t W = 32u;  // 32-bit
 
         auto V = [&](auto value) -> const ast::Expression* {
@@ -481,21 +489,60 @@
             return b.vec(b.ty.u32(), width, value);
         };
 
-        utils::Vector<const ast::Statement*, 8> body = {
-            b.Decl(b.Let("s", b.Call("min", "offset", u32(W)))),
-            b.Decl(b.Let("e", b.Call("min", u32(W), b.Add("s", "count")))),
-        };
+        // Polyfill algorithm:
+        //      s = min(offset, 32u);
+        //      e = min(32u, (s + count));
+        //      mask = (((1u << s) - 1u) ^ ((1u << e) - 1u));
+        //      return (((n << s) & mask) | (v & ~(mask)));
+        // Note that the algorithm above uses left-shifting in the C++ manner, but in WGSL, HLSL
+        // and MSL the rhs is taken modulo the bit-width of the lhs (32u in this case), and in
+        // GLSL the result is undefined if the rhs is greater than or equal to the bit-width of
+        // the lhs. The results of `x << y` in C++ and HLSL differ when `y >= 32u`, and the `s`
+        // and `e` defined above can be 32u, so these cases must be handled specially. Wherever y
+        // can be greater than or equal to 32u, replace `(x << y)` with
+        // `select(Tx(), x << y, y < 32u)`, in which `Tx` is the type of x.
+        // WGSL polyfill function:
+        //      fn tint_insert_bits(v : T, n : T, offset : u32, count : u32) -> T {
+        //          let e = offset + count;
+        //          let mask = (
+        //                        (select(0u, 1u << offset, offset < 32u) - 1u) ^
+        //                        (select(0u, 1u << e, e < 32u) - 1u)
+        //                     );
+        //          return ((select(T(), n << offset, offset < 32u) & mask) | (v & ~(mask)));
+        //      }
+
+        utils::Vector<const ast::Statement*, 8> body;
 
         switch (polyfill.insert_bits) {
             case Level::kFull:
-                // let mask = ((1 << s) - 1) ^ ((1 << e) - 1)
+                // let e = offset + count;
+                body.Push(b.Decl(b.Let("e", b.Add("offset", "count"))));
+
+                // let mask = (
+                //              (select(0u, 1u << offset, offset < 32u) - 1u) ^
+                //              (select(0u, 1u << e, e < 32u) - 1u)
+                //            );
                 body.Push(b.Decl(b.Let(
-                    "mask", b.Xor(b.Sub(b.Shl(1_u, "s"), 1_u), b.Sub(b.Shl(1_u, "e"), 1_u)))));
-                // return ((n << s) & mask) | (v & ~mask)
-                body.Push(b.Return(b.Or(b.And(b.Shl("n", U("s")), V("mask")),
-                                        b.And("v", V(b.Complement("mask"))))));
+                    "mask",
+                    b.Xor(  //
+                        b.Sub(
+                            b.Call("select", 0_u, b.Shl(1_u, "offset"), b.LessThan("offset", 32_u)),
+                            1_u),
+                        b.Sub(b.Call("select", 0_u, b.Shl(1_u, "e"), b.LessThan("e", 32_u)),
+                              1_u)  //
+                        ))));
+
+                // return ((select(T(), n << offset, offset < 32u) & mask) | (v & ~(mask)));
+                body.Push(
+                    b.Return(b.Or(b.And(b.Call("select", b.Construct(T(ty)),
+                                               b.Shl("n", U("offset")), b.LessThan("offset", 32_u)),
+                                        V("mask")),
+                                  b.And("v", V(b.Complement("mask"))))));
+
                 break;
             case Level::kClampParameters:
+                body.Push(b.Decl(b.Let("s", b.Call("min", "offset", u32(W)))));
+                body.Push(b.Decl(b.Let("e", b.Call("min", u32(W), b.Add("s", "count")))));
                 body.Push(b.Return(b.Call("insertBits", "v", "n", "s", b.Sub("e", "s"))));
                 break;
             default:
diff --git a/src/tint/transform/builtin_polyfill_test.cc b/src/tint/transform/builtin_polyfill_test.cc
index 3b7a42c..1e380d4 100644
--- a/src/tint/transform/builtin_polyfill_test.cc
+++ b/src/tint/transform/builtin_polyfill_test.cc
@@ -1722,10 +1722,9 @@
 
     auto* expect = R"(
 fn tint_insert_bits(v : i32, n : i32, offset : u32, count : u32) -> i32 {
-  let s = min(offset, 32u);
-  let e = min(32u, (s + count));
-  let mask = (((1u << s) - 1u) ^ ((1u << e) - 1u));
-  return (((n << s) & i32(mask)) | (v & i32(~(mask))));
+  let e = (offset + count);
+  let mask = ((select(0u, (1u << offset), (offset < 32u)) - 1u) ^ (select(0u, (1u << e), (e < 32u)) - 1u));
+  return ((select(i32(), (n << offset), (offset < 32u)) & i32(mask)) | (v & i32(~(mask))));
 }
 
 fn f() {
@@ -1749,10 +1748,9 @@
 
     auto* expect = R"(
 fn tint_insert_bits(v : u32, n : u32, offset : u32, count : u32) -> u32 {
-  let s = min(offset, 32u);
-  let e = min(32u, (s + count));
-  let mask = (((1u << s) - 1u) ^ ((1u << e) - 1u));
-  return (((n << s) & mask) | (v & ~(mask)));
+  let e = (offset + count);
+  let mask = ((select(0u, (1u << offset), (offset < 32u)) - 1u) ^ (select(0u, (1u << e), (e < 32u)) - 1u));
+  return ((select(u32(), (n << offset), (offset < 32u)) & mask) | (v & ~(mask)));
 }
 
 fn f() {
@@ -1776,10 +1774,9 @@
 
     auto* expect = R"(
 fn tint_insert_bits(v : vec3<i32>, n : vec3<i32>, offset : u32, count : u32) -> vec3<i32> {
-  let s = min(offset, 32u);
-  let e = min(32u, (s + count));
-  let mask = (((1u << s) - 1u) ^ ((1u << e) - 1u));
-  return (((n << vec3<u32>(s)) & vec3<i32>(i32(mask))) | (v & vec3<i32>(i32(~(mask)))));
+  let e = (offset + count);
+  let mask = ((select(0u, (1u << offset), (offset < 32u)) - 1u) ^ (select(0u, (1u << e), (e < 32u)) - 1u));
+  return ((select(vec3<i32>(), (n << vec3<u32>(offset)), (offset < 32u)) & vec3<i32>(i32(mask))) | (v & vec3<i32>(i32(~(mask)))));
 }
 
 fn f() {
@@ -1803,10 +1800,9 @@
 
     auto* expect = R"(
 fn tint_insert_bits(v : vec3<u32>, n : vec3<u32>, offset : u32, count : u32) -> vec3<u32> {
-  let s = min(offset, 32u);
-  let e = min(32u, (s + count));
-  let mask = (((1u << s) - 1u) ^ ((1u << e) - 1u));
-  return (((n << vec3<u32>(s)) & vec3<u32>(mask)) | (v & vec3<u32>(~(mask))));
+  let e = (offset + count);
+  let mask = ((select(0u, (1u << offset), (offset < 32u)) - 1u) ^ (select(0u, (1u << e), (e < 32u)) - 1u));
+  return ((select(vec3<u32>(), (n << vec3<u32>(offset)), (offset < 32u)) & vec3<u32>(mask)) | (v & vec3<u32>(~(mask))));
 }
 
 fn f() {
diff --git a/test/tint/builtins/gen/var/insertBits/3c7ba5.wgsl.expected.dxc.hlsl b/test/tint/builtins/gen/var/insertBits/3c7ba5.wgsl.expected.dxc.hlsl
index e84e9b8..1e92e80 100644
--- a/test/tint/builtins/gen/var/insertBits/3c7ba5.wgsl.expected.dxc.hlsl
+++ b/test/tint/builtins/gen/var/insertBits/3c7ba5.wgsl.expected.dxc.hlsl
@@ -1,8 +1,7 @@
 uint2 tint_insert_bits(uint2 v, uint2 n, uint offset, uint count) {
-  const uint s = min(offset, 32u);
-  const uint e = min(32u, (s + count));
-  const uint mask = (((1u << s) - 1u) ^ ((1u << e) - 1u));
-  return (((n << uint2((s).xx)) & uint2((mask).xx)) | (v & uint2((~(mask)).xx)));
+  const uint e = (offset + count);
+  const uint mask = ((((offset < 32u) ? (1u << offset) : 0u) - 1u) ^ (((e < 32u) ? (1u << e) : 0u) - 1u));
+  return ((((offset < 32u) ? (n << uint2((offset).xx)) : (0u).xx) & uint2((mask).xx)) | (v & uint2((~(mask)).xx)));
 }
 
 void insertBits_3c7ba5() {
diff --git a/test/tint/builtins/gen/var/insertBits/3c7ba5.wgsl.expected.fxc.hlsl b/test/tint/builtins/gen/var/insertBits/3c7ba5.wgsl.expected.fxc.hlsl
index e84e9b8..1e92e80 100644
--- a/test/tint/builtins/gen/var/insertBits/3c7ba5.wgsl.expected.fxc.hlsl
+++ b/test/tint/builtins/gen/var/insertBits/3c7ba5.wgsl.expected.fxc.hlsl
@@ -1,8 +1,7 @@
 uint2 tint_insert_bits(uint2 v, uint2 n, uint offset, uint count) {
-  const uint s = min(offset, 32u);
-  const uint e = min(32u, (s + count));
-  const uint mask = (((1u << s) - 1u) ^ ((1u << e) - 1u));
-  return (((n << uint2((s).xx)) & uint2((mask).xx)) | (v & uint2((~(mask)).xx)));
+  const uint e = (offset + count);
+  const uint mask = ((((offset < 32u) ? (1u << offset) : 0u) - 1u) ^ (((e < 32u) ? (1u << e) : 0u) - 1u));
+  return ((((offset < 32u) ? (n << uint2((offset).xx)) : (0u).xx) & uint2((mask).xx)) | (v & uint2((~(mask)).xx)));
 }
 
 void insertBits_3c7ba5() {
diff --git a/test/tint/builtins/gen/var/insertBits/428b0b.wgsl.expected.dxc.hlsl b/test/tint/builtins/gen/var/insertBits/428b0b.wgsl.expected.dxc.hlsl
index 2991280..f863d24 100644
--- a/test/tint/builtins/gen/var/insertBits/428b0b.wgsl.expected.dxc.hlsl
+++ b/test/tint/builtins/gen/var/insertBits/428b0b.wgsl.expected.dxc.hlsl
@@ -1,8 +1,7 @@
 int3 tint_insert_bits(int3 v, int3 n, uint offset, uint count) {
-  const uint s = min(offset, 32u);
-  const uint e = min(32u, (s + count));
-  const uint mask = (((1u << s) - 1u) ^ ((1u << e) - 1u));
-  return (((n << uint3((s).xxx)) & int3((int(mask)).xxx)) | (v & int3((int(~(mask))).xxx)));
+  const uint e = (offset + count);
+  const uint mask = ((((offset < 32u) ? (1u << offset) : 0u) - 1u) ^ (((e < 32u) ? (1u << e) : 0u) - 1u));
+  return ((((offset < 32u) ? (n << uint3((offset).xxx)) : (0).xxx) & int3((int(mask)).xxx)) | (v & int3((int(~(mask))).xxx)));
 }
 
 void insertBits_428b0b() {
diff --git a/test/tint/builtins/gen/var/insertBits/428b0b.wgsl.expected.fxc.hlsl b/test/tint/builtins/gen/var/insertBits/428b0b.wgsl.expected.fxc.hlsl
index 2991280..f863d24 100644
--- a/test/tint/builtins/gen/var/insertBits/428b0b.wgsl.expected.fxc.hlsl
+++ b/test/tint/builtins/gen/var/insertBits/428b0b.wgsl.expected.fxc.hlsl
@@ -1,8 +1,7 @@
 int3 tint_insert_bits(int3 v, int3 n, uint offset, uint count) {
-  const uint s = min(offset, 32u);
-  const uint e = min(32u, (s + count));
-  const uint mask = (((1u << s) - 1u) ^ ((1u << e) - 1u));
-  return (((n << uint3((s).xxx)) & int3((int(mask)).xxx)) | (v & int3((int(~(mask))).xxx)));
+  const uint e = (offset + count);
+  const uint mask = ((((offset < 32u) ? (1u << offset) : 0u) - 1u) ^ (((e < 32u) ? (1u << e) : 0u) - 1u));
+  return ((((offset < 32u) ? (n << uint3((offset).xxx)) : (0).xxx) & int3((int(mask)).xxx)) | (v & int3((int(~(mask))).xxx)));
 }
 
 void insertBits_428b0b() {
diff --git a/test/tint/builtins/gen/var/insertBits/51ede1.wgsl.expected.dxc.hlsl b/test/tint/builtins/gen/var/insertBits/51ede1.wgsl.expected.dxc.hlsl
index 46f552a..e0f4cf3 100644
--- a/test/tint/builtins/gen/var/insertBits/51ede1.wgsl.expected.dxc.hlsl
+++ b/test/tint/builtins/gen/var/insertBits/51ede1.wgsl.expected.dxc.hlsl
@@ -1,8 +1,7 @@
 uint4 tint_insert_bits(uint4 v, uint4 n, uint offset, uint count) {
-  const uint s = min(offset, 32u);
-  const uint e = min(32u, (s + count));
-  const uint mask = (((1u << s) - 1u) ^ ((1u << e) - 1u));
-  return (((n << uint4((s).xxxx)) & uint4((mask).xxxx)) | (v & uint4((~(mask)).xxxx)));
+  const uint e = (offset + count);
+  const uint mask = ((((offset < 32u) ? (1u << offset) : 0u) - 1u) ^ (((e < 32u) ? (1u << e) : 0u) - 1u));
+  return ((((offset < 32u) ? (n << uint4((offset).xxxx)) : (0u).xxxx) & uint4((mask).xxxx)) | (v & uint4((~(mask)).xxxx)));
 }
 
 void insertBits_51ede1() {
diff --git a/test/tint/builtins/gen/var/insertBits/51ede1.wgsl.expected.fxc.hlsl b/test/tint/builtins/gen/var/insertBits/51ede1.wgsl.expected.fxc.hlsl
index 46f552a..e0f4cf3 100644
--- a/test/tint/builtins/gen/var/insertBits/51ede1.wgsl.expected.fxc.hlsl
+++ b/test/tint/builtins/gen/var/insertBits/51ede1.wgsl.expected.fxc.hlsl
@@ -1,8 +1,7 @@
 uint4 tint_insert_bits(uint4 v, uint4 n, uint offset, uint count) {
-  const uint s = min(offset, 32u);
-  const uint e = min(32u, (s + count));
-  const uint mask = (((1u << s) - 1u) ^ ((1u << e) - 1u));
-  return (((n << uint4((s).xxxx)) & uint4((mask).xxxx)) | (v & uint4((~(mask)).xxxx)));
+  const uint e = (offset + count);
+  const uint mask = ((((offset < 32u) ? (1u << offset) : 0u) - 1u) ^ (((e < 32u) ? (1u << e) : 0u) - 1u));
+  return ((((offset < 32u) ? (n << uint4((offset).xxxx)) : (0u).xxxx) & uint4((mask).xxxx)) | (v & uint4((~(mask)).xxxx)));
 }
 
 void insertBits_51ede1() {
diff --git a/test/tint/builtins/gen/var/insertBits/65468b.wgsl.expected.dxc.hlsl b/test/tint/builtins/gen/var/insertBits/65468b.wgsl.expected.dxc.hlsl
index f87cc19..49ac66a 100644
--- a/test/tint/builtins/gen/var/insertBits/65468b.wgsl.expected.dxc.hlsl
+++ b/test/tint/builtins/gen/var/insertBits/65468b.wgsl.expected.dxc.hlsl
@@ -1,8 +1,7 @@
 int tint_insert_bits(int v, int n, uint offset, uint count) {
-  const uint s = min(offset, 32u);
-  const uint e = min(32u, (s + count));
-  const uint mask = (((1u << s) - 1u) ^ ((1u << e) - 1u));
-  return (((n << s) & int(mask)) | (v & int(~(mask))));
+  const uint e = (offset + count);
+  const uint mask = ((((offset < 32u) ? (1u << offset) : 0u) - 1u) ^ (((e < 32u) ? (1u << e) : 0u) - 1u));
+  return ((((offset < 32u) ? (n << offset) : 0) & int(mask)) | (v & int(~(mask))));
 }
 
 void insertBits_65468b() {
diff --git a/test/tint/builtins/gen/var/insertBits/65468b.wgsl.expected.fxc.hlsl b/test/tint/builtins/gen/var/insertBits/65468b.wgsl.expected.fxc.hlsl
index f87cc19..49ac66a 100644
--- a/test/tint/builtins/gen/var/insertBits/65468b.wgsl.expected.fxc.hlsl
+++ b/test/tint/builtins/gen/var/insertBits/65468b.wgsl.expected.fxc.hlsl
@@ -1,8 +1,7 @@
 int tint_insert_bits(int v, int n, uint offset, uint count) {
-  const uint s = min(offset, 32u);
-  const uint e = min(32u, (s + count));
-  const uint mask = (((1u << s) - 1u) ^ ((1u << e) - 1u));
-  return (((n << s) & int(mask)) | (v & int(~(mask))));
+  const uint e = (offset + count);
+  const uint mask = ((((offset < 32u) ? (1u << offset) : 0u) - 1u) ^ (((e < 32u) ? (1u << e) : 0u) - 1u));
+  return ((((offset < 32u) ? (n << offset) : 0) & int(mask)) | (v & int(~(mask))));
 }
 
 void insertBits_65468b() {
diff --git a/test/tint/builtins/gen/var/insertBits/87826b.wgsl.expected.dxc.hlsl b/test/tint/builtins/gen/var/insertBits/87826b.wgsl.expected.dxc.hlsl
index a820be8..9b1ac83 100644
--- a/test/tint/builtins/gen/var/insertBits/87826b.wgsl.expected.dxc.hlsl
+++ b/test/tint/builtins/gen/var/insertBits/87826b.wgsl.expected.dxc.hlsl
@@ -1,8 +1,7 @@
 uint3 tint_insert_bits(uint3 v, uint3 n, uint offset, uint count) {
-  const uint s = min(offset, 32u);
-  const uint e = min(32u, (s + count));
-  const uint mask = (((1u << s) - 1u) ^ ((1u << e) - 1u));
-  return (((n << uint3((s).xxx)) & uint3((mask).xxx)) | (v & uint3((~(mask)).xxx)));
+  const uint e = (offset + count);
+  const uint mask = ((((offset < 32u) ? (1u << offset) : 0u) - 1u) ^ (((e < 32u) ? (1u << e) : 0u) - 1u));
+  return ((((offset < 32u) ? (n << uint3((offset).xxx)) : (0u).xxx) & uint3((mask).xxx)) | (v & uint3((~(mask)).xxx)));
 }
 
 void insertBits_87826b() {
diff --git a/test/tint/builtins/gen/var/insertBits/87826b.wgsl.expected.fxc.hlsl b/test/tint/builtins/gen/var/insertBits/87826b.wgsl.expected.fxc.hlsl
index a820be8..9b1ac83 100644
--- a/test/tint/builtins/gen/var/insertBits/87826b.wgsl.expected.fxc.hlsl
+++ b/test/tint/builtins/gen/var/insertBits/87826b.wgsl.expected.fxc.hlsl
@@ -1,8 +1,7 @@
 uint3 tint_insert_bits(uint3 v, uint3 n, uint offset, uint count) {
-  const uint s = min(offset, 32u);
-  const uint e = min(32u, (s + count));
-  const uint mask = (((1u << s) - 1u) ^ ((1u << e) - 1u));
-  return (((n << uint3((s).xxx)) & uint3((mask).xxx)) | (v & uint3((~(mask)).xxx)));
+  const uint e = (offset + count);
+  const uint mask = ((((offset < 32u) ? (1u << offset) : 0u) - 1u) ^ (((e < 32u) ? (1u << e) : 0u) - 1u));
+  return ((((offset < 32u) ? (n << uint3((offset).xxx)) : (0u).xxx) & uint3((mask).xxx)) | (v & uint3((~(mask)).xxx)));
 }
 
 void insertBits_87826b() {
diff --git a/test/tint/builtins/gen/var/insertBits/d86978.wgsl.expected.dxc.hlsl b/test/tint/builtins/gen/var/insertBits/d86978.wgsl.expected.dxc.hlsl
index fdf1dd3..da39ac7 100644
--- a/test/tint/builtins/gen/var/insertBits/d86978.wgsl.expected.dxc.hlsl
+++ b/test/tint/builtins/gen/var/insertBits/d86978.wgsl.expected.dxc.hlsl
@@ -1,8 +1,7 @@
 int4 tint_insert_bits(int4 v, int4 n, uint offset, uint count) {
-  const uint s = min(offset, 32u);
-  const uint e = min(32u, (s + count));
-  const uint mask = (((1u << s) - 1u) ^ ((1u << e) - 1u));
-  return (((n << uint4((s).xxxx)) & int4((int(mask)).xxxx)) | (v & int4((int(~(mask))).xxxx)));
+  const uint e = (offset + count);
+  const uint mask = ((((offset < 32u) ? (1u << offset) : 0u) - 1u) ^ (((e < 32u) ? (1u << e) : 0u) - 1u));
+  return ((((offset < 32u) ? (n << uint4((offset).xxxx)) : (0).xxxx) & int4((int(mask)).xxxx)) | (v & int4((int(~(mask))).xxxx)));
 }
 
 void insertBits_d86978() {
diff --git a/test/tint/builtins/gen/var/insertBits/d86978.wgsl.expected.fxc.hlsl b/test/tint/builtins/gen/var/insertBits/d86978.wgsl.expected.fxc.hlsl
index fdf1dd3..da39ac7 100644
--- a/test/tint/builtins/gen/var/insertBits/d86978.wgsl.expected.fxc.hlsl
+++ b/test/tint/builtins/gen/var/insertBits/d86978.wgsl.expected.fxc.hlsl
@@ -1,8 +1,7 @@
 int4 tint_insert_bits(int4 v, int4 n, uint offset, uint count) {
-  const uint s = min(offset, 32u);
-  const uint e = min(32u, (s + count));
-  const uint mask = (((1u << s) - 1u) ^ ((1u << e) - 1u));
-  return (((n << uint4((s).xxxx)) & int4((int(mask)).xxxx)) | (v & int4((int(~(mask))).xxxx)));
+  const uint e = (offset + count);
+  const uint mask = ((((offset < 32u) ? (1u << offset) : 0u) - 1u) ^ (((e < 32u) ? (1u << e) : 0u) - 1u));
+  return ((((offset < 32u) ? (n << uint4((offset).xxxx)) : (0).xxxx) & int4((int(mask)).xxxx)) | (v & int4((int(~(mask))).xxxx)));
 }
 
 void insertBits_d86978() {
diff --git a/test/tint/builtins/gen/var/insertBits/e3e3a2.wgsl.expected.dxc.hlsl b/test/tint/builtins/gen/var/insertBits/e3e3a2.wgsl.expected.dxc.hlsl
index cfbbeee..ea68815 100644
--- a/test/tint/builtins/gen/var/insertBits/e3e3a2.wgsl.expected.dxc.hlsl
+++ b/test/tint/builtins/gen/var/insertBits/e3e3a2.wgsl.expected.dxc.hlsl
@@ -1,8 +1,7 @@
 uint tint_insert_bits(uint v, uint n, uint offset, uint count) {
-  const uint s = min(offset, 32u);
-  const uint e = min(32u, (s + count));
-  const uint mask = (((1u << s) - 1u) ^ ((1u << e) - 1u));
-  return (((n << s) & mask) | (v & ~(mask)));
+  const uint e = (offset + count);
+  const uint mask = ((((offset < 32u) ? (1u << offset) : 0u) - 1u) ^ (((e < 32u) ? (1u << e) : 0u) - 1u));
+  return ((((offset < 32u) ? (n << offset) : 0u) & mask) | (v & ~(mask)));
 }
 
 void insertBits_e3e3a2() {
diff --git a/test/tint/builtins/gen/var/insertBits/e3e3a2.wgsl.expected.fxc.hlsl b/test/tint/builtins/gen/var/insertBits/e3e3a2.wgsl.expected.fxc.hlsl
index cfbbeee..ea68815 100644
--- a/test/tint/builtins/gen/var/insertBits/e3e3a2.wgsl.expected.fxc.hlsl
+++ b/test/tint/builtins/gen/var/insertBits/e3e3a2.wgsl.expected.fxc.hlsl
@@ -1,8 +1,7 @@
 uint tint_insert_bits(uint v, uint n, uint offset, uint count) {
-  const uint s = min(offset, 32u);
-  const uint e = min(32u, (s + count));
-  const uint mask = (((1u << s) - 1u) ^ ((1u << e) - 1u));
-  return (((n << s) & mask) | (v & ~(mask)));
+  const uint e = (offset + count);
+  const uint mask = ((((offset < 32u) ? (1u << offset) : 0u) - 1u) ^ (((e < 32u) ? (1u << e) : 0u) - 1u));
+  return ((((offset < 32u) ? (n << offset) : 0u) & mask) | (v & ~(mask)));
 }
 
 void insertBits_e3e3a2() {
diff --git a/test/tint/builtins/gen/var/insertBits/fe6ba6.wgsl.expected.dxc.hlsl b/test/tint/builtins/gen/var/insertBits/fe6ba6.wgsl.expected.dxc.hlsl
index d2a3073..e94b11d 100644
--- a/test/tint/builtins/gen/var/insertBits/fe6ba6.wgsl.expected.dxc.hlsl
+++ b/test/tint/builtins/gen/var/insertBits/fe6ba6.wgsl.expected.dxc.hlsl
@@ -1,8 +1,7 @@
 int2 tint_insert_bits(int2 v, int2 n, uint offset, uint count) {
-  const uint s = min(offset, 32u);
-  const uint e = min(32u, (s + count));
-  const uint mask = (((1u << s) - 1u) ^ ((1u << e) - 1u));
-  return (((n << uint2((s).xx)) & int2((int(mask)).xx)) | (v & int2((int(~(mask))).xx)));
+  const uint e = (offset + count);
+  const uint mask = ((((offset < 32u) ? (1u << offset) : 0u) - 1u) ^ (((e < 32u) ? (1u << e) : 0u) - 1u));
+  return ((((offset < 32u) ? (n << uint2((offset).xx)) : (0).xx) & int2((int(mask)).xx)) | (v & int2((int(~(mask))).xx)));
 }
 
 void insertBits_fe6ba6() {
diff --git a/test/tint/builtins/gen/var/insertBits/fe6ba6.wgsl.expected.fxc.hlsl b/test/tint/builtins/gen/var/insertBits/fe6ba6.wgsl.expected.fxc.hlsl
index d2a3073..e94b11d 100644
--- a/test/tint/builtins/gen/var/insertBits/fe6ba6.wgsl.expected.fxc.hlsl
+++ b/test/tint/builtins/gen/var/insertBits/fe6ba6.wgsl.expected.fxc.hlsl
@@ -1,8 +1,7 @@
 int2 tint_insert_bits(int2 v, int2 n, uint offset, uint count) {
-  const uint s = min(offset, 32u);
-  const uint e = min(32u, (s + count));
-  const uint mask = (((1u << s) - 1u) ^ ((1u << e) - 1u));
-  return (((n << uint2((s).xx)) & int2((int(mask)).xx)) | (v & int2((int(~(mask))).xx)));
+  const uint e = (offset + count);
+  const uint mask = ((((offset < 32u) ? (1u << offset) : 0u) - 1u) ^ (((e < 32u) ? (1u << e) : 0u) - 1u));
+  return ((((offset < 32u) ? (n << uint2((offset).xx)) : (0).xx) & int2((int(mask)).xx)) | (v & int2((int(~(mask))).xx)));
 }
 
 void insertBits_fe6ba6() {
diff --git a/test/tint/builtins/insertBits/scalar/i32.spvasm.expected.dxc.hlsl b/test/tint/builtins/insertBits/scalar/i32.spvasm.expected.dxc.hlsl
index 7c8f828..0b042e2 100644
--- a/test/tint/builtins/insertBits/scalar/i32.spvasm.expected.dxc.hlsl
+++ b/test/tint/builtins/insertBits/scalar/i32.spvasm.expected.dxc.hlsl
@@ -1,8 +1,7 @@
 int tint_insert_bits(int v, int n, uint offset, uint count) {
-  const uint s = min(offset, 32u);
-  const uint e = min(32u, (s + count));
-  const uint mask = (((1u << s) - 1u) ^ ((1u << e) - 1u));
-  return (((n << s) & int(mask)) | (v & int(~(mask))));
+  const uint e = (offset + count);
+  const uint mask = ((((offset < 32u) ? (1u << offset) : 0u) - 1u) ^ (((e < 32u) ? (1u << e) : 0u) - 1u));
+  return ((((offset < 32u) ? (n << offset) : 0) & int(mask)) | (v & int(~(mask))));
 }
 
 void f_1() {
diff --git a/test/tint/builtins/insertBits/scalar/i32.spvasm.expected.fxc.hlsl b/test/tint/builtins/insertBits/scalar/i32.spvasm.expected.fxc.hlsl
index 7c8f828..0b042e2 100644
--- a/test/tint/builtins/insertBits/scalar/i32.spvasm.expected.fxc.hlsl
+++ b/test/tint/builtins/insertBits/scalar/i32.spvasm.expected.fxc.hlsl
@@ -1,8 +1,7 @@
 int tint_insert_bits(int v, int n, uint offset, uint count) {
-  const uint s = min(offset, 32u);
-  const uint e = min(32u, (s + count));
-  const uint mask = (((1u << s) - 1u) ^ ((1u << e) - 1u));
-  return (((n << s) & int(mask)) | (v & int(~(mask))));
+  const uint e = (offset + count);
+  const uint mask = ((((offset < 32u) ? (1u << offset) : 0u) - 1u) ^ (((e < 32u) ? (1u << e) : 0u) - 1u));
+  return ((((offset < 32u) ? (n << offset) : 0) & int(mask)) | (v & int(~(mask))));
 }
 
 void f_1() {
diff --git a/test/tint/builtins/insertBits/scalar/u32.spvasm.expected.dxc.hlsl b/test/tint/builtins/insertBits/scalar/u32.spvasm.expected.dxc.hlsl
index d416185..e061c8b 100644
--- a/test/tint/builtins/insertBits/scalar/u32.spvasm.expected.dxc.hlsl
+++ b/test/tint/builtins/insertBits/scalar/u32.spvasm.expected.dxc.hlsl
@@ -1,8 +1,7 @@
 uint tint_insert_bits(uint v, uint n, uint offset, uint count) {
-  const uint s = min(offset, 32u);
-  const uint e = min(32u, (s + count));
-  const uint mask = (((1u << s) - 1u) ^ ((1u << e) - 1u));
-  return (((n << s) & mask) | (v & ~(mask)));
+  const uint e = (offset + count);
+  const uint mask = ((((offset < 32u) ? (1u << offset) : 0u) - 1u) ^ (((e < 32u) ? (1u << e) : 0u) - 1u));
+  return ((((offset < 32u) ? (n << offset) : 0u) & mask) | (v & ~(mask)));
 }
 
 void f_1() {
diff --git a/test/tint/builtins/insertBits/scalar/u32.spvasm.expected.fxc.hlsl b/test/tint/builtins/insertBits/scalar/u32.spvasm.expected.fxc.hlsl
index d416185..e061c8b 100644
--- a/test/tint/builtins/insertBits/scalar/u32.spvasm.expected.fxc.hlsl
+++ b/test/tint/builtins/insertBits/scalar/u32.spvasm.expected.fxc.hlsl
@@ -1,8 +1,7 @@
 uint tint_insert_bits(uint v, uint n, uint offset, uint count) {
-  const uint s = min(offset, 32u);
-  const uint e = min(32u, (s + count));
-  const uint mask = (((1u << s) - 1u) ^ ((1u << e) - 1u));
-  return (((n << s) & mask) | (v & ~(mask)));
+  const uint e = (offset + count);
+  const uint mask = ((((offset < 32u) ? (1u << offset) : 0u) - 1u) ^ (((e < 32u) ? (1u << e) : 0u) - 1u));
+  return ((((offset < 32u) ? (n << offset) : 0u) & mask) | (v & ~(mask)));
 }
 
 void f_1() {
diff --git a/test/tint/builtins/insertBits/vec3/i32.spvasm.expected.dxc.hlsl b/test/tint/builtins/insertBits/vec3/i32.spvasm.expected.dxc.hlsl
index d7f1951..4f3340b 100644
--- a/test/tint/builtins/insertBits/vec3/i32.spvasm.expected.dxc.hlsl
+++ b/test/tint/builtins/insertBits/vec3/i32.spvasm.expected.dxc.hlsl
@@ -1,8 +1,7 @@
 int3 tint_insert_bits(int3 v, int3 n, uint offset, uint count) {
-  const uint s = min(offset, 32u);
-  const uint e = min(32u, (s + count));
-  const uint mask = (((1u << s) - 1u) ^ ((1u << e) - 1u));
-  return (((n << uint3((s).xxx)) & int3((int(mask)).xxx)) | (v & int3((int(~(mask))).xxx)));
+  const uint e = (offset + count);
+  const uint mask = ((((offset < 32u) ? (1u << offset) : 0u) - 1u) ^ (((e < 32u) ? (1u << e) : 0u) - 1u));
+  return ((((offset < 32u) ? (n << uint3((offset).xxx)) : (0).xxx) & int3((int(mask)).xxx)) | (v & int3((int(~(mask))).xxx)));
 }
 
 void f_1() {
diff --git a/test/tint/builtins/insertBits/vec3/i32.spvasm.expected.fxc.hlsl b/test/tint/builtins/insertBits/vec3/i32.spvasm.expected.fxc.hlsl
index d7f1951..4f3340b 100644
--- a/test/tint/builtins/insertBits/vec3/i32.spvasm.expected.fxc.hlsl
+++ b/test/tint/builtins/insertBits/vec3/i32.spvasm.expected.fxc.hlsl
@@ -1,8 +1,7 @@
 int3 tint_insert_bits(int3 v, int3 n, uint offset, uint count) {
-  const uint s = min(offset, 32u);
-  const uint e = min(32u, (s + count));
-  const uint mask = (((1u << s) - 1u) ^ ((1u << e) - 1u));
-  return (((n << uint3((s).xxx)) & int3((int(mask)).xxx)) | (v & int3((int(~(mask))).xxx)));
+  const uint e = (offset + count);
+  const uint mask = ((((offset < 32u) ? (1u << offset) : 0u) - 1u) ^ (((e < 32u) ? (1u << e) : 0u) - 1u));
+  return ((((offset < 32u) ? (n << uint3((offset).xxx)) : (0).xxx) & int3((int(mask)).xxx)) | (v & int3((int(~(mask))).xxx)));
 }
 
 void f_1() {
diff --git a/test/tint/builtins/insertBits/vec3/u32.spvasm.expected.dxc.hlsl b/test/tint/builtins/insertBits/vec3/u32.spvasm.expected.dxc.hlsl
index ebd4fb3..a778070 100644
--- a/test/tint/builtins/insertBits/vec3/u32.spvasm.expected.dxc.hlsl
+++ b/test/tint/builtins/insertBits/vec3/u32.spvasm.expected.dxc.hlsl
@@ -1,8 +1,7 @@
 uint3 tint_insert_bits(uint3 v, uint3 n, uint offset, uint count) {
-  const uint s = min(offset, 32u);
-  const uint e = min(32u, (s + count));
-  const uint mask = (((1u << s) - 1u) ^ ((1u << e) - 1u));
-  return (((n << uint3((s).xxx)) & uint3((mask).xxx)) | (v & uint3((~(mask)).xxx)));
+  const uint e = (offset + count);
+  const uint mask = ((((offset < 32u) ? (1u << offset) : 0u) - 1u) ^ (((e < 32u) ? (1u << e) : 0u) - 1u));
+  return ((((offset < 32u) ? (n << uint3((offset).xxx)) : (0u).xxx) & uint3((mask).xxx)) | (v & uint3((~(mask)).xxx)));
 }
 
 void f_1() {
diff --git a/test/tint/builtins/insertBits/vec3/u32.spvasm.expected.fxc.hlsl b/test/tint/builtins/insertBits/vec3/u32.spvasm.expected.fxc.hlsl
index ebd4fb3..a778070 100644
--- a/test/tint/builtins/insertBits/vec3/u32.spvasm.expected.fxc.hlsl
+++ b/test/tint/builtins/insertBits/vec3/u32.spvasm.expected.fxc.hlsl
@@ -1,8 +1,7 @@
 uint3 tint_insert_bits(uint3 v, uint3 n, uint offset, uint count) {
-  const uint s = min(offset, 32u);
-  const uint e = min(32u, (s + count));
-  const uint mask = (((1u << s) - 1u) ^ ((1u << e) - 1u));
-  return (((n << uint3((s).xxx)) & uint3((mask).xxx)) | (v & uint3((~(mask)).xxx)));
+  const uint e = (offset + count);
+  const uint mask = ((((offset < 32u) ? (1u << offset) : 0u) - 1u) ^ (((e < 32u) ? (1u << e) : 0u) - 1u));
+  return ((((offset < 32u) ? (n << uint3((offset).xxx)) : (0u).xxx) & uint3((mask).xxx)) | (v & uint3((~(mask)).xxx)));
 }
 
 void f_1() {
diff --git a/webgpu-cts/expectations.txt b/webgpu-cts/expectations.txt
index 7e870ac..ed64b8d 100644
--- a/webgpu-cts/expectations.txt
+++ b/webgpu-cts/expectations.txt
@@ -327,30 +327,6 @@
 crbug.com/dawn/0000 [ intel-gen-9 ubuntu ] webgpu:shader,execution,expression,call,builtin,firstTrailingBit:u32:inputSource="uniform";vectorize=2 [ Failure ]
 crbug.com/dawn/0000 [ intel-gen-9 ubuntu ] webgpu:shader,execution,expression,call,builtin,firstTrailingBit:u32:inputSource="uniform";vectorize=3 [ Failure ]
 crbug.com/dawn/0000 [ intel-gen-9 ubuntu ] webgpu:shader,execution,expression,call,builtin,firstTrailingBit:u32:inputSource="uniform";vectorize=4 [ Failure ]
-crbug.com/dawn/0000 [ win10 ] webgpu:shader,execution,expression,call,builtin,insertBits:integer:inputSource="storage_r";signed=false;width=1 [ Failure ]
-crbug.com/dawn/0000 [ win10 ] webgpu:shader,execution,expression,call,builtin,insertBits:integer:inputSource="storage_r";signed=false;width=2 [ Failure ]
-crbug.com/dawn/0000 [ win10 ] webgpu:shader,execution,expression,call,builtin,insertBits:integer:inputSource="storage_r";signed=false;width=3 [ Failure ]
-crbug.com/dawn/0000 [ win10 ] webgpu:shader,execution,expression,call,builtin,insertBits:integer:inputSource="storage_r";signed=false;width=4 [ Failure ]
-crbug.com/dawn/0000 [ win10 ] webgpu:shader,execution,expression,call,builtin,insertBits:integer:inputSource="storage_r";signed=true;width=1 [ Failure ]
-crbug.com/dawn/0000 [ win10 ] webgpu:shader,execution,expression,call,builtin,insertBits:integer:inputSource="storage_r";signed=true;width=2 [ Failure ]
-crbug.com/dawn/0000 [ win10 ] webgpu:shader,execution,expression,call,builtin,insertBits:integer:inputSource="storage_r";signed=true;width=3 [ Failure ]
-crbug.com/dawn/0000 [ win10 ] webgpu:shader,execution,expression,call,builtin,insertBits:integer:inputSource="storage_r";signed=true;width=4 [ Failure ]
-crbug.com/dawn/0000 [ win10 ] webgpu:shader,execution,expression,call,builtin,insertBits:integer:inputSource="storage_rw";signed=false;width=1 [ Failure ]
-crbug.com/dawn/0000 [ win10 ] webgpu:shader,execution,expression,call,builtin,insertBits:integer:inputSource="storage_rw";signed=false;width=2 [ Failure ]
-crbug.com/dawn/0000 [ win10 ] webgpu:shader,execution,expression,call,builtin,insertBits:integer:inputSource="storage_rw";signed=false;width=3 [ Failure ]
-crbug.com/dawn/0000 [ win10 ] webgpu:shader,execution,expression,call,builtin,insertBits:integer:inputSource="storage_rw";signed=false;width=4 [ Failure ]
-crbug.com/dawn/0000 [ win10 ] webgpu:shader,execution,expression,call,builtin,insertBits:integer:inputSource="storage_rw";signed=true;width=1 [ Failure ]
-crbug.com/dawn/0000 [ win10 ] webgpu:shader,execution,expression,call,builtin,insertBits:integer:inputSource="storage_rw";signed=true;width=2 [ Failure ]
-crbug.com/dawn/0000 [ win10 ] webgpu:shader,execution,expression,call,builtin,insertBits:integer:inputSource="storage_rw";signed=true;width=3 [ Failure ]
-crbug.com/dawn/0000 [ win10 ] webgpu:shader,execution,expression,call,builtin,insertBits:integer:inputSource="storage_rw";signed=true;width=4 [ Failure ]
-crbug.com/dawn/0000 [ win10 ] webgpu:shader,execution,expression,call,builtin,insertBits:integer:inputSource="uniform";signed=false;width=1 [ Failure ]
-crbug.com/dawn/0000 [ win10 ] webgpu:shader,execution,expression,call,builtin,insertBits:integer:inputSource="uniform";signed=false;width=2 [ Failure ]
-crbug.com/dawn/0000 [ win10 ] webgpu:shader,execution,expression,call,builtin,insertBits:integer:inputSource="uniform";signed=false;width=3 [ Failure ]
-crbug.com/dawn/0000 [ win10 ] webgpu:shader,execution,expression,call,builtin,insertBits:integer:inputSource="uniform";signed=false;width=4 [ Failure ]
-crbug.com/dawn/0000 [ win10 ] webgpu:shader,execution,expression,call,builtin,insertBits:integer:inputSource="uniform";signed=true;width=1 [ Failure ]
-crbug.com/dawn/0000 [ win10 ] webgpu:shader,execution,expression,call,builtin,insertBits:integer:inputSource="uniform";signed=true;width=2 [ Failure ]
-crbug.com/dawn/0000 [ win10 ] webgpu:shader,execution,expression,call,builtin,insertBits:integer:inputSource="uniform";signed=true;width=3 [ Failure ]
-crbug.com/dawn/0000 [ win10 ] webgpu:shader,execution,expression,call,builtin,insertBits:integer:inputSource="uniform";signed=true;width=4 [ Failure ]
 crbug.com/dawn/0000 [ win10 ] webgpu:shader,execution,expression,call,builtin,sign:f32:inputSource="storage_r";vectorize="_undef_" [ Failure ]
 crbug.com/dawn/0000 [ win10 ] webgpu:shader,execution,expression,call,builtin,sign:f32:inputSource="storage_r";vectorize=2 [ Failure ]
 crbug.com/dawn/0000 [ win10 ] webgpu:shader,execution,expression,call,builtin,sign:f32:inputSource="storage_r";vectorize=3 [ Failure ]