[spirv-reader] Build swizzles for consecutive shuffle accesses For an OpVectorShuffle that accesses both vectors, this allows us to produce swizzles for consecutive indices that fall within the same vector, which can significantly reduce the size of the generated WGSL for some shaders. Fixed: 437398335 Change-Id: I09a6b9eb60ad7314e32150869a8910e9b5fbb430 Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/256734 Commit-Queue: James Price <jrprice@google.com> Reviewed-by: dan sinclair <dsinclair@chromium.org>

commit: 6b79284bd854e3bdcc7fad3554c1fffce8a8039b [log] [tgz]
author: James Price <jrprice@google.com> Mon Aug 11 08:53:12 2025 -0700
committer: Dawn LUCI CQ <dawn-scoped@luci-project-accounts.iam.gserviceaccount.com> Mon Aug 11 08:53:12 2025 -0700
tree: a0a0a3bd8bfdb2a8bad920494dd22c5ff6a663b0
parent: 936e555875e053a28d07340727adba72c600f768 [diff]
diff --git a/src/tint/lang/spirv/reader/parser/composite_test.cc b/src/tint/lang/spirv/reader/parser/composite_test.cc
index a908344..22cc20c 100644
--- a/src/tint/lang/spirv/reader/parser/composite_test.cc
+++ b/src/tint/lang/spirv/reader/parser/composite_test.cc

@@ -1274,7 +1274,7 @@
 )");
 }
 
-TEST_F(SpirvParserTest, VectorShuffle_BothVectors_A) {
+TEST_F(SpirvParserTest, VectorShuffle_AlternateVectors) {
     EXPECT_IR(R"(
                OpCapability Shader
                OpMemoryModel Logical GLSL450
@@ -1293,6 +1293,7 @@
        %tmp1 = OpLoad %v4u32 %vec1
        %tmp2 = OpLoad %v4u32 %vec2
        %shuf = OpVectorShuffle %v4u32 %tmp1 %tmp2 0 5 2 7
+     %result = OpCopyObject %v4u32 %shuf
                OpReturn
                OpFunctionEnd
     )",
@@ -1303,18 +1304,19 @@
     %3:ptr<function, vec4<u32>, read_write> = var undef
     %4:vec4<u32> = load %2
     %5:vec4<u32> = load %3
-    %6:u32 = access %4, 0u
-    %7:u32 = access %5, 1u
-    %8:u32 = access %4, 2u
-    %9:u32 = access %5, 3u
+    %6:u32 = swizzle %4, x
+    %7:u32 = swizzle %5, y
+    %8:u32 = swizzle %4, z
+    %9:u32 = swizzle %5, w
     %10:vec4<u32> = construct %6, %7, %8, %9
+    %11:vec4<u32> = let %10
     ret
   }
 }
 )");
 }
 
-TEST_F(SpirvParserTest, VectorShuffle_BothVectors_B) {
+TEST_F(SpirvParserTest, VectorShuffle_TwoFromA_Then_TwoFromB) {
     EXPECT_IR(R"(
                OpCapability Shader
                OpMemoryModel Logical GLSL450
@@ -1333,6 +1335,7 @@
        %tmp1 = OpLoad %v4u32 %vec1
        %tmp2 = OpLoad %v4u32 %vec2
        %shuf = OpVectorShuffle %v4u32 %tmp1 %tmp2 0 2 5 7
+     %result = OpCopyObject %v4u32 %shuf
                OpReturn
                OpFunctionEnd
     )",
@@ -1343,11 +1346,51 @@
     %3:ptr<function, vec4<u32>, read_write> = var undef
     %4:vec4<u32> = load %2
     %5:vec4<u32> = load %3
-    %6:u32 = access %4, 0u
-    %7:u32 = access %4, 2u
-    %8:u32 = access %5, 1u
-    %9:u32 = access %5, 3u
-    %10:vec4<u32> = construct %6, %7, %8, %9
+    %6:vec2<u32> = swizzle %4, xz
+    %7:vec2<u32> = swizzle %5, yw
+    %8:vec4<u32> = construct %6, %7
+    %9:vec4<u32> = let %8
+    ret
+  }
+}
+)");
+}
+
+TEST_F(SpirvParserTest, VectorShuffle_OneFromA_Then_TwoFromB_Then_OneFromA) {
+    EXPECT_IR(R"(
+               OpCapability Shader
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %main "main"
+               OpExecutionMode %main LocalSize 1 1 1
+       %void = OpTypeVoid
+        %u32 = OpTypeInt 32 0
+      %v4u32 = OpTypeVector %u32 4
+  %v4u32_ptr = OpTypePointer Function %v4u32
+    %ep_type = OpTypeFunction %void
+
+       %main = OpFunction %void None %ep_type
+ %main_start = OpLabel
+       %vec1 = OpVariable %v4u32_ptr Function
+       %vec2 = OpVariable %v4u32_ptr Function
+       %tmp1 = OpLoad %v4u32 %vec1
+       %tmp2 = OpLoad %v4u32 %vec2
+       %shuf = OpVectorShuffle %v4u32 %tmp1 %tmp2 0 5 6 3
+     %result = OpCopyObject %v4u32 %shuf
+               OpReturn
+               OpFunctionEnd
+    )",
+              R"(
+%main = @compute @workgroup_size(1u, 1u, 1u) func():void {
+  $B1: {
+    %2:ptr<function, vec4<u32>, read_write> = var undef
+    %3:ptr<function, vec4<u32>, read_write> = var undef
+    %4:vec4<u32> = load %2
+    %5:vec4<u32> = load %3
+    %6:u32 = swizzle %4, x
+    %7:vec2<u32> = swizzle %5, yz
+    %8:u32 = swizzle %4, w
+    %9:vec4<u32> = construct %6, %7, %8
+    %10:vec4<u32> = let %9
     ret
   }
 }
@@ -1376,6 +1419,7 @@
        %tmp1 = OpLoad %v2u32 %vec1
        %tmp2 = OpLoad %v3u32 %vec2
        %shuf = OpVectorShuffle %v4u32 %tmp1 %tmp2 0 2 1 4
+     %result = OpCopyObject %v4u32 %shuf
                OpReturn
                OpFunctionEnd
     )",
@@ -1386,11 +1430,12 @@
     %3:ptr<function, vec3<u32>, read_write> = var undef
     %4:vec2<u32> = load %2
     %5:vec3<u32> = load %3
-    %6:u32 = access %4, 0u
-    %7:u32 = access %5, 0u
-    %8:u32 = access %4, 1u
-    %9:u32 = access %5, 2u
+    %6:u32 = swizzle %4, x
+    %7:u32 = swizzle %5, x
+    %8:u32 = swizzle %4, y
+    %9:u32 = swizzle %5, z
     %10:vec4<u32> = construct %6, %7, %8, %9
+    %11:vec4<u32> = let %10
     ret
   }
 }
@@ -1419,6 +1464,7 @@
        %tmp1 = OpLoad %v3u32 %vec1
        %tmp2 = OpLoad %v4u32 %vec2
        %shuf = OpVectorShuffle %v2u32 %tmp1 %tmp2 0 4
+     %result = OpCopyObject %v2u32 %shuf
                OpReturn
                OpFunctionEnd
     )",
@@ -1429,9 +1475,10 @@
     %3:ptr<function, vec4<u32>, read_write> = var undef
     %4:vec3<u32> = load %2
     %5:vec4<u32> = load %3
-    %6:u32 = access %4, 0u
-    %7:u32 = access %5, 1u
+    %6:u32 = swizzle %4, x
+    %7:u32 = swizzle %5, y
     %8:vec2<u32> = construct %6, %7
+    %9:vec2<u32> = let %8
     ret
   }
 }
@@ -1457,8 +1504,9 @@
        %tmpA = OpLoad %v4u32 %vecA
        %tmpB = OpLoad %v4u32 %vecB
        %shuf = OpVectorShuffle %v4u32 %tmpA %tmpB 0 4294967295 6 3
-         OpReturn
-         OpFunctionEnd
+     %result = OpCopyObject %v4u32 %shuf
+               OpReturn
+               OpFunctionEnd
     )",
               R"(
 %main = @compute @workgroup_size(1u, 1u, 1u) func():void {
@@ -1467,11 +1515,11 @@
     %3:ptr<function, vec4<u32>, read_write> = var undef
     %4:vec4<u32> = load %2
     %5:vec4<u32> = load %3
-    %6:u32 = access %4, 0u
-    %7:u32 = access %4, 0u
-    %8:u32 = access %5, 2u
-    %9:u32 = access %4, 3u
-    %10:vec4<u32> = construct %6, %7, %8, %9
+    %6:vec2<u32> = swizzle %4, xx
+    %7:u32 = swizzle %5, z
+    %8:u32 = swizzle %4, w
+    %9:vec4<u32> = construct %6, %7, %8
+    %10:vec4<u32> = let %9
     ret
   }
 }
@@ -1500,6 +1548,7 @@
        %tmp1 = OpLoad %v2u32 %vec1
        %tmp2 = OpLoad %v3u32 %vec2
        %shuf = OpVectorShuffle %v4u32 %tmp1 %tmp2 0 3 4 1
+     %result = OpCopyObject %v4u32 %shuf
                OpReturn
                OpFunctionEnd
     )",
@@ -1510,18 +1559,18 @@
     %3:ptr<function, vec3<u32>, read_write> = var undef
     %4:vec2<u32> = load %2
     %5:vec3<u32> = load %3
-    %6:u32 = access %4, 0u
-    %7:u32 = access %5, 1u
-    %8:u32 = access %5, 2u
-    %9:u32 = access %4, 1u
-    %10:vec4<u32> = construct %6, %7, %8, %9
+    %6:u32 = swizzle %4, x
+    %7:vec2<u32> = swizzle %5, yz
+    %8:u32 = swizzle %4, y
+    %9:vec4<u32> = construct %6, %7, %8
+    %10:vec4<u32> = let %9
     ret
   }
 }
 )");
 }
 
-TEST_F(SpirvParserTest, VectorShuffle_Swizzle_FirstVector) {
+TEST_F(SpirvParserTest, VectorShuffle_AllFromA) {
     EXPECT_IR(R"(
                OpCapability Shader
                OpMemoryModel Logical GLSL450
@@ -1539,6 +1588,7 @@
         %vec = OpVariable %v4u32_ptr Function
         %tmp = OpLoad %v4u32 %vec
        %shuf = OpVectorShuffle %v2u32 %tmp %tmp 0 2
+     %result = OpCopyObject %v2u32 %shuf
                OpReturn
                OpFunctionEnd
     )",
@@ -1548,13 +1598,14 @@
     %2:ptr<function, vec4<u32>, read_write> = var undef
     %3:vec4<u32> = load %2
     %4:vec2<u32> = swizzle %3, xz
+    %5:vec2<u32> = let %4
     ret
   }
 }
 )");
 }
 
-TEST_F(SpirvParserTest, VectorShuffle_Swizzle_SecondVector) {
+TEST_F(SpirvParserTest, VectorShuffle_AllFromB) {
     EXPECT_IR(R"(
                OpCapability Shader
                OpMemoryModel Logical GLSL450
@@ -1573,6 +1624,7 @@
        %tmp1 = OpLoad %v4u32 %vec1
        %tmp2 = OpLoad %v4u32 %vec2
        %shuf = OpVectorShuffle %v4u32 %tmp1 %tmp2 4 5 6 7
+     %result = OpCopyObject %v4u32 %shuf
                OpReturn
                OpFunctionEnd
     )",
@@ -1584,6 +1636,7 @@
     %4:vec4<u32> = load %2
     %5:vec4<u32> = load %3
     %6:vec4<u32> = swizzle %5, xyzw
+    %7:vec4<u32> = let %6
     ret
   }
 }
@@ -1608,6 +1661,7 @@
        %vec  = OpVariable %v4u32_ptr Function
        %tmp  = OpLoad %v4u32 %vec
        %shuf = OpVectorShuffle %v2u32 %tmp %tmp 2 0
+     %result = OpCopyObject %v2u32 %shuf
                OpReturn
                OpFunctionEnd
     )",
@@ -1617,6 +1671,7 @@
     %2:ptr<function, vec4<u32>, read_write> = var undef
     %3:vec4<u32> = load %2
     %4:vec2<u32> = swizzle %3, zx
+    %5:vec2<u32> = let %4
     ret
   }
 }
@@ -1641,6 +1696,7 @@
        %vec  = OpVariable %v2u32_ptr Function
        %tmp  = OpLoad %v2u32 %vec
        %shuf = OpVectorShuffle %v4u32 %tmp %tmp 3 2 3 2
+     %result = OpCopyObject %v4u32 %shuf
                OpReturn
                OpFunctionEnd
     )",
@@ -1650,6 +1706,7 @@
     %2:ptr<function, vec2<u32>, read_write> = var undef
     %3:vec2<u32> = load %2
     %4:vec4<u32> = swizzle %3, yxyx
+    %5:vec4<u32> = let %4
     ret
   }
 }
@@ -1676,6 +1733,7 @@
         %tmp = OpLoad %v2u32 %vec
      %undef4 = OpUndef %v4u32
        %shuf = OpVectorShuffle %v2u32 %undef4 %tmp 4 5
+     %result = OpCopyObject %v2u32 %shuf
                OpReturn
                OpFunctionEnd
     )",
@@ -1685,6 +1743,7 @@
     %2:ptr<function, vec2<u32>, read_write> = var undef
     %3:vec2<u32> = load %2
     %4:vec2<u32> = swizzle %3, xy
+    %5:vec2<u32> = let %4
     ret
   }
 }

diff --git a/src/tint/lang/spirv/reader/parser/parser.cc b/src/tint/lang/spirv/reader/parser/parser.cc
index 2704f96..6a3a41b 100644
--- a/src/tint/lang/spirv/reader/parser/parser.cc
+++ b/src/tint/lang/spirv/reader/parser/parser.cc

@@ -4201,69 +4201,67 @@
         auto* vector1 = Value(inst.GetSingleWordOperand(2));
         auto* vector2 = Value(inst.GetSingleWordOperand(3));
         auto* result_ty = Type(inst.type_id());
+        auto* el_ty = result_ty->DeepestElement();
 
         uint32_t n1 = vector1->Type()->As<core::type::Vector>()->Width();
         uint32_t n2 = vector2->Type()->As<core::type::Vector>()->Width();
 
-        Vector<uint32_t, 4> literals;
+        Vector<core::ir::Value*, 4> swizzles;
+
+        // Track the current swizzle that we are building from consecutive indices that fall within
+        // the same vector.
+        Vector<uint32_t, 4> current_indices;
+        core::ir::Value* current_vector = nullptr;
+
+        // Emit the current swizzle that we have constructed so far, and add it to the list.
+        auto flush_swizzle = [&] {
+            if (current_vector == nullptr) {
+                return;
+            }
+            auto* swizzle_type = ty_.MatchWidth(el_ty, current_indices.Length());
+            auto* swizzle = b_.Swizzle(swizzle_type, current_vector, current_indices);
+            EmitWithoutSpvResult(swizzle);
+            swizzles.Push(swizzle->Result());
+            current_indices.Clear();
+        };
+
         for (uint32_t i = 4; i < inst.NumOperandWords(); i++) {
-            literals.Push(inst.GetSingleWordOperand(i));
-        }
+            uint32_t literal = inst.GetSingleWordOperand(i);
 
-        // Check if all literals fall entirely within `vector1` or `vector2`,
-        // which would allow us to use a single-vector swizzle.
-        bool swizzle_from_vector1_only = true;
-        bool swizzle_from_vector2_only = true;
-        for (auto& literal : literals) {
-            if (literal == ~0u) {
-                // A `0xFFFFFFFF` literal represents an undefined index,
-                // fallback to first index.
-                literal = 0;
-            }
-            if (literal >= n1) {
-                swizzle_from_vector1_only = false;
-            }
-            if (literal < n1) {
-                swizzle_from_vector2_only = false;
-            }
-        }
-
-        // If only one vector is used, we can swizzle it.
-        if (swizzle_from_vector1_only) {
-            // Indices are already within `[0, n1)`, as expected by `Swizzle` IR
-            // for `vector1`.
-            Emit(b_.Swizzle(result_ty, vector1, literals), inst.result_id());
-            return;
-        }
-        if (swizzle_from_vector2_only) {
-            // Map logical concatenated indices' range `[n1, n1 + n2)` into the range
-            // `[0, n2)`, as expected by `Swizzle` IR for `vector2`.
-            for (auto& literal : literals) {
-                literal -= n1;
-            }
-            Emit(b_.Swizzle(result_ty, vector2, literals), inst.result_id());
-            return;
-        }
-
-        // Swizzle is not possible, construct the result vector out of elements
-        // from both vectors.
-        auto* element_ty = vector1->Type()->DeepestElement();
-        Vector<core::ir::Value*, 4> result;
-        for (auto idx : literals) {
-            TINT_ASSERT(idx < n1 + n2);
-
-            if (idx < n1) {
-                auto* access_inst = b_.Access(element_ty, vector1, b_.Constant(u32(idx)));
-                EmitWithoutSpvResult(access_inst);
-                result.Push(access_inst->Result());
+            // Determine which vector this index falls within.
+            uint32_t next_index;
+            core::ir::Value* next_vector = nullptr;
+            if (literal == 0xFFFFFFFF) {
+                // Undefined component, so just use the first component of the first vector.
+                next_vector = vector1;
+                next_index = 0;
+            } else if (literal < n1) {
+                next_vector = vector1;
+                next_index = literal;
+            } else if (literal < n1 + n2) {
+                next_vector = vector2;
+                next_index = literal - n1;
             } else {
-                auto* access_inst = b_.Access(element_ty, vector2, b_.Constant(u32(idx - n1)));
-                EmitWithoutSpvResult(access_inst);
-                result.Push(access_inst->Result());
+                TINT_ICE() << "invalid vector shuffle index";
             }
+
+            // If the vector has changed from the previous index, flush the swizzle.
+            if (next_vector != current_vector) {
+                flush_swizzle();
+            }
+            current_vector = next_vector;
+            current_indices.Push(next_index);
         }
 
-        Emit(b_.Construct(result_ty, result), inst.result_id());
+        flush_swizzle();
+
+        if (swizzles.Length() == 1) {
+            // There was only one swizzle, so we can just use it directly.
+            AddValue(inst.result_id(), swizzles[0]);
+        } else {
+            // There were multiple swizzles, so we combine them all to produce the final result.
+            Emit(b_.Construct(result_ty, swizzles), inst.result_id());
+        }
     }
 
     /// @param inst the SPIR-V instruction for OpFunctionCall
commit	6b79284bd854e3bdcc7fad3554c1fffce8a8039b	[log] [tgz]
author	James Price <jrprice@google.com>	Mon Aug 11 08:53:12 2025 -0700
committer	Dawn LUCI CQ <dawn-scoped@luci-project-accounts.iam.gserviceaccount.com>	Mon Aug 11 08:53:12 2025 -0700
tree	a0a0a3bd8bfdb2a8bad920494dd22c5ff6a663b0
parent	936e555875e053a28d07340727adba72c600f768 [diff]