[spirv-reader] Build swizzles for consecutive shuffle accesses
For an OpVectorShuffle that accesses both vectors, this allows us to
produce swizzles for consecutive indices that fall within the same
vector, which can significantly reduce the size of the generated WGSL
for some shaders.
Fixed: 437398335
Change-Id: I09a6b9eb60ad7314e32150869a8910e9b5fbb430
Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/256734
Commit-Queue: James Price <jrprice@google.com>
Reviewed-by: dan sinclair <dsinclair@chromium.org>
diff --git a/src/tint/lang/spirv/reader/parser/composite_test.cc b/src/tint/lang/spirv/reader/parser/composite_test.cc
index a908344..22cc20c 100644
--- a/src/tint/lang/spirv/reader/parser/composite_test.cc
+++ b/src/tint/lang/spirv/reader/parser/composite_test.cc
@@ -1274,7 +1274,7 @@
)");
}
-TEST_F(SpirvParserTest, VectorShuffle_BothVectors_A) {
+TEST_F(SpirvParserTest, VectorShuffle_AlternateVectors) {
EXPECT_IR(R"(
OpCapability Shader
OpMemoryModel Logical GLSL450
@@ -1293,6 +1293,7 @@
%tmp1 = OpLoad %v4u32 %vec1
%tmp2 = OpLoad %v4u32 %vec2
%shuf = OpVectorShuffle %v4u32 %tmp1 %tmp2 0 5 2 7
+ %result = OpCopyObject %v4u32 %shuf
OpReturn
OpFunctionEnd
)",
@@ -1303,18 +1304,19 @@
%3:ptr<function, vec4<u32>, read_write> = var undef
%4:vec4<u32> = load %2
%5:vec4<u32> = load %3
- %6:u32 = access %4, 0u
- %7:u32 = access %5, 1u
- %8:u32 = access %4, 2u
- %9:u32 = access %5, 3u
+ %6:u32 = swizzle %4, x
+ %7:u32 = swizzle %5, y
+ %8:u32 = swizzle %4, z
+ %9:u32 = swizzle %5, w
%10:vec4<u32> = construct %6, %7, %8, %9
+ %11:vec4<u32> = let %10
ret
}
}
)");
}
-TEST_F(SpirvParserTest, VectorShuffle_BothVectors_B) {
+TEST_F(SpirvParserTest, VectorShuffle_TwoFromA_Then_TwoFromB) {
EXPECT_IR(R"(
OpCapability Shader
OpMemoryModel Logical GLSL450
@@ -1333,6 +1335,7 @@
%tmp1 = OpLoad %v4u32 %vec1
%tmp2 = OpLoad %v4u32 %vec2
%shuf = OpVectorShuffle %v4u32 %tmp1 %tmp2 0 2 5 7
+ %result = OpCopyObject %v4u32 %shuf
OpReturn
OpFunctionEnd
)",
@@ -1343,11 +1346,51 @@
%3:ptr<function, vec4<u32>, read_write> = var undef
%4:vec4<u32> = load %2
%5:vec4<u32> = load %3
- %6:u32 = access %4, 0u
- %7:u32 = access %4, 2u
- %8:u32 = access %5, 1u
- %9:u32 = access %5, 3u
- %10:vec4<u32> = construct %6, %7, %8, %9
+ %6:vec2<u32> = swizzle %4, xz
+ %7:vec2<u32> = swizzle %5, yw
+ %8:vec4<u32> = construct %6, %7
+ %9:vec4<u32> = let %8
+ ret
+ }
+}
+)");
+}
+
+TEST_F(SpirvParserTest, VectorShuffle_OneFromA_Then_TwoFromB_Then_OneFromA) {
+ EXPECT_IR(R"(
+ OpCapability Shader
+ OpMemoryModel Logical GLSL450
+ OpEntryPoint GLCompute %main "main"
+ OpExecutionMode %main LocalSize 1 1 1
+ %void = OpTypeVoid
+ %u32 = OpTypeInt 32 0
+ %v4u32 = OpTypeVector %u32 4
+ %v4u32_ptr = OpTypePointer Function %v4u32
+ %ep_type = OpTypeFunction %void
+
+ %main = OpFunction %void None %ep_type
+ %main_start = OpLabel
+ %vec1 = OpVariable %v4u32_ptr Function
+ %vec2 = OpVariable %v4u32_ptr Function
+ %tmp1 = OpLoad %v4u32 %vec1
+ %tmp2 = OpLoad %v4u32 %vec2
+ %shuf = OpVectorShuffle %v4u32 %tmp1 %tmp2 0 5 6 3
+ %result = OpCopyObject %v4u32 %shuf
+ OpReturn
+ OpFunctionEnd
+ )",
+ R"(
+%main = @compute @workgroup_size(1u, 1u, 1u) func():void {
+ $B1: {
+ %2:ptr<function, vec4<u32>, read_write> = var undef
+ %3:ptr<function, vec4<u32>, read_write> = var undef
+ %4:vec4<u32> = load %2
+ %5:vec4<u32> = load %3
+ %6:u32 = swizzle %4, x
+ %7:vec2<u32> = swizzle %5, yz
+ %8:u32 = swizzle %4, w
+ %9:vec4<u32> = construct %6, %7, %8
+ %10:vec4<u32> = let %9
ret
}
}
@@ -1376,6 +1419,7 @@
%tmp1 = OpLoad %v2u32 %vec1
%tmp2 = OpLoad %v3u32 %vec2
%shuf = OpVectorShuffle %v4u32 %tmp1 %tmp2 0 2 1 4
+ %result = OpCopyObject %v4u32 %shuf
OpReturn
OpFunctionEnd
)",
@@ -1386,11 +1430,12 @@
%3:ptr<function, vec3<u32>, read_write> = var undef
%4:vec2<u32> = load %2
%5:vec3<u32> = load %3
- %6:u32 = access %4, 0u
- %7:u32 = access %5, 0u
- %8:u32 = access %4, 1u
- %9:u32 = access %5, 2u
+ %6:u32 = swizzle %4, x
+ %7:u32 = swizzle %5, x
+ %8:u32 = swizzle %4, y
+ %9:u32 = swizzle %5, z
%10:vec4<u32> = construct %6, %7, %8, %9
+ %11:vec4<u32> = let %10
ret
}
}
@@ -1419,6 +1464,7 @@
%tmp1 = OpLoad %v3u32 %vec1
%tmp2 = OpLoad %v4u32 %vec2
%shuf = OpVectorShuffle %v2u32 %tmp1 %tmp2 0 4
+ %result = OpCopyObject %v2u32 %shuf
OpReturn
OpFunctionEnd
)",
@@ -1429,9 +1475,10 @@
%3:ptr<function, vec4<u32>, read_write> = var undef
%4:vec3<u32> = load %2
%5:vec4<u32> = load %3
- %6:u32 = access %4, 0u
- %7:u32 = access %5, 1u
+ %6:u32 = swizzle %4, x
+ %7:u32 = swizzle %5, y
%8:vec2<u32> = construct %6, %7
+ %9:vec2<u32> = let %8
ret
}
}
@@ -1457,8 +1504,9 @@
%tmpA = OpLoad %v4u32 %vecA
%tmpB = OpLoad %v4u32 %vecB
%shuf = OpVectorShuffle %v4u32 %tmpA %tmpB 0 4294967295 6 3
- OpReturn
- OpFunctionEnd
+ %result = OpCopyObject %v4u32 %shuf
+ OpReturn
+ OpFunctionEnd
)",
R"(
%main = @compute @workgroup_size(1u, 1u, 1u) func():void {
@@ -1467,11 +1515,11 @@
%3:ptr<function, vec4<u32>, read_write> = var undef
%4:vec4<u32> = load %2
%5:vec4<u32> = load %3
- %6:u32 = access %4, 0u
- %7:u32 = access %4, 0u
- %8:u32 = access %5, 2u
- %9:u32 = access %4, 3u
- %10:vec4<u32> = construct %6, %7, %8, %9
+ %6:vec2<u32> = swizzle %4, xx
+ %7:u32 = swizzle %5, z
+ %8:u32 = swizzle %4, w
+ %9:vec4<u32> = construct %6, %7, %8
+ %10:vec4<u32> = let %9
ret
}
}
@@ -1500,6 +1548,7 @@
%tmp1 = OpLoad %v2u32 %vec1
%tmp2 = OpLoad %v3u32 %vec2
%shuf = OpVectorShuffle %v4u32 %tmp1 %tmp2 0 3 4 1
+ %result = OpCopyObject %v4u32 %shuf
OpReturn
OpFunctionEnd
)",
@@ -1510,18 +1559,18 @@
%3:ptr<function, vec3<u32>, read_write> = var undef
%4:vec2<u32> = load %2
%5:vec3<u32> = load %3
- %6:u32 = access %4, 0u
- %7:u32 = access %5, 1u
- %8:u32 = access %5, 2u
- %9:u32 = access %4, 1u
- %10:vec4<u32> = construct %6, %7, %8, %9
+ %6:u32 = swizzle %4, x
+ %7:vec2<u32> = swizzle %5, yz
+ %8:u32 = swizzle %4, y
+ %9:vec4<u32> = construct %6, %7, %8
+ %10:vec4<u32> = let %9
ret
}
}
)");
}
-TEST_F(SpirvParserTest, VectorShuffle_Swizzle_FirstVector) {
+TEST_F(SpirvParserTest, VectorShuffle_AllFromA) {
EXPECT_IR(R"(
OpCapability Shader
OpMemoryModel Logical GLSL450
@@ -1539,6 +1588,7 @@
%vec = OpVariable %v4u32_ptr Function
%tmp = OpLoad %v4u32 %vec
%shuf = OpVectorShuffle %v2u32 %tmp %tmp 0 2
+ %result = OpCopyObject %v2u32 %shuf
OpReturn
OpFunctionEnd
)",
@@ -1548,13 +1598,14 @@
%2:ptr<function, vec4<u32>, read_write> = var undef
%3:vec4<u32> = load %2
%4:vec2<u32> = swizzle %3, xz
+ %5:vec2<u32> = let %4
ret
}
}
)");
}
-TEST_F(SpirvParserTest, VectorShuffle_Swizzle_SecondVector) {
+TEST_F(SpirvParserTest, VectorShuffle_AllFromB) {
EXPECT_IR(R"(
OpCapability Shader
OpMemoryModel Logical GLSL450
@@ -1573,6 +1624,7 @@
%tmp1 = OpLoad %v4u32 %vec1
%tmp2 = OpLoad %v4u32 %vec2
%shuf = OpVectorShuffle %v4u32 %tmp1 %tmp2 4 5 6 7
+ %result = OpCopyObject %v4u32 %shuf
OpReturn
OpFunctionEnd
)",
@@ -1584,6 +1636,7 @@
%4:vec4<u32> = load %2
%5:vec4<u32> = load %3
%6:vec4<u32> = swizzle %5, xyzw
+ %7:vec4<u32> = let %6
ret
}
}
@@ -1608,6 +1661,7 @@
%vec = OpVariable %v4u32_ptr Function
%tmp = OpLoad %v4u32 %vec
%shuf = OpVectorShuffle %v2u32 %tmp %tmp 2 0
+ %result = OpCopyObject %v2u32 %shuf
OpReturn
OpFunctionEnd
)",
@@ -1617,6 +1671,7 @@
%2:ptr<function, vec4<u32>, read_write> = var undef
%3:vec4<u32> = load %2
%4:vec2<u32> = swizzle %3, zx
+ %5:vec2<u32> = let %4
ret
}
}
@@ -1641,6 +1696,7 @@
%vec = OpVariable %v2u32_ptr Function
%tmp = OpLoad %v2u32 %vec
%shuf = OpVectorShuffle %v4u32 %tmp %tmp 3 2 3 2
+ %result = OpCopyObject %v4u32 %shuf
OpReturn
OpFunctionEnd
)",
@@ -1650,6 +1706,7 @@
%2:ptr<function, vec2<u32>, read_write> = var undef
%3:vec2<u32> = load %2
%4:vec4<u32> = swizzle %3, yxyx
+ %5:vec4<u32> = let %4
ret
}
}
@@ -1676,6 +1733,7 @@
%tmp = OpLoad %v2u32 %vec
%undef4 = OpUndef %v4u32
%shuf = OpVectorShuffle %v2u32 %undef4 %tmp 4 5
+ %result = OpCopyObject %v2u32 %shuf
OpReturn
OpFunctionEnd
)",
@@ -1685,6 +1743,7 @@
%2:ptr<function, vec2<u32>, read_write> = var undef
%3:vec2<u32> = load %2
%4:vec2<u32> = swizzle %3, xy
+ %5:vec2<u32> = let %4
ret
}
}
diff --git a/src/tint/lang/spirv/reader/parser/parser.cc b/src/tint/lang/spirv/reader/parser/parser.cc
index 2704f96..6a3a41b 100644
--- a/src/tint/lang/spirv/reader/parser/parser.cc
+++ b/src/tint/lang/spirv/reader/parser/parser.cc
@@ -4201,69 +4201,67 @@
auto* vector1 = Value(inst.GetSingleWordOperand(2));
auto* vector2 = Value(inst.GetSingleWordOperand(3));
auto* result_ty = Type(inst.type_id());
+ auto* el_ty = result_ty->DeepestElement();
uint32_t n1 = vector1->Type()->As<core::type::Vector>()->Width();
uint32_t n2 = vector2->Type()->As<core::type::Vector>()->Width();
- Vector<uint32_t, 4> literals;
+ Vector<core::ir::Value*, 4> swizzles;
+
+ // Track the current swizzle that we are building from consecutive indices that fall within
+ // the same vector.
+ Vector<uint32_t, 4> current_indices;
+ core::ir::Value* current_vector = nullptr;
+
+ // Emit the current swizzle that we have constructed so far, and add it to the list.
+ auto flush_swizzle = [&] {
+ if (current_vector == nullptr) {
+ return;
+ }
+ auto* swizzle_type = ty_.MatchWidth(el_ty, current_indices.Length());
+ auto* swizzle = b_.Swizzle(swizzle_type, current_vector, current_indices);
+ EmitWithoutSpvResult(swizzle);
+ swizzles.Push(swizzle->Result());
+ current_indices.Clear();
+ };
+
for (uint32_t i = 4; i < inst.NumOperandWords(); i++) {
- literals.Push(inst.GetSingleWordOperand(i));
- }
+ uint32_t literal = inst.GetSingleWordOperand(i);
- // Check if all literals fall entirely within `vector1` or `vector2`,
- // which would allow us to use a single-vector swizzle.
- bool swizzle_from_vector1_only = true;
- bool swizzle_from_vector2_only = true;
- for (auto& literal : literals) {
- if (literal == ~0u) {
- // A `0xFFFFFFFF` literal represents an undefined index,
- // fallback to first index.
- literal = 0;
- }
- if (literal >= n1) {
- swizzle_from_vector1_only = false;
- }
- if (literal < n1) {
- swizzle_from_vector2_only = false;
- }
- }
-
- // If only one vector is used, we can swizzle it.
- if (swizzle_from_vector1_only) {
- // Indices are already within `[0, n1)`, as expected by `Swizzle` IR
- // for `vector1`.
- Emit(b_.Swizzle(result_ty, vector1, literals), inst.result_id());
- return;
- }
- if (swizzle_from_vector2_only) {
- // Map logical concatenated indices' range `[n1, n1 + n2)` into the range
- // `[0, n2)`, as expected by `Swizzle` IR for `vector2`.
- for (auto& literal : literals) {
- literal -= n1;
- }
- Emit(b_.Swizzle(result_ty, vector2, literals), inst.result_id());
- return;
- }
-
- // Swizzle is not possible, construct the result vector out of elements
- // from both vectors.
- auto* element_ty = vector1->Type()->DeepestElement();
- Vector<core::ir::Value*, 4> result;
- for (auto idx : literals) {
- TINT_ASSERT(idx < n1 + n2);
-
- if (idx < n1) {
- auto* access_inst = b_.Access(element_ty, vector1, b_.Constant(u32(idx)));
- EmitWithoutSpvResult(access_inst);
- result.Push(access_inst->Result());
+ // Determine which vector this index falls within.
+ uint32_t next_index;
+ core::ir::Value* next_vector = nullptr;
+ if (literal == 0xFFFFFFFF) {
+ // Undefined component, so just use the first component of the first vector.
+ next_vector = vector1;
+ next_index = 0;
+ } else if (literal < n1) {
+ next_vector = vector1;
+ next_index = literal;
+ } else if (literal < n1 + n2) {
+ next_vector = vector2;
+ next_index = literal - n1;
} else {
- auto* access_inst = b_.Access(element_ty, vector2, b_.Constant(u32(idx - n1)));
- EmitWithoutSpvResult(access_inst);
- result.Push(access_inst->Result());
+ TINT_ICE() << "invalid vector shuffle index";
}
+
+ // If the vector has changed from the previous index, flush the swizzle.
+ if (next_vector != current_vector) {
+ flush_swizzle();
+ }
+ current_vector = next_vector;
+ current_indices.Push(next_index);
}
- Emit(b_.Construct(result_ty, result), inst.result_id());
+ flush_swizzle();
+
+ if (swizzles.Length() == 1) {
+ // There was only one swizzle, so we can just use it directly.
+ AddValue(inst.result_id(), swizzles[0]);
+ } else {
+ // There were multiple swizzles, so we combine them all to produce the final result.
+ Emit(b_.Construct(result_ty, swizzles), inst.result_id());
+ }
}
/// @param inst the SPIR-V instruction for OpFunctionCall