tint: fix translation of DP4a on HLSL
This patch adds a workaround for a DXC error in the translation of DP4a
functions: the third parameter "acc" of both dot4add_i8packed() and
dot4add_u8packed() must be a variable, otherwise a compile error is
generated.
Bug: tint:1497
Test: tint_unittests
Change-Id: I263d27fb18bd354e0c9110f60cbc98860cf7afe5
Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/90027
Reviewed-by: Ben Clayton <bclayton@google.com>
Commit-Queue: Jiawei Shao <jiawei.shao@intel.com>
Kokoro: Kokoro <noreply+kokoro@google.com>
diff --git a/src/tint/writer/hlsl/generator_impl.cc b/src/tint/writer/hlsl/generator_impl.cc
index fb4efb6..b7020f6 100644
--- a/src/tint/writer/hlsl/generator_impl.cc
+++ b/src/tint/writer/hlsl/generator_impl.cc
@@ -2045,9 +2045,11 @@
std::string functionName;
switch (builtin->Type()) {
case sem::BuiltinType::kDot4I8Packed:
+ line(b) << "int accumulator = 0;";
functionName = "dot4add_i8packed";
break;
case sem::BuiltinType::kDot4U8Packed:
+ line(b) << "uint accumulator = 0u;";
functionName = "dot4add_u8packed";
break;
default:
@@ -2056,7 +2058,7 @@
return false;
}
line(b) << "return " << functionName << "(" << params[0] << ", " << params[1]
- << ", 0);";
+ << ", accumulator);";
return true;
});
diff --git a/src/tint/writer/hlsl/generator_impl_builtin_test.cc b/src/tint/writer/hlsl/generator_impl_builtin_test.cc
index 64ef740..6c573ff 100644
--- a/src/tint/writer/hlsl/generator_impl_builtin_test.cc
+++ b/src/tint/writer/hlsl/generator_impl_builtin_test.cc
@@ -741,7 +741,8 @@
ASSERT_TRUE(gen.Generate()) << gen.error();
EXPECT_EQ(gen.result(), R"(int tint_dot4I8Packed(uint param_0, uint param_1) {
- return dot4add_i8packed(param_0, param_1, 0);
+ int accumulator = 0;
+ return dot4add_i8packed(param_0, param_1, accumulator);
}
[numthreads(1, 1, 1)]
@@ -769,7 +770,8 @@
ASSERT_TRUE(gen.Generate()) << gen.error();
EXPECT_EQ(gen.result(), R"(uint tint_dot4U8Packed(uint param_0, uint param_1) {
- return dot4add_u8packed(param_0, param_1, 0);
+ uint accumulator = 0u;
+ return dot4add_u8packed(param_0, param_1, accumulator);
}
[numthreads(1, 1, 1)]