[ir]: polyfill dot4I8Packed, dot4U8Packed when needed
Enable packed_4x8_integer_dot_product end2end tests for Android
Bug: tint:1497
Change-Id: Ie59e3ae541329c2c6883f2a943b6cc944e36abb5
Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/168684
Commit-Queue: David Neto <dneto@google.com>
Reviewed-by: James Price <jrprice@google.com>
Kokoro: Kokoro <noreply+kokoro@google.com>
diff --git a/src/dawn/tests/end2end/Packed4x8IntegerDotProductTests.cpp b/src/dawn/tests/end2end/Packed4x8IntegerDotProductTests.cpp
index 556c851..e187d72 100644
--- a/src/dawn/tests/end2end/Packed4x8IntegerDotProductTests.cpp
+++ b/src/dawn/tests/end2end/Packed4x8IntegerDotProductTests.cpp
@@ -36,10 +36,6 @@
class Packed4x8IntegerDotProductTests : public DawnTest {};
TEST_P(Packed4x8IntegerDotProductTests, Dot4x8Packed) {
- // TODO(tint:1497): investigate why the creation of compute pipeline with dot4{U|I}8Packed()
- // fails on Pixel 4
- DAWN_SUPPRESS_TEST_IF(IsAndroid());
-
const char* computeShader = R"(
struct Buf {
data1 : i32,
@@ -105,10 +101,6 @@
}
TEST_P(Packed4x8IntegerDotProductTests, Pack4x8) {
- // TODO(tint:1497): investigate why the creation of compute pipeline with pack4xI8(),
- // pack4xU8(), pack4xI8Clamp() or pack4xU8Clamp() fails on Pixel 6
- DAWN_SUPPRESS_TEST_IF(IsAndroid());
-
const char* computeShader = R"(
struct Buf {
data1 : u32,
@@ -181,10 +173,6 @@
}
TEST_P(Packed4x8IntegerDotProductTests, Unpack4x8) {
- // TODO(tint:1497): investigate why the creation of compute pipeline with unpack4xI8() or
- // unpack4xU8() fails on Pixel 6
- DAWN_SUPPRESS_TEST_IF(IsAndroid());
-
const char* computeShader = R"(
struct Buf {
data1 : vec4i,
diff --git a/src/tint/lang/core/ir/transform/builtin_polyfill.cc b/src/tint/lang/core/ir/transform/builtin_polyfill.cc
index e5c506e..4ff5cbf 100644
--- a/src/tint/lang/core/ir/transform/builtin_polyfill.cc
+++ b/src/tint/lang/core/ir/transform/builtin_polyfill.cc
@@ -119,6 +119,13 @@
}
}
break;
+ case core::BuiltinFn::kDot4U8Packed:
+ case core::BuiltinFn::kDot4I8Packed: {
+ if (config.dot_4x8_packed) {
+ worklist.Push(builtin);
+ }
+ break;
+ }
case core::BuiltinFn::kPack4XI8:
case core::BuiltinFn::kPack4XU8:
case core::BuiltinFn::kPack4XI8Clamp:
@@ -167,6 +174,12 @@
case core::BuiltinFn::kTextureSampleBaseClampToEdge:
replacement = TextureSampleBaseClampToEdge_2d_f32(builtin);
break;
+ case core::BuiltinFn::kDot4I8Packed:
+ replacement = Dot4I8Packed(builtin);
+ break;
+ case core::BuiltinFn::kDot4U8Packed:
+ replacement = Dot4U8Packed(builtin);
+ break;
case core::BuiltinFn::kPack4XI8:
replacement = Pack4xI8(builtin);
break;
@@ -601,6 +614,44 @@
return result;
}
+ /// Polyfill a `dot4I8Packed()` builtin call by unpacking both operands and calling `dot()`
+ /// @param call the builtin call instruction; the replacement code is inserted before it
+ /// @returns the replacement value (the i32 result of the emitted `dot()` call)
+ ir::Value* Dot4I8Packed(ir::CoreBuiltinCall* call) {
+ // Replace `dot4I8Packed(%x,%y)` with:
+ // %unpacked_x = unpack4xI8(%x);
+ // %unpacked_y = unpack4xI8(%y);
+ // %result = dot(%unpacked_x, %unpacked_y);
+ auto* x = call->Args()[0];
+ auto* y = call->Args()[1];
+ auto* unpacked_x = Unpack4xI8OnValue(call, x);
+ auto* unpacked_y = Unpack4xI8OnValue(call, y);
+ ir::Value* result = nullptr;
+ b.InsertBefore(call, [&] {
+ result = b.Call(ty.i32(), core::BuiltinFn::kDot, unpacked_x, unpacked_y)->Result(0);
+ });
+ return result;
+ }
+
+ /// Polyfill a `dot4U8Packed()` builtin call by unpacking both operands and calling `dot()`
+ /// @param call the builtin call instruction; the replacement code is inserted before it
+ /// @returns the replacement value (the u32 result of the emitted `dot()` call)
+ ir::Value* Dot4U8Packed(ir::CoreBuiltinCall* call) {
+ // Replace `dot4U8Packed(%x,%y)` with:
+ // %unpacked_x = unpack4xU8(%x);
+ // %unpacked_y = unpack4xU8(%y);
+ // %result = dot(%unpacked_x, %unpacked_y);
+ auto* x = call->Args()[0];
+ auto* y = call->Args()[1];
+ auto* unpacked_x = Unpack4xU8OnValue(call, x);
+ auto* unpacked_y = Unpack4xU8OnValue(call, y);
+ ir::Value* result = nullptr;
+ b.InsertBefore(call, [&] {
+ result = b.Call(ty.u32(), core::BuiltinFn::kDot, unpacked_x, unpacked_y)->Result(0);
+ });
+ return result;
+ }
+
/// Polyfill a `pack4xI8()` builtin call
/// @param call the builtin call instruction
/// @returns the replacement value
@@ -713,17 +764,16 @@
return result;
}
- /// Polyfill a `unpack4xI8()` builtin call
- /// @param call the builtin call instruction
- /// @returns the replacement value
- ir::Value* Unpack4xI8(ir::CoreBuiltinCall* call) {
+ /// Emit code for `unpack4xI8` on u32 value `x`, before the given call.
+ /// @param call the instruction that should follow the emitted code
+ /// @param x the u32 value to be unpacked
+ /// @returns the value produced by the emitted code (the unpacked vector)
+ ir::Value* Unpack4xI8OnValue(ir::CoreBuiltinCall* call, ir::Value* x) {
// Replace `unpack4xI8(%x)` with:
// %n = vec4u(24, 16, 8, 0);
- // %x_splat = vec4u(x); // splat the scalar to a vector
+ // %x_splat = vec4u(%x); // splat the scalar to a vector
// %x_vec4i = bitcast<vec4i>(%x_splat << n);
// %result = %x_vec4i >> vec4u(24);
ir::Value* result = nullptr;
- auto* x = call->Args()[0];
b.InsertBefore(call, [&] {
auto* vec4i = ty.vec4<i32>();
auto* vec4u = ty.vec4<u32>();
@@ -738,17 +788,23 @@
return result;
}
- /// Polyfill a `unpack4xU8()` builtin call
+ /// Polyfill a `unpack4xI8()` builtin call
/// @param call the builtin call instruction
/// @returns the replacement value
- ir::Value* Unpack4xU8(ir::CoreBuiltinCall* call) {
+ ir::Value* Unpack4xI8(ir::CoreBuiltinCall* call) {
+ return Unpack4xI8OnValue(call, call->Args()[0]);
+ }
+
+ /// Emit code for `unpack4xU8` on u32 value `x`, before the given call.
+ /// @param call the instruction that should follow the emitted code
+ /// @param x the u32 value to be unpacked
+ /// @returns the value produced by the emitted code (the unpacked vector)
+ ir::Value* Unpack4xU8OnValue(ir::CoreBuiltinCall* call, ir::Value* x) {
// Replace `unpack4xU8(%x)` with:
// %n = vec4u(0, 8, 16, 24);
- // %x_splat = vec4u(x); // splat the scalar to a vector
+ // %x_splat = vec4u(%x); // splat the scalar to a vector
// %x_vec4u = %x_splat >> n;
// %result = %x_vec4u & vec4u(0xff);
ir::Value* result = nullptr;
- auto* x = call->Args()[0];
b.InsertBefore(call, [&] {
auto* vec4u = ty.vec4<u32>();
@@ -760,6 +816,13 @@
});
return result;
}
+
+ /// Polyfill a `unpack4xU8()` builtin call
+ /// @param call the builtin call instruction
+ /// @returns the replacement value, as emitted by `Unpack4xU8OnValue()` for the first argument
+ ir::Value* Unpack4xU8(ir::CoreBuiltinCall* call) {
+ return Unpack4xU8OnValue(call, call->Args()[0]);
+ }
};
} // namespace
diff --git a/src/tint/lang/core/ir/transform/builtin_polyfill.h b/src/tint/lang/core/ir/transform/builtin_polyfill.h
index 2f90486..cb3c054 100644
--- a/src/tint/lang/core/ir/transform/builtin_polyfill.h
+++ b/src/tint/lang/core/ir/transform/builtin_polyfill.h
@@ -69,6 +69,8 @@
bool saturate = false;
/// Should `textureSampleBaseClampToEdge()` be polyfilled for texture_2d<f32> textures?
bool texture_sample_base_clamp_to_edge_2d_f32 = false;
+ /// Should `dot4U8Packed()` and `dot4I8Packed()` be polyfilled?
+ bool dot_4x8_packed = false;
/// Should `pack4xI8()` and `pack4xU8()` be polyfilled?
bool pack_unpack_4x8 = false;
};
diff --git a/src/tint/lang/core/ir/transform/builtin_polyfill_test.cc b/src/tint/lang/core/ir/transform/builtin_polyfill_test.cc
index 29c7afd..9dfaee0 100644
--- a/src/tint/lang/core/ir/transform/builtin_polyfill_test.cc
+++ b/src/tint/lang/core/ir/transform/builtin_polyfill_test.cc
@@ -1622,5 +1622,85 @@
EXPECT_EQ(expect, str());
}
+// Checks that, with `dot_4x8_packed` enabled, `dot4I8Packed` is rewritten into two signed unpack
+// sequences (shl, bitcast, shr) whose vec4<i32> results feed a single `dot` call.
+TEST_F(IR_BuiltinPolyfillTest, Dot4I8Packed) {
+ Build(core::BuiltinFn::kDot4I8Packed, ty.i32(), Vector{ty.u32(), ty.u32()});
+
+ auto* src = R"(
+%foo = func(%arg:u32, %arg_1:u32):i32 -> %b1 { # %arg_1: 'arg'
+ %b1 = block {
+ %result:i32 = dot4I8Packed %arg, %arg_1
+ ret %result
+ }
+}
+)";
+ EXPECT_EQ(src, str());
+
+ auto* expect = R"(
+%foo = func(%arg:u32, %arg_1:u32):i32 -> %b1 { # %arg_1: 'arg'
+ %b1 = block {
+ %4:vec4<u32> = construct 24u, 16u, 8u, 0u
+ %5:vec4<u32> = construct %arg
+ %6:vec4<u32> = shl %5, %4
+ %7:vec4<i32> = bitcast %6
+ %8:vec4<u32> = construct 24u
+ %9:vec4<i32> = shr %7, %8
+ %10:vec4<u32> = construct 24u, 16u, 8u, 0u
+ %11:vec4<u32> = construct %arg_1
+ %12:vec4<u32> = shl %11, %10
+ %13:vec4<i32> = bitcast %12
+ %14:vec4<u32> = construct 24u
+ %15:vec4<i32> = shr %13, %14
+ %result:i32 = dot %9, %15
+ ret %result
+ }
+}
+)";
+
+ BuiltinPolyfillConfig config;
+ config.dot_4x8_packed = true;
+ Run(BuiltinPolyfill, config);
+
+ EXPECT_EQ(expect, str());
+}
+
+// Checks that, with `dot_4x8_packed` enabled, `dot4U8Packed` is rewritten into two unsigned unpack
+// sequences (shr, and 255u) whose vec4<u32> results feed a single `dot` call.
+TEST_F(IR_BuiltinPolyfillTest, Dot4U8Packed) {
+ Build(core::BuiltinFn::kDot4U8Packed, ty.u32(), Vector{ty.u32(), ty.u32()});
+
+ auto* src = R"(
+%foo = func(%arg:u32, %arg_1:u32):u32 -> %b1 { # %arg_1: 'arg'
+ %b1 = block {
+ %result:u32 = dot4U8Packed %arg, %arg_1
+ ret %result
+ }
+}
+)";
+ EXPECT_EQ(src, str());
+
+ auto* expect = R"(
+%foo = func(%arg:u32, %arg_1:u32):u32 -> %b1 { # %arg_1: 'arg'
+ %b1 = block {
+ %4:vec4<u32> = construct 0u, 8u, 16u, 24u
+ %5:vec4<u32> = construct %arg
+ %6:vec4<u32> = shr %5, %4
+ %7:vec4<u32> = construct 255u
+ %8:vec4<u32> = and %6, %7
+ %9:vec4<u32> = construct 0u, 8u, 16u, 24u
+ %10:vec4<u32> = construct %arg_1
+ %11:vec4<u32> = shr %10, %9
+ %12:vec4<u32> = construct 255u
+ %13:vec4<u32> = and %11, %12
+ %result:u32 = dot %8, %13
+ ret %result
+ }
+}
+)";
+
+ BuiltinPolyfillConfig config;
+ config.dot_4x8_packed = true;
+ Run(BuiltinPolyfill, config);
+
+ EXPECT_EQ(expect, str());
+}
+
} // namespace
} // namespace tint::core::ir::transform
diff --git a/src/tint/lang/spirv/writer/raise/raise.cc b/src/tint/lang/spirv/writer/raise/raise.cc
index 095e47e..bfa4285 100644
--- a/src/tint/lang/spirv/writer/raise/raise.cc
+++ b/src/tint/lang/spirv/writer/raise/raise.cc
@@ -86,6 +86,7 @@
core_polyfills.insert_bits = core::ir::transform::BuiltinPolyfillLevel::kClampOrRangeCheck;
core_polyfills.saturate = true;
core_polyfills.texture_sample_base_clamp_to_edge_2d_f32 = true;
+ core_polyfills.dot_4x8_packed = options.polyfill_dot_4x8_packed;
core_polyfills.pack_unpack_4x8 = true;
RUN_TRANSFORM(core::ir::transform::BuiltinPolyfill, module, core_polyfills);