msl:ast_printer: Polyfill 4x8 int dot products on AMD RDNA1, RDNA2

Remove end2end test suppression for Metal AMD

The IR MSL backend emits code similar to what the AST backend emits
without the polyfill. The IR backend may need reworking too.

Fixed: chromium:355485146
Change-Id: I815c33f6768d5db4706e441d5f5d059f46b203cc
Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/202054
Reviewed-by: James Price <jrprice@google.com>
Auto-Submit: David Neto <dneto@google.com>
Commit-Queue: James Price <jrprice@google.com>
diff --git a/src/dawn/native/metal/PhysicalDeviceMTL.mm b/src/dawn/native/metal/PhysicalDeviceMTL.mm
index dfe472b..129ff51 100644
--- a/src/dawn/native/metal/PhysicalDeviceMTL.mm
+++ b/src/dawn/native/metal/PhysicalDeviceMTL.mm
@@ -547,6 +547,14 @@
         deviceToggles->Default(
             Toggle::MetalUseBothDepthAndStencilAttachmentsForCombinedDepthStencilFormats, true);
     }
+
+    // Packed 4x8 integer dot products fail on MacBook Pro 16" with AMD Radeon Pro 5300M,
+    // which is an RDNA1 architecture GPU.
+    // Conservatively, polyfill these functions on RDNA1 and RDNA2.
+    // crbug.com/355485146
+    if (gpu_info::IsAMDRDNA1(vendorId, deviceId) || gpu_info::IsAMDRDNA2(vendorId, deviceId)) {
+        deviceToggles->Default(Toggle::PolyFillPacked4x8DotProduct, true);
+    }
 #endif
 }
 
diff --git a/src/dawn/native/metal/ShaderModuleMTL.mm b/src/dawn/native/metal/ShaderModuleMTL.mm
index 0151a18..6e087e8 100644
--- a/src/dawn/native/metal/ShaderModuleMTL.mm
+++ b/src/dawn/native/metal/ShaderModuleMTL.mm
@@ -290,6 +290,8 @@
     req.use_tint_ir = device->IsToggleEnabled(Toggle::UseTintIR);
     req.tintOptions.disable_polyfill_integer_div_mod =
         device->IsToggleEnabled(Toggle::DisablePolyfillsOnIntegerDivisonAndModulo);
+    req.tintOptions.polyfill_dot_4x8_packed =
+        device->IsToggleEnabled(Toggle::PolyFillPacked4x8DotProduct);
 
     const CombinedLimits& limits = device->GetLimits();
     req.limits = LimitsForCompilationRequest::Create(limits.v1);
diff --git a/src/dawn/tests/end2end/Packed4x8IntegerDotProductTests.cpp b/src/dawn/tests/end2end/Packed4x8IntegerDotProductTests.cpp
index 1c8d76c..d6433b1 100644
--- a/src/dawn/tests/end2end/Packed4x8IntegerDotProductTests.cpp
+++ b/src/dawn/tests/end2end/Packed4x8IntegerDotProductTests.cpp
@@ -36,9 +36,6 @@
 class Packed4x8IntegerDotProductTests : public DawnTest {};
 
 TEST_P(Packed4x8IntegerDotProductTests, Dot4x8Packed) {
-    // crbug.com/355485146.
-    DAWN_SUPPRESS_TEST_IF(IsMacOS() && IsAMD() && IsMetal());
-
     const char* computeShader = R"(
         struct Buf {
             data1 : i32,
diff --git a/src/tint/lang/msl/writer/ast_printer/ast_printer.cc b/src/tint/lang/msl/writer/ast_printer/ast_printer.cc
index 18c49ea..2fec05f 100644
--- a/src/tint/lang/msl/writer/ast_printer/ast_printer.cc
+++ b/src/tint/lang/msl/writer/ast_printer/ast_printer.cc
@@ -201,6 +201,7 @@
         polyfills.sign_int = true;
         polyfills.texture_sample_base_clamp_to_edge_2d_f32 = true;
         polyfills.workgroup_uniform_load = true;
+        polyfills.dot_4x8_packed = options.polyfill_dot_4x8_packed;
         polyfills.pack_unpack_4x8 = true;
         polyfills.pack_4xu8_clamp = true;
         data.Add<ast::transform::BuiltinPolyfill::Config>(polyfills);
diff --git a/src/tint/lang/msl/writer/ast_printer/builtin_test.cc b/src/tint/lang/msl/writer/ast_printer/builtin_test.cc
index 8837a20..0539763 100644
--- a/src/tint/lang/msl/writer/ast_printer/builtin_test.cc
+++ b/src/tint/lang/msl/writer/ast_printer/builtin_test.cc
@@ -1142,5 +1142,125 @@
 )");
 }
 
+TEST_F(MslASTPrinterTest, PolyfillDot4I8Packed_False) {
+    WrapInFunction(Decl(Let("zero", Expr(0_u))),  //
+                   Decl(Let("v", Call("dot4I8Packed", "zero", Expr(1_u)))));
+
+    Options options;
+    options.polyfill_dot_4x8_packed = false;
+    ASTPrinter& gen = SanitizeAndBuild(options);
+
+    ASSERT_TRUE(gen.Generate()) << gen.Diagnostics();
+    EXPECT_EQ(gen.Result(), R"(#include <metal_stdlib>
+
+using namespace metal;
+
+int tint_dot4I8Packed(uint param_0, uint param_1) {
+  char4 vec1 = as_type<char4>(param_0);
+  char4 vec2 = as_type<char4>(param_1);
+  return vec1[0] * vec2[0] + vec1[1] * vec2[1] + vec1[2] * vec2[2] + vec1[3] * vec2[3];
+}
+
+kernel void test_function() {
+  uint const zero = 0u;
+  int const v = tint_dot4I8Packed(zero, 1u);
+  return;
+}
+
+)");
+}
+
+TEST_F(MslASTPrinterTest, PolyfillDot4I8Packed_True) {
+    WrapInFunction(Decl(Let("zero", Expr(0_u))),  //
+                   Decl(Let("v", Call("dot4I8Packed", "zero", Expr(1_u)))));
+
+    Options options;
+    options.polyfill_dot_4x8_packed = true;
+    ASTPrinter& gen = SanitizeAndBuild(options);
+
+    ASSERT_TRUE(gen.Generate()) << gen.Diagnostics();
+    EXPECT_EQ(gen.Result(), R"(#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T>
+T tint_dot4(vec<T,4> a, vec<T,4> b) {
+  return a[0]*b[0] + a[1]*b[1] + a[2]*b[2] + a[3]*b[3];
+}
+int tint_dot4_i8_packed(uint a, uint b) {
+  int4 const a_i8 = (as_type<int4>((uint4(a) << uint4(24u, 16u, 8u, 0u))) >> uint4(24u));
+  int4 const b_i8 = (as_type<int4>((uint4(b) << uint4(24u, 16u, 8u, 0u))) >> uint4(24u));
+  return tint_dot4(a_i8, b_i8);
+}
+
+kernel void test_function() {
+  uint const zero = 0u;
+  int const v = tint_dot4_i8_packed(zero, 1u);
+  return;
+}
+
+)");
+}
+
+TEST_F(MslASTPrinterTest, PolyfillDot4U8Packed_False) {
+    WrapInFunction(Decl(Let("zero", Expr(0_u))),  //
+                   Decl(Let("v", Call("dot4U8Packed", "zero", Expr(1_u)))));
+
+    Options options;
+    options.polyfill_dot_4x8_packed = false;
+    ASTPrinter& gen = SanitizeAndBuild(options);
+
+    ASSERT_TRUE(gen.Generate()) << gen.Diagnostics();
+    EXPECT_EQ(gen.Result(), R"(#include <metal_stdlib>
+
+using namespace metal;
+
+uint tint_dot4U8Packed(uint param_0, uint param_1) {
+  uchar4 vec1 = as_type<uchar4>(param_0);
+  uchar4 vec2 = as_type<uchar4>(param_1);
+  return vec1[0] * vec2[0] + vec1[1] * vec2[1] + vec1[2] * vec2[2] + vec1[3] * vec2[3];
+}
+
+kernel void test_function() {
+  uint const zero = 0u;
+  uint const v = tint_dot4U8Packed(zero, 1u);
+  return;
+}
+
+)");
+}
+
+TEST_F(MslASTPrinterTest, PolyfillDot4U8Packed_True) {
+    WrapInFunction(Decl(Let("zero", Expr(0_u))),  //
+                   Decl(Let("v", Call("dot4U8Packed", "zero", Expr(1_u)))));
+
+    Options options;
+    options.polyfill_dot_4x8_packed = true;
+    ASTPrinter& gen = SanitizeAndBuild(options);
+
+    ASSERT_TRUE(gen.Generate()) << gen.Diagnostics();
+    EXPECT_EQ(gen.Result(), R"(#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T>
+T tint_dot4(vec<T,4> a, vec<T,4> b) {
+  return a[0]*b[0] + a[1]*b[1] + a[2]*b[2] + a[3]*b[3];
+}
+uint tint_dot4_u8_packed(uint a, uint b) {
+  uint4 const a_u8 = ((uint4(a) >> uint4(24u, 16u, 8u, 0u)) & uint4(255u));
+  uint4 const b_u8 = ((uint4(b) >> uint4(24u, 16u, 8u, 0u)) & uint4(255u));
+  return tint_dot4(a_u8, b_u8);
+}
+
+kernel void test_function() {
+  uint const zero = 0u;
+  uint const v = tint_dot4_u8_packed(zero, 1u);
+  return;
+}
+
+)");
+}
+
 }  // namespace
 }  // namespace tint::msl::writer
diff --git a/src/tint/lang/msl/writer/common/options.h b/src/tint/lang/msl/writer/common/options.h
index bd783c9..fb57e0d 100644
--- a/src/tint/lang/msl/writer/common/options.h
+++ b/src/tint/lang/msl/writer/common/options.h
@@ -166,6 +166,9 @@
     /// The bindings
     Bindings bindings;
 
+    /// Set to `true` to polyfill dot4I8Packed() and dot4U8Packed().
+    bool polyfill_dot_4x8_packed = false;
+
     /// Reflect the fields of this class so that it can be used by tint::ForeachField()
     TINT_REFLECT(Options,
                  disable_robustness,
@@ -176,7 +179,8 @@
                  fixed_sample_mask,
                  pixel_local_attachments,
                  array_length_from_uniform,
-                 bindings);
+                 bindings,
+                 polyfill_dot_4x8_packed);
 };
 
 }  // namespace tint::msl::writer