[tint] Polyfill subgroupShuffle id param range (clamped)

Bug: 435246627
Change-Id: I1cd5aba1760881109ecba92500effffe694bab70
Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/255322
Commit-Queue: Peter McNeeley <petermcneeley@google.com>
Reviewed-by: James Price <jrprice@google.com>
diff --git a/src/dawn/native/Toggles.cpp b/src/dawn/native/Toggles.cpp
index 8e72dea..52df799 100644
--- a/src/dawn/native/Toggles.cpp
+++ b/src/dawn/native/Toggles.cpp
@@ -575,6 +575,10 @@
     {Toggle::ScalarizeMaxMinClamp,
      {"scalarize_max_min_clamp", "Scalarize max, min, and clamp builtins.",
       "https://crbug.com/422144514", ToggleStage::Device}},
+    {Toggle::SubgroupShuffleClamped,
+     {"subgroup_shuffle_clamped",
+      "Polyfill subgroupShuffle by clamping the id param to within maximum possible subgroup size.",
+      "https://crbug.com/dawn/2502", ToggleStage::Device}},
     {Toggle::MetalDisableModuleConstantF16,
      {"metal_disable_module_constant_f16",
       "Disable module constant hoisting for values that contain f16 types.",
diff --git a/src/dawn/native/Toggles.h b/src/dawn/native/Toggles.h
index 82e81b3..980aead 100644
--- a/src/dawn/native/Toggles.h
+++ b/src/dawn/native/Toggles.h
@@ -140,6 +140,7 @@
     ExposeWGSLExperimentalFeatures,
     DisablePolyfillsOnIntegerDivisonAndModulo,
     ScalarizeMaxMinClamp,
+    SubgroupShuffleClamped,
     MetalDisableModuleConstantF16,
     EnableImmediateErrorHandling,
     VulkanUseStorageInputOutput16,
diff --git a/src/dawn/native/vulkan/PhysicalDeviceVk.cpp b/src/dawn/native/vulkan/PhysicalDeviceVk.cpp
index bfbdfde..d471d6a 100644
--- a/src/dawn/native/vulkan/PhysicalDeviceVk.cpp
+++ b/src/dawn/native/vulkan/PhysicalDeviceVk.cpp
@@ -920,6 +920,12 @@
         deviceToggles->Default(Toggle::VulkanDirectVariableAccessTransformHandle, true);
     }
 
+    if (IsIntelMesa()) {
+        // Clamp the `id` param of subgroupShuffle so that it stays within the spec limits.
+        // See crbug.com/435246627
+        deviceToggles->Default(Toggle::SubgroupShuffleClamped, true);
+    }
+
     if (IsIntelMesa() && gpu_info::IsIntelGen12LP(GetVendorId(), GetDeviceId())) {
         // dawn:1688: Intel Mesa driver has a bug about reusing the VkDeviceMemory that was
         // previously bound to a 2D VkImage. To work around that bug we have to disable the resource
diff --git a/src/dawn/native/vulkan/ShaderModuleVk.cpp b/src/dawn/native/vulkan/ShaderModuleVk.cpp
index 6d5df11..cb13755 100644
--- a/src/dawn/native/vulkan/ShaderModuleVk.cpp
+++ b/src/dawn/native/vulkan/ShaderModuleVk.cpp
@@ -281,6 +281,8 @@
         GetDevice()->IsToggleEnabled(Toggle::DisablePolyfillsOnIntegerDivisonAndModulo);
     req.tintOptions.scalarize_max_min_clamp =
         GetDevice()->IsToggleEnabled(Toggle::ScalarizeMaxMinClamp);
+    req.tintOptions.subgroup_shuffle_clamped =
+        GetDevice()->IsToggleEnabled(Toggle::SubgroupShuffleClamped);
     req.tintOptions.use_vulkan_memory_model =
         GetDevice()->IsToggleEnabled(Toggle::UseVulkanMemoryModel);
     req.tintOptions.spirv_version = GetDevice()->IsToggleEnabled(Toggle::UseSpirv14)
diff --git a/src/tint/lang/spirv/writer/common/options.h b/src/tint/lang/spirv/writer/common/options.h
index ba8deb9e0..8b4bc8b 100644
--- a/src/tint/lang/spirv/writer/common/options.h
+++ b/src/tint/lang/spirv/writer/common/options.h
@@ -208,6 +208,10 @@
     /// `unpack4x8unorm` builtins
     bool polyfill_pack_unpack_4x8_norm = false;
 
+    /// Set to `true` to polyfill subgroupShuffle by clamping its `id` param to within the maximum
+    /// subgroup size allowed by the spec.
+    bool subgroup_shuffle_clamped = false;
+
     /// Set to `true` to disable the polyfills on integer division and modulo.
     bool disable_polyfill_integer_div_mod = false;
 
@@ -244,6 +248,7 @@
                  pass_matrix_by_pointer,
                  polyfill_dot_4x8_packed,
                  polyfill_pack_unpack_4x8_norm,
+                 subgroup_shuffle_clamped,
                  disable_polyfill_integer_div_mod,
                  scalarize_max_min_clamp,
                  use_vulkan_memory_model,
diff --git a/src/tint/lang/spirv/writer/raise/builtin_polyfill.cc b/src/tint/lang/spirv/writer/raise/builtin_polyfill.cc
index 026db12..edfeb8c 100644
--- a/src/tint/lang/spirv/writer/raise/builtin_polyfill.cc
+++ b/src/tint/lang/spirv/writer/raise/builtin_polyfill.cc
@@ -48,6 +48,7 @@
 #include "src/tint/lang/spirv/ir/literal_operand.h"
 #include "src/tint/lang/spirv/type/sampled_image.h"
 #include "src/tint/utils/ice/ice.h"
+#include "src/tint/utils/internal_limits.h"
 
 using namespace tint::core::number_suffixes;  // NOLINT
 using namespace tint::core::fluent_types;     // NOLINT
@@ -280,7 +281,7 @@
                     SubgroupBroadcast(builtin);
                     break;
                 case core::BuiltinFn::kSubgroupShuffle:
-                    SubgroupShuffle(builtin);
+                    SubgroupShuffle(builtin, config.subgroup_shuffle_clamped);
                     break;
                 case core::BuiltinFn::kTextureDimensions:
                     TextureDimensions(builtin);
@@ -1093,7 +1094,7 @@
 
     /// Handle a SubgroupShuffle() builtin.
     /// @param builtin the builtin call instruction
-    void SubgroupShuffle(core::ir::CoreBuiltinCall* builtin) {
+    void SubgroupShuffle(core::ir::CoreBuiltinCall* builtin, bool clamp_subgroup_shuffle) {
         TINT_ASSERT(builtin->Args().Length() == 2);
         auto* id = builtin->Args()[1];
 
@@ -1103,6 +1104,17 @@
             cast->InsertBefore(builtin);
             builtin->SetArg(1, cast->Result());
         }
+
+        // Polyfill the `subgroupShuffle()` builtin call by clamping (masking) its `id` param.
+        if (clamp_subgroup_shuffle) {
+            auto* shuffle_id = builtin->Args()[1];
+            auto* mask_max_subgroup_size =
+                b.Constant(core::u32(tint::internal_limits::kMaxSubgroupSize - 1));
+            b.InsertBefore(builtin, [&] {
+                auto* clamp_via_masking_and = b.And<u32>(shuffle_id, mask_max_subgroup_size);
+                builtin->SetArg(1, clamp_via_masking_and->Result());
+            });
+        }
     }
 
     /// Handle a SubgroupBroadcast() builtin.
diff --git a/src/tint/lang/spirv/writer/raise/builtin_polyfill.h b/src/tint/lang/spirv/writer/raise/builtin_polyfill.h
index 98b063d..457e8d0 100644
--- a/src/tint/lang/spirv/writer/raise/builtin_polyfill.h
+++ b/src/tint/lang/spirv/writer/raise/builtin_polyfill.h
@@ -42,6 +42,7 @@
 struct PolyfillConfig {
     bool use_vulkan_memory_model = false;
     SpvVersion version = SpvVersion::kSpv13;
+    bool subgroup_shuffle_clamped = false;
 };
 
 /// BuiltinPolyfill is a transform that replaces calls to builtins with polyfills and calls to
diff --git a/src/tint/lang/spirv/writer/raise/builtin_polyfill_test.cc b/src/tint/lang/spirv/writer/raise/builtin_polyfill_test.cc
index 278a22f..77dd296 100644
--- a/src/tint/lang/spirv/writer/raise/builtin_polyfill_test.cc
+++ b/src/tint/lang/spirv/writer/raise/builtin_polyfill_test.cc
@@ -4777,5 +4777,192 @@
     EXPECT_EQ(expect, str());
 }
 
+TEST_F(SpirvWriter_BuiltinPolyfillTest, SubgroupShuffleClamped_NoPolyfill) {
+    auto* func = b.Function("foo", ty.void_());
+    auto* arg1 = b.FunctionParam("arg1", ty.i32());
+    auto* arg2 = b.FunctionParam("arg2", ty.i32());
+    func->SetParams({arg1, arg2});
+
+    b.Append(func->Block(), [&] {
+        b.Let("a", b.Call(ty.i32(), core::BuiltinFn::kSubgroupShuffle, arg1, arg2));
+        b.Return(func);
+    });
+    auto* src = R"(
+%foo = func(%arg1:i32, %arg2:i32):void {
+  $B1: {
+    %4:i32 = subgroupShuffle %arg1, %arg2
+    %a:i32 = let %4
+    ret
+  }
+}
+)";
+
+    auto* expect = R"(
+%foo = func(%arg1:i32, %arg2:i32):void {
+  $B1: {
+    %4:u32 = bitcast %arg2
+    %5:i32 = subgroupShuffle %arg1, %4
+    %a:i32 = let %5
+    ret
+  }
+}
+)";
+    EXPECT_EQ(src, str());
+
+    PolyfillConfig config;
+    config.subgroup_shuffle_clamped = false;
+    Run(BuiltinPolyfill, config);
+    EXPECT_EQ(expect, str());
+}
+
+TEST_F(SpirvWriter_BuiltinPolyfillTest, SubgroupShuffleClamped_I32) {
+    auto* func = b.Function("foo", ty.void_());
+    auto* arg1 = b.FunctionParam("arg1", ty.i32());
+    auto* arg2 = b.FunctionParam("arg2", ty.i32());
+    func->SetParams({arg1, arg2});
+    b.Append(func->Block(), [&] {
+        b.Let("a", b.Call(ty.i32(), core::BuiltinFn::kSubgroupShuffle, arg1, arg2));
+        b.Return(func);
+    });
+    auto* src = R"(
+%foo = func(%arg1:i32, %arg2:i32):void {
+  $B1: {
+    %4:i32 = subgroupShuffle %arg1, %arg2
+    %a:i32 = let %4
+    ret
+  }
+}
+)";
+    auto* expect = R"(
+%foo = func(%arg1:i32, %arg2:i32):void {
+  $B1: {
+    %4:u32 = bitcast %arg2
+    %5:u32 = and %4, 127u
+    %6:i32 = subgroupShuffle %arg1, %5
+    %a:i32 = let %6
+    ret
+  }
+}
+)";
+
+    EXPECT_EQ(src, str());
+
+    PolyfillConfig config;
+    config.subgroup_shuffle_clamped = true;
+    Run(BuiltinPolyfill, config);
+    EXPECT_EQ(expect, str());
+}
+
+TEST_F(SpirvWriter_BuiltinPolyfillTest, SubgroupShuffleClamped_U32) {
+    auto* func = b.Function("foo", ty.void_());
+    auto* arg1 = b.FunctionParam("arg1", ty.u32());
+    auto* arg2 = b.FunctionParam("arg2", ty.u32());
+    func->SetParams({arg1, arg2});
+    b.Append(func->Block(), [&] {
+        b.Let("a", b.Call(ty.u32(), core::BuiltinFn::kSubgroupShuffle, arg1, arg2));
+        b.Return(func);
+    });
+    auto* src = R"(
+%foo = func(%arg1:u32, %arg2:u32):void {
+  $B1: {
+    %4:u32 = subgroupShuffle %arg1, %arg2
+    %a:u32 = let %4
+    ret
+  }
+}
+)";
+    auto* expect = R"(
+%foo = func(%arg1:u32, %arg2:u32):void {
+  $B1: {
+    %4:u32 = and %arg2, 127u
+    %5:u32 = subgroupShuffle %arg1, %4
+    %a:u32 = let %5
+    ret
+  }
+}
+)";
+
+    EXPECT_EQ(src, str());
+
+    PolyfillConfig config;
+    config.subgroup_shuffle_clamped = true;
+    Run(BuiltinPolyfill, config);
+    EXPECT_EQ(expect, str());
+}
+
+TEST_F(SpirvWriter_BuiltinPolyfillTest, SubgroupShuffleClamped_F32) {
+    auto* func = b.Function("foo", ty.void_());
+    auto* arg1 = b.FunctionParam("arg1", ty.f32());
+    auto* arg2 = b.FunctionParam("arg2", ty.u32());
+    func->SetParams({arg1, arg2});
+    b.Append(func->Block(), [&] {
+        b.Let("a", b.Call(ty.f32(), core::BuiltinFn::kSubgroupShuffle, arg1, arg2));
+        b.Return(func);
+    });
+    auto* src = R"(
+%foo = func(%arg1:f32, %arg2:u32):void {
+  $B1: {
+    %4:f32 = subgroupShuffle %arg1, %arg2
+    %a:f32 = let %4
+    ret
+  }
+}
+)";
+    auto* expect = R"(
+%foo = func(%arg1:f32, %arg2:u32):void {
+  $B1: {
+    %4:u32 = and %arg2, 127u
+    %5:f32 = subgroupShuffle %arg1, %4
+    %a:f32 = let %5
+    ret
+  }
+}
+)";
+
+    EXPECT_EQ(src, str());
+
+    PolyfillConfig config;
+    config.subgroup_shuffle_clamped = true;
+    Run(BuiltinPolyfill, config);
+    EXPECT_EQ(expect, str());
+}
+
+TEST_F(SpirvWriter_BuiltinPolyfillTest, SubgroupShuffleClamped_Vec2F32) {
+    auto* func = b.Function("foo", ty.void_());
+    auto* arg1 = b.FunctionParam("arg1", ty.vec2<f32>());
+    auto* arg2 = b.FunctionParam("arg2", ty.u32());
+    func->SetParams({arg1, arg2});
+    b.Append(func->Block(), [&] {
+        b.Let("a", b.Call(ty.vec2<f32>(), core::BuiltinFn::kSubgroupShuffle, arg1, arg2));
+        b.Return(func);
+    });
+    auto* src = R"(
+%foo = func(%arg1:vec2<f32>, %arg2:u32):void {
+  $B1: {
+    %4:vec2<f32> = subgroupShuffle %arg1, %arg2
+    %a:vec2<f32> = let %4
+    ret
+  }
+}
+)";
+    auto* expect = R"(
+%foo = func(%arg1:vec2<f32>, %arg2:u32):void {
+  $B1: {
+    %4:u32 = and %arg2, 127u
+    %5:vec2<f32> = subgroupShuffle %arg1, %4
+    %a:vec2<f32> = let %5
+    ret
+  }
+}
+)";
+
+    EXPECT_EQ(src, str());
+
+    PolyfillConfig config;
+    config.subgroup_shuffle_clamped = true;
+    Run(BuiltinPolyfill, config);
+    EXPECT_EQ(expect, str());
+}
+
 }  // namespace
 }  // namespace tint::spirv::writer::raise
diff --git a/src/tint/lang/spirv/writer/raise/raise.cc b/src/tint/lang/spirv/writer/raise/raise.cc
index 7600dcb..3fc32f1 100644
--- a/src/tint/lang/spirv/writer/raise/raise.cc
+++ b/src/tint/lang/spirv/writer/raise/raise.cc
@@ -177,7 +177,8 @@
     }
 
     raise::PolyfillConfig config = {.use_vulkan_memory_model = options.use_vulkan_memory_model,
-                                    .version = options.spirv_version};
+                                    .version = options.spirv_version,
+                                    .subgroup_shuffle_clamped = options.subgroup_shuffle_clamped};
     RUN_TRANSFORM(raise::BuiltinPolyfill, module, config);
     RUN_TRANSFORM(raise::ExpandImplicitSplats, module);