HLSL-IR: fix ShaderIO OOB indexing when mixing subgroup and workgroup inputs with others

GetInput was indexing OOB on input_indices when indexing these other
inputs. For example, for a compute shader with a subgroup parameter
followed  by a regular invocation id, GetInput would intercept the
subgroup parameter to emit the proper Wave call, but for the invocation
id, it would index input_indices at a member index that was out of
bounds. The fix is to make sure input_indices is properly populated,
even for members we won't be looking up in there.

Bug: 363199902
Bug: 357896924
Change-Id: I5e3d30980a10b8beeccf5461611ea96b5bcda7b6
Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/213379
Auto-Submit: Antonio Maiorano <amaiorano@google.com>
Commit-Queue: dan sinclair <dsinclair@chromium.org>
Reviewed-by: dan sinclair <dsinclair@chromium.org>
diff --git a/src/tint/lang/hlsl/writer/raise/shader_io.cc b/src/tint/lang/hlsl/writer/raise/shader_io.cc
index cf58e37..5bc9230 100644
--- a/src/tint/lang/hlsl/writer/raise/shader_io.cc
+++ b/src/tint/lang/hlsl/writer/raise/shader_io.cc
@@ -28,6 +28,7 @@
 #include "src/tint/lang/hlsl/writer/raise/shader_io.h"
 
 #include <algorithm>
+#include <limits>
 #include <memory>
 #include <utility>
 
@@ -111,6 +112,11 @@
                 return 13;
             case core::BuiltinValue::kClipDistances:
                 return 14;
+            case core::BuiltinValue::kSubgroupInvocationId:
+            case core::BuiltinValue::kSubgroupSize:
+                // These are sorted, but don't actually end up as members. Value doesn't really
+                // matter, so just make it larger than the rest.
+                return std::numeric_limits<uint32_t>::max();
             default:
                 break;
         }
@@ -194,17 +200,16 @@
 
         Vector<MemberInfo, 4> input_data;
         for (uint32_t i = 0; i < inputs.Length(); ++i) {
-            // If subgroup invocation id or size, save the index for GetInput
+            // Save the index of certain builtins for GetIndex. Although struct members will not be
+            // added for these inputs, we still add entries to input_data so that other inputs with
+            // struct members can index input_indices properly in GetIndex.
             if (auto builtin = inputs[i].attributes.builtin) {
                 if (*builtin == core::BuiltinValue::kSubgroupInvocationId) {
                     subgroup_invocation_id_index = i;
-                    continue;
                 } else if (*builtin == core::BuiltinValue::kSubgroupSize) {
                     subgroup_size_index = i;
-                    continue;
                 } else if (*builtin == core::BuiltinValue::kNumWorkgroups) {
                     num_workgroups_index = i;
-                    continue;
                 }
             }
 
@@ -221,6 +226,14 @@
 
         Vector<core::type::Manager::StructMemberDesc, 4> input_struct_members;
         for (auto& input : input_data) {
+            // Don't add members for certain builtins
+            if (input.idx == subgroup_invocation_id_index ||  //
+                input.idx == subgroup_size_index ||           //
+                input.idx == num_workgroups_index) {
+                // Invalid value, should not be indexed
+                input_indices[input.idx] = std::numeric_limits<uint32_t>::max();
+                continue;
+            }
             input_indices[input.idx] = static_cast<uint32_t>(input_struct_members.Length());
             input_struct_members.Push(input.member);
         }
diff --git a/src/tint/lang/hlsl/writer/raise/shader_io_test.cc b/src/tint/lang/hlsl/writer/raise/shader_io_test.cc
index 9bb2465..d4cb238 100644
--- a/src/tint/lang/hlsl/writer/raise/shader_io_test.cc
+++ b/src/tint/lang/hlsl/writer/raise/shader_io_test.cc
@@ -1206,6 +1206,198 @@
     EXPECT_EQ(expect, str());
 }
 
+TEST_F(HlslWriterTransformTest, ShaderIOParameters_Subgroup_WithNonSubgroupParamsFirst) {
+    auto* invocation_id = b.FunctionParam("invoc_id", ty.vec3<u32>());
+    invocation_id->SetBuiltin(core::BuiltinValue::kLocalInvocationId);
+
+    auto* subgroup_invocation_id = b.FunctionParam("id", ty.u32());
+    subgroup_invocation_id->SetBuiltin(core::BuiltinValue::kSubgroupInvocationId);
+
+    auto* subgroup_size = b.FunctionParam("size", ty.u32());
+    subgroup_size->SetBuiltin(core::BuiltinValue::kSubgroupSize);
+
+    auto* ep = b.ComputeFunction("foo");
+    ep->SetParams({invocation_id, subgroup_invocation_id, subgroup_size});
+
+    b.Append(ep->Block(), [&] {
+        auto* x = b.Let("x", b.Multiply(ty.u32(), subgroup_invocation_id, subgroup_size));
+        b.Let("y", b.Add(ty.u32(), x, b.Access(ty.u32(), invocation_id, 0_u)));
+        b.Return(ep);
+    });
+
+    auto* src = R"(
+%foo = @compute @workgroup_size(1u, 1u, 1u) func(%invoc_id:vec3<u32> [@local_invocation_id], %id:u32 [@subgroup_invocation_id], %size:u32 [@subgroup_size]):void {
+  $B1: {
+    %5:u32 = mul %id, %size
+    %x:u32 = let %5
+    %7:u32 = access %invoc_id, 0u
+    %8:u32 = add %x, %7
+    %y:u32 = let %8
+    ret
+  }
+}
+)";
+    EXPECT_EQ(src, str());
+
+    auto* expect = R"(
+foo_inputs = struct @align(16) {
+  invoc_id:vec3<u32> @offset(0), @builtin(local_invocation_id)
+}
+
+%foo_inner = func(%invoc_id:vec3<u32>, %id:u32, %size:u32):void {
+  $B1: {
+    %5:u32 = mul %id, %size
+    %x:u32 = let %5
+    %7:u32 = access %invoc_id, 0u
+    %8:u32 = add %x, %7
+    %y:u32 = let %8
+    ret
+  }
+}
+%foo = @compute @workgroup_size(1u, 1u, 1u) func(%inputs:foo_inputs):void {
+  $B2: {
+    %12:vec3<u32> = access %inputs, 0u
+    %13:u32 = hlsl.WaveGetLaneIndex
+    %14:u32 = hlsl.WaveGetLaneCount
+    %15:void = call %foo_inner, %12, %13, %14
+    ret
+  }
+}
+)";
+
+    Run(ShaderIO, ShaderIOConfig{});
+
+    EXPECT_EQ(expect, str());
+}
+
+TEST_F(HlslWriterTransformTest, ShaderIOParameters_Subgroup_WithNonSubgroupParamsLast) {
+    auto* subgroup_invocation_id = b.FunctionParam("id", ty.u32());
+    subgroup_invocation_id->SetBuiltin(core::BuiltinValue::kSubgroupInvocationId);
+
+    auto* subgroup_size = b.FunctionParam("size", ty.u32());
+    subgroup_size->SetBuiltin(core::BuiltinValue::kSubgroupSize);
+
+    auto* invocation_id = b.FunctionParam("invoc_id", ty.vec3<u32>());
+    invocation_id->SetBuiltin(core::BuiltinValue::kLocalInvocationId);
+
+    auto* ep = b.ComputeFunction("foo");
+    ep->SetParams({subgroup_invocation_id, subgroup_size, invocation_id});
+
+    b.Append(ep->Block(), [&] {
+        auto* x = b.Let("x", b.Multiply(ty.u32(), subgroup_invocation_id, subgroup_size));
+        b.Let("y", b.Add(ty.u32(), x, b.Access(ty.u32(), invocation_id, 0_u)));
+        b.Return(ep);
+    });
+
+    auto* src = R"(
+%foo = @compute @workgroup_size(1u, 1u, 1u) func(%id:u32 [@subgroup_invocation_id], %size:u32 [@subgroup_size], %invoc_id:vec3<u32> [@local_invocation_id]):void {
+  $B1: {
+    %5:u32 = mul %id, %size
+    %x:u32 = let %5
+    %7:u32 = access %invoc_id, 0u
+    %8:u32 = add %x, %7
+    %y:u32 = let %8
+    ret
+  }
+}
+)";
+    EXPECT_EQ(src, str());
+
+    auto* expect = R"(
+foo_inputs = struct @align(16) {
+  invoc_id:vec3<u32> @offset(0), @builtin(local_invocation_id)
+}
+
+%foo_inner = func(%id:u32, %size:u32, %invoc_id:vec3<u32>):void {
+  $B1: {
+    %5:u32 = mul %id, %size
+    %x:u32 = let %5
+    %7:u32 = access %invoc_id, 0u
+    %8:u32 = add %x, %7
+    %y:u32 = let %8
+    ret
+  }
+}
+%foo = @compute @workgroup_size(1u, 1u, 1u) func(%inputs:foo_inputs):void {
+  $B2: {
+    %12:u32 = hlsl.WaveGetLaneIndex
+    %13:u32 = hlsl.WaveGetLaneCount
+    %14:vec3<u32> = access %inputs, 0u
+    %15:void = call %foo_inner, %12, %13, %14
+    ret
+  }
+}
+)";
+
+    Run(ShaderIO, ShaderIOConfig{});
+
+    EXPECT_EQ(expect, str());
+}
+
+TEST_F(HlslWriterTransformTest, ShaderIOParameters_Subgroup_WithNonSubgroupParamsMiddle) {
+    auto* subgroup_invocation_id = b.FunctionParam("id", ty.u32());
+    subgroup_invocation_id->SetBuiltin(core::BuiltinValue::kSubgroupInvocationId);
+
+    auto* invocation_id = b.FunctionParam("invoc_id", ty.vec3<u32>());
+    invocation_id->SetBuiltin(core::BuiltinValue::kLocalInvocationId);
+
+    auto* subgroup_size = b.FunctionParam("size", ty.u32());
+    subgroup_size->SetBuiltin(core::BuiltinValue::kSubgroupSize);
+
+    auto* ep = b.ComputeFunction("foo");
+    ep->SetParams({subgroup_invocation_id, invocation_id, subgroup_size});
+
+    b.Append(ep->Block(), [&] {
+        auto* x = b.Let("x", b.Multiply(ty.u32(), subgroup_invocation_id, subgroup_size));
+        b.Let("y", b.Add(ty.u32(), x, b.Access(ty.u32(), invocation_id, 0_u)));
+        b.Return(ep);
+    });
+
+    auto* src = R"(
+%foo = @compute @workgroup_size(1u, 1u, 1u) func(%id:u32 [@subgroup_invocation_id], %invoc_id:vec3<u32> [@local_invocation_id], %size:u32 [@subgroup_size]):void {
+  $B1: {
+    %5:u32 = mul %id, %size
+    %x:u32 = let %5
+    %7:u32 = access %invoc_id, 0u
+    %8:u32 = add %x, %7
+    %y:u32 = let %8
+    ret
+  }
+}
+)";
+    EXPECT_EQ(src, str());
+
+    auto* expect = R"(
+foo_inputs = struct @align(16) {
+  invoc_id:vec3<u32> @offset(0), @builtin(local_invocation_id)
+}
+
+%foo_inner = func(%id:u32, %invoc_id:vec3<u32>, %size:u32):void {
+  $B1: {
+    %5:u32 = mul %id, %size
+    %x:u32 = let %5
+    %7:u32 = access %invoc_id, 0u
+    %8:u32 = add %x, %7
+    %y:u32 = let %8
+    ret
+  }
+}
+%foo = @compute @workgroup_size(1u, 1u, 1u) func(%inputs:foo_inputs):void {
+  $B2: {
+    %12:u32 = hlsl.WaveGetLaneIndex
+    %13:vec3<u32> = access %inputs, 0u
+    %14:u32 = hlsl.WaveGetLaneCount
+    %15:void = call %foo_inner, %12, %13, %14
+    ret
+  }
+}
+)";
+
+    Run(ShaderIO, ShaderIOConfig{});
+
+    EXPECT_EQ(expect, str());
+}
+
 TEST_F(HlslWriterTransformTest, ShaderIOParameters_NumWorkgroups_NonStruct) {
     auto* num_workgroups = b.FunctionParam("num_wgs", ty.vec3<u32>());
     num_workgroups->SetBuiltin(core::BuiltinValue::kNumWorkgroups);
@@ -1453,6 +1645,217 @@
     EXPECT_EQ(expect, str());
 }
 
+TEST_F(HlslWriterTransformTest, ShaderIOParameters_NumWorkgroups_WithNonWorkgroupParamFirst) {
+    auto* invocation_id = b.FunctionParam("invoc_id", ty.vec3<u32>());
+    invocation_id->SetBuiltin(core::BuiltinValue::kLocalInvocationId);
+
+    auto* num_workgroups = b.FunctionParam("num_wgs", ty.vec3<u32>());
+    num_workgroups->SetBuiltin(core::BuiltinValue::kNumWorkgroups);
+
+    auto* ep = b.ComputeFunction("foo");
+    ep->SetParams({invocation_id, num_workgroups});
+
+    b.Append(ep->Block(), [&] {
+        b.Multiply(ty.vec3<u32>(), b.Access(ty.u32(), invocation_id, 0_u), num_workgroups);
+        b.Return(ep);
+    });
+
+    auto* src = R"(
+%foo = @compute @workgroup_size(1u, 1u, 1u) func(%invoc_id:vec3<u32> [@local_invocation_id], %num_wgs:vec3<u32> [@num_workgroups]):void {
+  $B1: {
+    %4:u32 = access %invoc_id, 0u
+    %5:vec3<u32> = mul %4, %num_wgs
+    ret
+  }
+}
+)";
+    EXPECT_EQ(src, str());
+
+    auto* expect = R"(
+foo_inputs = struct @align(16) {
+  invoc_id:vec3<u32> @offset(0), @builtin(local_invocation_id)
+}
+
+$B1: {  # root
+  %tint_num_workgroups:ptr<uniform, vec3<u32>, read> = var @binding_point(0, 0)
+}
+
+%foo_inner = func(%invoc_id:vec3<u32>, %num_wgs:vec3<u32>):void {
+  $B2: {
+    %5:u32 = access %invoc_id, 0u
+    %6:vec3<u32> = mul %5, %num_wgs
+    ret
+  }
+}
+%foo = @compute @workgroup_size(1u, 1u, 1u) func(%inputs:foo_inputs):void {
+  $B3: {
+    %9:vec3<u32> = access %inputs, 0u
+    %10:vec3<u32> = load %tint_num_workgroups
+    %11:void = call %foo_inner, %9, %10
+    ret
+  }
+}
+)";
+
+    Run(ShaderIO, ShaderIOConfig{});
+
+    EXPECT_EQ(expect, str());
+}
+
+TEST_F(HlslWriterTransformTest, ShaderIOParameters_NumWorkgroups_WithNonWorkgroupParamLast) {
+    auto* num_workgroups = b.FunctionParam("num_wgs", ty.vec3<u32>());
+    num_workgroups->SetBuiltin(core::BuiltinValue::kNumWorkgroups);
+
+    auto* invocation_id = b.FunctionParam("invoc_id", ty.vec3<u32>());
+    invocation_id->SetBuiltin(core::BuiltinValue::kLocalInvocationId);
+
+    auto* ep = b.ComputeFunction("foo");
+    ep->SetParams({num_workgroups, invocation_id});
+
+    b.Append(ep->Block(), [&] {
+        b.Multiply(ty.vec3<u32>(), b.Access(ty.u32(), invocation_id, 0_u), num_workgroups);
+        b.Return(ep);
+    });
+
+    auto* src = R"(
+%foo = @compute @workgroup_size(1u, 1u, 1u) func(%num_wgs:vec3<u32> [@num_workgroups], %invoc_id:vec3<u32> [@local_invocation_id]):void {
+  $B1: {
+    %4:u32 = access %invoc_id, 0u
+    %5:vec3<u32> = mul %4, %num_wgs
+    ret
+  }
+}
+)";
+    EXPECT_EQ(src, str());
+
+    auto* expect = R"(
+foo_inputs = struct @align(16) {
+  invoc_id:vec3<u32> @offset(0), @builtin(local_invocation_id)
+}
+
+$B1: {  # root
+  %tint_num_workgroups:ptr<uniform, vec3<u32>, read> = var @binding_point(0, 0)
+}
+
+%foo_inner = func(%num_wgs:vec3<u32>, %invoc_id:vec3<u32>):void {
+  $B2: {
+    %5:u32 = access %invoc_id, 0u
+    %6:vec3<u32> = mul %5, %num_wgs
+    ret
+  }
+}
+%foo = @compute @workgroup_size(1u, 1u, 1u) func(%inputs:foo_inputs):void {
+  $B3: {
+    %9:vec3<u32> = load %tint_num_workgroups
+    %10:vec3<u32> = access %inputs, 0u
+    %11:void = call %foo_inner, %9, %10
+    ret
+  }
+}
+)";
+
+    Run(ShaderIO, ShaderIOConfig{});
+
+    EXPECT_EQ(expect, str());
+}
+
+TEST_F(HlslWriterTransformTest, ShaderIOParameters_NumWorkgroupsAndSubgroups_Mixed) {
+    auto* invocation_id = b.FunctionParam("invoc_id", ty.vec3<u32>());
+    invocation_id->SetBuiltin(core::BuiltinValue::kLocalInvocationId);
+
+    auto* num_workgroups = b.FunctionParam("num_wgs", ty.vec3<u32>());
+    num_workgroups->SetBuiltin(core::BuiltinValue::kNumWorkgroups);
+
+    auto* invocation_index = b.FunctionParam("invoc_index", ty.u32());
+    invocation_index->SetBuiltin(core::BuiltinValue::kLocalInvocationIndex);
+
+    auto* subgroup_invocation_id = b.FunctionParam("sg_id", ty.u32());
+    subgroup_invocation_id->SetBuiltin(core::BuiltinValue::kSubgroupInvocationId);
+
+    auto* global_invocation_id = b.FunctionParam("glob_id", ty.vec3<u32>());
+    global_invocation_id->SetBuiltin(core::BuiltinValue::kGlobalInvocationId);
+
+    auto* subgroup_size = b.FunctionParam("sg_size", ty.u32());
+    subgroup_size->SetBuiltin(core::BuiltinValue::kSubgroupSize);
+
+    auto* workgroup_id = b.FunctionParam("wg_id", ty.vec3<u32>());
+    workgroup_id->SetBuiltin(core::BuiltinValue::kWorkgroupId);
+
+    auto* ep = b.ComputeFunction("foo");
+    ep->SetParams({invocation_id, num_workgroups, invocation_index, subgroup_invocation_id,
+                   global_invocation_id, subgroup_size, workgroup_id});
+
+    b.Append(ep->Block(), [&] {
+        b.Let("l_invoc_id", invocation_id);
+        b.Let("l_num_wgs", num_workgroups);
+        b.Let("l_invoc_index", invocation_index);
+        b.Let("l_sg_id", subgroup_invocation_id);
+        b.Let("l_glob_id", global_invocation_id);
+        b.Let("l_sg_size", subgroup_size);
+        b.Let("l_wg_id", workgroup_id);
+        b.Return(ep);
+    });
+
+    auto* src = R"(
+%foo = @compute @workgroup_size(1u, 1u, 1u) func(%invoc_id:vec3<u32> [@local_invocation_id], %num_wgs:vec3<u32> [@num_workgroups], %invoc_index:u32 [@local_invocation_index], %sg_id:u32 [@subgroup_invocation_id], %glob_id:vec3<u32> [@global_invocation_id], %sg_size:u32 [@subgroup_size], %wg_id:vec3<u32> [@workgroup_id]):void {
+  $B1: {
+    %l_invoc_id:vec3<u32> = let %invoc_id
+    %l_num_wgs:vec3<u32> = let %num_wgs
+    %l_invoc_index:u32 = let %invoc_index
+    %l_sg_id:u32 = let %sg_id
+    %l_glob_id:vec3<u32> = let %glob_id
+    %l_sg_size:u32 = let %sg_size
+    %l_wg_id:vec3<u32> = let %wg_id
+    ret
+  }
+}
+)";
+    EXPECT_EQ(src, str());
+
+    auto* expect = R"(
+foo_inputs = struct @align(16) {
+  invoc_id:vec3<u32> @offset(0), @builtin(local_invocation_id)
+  invoc_index:u32 @offset(12), @builtin(local_invocation_index)
+  glob_id:vec3<u32> @offset(16), @builtin(global_invocation_id)
+  wg_id:vec3<u32> @offset(32), @builtin(workgroup_id)
+}
+
+$B1: {  # root
+  %tint_num_workgroups:ptr<uniform, vec3<u32>, read> = var @binding_point(0, 0)
+}
+
+%foo_inner = func(%invoc_id:vec3<u32>, %num_wgs:vec3<u32>, %invoc_index:u32, %sg_id:u32, %glob_id:vec3<u32>, %sg_size:u32, %wg_id:vec3<u32>):void {
+  $B2: {
+    %l_invoc_id:vec3<u32> = let %invoc_id
+    %l_num_wgs:vec3<u32> = let %num_wgs
+    %l_invoc_index:u32 = let %invoc_index
+    %l_sg_id:u32 = let %sg_id
+    %l_glob_id:vec3<u32> = let %glob_id
+    %l_sg_size:u32 = let %sg_size
+    %l_wg_id:vec3<u32> = let %wg_id
+    ret
+  }
+}
+%foo = @compute @workgroup_size(1u, 1u, 1u) func(%inputs:foo_inputs):void {
+  $B3: {
+    %19:vec3<u32> = access %inputs, 0u
+    %20:vec3<u32> = load %tint_num_workgroups
+    %21:u32 = access %inputs, 1u
+    %22:u32 = hlsl.WaveGetLaneIndex
+    %23:vec3<u32> = access %inputs, 2u
+    %24:u32 = hlsl.WaveGetLaneCount
+    %25:vec3<u32> = access %inputs, 3u
+    %26:void = call %foo_inner, %19, %20, %21, %22, %23, %24, %25
+    ret
+  }
+}
+)";
+
+    Run(ShaderIO, ShaderIOConfig{});
+
+    EXPECT_EQ(expect, str());
+}
+
 TEST_F(HlslWriterTransformTest, ShaderIOParameters_ClipDistances_1) {
     core::IOAttributes pos_attr;
     pos_attr.builtin = core::BuiltinValue::kPosition;