Add missing builtin validation

The validation for maxInterStageShaderVariables must also count the
primitive_index, subgroup_size, and subgroup_invocation_id fragment
builtin inputs, which were previously not counted.

With this change Chromium passes the latest CTS test:
https://github.com/gpuweb/cts/pull/4554

Bug: 474266022,474266023
Fixed: 474266022,474266023
Change-Id: I6a6a6964256b06687781dfb026f982fce83b8bac
Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/284055
Commit-Queue: Gregg Tavares <gman@chromium.org>
Reviewed-by: James Price <jrprice@google.com>
diff --git a/src/dawn/native/ShaderModule.cpp b/src/dawn/native/ShaderModule.cpp
index b1a2f8a..5c3176df 100644
--- a/src/dawn/native/ShaderModule.cpp
+++ b/src/dawn/native/ShaderModule.cpp
@@ -963,15 +963,25 @@
         // Other fragment metadata
         metadata->usesSampleMaskOutput = entryPoint.output_sample_mask_used;
         metadata->usesSampleIndex = entryPoint.sample_index_used;
-        if (entryPoint.front_facing_used) {
-            ++totalInterStageShaderVariables;
+
+        struct BoolName {
+            const bool& value;  // Whether the entry point uses this builtin.
+            const char* name;   // WGSL builtin name, printed in the limit error message.
+        };
+        BoolName boolNames[] = {
+            {entryPoint.front_facing_used, "front_facing"},
+            {entryPoint.input_sample_mask_used, "sample_mask"},
+            {entryPoint.sample_index_used, "sample_index"},
+            {entryPoint.primitive_index_used, "primitive_index"},
+            {entryPoint.subgroup_invocation_id_used, "subgroup_invocation_id"},
+            {entryPoint.subgroup_size_used, "subgroup_size"},
+        };
+        for (const auto& boolName : boolNames) {
+            if (boolName.value) {
+                ++totalInterStageShaderVariables;
+            }
         }
-        if (entryPoint.input_sample_mask_used) {
-            ++totalInterStageShaderVariables;
-        }
-        if (entryPoint.sample_index_used) {
-            ++totalInterStageShaderVariables;
-        }
+
         metadata->usesFragDepth = entryPoint.frag_depth_used;
         metadata->usesFragPosition = entryPoint.frag_position_used;
         metadata->usesFineDerivativeBuiltin = entryPoint.fine_derivative_builtin_used;
@@ -983,24 +993,13 @@
             std::ostringstream builtinInfo;
             if (metadata->totalInterStageShaderVariables > userDefinedInputVariables) {
                 builtinInfo << " + 1 (";
-                bool isFirst = true;
-                if (entryPoint.front_facing_used) {
-                    builtinInfo << "front_facing";
-                    isFirst = false;
-                }
-                if (entryPoint.input_sample_mask_used) {
-                    if (!isFirst) {
-                        builtinInfo << "|";
+
+                const char* separator = "";
+                for (const auto& boolName : boolNames) {
+                    if (boolName.value) {
+                        builtinInfo << separator << boolName.name;
+                        separator = "|";
                     }
-                    builtinInfo << "sample_mask";
-                    isFirst = false;
-                }
-                if (entryPoint.sample_index_used) {
-                    if (!isFirst) {
-                        builtinInfo << "|";
-                    }
-                    builtinInfo << "sample_index";
-                    isFirst = false;
                 }
             }
 
diff --git a/src/dawn/tests/end2end/MaxLimitTests.cpp b/src/dawn/tests/end2end/MaxLimitTests.cpp
index c7f249b..0febd12 100644
--- a/src/dawn/tests/end2end/MaxLimitTests.cpp
+++ b/src/dawn/tests/end2end/MaxLimitTests.cpp
@@ -891,6 +891,9 @@
         bool hasSampleMask;
         bool hasSampleIndex;
         bool hasFrontFacing;
+        bool hasPrimitiveIndex;
+        bool hasSubgroupInvocationId;
+        bool hasSubgroupSize;
         std::optional<uint32_t> clipDistancesSize;
     };
 
@@ -909,10 +912,20 @@
             requiredFeatures.push_back(wgpu::FeatureName::ClipDistances);
             mSupportsClipDistances = true;
         }
+        if (SupportsFeatures({wgpu::FeatureName::PrimitiveIndex})) {
+            requiredFeatures.push_back(wgpu::FeatureName::PrimitiveIndex);
+            mSupportsPrimitiveIndex = true;
+        }
+        if (SupportsFeatures({wgpu::FeatureName::Subgroups})) {
+            requiredFeatures.push_back(wgpu::FeatureName::Subgroups);
+            mSupportsSubgroups = true;
+        }
         return requiredFeatures;
     }
 
     bool mSupportsClipDistances = false;
+    bool mSupportsPrimitiveIndex = false;
+    bool mSupportsSubgroups = false;
 
   private:
     // Allocate the inter-stage shader variables that consume as many inter-stage shader variables
@@ -922,10 +935,9 @@
 
         uint32_t builtinVariableCount = 0;
         std::reference_wrapper<const bool> usages[] = {
-            spec.renderPointLists,
-            spec.hasSampleMask,
-            spec.hasSampleIndex,
-            spec.hasFrontFacing,
+            spec.renderPointLists, spec.hasSampleMask,     spec.hasSampleIndex,
+            spec.hasFrontFacing,   spec.hasPrimitiveIndex, spec.hasSubgroupInvocationId,
+            spec.hasSubgroupSize,
         };
         for (const auto& usage : usages) {
             if (usage) {
@@ -974,6 +986,16 @@
             stream << "enable clip_distances;\n";
         }
 
+        if (spec.hasPrimitiveIndex) {
+            DAWN_ASSERT(mSupportsPrimitiveIndex);
+            stream << "enable primitive_index;\n";
+        }
+
+        if (spec.hasSubgroupInvocationId || spec.hasSubgroupSize) {
+            DAWN_ASSERT(mSupportsSubgroups);
+            stream << "enable subgroups;\n";
+        }
+
         uint32_t interStageVariableCount = GetInterStageVariableCount(spec);
         stream << GetInterStageVariableDeclarations(interStageVariableCount, spec) << "\n"
                << GetVertexShaderForTest(interStageVariableCount) << "\n"
@@ -1012,29 +1034,36 @@
     std::string GetFragmentShaderForTest(uint32_t interStageVariableCount,
                                          const MaxInterStageLimitTestsSpec& spec) {
         std::stringstream stream;
+        struct BoolTypeName {
+            const bool& value;
+            const char* type;
+            const char* name;
+        };
+        BoolTypeName builtins[] = {
+            {spec.hasFrontFacing, "bool", "front_facing"},
+            {spec.hasSampleIndex, "u32", "sample_index"},
+            {spec.hasSampleMask, "u32", "sample_mask"},
+            {spec.hasPrimitiveIndex, "u32", "primitive_index"},
+            {spec.hasSubgroupInvocationId, "u32", "subgroup_invocation_id"},
+            {spec.hasSubgroupSize, "u32", "subgroup_size"},
+        };
 
         stream << "@fragment fn fs_main(input: FragmentInput";
-        if (spec.hasFrontFacing) {
-            stream << ", @builtin(front_facing) isFront : bool";
-        }
-        if (spec.hasSampleIndex) {
-            stream << ", @builtin(sample_index) sampleIndex : u32";
-        }
-        if (spec.hasSampleMask) {
-            stream << ", @builtin(sample_mask) sampleMask : u32";
+        for (const auto& builtin : builtins) {
+            if (builtin.value) {
+                stream << ",\n  @builtin(" << builtin.name << ") b_" << builtin.name << " : "
+                       << builtin.type;
+            }
         }
         // Ensure every inter-stage shader variable and built-in variable is used instead of being
-        // optimized out.
+        // optimized out.
         stream << ") -> @location(0) vec4f {\nreturn input.pos";
-        if (spec.hasFrontFacing) {
-            stream << " + vec4f(f32(isFront), 0, 0, 1)";
+        for (const auto& builtin : builtins) {
+            if (builtin.value) {
+                stream << "\n   + vec4f(f32(b_" << builtin.name << "), 0, 0, 1)";
+            }
         }
-        if (spec.hasSampleIndex) {
-            stream << " + vec4f(f32(sampleIndex), 0, 0, 1)";
-        }
-        if (spec.hasSampleMask) {
-            stream << " + vec4f(f32(sampleMask), 0, 0, 1)";
-        }
+
         for (uint32_t location = 0; location < interStageVariableCount; ++location) {
             stream << " + input.color" << location;
         }
@@ -1129,6 +1158,30 @@
     DoTest(spec);
 }
 
+TEST_P(MaxInterStageShaderVariablesLimitTests, PrimitiveIndex) {
+    DAWN_TEST_UNSUPPORTED_IF(!mSupportsPrimitiveIndex);
+
+    MaxInterStageLimitTestsSpec spec = {};
+    spec.hasPrimitiveIndex = true;
+    DoTest(spec);
+}
+
+TEST_P(MaxInterStageShaderVariablesLimitTests, SubgroupInvocationId) {
+    DAWN_TEST_UNSUPPORTED_IF(!mSupportsSubgroups);
+
+    MaxInterStageLimitTestsSpec spec = {};
+    spec.hasSubgroupInvocationId = true;
+    DoTest(spec);
+}
+
+TEST_P(MaxInterStageShaderVariablesLimitTests, SubgroupSize) {
+    DAWN_TEST_UNSUPPORTED_IF(!mSupportsSubgroups);
+
+    MaxInterStageLimitTestsSpec spec = {};
+    spec.hasSubgroupSize = true;
+    DoTest(spec);
+}
+
 // Tests that maxInterStageShaderVariables works for a render pipeline with
 // @builtin(clip_distances).
 TEST_P(MaxInterStageShaderVariablesLimitTests, ClipDistances) {
diff --git a/src/dawn/tests/unittests/validation/ShaderModuleValidationTests.cpp b/src/dawn/tests/unittests/validation/ShaderModuleValidationTests.cpp
index 4aac088..22331b2 100644
--- a/src/dawn/tests/unittests/validation/ShaderModuleValidationTests.cpp
+++ b/src/dawn/tests/unittests/validation/ShaderModuleValidationTests.cpp
@@ -25,6 +25,7 @@
 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
+#include <bit>
 #include <limits>
 #include <memory>
 #include <sstream>
@@ -535,180 +536,6 @@
     CheckTestPipeline(false, kMaxInterStageShaderVariables, wgpu::ShaderStage::Fragment);
 }
 
-// Validate the number of total inter-stage user-defined variables count and built-in variables
-// cannot exceed kMaxInterStageShaderVariables.
-TEST_F(ShaderModuleValidationTest, MaximumInterStageShaderVariables) {
-    auto CheckTestPipeline = [&](bool success,
-                                 uint32_t totalUserDefinedInterStageShaderVariablesCount,
-                                 wgpu::ShaderStage failingShaderStage,
-                                 const char* extraBuiltInDeclarations = "",
-                                 bool usePointListAsPrimitiveType = false) {
-        // Build the ShaderIO struct containing totalUserDefinedInterStageShaderVariablesCount
-        // variables.
-        std::ostringstream stream;
-        stream << "struct ShaderIO {\n" << extraBuiltInDeclarations << "\n";
-        uint32_t vec4InputLocations = totalUserDefinedInterStageShaderVariablesCount;
-
-        for (uint32_t location = 0; location < vec4InputLocations; ++location) {
-            stream << "@location(" << location << ") var" << location << ": vec4f,\n";
-        }
-
-        if (failingShaderStage == wgpu::ShaderStage::Vertex) {
-            stream << " @builtin(position) pos: vec4f,\n";
-        }
-        stream << "}\n";
-
-        std::string ioStruct = stream.str();
-
-        // Build the test pipeline. Note that it's not possible with just ASSERT_DEVICE_ERROR
-        // whether it is the vertex or fragment shader that fails. So instead we will look for the
-        // string "failingVertex" or "failingFragment" in the error message.
-        utils::ComboRenderPipelineDescriptor pDesc;
-        pDesc.cTargets[0].format = wgpu::TextureFormat::RGBA8Unorm;
-        if (usePointListAsPrimitiveType) {
-            pDesc.primitive.topology = wgpu::PrimitiveTopology::PointList;
-        } else {
-            pDesc.primitive.topology = wgpu::PrimitiveTopology::TriangleList;
-        }
-
-        const char* errorMatcher = nullptr;
-        switch (failingShaderStage) {
-            case wgpu::ShaderStage::Vertex: {
-                if (usePointListAsPrimitiveType) {
-                    errorMatcher = "PointList";
-                } else {
-                    errorMatcher = "failingVertex";
-                }
-
-                std::string shader = ioStruct + R"(
-                    @vertex fn failingVertex() -> ShaderIO {
-                        var shaderIO : ShaderIO;
-                        shaderIO.pos = vec4f(0.0, 0.0, 0.0, 1.0);
-                        return shaderIO;
-                     }
-                    @fragment fn main() -> @location(0) vec4f {
-                        return vec4f(0.0);
-                    })";
-                wgpu::ShaderModule shaderModule = utils::CreateShaderModule(device, shader);
-
-                pDesc.vertex.entryPoint = "failingVertex";
-                pDesc.vertex.module = shaderModule;
-                pDesc.cFragment.module = shaderModule;
-                break;
-            }
-
-            case wgpu::ShaderStage::Fragment: {
-                std::string shader = ioStruct + R"(
-                     @vertex fn main() -> @builtin(position) vec4f {
-                        return vec4f(0.0);
-                     }
-                     @fragment fn failingFragment(io : ShaderIO) -> @location(0) vec4f {
-                        return vec4f(0.0);
-                     })";
-                wgpu::ShaderModule shaderModule = utils::CreateShaderModule(device, shader);
-
-                errorMatcher = "failingFragment";
-                pDesc.cFragment.entryPoint = "failingFragment";
-                pDesc.cFragment.module = shaderModule;
-                pDesc.vertex.module = shaderModule;
-                break;
-            }
-
-            default:
-                DAWN_UNREACHABLE();
-        }
-
-        if (success) {
-            if (failingShaderStage == wgpu::ShaderStage::Vertex) {
-                // It is allowed that fragment inputs are a subset of the vertex output variables.
-                device.CreateRenderPipeline(&pDesc);
-            } else {
-                ASSERT_DEVICE_ERROR(device.CreateRenderPipeline(&pDesc),
-                                    testing::HasSubstr("The fragment input at location"));
-            }
-        } else {
-            ASSERT_DEVICE_ERROR(device.CreateRenderPipeline(&pDesc),
-                                testing::HasSubstr(errorMatcher));
-        }
-    };
-
-    // Verify when there is no input builtin variable in a fragment shader, the total user-defined
-    // input variables count must be less than kMaxInterStageShaderVariables.
-    {
-        CheckTestPipeline(true, kMaxInterStageShaderVariables, wgpu::ShaderStage::Fragment);
-        CheckTestPipeline(false, kMaxInterStageShaderVariables + 1, wgpu::ShaderStage::Fragment);
-    }
-
-    // Verify the total user-defined vertex output variables count must be less than
-    // kMaxInterStageShaderVariables.
-    {
-        CheckTestPipeline(true, kMaxInterStageShaderVariables, wgpu::ShaderStage::Vertex);
-        CheckTestPipeline(false, kMaxInterStageShaderVariables + 1, wgpu::ShaderStage::Vertex);
-    }
-
-    // Verify the total user-defined vertex output variables count must be less than or equal to
-    // (kMaxInterStageShaderVariables - 1) when the primitive topology is PointList.
-    {
-        constexpr bool kUsePointListAsPrimitiveTopology = true;
-        const char* kExtraBuiltins = "";
-
-        {
-            uint32_t variablesCount = kMaxInterStageShaderVariables - 1;
-            CheckTestPipeline(true, variablesCount, wgpu::ShaderStage::Vertex, kExtraBuiltins,
-                              kUsePointListAsPrimitiveTopology);
-        }
-        {
-            uint32_t variablesCount = kMaxInterStageShaderVariables;
-            CheckTestPipeline(false, variablesCount, wgpu::ShaderStage::Vertex, kExtraBuiltins,
-                              kUsePointListAsPrimitiveTopology);
-        }
-    }
-
-    // @builtin(position) in fragment shaders shouldn't be counted into the maximum inter-stage
-    // variables count.
-    {
-        CheckTestPipeline(true, kMaxInterStageShaderVariables, wgpu::ShaderStage::Fragment,
-                          "@builtin(position) fragCoord : vec4f,");
-    }
-
-    // @builtin(front_facing), @builtin(sample_index) and @builtin(sample_mask) should all be
-    // counted into the maximum inter-stage variables count. Then the maximum user-defined
-    // inter-stage shader variables can only be (kMaxInterStageShaderVariables - 1) because these
-    // user-defined inter-stage shader variables always consume 1 shader variable each.
-    {
-        constexpr uint8_t kMaskFrontFacing = 1;
-        constexpr uint8_t kMaskSampleIndex = 1 << 1;
-        constexpr uint8_t kMaskSampleMask = 1 << 2;
-        int useCount = 0;
-        for (uint8_t mask = 1; mask <= 7; ++mask) {
-            std::string builtInDeclarations = "";
-            if (mask & kMaskFrontFacing) {
-                builtInDeclarations += "@builtin(front_facing) frontFacing : bool,";
-                ++useCount;
-            }
-            if (mask & kMaskSampleIndex) {
-                builtInDeclarations += "@builtin(sample_index) sampleIndex : u32,";
-                ++useCount;
-            }
-            if (mask & kMaskSampleMask) {
-                builtInDeclarations += "@builtin(sample_mask) sampleMask : u32,";
-                ++useCount;
-            }
-
-            {
-                uint32_t variablesCount = kMaxInterStageShaderVariables - useCount;
-                CheckTestPipeline(true, variablesCount, wgpu::ShaderStage::Fragment,
-                                  builtInDeclarations.c_str());
-            }
-            {
-                uint32_t variablesCount = kMaxInterStageShaderVariables;
-                CheckTestPipeline(false, variablesCount, wgpu::ShaderStage::Fragment,
-                                  builtInDeclarations.c_str());
-            }
-        }
-    }
-}
-
 // Test that numeric ID must be unique
 TEST_F(ShaderModuleValidationTest, OverridableConstantsNumericIDConflicts) {
     ASSERT_DEVICE_ERROR(utils::CreateShaderModule(device, R"(
@@ -909,6 +736,216 @@
     }
 }
 
+// Validate the number of total inter-stage user-defined variables count and built-in variables
+// cannot exceed kMaxInterStageShaderVariables.
+class ShaderModuleMaxInterStageShaderVariablesValidationTest : public ValidationTest {
+  protected:
+    std::vector<wgpu::FeatureName> GetRequiredFeatures() override {
+        wgpu::SupportedFeatures supportedFeatures;
+        adapter.GetFeatures(&supportedFeatures);
+        std::vector<wgpu::FeatureName> requiredFeatures(
+            supportedFeatures.features,
+            supportedFeatures.features + supportedFeatures.featureCount);
+        return requiredFeatures;
+    }
+};
+
+TEST_F(ShaderModuleMaxInterStageShaderVariablesValidationTest, Test) {
+    auto CheckTestPipeline =
+        [&](bool success, uint32_t totalUserDefinedInterStageShaderVariablesCount,
+            wgpu::ShaderStage failingShaderStage, const char* extraBuiltInDeclarations = "",
+            bool usePointListAsPrimitiveType = false) {
+            std::ostringstream stream;
+
+            // add enables
+            if (device.HasFeature(wgpu::FeatureName::PrimitiveIndex)) {
+                stream << "enable primitive_index;";
+            }
+
+            if (device.HasFeature(wgpu::FeatureName::Subgroups)) {
+                stream << "enable subgroups;";
+            }
+
+            // Build the ShaderIO struct containing totalUserDefinedInterStageShaderVariablesCount
+            // variables.
+            stream << "struct ShaderIO {\n" << extraBuiltInDeclarations << "\n";
+            uint32_t vec4InputLocations = totalUserDefinedInterStageShaderVariablesCount;
+
+            for (uint32_t location = 0; location < vec4InputLocations; ++location) {
+                stream << "@location(" << location << ") var" << location << ": vec4f,\n";
+            }
+
+            if (failingShaderStage == wgpu::ShaderStage::Vertex) {
+                stream << " @builtin(position) pos: vec4f,\n";
+            }
+            stream << "}\n";
+
+            std::string ioStruct = stream.str();
+
+            // Build the test pipeline. Note that it's not possible to tell with just
+            // ASSERT_DEVICE_ERROR whether it is the vertex or fragment shader that fails. So
+            // instead we will look for "failingVertex" or "failingFragment" in the error message.
+            utils::ComboRenderPipelineDescriptor pDesc;
+            pDesc.cTargets[0].format = wgpu::TextureFormat::RGBA8Unorm;
+            if (usePointListAsPrimitiveType) {
+                pDesc.primitive.topology = wgpu::PrimitiveTopology::PointList;
+            } else {
+                pDesc.primitive.topology = wgpu::PrimitiveTopology::TriangleList;
+            }
+
+            const char* errorMatcher = nullptr;
+            switch (failingShaderStage) {
+                case wgpu::ShaderStage::Vertex: {
+                    if (usePointListAsPrimitiveType) {
+                        errorMatcher = "PointList";
+                    } else {
+                        errorMatcher = "failingVertex";
+                    }
+
+                    std::string shader = ioStruct + R"(
+                    @vertex fn failingVertex() -> ShaderIO {
+                        var shaderIO : ShaderIO;
+                        shaderIO.pos = vec4f(0.0, 0.0, 0.0, 1.0);
+                        return shaderIO;
+                     }
+                    @fragment fn main() -> @location(0) vec4f {
+                        return vec4f(0.0);
+                    })";
+                    wgpu::ShaderModule shaderModule = utils::CreateShaderModule(device, shader);
+
+                    pDesc.vertex.entryPoint = "failingVertex";
+                    pDesc.vertex.module = shaderModule;
+                    pDesc.cFragment.module = shaderModule;
+                    break;
+                }
+
+                case wgpu::ShaderStage::Fragment: {
+                    std::string shader = ioStruct + R"(
+                     @vertex fn main() -> @builtin(position) vec4f {
+                        return vec4f(0.0);
+                     }
+                     @fragment fn failingFragment(io : ShaderIO) -> @location(0) vec4f {
+                        return vec4f(0.0);
+                     })";
+                    wgpu::ShaderModule shaderModule = utils::CreateShaderModule(device, shader);
+
+                    errorMatcher = "failingFragment";
+                    pDesc.cFragment.entryPoint = "failingFragment";
+                    pDesc.cFragment.module = shaderModule;
+                    pDesc.vertex.module = shaderModule;
+                    break;
+                }
+
+                default:
+                    DAWN_UNREACHABLE();
+            }
+
+            if (success) {
+                if (failingShaderStage == wgpu::ShaderStage::Vertex) {
+                    // It is allowed that fragment inputs are a subset of the vertex output
+                    // variables.
+                    device.CreateRenderPipeline(&pDesc);
+                } else {
+                    ASSERT_DEVICE_ERROR(device.CreateRenderPipeline(&pDesc),
+                                        testing::HasSubstr("The fragment input at location"));
+                }
+            } else {
+                ASSERT_DEVICE_ERROR(device.CreateRenderPipeline(&pDesc),
+                                    testing::HasSubstr(errorMatcher));
+            }
+        };
+
+    // Verify when there is no input builtin variable in a fragment shader, the total user-defined
+    // input variables count must not exceed kMaxInterStageShaderVariables.
+    {
+        CheckTestPipeline(true, kMaxInterStageShaderVariables, wgpu::ShaderStage::Fragment);
+        CheckTestPipeline(false, kMaxInterStageShaderVariables + 1, wgpu::ShaderStage::Fragment);
+    }
+
+    // Verify the total user-defined vertex output variables count must not exceed
+    // kMaxInterStageShaderVariables.
+    {
+        CheckTestPipeline(true, kMaxInterStageShaderVariables, wgpu::ShaderStage::Vertex);
+        CheckTestPipeline(false, kMaxInterStageShaderVariables + 1, wgpu::ShaderStage::Vertex);
+    }
+
+    // Verify the total user-defined vertex output variables count must be less than or equal to
+    // (kMaxInterStageShaderVariables - 1) when the primitive topology is PointList.
+    {
+        constexpr bool kUsePointListAsPrimitiveTopology = true;
+        const char* kExtraBuiltins = "";
+
+        {
+            uint32_t variablesCount = kMaxInterStageShaderVariables - 1;
+            CheckTestPipeline(true, variablesCount, wgpu::ShaderStage::Vertex, kExtraBuiltins,
+                              kUsePointListAsPrimitiveTopology);
+        }
+        {
+            uint32_t variablesCount = kMaxInterStageShaderVariables;
+            CheckTestPipeline(false, variablesCount, wgpu::ShaderStage::Vertex, kExtraBuiltins,
+                              kUsePointListAsPrimitiveTopology);
+        }
+    }
+
+    // @builtin(position) in fragment shaders shouldn't be counted into the maximum inter-stage
+    // variables count.
+    {
+        CheckTestPipeline(true, kMaxInterStageShaderVariables, wgpu::ShaderStage::Fragment,
+                          "@builtin(position) fragCoord : vec4f,");
+    }
+
+    // @builtin(front_facing), @builtin(sample_index), @builtin(sample_mask),
+    // @builtin(primitive_index), @builtin(subgroup_invocation_id) and
+    // @builtin(subgroup_size) should all be counted into the maximum
+    // inter-stage variables count. Each builtin used consumes 1 inter-stage
+    // shader variable, so with N of them the maximum number of user-defined
+    // inter-stage shader variables is (kMaxInterStageShaderVariables - N).
+    // Combinations that need a feature the device lacks are skipped.
+    {
+        struct Builtin {
+            const char* name;
+            const char* type;
+            const char* extension;
+            std::optional<wgpu::FeatureName> requiredFeature;
+        };
+        Builtin builtins[] = {
+            {"front_facing", "bool", nullptr, {}},
+            {"sample_index", "u32", nullptr, {}},
+            {"sample_mask", "u32", nullptr, {}},
+            {"primitive_index", "u32", "primitive_index", wgpu::FeatureName::PrimitiveIndex},
+            {"subgroup_invocation_id", "u32", "subgroups", wgpu::FeatureName::Subgroups},
+            {"subgroup_size", "u32", "subgroups", wgpu::FeatureName::Subgroups},
+        };
+        for (uint8_t mask = 1; mask < 1 << std::size(builtins); ++mask) {
+            std::string builtInDeclarations = "";
+            bool canTest = true;
+            for (uint8_t b = 0; b < std::size(builtins); ++b) {
+                if (mask & (1 << b)) {
+                    const Builtin& builtin = builtins[b];
+                    builtInDeclarations += "@builtin(" + std::string(builtin.name) + ") b_" +
+                                           std::string(builtin.name) + ": " +
+                                           std::string(builtin.type) + ",";
+                    if (builtin.requiredFeature.has_value()) {
+                        if (!device.HasFeature(builtin.requiredFeature.value())) {
+                            canTest = false;
+                        }
+                    }
+                }
+            }
+            if (canTest) {
+                uint32_t variablesCount = kMaxInterStageShaderVariables - std::popcount(mask);
+                CheckTestPipeline(true, variablesCount, wgpu::ShaderStage::Fragment,
+                                  builtInDeclarations.c_str());
+            }
+            if (canTest) {
+                uint32_t variablesCount = kMaxInterStageShaderVariables - std::popcount(mask) + 1;
+                CheckTestPipeline(false, variablesCount, wgpu::ShaderStage::Fragment,
+                                  builtInDeclarations.c_str());
+            }
+        }
+    }
+}
+
 struct WGSLExtensionInfo {
     const char* wgslName;
     // Is this WGSL extension experimental, i.e. guarded by AllowUnsafeAPIs toggle
diff --git a/src/tint/lang/wgsl/inspector/entry_point.h b/src/tint/lang/wgsl/inspector/entry_point.h
index a17de12..dd8e8f73 100644
--- a/src/tint/lang/wgsl/inspector/entry_point.h
+++ b/src/tint/lang/wgsl/inspector/entry_point.h
@@ -208,6 +208,12 @@
     bool uses_subgroup_matrix = false;
     /// Does the entry point use dpdxFine, dpdyFine, or fwidthFine
     bool fine_derivative_builtin_used = false;
+    /// Does the entry point use primitive_index
+    bool primitive_index_used = false;
+    /// Does the entry point use subgroup_invocation_id
+    bool subgroup_invocation_id_used = false;
+    /// Does the entry point use subgroup_size
+    bool subgroup_size_used = false;
     /// The array length of the clip_distances builtin. Holding no value means the clip_distances
     /// is not used.
     std::optional<uint32_t> clip_distances_size;
diff --git a/src/tint/lang/wgsl/inspector/inspector.cc b/src/tint/lang/wgsl/inspector/inspector.cc
index 34d8473..321395f 100644
--- a/src/tint/lang/wgsl/inspector/inspector.cc
+++ b/src/tint/lang/wgsl/inspector/inspector.cc
@@ -360,6 +360,13 @@
             core::BuiltinValue::kVertexIndex, param->Type(), param->Declaration()->attributes);
         entry_point.instance_index_used |= ContainsBuiltin(
             core::BuiltinValue::kInstanceIndex, param->Type(), param->Declaration()->attributes);
+        entry_point.primitive_index_used |= ContainsBuiltin(
+            core::BuiltinValue::kPrimitiveIndex, param->Type(), param->Declaration()->attributes);
+        entry_point.subgroup_invocation_id_used |=
+            ContainsBuiltin(core::BuiltinValue::kSubgroupInvocationId, param->Type(),
+                            param->Declaration()->attributes);
+        entry_point.subgroup_size_used |= ContainsBuiltin(
+            core::BuiltinValue::kSubgroupSize, param->Type(), param->Declaration()->attributes);
 
         if (entry_point.stage == PipelineStage::kFragment) {
             entry_point.frag_position_used = ContainsBuiltin(
diff --git a/src/tint/lang/wgsl/inspector/inspector_test.cc b/src/tint/lang/wgsl/inspector/inspector_test.cc
index b604e60..a25ec80e 100644
--- a/src/tint/lang/wgsl/inspector/inspector_test.cc
+++ b/src/tint/lang/wgsl/inspector/inspector_test.cc
@@ -1133,6 +1133,9 @@
     EXPECT_FALSE(result[0].num_workgroups_used);
     EXPECT_FALSE(result[0].frag_depth_used);
     EXPECT_FALSE(result[0].fine_derivative_builtin_used);
+    EXPECT_FALSE(result[0].primitive_index_used);
+    EXPECT_FALSE(result[0].subgroup_invocation_id_used);
+    EXPECT_FALSE(result[0].subgroup_size_used);
 }
 
 TEST_F(InspectorGetEntryPointTest, InputSampleMaskSimpleReferenced) {
@@ -1229,6 +1232,99 @@
     EXPECT_TRUE(result[0].front_facing_used);
 }
 
+TEST_F(InspectorGetEntryPointTest, PrimitiveIndexSimpleReferenced) {
+    auto* src = R"(
+enable primitive_index;
+@fragment
+fn ep_func(@builtin(primitive_index) in_var: u32) {}
+)";
+    Inspector& inspector = Initialize(src);
+
+    auto result = inspector.GetEntryPoints();
+
+    ASSERT_EQ(1u, result.size());
+    EXPECT_TRUE(result[0].primitive_index_used);
+}
+
+TEST_F(InspectorGetEntryPointTest, PrimitiveIndexStructReferenced) {
+    auto* src = R"(
+enable primitive_index;
+struct in_struct {
+  @builtin(primitive_index) inner_position: u32,
+}
+@fragment
+fn ep_func(in_var: in_struct) {}
+)";
+    Inspector& inspector = Initialize(src);
+
+    auto result = inspector.GetEntryPoints();
+
+    ASSERT_EQ(1u, result.size());
+    EXPECT_TRUE(result[0].primitive_index_used);
+}
+
+TEST_F(InspectorGetEntryPointTest, SubgroupInvocationIdSimpleReferenced) {
+    auto* src = R"(
+enable subgroups;
+@fragment
+fn ep_func(@builtin(subgroup_invocation_id) in_var: u32) {}
+)";
+    Inspector& inspector = Initialize(src);
+
+    auto result = inspector.GetEntryPoints();
+
+    ASSERT_EQ(1u, result.size());
+    EXPECT_TRUE(result[0].subgroup_invocation_id_used);
+}
+
+TEST_F(InspectorGetEntryPointTest, SubgroupInvocationIdStructReferenced) {
+    auto* src = R"(
+enable subgroups;
+struct in_struct {
+  @builtin(subgroup_invocation_id) inner_position: u32,
+}
+@fragment
+fn ep_func(in_var: in_struct) {}
+)";
+    Inspector& inspector = Initialize(src);
+
+    auto result = inspector.GetEntryPoints();
+
+    ASSERT_EQ(1u, result.size());
+    EXPECT_TRUE(result[0].subgroup_invocation_id_used);
+}
+
+TEST_F(InspectorGetEntryPointTest, SubgroupSizeSimpleReferenced) {
+    auto* src = R"(
+enable subgroups;
+@fragment
+fn ep_func(@builtin(subgroup_size) in_var: u32) {}
+)";
+    Inspector& inspector = Initialize(src);
+
+    auto result = inspector.GetEntryPoints();
+
+    ASSERT_EQ(1u, result.size());
+    EXPECT_TRUE(result[0].subgroup_size_used);
+}
+
+TEST_F(InspectorGetEntryPointTest, SubgroupSizeStructReferenced) {
+    auto* src = R"(
+enable subgroups;
+struct in_struct {
+  @builtin(subgroup_size) inner_position: u32,
+}
+@fragment
+fn ep_func(in_var: in_struct) {}
+)";
+    Inspector& inspector = Initialize(src);
+
+    auto result = inspector.GetEntryPoints();
+
+    ASSERT_EQ(1u, result.size());
+    EXPECT_TRUE(result[0].subgroup_size_used);
+}
+
 TEST_F(InspectorGetEntryPointTest, SampleIndexSimpleReferenced) {
     auto* src = R"(
 @fragment