Reland "D3D11: Manage builtin variables with ImmediateConstantTracker"

This is a reland of commit 6547ea2af9ffd3502f3e7e4873089f20a9b1d270

Original change's description:
> D3D11: Manage builtin variables with ImmediateConstantTracker
>
> This CL replaces the write-to-uniform-buffer mechanism with
> ImmediateConstantTracker to manage builtin variables in the D3D11 backend.
>
> It lays the groundwork for switching to ImmediateConstants to support
> internal immediate constants.
>
> Bug:366291600
>
> Change-Id: I2db6560cbcae29acd9fd088fcd8503d842433bf8
> Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/238474
> Reviewed-by: Quyen Le <lehoangquyen@chromium.org>
> Reviewed-by: Corentin Wallez <cwallez@chromium.org>
> Commit-Queue: Shaobo Yan <shaoboyan@microsoft.com>

Bug: 366291600
Change-Id: Icbd8cf88772468c732763da873e39ef3237e77a5
Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/244534
Reviewed-by: Corentin Wallez <cwallez@chromium.org>
Reviewed-by: Quyen Le <lehoangquyen@chromium.org>
Commit-Queue: Quyen Le <lehoangquyen@chromium.org>
diff --git a/src/dawn/common/ityp_bitset.h b/src/dawn/common/ityp_bitset.h
index 74bdcfb..7938bc02 100644
--- a/src/dawn/common/ityp_bitset.h
+++ b/src/dawn/common/ityp_bitset.h
@@ -33,6 +33,7 @@
 #include <limits>
 
 #include "dawn/common/Assert.h"
+#include "dawn/common/BitSetRangeIterator.h"
 #include "dawn/common/Math.h"
 #include "dawn/common/Platform.h"
 #include "dawn/common/TypedInteger.h"
@@ -242,6 +243,10 @@
         return bitset(static_cast<const Base&>(lhs) ^ static_cast<const Base&>(rhs));
     }
 
+    friend BitSetRangeIterator<N, Index> IterateRanges(const bitset& bitset) {
+        return BitSetRangeIterator<N, Index>(static_cast<const Base&>(bitset));
+    }
+
     friend struct std::hash<bitset>;
 };
 
diff --git a/src/dawn/native/BUILD.gn b/src/dawn/native/BUILD.gn
index 1d6a81f..81329a1 100644
--- a/src/dawn/native/BUILD.gn
+++ b/src/dawn/native/BUILD.gn
@@ -301,6 +301,7 @@
     "Format.cpp",
     "Format.h",
     "Forward.h",
+    "ImmediateConstantsLayout.cpp",
     "ImmediateConstantsLayout.h",
     "ImmediateConstantsTracker.cpp",
     "ImmediateConstantsTracker.h",
diff --git a/src/dawn/native/CMakeLists.txt b/src/dawn/native/CMakeLists.txt
index d86d06b..7ca2a2e 100644
--- a/src/dawn/native/CMakeLists.txt
+++ b/src/dawn/native/CMakeLists.txt
@@ -206,6 +206,7 @@
     "ExternalTexture.cpp"
     "Features.cpp"
     "Format.cpp"
+    "ImmediateConstantsLayout.cpp"
     "ImmediateConstantsTracker.cpp"
     "IndirectDrawMetadata.cpp"
     "IndirectDrawValidationEncoder.cpp"
diff --git a/src/dawn/native/ImmediateConstantsLayout.cpp b/src/dawn/native/ImmediateConstantsLayout.cpp
new file mode 100644
index 0000000..01eaa2a
--- /dev/null
+++ b/src/dawn/native/ImmediateConstantsLayout.cpp
@@ -0,0 +1,35 @@
+// Copyright 2025 The Dawn & Tint Authors
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// 1. Redistributions of source code must retain the above copyright notice, this
+//    list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright notice,
+//    this list of conditions and the following disclaimer in the documentation
+//    and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the copyright holder nor the names of its
+//    contributors may be used to endorse or promote products derived from
+//    this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include "dawn/native/ImmediateConstantsLayout.h"
+
+namespace dawn::native {
+uint32_t GetImmediateIndexInPipeline(const uint32_t layoutOffset,
+                                     const ImmediateConstantMask& pipelineImmediateMask) {
+    return (((1u << layoutOffset) - 1u) & pipelineImmediateMask).count();
+}
+}  // namespace dawn::native
diff --git a/src/dawn/native/ImmediateConstantsLayout.h b/src/dawn/native/ImmediateConstantsLayout.h
index 7116001..51a5dae 100644
--- a/src/dawn/native/ImmediateConstantsLayout.h
+++ b/src/dawn/native/ImmediateConstantsLayout.h
@@ -115,6 +115,20 @@
            GetImmediateConstantBlockBits(offset, size).to_ulong();
 }
 
+template <typename Object, typename Member>
+std::optional<uint32_t> GetImmediateByteOffsetInPipelineIfAny(
+    Member Object::* ptr,
+    const ImmediateConstantMask& pipelineImmediateMask) {
+    if (!HasImmediateConstants(ptr, pipelineImmediateMask)) {
+        return std::nullopt;
+    }
+
+    return GetImmediateByteOffsetInPipeline(ptr, pipelineImmediateMask);
+}
+
+uint32_t GetImmediateIndexInPipeline(const uint32_t layoutOffset,
+                                     const ImmediateConstantMask& pipelineImmediateMask);
+
 }  // namespace dawn::native
 
 #endif  // SRC_DAWN_NATIVE_IMMEDIATECONSTANTSLAYOUT_H_
diff --git a/src/dawn/native/d3d11/CommandBufferD3D11.cpp b/src/dawn/native/d3d11/CommandBufferD3D11.cpp
index e3a13a3..97e7388 100644
--- a/src/dawn/native/d3d11/CommandBufferD3D11.cpp
+++ b/src/dawn/native/d3d11/CommandBufferD3D11.cpp
@@ -33,6 +33,7 @@
 #include <utility>
 #include <vector>
 
+#include "dawn/common/BitSetRangeIterator.h"
 #include "dawn/common/WindowsUtils.h"
 #include "dawn/native/ApplyClearColorValueWithDrawHelper.h"
 #include "dawn/native/ChainUtils.h"
@@ -40,6 +41,7 @@
 #include "dawn/native/CommandValidation.h"
 #include "dawn/native/Commands.h"
 #include "dawn/native/ExternalTexture.h"
+#include "dawn/native/ImmediateConstantsTracker.h"
 #include "dawn/native/RenderBundle.h"
 #include "dawn/native/d3d/D3DError.h"
 #include "dawn/native/d3d11/BindGroupTrackerD3D11.h"
@@ -216,6 +218,46 @@
     return pixelLocalStorageUAVs;
 }
 
+// Uploads the dirty immediate constants tracked by T into the internal uniform buffer.
+template <typename T>
+class ImmediateConstantTracker : public T {
+  public:
+    ImmediateConstantTracker() = default;
+
+    // Writes each dirty range used by the last set pipeline, then flushes the uniform buffer.
+    MaybeError Apply(const ScopedSwapStateCommandRecordingContext* commandContext) {
+        auto* lastPipeline = this->mLastPipeline;
+        if (!lastPipeline) {
+            return {};
+        }
+
+        ImmediateConstantMask pipelineMask = lastPipeline->GetImmediateMask();
+        ImmediateConstantMask uploadBits = this->mDirty & pipelineMask;
+        for (auto&& [offset, size] : IterateRanges(uploadBits)) {
+            const uint32_t contentByteOffset =
+                static_cast<uint32_t>(offset) * kImmediateConstantElementByteSize;
+            const uint32_t bufferIndex =
+                GetImmediateIndexInPipeline(static_cast<uint32_t>(offset), pipelineMask);
+            commandContext->WriteUniformBufferRange(
+                bufferIndex, this->mContent.template Get<uint32_t>(contentByteOffset),
+                size * kImmediateConstantElementByteSize);
+        }
+
+        // Reset all dirty bits after uploading.
+        this->mDirty.reset();
+        return commandContext->FlushUniformBuffer();
+    }
+
+    // Returns the byte offset of firstVertex in the compacted uniform buffer. Uses the pipeline
+    // mask, not mDirty, which Apply() has already reset. Requires a pipeline to have been set.
+    uint32_t GetFirstIndexContentStartOffset() {
+        uint32_t startIndex =
+            offsetof(RenderImmediateConstants, firstVertex) / kImmediateConstantElementByteSize;
+        const auto& mask = this->mLastPipeline->GetImmediateMask();
+        return GetImmediateIndexInPipeline(startIndex, mask) * kImmediateConstantElementByteSize;
+    }
+};
+
 }  // namespace
 
 // Create CommandBuffer
@@ -496,6 +538,8 @@
     ComputePipeline* lastPipeline = nullptr;
     ComputePassBindGroupTracker bindGroupTracker(commandContext);
 
+    ImmediateConstantTracker<ComputeImmediateConstantsTrackerBase> immediates = {};
+
     Command type;
     while (mCommands.NextCommandId(&type)) {
         switch (type) {
@@ -508,8 +552,8 @@
                 DispatchCmd* dispatch = mCommands.NextCommand<DispatchCmd>();
 
                 DAWN_TRY(bindGroupTracker.Apply());
-
-                DAWN_TRY(RecordNumWorkgroupsForDispatch(lastPipeline, commandContext, dispatch));
+                immediates.SetNumWorkgroups(dispatch->x, dispatch->y, dispatch->z);
+                DAWN_TRY(immediates.Apply(commandContext));
                 commandContext->GetD3D11DeviceContext3()->Dispatch(dispatch->x, dispatch->y,
                                                                    dispatch->z);
 
@@ -520,8 +564,8 @@
                 DispatchIndirectCmd* dispatch = mCommands.NextCommand<DispatchIndirectCmd>();
 
                 DAWN_TRY(bindGroupTracker.Apply());
-
                 auto* indirectBuffer = ToGPUUsableBuffer(dispatch->indirectBuffer.Get());
+                DAWN_TRY(immediates.Apply(commandContext));
 
                 if (lastPipeline->UsesNumWorkgroups()) {
                     // Copy indirect args into the uniform buffer for built-in workgroup variables.
@@ -543,6 +587,7 @@
                 lastPipeline = ToBackend(cmd->pipeline).Get();
                 lastPipeline->ApplyNow(commandContext);
                 bindGroupTracker.OnSetPipeline(lastPipeline);
+                immediates.OnSetPipeline(lastPipeline);
                 break;
             }
 
@@ -676,6 +721,8 @@
     std::array<float, 4> blendColor = {0.0f, 0.0f, 0.0f, 0.0f};
     uint32_t stencilReference = 0;
 
+    ImmediateConstantTracker<RenderImmediateConstantsTrackerBase> immediates = {};
+
     auto DoRenderBundleCommand = [&](CommandIterator* iter, Command type) -> MaybeError {
         switch (type) {
             case Command::Draw: {
@@ -683,8 +730,8 @@
 
                 DAWN_TRY(bindGroupTracker.Apply());
                 vertexBufferTracker.Apply(lastPipeline);
-                DAWN_TRY(RecordFirstIndexOffset(lastPipeline, commandContext, draw->firstVertex,
-                                                draw->firstInstance));
+                immediates.SetFirstIndexOffset(draw->firstVertex, draw->firstInstance);
+                DAWN_TRY(immediates.Apply(commandContext));
                 commandContext->GetD3D11DeviceContext3()->DrawInstanced(
                     draw->vertexCount, draw->instanceCount, draw->firstVertex, draw->firstInstance);
 
@@ -696,8 +743,8 @@
 
                 DAWN_TRY(bindGroupTracker.Apply());
                 vertexBufferTracker.Apply(lastPipeline);
-                DAWN_TRY(RecordFirstIndexOffset(lastPipeline, commandContext, draw->baseVertex,
-                                                draw->firstInstance));
+                immediates.SetFirstIndexOffset(draw->baseVertex, draw->firstInstance);
+                DAWN_TRY(immediates.Apply(commandContext));
                 commandContext->GetD3D11DeviceContext3()->DrawIndexedInstanced(
                     draw->indexCount, draw->instanceCount, draw->firstIndex, draw->baseVertex,
                     draw->firstInstance);
@@ -713,6 +760,7 @@
 
                 DAWN_TRY(bindGroupTracker.Apply());
                 vertexBufferTracker.Apply(lastPipeline);
+                DAWN_TRY(immediates.Apply(commandContext));
 
                 if (lastPipeline->UsesVertexIndex() || lastPipeline->UsesInstanceIndex()) {
                     // Copy StartVertexLocation and StartInstanceLocation into the uniform buffer
@@ -722,7 +770,8 @@
                         offsetof(D3D11_DRAW_INSTANCED_INDIRECT_ARGS, StartVertexLocation);
                     DAWN_TRY(Buffer::Copy(commandContext, indirectBuffer, offset,
                                           sizeof(uint32_t) * 2,
-                                          commandContext->GetInternalUniformBuffer(), 0));
+                                          commandContext->GetInternalUniformBuffer(),
+                                          immediates.GetFirstIndexContentStartOffset()));
                 }
 
                 ID3D11Buffer* d3dBuffer;
@@ -742,6 +791,7 @@
 
                 DAWN_TRY(bindGroupTracker.Apply());
                 vertexBufferTracker.Apply(lastPipeline);
+                DAWN_TRY(immediates.Apply(commandContext));
 
                 if (lastPipeline->UsesVertexIndex() || lastPipeline->UsesInstanceIndex()) {
                     // Copy StartVertexLocation and StartInstanceLocation into the uniform buffer
@@ -751,7 +801,8 @@
                         offsetof(D3D11_DRAW_INDEXED_INSTANCED_INDIRECT_ARGS, BaseVertexLocation);
                     DAWN_TRY(Buffer::Copy(commandContext, indirectBuffer, offset,
                                           sizeof(uint32_t) * 2,
-                                          commandContext->GetInternalUniformBuffer(), 0));
+                                          commandContext->GetInternalUniformBuffer(),
+                                          immediates.GetFirstIndexContentStartOffset()));
                 }
 
                 ID3D11Buffer* d3dBuffer;
@@ -769,6 +820,7 @@
                 lastPipeline = ToBackend(cmd->pipeline.Get());
                 lastPipeline->ApplyNow(commandContext, blendColor, stencilReference);
                 bindGroupTracker.OnSetPipeline(lastPipeline);
+                immediates.OnSetPipeline(lastPipeline);
 
                 break;
             }
@@ -978,38 +1030,4 @@
     }
 }
 
-MaybeError CommandBuffer::RecordFirstIndexOffset(
-    RenderPipeline* renderPipeline,
-    const ScopedSwapStateCommandRecordingContext* commandContext,
-    uint32_t firstVertex,
-    uint32_t firstInstance) {
-    constexpr uint32_t kFirstVertexOffset = 0;
-    constexpr uint32_t kFirstInstanceOffset = 1;
-
-    if (renderPipeline->UsesVertexIndex()) {
-        commandContext->WriteUniformBuffer(kFirstVertexOffset, firstVertex);
-    }
-    if (renderPipeline->UsesInstanceIndex()) {
-        commandContext->WriteUniformBuffer(kFirstInstanceOffset, firstInstance);
-    }
-
-    return commandContext->FlushUniformBuffer();
-}
-
-MaybeError CommandBuffer::RecordNumWorkgroupsForDispatch(
-    ComputePipeline* computePipeline,
-    const ScopedSwapStateCommandRecordingContext* commandContext,
-    DispatchCmd* dispatchCmd) {
-    if (!computePipeline->UsesNumWorkgroups()) {
-        // Workgroup size is not used in shader, so we don't need to update the uniform buffer. The
-        // original value in the uniform buffer will not be used, so we don't need to clear it.
-        return {};
-    }
-
-    commandContext->WriteUniformBuffer(/*offset=*/0, dispatchCmd->x);
-    commandContext->WriteUniformBuffer(/*offset=*/1, dispatchCmd->y);
-    commandContext->WriteUniformBuffer(/*offset=*/2, dispatchCmd->z);
-    return commandContext->FlushUniformBuffer();
-}
-
 }  // namespace dawn::native::d3d11
diff --git a/src/dawn/native/d3d11/CommandBufferD3D11.h b/src/dawn/native/d3d11/CommandBufferD3D11.h
index beb201a..7975bf2 100644
--- a/src/dawn/native/d3d11/CommandBufferD3D11.h
+++ b/src/dawn/native/d3d11/CommandBufferD3D11.h
@@ -57,15 +57,6 @@
     void HandleDebugCommands(const ScopedSwapStateCommandRecordingContext* commandContext,
                              CommandIterator* iter,
                              Command command);
-
-    MaybeError RecordFirstIndexOffset(RenderPipeline* renderPipeline,
-                                      const ScopedSwapStateCommandRecordingContext* commandContext,
-                                      uint32_t firstVertex,
-                                      uint32_t firstInstance);
-    MaybeError RecordNumWorkgroupsForDispatch(
-        ComputePipeline* computePipeline,
-        const ScopedSwapStateCommandRecordingContext* commandContext,
-        DispatchCmd* dispatchCmd);
 };
 
 }  // namespace dawn::native::d3d11
diff --git a/src/dawn/native/d3d11/CommandRecordingContextD3D11.cpp b/src/dawn/native/d3d11/CommandRecordingContextD3D11.cpp
index aa7125e..26019d0 100644
--- a/src/dawn/native/d3d11/CommandRecordingContextD3D11.cpp
+++ b/src/dawn/native/d3d11/CommandRecordingContextD3D11.cpp
@@ -134,12 +134,13 @@
     return Get()->mD3D11DeviceContext3->Flush1(ContextType, hEvent);
 }
 
-void ScopedCommandRecordingContext::WriteUniformBuffer(uint32_t offset, uint32_t element) const {
-    DAWN_ASSERT(offset < CommandRecordingContext::kMaxNumBuiltinElements);
-    if (Get()->mUniformBufferData[offset] != element) {
-        Get()->mUniformBufferData[offset] = element;
-        Get()->mUniformBufferDirty = true;
-    }
+void ScopedCommandRecordingContext::WriteUniformBufferRange(uint32_t offset,
+                                                            const void* data,
+                                                            size_t size) const {
+    DAWN_ASSERT(offset < kMaxImmediateConstantsPerPipeline);
+    DAWN_ASSERT(size <= sizeof(uint32_t) * (kMaxImmediateConstantsPerPipeline - offset));
+    std::memcpy(&Get()->mUniformBufferData[offset], data, size);
+    Get()->mUniformBufferDirty = true;
 }
 
 MaybeError ScopedCommandRecordingContext::FlushUniformBuffer() const {
@@ -308,12 +309,12 @@
 // static
 ResultOrError<Ref<BufferBase>> CommandRecordingContext::CreateInternalUniformBuffer(
     DeviceBase* device) {
-    // Create a uniform buffer for built in variables.
+    // Create a uniform buffer for user and internal ImmediateConstants.
     BufferDescriptor descriptor;
-    descriptor.size = sizeof(uint32_t) * kMaxNumBuiltinElements;
+    descriptor.size = sizeof(uint32_t) * kMaxImmediateConstantsPerPipeline;
     descriptor.usage = wgpu::BufferUsage::Uniform | wgpu::BufferUsage::CopyDst;
     descriptor.mappedAtCreation = false;
-    descriptor.label = "BuiltinUniform";
+    descriptor.label = "ImmediateConstantsInternalBuffer";
 
     Ref<BufferBase> uniformBuffer;
     // Lock the device to protect the clearing of the built-in uniform buffer.
diff --git a/src/dawn/native/d3d11/CommandRecordingContextD3D11.h b/src/dawn/native/d3d11/CommandRecordingContextD3D11.h
index 3a8c5ff..97b5e47 100644
--- a/src/dawn/native/d3d11/CommandRecordingContextD3D11.h
+++ b/src/dawn/native/d3d11/CommandRecordingContextD3D11.h
@@ -30,6 +30,7 @@
 
 #include "absl/container/flat_hash_set.h"
 #include "absl/container/inlined_vector.h"
+#include "dawn/common/Constants.h"
 #include "dawn/common/MutexProtected.h"
 #include "dawn/common/NonCopyable.h"
 #include "dawn/common/Ref.h"
@@ -107,11 +108,9 @@
     ComPtr<ID3D11Multithread> mD3D11Multithread;
     ComPtr<ID3DUserDefinedAnnotation> mD3DUserDefinedAnnotation;
 
-    // The maximum number of builtin elements is 4 (vec4). It must be multiple of 4.
-    static constexpr size_t kMaxNumBuiltinElements = 4;
     // The uniform buffer for built-in variables.
     Ref<GPUUsableBuffer> mUniformBuffer;
-    std::array<uint32_t, kMaxNumBuiltinElements> mUniformBufferData;
+    std::array<uint32_t, kMaxImmediateConstantsPerPipeline> mUniformBufferData{};
     bool mUniformBufferDirty = true;
 
     absl::flat_hash_set<Ref<d3d::KeyedMutex>> mAcquiredKeyedMutexes;
@@ -168,8 +167,8 @@
     void Flush() const;
     void Flush1(D3D11_CONTEXT_TYPE ContextType, HANDLE hEvent) const;
 
-    // Write the built-in variable value to the uniform buffer.
-    void WriteUniformBuffer(uint32_t offset, uint32_t element) const;
+    // Write immediate data to the uniform buffer.
+    void WriteUniformBufferRange(uint32_t offset, const void* data, size_t size) const;
     MaybeError FlushUniformBuffer() const;
 
     MaybeError AcquireKeyedMutex(Ref<d3d::KeyedMutex> keyedMutex) const;
diff --git a/src/dawn/native/d3d11/ComputePipelineD3D11.cpp b/src/dawn/native/d3d11/ComputePipelineD3D11.cpp
index f34a3e4..038c754 100644
--- a/src/dawn/native/d3d11/ComputePipelineD3D11.cpp
+++ b/src/dawn/native/d3d11/ComputePipelineD3D11.cpp
@@ -31,6 +31,7 @@
 #include <utility>
 
 #include "dawn/native/CreatePipelineAsyncEvent.h"
+#include "dawn/native/ImmediateConstantsLayout.h"
 #include "dawn/native/d3d/D3DError.h"
 #include "dawn/native/d3d11/DeviceD3D11.h"
 #include "dawn/native/d3d11/ShaderModuleD3D11.h"
@@ -51,6 +52,11 @@
     Device* device = ToBackend(GetDevice());
     uint32_t compileFlags = 0;
 
+    if (UsesNumWorkgroups()) {
+        mImmediateMask |= GetImmediateConstantBlockBits(
+            offsetof(ComputeImmediateConstants, numWorkgroups), sizeof(NumWorkgroupsDimensions));
+    }
+
     if (!device->IsToggleEnabled(Toggle::UseDXC) &&
         !device->IsToggleEnabled(Toggle::FxcOptimizations)) {
         compileFlags |= D3DCOMPILE_OPTIMIZATION_LEVEL0;
@@ -70,9 +76,10 @@
     }
 
     d3d::CompiledShader compiledShader;
-    DAWN_TRY_ASSIGN(compiledShader, ToBackend(programmableStage.module)
-                                        ->Compile(programmableStage, SingleShaderStage::Compute,
-                                                  ToBackend(GetLayout()), compileFlags));
+    DAWN_TRY_ASSIGN(compiledShader,
+                    ToBackend(programmableStage.module)
+                        ->Compile(programmableStage, SingleShaderStage::Compute,
+                                  ToBackend(GetLayout()), compileFlags, GetImmediateMask()));
     DAWN_TRY(CheckHRESULT(device->GetD3D11Device()->CreateComputeShader(
                               compiledShader.shaderBlob.Data(), compiledShader.shaderBlob.Size(),
                               nullptr, &mComputeShader),
diff --git a/src/dawn/native/d3d11/RenderPipelineD3D11.cpp b/src/dawn/native/d3d11/RenderPipelineD3D11.cpp
index afc1a43..fca67bd 100644
--- a/src/dawn/native/d3d11/RenderPipelineD3D11.cpp
+++ b/src/dawn/native/d3d11/RenderPipelineD3D11.cpp
@@ -36,6 +36,7 @@
 
 #include "dawn/common/Range.h"
 #include "dawn/native/CreatePipelineAsyncEvent.h"
+#include "dawn/native/ImmediateConstantsLayout.h"
 #include "dawn/native/d3d/D3DError.h"
 #include "dawn/native/d3d/ShaderUtils.h"
 #include "dawn/native/d3d11/DeviceD3D11.h"
@@ -234,6 +235,16 @@
       mD3DPrimitiveTopology(D3DPrimitiveTopology(GetPrimitiveTopology())) {}
 
 MaybeError RenderPipeline::InitializeImpl() {
+    // Set the firstVertex and firstInstance bits together so that the non-immediate path still
+    // sees the correct offsets.
+    // TODO(crbug.com/366291600): Set these bits individually once immediates cover all cases.
+    if (UsesVertexIndex() || UsesInstanceIndex()) {
+        mImmediateMask |= GetImmediateConstantBlockBits(
+            offsetof(RenderImmediateConstants, firstVertex), kImmediateConstantElementByteSize);
+        mImmediateMask |= GetImmediateConstantBlockBits(
+            offsetof(RenderImmediateConstants, firstInstance), kImmediateConstantElementByteSize);
+    }
+
     DAWN_TRY(InitializeRasterizerState());
     DAWN_TRY(InitializeBlendState());
     DAWN_TRY(InitializeShaders());
@@ -462,18 +473,16 @@
             additionalCompileFlags |= D3DCOMPILE_IEEE_STRICTNESS;
         }
 
-        DAWN_TRY_ASSIGN(
-            compiledShader[SingleShaderStage::Vertex],
-            ToBackend(programmableStage.module)
-                ->Compile(programmableStage, SingleShaderStage::Vertex, ToBackend(GetLayout()),
-                          compileFlags | additionalCompileFlags, usedInterstageVariables));
+        DAWN_TRY_ASSIGN(compiledShader[SingleShaderStage::Vertex],
+                        ToBackend(programmableStage.module)
+                            ->Compile(programmableStage, SingleShaderStage::Vertex,
+                                      ToBackend(GetLayout()), compileFlags | additionalCompileFlags,
+                                      GetImmediateMask(), usedInterstageVariables));
         const Blob& shaderBlob = compiledShader[SingleShaderStage::Vertex].shaderBlob;
         DAWN_TRY(CheckHRESULT(device->GetD3D11Device()->CreateVertexShader(
                                   shaderBlob.Data(), shaderBlob.Size(), nullptr, &mVertexShader),
                               "D3D11 create vertex shader"));
         DAWN_TRY(InitializeInputLayout(shaderBlob));
-        mUsesVertexIndex = compiledShader[SingleShaderStage::Vertex].usesVertexIndex;
-        mUsesInstanceIndex = compiledShader[SingleShaderStage::Vertex].usesInstanceIndex;
     }
 
     std::optional<tint::hlsl::writer::PixelLocalOptions> pixelLocalOptions;
@@ -530,11 +539,12 @@
             additionalCompileFlags |= D3DCOMPILE_IEEE_STRICTNESS;
         }
 
-        DAWN_TRY_ASSIGN(compiledShader[SingleShaderStage::Fragment],
-                        ToBackend(programmableStage.module)
-                            ->Compile(programmableStage, SingleShaderStage::Fragment,
-                                      ToBackend(GetLayout()), compileFlags | additionalCompileFlags,
-                                      usedInterstageVariables, pixelLocalOptions));
+        DAWN_TRY_ASSIGN(
+            compiledShader[SingleShaderStage::Fragment],
+            ToBackend(programmableStage.module)
+                ->Compile(programmableStage, SingleShaderStage::Fragment, ToBackend(GetLayout()),
+                          compileFlags | additionalCompileFlags, GetImmediateMask(),
+                          usedInterstageVariables, pixelLocalOptions));
         DAWN_TRY(CheckHRESULT(device->GetD3D11Device()->CreatePixelShader(
                                   compiledShader[SingleShaderStage::Fragment].shaderBlob.Data(),
                                   compiledShader[SingleShaderStage::Fragment].shaderBlob.Size(),
diff --git a/src/dawn/native/d3d11/RenderPipelineD3D11.h b/src/dawn/native/d3d11/RenderPipelineD3D11.h
index 0ff9900..b2f5e32 100644
--- a/src/dawn/native/d3d11/RenderPipelineD3D11.h
+++ b/src/dawn/native/d3d11/RenderPipelineD3D11.h
@@ -54,9 +54,6 @@
     void ApplyDepthStencilState(const ScopedSwapStateCommandRecordingContext* commandContext,
                                 uint32_t stencilReference);
 
-    bool UsesVertexIndex() const { return mUsesVertexIndex; }
-    bool UsesInstanceIndex() const { return mUsesInstanceIndex; }
-
   private:
     RenderPipeline(Device* device, const UnpackedPtr<RenderPipelineDescriptor>& descriptor);
     ~RenderPipeline() override;
@@ -77,8 +74,6 @@
     ComPtr<ID3D11PixelShader> mPixelShader;
     ComPtr<ID3D11BlendState> mBlendState;
     ComPtr<ID3D11DepthStencilState> mDepthStencilState;
-    bool mUsesVertexIndex = false;
-    bool mUsesInstanceIndex = false;
 };
 
 }  // namespace dawn::native::d3d11
diff --git a/src/dawn/native/d3d11/ShaderModuleD3D11.cpp b/src/dawn/native/d3d11/ShaderModuleD3D11.cpp
index 4554437..d5c7073 100644
--- a/src/dawn/native/d3d11/ShaderModuleD3D11.cpp
+++ b/src/dawn/native/d3d11/ShaderModuleD3D11.cpp
@@ -33,6 +33,7 @@
 
 #include "dawn/common/Assert.h"
 #include "dawn/common/Log.h"
+#include "dawn/native/ImmediateConstantsLayout.h"
 #include "dawn/native/Pipeline.h"
 #include "dawn/native/TintUtils.h"
 #include "dawn/native/d3d/D3DCompilationRequest.h"
@@ -80,6 +81,7 @@
     SingleShaderStage stage,
     const PipelineLayout* layout,
     uint32_t compileFlags,
+    const ImmediateConstantMask& pipelineImmediateMask,
     const std::optional<dawn::native::d3d::InterStageShaderVariablesMask>& usedInterstageVariables,
     const std::optional<tint::hlsl::writer::PixelLocalOptions>& pixelLocalOptions) {
     Device* device = ToBackend(GetDevice());
@@ -193,23 +195,34 @@
     req.hlsl.inputProgram = UseTintProgram();
     req.hlsl.entryPointName = programmableStage.entryPoint.c_str();
     req.hlsl.stage = stage;
-    // Put the firstIndex into the internally reserved group and binding to avoid conflicting with
-    // any existing bindings.
-    if (!useTintIR) {
-        req.hlsl.firstIndexOffsetRegisterSpace = PipelineLayout::kReservedConstantsBindGroupIndex;
-        req.hlsl.firstIndexOffsetShaderRegister = PipelineLayout::kFirstIndexOffsetBindingNumber;
-        // Remap to the desired space and binding, [0, kFirstIndexOffsetConstantBufferSlot].
-        {
-            tint::BindingPoint srcBindingPoint{req.hlsl.firstIndexOffsetRegisterSpace,
-                                               req.hlsl.firstIndexOffsetShaderRegister};
-            // D3D11 (HLSL SM5.0) doesn't support spaces, so we have to put the firstIndex in the
-            // default space(0)
-            tint::BindingPoint dstBindingPoint{0u,
-                                               PipelineLayout::kFirstIndexOffsetConstantBufferSlot};
 
-            bindings.uniform.emplace(srcBindingPoint,
-                                     tint::hlsl::writer::binding::Uniform{dstBindingPoint.group,
+    if (!useTintIR) {
+        if (stage == SingleShaderStage::Vertex) {
+            // Put the firstIndex into the internally reserved group and binding to avoid
+            // conflicting with any existing bindings.
+            req.hlsl.firstIndexOffsetRegisterSpace =
+                PipelineLayout::kReservedConstantsBindGroupIndex;
+            req.hlsl.firstIndexOffsetShaderRegister =
+                PipelineLayout::kFirstIndexOffsetBindingNumber;
+            // Remap to the desired space and binding, [0, kFirstIndexOffsetConstantBufferSlot].
+            {
+                tint::BindingPoint srcBindingPoint{req.hlsl.firstIndexOffsetRegisterSpace,
+                                                   req.hlsl.firstIndexOffsetShaderRegister};
+                // D3D11 (HLSL SM5.0) doesn't support spaces, so we have to put the firstIndex in
+                // the default space(0)
+                tint::BindingPoint dstBindingPoint{
+                    0u, PipelineLayout::kFirstIndexOffsetConstantBufferSlot};
+
+                bindings.uniform.emplace(
+                    srcBindingPoint, tint::hlsl::writer::binding::Uniform{dstBindingPoint.group,
                                                                           dstBindingPoint.binding});
+            }
+        }
+
+        if (entryPoint.usesNumWorkgroups) {
+            DAWN_ASSERT(stage == SingleShaderStage::Compute);
+            req.hlsl.tintOptions.root_constant_binding_point =
+                tint::BindingPoint{0, PipelineLayout::kNumWorkgroupsConstantBufferSlot};
         }
     }
 
@@ -224,14 +237,20 @@
         device->IsToggleEnabled(Toggle::DisableWorkgroupInit);
     req.hlsl.tintOptions.bindings = std::move(bindings);
 
-    if (entryPoint.usesNumWorkgroups) {
-        DAWN_ASSERT(stage == SingleShaderStage::Compute);
-        req.hlsl.tintOptions.root_constant_binding_point =
-            tint::BindingPoint{0, PipelineLayout::kNumWorkgroupsConstantBufferSlot};
-    } else if (useTintIR && stage == SingleShaderStage::Vertex) {
-        // For vertex shaders, use root constant to add FirstIndexOffset, if needed
-        req.hlsl.tintOptions.root_constant_binding_point =
-            tint::BindingPoint{0, PipelineLayout::kFirstIndexOffsetConstantBufferSlot};
+    // Immediate data available in TintIR only.
+    if (useTintIR) {
+        req.hlsl.tintOptions.immediate_binding_point =
+            tint::BindingPoint{0, PipelineLayout::kReservedConstantBufferSlot};
+        if (stage == SingleShaderStage::Compute) {
+            req.hlsl.tintOptions.num_workgroups_start_offset =
+                GetImmediateByteOffsetInPipelineIfAny(&ComputeImmediateConstants::numWorkgroups,
+                                                      pipelineImmediateMask);
+        } else {
+            req.hlsl.tintOptions.first_index_offset = GetImmediateByteOffsetInPipelineIfAny(
+                &RenderImmediateConstants::firstVertex, pipelineImmediateMask);
+            req.hlsl.tintOptions.first_instance_offset = GetImmediateByteOffsetInPipelineIfAny(
+                &RenderImmediateConstants::firstInstance, pipelineImmediateMask);
+        }
     }
 
     if (stage == SingleShaderStage::Vertex) {
diff --git a/src/dawn/native/d3d11/ShaderModuleD3D11.h b/src/dawn/native/d3d11/ShaderModuleD3D11.h
index 30ee9f3..eb0d88f 100644
--- a/src/dawn/native/d3d11/ShaderModuleD3D11.h
+++ b/src/dawn/native/d3d11/ShaderModuleD3D11.h
@@ -62,6 +62,7 @@
         SingleShaderStage stage,
         const PipelineLayout* layout,
         uint32_t compileFlags,
+        const ImmediateConstantMask& pipelineImmediateMask,
         const std::optional<dawn::native::d3d::InterStageShaderVariablesMask>&
             usedInterstageVariables = {},
         const std::optional<tint::hlsl::writer::PixelLocalOptions>& pixelLocalOptions = {});
diff --git a/src/dawn/tests/end2end/FirstIndexOffsetTests.cpp b/src/dawn/tests/end2end/FirstIndexOffsetTests.cpp
index ad4ffa4..23217a7 100644
--- a/src/dawn/tests/end2end/FirstIndexOffsetTests.cpp
+++ b/src/dawn/tests/end2end/FirstIndexOffsetTests.cpp
@@ -66,11 +66,15 @@
 
 namespace {
 
+struct FirstIndexOffset {
+    uint32_t firstVertex = 0;
+    uint32_t firstInstance = 0;
+};
 class FirstIndexOffsetTests : public DawnTest {
   public:
-    void TestVertexIndex(DrawMode mode, uint32_t firstVertex);
-    void TestInstanceIndex(DrawMode mode, uint32_t firstInstance);
-    void TestBothIndices(DrawMode mode, uint32_t firstVertex, uint32_t firstInstance);
+    void TestVertexIndex(DrawMode mode, const std::vector<FirstIndexOffset>& offsets);
+    void TestInstanceIndex(DrawMode mode, const std::vector<FirstIndexOffset>& offsets);
+    void TestBothIndices(DrawMode mode, const std::vector<FirstIndexOffset>& offsets);
 
   protected:
     std::vector<wgpu::FeatureName> GetRequiredFeatures() override {
@@ -81,25 +85,36 @@
     }
 
   private:
+    wgpu::Buffer CreateVertexBuffer(uint32_t firstVertexOffset) {
+        std::vector<float> vertexData(firstVertexOffset * kComponentsPerVertex);
+        vertexData.insert(vertexData.end(), {0, 0, 0, 1});
+        vertexData.insert(vertexData.end(), {0, 0, 0, 1});
+        return utils::CreateBufferFromData(device, vertexData.data(),
+                                           vertexData.size() * sizeof(float),
+                                           wgpu::BufferUsage::Vertex);
+    }
+
     void TestImpl(DrawMode mode,
                   CheckIndex checkIndex,
-                  uint32_t vertexIndex,
-                  uint32_t instanceIndex);
+                  const std::vector<FirstIndexOffset>& offsets);
+
+    static constexpr uint32_t kComponentsPerVertex = 4;
 };
 
-void FirstIndexOffsetTests::TestVertexIndex(DrawMode mode, uint32_t firstVertex) {
-    TestImpl(mode, CheckIndex::Vertex, firstVertex, 0);
+void FirstIndexOffsetTests::TestVertexIndex(DrawMode mode,
+                                            const std::vector<FirstIndexOffset>& offsets) {
+    TestImpl(mode, CheckIndex::Vertex, offsets);
 }
 
-void FirstIndexOffsetTests::TestInstanceIndex(DrawMode mode, uint32_t firstInstance) {
-    TestImpl(mode, CheckIndex::Instance, 0, firstInstance);
+void FirstIndexOffsetTests::TestInstanceIndex(DrawMode mode,
+                                              const std::vector<FirstIndexOffset>& offsets) {
+    TestImpl(mode, CheckIndex::Instance, offsets);
 }
 
 void FirstIndexOffsetTests::TestBothIndices(DrawMode mode,
-                                            uint32_t firstVertex,
-                                            uint32_t firstInstance) {
+                                            const std::vector<FirstIndexOffset>& offsets) {
     using wgpu::operator|;
-    TestImpl(mode, CheckIndex::Vertex | CheckIndex::Instance, firstVertex, firstInstance);
+    TestImpl(mode, CheckIndex::Vertex | CheckIndex::Instance, offsets);
 }
 
 // Conditionally tests if first/baseVertex and/or firstInstance have been correctly passed to the
@@ -108,8 +123,7 @@
 // values to a storage buffer. If vertex index is used, the vertex buffer is padded with 0s.
 void FirstIndexOffsetTests::TestImpl(DrawMode mode,
                                      CheckIndex checkIndex,
-                                     uint32_t firstVertex,
-                                     uint32_t firstInstance) {
+                                     const std::vector<FirstIndexOffset>& offsets) {
     // Compatibility mode does not support @interpolate(flat, first).
     // It only supports @interpolate(flat, either).
     DAWN_TEST_UNSUPPORTED_IF(IsCompatibilityMode());
@@ -172,8 +186,6 @@
 
     utils::BasicRenderPass renderPass = utils::CreateBasicRenderPass(device, kRTSize, kRTSize);
 
-    constexpr uint32_t kComponentsPerVertex = 4;
-
     utils::ComboRenderPipelineDescriptor pipelineDesc;
     pipelineDesc.vertex.module = utils::CreateShaderModule(device, vertexShader.c_str());
     pipelineDesc.cFragment.module = utils::CreateShaderModule(device, fragmentShader.c_str());
@@ -187,14 +199,11 @@
 
     wgpu::RenderPipeline pipeline = device.CreateRenderPipeline(&pipelineDesc);
 
-    std::vector<float> vertexData(firstVertex * kComponentsPerVertex);
-    vertexData.insert(vertexData.end(), {0, 0, 0, 1});
-    vertexData.insert(vertexData.end(), {0, 0, 0, 1});
-    wgpu::Buffer vertices = utils::CreateBufferFromData(
-        device, vertexData.data(), vertexData.size() * sizeof(float), wgpu::BufferUsage::Vertex);
+    // Create reusable buffers.
     wgpu::Buffer indices =
         utils::CreateBufferFromData<uint32_t>(device, wgpu::BufferUsage::Index, {0});
 
+    // Using arbitrary values for the initial vertex and instance indices.
     const uint32_t bufferInitialVertex =
         checkIndex & CheckIndex::Vertex ? std::numeric_limits<uint32_t>::max() : 0;
     const uint32_t bufferInitialInstance =
@@ -203,145 +212,242 @@
         utils::CreateBufferFromData(device, wgpu::BufferUsage::CopySrc | wgpu::BufferUsage::Storage,
                                     {bufferInitialVertex, bufferInitialInstance});
 
-    wgpu::Buffer indirectBuffer;
-    switch (mode) {
-        case DrawMode::NonIndexed:
-        case DrawMode::Indexed:
-            break;
-        case DrawMode::NonIndexedIndirect:
-            indirectBuffer = utils::CreateBufferFromData<uint32_t>(
-                device, wgpu::BufferUsage::Indirect, {1, 1, firstVertex, firstInstance});
-            break;
-        case DrawMode::IndexedIndirect:
-            indirectBuffer = utils::CreateBufferFromData<uint32_t>(
-                device, wgpu::BufferUsage::Indirect, {1, 1, 0, firstVertex, firstInstance});
-            break;
-        default:
-            FAIL();
-    }
-
     wgpu::BindGroup bindGroup =
         utils::MakeBindGroup(device, pipeline.GetBindGroupLayout(0), {{0, buffer}});
 
     wgpu::CommandEncoder encoder = device.CreateCommandEncoder();
     wgpu::RenderPassEncoder pass = encoder.BeginRenderPass(&renderPass.renderPassInfo);
+
+    std::array<uint32_t, 2> expected = {};
+
     pass.SetPipeline(pipeline);
-    pass.SetVertexBuffer(0, vertices);
     pass.SetBindGroup(0, bindGroup);
-    // Do a first draw to make sure the offset values are correctly updated on the next draw.
-    // We should only see the values from the second draw.
-    std::array<uint32_t, 2> firstDrawValues = {firstVertex + 1, firstInstance + 1};
-    pass.Draw(1, 1, firstDrawValues[0], firstDrawValues[1]);
-    switch (mode) {
-        case DrawMode::NonIndexed:
-            pass.Draw(1, 1, firstVertex, firstInstance);
-            break;
-        case DrawMode::Indexed:
-            pass.SetIndexBuffer(indices, wgpu::IndexFormat::Uint32);
-            pass.DrawIndexed(1, 1, 0, firstVertex, firstInstance);
-            break;
-        case DrawMode::NonIndexedIndirect:
-            pass.DrawIndirect(indirectBuffer, 0);
-            break;
-        case DrawMode::IndexedIndirect:
-            pass.SetIndexBuffer(indices, wgpu::IndexFormat::Uint32);
-            pass.DrawIndexedIndirect(indirectBuffer, 0);
-            break;
-        default:
-            FAIL();
+
+    // Recording draws with different firstVertex and firstInstance values
+    for (const auto& offset : offsets) {
+        uint32_t firstVertex = offset.firstVertex;
+        uint32_t firstInstance = offset.firstInstance;
+
+        wgpu::Buffer vertices = CreateVertexBuffer(firstVertex);
+
+        wgpu::Buffer indirectBuffer;
+        switch (mode) {
+            case DrawMode::NonIndexed:
+            case DrawMode::Indexed:
+                break;
+            case DrawMode::NonIndexedIndirect:
+                indirectBuffer = utils::CreateBufferFromData<uint32_t>(
+                    device, wgpu::BufferUsage::Indirect, {1, 1, firstVertex, firstInstance});
+                break;
+            case DrawMode::IndexedIndirect:
+                indirectBuffer = utils::CreateBufferFromData<uint32_t>(
+                    device, wgpu::BufferUsage::Indirect, {1, 1, 0, firstVertex, firstInstance});
+                break;
+            default:
+                FAIL();
+        }
+
+        pass.SetVertexBuffer(0, vertices);
+
+        switch (mode) {
+            case DrawMode::NonIndexed:
+                pass.Draw(1, 1, firstVertex, firstInstance);
+                break;
+            case DrawMode::Indexed:
+                pass.SetIndexBuffer(indices, wgpu::IndexFormat::Uint32);
+                pass.DrawIndexed(1, 1, 0, firstVertex, firstInstance);
+                break;
+            case DrawMode::NonIndexedIndirect:
+                pass.DrawIndirect(indirectBuffer, 0);
+                break;
+            case DrawMode::IndexedIndirect:
+                pass.SetIndexBuffer(indices, wgpu::IndexFormat::Uint32);
+                pass.DrawIndexedIndirect(indirectBuffer, 0);
+                break;
+            default:
+                FAIL();
+        }
+
+        expected[0] = firstVertex;
+        expected[1] = firstInstance;
+
+        // Per the specification, if validation is enabled and indirect-first-instance is not
+        // enabled, Draw[Indexed]Indirect with firstInstance > 0 will be a no-op. The buffer should
+        // still have its initial values.
+        if (firstInstance > 0 && IsIndirectDraw(mode) &&
+            !device.HasFeature(wgpu::FeatureName::IndirectFirstInstance) &&
+            !HasToggleEnabled("skip_validation")) {
+            expected = {bufferInitialVertex, bufferInitialInstance};
+        }
     }
     pass.End();
     wgpu::CommandBuffer commands = encoder.Finish();
     queue.Submit(1, &commands);
 
-    std::array<uint32_t, 2> expected = {firstVertex, firstInstance};
-
-    // Per the specification, if validation is enabled and indirect-first-instance is not enabled,
-    // Draw[Indexed]Indirect with firstInstance > 0 will be a no-op. The buffer should still have
-    // the values from the first draw.
-    if (firstInstance > 0 && IsIndirectDraw(mode) &&
-        !device.HasFeature(wgpu::FeatureName::IndirectFirstInstance) &&
-        !HasToggleEnabled("skip_validation")) {
-        expected = {checkIndex & CheckIndex::Vertex ? firstDrawValues[0] : 0, firstDrawValues[1]};
-    }
-
     EXPECT_BUFFER_U32_RANGE_EQ(expected.data(), buffer, 0, expected.size());
 }
 
 // Test that vertex_index starts at 7 when drawn using Draw()
 TEST_P(FirstIndexOffsetTests, NonIndexedVertexOffset) {
-    TestVertexIndex(DrawMode::NonIndexed, 7);
+    // Draw once: vertex_index starts at 9
+    {
+        TestVertexIndex(DrawMode::NonIndexed, {{9, 0}});
+    }
+
+    // Draw twice: vertex_index starts at 9 and 7
+    {
+        TestVertexIndex(DrawMode::NonIndexed, {{9, 0}, {7, 0}});
+    }
 }
 
-// Test that instance_index starts at 11 when drawn using Draw()
+// Test that instance_index is offset correctly when drawn using Draw()
 TEST_P(FirstIndexOffsetTests, NonIndexedInstanceOffset) {
-    TestInstanceIndex(DrawMode::NonIndexed, 11);
+    // Draw once: instance_index starts at 13
+    {
+        TestInstanceIndex(DrawMode::NonIndexed, {{0, 13}});
+    }
+
+    // Draw twice: instance_index starts at 13 and 11
+    {
+        TestInstanceIndex(DrawMode::NonIndexed, {{0, 13}, {0, 11}});
+    }
 }
 
-// Test that vertex_index and instance_index start at 7 and 11 respectively when drawn using Draw()
+// Test that vertex_index and instance_index are both offset correctly when drawn using Draw()
 TEST_P(FirstIndexOffsetTests, NonIndexedBothOffset) {
-    TestBothIndices(DrawMode::NonIndexed, 7, 11);
+    // Draw once: vertex_index starts at 7 and instance_index starts at 13
+    {
+        TestBothIndices(DrawMode::NonIndexed, {{7, 13}});
+    }
+    // Draw twice: vertex_index starts at 7, instance_index starts at 13 and 11
+    {
+        TestBothIndices(DrawMode::NonIndexed, {{7, 13}, {7, 11}});
+    }
 }
 
 // Test that vertex_index starts at 7 when drawn using DrawIndexed()
 TEST_P(FirstIndexOffsetTests, IndexedVertex) {
-    TestVertexIndex(DrawMode::Indexed, 7);
+    // Draw once: vertex_index starts at 9
+    {
+        TestVertexIndex(DrawMode::Indexed, {{9, 0}});
+    }
+
+    // Draw twice: vertex_index starts at 9 and 7
+    {
+        TestVertexIndex(DrawMode::Indexed, {{9, 0}, {7, 0}});
+    }
 }
 
-// Test that instance_index starts at 11 when drawn using DrawIndexed()
+// Test that instance_index is offset correctly when drawn using DrawIndexed()
 TEST_P(FirstIndexOffsetTests, IndexedInstance) {
-    TestInstanceIndex(DrawMode::Indexed, 11);
+    // Draw once: instance_index starts at 13
+    {
+        TestInstanceIndex(DrawMode::Indexed, {{0, 13}});
+    }
+
+    // Draw twice: instance_index starts at 13 and 11
+    {
+        TestInstanceIndex(DrawMode::Indexed, {{0, 13}, {0, 11}});
+    }
 }
 
-// Test that vertex_index and instance_index start at 7 and 11 respectively when drawn using
+// Test that vertex_index and instance_index are both offset correctly when drawn using
 // DrawIndexed()
 TEST_P(FirstIndexOffsetTests, IndexedBothOffset) {
-    TestBothIndices(DrawMode::Indexed, 7, 11);
+    // Draw once: vertex_index starts at 7 and instance_index starts at 13
+    {
+        TestBothIndices(DrawMode::Indexed, {{7, 13}});
+    }
+    // Draw twice: vertex_index starts at 7, instance_index starts at 13 and 11
+    {
+        TestBothIndices(DrawMode::Indexed, {{7, 13}, {7, 11}});
+    }
 }
 
-// Test that vertex_index starts at 7 when drawn using DrawIndirect()
+// Test that vertex_index is offset correctly when drawn using DrawIndirect()
 TEST_P(FirstIndexOffsetTests, NonIndexedIndirectVertexOffset) {
     // TODO(crbug.com/347223100): failing on ANGLE/D3D11
     DAWN_SUPPRESS_TEST_IF(IsOpenGLES() && IsANGLED3D11());
 
     // TODO(crbug.com/dawn/1429): Fails with the full validation turned on.
     DAWN_SUPPRESS_TEST_IF(IsD3D12() && IsFullBackendValidationEnabled());
-    TestVertexIndex(DrawMode::NonIndexedIndirect, 7);
+
+    // Draw once: vertex_index starts at 9
+    {
+        TestVertexIndex(DrawMode::NonIndexedIndirect, {{9, 0}});
+    }
+
+    // Draw twice: vertex_index starts at 9 and 7
+    {
+        TestVertexIndex(DrawMode::NonIndexedIndirect, {{9, 0}, {7, 0}});
+    }
 }
 
-// Test that instance_index starts at 11 when drawn using DrawIndirect()
+// Test that instance_index is offset correctly when drawn using DrawIndirect()
 TEST_P(FirstIndexOffsetTests, NonIndexedIndirectInstanceOffset) {
     // TODO(crbug.com/347223100): failing on ANGLE/D3D11
     DAWN_SUPPRESS_TEST_IF(IsOpenGLES() && IsANGLED3D11());
 
-    TestInstanceIndex(DrawMode::NonIndexedIndirect, 11);
+    // Draw once: instance_index starts at 13
+    {
+        TestInstanceIndex(DrawMode::NonIndexedIndirect, {{0, 13}});
+    }
+
+    // Draw twice: instance_index starts at 13 and 11
+    {
+        TestInstanceIndex(DrawMode::NonIndexedIndirect, {{0, 13}, {0, 11}});
+    }
 }
 
-// Test that vertex_index and instance_index start at 7 and 11 respectively when drawn using
+// Test that vertex_index and instance_index are both offset correctly when drawn using
 // DrawIndirect()
 TEST_P(FirstIndexOffsetTests, NonIndexedIndirectBothOffset) {
     // TODO(crbug.com/347223100): failing on ANGLE/D3D11
     DAWN_SUPPRESS_TEST_IF(IsOpenGLES() && IsANGLED3D11());
 
-    TestBothIndices(DrawMode::NonIndexedIndirect, 7, 11);
+    // Draw once: vertex_index starts at 7 and instance_index starts at 13
+    {
+        TestBothIndices(DrawMode::NonIndexedIndirect, {{7, 13}});
+    }
+    // Draw twice: vertex_index starts at 7, instance_index starts at 13 and 11
+    {
+        TestBothIndices(DrawMode::NonIndexedIndirect, {{7, 13}, {7, 11}});
+    }
 }
 
-// Test that vertex_index starts at 7 when drawn using DrawIndexedIndirect()
+// Test that vertex_index is offset correctly when drawn using DrawIndexedIndirect()
 TEST_P(FirstIndexOffsetTests, IndexedIndirectVertex) {
     // TODO(crbug.com/347223100): failing on ANGLE/D3D11
     DAWN_SUPPRESS_TEST_IF(IsOpenGLES() && IsANGLED3D11());
 
     // TODO(crbug.com/dawn/1429): Fails with the full validation turned on.
     DAWN_SUPPRESS_TEST_IF(IsD3D12() && IsFullBackendValidationEnabled());
-    TestVertexIndex(DrawMode::IndexedIndirect, 7);
+
+    // Draw once: vertex_index starts at 9
+    {
+        TestVertexIndex(DrawMode::IndexedIndirect, {{9, 0}});
+    }
+
+    // Draw twice: vertex_index starts at 9 and 7
+    {
+        TestVertexIndex(DrawMode::IndexedIndirect, {{9, 0}, {7, 0}});
+    }
 }
 
-// Test that instance_index starts at 11 when drawn using DrawIndexed()
+// Test that instance_index is offset correctly when drawn using DrawIndexedIndirect()
 TEST_P(FirstIndexOffsetTests, IndexedIndirectInstance) {
     // TODO(crbug.com/347223100): failing on ANGLE/D3D11
     DAWN_SUPPRESS_TEST_IF(IsOpenGLES() && IsANGLED3D11());
 
-    TestInstanceIndex(DrawMode::IndexedIndirect, 11);
+    // Draw once: instance_index starts at 13
+    {
+        TestInstanceIndex(DrawMode::IndexedIndirect, {{0, 13}});
+    }
+
+    // Draw twice: instance_index starts at 13 and 11
+    {
+        TestInstanceIndex(DrawMode::IndexedIndirect, {{0, 13}, {0, 11}});
+    }
 }
 
 // Test that vertex_index and instance_index start at 7 and 11 respectively when drawn using
@@ -350,11 +456,19 @@
     // TODO(crbug.com/347223100): failing on ANGLE/D3D11
     DAWN_SUPPRESS_TEST_IF(IsOpenGLES() && IsANGLED3D11());
 
-    TestBothIndices(DrawMode::IndexedIndirect, 7, 11);
+    // Draw once: vertex_index starts at 7 and instance_index starts at 13
+    {
+        TestBothIndices(DrawMode::IndexedIndirect, {{7, 13}});
+    }
+    // Draw twice: vertex_index starts at 7, instance_index starts at 13 and 11
+    {
+        TestBothIndices(DrawMode::IndexedIndirect, {{7, 13}, {7, 11}});
+    }
 }
 
 DAWN_INSTANTIATE_TEST(FirstIndexOffsetTests,
                       D3D11Backend(),
+                      D3D11Backend({"use_tint_ir"}),
                       D3D12Backend(),
                       MetalBackend(),
                       OpenGLBackend(),