Reduce allocations in BindGroupTracker

Profiling showed that the resize done on
mDynamicOffsets was causing allocations at an
inopportune time when submitting command buffers.

Attempts to resolve that by using a static array
with the maximum number of dynamic offsets and
tracking the count separately.

Bug: 424506429
Change-Id: Ic0b6a57e55e369413599bf453943e7d2b0748373
Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/246794
Commit-Queue: Brandon Jones <bajones@chromium.org>
Reviewed-by: Loko Kung <lokokung@google.com>
Reviewed-by: Geoff Lang <geofflang@chromium.org>
diff --git a/src/dawn/common/Constants.h b/src/dawn/common/Constants.h
index edfe485..d294cf3 100644
--- a/src/dawn/common/Constants.h
+++ b/src/dawn/common/Constants.h
@@ -108,6 +108,10 @@
 // Timestamp query quantization mask to perform a granularity of ~0.1ms.
 static constexpr uint32_t kTimestampQuantizationMask = 0xFFFF0000;
 
+// Max dynamic offset counts used to optimize Dawn internals.
+static constexpr uint32_t kMaxDynamicUniformBuffersPerPipelineLayout = 16u;
+static constexpr uint32_t kMaxDynamicStorageBuffersPerPipelineLayout = 16u;
+
 }  // namespace dawn
 
 #endif  // SRC_DAWN_COMMON_CONSTANTS_H_
diff --git a/src/dawn/native/BindGroupTracker.h b/src/dawn/native/BindGroupTracker.h
index f448ff8..959cb1c 100644
--- a/src/dawn/native/BindGroupTracker.h
+++ b/src/dawn/native/BindGroupTracker.h
@@ -52,6 +52,7 @@
                         uint32_t dynamicOffsetCount,
                         uint32_t* dynamicOffsets) {
         DAWN_ASSERT(index < kMaxBindGroupsTyped);
+        DAWN_ASSERT(dynamicOffsetCount <= kMaxDynamicOffsetsPerBindGroup);
 
         if (mBindGroupLayoutsMask[index]) {
             // It is okay to only dirty bind groups that are used by the current pipeline
@@ -69,9 +70,9 @@
         }
 
         mBindGroups[index] = bindGroup;
-        mDynamicOffsets[index].resize(BindingIndex(dynamicOffsetCount));
+        mDynamicOffsets[index].count = BindingIndex(dynamicOffsetCount);
         std::copy(dynamicOffsets, dynamicOffsets + dynamicOffsetCount,
-                  mDynamicOffsets[index].begin());
+                  mDynamicOffsets[index].offsets.begin());
     }
 
     void OnSetPipeline(PipelineBase* pipeline) { mPipelineLayout = pipeline->GetLayout(); }
@@ -79,6 +80,11 @@
   protected:
     virtual bool AreLayoutsCompatible() { return mLastAppliedPipelineLayout == mPipelineLayout; }
 
+    ityp::span<BindingIndex, DynamicOffset> GetDynamicOffsets(BindGroupIndex index) {
+        return ityp::span<BindingIndex, DynamicOffset>(mDynamicOffsets[index].offsets.data(),
+                                                       mDynamicOffsets[index].count);
+    }
+
     // The Derived class should call this before it applies bind groups.
     void BeforeApply() {
         if (AreLayoutsCompatible()) {
@@ -123,7 +129,6 @@
     BindGroupMask mDirtyBindGroupsObjectChangedOrIsDynamic = 0;
     BindGroupMask mBindGroupLayoutsMask = 0;
     PerBindGroup<BindGroupBase*> mBindGroups = {};
-    PerBindGroup<ityp::vector<BindingIndex, DynamicOffset>> mDynamicOffsets = {};
 
     // |mPipelineLayout| is the current pipeline layout set on the command buffer.
     // |mLastAppliedPipelineLayout| is the last pipeline layout for which we applied changes
@@ -133,6 +138,20 @@
     // freed from underneath this class.
     RAW_PTR_EXCLUSION PipelineLayoutBase* mPipelineLayout = nullptr;
     RAW_PTR_EXCLUSION PipelineLayoutBase* mLastAppliedPipelineLayout = nullptr;
+
+  private:
+    // Max possible dynamic offsets per bind group. Uses the per-pipeline limits because it's
+    // possible that one bind group uses all the available dynamic offsets and every other bind
+    // group uses none.
+    static constexpr uint32_t kMaxDynamicOffsetsPerBindGroup =
+        kMaxDynamicUniformBuffersPerPipelineLayout + kMaxDynamicStorageBuffersPerPipelineLayout;
+
+    struct BindingDynamicOffsets {
+        ityp::array<BindingIndex, DynamicOffset, kMaxDynamicOffsetsPerBindGroup> offsets = {};
+        BindingIndex count = {};
+    };
+
+    PerBindGroup<BindingDynamicOffsets> mDynamicOffsets = {};
 };
 
 }  // namespace dawn::native
diff --git a/src/dawn/native/Limits.cpp b/src/dawn/native/Limits.cpp
index a58c6b6..450f155 100644
--- a/src/dawn/native/Limits.cpp
+++ b/src/dawn/native/Limits.cpp
@@ -415,6 +415,33 @@
         std::min(limits->v1.maxUniformBuffersPerShaderStage, kMaxUniformBuffersPerShaderStage);
     limits->v1.maxImmediateSize =
         std::min(limits->v1.maxImmediateSize, kMaxSupportedImmediateDataBytes);
+
+    if (limits->v1.maxDynamicUniformBuffersPerPipelineLayout >
+        kMaxDynamicUniformBuffersPerPipelineLayout) {
+        dawn::WarningLog() << "maxDynamicUniformBuffersPerPipelineLayout artificially reduced from "
+                           << limits->v1.maxDynamicUniformBuffersPerPipelineLayout << " to "
+                           << kMaxDynamicUniformBuffersPerPipelineLayout
+                           << " to fit dynamic offset allocation limit.";
+        limits->v1.maxDynamicUniformBuffersPerPipelineLayout =
+            kMaxDynamicUniformBuffersPerPipelineLayout;
+    }
+
+    if (limits->v1.maxDynamicStorageBuffersPerPipelineLayout >
+        kMaxDynamicStorageBuffersPerPipelineLayout) {
+        dawn::WarningLog() << "maxDynamicStorageBuffersPerPipelineLayout artificially reduced from "
+                           << limits->v1.maxDynamicStorageBuffersPerPipelineLayout << " to "
+                           << kMaxDynamicStorageBuffersPerPipelineLayout
+                           << " to fit dynamic offset allocation limit.";
+        limits->v1.maxDynamicStorageBuffersPerPipelineLayout =
+            kMaxDynamicStorageBuffersPerPipelineLayout;
+    }
+
+    limits->v1.maxDynamicUniformBuffersPerPipelineLayout =
+        std::min(limits->v1.maxDynamicUniformBuffersPerPipelineLayout,
+                 kMaxDynamicUniformBuffersPerPipelineLayout);
+    limits->v1.maxDynamicStorageBuffersPerPipelineLayout =
+        std::min(limits->v1.maxDynamicStorageBuffersPerPipelineLayout,
+                 kMaxDynamicStorageBuffersPerPipelineLayout);
     // Compat limits.
     limits->compat.maxStorageBuffersInVertexStage =
         std::min(limits->compat.maxStorageBuffersInVertexStage, kMaxStorageBuffersPerShaderStage);
diff --git a/src/dawn/native/d3d11/BindGroupTrackerD3D11.cpp b/src/dawn/native/d3d11/BindGroupTrackerD3D11.cpp
index 84d801d..1107aed 100644
--- a/src/dawn/native/d3d11/BindGroupTrackerD3D11.cpp
+++ b/src/dawn/native/d3d11/BindGroupTrackerD3D11.cpp
@@ -114,7 +114,7 @@
     BindGroupBase* group,
     BindingIndex bindingIndex,
     const BufferBindingInfo& layout,
-    const ityp::vector<BindingIndex, uint64_t>& dynamicOffsets) {
+    const ityp::span<BindingIndex, uint64_t>& dynamicOffsets) {
     const BindingInfo& bindingInfo = group->GetLayout()->GetBindingInfo(bindingIndex);
 
     BufferBinding binding = group->GetBindingAsBufferBinding(bindingIndex);
@@ -336,7 +336,7 @@
     BindGroupBase* group,
     BindingIndex bindingIndex,
     const BufferBindingInfo& layout,
-    const ityp::vector<BindingIndex, uint64_t>& dynamicOffsets) {
+    const ityp::span<BindingIndex, uint64_t>& dynamicOffsets) {
     const auto& [bindingInfo, binding] =
         ExtractBufferBindingInfo(group, bindingIndex, layout, dynamicOffsets);
 
@@ -364,7 +364,7 @@
     BindGroupBase* group,
     BindingIndex bindingIndex,
     const BufferBindingInfo& layout,
-    const ityp::vector<BindingIndex, uint64_t>& dynamicOffsets) {
+    const ityp::span<BindingIndex, uint64_t>& dynamicOffsets) {
     const auto& [bindingInfo, binding] =
         ExtractBufferBindingInfo(group, bindingIndex, layout, dynamicOffsets);
 
@@ -462,7 +462,7 @@
     constexpr wgpu::ShaderStage kVisibleCompute = wgpu::ShaderStage::Compute & kVisibleStage;
 
     BindGroupBase* group = mBindGroups[index];
-    const ityp::vector<BindingIndex, uint64_t>& dynamicOffsets = mDynamicOffsets[index];
+    const ityp::span<BindingIndex, uint64_t>& dynamicOffsets = GetDynamicOffsets(index);
     const auto& indices = ToBackend(mPipelineLayout)->GetBindingTableIndexMap()[index];
 
     for (BindingIndex bindingIndex : Range(group->GetLayout()->GetBindingCount())) {
@@ -752,7 +752,7 @@
 
     for (BindGroupIndex index : uavBindGroups) {
         BindGroupBase* group = mBindGroups[index];
-        const ityp::vector<BindingIndex, uint64_t>& dynamicOffsets = mDynamicOffsets[index];
+        const ityp::span<BindingIndex, uint64_t>& dynamicOffsets = GetDynamicOffsets(index);
         const auto& indices = ToBackend(mPipelineLayout)->GetBindingTableIndexMap()[index];
 
         // D3D11 uav slot allocated in reverse order.
diff --git a/src/dawn/native/d3d11/BindGroupTrackerD3D11.h b/src/dawn/native/d3d11/BindGroupTrackerD3D11.h
index b0546f4..e5b2002 100644
--- a/src/dawn/native/d3d11/BindGroupTrackerD3D11.h
+++ b/src/dawn/native/d3d11/BindGroupTrackerD3D11.h
@@ -86,7 +86,7 @@
         BindGroupBase* group,
         BindingIndex bindingIndex,
         const BufferBindingInfo& layout,
-        const ityp::vector<BindingIndex, uint64_t>& dynamicOffsets);
+        const ityp::span<BindingIndex, uint64_t>& dynamicOffsets);
 
     template <typename T>
     ResultOrError<ComPtr<T>> GetTextureD3DView(BindGroupBase* group, BindingIndex bindingIndex);
@@ -104,7 +104,7 @@
         BindGroupBase* group,
         BindingIndex bindingIndex,
         const BufferBindingInfo& layout,
-        const ityp::vector<BindingIndex, uint64_t>& dynamicOffsets);
+        const ityp::span<BindingIndex, uint64_t>& dynamicOffsets);
 
     ResultOrError<ComPtr<ID3D11ShaderResourceView>> GetTextureShaderResourceView(
         BindGroupBase* group,
diff --git a/src/dawn/native/d3d12/CommandBufferD3D12.cpp b/src/dawn/native/d3d12/CommandBufferD3D12.cpp
index 9d36dfb..6ee2f7d 100644
--- a/src/dawn/native/d3d12/CommandBufferD3D12.cpp
+++ b/src/dawn/native/d3d12/CommandBufferD3D12.cpp
@@ -469,7 +469,7 @@
         for (BindGroupIndex index : mDirtyBindGroupsObjectChangedOrIsDynamic) {
             BindGroup* group = ToBackend(mBindGroups[index]);
             ApplyBindGroup(commandList, ToBackend(mPipelineLayout), index, group,
-                           mDynamicOffsets[index]);
+                           GetDynamicOffsets(index));
         }
 
         AfterApply();
@@ -571,7 +571,7 @@
                         const PipelineLayout* pipelineLayout,
                         BindGroupIndex index,
                         BindGroup* group,
-                        const ityp::vector<BindingIndex, uint64_t>& dynamicOffsets) {
+                        const ityp::span<BindingIndex, uint64_t>& dynamicOffsets) {
         DAWN_ASSERT(dynamicOffsets.size() == group->GetLayout()->GetDynamicBufferCount());
 
         // Usually, the application won't set the same offsets many times,
diff --git a/src/dawn/native/metal/CommandBufferMTL.mm b/src/dawn/native/metal/CommandBufferMTL.mm
index 0ae4096..5084f3f 100644
--- a/src/dawn/native/metal/CommandBufferMTL.mm
+++ b/src/dawn/native/metal/CommandBufferMTL.mm
@@ -525,7 +525,7 @@
     void Apply(Encoder encoder) {
         BeforeApply();
         for (BindGroupIndex index : mDirtyBindGroupsObjectChangedOrIsDynamic) {
-            ApplyBindGroup(encoder, index, ToBackend(mBindGroups[index]), mDynamicOffsets[index],
+            ApplyBindGroup(encoder, index, ToBackend(mBindGroups[index]), GetDynamicOffsets(index),
                            ToBackend(mPipelineLayout));
         }
         AfterApply();
@@ -540,7 +540,7 @@
                             id<MTLComputeCommandEncoder> compute,
                             BindGroupIndex index,
                             BindGroup* group,
-                            const ityp::vector<BindingIndex, uint64_t>& dynamicOffsets,
+                            const ityp::span<BindingIndex, uint64_t>& dynamicOffsets,
                             PipelineLayout* pipelineLayout) {
         // TODO(crbug.com/dawn/854): Maintain buffers and offsets arrays in BindGroup
         // so that we only have to do one setVertexBuffers and one setFragmentBuffers
diff --git a/src/dawn/native/opengl/CommandBufferGL.cpp b/src/dawn/native/opengl/CommandBufferGL.cpp
index 433a73e..e672e44 100644
--- a/src/dawn/native/opengl/CommandBufferGL.cpp
+++ b/src/dawn/native/opengl/CommandBufferGL.cpp
@@ -298,7 +298,7 @@
     MaybeError Apply(const OpenGLFunctions& gl) {
         BeforeApply();
         for (BindGroupIndex index : mDirtyBindGroupsObjectChangedOrIsDynamic) {
-            DAWN_TRY(ApplyBindGroup(gl, index, mBindGroups[index], mDynamicOffsets[index]));
+            DAWN_TRY(ApplyBindGroup(gl, index, mBindGroups[index], GetDynamicOffsets(index)));
         }
         DAWN_TRY(ApplyInternalUniforms(gl));
         DAWN_TRY(ApplyInternalArrayLengthUniforms(gl));
@@ -326,7 +326,7 @@
     MaybeError ApplyBindGroup(const OpenGLFunctions& gl,
                               BindGroupIndex groupIndex,
                               BindGroupBase* group,
-                              const ityp::vector<BindingIndex, uint64_t>& dynamicOffsets) {
+                              const ityp::span<BindingIndex, uint64_t>& dynamicOffsets) {
         const auto& indices = ToBackend(mPipelineLayout)->GetBindingIndexInfo()[groupIndex];
 
         for (BindingIndex bindingIndex : Range(group->GetLayout()->GetBindingCount())) {
diff --git a/src/dawn/native/vulkan/CommandBufferVk.cpp b/src/dawn/native/vulkan/CommandBufferVk.cpp
index 1e8573d..81883e6 100644
--- a/src/dawn/native/vulkan/CommandBufferVk.cpp
+++ b/src/dawn/native/vulkan/CommandBufferVk.cpp
@@ -175,9 +175,9 @@
         BeforeApply();
         for (BindGroupIndex dirtyIndex : mDirtyBindGroupsObjectChangedOrIsDynamic) {
             VkDescriptorSet set = ToBackend(mBindGroups[dirtyIndex])->GetHandle();
-            uint32_t count = static_cast<uint32_t>(mDynamicOffsets[dirtyIndex].size());
-            const uint32_t* dynamicOffset =
-                count > 0 ? mDynamicOffsets[dirtyIndex].data() : nullptr;
+            const auto dynamicOffsetSpan = GetDynamicOffsets(dirtyIndex);
+            uint32_t count = static_cast<uint32_t>(dynamicOffsetSpan.size());
+            const uint32_t* dynamicOffset = count > 0 ? dynamicOffsetSpan.data() : nullptr;
             device->fn.CmdBindDescriptorSets(recordingContext->commandBuffer, bindPoint, mVkLayout,
                                              static_cast<uint32_t>(dirtyIndex), 1, &*set, count,
                                              dynamicOffset);