Reduce allocations in BindGroupTracker
Profiling showed that the resize done on
mDynamicOffsets was causing allocations at an
inopportune time when submitting command buffers.
Attempts to resolve that by using a static array
with the maximum number of dynamic offsets and
tracking the count separately.
Bug: 424506429
Change-Id: Ic0b6a57e55e369413599bf453943e7d2b0748373
Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/246794
Commit-Queue: Brandon Jones <bajones@chromium.org>
Reviewed-by: Loko Kung <lokokung@google.com>
Reviewed-by: Geoff Lang <geofflang@chromium.org>
diff --git a/src/dawn/common/Constants.h b/src/dawn/common/Constants.h
index edfe485..d294cf3 100644
--- a/src/dawn/common/Constants.h
+++ b/src/dawn/common/Constants.h
@@ -108,6 +108,10 @@
// Timestamp query quantization mask to perform a granularity of ~0.1ms.
static constexpr uint32_t kTimestampQuantizationMask = 0xFFFF0000;
+// Max dynamic offset counts used to optimize Dawn internals.
+static constexpr uint32_t kMaxDynamicUniformBuffersPerPipelineLayout = 16u;
+static constexpr uint32_t kMaxDynamicStorageBuffersPerPipelineLayout = 16u;
+
} // namespace dawn
#endif // SRC_DAWN_COMMON_CONSTANTS_H_
diff --git a/src/dawn/native/BindGroupTracker.h b/src/dawn/native/BindGroupTracker.h
index f448ff8..959cb1c 100644
--- a/src/dawn/native/BindGroupTracker.h
+++ b/src/dawn/native/BindGroupTracker.h
@@ -52,6 +52,7 @@
uint32_t dynamicOffsetCount,
uint32_t* dynamicOffsets) {
DAWN_ASSERT(index < kMaxBindGroupsTyped);
+ DAWN_ASSERT(dynamicOffsetCount <= kMaxDynamicOffsetsPerBindGroup);
if (mBindGroupLayoutsMask[index]) {
// It is okay to only dirty bind groups that are used by the current pipeline
@@ -69,9 +70,9 @@
}
mBindGroups[index] = bindGroup;
- mDynamicOffsets[index].resize(BindingIndex(dynamicOffsetCount));
+ mDynamicOffsets[index].count = BindingIndex(dynamicOffsetCount);
std::copy(dynamicOffsets, dynamicOffsets + dynamicOffsetCount,
- mDynamicOffsets[index].begin());
+ mDynamicOffsets[index].offsets.begin());
}
void OnSetPipeline(PipelineBase* pipeline) { mPipelineLayout = pipeline->GetLayout(); }
@@ -79,6 +80,11 @@
protected:
virtual bool AreLayoutsCompatible() { return mLastAppliedPipelineLayout == mPipelineLayout; }
+ ityp::span<BindingIndex, DynamicOffset> GetDynamicOffsets(BindGroupIndex index) {
+ return ityp::span<BindingIndex, DynamicOffset>(mDynamicOffsets[index].offsets.data(),
+ mDynamicOffsets[index].count);
+ }
+
// The Derived class should call this before it applies bind groups.
void BeforeApply() {
if (AreLayoutsCompatible()) {
@@ -123,7 +129,6 @@
BindGroupMask mDirtyBindGroupsObjectChangedOrIsDynamic = 0;
BindGroupMask mBindGroupLayoutsMask = 0;
PerBindGroup<BindGroupBase*> mBindGroups = {};
- PerBindGroup<ityp::vector<BindingIndex, DynamicOffset>> mDynamicOffsets = {};
// |mPipelineLayout| is the current pipeline layout set on the command buffer.
// |mLastAppliedPipelineLayout| is the last pipeline layout for which we applied changes
@@ -133,6 +138,20 @@
// freed from underneath this class.
RAW_PTR_EXCLUSION PipelineLayoutBase* mPipelineLayout = nullptr;
RAW_PTR_EXCLUSION PipelineLayoutBase* mLastAppliedPipelineLayout = nullptr;
+
+ private:
+ // Max possible dynamic offsets per bind group. Uses the per-pipeline limits because it's
+ // possible that one bind group uses all the available dynamic offsets and every other bind
+ // group uses none.
+ static constexpr uint32_t kMaxDynamicOffsetsPerBindGroup =
+ kMaxDynamicUniformBuffersPerPipelineLayout + kMaxDynamicStorageBuffersPerPipelineLayout;
+
+ struct BindingDynamicOffsets {
+ ityp::array<BindingIndex, DynamicOffset, kMaxDynamicOffsetsPerBindGroup> offsets = {};
+ BindingIndex count = {};
+ };
+
+ PerBindGroup<BindingDynamicOffsets> mDynamicOffsets = {};
};
} // namespace dawn::native
diff --git a/src/dawn/native/Limits.cpp b/src/dawn/native/Limits.cpp
index a58c6b6..450f155 100644
--- a/src/dawn/native/Limits.cpp
+++ b/src/dawn/native/Limits.cpp
@@ -415,6 +415,33 @@
std::min(limits->v1.maxUniformBuffersPerShaderStage, kMaxUniformBuffersPerShaderStage);
limits->v1.maxImmediateSize =
std::min(limits->v1.maxImmediateSize, kMaxSupportedImmediateDataBytes);
+
+ if (limits->v1.maxDynamicUniformBuffersPerPipelineLayout >
+ kMaxDynamicUniformBuffersPerPipelineLayout) {
+ dawn::WarningLog() << "maxDynamicUniformBuffersPerPipelineLayout artificially reduced from "
+ << limits->v1.maxDynamicUniformBuffersPerPipelineLayout << " to "
+ << kMaxDynamicUniformBuffersPerPipelineLayout
+ << " to fit dynamic offset allocation limit.";
+ limits->v1.maxDynamicUniformBuffersPerPipelineLayout =
+ kMaxDynamicUniformBuffersPerPipelineLayout;
+ }
+
+ if (limits->v1.maxDynamicStorageBuffersPerPipelineLayout >
+ kMaxDynamicStorageBuffersPerPipelineLayout) {
+ dawn::WarningLog() << "maxDynamicStorageBuffersPerPipelineLayout artificially reduced from "
+ << limits->v1.maxDynamicStorageBuffersPerPipelineLayout << " to "
+ << kMaxDynamicStorageBuffersPerPipelineLayout
+ << " to fit dynamic offset allocation limit.";
+ limits->v1.maxDynamicStorageBuffersPerPipelineLayout =
+ kMaxDynamicStorageBuffersPerPipelineLayout;
+ }
+
+ limits->v1.maxDynamicUniformBuffersPerPipelineLayout =
+ std::min(limits->v1.maxDynamicUniformBuffersPerPipelineLayout,
+ kMaxDynamicUniformBuffersPerPipelineLayout);
+ limits->v1.maxDynamicStorageBuffersPerPipelineLayout =
+ std::min(limits->v1.maxDynamicStorageBuffersPerPipelineLayout,
+ kMaxDynamicStorageBuffersPerPipelineLayout);
// Compat limits.
limits->compat.maxStorageBuffersInVertexStage =
std::min(limits->compat.maxStorageBuffersInVertexStage, kMaxStorageBuffersPerShaderStage);
diff --git a/src/dawn/native/d3d11/BindGroupTrackerD3D11.cpp b/src/dawn/native/d3d11/BindGroupTrackerD3D11.cpp
index 84d801d..1107aed 100644
--- a/src/dawn/native/d3d11/BindGroupTrackerD3D11.cpp
+++ b/src/dawn/native/d3d11/BindGroupTrackerD3D11.cpp
@@ -114,7 +114,7 @@
BindGroupBase* group,
BindingIndex bindingIndex,
const BufferBindingInfo& layout,
- const ityp::vector<BindingIndex, uint64_t>& dynamicOffsets) {
+ const ityp::span<BindingIndex, uint64_t>& dynamicOffsets) {
const BindingInfo& bindingInfo = group->GetLayout()->GetBindingInfo(bindingIndex);
BufferBinding binding = group->GetBindingAsBufferBinding(bindingIndex);
@@ -336,7 +336,7 @@
BindGroupBase* group,
BindingIndex bindingIndex,
const BufferBindingInfo& layout,
- const ityp::vector<BindingIndex, uint64_t>& dynamicOffsets) {
+ const ityp::span<BindingIndex, uint64_t>& dynamicOffsets) {
const auto& [bindingInfo, binding] =
ExtractBufferBindingInfo(group, bindingIndex, layout, dynamicOffsets);
@@ -364,7 +364,7 @@
BindGroupBase* group,
BindingIndex bindingIndex,
const BufferBindingInfo& layout,
- const ityp::vector<BindingIndex, uint64_t>& dynamicOffsets) {
+ const ityp::span<BindingIndex, uint64_t>& dynamicOffsets) {
const auto& [bindingInfo, binding] =
ExtractBufferBindingInfo(group, bindingIndex, layout, dynamicOffsets);
@@ -462,7 +462,7 @@
constexpr wgpu::ShaderStage kVisibleCompute = wgpu::ShaderStage::Compute & kVisibleStage;
BindGroupBase* group = mBindGroups[index];
- const ityp::vector<BindingIndex, uint64_t>& dynamicOffsets = mDynamicOffsets[index];
+ const ityp::span<BindingIndex, uint64_t>& dynamicOffsets = GetDynamicOffsets(index);
const auto& indices = ToBackend(mPipelineLayout)->GetBindingTableIndexMap()[index];
for (BindingIndex bindingIndex : Range(group->GetLayout()->GetBindingCount())) {
@@ -752,7 +752,7 @@
for (BindGroupIndex index : uavBindGroups) {
BindGroupBase* group = mBindGroups[index];
- const ityp::vector<BindingIndex, uint64_t>& dynamicOffsets = mDynamicOffsets[index];
+ const ityp::span<BindingIndex, uint64_t>& dynamicOffsets = GetDynamicOffsets(index);
const auto& indices = ToBackend(mPipelineLayout)->GetBindingTableIndexMap()[index];
// D3D11 uav slot allocated in reverse order.
diff --git a/src/dawn/native/d3d11/BindGroupTrackerD3D11.h b/src/dawn/native/d3d11/BindGroupTrackerD3D11.h
index b0546f4..e5b2002 100644
--- a/src/dawn/native/d3d11/BindGroupTrackerD3D11.h
+++ b/src/dawn/native/d3d11/BindGroupTrackerD3D11.h
@@ -86,7 +86,7 @@
BindGroupBase* group,
BindingIndex bindingIndex,
const BufferBindingInfo& layout,
- const ityp::vector<BindingIndex, uint64_t>& dynamicOffsets);
+ const ityp::span<BindingIndex, uint64_t>& dynamicOffsets);
template <typename T>
ResultOrError<ComPtr<T>> GetTextureD3DView(BindGroupBase* group, BindingIndex bindingIndex);
@@ -104,7 +104,7 @@
BindGroupBase* group,
BindingIndex bindingIndex,
const BufferBindingInfo& layout,
- const ityp::vector<BindingIndex, uint64_t>& dynamicOffsets);
+ const ityp::span<BindingIndex, uint64_t>& dynamicOffsets);
ResultOrError<ComPtr<ID3D11ShaderResourceView>> GetTextureShaderResourceView(
BindGroupBase* group,
diff --git a/src/dawn/native/d3d12/CommandBufferD3D12.cpp b/src/dawn/native/d3d12/CommandBufferD3D12.cpp
index 9d36dfb..6ee2f7d 100644
--- a/src/dawn/native/d3d12/CommandBufferD3D12.cpp
+++ b/src/dawn/native/d3d12/CommandBufferD3D12.cpp
@@ -469,7 +469,7 @@
for (BindGroupIndex index : mDirtyBindGroupsObjectChangedOrIsDynamic) {
BindGroup* group = ToBackend(mBindGroups[index]);
ApplyBindGroup(commandList, ToBackend(mPipelineLayout), index, group,
- mDynamicOffsets[index]);
+ GetDynamicOffsets(index));
}
AfterApply();
@@ -571,7 +571,7 @@
const PipelineLayout* pipelineLayout,
BindGroupIndex index,
BindGroup* group,
- const ityp::vector<BindingIndex, uint64_t>& dynamicOffsets) {
+ const ityp::span<BindingIndex, uint64_t>& dynamicOffsets) {
DAWN_ASSERT(dynamicOffsets.size() == group->GetLayout()->GetDynamicBufferCount());
// Usually, the application won't set the same offsets many times,
diff --git a/src/dawn/native/metal/CommandBufferMTL.mm b/src/dawn/native/metal/CommandBufferMTL.mm
index 0ae4096..5084f3f 100644
--- a/src/dawn/native/metal/CommandBufferMTL.mm
+++ b/src/dawn/native/metal/CommandBufferMTL.mm
@@ -525,7 +525,7 @@
void Apply(Encoder encoder) {
BeforeApply();
for (BindGroupIndex index : mDirtyBindGroupsObjectChangedOrIsDynamic) {
- ApplyBindGroup(encoder, index, ToBackend(mBindGroups[index]), mDynamicOffsets[index],
+ ApplyBindGroup(encoder, index, ToBackend(mBindGroups[index]), GetDynamicOffsets(index),
ToBackend(mPipelineLayout));
}
AfterApply();
@@ -540,7 +540,7 @@
id<MTLComputeCommandEncoder> compute,
BindGroupIndex index,
BindGroup* group,
- const ityp::vector<BindingIndex, uint64_t>& dynamicOffsets,
+ const ityp::span<BindingIndex, uint64_t>& dynamicOffsets,
PipelineLayout* pipelineLayout) {
// TODO(crbug.com/dawn/854): Maintain buffers and offsets arrays in BindGroup
// so that we only have to do one setVertexBuffers and one setFragmentBuffers
diff --git a/src/dawn/native/opengl/CommandBufferGL.cpp b/src/dawn/native/opengl/CommandBufferGL.cpp
index 433a73e..e672e44 100644
--- a/src/dawn/native/opengl/CommandBufferGL.cpp
+++ b/src/dawn/native/opengl/CommandBufferGL.cpp
@@ -298,7 +298,7 @@
MaybeError Apply(const OpenGLFunctions& gl) {
BeforeApply();
for (BindGroupIndex index : mDirtyBindGroupsObjectChangedOrIsDynamic) {
- DAWN_TRY(ApplyBindGroup(gl, index, mBindGroups[index], mDynamicOffsets[index]));
+ DAWN_TRY(ApplyBindGroup(gl, index, mBindGroups[index], GetDynamicOffsets(index)));
}
DAWN_TRY(ApplyInternalUniforms(gl));
DAWN_TRY(ApplyInternalArrayLengthUniforms(gl));
@@ -326,7 +326,7 @@
MaybeError ApplyBindGroup(const OpenGLFunctions& gl,
BindGroupIndex groupIndex,
BindGroupBase* group,
- const ityp::vector<BindingIndex, uint64_t>& dynamicOffsets) {
+ const ityp::span<BindingIndex, uint64_t>& dynamicOffsets) {
const auto& indices = ToBackend(mPipelineLayout)->GetBindingIndexInfo()[groupIndex];
for (BindingIndex bindingIndex : Range(group->GetLayout()->GetBindingCount())) {
diff --git a/src/dawn/native/vulkan/CommandBufferVk.cpp b/src/dawn/native/vulkan/CommandBufferVk.cpp
index 1e8573d..81883e6 100644
--- a/src/dawn/native/vulkan/CommandBufferVk.cpp
+++ b/src/dawn/native/vulkan/CommandBufferVk.cpp
@@ -175,9 +175,9 @@
BeforeApply();
for (BindGroupIndex dirtyIndex : mDirtyBindGroupsObjectChangedOrIsDynamic) {
VkDescriptorSet set = ToBackend(mBindGroups[dirtyIndex])->GetHandle();
- uint32_t count = static_cast<uint32_t>(mDynamicOffsets[dirtyIndex].size());
- const uint32_t* dynamicOffset =
- count > 0 ? mDynamicOffsets[dirtyIndex].data() : nullptr;
+ const auto dynamicOffsetSpan = GetDynamicOffsets(dirtyIndex);
+ uint32_t count = static_cast<uint32_t>(dynamicOffsetSpan.size());
+ const uint32_t* dynamicOffset = count > 0 ? dynamicOffsetSpan.data() : nullptr;
device->fn.CmdBindDescriptorSets(recordingContext->commandBuffer, bindPoint, mVkLayout,
static_cast<uint32_t>(dirtyIndex), 1, &*set, count,
dynamicOffset);