D3D12: Dynamic shader-visible heap allocation.
Allocates shader-visible descriptor heaps at a much smaller
initial size, doubles that size on each heap switch, and then
pool-allocates heaps once the max size is reached. This strategy
avoids wasting memory for light users while still maximizing
performance for heavy users.
BUG=dawn:155
Change-Id: I0519235c901d0283b98ee824eeb0cda6de70b210
Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/25620
Commit-Queue: Bryan Bernhart <bryan.bernhart@intel.com>
Reviewed-by: Austin Eng <enga@chromium.org>
diff --git a/src/dawn_native/d3d12/ShaderVisibleDescriptorAllocatorD3D12.cpp b/src/dawn_native/d3d12/ShaderVisibleDescriptorAllocatorD3D12.cpp
index ca30889..ba1b493 100644
--- a/src/dawn_native/d3d12/ShaderVisibleDescriptorAllocatorD3D12.cpp
+++ b/src/dawn_native/d3d12/ShaderVisibleDescriptorAllocatorD3D12.cpp
@@ -20,10 +20,32 @@
namespace dawn_native { namespace d3d12 {
+ // Limits the min/max heap size to always be some known value for testing.
// Thresholds should be adjusted (lower == faster) to avoid tests taking too long to complete.
static constexpr const uint32_t kShaderVisibleSmallHeapSizes[] = {1024, 512};
- uint32_t GetD3D12ShaderVisibleHeapSize(D3D12_DESCRIPTOR_HEAP_TYPE heapType, bool useSmallSize) {
+ uint32_t GetD3D12ShaderVisibleHeapMinSize(D3D12_DESCRIPTOR_HEAP_TYPE heapType,
+ bool useSmallSize) {
+ if (useSmallSize) {
+ return kShaderVisibleSmallHeapSizes[heapType];
+ }
+
+ // Minimum heap size must be large enough to satisfy the largest descriptor allocation
+ // request and to amortize the cost of sub-allocation, but small enough to avoid wasting
+ // memory should only a tiny fraction ever be used.
+ // TODO(dawn:155): Figure out these values.
+ switch (heapType) {
+ case D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV:
+ return 4096;
+ case D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER:
+ return 256;
+ default:
+ UNREACHABLE();
+ }
+ }
+
+ uint32_t GetD3D12ShaderVisibleHeapMaxSize(D3D12_DESCRIPTOR_HEAP_TYPE heapType,
+ bool useSmallSize) {
if (useSmallSize) {
return kShaderVisibleSmallHeapSizes[heapType];
}
@@ -62,7 +84,10 @@
D3D12_DESCRIPTOR_HEAP_TYPE heapType)
: mHeapType(heapType),
mDevice(device),
- mSizeIncrement(device->GetD3D12Device()->GetDescriptorHandleIncrementSize(heapType)) {
+ mSizeIncrement(device->GetD3D12Device()->GetDescriptorHandleIncrementSize(heapType)),
+ mDescriptorCount(GetD3D12ShaderVisibleHeapMinSize(
+ heapType,
+ mDevice->IsToggleEnabled(Toggle::UseD3D12SmallShaderVisibleHeapForTesting))) {
ASSERT(heapType == D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV ||
heapType == D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER);
}
@@ -107,60 +132,74 @@
mAllocator.Deallocate(completedSerial);
}
+ ResultOrError<std::unique_ptr<ShaderVisibleDescriptorHeap>>
+ ShaderVisibleDescriptorAllocator::AllocateHeap(uint32_t descriptorCount) const {
+ // The size in bytes of a descriptor heap is best calculated by the increment size
+ // multiplied by the number of descriptors. In practice, this is only an estimate and
+ // the actual size may vary depending on the driver.
+ const uint64_t kSize = mSizeIncrement * descriptorCount;
+
+ DAWN_TRY(mDevice->GetResidencyManager()->EnsureCanAllocate(kSize, MemorySegment::Local));
+
+ ComPtr<ID3D12DescriptorHeap> d3d12DescriptorHeap;
+ D3D12_DESCRIPTOR_HEAP_DESC heapDescriptor;
+ heapDescriptor.Type = mHeapType;
+ heapDescriptor.NumDescriptors = descriptorCount;
+ heapDescriptor.Flags = GetD3D12HeapFlags(mHeapType);
+ heapDescriptor.NodeMask = 0;
+ DAWN_TRY(CheckOutOfMemoryHRESULT(mDevice->GetD3D12Device()->CreateDescriptorHeap(
+ &heapDescriptor, IID_PPV_ARGS(&d3d12DescriptorHeap)),
+ "ID3D12Device::CreateDescriptorHeap"));
+
+ std::unique_ptr<ShaderVisibleDescriptorHeap> descriptorHeap =
+ std::make_unique<ShaderVisibleDescriptorHeap>(std::move(d3d12DescriptorHeap), kSize);
+
+ // We must track the allocation in the LRU when it is created, otherwise the residency
+ // manager will see the allocation as non-resident in the later call to LockAllocation.
+ mDevice->GetResidencyManager()->TrackResidentAllocation(descriptorHeap.get());
+
+ return std::move(descriptorHeap);
+ }
+
// Creates a GPU descriptor heap that manages descriptors in a FIFO queue.
MaybeError ShaderVisibleDescriptorAllocator::AllocateAndSwitchShaderVisibleHeap() {
std::unique_ptr<ShaderVisibleDescriptorHeap> descriptorHeap;
- // Return the switched out heap to the pool and retrieve the oldest heap that is no longer
- // used by GPU. This maintains a heap buffer to avoid frequently re-creating heaps for heavy
- // users.
- // TODO(dawn:256): Consider periodically triming to avoid OOM.
+ // Dynamically allocate using a two-phase allocation strategy.
+ // The first phase grows a small heap in powers of two for light users, while the second
+ // phase pool-allocates maximum-sized heaps for heavy users.
if (mHeap != nullptr) {
mDevice->GetResidencyManager()->UnlockAllocation(mHeap.get());
- mPool.push_back({mDevice->GetPendingCommandSerial(), std::move(mHeap)});
- }
- // Recycle existing heap if possible.
- if (!mPool.empty() && mPool.front().heapSerial <= mDevice->GetCompletedCommandSerial()) {
- descriptorHeap = std::move(mPool.front().heap);
- mPool.pop_front();
+ const uint32_t maxDescriptorCount = GetD3D12ShaderVisibleHeapMaxSize(
+ mHeapType,
+ mDevice->IsToggleEnabled(Toggle::UseD3D12SmallShaderVisibleHeapForTesting));
+ if (mDescriptorCount < maxDescriptorCount) {
+ // Phase #1. Grow the heaps in powers-of-two.
+ mDevice->ReferenceUntilUnused(mHeap->GetD3D12DescriptorHeap());
+ mDescriptorCount = std::min(mDescriptorCount * 2, maxDescriptorCount);
+ } else {
+ // Phase #2. Pool-allocate heaps.
+ // Return the switched out heap to the pool and retrieve the oldest heap that is no
+ // longer used by GPU. This maintains a heap buffer to avoid frequently re-creating
+ // heaps for heavy users.
+ // TODO(dawn:256): Consider periodically trimming to avoid OOM.
+ mPool.push_back({mDevice->GetPendingCommandSerial(), std::move(mHeap)});
+ if (mPool.front().heapSerial <= mDevice->GetCompletedCommandSerial()) {
+ descriptorHeap = std::move(mPool.front().heap);
+ mPool.pop_front();
+ }
+ }
}
- // TODO(bryan.bernhart@intel.com): Allocating to max heap size wastes memory
- // should the developer not allocate any bindings for the heap type.
- // Consider dynamically re-sizing GPU heaps.
- const uint32_t descriptorCount = GetD3D12ShaderVisibleHeapSize(
- mHeapType, mDevice->IsToggleEnabled(Toggle::UseD3D12SmallShaderVisibleHeapForTesting));
-
if (descriptorHeap == nullptr) {
- // The size in bytes of a descriptor heap is best calculated by the increment size
- // multiplied by the number of descriptors. In practice, this is only an estimate and
- // the actual size may vary depending on the driver.
- const uint64_t kSize = mSizeIncrement * descriptorCount;
-
- DAWN_TRY(
- mDevice->GetResidencyManager()->EnsureCanAllocate(kSize, MemorySegment::Local));
-
- ComPtr<ID3D12DescriptorHeap> d3d12DescriptorHeap;
- D3D12_DESCRIPTOR_HEAP_DESC heapDescriptor;
- heapDescriptor.Type = mHeapType;
- heapDescriptor.NumDescriptors = descriptorCount;
- heapDescriptor.Flags = GetD3D12HeapFlags(mHeapType);
- heapDescriptor.NodeMask = 0;
- DAWN_TRY(
- CheckOutOfMemoryHRESULT(mDevice->GetD3D12Device()->CreateDescriptorHeap(
- &heapDescriptor, IID_PPV_ARGS(&d3d12DescriptorHeap)),
- "ID3D12Device::CreateDescriptorHeap"));
- descriptorHeap = std::make_unique<ShaderVisibleDescriptorHeap>(
- std::move(d3d12DescriptorHeap), kSize);
- // We must track the allocation in the LRU when it is created, otherwise the residency
- // manager will see the allocation as non-resident in the later call to LockAllocation.
- mDevice->GetResidencyManager()->TrackResidentAllocation(descriptorHeap.get());
+ DAWN_TRY_ASSIGN(descriptorHeap, AllocateHeap(mDescriptorCount));
}
DAWN_TRY(mDevice->GetResidencyManager()->LockAllocation(descriptorHeap.get()));
+
// Create a FIFO buffer from the recently created heap.
mHeap = std::move(descriptorHeap);
- mAllocator = RingBufferAllocator(descriptorCount);
+ mAllocator = RingBufferAllocator(mDescriptorCount);
// Invalidate all bindgroup allocations on previously bound heaps by incrementing the heap
// serial. When a bindgroup attempts to re-populate, it will compare with its recorded
diff --git a/src/dawn_native/d3d12/ShaderVisibleDescriptorAllocatorD3D12.h b/src/dawn_native/d3d12/ShaderVisibleDescriptorAllocatorD3D12.h
index 564eb95..d93e57a 100644
--- a/src/dawn_native/d3d12/ShaderVisibleDescriptorAllocatorD3D12.h
+++ b/src/dawn_native/d3d12/ShaderVisibleDescriptorAllocatorD3D12.h
@@ -78,6 +78,9 @@
std::unique_ptr<ShaderVisibleDescriptorHeap> heap;
};
+ ResultOrError<std::unique_ptr<ShaderVisibleDescriptorHeap>> AllocateHeap(
+ uint32_t descriptorCount) const;
+
std::unique_ptr<ShaderVisibleDescriptorHeap> mHeap;
RingBufferAllocator mAllocator;
std::list<SerialDescriptorHeap> mPool;
@@ -91,6 +94,10 @@
Serial mHeapSerial = 0;
uint32_t mSizeIncrement;
+
+ // The descriptor count is the current size of the heap in number of descriptors.
+ // This is stored on the allocator to avoid extra conversions.
+ uint32_t mDescriptorCount = 0;
};
}} // namespace dawn_native::d3d12
diff --git a/src/tests/white_box/D3D12DescriptorHeapTests.cpp b/src/tests/white_box/D3D12DescriptorHeapTests.cpp
index 6750586..43a1179 100644
--- a/src/tests/white_box/D3D12DescriptorHeapTests.cpp
+++ b/src/tests/white_box/D3D12DescriptorHeapTests.cpp
@@ -220,6 +220,10 @@
// Verify shader-visible heaps can be recycled for multiple submits.
TEST_P(D3D12DescriptorHeapTests, PoolHeapsInMultipleSubmits) {
+ // Use small heaps to count only pool-allocated switches.
+ DAWN_SKIP_TEST_IF(!mD3DDevice->IsToggleEnabled(
+ dawn_native::Toggle::UseD3D12SmallShaderVisibleHeapForTesting));
+
ShaderVisibleDescriptorAllocator* allocator =
mD3DDevice->GetSamplerShaderVisibleDescriptorAllocator();
@@ -253,6 +257,10 @@
// Verify shader-visible heaps do not recycle in a pending submit.
TEST_P(D3D12DescriptorHeapTests, PoolHeapsInPendingSubmit) {
+ // Use small heaps to count only pool-allocated switches.
+ DAWN_SKIP_TEST_IF(!mD3DDevice->IsToggleEnabled(
+ dawn_native::Toggle::UseD3D12SmallShaderVisibleHeapForTesting));
+
constexpr uint32_t kNumOfSwitches = 5;
ShaderVisibleDescriptorAllocator* allocator =
@@ -280,6 +288,10 @@
// Verify switching shader-visible heaps do not recycle in a pending submit but do so
// once no longer pending.
TEST_P(D3D12DescriptorHeapTests, PoolHeapsInPendingAndMultipleSubmits) {
+ // Use small heaps to count only pool-allocated switches.
+ DAWN_SKIP_TEST_IF(!mD3DDevice->IsToggleEnabled(
+ dawn_native::Toggle::UseD3D12SmallShaderVisibleHeapForTesting));
+
constexpr uint32_t kNumOfSwitches = 5;
ShaderVisibleDescriptorAllocator* allocator =
@@ -319,6 +331,91 @@
EXPECT_EQ(allocator->GetShaderVisiblePoolSizeForTesting(), kNumOfSwitches);
}
+// Verify shader-visible heaps do not recycle in multiple submits.
+TEST_P(D3D12DescriptorHeapTests, GrowHeapsInMultipleSubmits) {
+ ShaderVisibleDescriptorAllocator* allocator =
+ mD3DDevice->GetSamplerShaderVisibleDescriptorAllocator();
+
+ const Serial heapSerial = allocator->GetShaderVisibleHeapSerialForTesting();
+
+ std::set<ComPtr<ID3D12DescriptorHeap>> heaps = {allocator->GetShaderVisibleHeap()};
+
+ EXPECT_EQ(allocator->GetShaderVisiblePoolSizeForTesting(), 0u);
+
+ // Growth: Allocate + Tick() and ensure heaps are always unique.
+ while (allocator->GetShaderVisiblePoolSizeForTesting() == 0) {
+ EXPECT_TRUE(allocator->AllocateAndSwitchShaderVisibleHeap().IsSuccess());
+ ComPtr<ID3D12DescriptorHeap> heap = allocator->GetShaderVisibleHeap();
+ EXPECT_TRUE(std::find(heaps.begin(), heaps.end(), heap) == heaps.end());
+ heaps.insert(heap);
+ mD3DDevice->Tick();
+ }
+
+ // Verify the number of switches equals the number of heaps allocated (minus the initial).
+ EXPECT_EQ(allocator->GetShaderVisiblePoolSizeForTesting(), 1u);
+ EXPECT_EQ(allocator->GetShaderVisibleHeapSerialForTesting(), heapSerial + heaps.size() - 1);
+}
+
+// Verify shader-visible heaps do not recycle in a pending submit.
+TEST_P(D3D12DescriptorHeapTests, GrowHeapsInPendingSubmit) {
+ ShaderVisibleDescriptorAllocator* allocator =
+ mD3DDevice->GetSamplerShaderVisibleDescriptorAllocator();
+
+ const Serial heapSerial = allocator->GetShaderVisibleHeapSerialForTesting();
+
+ std::set<ComPtr<ID3D12DescriptorHeap>> heaps = {allocator->GetShaderVisibleHeap()};
+
+ EXPECT_EQ(allocator->GetShaderVisiblePoolSizeForTesting(), 0u);
+
+ // Growth: Allocate new heaps.
+ while (allocator->GetShaderVisiblePoolSizeForTesting() == 0) {
+ EXPECT_TRUE(allocator->AllocateAndSwitchShaderVisibleHeap().IsSuccess());
+ ComPtr<ID3D12DescriptorHeap> heap = allocator->GetShaderVisibleHeap();
+ EXPECT_TRUE(std::find(heaps.begin(), heaps.end(), heap) == heaps.end());
+ heaps.insert(heap);
+ }
+
+ // Verify the number of switches equals the number of heaps allocated (minus the initial).
+ EXPECT_EQ(allocator->GetShaderVisiblePoolSizeForTesting(), 1u);
+ EXPECT_EQ(allocator->GetShaderVisibleHeapSerialForTesting(), heapSerial + heaps.size() - 1);
+}
+
+// Verify switching shader-visible heaps do not recycle in a pending submit but do so
+// once no longer pending.
+// Switches over many times until |kNumOfPooledHeaps| heaps are pool-allocated.
+TEST_P(D3D12DescriptorHeapTests, GrowAndPoolHeapsInPendingAndMultipleSubmits) {
+ ShaderVisibleDescriptorAllocator* allocator =
+ mD3DDevice->GetSamplerShaderVisibleDescriptorAllocator();
+
+ std::set<ComPtr<ID3D12DescriptorHeap>> heaps = {allocator->GetShaderVisibleHeap()};
+
+ EXPECT_EQ(allocator->GetShaderVisiblePoolSizeForTesting(), 0u);
+
+ uint32_t kNumOfPooledHeaps = 5;
+ while (allocator->GetShaderVisiblePoolSizeForTesting() < kNumOfPooledHeaps) {
+ EXPECT_TRUE(allocator->AllocateAndSwitchShaderVisibleHeap().IsSuccess());
+ ComPtr<ID3D12DescriptorHeap> heap = allocator->GetShaderVisibleHeap();
+ EXPECT_TRUE(std::find(heaps.begin(), heaps.end(), heap) == heaps.end());
+ heaps.insert(heap);
+ }
+
+ EXPECT_EQ(allocator->GetShaderVisiblePoolSizeForTesting(), kNumOfPooledHeaps);
+
+ // Ensure switched-over heaps can be recycled by advancing the GPU by at least |kFrameDepth|.
+ for (uint32_t i = 0; i < kFrameDepth; i++) {
+ mD3DDevice->Tick();
+ }
+
+ // Switch-over the pool-allocated heaps.
+ for (uint32_t i = 0; i < kNumOfPooledHeaps; i++) {
+ EXPECT_TRUE(allocator->AllocateAndSwitchShaderVisibleHeap().IsSuccess());
+ ComPtr<ID3D12DescriptorHeap> heap = allocator->GetShaderVisibleHeap();
+ EXPECT_FALSE(std::find(heaps.begin(), heaps.end(), heap) == heaps.end());
+ }
+
+ EXPECT_EQ(allocator->GetShaderVisiblePoolSizeForTesting(), kNumOfPooledHeaps);
+}
+
// Verify encoding multiple heaps worth of bindgroups.
// Shader-visible heaps will switch out |kNumOfHeaps| times.
TEST_P(D3D12DescriptorHeapTests, EncodeManyUBO) {