D3D12: Dynamic shader-visible heap allocation.

Allocates shader-visible descriptor heaps at a much
smaller initial size, doubles the size on each switch,
and pool-allocates them once the max size is reached.
This strategy avoids wasting memory for light users
while still maximizing performance for heavy users.

BUG=dawn:155

Change-Id: I0519235c901d0283b98ee824eeb0cda6de70b210
Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/25620
Commit-Queue: Bryan Bernhart <bryan.bernhart@intel.com>
Reviewed-by: Austin Eng <enga@chromium.org>
diff --git a/src/dawn_native/d3d12/ShaderVisibleDescriptorAllocatorD3D12.cpp b/src/dawn_native/d3d12/ShaderVisibleDescriptorAllocatorD3D12.cpp
index ca30889..ba1b493 100644
--- a/src/dawn_native/d3d12/ShaderVisibleDescriptorAllocatorD3D12.cpp
+++ b/src/dawn_native/d3d12/ShaderVisibleDescriptorAllocatorD3D12.cpp
@@ -20,10 +20,32 @@
 
 namespace dawn_native { namespace d3d12 {
 
+    // Limits the min/max heap size to always be some known value for testing.
     // Thresholds should be adjusted (lower == faster) to avoid tests taking too long to complete.
     static constexpr const uint32_t kShaderVisibleSmallHeapSizes[] = {1024, 512};
 
-    uint32_t GetD3D12ShaderVisibleHeapSize(D3D12_DESCRIPTOR_HEAP_TYPE heapType, bool useSmallSize) {
+    uint32_t GetD3D12ShaderVisibleHeapMinSize(D3D12_DESCRIPTOR_HEAP_TYPE heapType,
+                                              bool useSmallSize) {
+        if (useSmallSize) {
+            return kShaderVisibleSmallHeapSizes[heapType];
+        }
+
+        // Minimum heap size must be large enough to satisfy the largest descriptor allocation
+        // request and to amortize the cost of sub-allocation, but small enough to avoid wasting
+        // memory should only a tiny fraction ever be used.
+        // TODO(dawn:155): Figure out these values.
+        switch (heapType) {
+            case D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV:
+                return 4096;
+            case D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER:
+                return 256;
+            default:
+                UNREACHABLE();
+        }
+    }
+
+    uint32_t GetD3D12ShaderVisibleHeapMaxSize(D3D12_DESCRIPTOR_HEAP_TYPE heapType,
+                                              bool useSmallSize) {
         if (useSmallSize) {
             return kShaderVisibleSmallHeapSizes[heapType];
         }
@@ -62,7 +84,10 @@
         D3D12_DESCRIPTOR_HEAP_TYPE heapType)
         : mHeapType(heapType),
           mDevice(device),
-          mSizeIncrement(device->GetD3D12Device()->GetDescriptorHandleIncrementSize(heapType)) {
+          mSizeIncrement(device->GetD3D12Device()->GetDescriptorHandleIncrementSize(heapType)),
+          mDescriptorCount(GetD3D12ShaderVisibleHeapMinSize(
+              heapType,
+              mDevice->IsToggleEnabled(Toggle::UseD3D12SmallShaderVisibleHeapForTesting))) {
         ASSERT(heapType == D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV ||
                heapType == D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER);
     }
@@ -107,60 +132,74 @@
         mAllocator.Deallocate(completedSerial);
     }
 
+    ResultOrError<std::unique_ptr<ShaderVisibleDescriptorHeap>>
+    ShaderVisibleDescriptorAllocator::AllocateHeap(uint32_t descriptorCount) const {
+        // The size in bytes of a descriptor heap is best calculated by the increment size
+        // multiplied by the number of descriptors. In practice, this is only an estimate and
+        // the actual size may vary depending on the driver.
+        const uint64_t kSize = mSizeIncrement * descriptorCount;
+
+        DAWN_TRY(mDevice->GetResidencyManager()->EnsureCanAllocate(kSize, MemorySegment::Local));
+
+        ComPtr<ID3D12DescriptorHeap> d3d12DescriptorHeap;
+        D3D12_DESCRIPTOR_HEAP_DESC heapDescriptor;
+        heapDescriptor.Type = mHeapType;
+        heapDescriptor.NumDescriptors = descriptorCount;
+        heapDescriptor.Flags = GetD3D12HeapFlags(mHeapType);
+        heapDescriptor.NodeMask = 0;
+        DAWN_TRY(CheckOutOfMemoryHRESULT(mDevice->GetD3D12Device()->CreateDescriptorHeap(
+                                             &heapDescriptor, IID_PPV_ARGS(&d3d12DescriptorHeap)),
+                                         "ID3D12Device::CreateDescriptorHeap"));
+
+        std::unique_ptr<ShaderVisibleDescriptorHeap> descriptorHeap =
+            std::make_unique<ShaderVisibleDescriptorHeap>(std::move(d3d12DescriptorHeap), kSize);
+
+        // We must track the allocation in the LRU when it is created, otherwise the residency
+        // manager will see the allocation as non-resident in the later call to LockAllocation.
+        mDevice->GetResidencyManager()->TrackResidentAllocation(descriptorHeap.get());
+
+        return std::move(descriptorHeap);
+    }
+
     // Creates a GPU descriptor heap that manages descriptors in a FIFO queue.
     MaybeError ShaderVisibleDescriptorAllocator::AllocateAndSwitchShaderVisibleHeap() {
         std::unique_ptr<ShaderVisibleDescriptorHeap> descriptorHeap;
-        // Return the switched out heap to the pool and retrieve the oldest heap that is no longer
-        // used by GPU. This maintains a heap buffer to avoid frequently re-creating heaps for heavy
-        // users.
-        // TODO(dawn:256): Consider periodically triming to avoid OOM.
+        // Dynamically allocate using a two-phase allocation strategy.
+        // The first phase grows a small heap by doubling its size (up to the max) for light
+        // users while the second phase pool-allocates maximum-sized heaps for heavy users.
         if (mHeap != nullptr) {
             mDevice->GetResidencyManager()->UnlockAllocation(mHeap.get());
-            mPool.push_back({mDevice->GetPendingCommandSerial(), std::move(mHeap)});
-        }
 
-        // Recycle existing heap if possible.
-        if (!mPool.empty() && mPool.front().heapSerial <= mDevice->GetCompletedCommandSerial()) {
-            descriptorHeap = std::move(mPool.front().heap);
-            mPool.pop_front();
+            const uint32_t maxDescriptorCount = GetD3D12ShaderVisibleHeapMaxSize(
+                mHeapType,
+                mDevice->IsToggleEnabled(Toggle::UseD3D12SmallShaderVisibleHeapForTesting));
+            if (mDescriptorCount < maxDescriptorCount) {
+                // Phase #1. Grow the heaps in powers-of-two.
+                mDevice->ReferenceUntilUnused(mHeap->GetD3D12DescriptorHeap());
+                mDescriptorCount = std::min(mDescriptorCount * 2, maxDescriptorCount);
+            } else {
+                // Phase #2. Pool-allocate heaps.
+                // Return the switched out heap to the pool and retrieve the oldest heap that is no
+                // longer used by GPU. This maintains a heap buffer to avoid frequently re-creating
+                // heaps for heavy users.
+                // TODO(dawn:256): Consider periodically trimming to avoid OOM.
+                mPool.push_back({mDevice->GetPendingCommandSerial(), std::move(mHeap)});
+                if (mPool.front().heapSerial <= mDevice->GetCompletedCommandSerial()) {
+                    descriptorHeap = std::move(mPool.front().heap);
+                    mPool.pop_front();
+                }
+            }
         }
 
-        // TODO(bryan.bernhart@intel.com): Allocating to max heap size wastes memory
-        // should the developer not allocate any bindings for the heap type.
-        // Consider dynamically re-sizing GPU heaps.
-        const uint32_t descriptorCount = GetD3D12ShaderVisibleHeapSize(
-            mHeapType, mDevice->IsToggleEnabled(Toggle::UseD3D12SmallShaderVisibleHeapForTesting));
-
         if (descriptorHeap == nullptr) {
-            // The size in bytes of a descriptor heap is best calculated by the increment size
-            // multiplied by the number of descriptors. In practice, this is only an estimate and
-            // the actual size may vary depending on the driver.
-            const uint64_t kSize = mSizeIncrement * descriptorCount;
-
-            DAWN_TRY(
-                mDevice->GetResidencyManager()->EnsureCanAllocate(kSize, MemorySegment::Local));
-
-            ComPtr<ID3D12DescriptorHeap> d3d12DescriptorHeap;
-            D3D12_DESCRIPTOR_HEAP_DESC heapDescriptor;
-            heapDescriptor.Type = mHeapType;
-            heapDescriptor.NumDescriptors = descriptorCount;
-            heapDescriptor.Flags = GetD3D12HeapFlags(mHeapType);
-            heapDescriptor.NodeMask = 0;
-            DAWN_TRY(
-                CheckOutOfMemoryHRESULT(mDevice->GetD3D12Device()->CreateDescriptorHeap(
-                                            &heapDescriptor, IID_PPV_ARGS(&d3d12DescriptorHeap)),
-                                        "ID3D12Device::CreateDescriptorHeap"));
-            descriptorHeap = std::make_unique<ShaderVisibleDescriptorHeap>(
-                std::move(d3d12DescriptorHeap), kSize);
-            // We must track the allocation in the LRU when it is created, otherwise the residency
-            // manager will see the allocation as non-resident in the later call to LockAllocation.
-            mDevice->GetResidencyManager()->TrackResidentAllocation(descriptorHeap.get());
+            DAWN_TRY_ASSIGN(descriptorHeap, AllocateHeap(mDescriptorCount));
         }
 
         DAWN_TRY(mDevice->GetResidencyManager()->LockAllocation(descriptorHeap.get()));
+
         // Create a FIFO buffer from the recently created heap.
         mHeap = std::move(descriptorHeap);
-        mAllocator = RingBufferAllocator(descriptorCount);
+        mAllocator = RingBufferAllocator(mDescriptorCount);
 
         // Invalidate all bindgroup allocations on previously bound heaps by incrementing the heap
         // serial. When a bindgroup attempts to re-populate, it will compare with its recorded
diff --git a/src/dawn_native/d3d12/ShaderVisibleDescriptorAllocatorD3D12.h b/src/dawn_native/d3d12/ShaderVisibleDescriptorAllocatorD3D12.h
index 564eb95..d93e57a 100644
--- a/src/dawn_native/d3d12/ShaderVisibleDescriptorAllocatorD3D12.h
+++ b/src/dawn_native/d3d12/ShaderVisibleDescriptorAllocatorD3D12.h
@@ -78,6 +78,9 @@
             std::unique_ptr<ShaderVisibleDescriptorHeap> heap;
         };
 
+        ResultOrError<std::unique_ptr<ShaderVisibleDescriptorHeap>> AllocateHeap(
+            uint32_t descriptorCount) const;
+
         std::unique_ptr<ShaderVisibleDescriptorHeap> mHeap;
         RingBufferAllocator mAllocator;
         std::list<SerialDescriptorHeap> mPool;
@@ -91,6 +94,10 @@
         Serial mHeapSerial = 0;
 
         uint32_t mSizeIncrement;
+
+        // The descriptor count is the current size of the heap in number of descriptors.
+        // This is stored on the allocator to avoid extra conversions.
+        uint32_t mDescriptorCount = 0;
     };
 }}  // namespace dawn_native::d3d12
 
diff --git a/src/tests/white_box/D3D12DescriptorHeapTests.cpp b/src/tests/white_box/D3D12DescriptorHeapTests.cpp
index 6750586..43a1179 100644
--- a/src/tests/white_box/D3D12DescriptorHeapTests.cpp
+++ b/src/tests/white_box/D3D12DescriptorHeapTests.cpp
@@ -220,6 +220,10 @@
 
 // Verify shader-visible heaps can be recycled for multiple submits.
 TEST_P(D3D12DescriptorHeapTests, PoolHeapsInMultipleSubmits) {
+    // Use small heaps to count only pool-allocated switches.
+    DAWN_SKIP_TEST_IF(!mD3DDevice->IsToggleEnabled(
+        dawn_native::Toggle::UseD3D12SmallShaderVisibleHeapForTesting));
+
     ShaderVisibleDescriptorAllocator* allocator =
         mD3DDevice->GetSamplerShaderVisibleDescriptorAllocator();
 
@@ -253,6 +257,10 @@
 
 // Verify shader-visible heaps do not recycle in a pending submit.
 TEST_P(D3D12DescriptorHeapTests, PoolHeapsInPendingSubmit) {
+    // Use small heaps to count only pool-allocated switches.
+    DAWN_SKIP_TEST_IF(!mD3DDevice->IsToggleEnabled(
+        dawn_native::Toggle::UseD3D12SmallShaderVisibleHeapForTesting));
+
     constexpr uint32_t kNumOfSwitches = 5;
 
     ShaderVisibleDescriptorAllocator* allocator =
@@ -280,6 +288,10 @@
 // Verify switching shader-visible heaps do not recycle in a pending submit but do so
 // once no longer pending.
 TEST_P(D3D12DescriptorHeapTests, PoolHeapsInPendingAndMultipleSubmits) {
+    // Use small heaps to count only pool-allocated switches.
+    DAWN_SKIP_TEST_IF(!mD3DDevice->IsToggleEnabled(
+        dawn_native::Toggle::UseD3D12SmallShaderVisibleHeapForTesting));
+
     constexpr uint32_t kNumOfSwitches = 5;
 
     ShaderVisibleDescriptorAllocator* allocator =
@@ -319,6 +331,91 @@
     EXPECT_EQ(allocator->GetShaderVisiblePoolSizeForTesting(), kNumOfSwitches);
 }
 
+// Verify shader-visible heaps are never recycled while growing across multiple submits.
+TEST_P(D3D12DescriptorHeapTests, GrowHeapsInMultipleSubmits) {
+    ShaderVisibleDescriptorAllocator* allocator =
+        mD3DDevice->GetSamplerShaderVisibleDescriptorAllocator();
+
+    const Serial heapSerial = allocator->GetShaderVisibleHeapSerialForTesting();
+
+    std::set<ComPtr<ID3D12DescriptorHeap>> heaps = {allocator->GetShaderVisibleHeap()};
+
+    EXPECT_EQ(allocator->GetShaderVisiblePoolSizeForTesting(), 0u);
+
+    // Growth: Allocate + Tick() and ensure heaps are always unique.
+    while (allocator->GetShaderVisiblePoolSizeForTesting() == 0) {
+        EXPECT_TRUE(allocator->AllocateAndSwitchShaderVisibleHeap().IsSuccess());
+        ComPtr<ID3D12DescriptorHeap> heap = allocator->GetShaderVisibleHeap();
+        EXPECT_TRUE(std::find(heaps.begin(), heaps.end(), heap) == heaps.end());
+        heaps.insert(heap);
+        mD3DDevice->Tick();
+    }
+
+    // Verify the number of switches equals the number of heaps allocated (minus the initial).
+    EXPECT_EQ(allocator->GetShaderVisiblePoolSizeForTesting(), 1u);
+    EXPECT_EQ(allocator->GetShaderVisibleHeapSerialForTesting(), heapSerial + heaps.size() - 1);
+}
+
+// Verify shader-visible heaps are never recycled while growing in a pending submit.
+TEST_P(D3D12DescriptorHeapTests, GrowHeapsInPendingSubmit) {
+    ShaderVisibleDescriptorAllocator* allocator =
+        mD3DDevice->GetSamplerShaderVisibleDescriptorAllocator();
+
+    const Serial heapSerial = allocator->GetShaderVisibleHeapSerialForTesting();
+
+    std::set<ComPtr<ID3D12DescriptorHeap>> heaps = {allocator->GetShaderVisibleHeap()};
+
+    EXPECT_EQ(allocator->GetShaderVisiblePoolSizeForTesting(), 0u);
+
+    // Growth: Allocate new heaps.
+    while (allocator->GetShaderVisiblePoolSizeForTesting() == 0) {
+        EXPECT_TRUE(allocator->AllocateAndSwitchShaderVisibleHeap().IsSuccess());
+        ComPtr<ID3D12DescriptorHeap> heap = allocator->GetShaderVisibleHeap();
+        EXPECT_TRUE(std::find(heaps.begin(), heaps.end(), heap) == heaps.end());
+        heaps.insert(heap);
+    }
+
+    // Verify the number of switches equals the number of heaps allocated (minus the initial).
+    EXPECT_EQ(allocator->GetShaderVisiblePoolSizeForTesting(), 1u);
+    EXPECT_EQ(allocator->GetShaderVisibleHeapSerialForTesting(), heapSerial + heaps.size() - 1);
+}
+
+// Verify switching shader-visible heaps do not recycle in a pending submit but do so
+// once no longer pending.
+// Switches over many times until |kNumOfPooledHeaps| heaps are pool-allocated.
+TEST_P(D3D12DescriptorHeapTests, GrowAndPoolHeapsInPendingAndMultipleSubmits) {
+    ShaderVisibleDescriptorAllocator* allocator =
+        mD3DDevice->GetSamplerShaderVisibleDescriptorAllocator();
+
+    std::set<ComPtr<ID3D12DescriptorHeap>> heaps = {allocator->GetShaderVisibleHeap()};
+
+    EXPECT_EQ(allocator->GetShaderVisiblePoolSizeForTesting(), 0u);
+
+    uint32_t kNumOfPooledHeaps = 5;
+    while (allocator->GetShaderVisiblePoolSizeForTesting() < kNumOfPooledHeaps) {
+        EXPECT_TRUE(allocator->AllocateAndSwitchShaderVisibleHeap().IsSuccess());
+        ComPtr<ID3D12DescriptorHeap> heap = allocator->GetShaderVisibleHeap();
+        EXPECT_TRUE(std::find(heaps.begin(), heaps.end(), heap) == heaps.end());
+        heaps.insert(heap);
+    }
+
+    EXPECT_EQ(allocator->GetShaderVisiblePoolSizeForTesting(), kNumOfPooledHeaps);
+
+    // Ensure switched-over heaps can be recycled by advancing the GPU by at least |kFrameDepth|.
+    for (uint32_t i = 0; i < kFrameDepth; i++) {
+        mD3DDevice->Tick();
+    }
+
+    // Switch-over the pool-allocated heaps.
+    for (uint32_t i = 0; i < kNumOfPooledHeaps; i++) {
+        EXPECT_TRUE(allocator->AllocateAndSwitchShaderVisibleHeap().IsSuccess());
+        ComPtr<ID3D12DescriptorHeap> heap = allocator->GetShaderVisibleHeap();
+        EXPECT_FALSE(std::find(heaps.begin(), heaps.end(), heap) == heaps.end());
+    }
+
+    EXPECT_EQ(allocator->GetShaderVisiblePoolSizeForTesting(), kNumOfPooledHeaps);
+}
+
 // Verify encoding multiple heaps worth of bindgroups.
 // Shader-visible heaps will switch out |kNumOfHeaps| times.
 TEST_P(D3D12DescriptorHeapTests, EncodeManyUBO) {