D3D12: Pool-allocate shader-visible descriptor heaps.

Rather than destory GPU descriptor heaps upon being switched out,
heaps are stored in a list where they can be re-used once the GPU
is no longer using them.

BUG=dawn:155

Change-Id: I2074573e354f114c45afe9895e8515980d325852
Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/16282
Reviewed-by: Austin Eng <enga@chromium.org>
Commit-Queue: Bryan Bernhart <bryan.bernhart@intel.com>
diff --git a/src/dawn_native/d3d12/ShaderVisibleDescriptorAllocatorD3D12.cpp b/src/dawn_native/d3d12/ShaderVisibleDescriptorAllocatorD3D12.cpp
index 6843d21..76e2db2 100644
--- a/src/dawn_native/d3d12/ShaderVisibleDescriptorAllocatorD3D12.cpp
+++ b/src/dawn_native/d3d12/ShaderVisibleDescriptorAllocatorD3D12.cpp
@@ -55,22 +55,21 @@
 
     MaybeError ShaderVisibleDescriptorAllocator::Initialize() {
         ASSERT(mShaderVisibleBuffers[D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV].heap.Get() == nullptr);
+        mShaderVisibleBuffers[D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV].heapType =
+            D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV;
+
         ASSERT(mShaderVisibleBuffers[D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER].heap.Get() == nullptr);
+        mShaderVisibleBuffers[D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER].heapType =
+            D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER;
+
         DAWN_TRY(AllocateAndSwitchShaderVisibleHeaps());
+
         return {};
     }
 
     MaybeError ShaderVisibleDescriptorAllocator::AllocateAndSwitchShaderVisibleHeaps() {
-        // TODO(bryan.bernhart@intel.com): Allocating to max heap size wastes memory
-        // should the developer not allocate any bindings for the heap type.
-        // Consider dynamically re-sizing GPU heaps.
-        DAWN_TRY(
-            AllocateGPUHeap(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV,
-                            GetD3D12ShaderVisibleHeapSize(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV),
-                            GetD3D12HeapFlags(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV)));
-        DAWN_TRY(AllocateGPUHeap(D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER,
-                                 GetD3D12ShaderVisibleHeapSize(D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER),
-                                 GetD3D12HeapFlags(D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER)));
+        DAWN_TRY(AllocateGPUHeap(&mShaderVisibleBuffers[D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV]));
+        DAWN_TRY(AllocateGPUHeap(&mShaderVisibleBuffers[D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER]));
 
         // Invalidate all bindgroup allocations on previously bound heaps by incrementing the heap
         // serial. When a bindgroup attempts to re-populate, it will compare with its recorded
@@ -122,28 +121,45 @@
 
     // Creates a GPU descriptor heap that manages descriptors in a FIFO queue.
     MaybeError ShaderVisibleDescriptorAllocator::AllocateGPUHeap(
-        D3D12_DESCRIPTOR_HEAP_TYPE heapType,
-        uint32_t descriptorCount,
-        D3D12_DESCRIPTOR_HEAP_FLAGS heapFlags) {
-        ASSERT(heapType == D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV ||
-               heapType == D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER);
-        if (mShaderVisibleBuffers[heapType].heap != nullptr) {
-            mDevice->ReferenceUntilUnused(std::move(mShaderVisibleBuffers[heapType].heap));
+        ShaderVisibleBuffer* shaderVisibleBuffer) {
+        ComPtr<ID3D12DescriptorHeap> heap;
+        // Return the switched out heap to the pool and retrieve the oldest heap that is no longer
+        // used by GPU. This maintains a heap buffer to avoid frequently re-creating heaps for heavy
+        // users.
+        // TODO(dawn:256): Consider periodically triming to avoid OOM.
+        if (shaderVisibleBuffer->heap != nullptr) {
+            shaderVisibleBuffer->pool.push_back(
+                {mDevice->GetPendingCommandSerial(), std::move(shaderVisibleBuffer->heap)});
         }
 
-        D3D12_DESCRIPTOR_HEAP_DESC heapDescriptor;
-        heapDescriptor.Type = heapType;
-        heapDescriptor.NumDescriptors = descriptorCount;
-        heapDescriptor.Flags = heapFlags;
-        heapDescriptor.NodeMask = 0;
-        ComPtr<ID3D12DescriptorHeap> heap;
-        DAWN_TRY(CheckOutOfMemoryHRESULT(
-            mDevice->GetD3D12Device()->CreateDescriptorHeap(&heapDescriptor, IID_PPV_ARGS(&heap)),
-            "ID3D12Device::CreateDescriptorHeap"));
+        // Recycle existing heap if possible.
+        if (!shaderVisibleBuffer->pool.empty() &&
+            shaderVisibleBuffer->pool.front().heapSerial <= mDevice->GetCompletedCommandSerial()) {
+            heap = std::move(shaderVisibleBuffer->pool.front().heap);
+            shaderVisibleBuffer->pool.pop_front();
+        }
+
+        const D3D12_DESCRIPTOR_HEAP_TYPE heapType = shaderVisibleBuffer->heapType;
+
+        // TODO(bryan.bernhart@intel.com): Allocating to max heap size wastes memory
+        // should the developer not allocate any bindings for the heap type.
+        // Consider dynamically re-sizing GPU heaps.
+        const uint32_t descriptorCount = GetD3D12ShaderVisibleHeapSize(heapType);
+
+        if (heap == nullptr) {
+            D3D12_DESCRIPTOR_HEAP_DESC heapDescriptor;
+            heapDescriptor.Type = heapType;
+            heapDescriptor.NumDescriptors = descriptorCount;
+            heapDescriptor.Flags = GetD3D12HeapFlags(heapType);
+            heapDescriptor.NodeMask = 0;
+            DAWN_TRY(CheckOutOfMemoryHRESULT(mDevice->GetD3D12Device()->CreateDescriptorHeap(
+                                                 &heapDescriptor, IID_PPV_ARGS(&heap)),
+                                             "ID3D12Device::CreateDescriptorHeap"));
+        }
 
         // Create a FIFO buffer from the recently created heap.
-        mShaderVisibleBuffers[heapType].heap = std::move(heap);
-        mShaderVisibleBuffers[heapType].allocator = RingBufferAllocator(descriptorCount);
+        shaderVisibleBuffer->heap = std::move(heap);
+        shaderVisibleBuffer->allocator = RingBufferAllocator(descriptorCount);
         return {};
     }
 
@@ -158,6 +174,20 @@
         return mShaderVisibleBuffers[heapType].allocator.GetSize();
     }
 
+    ComPtr<ID3D12DescriptorHeap> ShaderVisibleDescriptorAllocator::GetShaderVisibleHeapForTesting(
+        D3D12_DESCRIPTOR_HEAP_TYPE heapType) const {
+        ASSERT(heapType == D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV ||
+               heapType == D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER);
+        return mShaderVisibleBuffers[heapType].heap;
+    }
+
+    uint64_t ShaderVisibleDescriptorAllocator::GetShaderVisiblePoolSizeForTesting(
+        D3D12_DESCRIPTOR_HEAP_TYPE heapType) const {
+        ASSERT(heapType == D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV ||
+               heapType == D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER);
+        return mShaderVisibleBuffers[heapType].pool.size();
+    }
+
     bool ShaderVisibleDescriptorAllocator::IsAllocationStillValid(Serial lastUsageSerial,
                                                                   Serial heapSerial) const {
         // Consider valid if allocated for the pending submit and the shader visible heaps
diff --git a/src/dawn_native/d3d12/ShaderVisibleDescriptorAllocatorD3D12.h b/src/dawn_native/d3d12/ShaderVisibleDescriptorAllocatorD3D12.h
index 5c6241e..66f63f5 100644
--- a/src/dawn_native/d3d12/ShaderVisibleDescriptorAllocatorD3D12.h
+++ b/src/dawn_native/d3d12/ShaderVisibleDescriptorAllocatorD3D12.h
@@ -20,6 +20,7 @@
 #include "dawn_native/d3d12/DescriptorHeapAllocationD3D12.h"
 
 #include <array>
+#include <list>
 
 namespace dawn_native { namespace d3d12 {
 
@@ -44,19 +45,27 @@
         MaybeError AllocateAndSwitchShaderVisibleHeaps();
 
         uint64_t GetShaderVisibleHeapSizeForTesting(D3D12_DESCRIPTOR_HEAP_TYPE heapType) const;
+        ComPtr<ID3D12DescriptorHeap> GetShaderVisibleHeapForTesting(
+            D3D12_DESCRIPTOR_HEAP_TYPE heapType) const;
+        uint64_t GetShaderVisiblePoolSizeForTesting(D3D12_DESCRIPTOR_HEAP_TYPE heapType) const;
 
         bool IsAllocationStillValid(Serial lastUsageSerial, Serial heapSerial) const;
 
       private:
-        MaybeError AllocateGPUHeap(D3D12_DESCRIPTOR_HEAP_TYPE heapType,
-                                   uint32_t descriptorCount,
-                                   D3D12_DESCRIPTOR_HEAP_FLAGS heapFlags);
+        struct SerialDescriptorHeap {
+            Serial heapSerial;
+            ComPtr<ID3D12DescriptorHeap> heap;
+        };
 
         struct ShaderVisibleBuffer {
             ComPtr<ID3D12DescriptorHeap> heap;
             RingBufferAllocator allocator;
+            std::list<SerialDescriptorHeap> pool;
+            D3D12_DESCRIPTOR_HEAP_TYPE heapType;
         };
 
+        MaybeError AllocateGPUHeap(ShaderVisibleBuffer* shaderVisibleBuffer);
+
         Device* mDevice;
 
         // The serial value of 0 means the shader-visible heaps have not been allocated.
diff --git a/src/tests/white_box/D3D12DescriptorHeapTests.cpp b/src/tests/white_box/D3D12DescriptorHeapTests.cpp
index 283f4a3..7dc0a91 100644
--- a/src/tests/white_box/D3D12DescriptorHeapTests.cpp
+++ b/src/tests/white_box/D3D12DescriptorHeapTests.cpp
@@ -21,13 +21,21 @@
 
 constexpr uint32_t kRTSize = 4;
 
+// Pooling tests are required to advance the GPU completed serial to reuse heaps.
+// This requires Tick() to be called at-least |kFrameDepth| times. This constant
+// should be updated if the internals of Tick() change.
+constexpr uint32_t kFrameDepth = 2;
+
 using namespace dawn_native::d3d12;
 
 class D3D12DescriptorHeapTests : public DawnTest {
-  private:
+  protected:
     void TestSetUp() override {
         DAWN_SKIP_TEST_IF(UsesWire());
+        mD3DDevice = reinterpret_cast<Device*>(device.Get());
     }
+
+    Device* mD3DDevice = nullptr;
 };
 
 // Verify the shader visible heaps switch over within a single submit.
@@ -85,4 +93,109 @@
     EXPECT_EQ(allocator->GetShaderVisibleHeapsSerial(), heapSerial + 1);
 }
 
+// Verify shader-visible heaps can be recycled for multiple submits.
+TEST_P(D3D12DescriptorHeapTests, PoolHeapsInMultipleSubmits) {
+    constexpr D3D12_DESCRIPTOR_HEAP_TYPE heapType = D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER;
+
+    ShaderVisibleDescriptorAllocator* allocator = mD3DDevice->GetShaderVisibleDescriptorAllocator();
+
+    std::list<ComPtr<ID3D12DescriptorHeap>> heaps = {
+        allocator->GetShaderVisibleHeapForTesting(heapType)};
+
+    EXPECT_EQ(allocator->GetShaderVisiblePoolSizeForTesting(heapType), 0u);
+
+    // Allocate + Tick() up to |kFrameDepth| and ensure heaps are always unique.
+    for (uint32_t i = 0; i < kFrameDepth; i++) {
+        EXPECT_TRUE(allocator->AllocateAndSwitchShaderVisibleHeaps().IsSuccess());
+        ComPtr<ID3D12DescriptorHeap> heap = allocator->GetShaderVisibleHeapForTesting(heapType);
+        EXPECT_TRUE(std::find(heaps.begin(), heaps.end(), heap) == heaps.end());
+        heaps.push_back(heap);
+        mD3DDevice->Tick();
+    }
+
+    // Repeat up to |kFrameDepth| again but ensure heaps are the same in the expected order
+    // (oldest heaps are recycled first). The "+ 1" is so we also include the very first heap in the
+    // check.
+    for (uint32_t i = 0; i < kFrameDepth + 1; i++) {
+        EXPECT_TRUE(allocator->AllocateAndSwitchShaderVisibleHeaps().IsSuccess());
+        ComPtr<ID3D12DescriptorHeap> heap = allocator->GetShaderVisibleHeapForTesting(heapType);
+        EXPECT_TRUE(heaps.front() == heap);
+        heaps.pop_front();
+        mD3DDevice->Tick();
+    }
+
+    EXPECT_TRUE(heaps.empty());
+    EXPECT_EQ(allocator->GetShaderVisiblePoolSizeForTesting(heapType), kFrameDepth);
+}
+
+// Verify shader-visible heaps do not recycle in a pending submit.
+TEST_P(D3D12DescriptorHeapTests, PoolHeapsInPendingSubmit) {
+    constexpr D3D12_DESCRIPTOR_HEAP_TYPE heapType = D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER;
+    constexpr uint32_t kNumOfSwitches = 5;
+
+    ShaderVisibleDescriptorAllocator* allocator = mD3DDevice->GetShaderVisibleDescriptorAllocator();
+
+    const Serial heapSerial = allocator->GetShaderVisibleHeapsSerial();
+
+    std::set<ComPtr<ID3D12DescriptorHeap>> heaps = {
+        allocator->GetShaderVisibleHeapForTesting(heapType)};
+
+    EXPECT_EQ(allocator->GetShaderVisiblePoolSizeForTesting(heapType), 0u);
+
+    // Switch-over |kNumOfSwitches| and ensure heaps are always unique.
+    for (uint32_t i = 0; i < kNumOfSwitches; i++) {
+        EXPECT_TRUE(allocator->AllocateAndSwitchShaderVisibleHeaps().IsSuccess());
+        ComPtr<ID3D12DescriptorHeap> heap = allocator->GetShaderVisibleHeapForTesting(heapType);
+        EXPECT_TRUE(std::find(heaps.begin(), heaps.end(), heap) == heaps.end());
+        heaps.insert(heap);
+    }
+
+    // After |kNumOfSwitches|, no heaps are recycled.
+    EXPECT_EQ(allocator->GetShaderVisibleHeapsSerial(), heapSerial + kNumOfSwitches);
+    EXPECT_EQ(allocator->GetShaderVisiblePoolSizeForTesting(heapType), kNumOfSwitches);
+}
+
+// Verify switching shader-visible heaps do not recycle in a pending submit but do so
+// once no longer pending.
+TEST_P(D3D12DescriptorHeapTests, PoolHeapsInPendingAndMultipleSubmits) {
+    constexpr D3D12_DESCRIPTOR_HEAP_TYPE heapType = D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER;
+    constexpr uint32_t kNumOfSwitches = 5;
+
+    ShaderVisibleDescriptorAllocator* allocator = mD3DDevice->GetShaderVisibleDescriptorAllocator();
+    const Serial heapSerial = allocator->GetShaderVisibleHeapsSerial();
+
+    std::set<ComPtr<ID3D12DescriptorHeap>> heaps = {
+        allocator->GetShaderVisibleHeapForTesting(heapType)};
+
+    EXPECT_EQ(allocator->GetShaderVisiblePoolSizeForTesting(heapType), 0u);
+
+    // Switch-over |kNumOfSwitches| to create a pool of unique heaps.
+    for (uint32_t i = 0; i < kNumOfSwitches; i++) {
+        EXPECT_TRUE(allocator->AllocateAndSwitchShaderVisibleHeaps().IsSuccess());
+        ComPtr<ID3D12DescriptorHeap> heap = allocator->GetShaderVisibleHeapForTesting(heapType);
+        EXPECT_TRUE(std::find(heaps.begin(), heaps.end(), heap) == heaps.end());
+        heaps.insert(heap);
+    }
+
+    EXPECT_EQ(allocator->GetShaderVisibleHeapsSerial(), heapSerial + kNumOfSwitches);
+    EXPECT_EQ(allocator->GetShaderVisiblePoolSizeForTesting(heapType), kNumOfSwitches);
+
+    // Ensure switched-over heaps can be recycled by advancing the GPU by at-least |kFrameDepth|.
+    for (uint32_t i = 0; i < kFrameDepth; i++) {
+        mD3DDevice->Tick();
+    }
+
+    // Switch-over |kNumOfSwitches| again reusing the same heaps.
+    for (uint32_t i = 0; i < kNumOfSwitches; i++) {
+        EXPECT_TRUE(allocator->AllocateAndSwitchShaderVisibleHeaps().IsSuccess());
+        ComPtr<ID3D12DescriptorHeap> heap = allocator->GetShaderVisibleHeapForTesting(heapType);
+        EXPECT_TRUE(std::find(heaps.begin(), heaps.end(), heap) != heaps.end());
+        heaps.erase(heap);
+    }
+
+    // After switching-over |kNumOfSwitches| x 2, ensure no additional heaps exist.
+    EXPECT_EQ(allocator->GetShaderVisibleHeapsSerial(), heapSerial + kNumOfSwitches * 2);
+    EXPECT_EQ(allocator->GetShaderVisiblePoolSizeForTesting(heapType), kNumOfSwitches);
+}
+
 DAWN_INSTANTIATE_TEST(D3D12DescriptorHeapTests, D3D12Backend());