Reland "D3D12: Stage BindGroups on CPU descriptor heaps."

This reverts commit c7f454c24191131eb29b8f33f0e9d5b1702fd289
and relands commit 2479860e4bb0ef5a12d269557a088bace53f0f30.

> D3D12: Stage BindGroups on CPU descriptor heaps.
>
> Instead of directly populating GPU heaps, pre-encoded
> BindGroups are staged on CPU heaps then copied over
> to the GPU. Non-shader visible allocators are stored
> on the BGL, which hands out fixed-size chunks to
> simplify memory management. To enable memory re-use,
> CPU allocations are tied to the lifetime of BindGroup
> objects.

Reason for revert: We can reland this CL now that the CTS suppression merged.

Note: Adds validation to ensure binding size > 0.

Bug: dawn:155
Bug: dawn:375
Change-Id: I75b9773bbb7c70bcea803a7ad8b6480d21ea90f7
Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/18904
Reviewed-by: Kai Ninomiya <kainino@chromium.org>
Reviewed-by: Austin Eng <enga@chromium.org>
Commit-Queue: Kai Ninomiya <kainino@chromium.org>
diff --git a/src/tests/white_box/D3D12DescriptorHeapTests.cpp b/src/tests/white_box/D3D12DescriptorHeapTests.cpp
index 4431f01..eeb4412 100644
--- a/src/tests/white_box/D3D12DescriptorHeapTests.cpp
+++ b/src/tests/white_box/D3D12DescriptorHeapTests.cpp
@@ -15,7 +15,9 @@
 #include "tests/DawnTest.h"
 
 #include "dawn_native/Toggles.h"
+#include "dawn_native/d3d12/BindGroupLayoutD3D12.h"
 #include "dawn_native/d3d12/DeviceD3D12.h"
+#include "dawn_native/d3d12/NonShaderVisibleDescriptorAllocatorD3D12.h"
 #include "dawn_native/d3d12/ShaderVisibleDescriptorAllocatorD3D12.h"
 #include "utils/ComboRenderPipelineDescriptor.h"
 #include "utils/WGPUHelpers.h"
@@ -93,6 +95,31 @@
     wgpu::ShaderModule mSimpleFSModule;
 };
 
+// Test wrapper owning a NonShaderVisibleDescriptorAllocator created with the sampler heap type.
+class DummyNonShaderVisibleDescriptorAllocator {
+  public:
+    DummyNonShaderVisibleDescriptorAllocator(Device* device,
+                                             uint32_t descriptorCount,
+                                             uint32_t allocationsPerHeap)
+        : mAllocator(device, descriptorCount, allocationsPerHeap * descriptorCount,
+                     D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER) {
+    }
+
+    // Unwraps the allocator's result; a failed allocation yields a default-constructed value.
+    CPUDescriptorHeapAllocation AllocateCPUDescriptors() {
+        dawn_native::ResultOrError<CPUDescriptorHeapAllocation> outcome =
+            mAllocator.AllocateCPUDescriptors();
+        return (!outcome.IsSuccess()) ? CPUDescriptorHeapAllocation{} : outcome.AcquireSuccess();
+    }
+
+    void Deallocate(CPUDescriptorHeapAllocation& allocation) {
+        mAllocator.Deallocate(&allocation);
+    }
+
+  private:
+    NonShaderVisibleDescriptorAllocator mAllocator;
+};
+
 // Verify the shader visible heaps switch over within a single submit.
 TEST_P(D3D12DescriptorHeapTests, SwitchOverHeaps) {
     utils::ComboRenderPipelineDescriptor renderPipelineDescriptor(device);
@@ -688,6 +715,157 @@
     }
 }
 
+// Verify a single allocate/deallocate; only one non-shader visible heap will be created.
+TEST_P(D3D12DescriptorHeapTests, Single) {
+    constexpr uint32_t kAllocationsPerHeap = 3;
+    constexpr uint32_t kDescriptorCount = 4;
+    DummyNonShaderVisibleDescriptorAllocator allocator(mD3DDevice, kDescriptorCount,
+                                                       kAllocationsPerHeap);
+
+    // The first allocation must report heap index 0 and a non-null base CPU handle.
+    CPUDescriptorHeapAllocation allocation(allocator.AllocateCPUDescriptors());
+    EXPECT_EQ(allocation.GetHeapIndex(), 0u);
+    EXPECT_NE(allocation.OffsetFrom(0, 0).ptr, 0u);
+
+    allocator.Deallocate(allocation);
+    EXPECT_FALSE(allocation.IsValid());
+}
+
+// Verify that requesting more allocations than one heap can hold grows the pool.
+// Creates |kNumOfHeaps| non-shader visible heaps.
+TEST_P(D3D12DescriptorHeapTests, Sequential) {
+    constexpr uint32_t kDescriptorCount = 4;
+    constexpr uint32_t kAllocationsPerHeap = 3;
+    constexpr uint32_t kNumOfHeaps = 2;
+    constexpr uint32_t kTotalAllocations = kAllocationsPerHeap * kNumOfHeaps;
+
+    DummyNonShaderVisibleDescriptorAllocator allocator(mD3DDevice, kDescriptorCount,
+                                                       kAllocationsPerHeap);
+
+    // Allocate |kNumOfHeaps| worth, recording the heap index reported by each allocation.
+    std::set<uint32_t> allocatedHeaps;
+    std::vector<CPUDescriptorHeapAllocation> liveAllocations;
+    for (uint32_t i = 0; i < kTotalAllocations; ++i) {
+        CPUDescriptorHeapAllocation allocation = allocator.AllocateCPUDescriptors();
+        EXPECT_EQ(allocation.GetHeapIndex(), i / kAllocationsPerHeap);
+        EXPECT_NE(allocation.OffsetFrom(0, 0).ptr, 0u);
+        allocatedHeaps.insert(allocation.GetHeapIndex());
+        liveAllocations.push_back(allocation);
+    }
+
+    EXPECT_EQ(allocatedHeaps.size(), kNumOfHeaps);
+
+    // Release everything; each deallocated handle must report itself as invalid.
+    for (CPUDescriptorHeapAllocation& allocation : liveAllocations) {
+        allocator.Deallocate(allocation);
+        EXPECT_FALSE(allocation.IsValid());
+    }
+}
+
+// Verify that after everything is freed, re-allocating the same number of allocations hands out
+// only previously used descriptors, i.e. the |kNumOfHeaps| non-shader visible heaps are reused.
+TEST_P(D3D12DescriptorHeapTests, ReuseFreedHeaps) {
+    constexpr uint32_t kDescriptorCount = 4;
+    constexpr uint32_t kAllocationsPerHeap = 25;
+    DummyNonShaderVisibleDescriptorAllocator allocator(mD3DDevice, kDescriptorCount,
+                                                       kAllocationsPerHeap);
+
+    constexpr uint32_t kNumOfHeaps = 10;
+
+    std::list<CPUDescriptorHeapAllocation> allocations;
+    std::set<size_t> allocationPtrs;
+
+    // Allocate |kNumOfHeaps| heaps worth; every base CPU handle must be unique.
+    for (uint32_t i = 0; i < kAllocationsPerHeap * kNumOfHeaps; i++) {
+        CPUDescriptorHeapAllocation allocation = allocator.AllocateCPUDescriptors();
+        allocations.push_back(allocation);
+        EXPECT_TRUE(allocationPtrs.insert(allocation.OffsetFrom(0, 0).ptr).second);
+    }
+
+    // Deallocate all.
+    for (CPUDescriptorHeapAllocation& allocation : allocations) {
+        allocator.Deallocate(allocation);
+        EXPECT_FALSE(allocation.IsValid());
+    }
+
+    allocations.clear();
+
+    // Re-allocate all again.
+    std::set<size_t> reallocatedPtrs;
+    for (uint32_t i = 0; i < kAllocationsPerHeap * kNumOfHeaps; i++) {
+        CPUDescriptorHeapAllocation allocation = allocator.AllocateCPUDescriptors();
+        allocations.push_back(allocation);
+        EXPECT_TRUE(reallocatedPtrs.insert(allocation.OffsetFrom(0, 0).ptr).second);
+        // Each re-allocated handle must be one that was handed out during the first pass.
+        EXPECT_TRUE(allocationPtrs.count(allocation.OffsetFrom(0, 0).ptr) != 0);
+    }
+
+    // Deallocate all again.
+    for (CPUDescriptorHeapAllocation& allocation : allocations) {
+        allocator.Deallocate(allocation);
+        EXPECT_FALSE(allocation.IsValid());
+    }
+}
+
+// Verify repeated rounds of allocation interleaved with partial deallocation.
+TEST_P(D3D12DescriptorHeapTests, AllocateDeallocateMany) {
+    constexpr uint32_t kDescriptorCount = 4;
+    constexpr uint32_t kAllocationsPerHeap = 25;
+    constexpr uint32_t kNumOfHeaps = 2;
+    constexpr uint32_t kAllocationsPerRound = kAllocationsPerHeap * kNumOfHeaps;
+
+    DummyNonShaderVisibleDescriptorAllocator allocator(mD3DDevice, kDescriptorCount,
+                                                       kAllocationsPerHeap);
+
+    // |everyThird| and |everyFifth| are released mid-test; |survivors| is released at the end.
+    std::list<CPUDescriptorHeapAllocation> everyThird;
+    std::list<CPUDescriptorHeapAllocation> everyFifth;
+    std::list<CPUDescriptorHeapAllocation> survivors;
+
+    // Round 1: allocate |kNumOfHeaps| heaps worth.
+    for (uint32_t i = 0; i < kAllocationsPerRound; ++i) {
+        CPUDescriptorHeapAllocation allocation = allocator.AllocateCPUDescriptors();
+        EXPECT_NE(allocation.OffsetFrom(0, 0).ptr, 0u);
+        // Set aside every 3rd allocation so it can be released before the next round.
+        auto& bucket = (i % 3 == 0) ? everyThird : survivors;
+        bucket.push_back(allocation);
+    }
+
+    // Deallocate every 3rd allocation, draining the list as we go.
+    while (!everyThird.empty()) {
+        allocator.Deallocate(everyThird.front());
+        everyThird.pop_front();
+    }
+
+    // Round 2: allocate the same amount again.
+    for (uint32_t i = 0; i < kAllocationsPerRound; ++i) {
+        CPUDescriptorHeapAllocation allocation = allocator.AllocateCPUDescriptors();
+        EXPECT_NE(allocation.OffsetFrom(0, 0).ptr, 0u);
+        // Set aside every 5th allocation so it can be released before the next round.
+        auto& bucket = (i % 5 == 0) ? everyFifth : survivors;
+        bucket.push_back(allocation);
+    }
+
+    // Deallocate every 5th allocation, draining the list as we go.
+    while (!everyFifth.empty()) {
+        allocator.Deallocate(everyFifth.front());
+        everyFifth.pop_front();
+    }
+
+    // Round 3: allocate once more, keeping everything.
+    for (uint32_t i = 0; i < kAllocationsPerRound; ++i) {
+        CPUDescriptorHeapAllocation allocation = allocator.AllocateCPUDescriptors();
+        EXPECT_NE(allocation.OffsetFrom(0, 0).ptr, 0u);
+        survivors.push_back(allocation);
+    }
+
+    // Deallocate remaining.
+    for (CPUDescriptorHeapAllocation& allocation : survivors) {
+        allocator.Deallocate(allocation);
+        EXPECT_FALSE(allocation.IsValid());
+    }
+}
+
 DAWN_INSTANTIATE_TEST(D3D12DescriptorHeapTests,
                       D3D12Backend(),
                       D3D12Backend({"use_d3d12_small_shader_visible_heap"}));