Copy from a zeroed-out buffer to clear in D3D12

Setting a buffer to zero now copies from a previously allocated,
zeroed-out buffer to perform the clear, rather than making a new
allocation every time.

Bug: dawn:1160
Change-Id: I0c8e7e56b2afcb5961723e352d8bbdf276f4557c
Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/70760
Reviewed-by: Corentin Wallez <cwallez@chromium.org>
Commit-Queue: Brandon Jones <bajones@chromium.org>
diff --git a/src/dawn_native/d3d12/BufferD3D12.cpp b/src/dawn_native/d3d12/BufferD3D12.cpp
index 301b952..7233150 100644
--- a/src/dawn_native/d3d12/BufferD3D12.cpp
+++ b/src/dawn_native/d3d12/BufferD3D12.cpp
@@ -469,6 +469,8 @@
                                  "D3D12 map at clear buffer"));
             memset(mMappedData, clearValue, size);
             UnmapImpl();
+        } else if (clearValue == 0u) {
+            DAWN_TRY(device->ClearBufferToZero(commandContext, this, offset, size));
         } else {
             // TODO(crbug.com/dawn/852): use ClearUnorderedAccessView*() when the buffer usage
             // includes STORAGE.
diff --git a/src/dawn_native/d3d12/DeviceD3D12.cpp b/src/dawn_native/d3d12/DeviceD3D12.cpp
index a9bf033..ce17abf 100644
--- a/src/dawn_native/d3d12/DeviceD3D12.cpp
+++ b/src/dawn_native/d3d12/DeviceD3D12.cpp
@@ -15,6 +15,7 @@
 #include "dawn_native/d3d12/DeviceD3D12.h"
 
 #include "common/GPUInfo.h"
+#include "dawn_native/DynamicUploader.h"
 #include "dawn_native/Instance.h"
 #include "dawn_native/d3d12/AdapterD3D12.h"
 #include "dawn_native/d3d12/BackendD3D12.h"
@@ -49,6 +50,9 @@
     static constexpr uint16_t kShaderVisibleDescriptorHeapSize = 1024;
     static constexpr uint8_t kAttachmentDescriptorHeapSize = 64;
 
+    // Value may change in the future to better accommodate large clears.
+    static constexpr uint64_t kZeroBufferSize = 1024 * 1024 * 4;  // 4 Mb
+
     static constexpr uint64_t kMaxDebugMessagesToPrint = 5;
 
     // static
@@ -166,6 +170,9 @@
         // The environment can only use DXC when it's available. Override the decision if it is not
         // applicable.
         DAWN_TRY(ApplyUseDxcToggle());
+
+        DAWN_TRY(CreateZeroBuffer());
+
         return {};
     }
 
@@ -251,6 +258,59 @@
         return &mPendingCommands;
     }
 
+    MaybeError Device::CreateZeroBuffer() {
+        BufferDescriptor zeroBufferDescriptor;
+        zeroBufferDescriptor.usage = wgpu::BufferUsage::CopySrc | wgpu::BufferUsage::CopyDst;
+        zeroBufferDescriptor.size = kZeroBufferSize;
+        zeroBufferDescriptor.label = "ZeroBuffer_Internal";
+        DAWN_TRY_ASSIGN(mZeroBuffer, Buffer::Create(this, &zeroBufferDescriptor));
+
+        return {};
+    }
+
+    MaybeError Device::ClearBufferToZero(CommandRecordingContext* commandContext,
+                                         BufferBase* destination,
+                                         uint64_t offset,
+                                         uint64_t size) {
+        // TODO(crbug.com/dawn/852): It would be ideal to clear the buffer in CreateZeroBuffer, but
+        // the allocation of the staging buffer causes various end2end tests that monitor heap usage
+        // to fail if it's done during device creation. Perhaps ClearUnorderedAccessView*() can be
+        // used to avoid that.
+        if (!mZeroBuffer->IsDataInitialized()) {
+            DynamicUploader* uploader = GetDynamicUploader();
+            UploadHandle uploadHandle;
+            DAWN_TRY_ASSIGN(uploadHandle,
+                            uploader->Allocate(kZeroBufferSize, GetPendingCommandSerial(),
+                                               kCopyBufferToBufferOffsetAlignment));
+
+            memset(uploadHandle.mappedBuffer, 0u, kZeroBufferSize);
+
+            CopyFromStagingToBufferImpl(commandContext, uploadHandle.stagingBuffer,
+                                        uploadHandle.startOffset, mZeroBuffer.Get(), 0,
+                                        kZeroBufferSize);
+
+            mZeroBuffer->SetIsDataInitialized();
+        }
+
+        Buffer* dstBuffer = ToBackend(destination);
+
+        // Necessary to ensure residency of the zero buffer.
+        mZeroBuffer->TrackUsageAndTransitionNow(commandContext, wgpu::BufferUsage::CopySrc);
+        dstBuffer->TrackUsageAndTransitionNow(commandContext, wgpu::BufferUsage::CopyDst);
+
+        while (size > 0) {
+            uint64_t copySize = std::min(kZeroBufferSize, size);
+            commandContext->GetCommandList()->CopyBufferRegion(
+                dstBuffer->GetD3D12Resource(), offset, mZeroBuffer->GetD3D12Resource(), 0,
+                copySize);
+
+            offset += copySize;
+            size -= copySize;
+        }
+
+        return {};
+    }
+
     MaybeError Device::TickImpl() {
         // Perform cleanup operations to free unused objects
         ExecutionSerial completedSerial = GetCompletedCommandSerial();
diff --git a/src/dawn_native/d3d12/DeviceD3D12.h b/src/dawn_native/d3d12/DeviceD3D12.h
index 42810e3..e6b7234 100644
--- a/src/dawn_native/d3d12/DeviceD3D12.h
+++ b/src/dawn_native/d3d12/DeviceD3D12.h
@@ -72,6 +72,11 @@
 
         ResultOrError<CommandRecordingContext*> GetPendingCommandContext();
 
+        MaybeError ClearBufferToZero(CommandRecordingContext* commandContext,
+                                     BufferBase* destination,
+                                     uint64_t destinationOffset,
+                                     uint64_t size);
+
         const D3D12DeviceInfo& GetDeviceInfo() const;
 
         MaybeError NextSerial();
@@ -191,6 +196,8 @@
 
         MaybeError ApplyUseDxcToggle();
 
+        MaybeError CreateZeroBuffer();
+
         ComPtr<ID3D12Fence> mFence;
         HANDLE mFenceEvent = nullptr;
         ResultOrError<ExecutionSerial> CheckAndUpdateCompletedSerials() override;
@@ -246,6 +253,10 @@
         // release is called.
         std::unique_ptr<SamplerHeapCache> mSamplerHeapCache;
 
+        // A buffer filled with zeros that is used to copy into other buffers when they need to be
+        // cleared.
+        Ref<Buffer> mZeroBuffer;
+
         // The number of nanoseconds required for a timestamp query to be incremented by 1
         float mTimestampPeriod = 1.0f;
     };
diff --git a/src/dawn_native/d3d12/ResidencyManagerD3D12.cpp b/src/dawn_native/d3d12/ResidencyManagerD3D12.cpp
index abf722f..ffc1e1b 100644
--- a/src/dawn_native/d3d12/ResidencyManagerD3D12.cpp
+++ b/src/dawn_native/d3d12/ResidencyManagerD3D12.cpp
@@ -351,7 +351,6 @@
     // Places an artifical cap on Dawn's budget so we can test in a predictable manner. If used,
     // this function must be called before any resources have been created.
     void ResidencyManager::RestrictBudgetForTesting(uint64_t artificialBudgetCap) {
-        ASSERT(mVideoMemoryInfo.local.lruCache.empty());
         ASSERT(mVideoMemoryInfo.nonLocal.lruCache.empty());
         ASSERT(!mRestrictBudgetForTesting);