Copy from a zeroed-out buffer to clear in D3D12
Setting a buffer to zero now copies from a previously
allocated, zeroed-out buffer to perform the clear rather than making new
allocations every time.
Bug: dawn:1160
Change-Id: I0c8e7e56b2afcb5961723e352d8bbdf276f4557c
Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/70760
Reviewed-by: Corentin Wallez <cwallez@chromium.org>
Commit-Queue: Brandon Jones <bajones@chromium.org>
diff --git a/src/dawn_native/d3d12/BufferD3D12.cpp b/src/dawn_native/d3d12/BufferD3D12.cpp
index 301b952..7233150 100644
--- a/src/dawn_native/d3d12/BufferD3D12.cpp
+++ b/src/dawn_native/d3d12/BufferD3D12.cpp
@@ -469,6 +469,8 @@
"D3D12 map at clear buffer"));
memset(mMappedData, clearValue, size);
UnmapImpl();
+ } else if (clearValue == 0u) {
+ DAWN_TRY(device->ClearBufferToZero(commandContext, this, offset, size));
} else {
// TODO(crbug.com/dawn/852): use ClearUnorderedAccessView*() when the buffer usage
// includes STORAGE.
diff --git a/src/dawn_native/d3d12/DeviceD3D12.cpp b/src/dawn_native/d3d12/DeviceD3D12.cpp
index a9bf033..ce17abf 100644
--- a/src/dawn_native/d3d12/DeviceD3D12.cpp
+++ b/src/dawn_native/d3d12/DeviceD3D12.cpp
@@ -15,6 +15,7 @@
#include "dawn_native/d3d12/DeviceD3D12.h"
#include "common/GPUInfo.h"
+#include "dawn_native/DynamicUploader.h"
#include "dawn_native/Instance.h"
#include "dawn_native/d3d12/AdapterD3D12.h"
#include "dawn_native/d3d12/BackendD3D12.h"
@@ -49,6 +50,9 @@
static constexpr uint16_t kShaderVisibleDescriptorHeapSize = 1024;
static constexpr uint8_t kAttachmentDescriptorHeapSize = 64;
+ // Value may change in the future to better accommodate large clears.
+ static constexpr uint64_t kZeroBufferSize = 1024 * 1024 * 4; // 4 MiB
+
static constexpr uint64_t kMaxDebugMessagesToPrint = 5;
// static
@@ -166,6 +170,9 @@
// The environment can only use DXC when it's available. Override the decision if it is not
// applicable.
DAWN_TRY(ApplyUseDxcToggle());
+
+ DAWN_TRY(CreateZeroBuffer());
+
return {};
}
@@ -251,6 +258,59 @@
return &mPendingCommands;
}
+ MaybeError Device::CreateZeroBuffer() {  // Allocates mZeroBuffer; its contents are not written here.
+ BufferDescriptor zeroBufferDescriptor;
+ zeroBufferDescriptor.usage = wgpu::BufferUsage::CopySrc | wgpu::BufferUsage::CopyDst;  // CopyDst to receive the zeros, CopySrc to clear from.
+ zeroBufferDescriptor.size = kZeroBufferSize;
+ zeroBufferDescriptor.label = "ZeroBuffer_Internal";
+ DAWN_TRY_ASSIGN(mZeroBuffer, Buffer::Create(this, &zeroBufferDescriptor));  // NOTE(review): presumably returns the error to the caller on failure - confirm macro semantics.
+
+ return {};
+ }
+
+ MaybeError Device::ClearBufferToZero(CommandRecordingContext* commandContext,
+ BufferBase* destination,
+ uint64_t offset,
+ uint64_t size) {  // Zeroes [offset, offset + size) of destination by copying from mZeroBuffer.
+ // TODO(crbug.com/dawn/852): It would be ideal to clear the buffer in CreateZeroBuffer, but
+ // the allocation of the staging buffer causes various end2end tests that monitor heap usage
+ // to fail if it's done during device creation. Perhaps ClearUnorderedAccessView*() can be
+ // used to avoid that.
+ if (!mZeroBuffer->IsDataInitialized()) {  // Not filled yet: upload zeros once, on first use.
+ DynamicUploader* uploader = GetDynamicUploader();
+ UploadHandle uploadHandle;
+ DAWN_TRY_ASSIGN(uploadHandle,
+ uploader->Allocate(kZeroBufferSize, GetPendingCommandSerial(),
+ kCopyBufferToBufferOffsetAlignment));
+
+ memset(uploadHandle.mappedBuffer, 0u, kZeroBufferSize);  // Zero the mapped staging memory.
+
+ CopyFromStagingToBufferImpl(commandContext, uploadHandle.stagingBuffer,
+ uploadHandle.startOffset, mZeroBuffer.Get(), 0,
+ kZeroBufferSize);  // Copies the zeroed staging range into mZeroBuffer at offset 0.
+
+ mZeroBuffer->SetIsDataInitialized();  // Makes later calls skip the upload above.
+ }
+
+ Buffer* dstBuffer = ToBackend(destination);
+
+ // Necessary to ensure residency of the zero buffer.
+ mZeroBuffer->TrackUsageAndTransitionNow(commandContext, wgpu::BufferUsage::CopySrc);
+ dstBuffer->TrackUsageAndTransitionNow(commandContext, wgpu::BufferUsage::CopyDst);
+
+ while (size > 0) {  // mZeroBuffer is only kZeroBufferSize bytes, so clear chunk by chunk.
+ uint64_t copySize = std::min(kZeroBufferSize, size);  // The final chunk may be shorter.
+ commandContext->GetCommandList()->CopyBufferRegion(
+ dstBuffer->GetD3D12Resource(), offset, mZeroBuffer->GetD3D12Resource(), 0,
+ copySize);  // Source offset is always 0: every chunk re-reads the start of mZeroBuffer.
+
+ offset += copySize;
+ size -= copySize;
+ }
+
+ return {};
+ }
+
MaybeError Device::TickImpl() {
// Perform cleanup operations to free unused objects
ExecutionSerial completedSerial = GetCompletedCommandSerial();
diff --git a/src/dawn_native/d3d12/DeviceD3D12.h b/src/dawn_native/d3d12/DeviceD3D12.h
index 42810e3..e6b7234 100644
--- a/src/dawn_native/d3d12/DeviceD3D12.h
+++ b/src/dawn_native/d3d12/DeviceD3D12.h
@@ -72,6 +72,11 @@
ResultOrError<CommandRecordingContext*> GetPendingCommandContext();
+ MaybeError ClearBufferToZero(CommandRecordingContext* commandContext,
+ BufferBase* destination,
+ uint64_t destinationOffset,
+ uint64_t size);  // Zeroes size bytes of destination, starting at destinationOffset.
+
const D3D12DeviceInfo& GetDeviceInfo() const;
MaybeError NextSerial();
@@ -191,6 +196,8 @@
MaybeError ApplyUseDxcToggle();
+ MaybeError CreateZeroBuffer();  // Allocates mZeroBuffer; ClearBufferToZero fills it on first use.
+
ComPtr<ID3D12Fence> mFence;
HANDLE mFenceEvent = nullptr;
ResultOrError<ExecutionSerial> CheckAndUpdateCompletedSerials() override;
@@ -246,6 +253,10 @@
// release is called.
std::unique_ptr<SamplerHeapCache> mSamplerHeapCache;
+ // A buffer filled with zeros that is used to copy into other buffers when they need to be
+ // cleared.
+ Ref<Buffer> mZeroBuffer;
+
// The number of nanoseconds required for a timestamp query to be incremented by 1
float mTimestampPeriod = 1.0f;
};
diff --git a/src/dawn_native/d3d12/ResidencyManagerD3D12.cpp b/src/dawn_native/d3d12/ResidencyManagerD3D12.cpp
index abf722f..ffc1e1b 100644
--- a/src/dawn_native/d3d12/ResidencyManagerD3D12.cpp
+++ b/src/dawn_native/d3d12/ResidencyManagerD3D12.cpp
@@ -351,7 +351,6 @@
// Places an artifical cap on Dawn's budget so we can test in a predictable manner. If used,
// this function must be called before any resources have been created.
void ResidencyManager::RestrictBudgetForTesting(uint64_t artificialBudgetCap) {
- ASSERT(mVideoMemoryInfo.local.lruCache.empty());
ASSERT(mVideoMemoryInfo.nonLocal.lruCache.empty());
ASSERT(!mRestrictBudgetForTesting);