d3d11: Cache the staging buffer for WriteInternal

The overhead of creating a new staging buffer for each write is not
trivial. This caches a handy staging buffer in DeviceD3D11 to eliminate
the overhead.

Bug: chromium:1485789
Change-Id: Iaced53ffdb080b07dddf59f9bf54f05b5f2a7a41
Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/167180
Reviewed-by: Peng Huang <penghuang@chromium.org>
Reviewed-by: Peng Huang <penghuang@google.com>
Commit-Queue: Jie A Chen <jie.a.chen@intel.com>
Reviewed-by: Corentin Wallez <cwallez@chromium.org>
Kokoro: Kokoro <noreply+kokoro@google.com>
Reviewed-by: Austin Eng <enga@chromium.org>
diff --git a/src/dawn/native/d3d11/BufferD3D11.cpp b/src/dawn/native/d3d11/BufferD3D11.cpp
index 1e79f14..50eb886 100644
--- a/src/dawn/native/d3d11/BufferD3D11.cpp
+++ b/src/dawn/native/d3d11/BufferD3D11.cpp
@@ -591,14 +591,8 @@
 
     // If the mD3d11NonConstantBuffer is null, we have to create a staging buffer for transfer the
     // data to mD3d11ConstantBuffer.
-    BufferDescriptor descriptor;
-    descriptor.usage = wgpu::BufferUsage::MapWrite | wgpu::BufferUsage::CopySrc;
-    descriptor.size = Align(size, D3D11BufferSizeAlignment(descriptor.usage));
-    descriptor.mappedAtCreation = false;
-    descriptor.label = "DawnWriteStagingBuffer";
     Ref<BufferBase> stagingBuffer;
-    DAWN_TRY_ASSIGN(stagingBuffer,
-                    Buffer::Create(ToBackend(GetDevice()), Unpack(&descriptor), commandContext));
+    DAWN_TRY_ASSIGN(stagingBuffer, ToBackend(GetDevice())->GetStagingBuffer(commandContext, size));
 
     DAWN_TRY(ToBackend(stagingBuffer)->WriteInternal(commandContext, 0, data, size));
 
diff --git a/src/dawn/native/d3d11/DeviceD3D11.cpp b/src/dawn/native/d3d11/DeviceD3D11.cpp
index ebdf352..2c5c3d1 100644
--- a/src/dawn/native/d3d11/DeviceD3D11.cpp
+++ b/src/dawn/native/d3d11/DeviceD3D11.cpp
@@ -533,4 +533,30 @@
     return mImplicitPixelLocalStorageAttachmentTextureViews[implicitAttachmentIndex].Get();
 }
 
+// Returns a MapWrite|CopySrc staging buffer of at least 'size' bytes, reusing the cached one.
+ResultOrError<Ref<BufferBase>> Device::GetStagingBuffer(
+    const ScopedCommandRecordingContext* commandContext,
+    uint64_t size) {
+    constexpr uint64_t kMinSize = 4 * 1024;
+    constexpr uint64_t kMaxSize = 16 * 1024 * 1024;
+    // Create when nothing is cached (even for size == 0, using kMinSize) or the cache is too small.
+    if (mStagingBuffer.Get() == nullptr || size > mStagingBuffer->GetSize()) {
+        BufferDescriptor descriptor;
+        descriptor.usage = wgpu::BufferUsage::MapWrite | wgpu::BufferUsage::CopySrc;
+        descriptor.size = Align(size > 0 ? size : kMinSize, kMinSize);
+        descriptor.mappedAtCreation = false;
+        descriptor.label = "DawnDeviceStagingBuffer";
+        Ref<BufferBase> buffer;
+        DAWN_TRY_ASSIGN(buffer, Buffer::Create(this, Unpack(&descriptor), commandContext));
+        // We don't cache the buffer if it's too large.
+        if (descriptor.size > kMaxSize) {
+            return buffer;
+        }
+        mStagingBuffer = buffer;
+    }
+    // Ensure there is no more than 1 active usage of the staging buffer.
+    DAWN_ASSERT(mStagingBuffer->GetRefCountForTesting() <= 1);
+    return mStagingBuffer;
+}
+
 }  // namespace dawn::native::d3d11
diff --git a/src/dawn/native/d3d11/DeviceD3D11.h b/src/dawn/native/d3d11/DeviceD3D11.h
index 7815c97..6aacd73 100644
--- a/src/dawn/native/d3d11/DeviceD3D11.h
+++ b/src/dawn/native/d3d11/DeviceD3D11.h
@@ -101,6 +101,13 @@
         uint32_t height,
         uint32_t implicitAttachmentIndex);
 
+    // Grab a staging buffer (MapWrite | CopySrc), the size of which is no less than 'size'.
+    // Note: We assume only 1 staging buffer is active, so the client should release it as soon as
+    // possible once the buffer usage is done.
+    ResultOrError<Ref<BufferBase>> GetStagingBuffer(
+        const ScopedCommandRecordingContext* commandContext,
+        uint64_t size);
+
   private:
     using Base = d3d::Device;
     using Base::Base;
@@ -157,6 +164,9 @@
 
     // TODO(dawn:1704): decide when to clear the cached implicit pixel local storage attachments.
     std::array<Ref<TextureViewBase>, kMaxPLSSlots> mImplicitPixelLocalStorageAttachmentTextureViews;
+
+    // The cached staging buffer reused by GetStagingBuffer(); may be null when nothing is cached.
+    Ref<BufferBase> mStagingBuffer;
 };
 
 }  // namespace dawn::native::d3d11