Native: Add buffer's internal CopySrc usage

The standard WebGPU spec doesn't allow a readback (MapRead) buffer to
have the CopySrc usage. However, some workarounds, such as T2B
(texture-to-buffer) copies implemented with a compute shader, might need
to create a temporary buffer that acts as the compute shader's output.
Because a T2B copy might use a bytesPerRow or rowsPerImage larger than
the copy size, some padding bytes in the destination buffer must be
preserved. To do that, we need to copy the readback buffer's old content
to the temporary buffer, which requires an internal CopySrc usage.

Bug: 348654098
Change-Id: Ie745c941d296b4d8f0fc7d5d47277b0e5f4f833c
Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/195399
Reviewed-by: Austin Eng <enga@chromium.org>
Commit-Queue: Quyen Le <lehoangquyen@chromium.org>
Reviewed-by: Corentin Wallez <cwallez@chromium.org>
diff --git a/src/dawn/native/CommandEncoder.cpp b/src/dawn/native/CommandEncoder.cpp
index adcbedc..0c2b901 100644
--- a/src/dawn/native/CommandEncoder.cpp
+++ b/src/dawn/native/CommandEncoder.cpp
@@ -1478,7 +1478,8 @@
                                  destination);
                 DAWN_TRY(ValidateB2BCopyAlignment(size, sourceOffset, destinationOffset));
 
-                DAWN_TRY_CONTEXT(ValidateCanUseAs(source, wgpu::BufferUsage::CopySrc),
+                DAWN_TRY_CONTEXT(ValidateCanUseAsInternal(
+                                     source, wgpu::BufferUsage::CopySrc | kInternalCopySrcBuffer),
                                  "validating source %s usage.", source);
                 DAWN_TRY_CONTEXT(ValidateCanUseAs(destination, wgpu::BufferUsage::CopyDst),
                                  "validating destination %s usage.", destination);
diff --git a/src/dawn/native/CommandValidation.cpp b/src/dawn/native/CommandValidation.cpp
index 8ed9dab..1b9989f 100644
--- a/src/dawn/native/CommandValidation.cpp
+++ b/src/dawn/native/CommandValidation.cpp
@@ -655,6 +655,12 @@
     return {};
 }
 
+MaybeError ValidateCanUseAsInternal(const BufferBase* buffer, wgpu::BufferUsage usage) {
+    DAWN_INVALID_IF(!(buffer->GetUsage() & usage), "%s internal usage (%s) doesn't include %s.",
+                    buffer, buffer->GetUsage(), usage);
+    return {};
+}
+
 namespace {
 std::string TextureFormatsToString(const ColorAttachmentFormats& formats) {
     std::ostringstream ss;
diff --git a/src/dawn/native/CommandValidation.h b/src/dawn/native/CommandValidation.h
index b07ad87..36e737f 100644
--- a/src/dawn/native/CommandValidation.h
+++ b/src/dawn/native/CommandValidation.h
@@ -114,6 +114,7 @@
                             wgpu::TextureUsage usage,
                             UsageValidationMode mode);
 MaybeError ValidateCanUseAs(const BufferBase* buffer, wgpu::BufferUsage usage);
+MaybeError ValidateCanUseAsInternal(const BufferBase* buffer, wgpu::BufferUsage usage);
 
 using ColorAttachmentFormats = absl::InlinedVector<const Format*, kMaxColorAttachments>;
 MaybeError ValidateColorAttachmentBytesPerSample(DeviceBase* device,
diff --git a/src/dawn/native/d3d11/BufferD3D11.cpp b/src/dawn/native/d3d11/BufferD3D11.cpp
index af37447..1346d40 100644
--- a/src/dawn/native/d3d11/BufferD3D11.cpp
+++ b/src/dawn/native/d3d11/BufferD3D11.cpp
@@ -52,7 +52,8 @@
 
 namespace {
 
-constexpr wgpu::BufferUsage kCopyUsages = wgpu::BufferUsage::CopySrc | wgpu::BufferUsage::CopyDst;
+constexpr wgpu::BufferUsage kCopyUsages =
+    wgpu::BufferUsage::CopySrc | wgpu::BufferUsage::CopyDst | kInternalCopySrcBuffer;
 
 constexpr wgpu::BufferUsage kStagingUsages = kMappableBufferUsages | kCopyUsages;
 
@@ -75,7 +76,9 @@
 }
 
 bool IsUpload(wgpu::BufferUsage usage) {
-    return usage == (wgpu::BufferUsage::CopySrc | wgpu::BufferUsage::MapWrite);
+    return usage & wgpu::BufferUsage::MapWrite &&
+           IsSubset(usage, kInternalCopySrcBuffer | wgpu::BufferUsage::CopySrc |
+                               wgpu::BufferUsage::MapWrite);
 }
 
 bool IsStaging(wgpu::BufferUsage usage) {
@@ -829,7 +832,8 @@
     // We need to create a separate storage for uniform usage, because D3D11 doesn't allow constant
     // buffer to be used for other purposes.
     if (usagesToHandle & wgpu::BufferUsage::Uniform) {
-        usagesToHandle &= ~(wgpu::BufferUsage::Uniform | wgpu::BufferUsage::CopySrc);
+        usagesToHandle &=
+            ~(wgpu::BufferUsage::Uniform | wgpu::BufferUsage::CopySrc | kInternalCopySrcBuffer);
 
         // Since D3D11 doesn't allow both CPU & GPU to write to a buffer, we need separate
         // storages for CPU writes and GPU writes.
diff --git a/src/dawn/native/dawn_platform.h b/src/dawn/native/dawn_platform.h
index 1fe5e37..f30940f 100644
--- a/src/dawn/native/dawn_platform.h
+++ b/src/dawn/native/dawn_platform.h
@@ -51,8 +51,15 @@
 static constexpr wgpu::BufferUsage kReadOnlyStorageBuffer =
     static_cast<wgpu::BufferUsage>(1u << 30);
 
+// Add an extra buffer usage (copy-src buffer usage) that can be combined with MapRead
+static constexpr wgpu::BufferUsage kInternalCopySrcBuffer =
+    static_cast<wgpu::BufferUsage>(1u << 29);
+
+// TODO(350497225): We should store Buffer's internal and external usage in separate member
+// variables, so that the external usage can be queried directly without bit hacks using this
+// special flag.
 static constexpr wgpu::BufferUsage kAllInternalBufferUsages =
-    kInternalStorageBuffer | kReadOnlyStorageBuffer;
+    kInternalStorageBuffer | kReadOnlyStorageBuffer | kInternalCopySrcBuffer;
 
 // Extra texture usages
 // Usage to denote an extra tag value used in system specific ways.
diff --git a/src/dawn/native/vulkan/BufferVk.cpp b/src/dawn/native/vulkan/BufferVk.cpp
index 57a3f12..46bd395 100644
--- a/src/dawn/native/vulkan/BufferVk.cpp
+++ b/src/dawn/native/vulkan/BufferVk.cpp
@@ -55,7 +55,7 @@
 VkBufferUsageFlags VulkanBufferUsage(wgpu::BufferUsage usage) {
     VkBufferUsageFlags flags = 0;
 
-    if (usage & wgpu::BufferUsage::CopySrc) {
+    if (usage & (wgpu::BufferUsage::CopySrc | kInternalCopySrcBuffer)) {
         flags |= VK_BUFFER_USAGE_TRANSFER_SRC_BIT;
     }
     if (usage & wgpu::BufferUsage::CopyDst) {
@@ -89,7 +89,8 @@
     if (usage & kMappableBufferUsages) {
         flags |= VK_PIPELINE_STAGE_HOST_BIT;
     }
-    if (usage & (wgpu::BufferUsage::CopySrc | wgpu::BufferUsage::CopyDst)) {
+    if (usage &
+        (wgpu::BufferUsage::CopySrc | wgpu::BufferUsage::CopyDst | kInternalCopySrcBuffer)) {
         flags |= VK_PIPELINE_STAGE_TRANSFER_BIT;
     }
     if (usage & (wgpu::BufferUsage::Index | wgpu::BufferUsage::Vertex)) {
@@ -125,7 +126,7 @@
     if (usage & wgpu::BufferUsage::MapWrite) {
         flags |= VK_ACCESS_HOST_WRITE_BIT;
     }
-    if (usage & wgpu::BufferUsage::CopySrc) {
+    if (usage & (wgpu::BufferUsage::CopySrc | kInternalCopySrcBuffer)) {
         flags |= VK_ACCESS_TRANSFER_READ_BIT;
     }
     if (usage & wgpu::BufferUsage::CopyDst) {