Add validation for buffer-to-buffer copies within the same buffer

This patch adds validation for buffer-to-buffer copies within the same
buffer. When the source and destination buffers passed to
CopyBufferToBuffer() are the same buffer, the copy regions must not overlap;
otherwise the copy causes undefined behavior on Metal and Vulkan.

BUG=dawn:17
TEST=dawn_unittests, dawn_end2end_tests

Change-Id: I63ab790787ec0a973ae22787a9348bddfb6a5373
Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/21602
Reviewed-by: Austin Eng <enga@chromium.org>
Commit-Queue: Jiawei Shao <jiawei.shao@intel.com>
diff --git a/src/dawn_native/CommandEncoder.cpp b/src/dawn_native/CommandEncoder.cpp
index f0dd1d1..dbd2069 100644
--- a/src/dawn_native/CommandEncoder.cpp
+++ b/src/dawn_native/CommandEncoder.cpp
@@ -102,6 +102,20 @@
             return {};
         }
 
+        // Rejects a same-buffer copy whose source and destination byte ranges overlap.
+        MaybeError ValidateB2BCopyWithinSameBuffer(uint64_t dataSize,
+                                                   uint64_t srcOffset,
+                                                   uint64_t dstOffset) {
+            // [offset, offset + dataSize) ranges overlap iff the offsets are closer than dataSize.
+            // Comparing the distance cannot wrap around, unlike computing `minOffset + dataSize`.
+            uint64_t distance = std::max(srcOffset, dstOffset) - std::min(srcOffset, dstOffset);
+            if (distance < dataSize) {
+                return DAWN_VALIDATION_ERROR(
+                    "Copy regions cannot overlap when copy within the same buffer");
+            }
+            return {};
+        }
+
         MaybeError ValidateTexelBufferOffset(const BufferCopyView& bufferCopy,
                                              const Format& format) {
             if (bufferCopy.offset % format.blockByteSize != 0) {
@@ -648,6 +662,11 @@
                 DAWN_TRY(ValidateCopySizeFitsInBuffer(destination, destinationOffset, size));
                 DAWN_TRY(ValidateB2BCopyAlignment(size, sourceOffset, destinationOffset));
 
+                if (source == destination) {
+                    DAWN_TRY(
+                        ValidateB2BCopyWithinSameBuffer(size, sourceOffset, destinationOffset));
+                }
+
                 DAWN_TRY(ValidateCanUseAs(source, wgpu::BufferUsage::CopySrc));
                 DAWN_TRY(ValidateCanUseAs(destination, wgpu::BufferUsage::CopyDst));
 
diff --git a/src/tests/end2end/CopyTests.cpp b/src/tests/end2end/CopyTests.cpp
index f00df82..7265a2d 100644
--- a/src/tests/end2end/CopyTests.cpp
+++ b/src/tests/end2end/CopyTests.cpp
@@ -71,6 +71,8 @@
         }
 };
 
+class CopyTests_B2B : public CopyTests {};  // Fixture for buffer-to-buffer copy tests.
+
 class CopyTests_T2B : public CopyTests {
     protected:
 
@@ -399,6 +401,57 @@
     }
 };
 
+// Test that copying between non-overlapping regions of the same buffer works
+TEST_P(CopyTests_B2B, CopyWithinSameBuffer) {
+    // Copy the first 2 uint32_t values to the 4th and 5th uint32_t values.
+    {
+        // Create a buffer with 6 uint32_t values.
+        wgpu::Buffer buffer = utils::CreateBufferFromData(
+            device, wgpu::BufferUsage::CopySrc | wgpu::BufferUsage::CopyDst,
+            {1u, 2u, 3u, 4u, 5u, 6u});
+
+        constexpr uint32_t kSrcOffset = 0u;
+        constexpr uint32_t kDstOffset = 3u * sizeof(uint32_t);
+        constexpr uint32_t kSize = 2u * sizeof(uint32_t);
+        wgpu::CommandEncoder encoder = device.CreateCommandEncoder();
+        encoder.CopyBufferToBuffer(buffer, kSrcOffset, buffer, kDstOffset, kSize);
+        wgpu::CommandBuffer commands = encoder.Finish();
+        queue.Submit(1, &commands);
+
+        // Verify the first two uint32_t values are correctly copied to the locations where the 4th
+        // and 5th uint32_t values are stored.
+        std::array<uint32_t, 6> kExpected = {{1u, 2u, 3u, 1u, 2u, 6u}};
+        EXPECT_BUFFER_U32_RANGE_EQ(kExpected.data(), buffer, 0, kExpected.size());
+    }
+
+    // Copy the 4th and 5th uint32_t values to the first two uint32_t values.
+    {
+        // Create a buffer with 6 uint32_t values.
+        wgpu::Buffer buffer = utils::CreateBufferFromData(
+            device, wgpu::BufferUsage::CopySrc | wgpu::BufferUsage::CopyDst,
+            {1u, 2u, 3u, 4u, 5u, 6u});
+
+        constexpr uint32_t kSrcOffset = 3u * sizeof(uint32_t);
+        constexpr uint32_t kDstOffset = 0u;
+        constexpr uint32_t kSize = 2u * sizeof(uint32_t);
+        wgpu::CommandEncoder encoder = device.CreateCommandEncoder();
+        encoder.CopyBufferToBuffer(buffer, kSrcOffset, buffer, kDstOffset, kSize);
+        wgpu::CommandBuffer commands = encoder.Finish();
+        queue.Submit(1, &commands);
+
+        // Verify the 4th and 5th uint32_t values are correctly copied to the locations where the
+        // first two uint32_t values are stored.
+        std::array<uint32_t, 6> kExpected = {{4u, 5u, 3u, 4u, 5u, 6u}};
+        EXPECT_BUFFER_U32_RANGE_EQ(kExpected.data(), buffer, 0, kExpected.size());
+    }
+}
+
+DAWN_INSTANTIATE_TEST(CopyTests_B2B,
+                      D3D12Backend(),
+                      MetalBackend(),
+                      OpenGLBackend(),
+                      VulkanBackend());
+
 // Test that copying an entire texture with 256-byte aligned dimensions works
 TEST_P(CopyTests_T2B, FullTextureAligned) {
     constexpr uint32_t kWidth = 256;
diff --git a/src/tests/unittests/validation/CopyCommandsValidationTests.cpp b/src/tests/unittests/validation/CopyCommandsValidationTests.cpp
index 8d195f2..ef6ad1e 100644
--- a/src/tests/unittests/validation/CopyCommandsValidationTests.cpp
+++ b/src/tests/unittests/validation/CopyCommandsValidationTests.cpp
@@ -266,6 +266,61 @@
     }
 }
 
+// Test that B2B copies within the same buffer must not have overlapping copy regions.
+TEST_F(CopyCommandTest_B2B, CopyWithinSameBuffer) {
+    constexpr uint32_t kBufferSize = 16u;
+    wgpu::Buffer buffer =
+        CreateBuffer(kBufferSize, wgpu::BufferUsage::CopySrc | wgpu::BufferUsage::CopyDst);
+
+    // When srcOffset < dstOffset, and srcOffset + copySize > dstOffset, it is not allowed because
+    // the copy regions are overlapping.
+    {
+        constexpr uint32_t kSrcOffset = 0u;
+        constexpr uint32_t kDstOffset = 4u;
+        constexpr uint32_t kCopySize = 8u;
+        ASSERT(kDstOffset > kSrcOffset && kDstOffset < kSrcOffset + kCopySize);
+
+        wgpu::CommandEncoder encoder = device.CreateCommandEncoder();
+        encoder.CopyBufferToBuffer(buffer, kSrcOffset, buffer, kDstOffset, kCopySize);
+        ASSERT_DEVICE_ERROR(encoder.Finish());
+    }
+
+    // When srcOffset < dstOffset, and srcOffset + copySize == dstOffset, it is allowed
+    // because the copy regions are not overlapping.
+    {
+        constexpr uint32_t kSrcOffset = 0u;
+        constexpr uint32_t kDstOffset = 8u;
+        constexpr uint32_t kCopySize = kDstOffset - kSrcOffset;
+        wgpu::CommandEncoder encoder = device.CreateCommandEncoder();
+        encoder.CopyBufferToBuffer(buffer, kSrcOffset, buffer, kDstOffset, kCopySize);
+        encoder.Finish();
+    }
+
+    // When srcOffset > dstOffset, and srcOffset < dstOffset + copySize, it is not allowed because
+    // the copy regions are overlapping.
+    {
+        constexpr uint32_t kSrcOffset = 4u;
+        constexpr uint32_t kDstOffset = 0u;
+        constexpr uint32_t kCopySize = 8u;
+        ASSERT(kSrcOffset > kDstOffset && kSrcOffset < kDstOffset + kCopySize);
+
+        wgpu::CommandEncoder encoder = device.CreateCommandEncoder();
+        encoder.CopyBufferToBuffer(buffer, kSrcOffset, buffer, kDstOffset, kCopySize);
+        ASSERT_DEVICE_ERROR(encoder.Finish());
+    }
+
+    // When srcOffset > dstOffset, and dstOffset + copySize == srcOffset, it is allowed
+    // because the copy regions are not overlapping.
+    {
+        constexpr uint32_t kSrcOffset = 8u;
+        constexpr uint32_t kDstOffset = 0u;
+        constexpr uint32_t kCopySize = kSrcOffset - kDstOffset;
+        wgpu::CommandEncoder encoder = device.CreateCommandEncoder();
+        encoder.CopyBufferToBuffer(buffer, kSrcOffset, buffer, kDstOffset, kCopySize);
+        encoder.Finish();
+    }
+}
+
 class CopyCommandTest_B2T : public CopyCommandTest {};
 
 // Test a successfull B2T copy