Deprecate Buffer::SetSubData in favor of Queue::WriteBuffer

Bug: dawn:22
Change-Id: I00b3cd65ac4eb494b05918251f4b3b2bcaf24f71
Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/22200
Commit-Queue: Corentin Wallez <cwallez@chromium.org>
Reviewed-by: Kai Ninomiya <kainino@chromium.org>
diff --git a/dawn.json b/dawn.json
index d44c029..d398a4e 100644
--- a/dawn.json
+++ b/dawn.json
@@ -902,6 +902,15 @@
                 "args": [
                     {"name": "descriptor", "type": "fence descriptor", "annotation": "const*", "optional": true}
                 ]
+            },
+            {
+                "name": "write buffer",
+                "args": [
+                    {"name": "buffer", "type": "buffer"},
+                    {"name": "buffer offset", "type": "uint64_t"},
+                    {"name": "data", "type": "void", "annotation": "const*", "length": "size"},
+                    {"name": "size", "type": "size_t"}
+                ]
             }
         ]
     },
@@ -1636,6 +1645,9 @@
     "int32_t": {
         "category": "native"
     },
+    "size_t": {
+        "category": "native"
+    },
     "uint64_t": {
         "category": "native"
     },
diff --git a/dawn_wire.json b/dawn_wire.json
index cdd0d97..5291c3e 100644
--- a/dawn_wire.json
+++ b/dawn_wire.json
@@ -47,6 +47,13 @@
         "destroy object": [
             { "name": "object type", "type": "ObjectType" },
             { "name": "object id", "type": "ObjectId" }
+        ],
+        "queue write buffer internal": [
+            {"name": "queue id", "type": "ObjectId" },
+            {"name": "buffer id", "type": "ObjectId" },
+            {"name": "buffer offset", "type": "uint64_t"},
+            {"name": "data", "type": "uint8_t", "annotation": "const*", "length": "size"},
+            {"name": "size", "type": "size_t"}
         ]
     },
     "return commands": {
@@ -94,7 +101,8 @@
             "DeviceSetDeviceLostCallback",
             "DeviceSetUncapturedErrorCallback",
             "FenceGetCompletedValue",
-            "FenceOnCompletion"
+            "FenceOnCompletion",
+            "QueueWriteBuffer"
         ],
         "client_handwritten_commands": [
             "BufferDestroy",
diff --git a/docs/testing.md b/docs/testing.md
index 20d715f..7b6f4b6 100644
--- a/docs/testing.md
+++ b/docs/testing.md
@@ -52,7 +52,7 @@
 
 **BufferUploadPerf**
 
-Tests repetitively uploading data to the GPU using either `SetSubData` or `CreateBufferMapped`.
+Tests repeatedly uploading data to the GPU using either `WriteBuffer` or `CreateBufferMapped`.
 
 **DrawCallPerf**
 
diff --git a/examples/Animometer.cpp b/examples/Animometer.cpp
index 04b2ac6..dfd041c 100644
--- a/examples/Animometer.cpp
+++ b/examples/Animometer.cpp
@@ -150,7 +150,7 @@
     for (auto& data : shaderData) {
         data.time = f / 60.0f;
     }
-    ubo.SetSubData(0, kNumTriangles * sizeof(ShaderData), shaderData.data());
+    queue.WriteBuffer(ubo, 0, shaderData.data(), kNumTriangles * sizeof(ShaderData));
 
     utils::ComboRenderPassDescriptor renderPass({backbufferView});
     wgpu::CommandEncoder encoder = device.CreateCommandEncoder();
diff --git a/examples/ComputeBoids.cpp b/examples/ComputeBoids.cpp
index 37bd594..59538df 100644
--- a/examples/ComputeBoids.cpp
+++ b/examples/ComputeBoids.cpp
@@ -89,9 +89,9 @@
             wgpu::BufferUsage::CopyDst | wgpu::BufferUsage::Vertex | wgpu::BufferUsage::Storage;
         particleBuffers[i] = device.CreateBuffer(&descriptor);
 
-        particleBuffers[i].SetSubData(0,
-            sizeof(Particle) * kNumParticles,
-            reinterpret_cast<uint8_t*>(initialParticles.data()));
+        queue.WriteBuffer(particleBuffers[i], 0,
+                          reinterpret_cast<uint8_t*>(initialParticles.data()),
+                          sizeof(Particle) * kNumParticles);
     }
 }
 
diff --git a/examples/CubeReflection.cpp b/examples/CubeReflection.cpp
index 61e363c..becec87 100644
--- a/examples/CubeReflection.cpp
+++ b/examples/CubeReflection.cpp
@@ -262,7 +262,7 @@
         glm::vec3(0.0f, 1.0f, 0.0f)
     );
 
-    cameraBuffer.SetSubData(0, sizeof(CameraData), &cameraData);
+    queue.WriteBuffer(cameraBuffer, 0, &cameraData, sizeof(CameraData));
 
     wgpu::TextureView backbufferView = swapchain.GetCurrentTextureView();
     utils::ComboRenderPassDescriptor renderPass({backbufferView}, depthStencilView);
diff --git a/src/dawn_native/Buffer.cpp b/src/dawn_native/Buffer.cpp
index 6da550b..2b738b2 100644
--- a/src/dawn_native/Buffer.cpp
+++ b/src/dawn_native/Buffer.cpp
@@ -19,6 +19,7 @@
 #include "dawn_native/DynamicUploader.h"
 #include "dawn_native/ErrorData.h"
 #include "dawn_native/MapRequestTracker.h"
+#include "dawn_native/Queue.h"
 #include "dawn_native/ValidationUtils_autogen.h"
 
 #include <cstdio>
@@ -62,10 +63,6 @@
                 return {};
             }
 
-            MaybeError SetSubDataImpl(uint32_t start, uint32_t count, const void* data) override {
-                UNREACHABLE();
-                return {};
-            }
             MaybeError MapReadAsyncImpl(uint32_t serial) override {
                 UNREACHABLE();
                 return {};
@@ -186,7 +183,7 @@
         return {};
     }
 
-    MaybeError BufferBase::ValidateCanUseInSubmitNow() const {
+    MaybeError BufferBase::ValidateCanUseOnQueueNow() const {
         ASSERT(!IsError());
 
         switch (mState) {
@@ -244,14 +241,10 @@
     }
 
     void BufferBase::SetSubData(uint32_t start, uint32_t count, const void* data) {
-        if (GetDevice()->ConsumedError(ValidateSetSubData(start, count))) {
-            return;
-        }
-        ASSERT(!IsError());
-
-        if (GetDevice()->ConsumedError(SetSubDataImpl(start, count, data))) {
-            return;
-        }
+        Ref<QueueBase> queue = AcquireRef(GetDevice()->GetDefaultQueue());
+        GetDevice()->EmitDeprecationWarning(
+            "Buffer::SetSubData is deprecated, use Queue::WriteBuffer instead");
+        queue->WriteBuffer(this, start, data, count);
     }
 
     void BufferBase::MapReadAsync(WGPUBufferMapReadCallback callback, void* userdata) {
@@ -279,22 +272,6 @@
         tracker->Track(this, mMapSerial, false);
     }
 
-    MaybeError BufferBase::SetSubDataImpl(uint32_t start, uint32_t count, const void* data) {
-        DynamicUploader* uploader = GetDevice()->GetDynamicUploader();
-
-        UploadHandle uploadHandle;
-        DAWN_TRY_ASSIGN(uploadHandle,
-                        uploader->Allocate(count, GetDevice()->GetPendingCommandSerial()));
-        ASSERT(uploadHandle.mappedBuffer != nullptr);
-
-        memcpy(uploadHandle.mappedBuffer, data, count);
-
-        DAWN_TRY(GetDevice()->CopyFromStagingToBuffer(
-            uploadHandle.stagingBuffer, uploadHandle.startOffset, this, start, count));
-
-        return {};
-    }
-
     void BufferBase::MapWriteAsync(WGPUBufferMapWriteCallback callback, void* userdata) {
         WGPUBufferMapAsyncStatus status;
         if (GetDevice()->ConsumedError(ValidateMap(wgpu::BufferUsage::MapWrite, &status))) {
@@ -378,45 +355,6 @@
         mMapUserdata = 0;
     }
 
-    MaybeError BufferBase::ValidateSetSubData(uint32_t start, uint32_t count) const {
-        DAWN_TRY(GetDevice()->ValidateIsAlive());
-        DAWN_TRY(GetDevice()->ValidateObject(this));
-
-        switch (mState) {
-            case BufferState::Mapped:
-                return DAWN_VALIDATION_ERROR("Buffer is mapped");
-            case BufferState::Destroyed:
-                return DAWN_VALIDATION_ERROR("Buffer is destroyed");
-            case BufferState::Unmapped:
-                break;
-        }
-
-        if (count > GetSize()) {
-            return DAWN_VALIDATION_ERROR("Buffer subdata with too much data");
-        }
-
-        // Metal requests buffer to buffer copy size must be a multiple of 4 bytes on macOS
-        if (count % 4 != 0) {
-            return DAWN_VALIDATION_ERROR("Buffer subdata size must be a multiple of 4 bytes");
-        }
-
-        // Metal requests offset of buffer to buffer copy must be a multiple of 4 bytes on macOS
-        if (start % 4 != 0) {
-            return DAWN_VALIDATION_ERROR("Start position must be a multiple of 4 bytes");
-        }
-
-        // Note that no overflow can happen because we already checked for GetSize() >= count
-        if (start > GetSize() - count) {
-            return DAWN_VALIDATION_ERROR("Buffer subdata out of range");
-        }
-
-        if (!(mUsage & wgpu::BufferUsage::CopyDst)) {
-            return DAWN_VALIDATION_ERROR("Buffer needs the CopyDst usage bit");
-        }
-
-        return {};
-    }
-
     MaybeError BufferBase::ValidateMap(wgpu::BufferUsage requiredUsage,
                                        WGPUBufferMapAsyncStatus* status) const {
         *status = WGPUBufferMapAsyncStatus_DeviceLost;
diff --git a/src/dawn_native/Buffer.h b/src/dawn_native/Buffer.h
index 4e348fe..ec20452 100644
--- a/src/dawn_native/Buffer.h
+++ b/src/dawn_native/Buffer.h
@@ -52,7 +52,7 @@
         MaybeError MapAtCreation(uint8_t** mappedPointer);
         void OnMapCommandSerialFinished(uint32_t mapSerial, bool isWrite);
 
-        MaybeError ValidateCanUseInSubmitNow() const;
+        MaybeError ValidateCanUseOnQueueNow() const;
 
         // Dawn API
         void SetSubData(uint32_t start, uint32_t count, const void* data);
@@ -80,7 +80,6 @@
 
       private:
         virtual MaybeError MapAtCreationImpl(uint8_t** mappedPointer) = 0;
-        virtual MaybeError SetSubDataImpl(uint32_t start, uint32_t count, const void* data);
         virtual MaybeError MapReadAsyncImpl(uint32_t serial) = 0;
         virtual MaybeError MapWriteAsyncImpl(uint32_t serial) = 0;
         virtual void UnmapImpl() = 0;
@@ -90,7 +89,6 @@
         virtual bool IsMapWritable() const = 0;
         MaybeError CopyFromStagingBuffer();
 
-        MaybeError ValidateSetSubData(uint32_t start, uint32_t count) const;
         MaybeError ValidateMap(wgpu::BufferUsage requiredUsage,
                                WGPUBufferMapAsyncStatus* status) const;
         MaybeError ValidateUnmap() const;
diff --git a/src/dawn_native/Queue.cpp b/src/dawn_native/Queue.cpp
index 3af330a..3b8a054 100644
--- a/src/dawn_native/Queue.cpp
+++ b/src/dawn_native/Queue.cpp
@@ -17,6 +17,7 @@
 #include "dawn_native/Buffer.h"
 #include "dawn_native/CommandBuffer.h"
 #include "dawn_native/Device.h"
+#include "dawn_native/DynamicUploader.h"
 #include "dawn_native/ErrorScope.h"
 #include "dawn_native/ErrorScopeTracker.h"
 #include "dawn_native/Fence.h"
@@ -91,8 +92,42 @@
         return new Fence(this, descriptor);
     }
 
+    void QueueBase::WriteBuffer(BufferBase* buffer,
+                                uint64_t bufferOffset,
+                                const void* data,
+                                size_t size) {
+        GetDevice()->ConsumedError(WriteBufferInternal(buffer, bufferOffset, data, size));
+    }
+
+    MaybeError QueueBase::WriteBufferInternal(BufferBase* buffer,
+                                              uint64_t bufferOffset,
+                                              const void* data,
+                                              size_t size) {
+        DAWN_TRY(ValidateWriteBuffer(buffer, bufferOffset, size));
+        return WriteBufferImpl(buffer, bufferOffset, data, size);
+    }
+
+    MaybeError QueueBase::WriteBufferImpl(BufferBase* buffer,
+                                          uint64_t bufferOffset,
+                                          const void* data,
+                                          size_t size) {
+        DeviceBase* device = GetDevice();
+
+        UploadHandle uploadHandle;
+        DAWN_TRY_ASSIGN(uploadHandle, device->GetDynamicUploader()->Allocate(
+                                          size, device->GetPendingCommandSerial()));
+        ASSERT(uploadHandle.mappedBuffer != nullptr);
+
+        memcpy(uploadHandle.mappedBuffer, data, size);
+
+        DAWN_TRY(device->CopyFromStagingToBuffer(
+            uploadHandle.stagingBuffer, uploadHandle.startOffset, buffer, bufferOffset, size));
+
+        return {};
+    }
+
     MaybeError QueueBase::ValidateSubmit(uint32_t commandCount,
-                                         CommandBufferBase* const* commands) {
+                                         CommandBufferBase* const* commands) const {
         TRACE_EVENT0(GetDevice()->GetPlatform(), Validation, "Queue::ValidateSubmit");
         DAWN_TRY(GetDevice()->ValidateObject(this));
 
@@ -103,7 +138,7 @@
 
             for (const PassResourceUsage& passUsages : usages.perPass) {
                 for (const BufferBase* buffer : passUsages.buffers) {
-                    DAWN_TRY(buffer->ValidateCanUseInSubmitNow());
+                    DAWN_TRY(buffer->ValidateCanUseOnQueueNow());
                 }
                 for (const TextureBase* texture : passUsages.textures) {
                     DAWN_TRY(texture->ValidateCanUseInSubmitNow());
@@ -111,7 +146,7 @@
             }
 
             for (const BufferBase* buffer : usages.topLevelBuffers) {
-                DAWN_TRY(buffer->ValidateCanUseInSubmitNow());
+                DAWN_TRY(buffer->ValidateCanUseOnQueueNow());
             }
             for (const TextureBase* texture : usages.topLevelTextures) {
                 DAWN_TRY(texture->ValidateCanUseInSubmitNow());
@@ -121,7 +156,7 @@
         return {};
     }
 
-    MaybeError QueueBase::ValidateSignal(const Fence* fence, uint64_t signalValue) {
+    MaybeError QueueBase::ValidateSignal(const Fence* fence, uint64_t signalValue) const {
         DAWN_TRY(GetDevice()->ValidateIsAlive());
         DAWN_TRY(GetDevice()->ValidateObject(this));
         DAWN_TRY(GetDevice()->ValidateObject(fence));
@@ -136,7 +171,7 @@
         return {};
     }
 
-    MaybeError QueueBase::ValidateCreateFence(const FenceDescriptor* descriptor) {
+    MaybeError QueueBase::ValidateCreateFence(const FenceDescriptor* descriptor) const {
         DAWN_TRY(GetDevice()->ValidateIsAlive());
         DAWN_TRY(GetDevice()->ValidateObject(this));
         if (descriptor != nullptr) {
@@ -146,4 +181,30 @@
         return {};
     }
 
+    MaybeError QueueBase::ValidateWriteBuffer(const BufferBase* buffer,
+                                              uint64_t bufferOffset,
+                                              size_t size) const {
+        DAWN_TRY(GetDevice()->ValidateIsAlive());
+        DAWN_TRY(GetDevice()->ValidateObject(this));
+        DAWN_TRY(GetDevice()->ValidateObject(buffer));
+
+        if (bufferOffset % 4 != 0) {
+            return DAWN_VALIDATION_ERROR("Queue::WriteBuffer bufferOffset must be a multiple of 4");
+        }
+        if (size % 4 != 0) {
+            return DAWN_VALIDATION_ERROR("Queue::WriteBuffer size must be a multiple of 4");
+        }
+
+        uint64_t bufferSize = buffer->GetSize();
+        if (bufferOffset > bufferSize || size > (bufferSize - bufferOffset)) {
+            return DAWN_VALIDATION_ERROR("Queue::WriteBuffer out of range");
+        }
+
+        if (!(buffer->GetUsage() & wgpu::BufferUsage::CopyDst)) {
+            return DAWN_VALIDATION_ERROR("Buffer needs the CopyDst usage bit");
+        }
+
+        return buffer->ValidateCanUseOnQueueNow();
+    }
+
 }  // namespace dawn_native
diff --git a/src/dawn_native/Queue.h b/src/dawn_native/Queue.h
index fd9d291..5fd722d 100644
--- a/src/dawn_native/Queue.h
+++ b/src/dawn_native/Queue.h
@@ -33,15 +33,28 @@
         void Submit(uint32_t commandCount, CommandBufferBase* const* commands);
         void Signal(Fence* fence, uint64_t signalValue);
         Fence* CreateFence(const FenceDescriptor* descriptor);
+        void WriteBuffer(BufferBase* buffer, uint64_t bufferOffset, const void* data, size_t size);
 
       private:
         QueueBase(DeviceBase* device, ObjectBase::ErrorTag tag);
 
-        virtual MaybeError SubmitImpl(uint32_t commandCount, CommandBufferBase* const* commands);
+        MaybeError WriteBufferInternal(BufferBase* buffer,
+                                       uint64_t bufferOffset,
+                                       const void* data,
+                                       size_t size);
 
-        MaybeError ValidateSubmit(uint32_t commandCount, CommandBufferBase* const* commands);
-        MaybeError ValidateSignal(const Fence* fence, uint64_t signalValue);
-        MaybeError ValidateCreateFence(const FenceDescriptor* descriptor);
+        virtual MaybeError SubmitImpl(uint32_t commandCount, CommandBufferBase* const* commands);
+        virtual MaybeError WriteBufferImpl(BufferBase* buffer,
+                                           uint64_t bufferOffset,
+                                           const void* data,
+                                           size_t size);
+
+        MaybeError ValidateSubmit(uint32_t commandCount, CommandBufferBase* const* commands) const;
+        MaybeError ValidateSignal(const Fence* fence, uint64_t signalValue) const;
+        MaybeError ValidateCreateFence(const FenceDescriptor* descriptor) const;
+        MaybeError ValidateWriteBuffer(const BufferBase* buffer,
+                                       uint64_t bufferOffset,
+                                       size_t size) const;
     };
 
 }  // namespace dawn_native
diff --git a/src/dawn_native/d3d12/PageableD3D12.h b/src/dawn_native/d3d12/PageableD3D12.h
index 4729b4b..6b07adb 100644
--- a/src/dawn_native/d3d12/PageableD3D12.h
+++ b/src/dawn_native/d3d12/PageableD3D12.h
@@ -66,7 +66,7 @@
         Serial mLastUsage = 0;
         // mLastSubmission denotes the last time this pageable was submitted to the GPU. Note that
         // although this variable often contains the same value as mLastUsage, it can differ in some
-        // situations. When some asynchronous APIs (like SetSubData) are called, mLastUsage is
+        // situations. When some asynchronous APIs (like WriteBuffer) are called, mLastUsage is
         // updated upon the call, but the backend operation is deferred until the next submission
         // to the GPU. This makes mLastSubmission unique from mLastUsage, and allows us to
         // accurately identify when a pageable can be evicted.
@@ -77,4 +77,4 @@
     };
 }}  // namespace dawn_native::d3d12
 
-#endif
\ No newline at end of file
+#endif
diff --git a/src/dawn_native/metal/CommandRecordingContext.mm b/src/dawn_native/metal/CommandRecordingContext.mm
index 33df959..971691a 100644
--- a/src/dawn_native/metal/CommandRecordingContext.mm
+++ b/src/dawn_native/metal/CommandRecordingContext.mm
@@ -47,7 +47,7 @@
             return nil;
         }
 
-        // A blit encoder can be left open from SetSubData, make sure we close it.
+        // A blit encoder can be left open from WriteBuffer, make sure we close it.
         EndBlit();
 
         ASSERT(!mInEncoder);
diff --git a/src/dawn_native/null/DeviceNull.cpp b/src/dawn_native/null/DeviceNull.cpp
index 77ef491..d6d8fff 100644
--- a/src/dawn_native/null/DeviceNull.cpp
+++ b/src/dawn_native/null/DeviceNull.cpp
@@ -304,11 +304,10 @@
         memcpy(mBackingData.get() + destinationOffset, ptr + sourceOffset, size);
     }
 
-    MaybeError Buffer::SetSubDataImpl(uint32_t start, uint32_t count, const void* data) {
-        ASSERT(start + count <= GetSize());
+    void Buffer::DoWriteBuffer(uint64_t bufferOffset, const void* data, size_t size) {
+        ASSERT(bufferOffset + size <= GetSize());
         ASSERT(mBackingData);
-        memcpy(mBackingData.get() + start, data, count);
-        return {};
+        memcpy(mBackingData.get() + bufferOffset, data, size);
     }
 
     MaybeError Buffer::MapReadAsyncImpl(uint32_t serial) {
@@ -366,6 +365,14 @@
         return {};
     }
 
+    MaybeError Queue::WriteBufferImpl(BufferBase* buffer,
+                                      uint64_t bufferOffset,
+                                      const void* data,
+                                      size_t size) {
+        ToBackend(buffer)->DoWriteBuffer(bufferOffset, data, size);
+        return {};
+    }
+
     // SwapChain
 
     SwapChain::SwapChain(Device* device,
diff --git a/src/dawn_native/null/DeviceNull.h b/src/dawn_native/null/DeviceNull.h
index 8d2eeee..7a93822 100644
--- a/src/dawn_native/null/DeviceNull.h
+++ b/src/dawn_native/null/DeviceNull.h
@@ -187,11 +187,12 @@
                              uint64_t destinationOffset,
                              uint64_t size);
 
+        void DoWriteBuffer(uint64_t bufferOffset, const void* data, size_t size);
+
       private:
         ~Buffer() override;
 
         // Dawn API
-        MaybeError SetSubDataImpl(uint32_t start, uint32_t count, const void* data) override;
         MaybeError MapReadAsyncImpl(uint32_t serial) override;
         MaybeError MapWriteAsyncImpl(uint32_t serial) override;
         void UnmapImpl() override;
@@ -222,6 +223,10 @@
       private:
         ~Queue() override;
         MaybeError SubmitImpl(uint32_t commandCount, CommandBufferBase* const* commands) override;
+        MaybeError WriteBufferImpl(BufferBase* buffer,
+                                   uint64_t bufferOffset,
+                                   const void* data,
+                                   size_t size) override;
     };
 
     class SwapChain final : public NewSwapChainBase {
diff --git a/src/dawn_native/opengl/BufferGL.cpp b/src/dawn_native/opengl/BufferGL.cpp
index 0ccb726..a7f880a 100644
--- a/src/dawn_native/opengl/BufferGL.cpp
+++ b/src/dawn_native/opengl/BufferGL.cpp
@@ -50,14 +50,6 @@
         return {};
     }
 
-    MaybeError Buffer::SetSubDataImpl(uint32_t start, uint32_t count, const void* data) {
-        const OpenGLFunctions& gl = ToBackend(GetDevice())->gl;
-
-        gl.BindBuffer(GL_ARRAY_BUFFER, mBuffer);
-        gl.BufferSubData(GL_ARRAY_BUFFER, start, count, data);
-        return {};
-    }
-
     MaybeError Buffer::MapReadAsyncImpl(uint32_t serial) {
         const OpenGLFunctions& gl = ToBackend(GetDevice())->gl;
 
diff --git a/src/dawn_native/opengl/BufferGL.h b/src/dawn_native/opengl/BufferGL.h
index 01177f3..9949829 100644
--- a/src/dawn_native/opengl/BufferGL.h
+++ b/src/dawn_native/opengl/BufferGL.h
@@ -32,7 +32,6 @@
       private:
         ~Buffer() override;
         // Dawn API
-        MaybeError SetSubDataImpl(uint32_t start, uint32_t count, const void* data) override;
         MaybeError MapReadAsyncImpl(uint32_t serial) override;
         MaybeError MapWriteAsyncImpl(uint32_t serial) override;
         void UnmapImpl() override;
diff --git a/src/dawn_native/opengl/QueueGL.cpp b/src/dawn_native/opengl/QueueGL.cpp
index 9e08f6a..a33cbd0 100644
--- a/src/dawn_native/opengl/QueueGL.cpp
+++ b/src/dawn_native/opengl/QueueGL.cpp
@@ -14,6 +14,7 @@
 
 #include "dawn_native/opengl/QueueGL.h"
 
+#include "dawn_native/opengl/BufferGL.h"
 #include "dawn_native/opengl/CommandBufferGL.h"
 #include "dawn_native/opengl/DeviceGL.h"
 #include "dawn_platform/DawnPlatform.h"
@@ -37,4 +38,15 @@
         return {};
     }
 
+    MaybeError Queue::WriteBufferImpl(BufferBase* buffer,
+                                      uint64_t bufferOffset,
+                                      const void* data,
+                                      size_t size) {
+        const OpenGLFunctions& gl = ToBackend(GetDevice())->gl;
+
+        gl.BindBuffer(GL_ARRAY_BUFFER, ToBackend(buffer)->GetHandle());
+        gl.BufferSubData(GL_ARRAY_BUFFER, bufferOffset, size, data);
+        return {};
+    }
+
 }}  // namespace dawn_native::opengl
diff --git a/src/dawn_native/opengl/QueueGL.h b/src/dawn_native/opengl/QueueGL.h
index d62e90d..301b1ad 100644
--- a/src/dawn_native/opengl/QueueGL.h
+++ b/src/dawn_native/opengl/QueueGL.h
@@ -28,6 +28,10 @@
 
       private:
         MaybeError SubmitImpl(uint32_t commandCount, CommandBufferBase* const* commands) override;
+        MaybeError WriteBufferImpl(BufferBase* buffer,
+                                   uint64_t bufferOffset,
+                                   const void* data,
+                                   size_t size) override;
     };
 
 }}  // namespace dawn_native::opengl
diff --git a/src/dawn_wire/client/ApiProcs.cpp b/src/dawn_wire/client/ApiProcs.cpp
index 9819b22..6106901 100644
--- a/src/dawn_wire/client/ApiProcs.cpp
+++ b/src/dawn_wire/client/ApiProcs.cpp
@@ -366,6 +366,27 @@
         cmd.Serialize(allocatedBuffer, *fence->device->GetClient());
     }
 
+    void ClientHandwrittenQueueWriteBuffer(WGPUQueue cQueue,
+                                           WGPUBuffer cBuffer,
+                                           uint64_t bufferOffset,
+                                           const void* data,
+                                           size_t size) {
+        Queue* queue = reinterpret_cast<Queue*>(cQueue);
+        Buffer* buffer = reinterpret_cast<Buffer*>(cBuffer);
+
+        QueueWriteBufferInternalCmd cmd;
+        cmd.queueId = queue->id;
+        cmd.bufferId = buffer->id;
+        cmd.bufferOffset = bufferOffset;
+        cmd.data = static_cast<const uint8_t*>(data);
+        cmd.size = size;
+
+        Client* wireClient = buffer->device->GetClient();
+        size_t requiredSize = cmd.GetRequiredSize();
+        char* allocatedBuffer = static_cast<char*>(wireClient->GetCmdSpace(requiredSize));
+        cmd.Serialize(allocatedBuffer);
+    }
+
     void ClientDeviceReference(WGPUDevice) {
     }
 
diff --git a/src/dawn_wire/server/ServerQueue.cpp b/src/dawn_wire/server/ServerQueue.cpp
index 4ec808e..6e47492 100644
--- a/src/dawn_wire/server/ServerQueue.cpp
+++ b/src/dawn_wire/server/ServerQueue.cpp
@@ -38,4 +38,21 @@
         return true;
     }
 
+    bool Server::DoQueueWriteBufferInternal(ObjectId queueId,
+                                            ObjectId bufferId,
+                                            uint64_t bufferOffset,
+                                            const uint8_t* data,
+                                            size_t size) {
+        // The null object isn't valid as `self` or `buffer` so we can combine the check with the
+        // check that the ID is valid.
+        auto* queue = QueueObjects().Get(queueId);
+        auto* buffer = BufferObjects().Get(bufferId);
+        if (queue == nullptr || buffer == nullptr) {
+            return false;
+        }
+
+        mProcs.queueWriteBuffer(queue->handle, buffer->handle, bufferOffset, data, size);
+        return true;
+    }
+
 }}  // namespace dawn_wire::server
diff --git a/src/tests/end2end/BasicTests.cpp b/src/tests/end2end/BasicTests.cpp
index 1b594bd..98ecb2e 100644
--- a/src/tests/end2end/BasicTests.cpp
+++ b/src/tests/end2end/BasicTests.cpp
@@ -26,30 +26,30 @@
     ASSERT_EQ(GetAdapterProperties().vendorID, GetVendorIdFilter());
 }
 
-// Test Buffer::SetSubData changes the content of the buffer, but really this is the most
+// Test Queue::WriteBuffer changes the content of the buffer, but really this is the most
 // basic test possible, and tests the test harness
-TEST_P(BasicTests, BufferSetSubData) {
+TEST_P(BasicTests, QueueWriteBuffer) {
     wgpu::BufferDescriptor descriptor;
     descriptor.size = 4;
     descriptor.usage = wgpu::BufferUsage::CopySrc | wgpu::BufferUsage::CopyDst;
     wgpu::Buffer buffer = device.CreateBuffer(&descriptor);
 
     uint32_t value = 0x01020304;
-    buffer.SetSubData(0, sizeof(value), &value);
+    queue.WriteBuffer(buffer, 0, &value, sizeof(value));
 
     EXPECT_BUFFER_U32_EQ(value, buffer, 0);
 }
 
-// Test a validation error for buffer setSubData, but really this is the most basic test possible
+// Test a validation error for Queue::WriteBuffer, but really this is the most basic test possible
 // for ASSERT_DEVICE_ERROR
-TEST_P(BasicTests, BufferSetSubDataError) {
+TEST_P(BasicTests, QueueWriteBufferError) {
     wgpu::BufferDescriptor descriptor;
     descriptor.size = 4;
     descriptor.usage = wgpu::BufferUsage::CopySrc | wgpu::BufferUsage::CopyDst;
     wgpu::Buffer buffer = device.CreateBuffer(&descriptor);
 
     uint8_t value = 187;
-    ASSERT_DEVICE_ERROR(buffer.SetSubData(1000, sizeof(value), &value));
+    ASSERT_DEVICE_ERROR(queue.WriteBuffer(buffer, 1000, &value, sizeof(value)));
 }
 
 DAWN_INSTANTIATE_TEST(BasicTests, D3D12Backend(), MetalBackend(), OpenGLBackend(), VulkanBackend());
diff --git a/src/tests/end2end/BindGroupTests.cpp b/src/tests/end2end/BindGroupTests.cpp
index 153c303..c288844 100644
--- a/src/tests/end2end/BindGroupTests.cpp
+++ b/src/tests/end2end/BindGroupTests.cpp
@@ -770,9 +770,9 @@
     wgpu::Buffer buffer2 = device.CreateBuffer(&bufferDescriptor);
 
     // Populate the values
-    buffer0.SetSubData(offsets[0], sizeof(uint32_t), &values[0]);
-    buffer2.SetSubData(offsets[1], sizeof(uint32_t), &values[1]);
-    buffer3.SetSubData(offsets[2], sizeof(uint32_t), &values[2]);
+    queue.WriteBuffer(buffer0, offsets[0], &values[0], sizeof(uint32_t));
+    queue.WriteBuffer(buffer2, offsets[1], &values[1], sizeof(uint32_t));
+    queue.WriteBuffer(buffer3, offsets[2], &values[2], sizeof(uint32_t));
 
     wgpu::Buffer outputBuffer = utils::CreateBufferFromData(
         device, wgpu::BufferUsage::CopySrc | wgpu::BufferUsage::Storage, {0, 0, 0});
diff --git a/src/tests/end2end/BufferTests.cpp b/src/tests/end2end/BufferTests.cpp
index 04af878..504cf32 100644
--- a/src/tests/end2end/BufferTests.cpp
+++ b/src/tests/end2end/BufferTests.cpp
@@ -55,7 +55,7 @@
     wgpu::Buffer buffer = device.CreateBuffer(&descriptor);
 
     uint32_t myData = 0x01020304;
-    buffer.SetSubData(0, sizeof(myData), &myData);
+    queue.WriteBuffer(buffer, 0, &myData, sizeof(myData));
 
     const void* mappedData = MapReadAsyncAndWait(buffer);
     ASSERT_EQ(myData, *reinterpret_cast<const uint32_t*>(mappedData));
@@ -71,7 +71,7 @@
     wgpu::Buffer buffer = device.CreateBuffer(&descriptor);
 
     uint32_t myData = 0x01020304;
-    buffer.SetSubData(0, sizeof(myData), &myData);
+    queue.WriteBuffer(buffer, 0, &myData, sizeof(myData));
 
     const void* mappedData = MapReadAsyncAndWait(buffer);
     EXPECT_EQ(myData, *reinterpret_cast<const uint32_t*>(mappedData));
@@ -79,7 +79,7 @@
     UnmapBuffer(buffer);
 
     myData = 0x05060708;
-    buffer.SetSubData(0, sizeof(myData), &myData);
+    queue.WriteBuffer(buffer, 0, &myData, sizeof(myData));
 
     const void* mappedData1 = MapReadAsyncAndWait(buffer);
     EXPECT_EQ(myData, *reinterpret_cast<const uint32_t*>(mappedData1));
@@ -100,7 +100,7 @@
     descriptor.usage = wgpu::BufferUsage::MapRead | wgpu::BufferUsage::CopyDst;
     wgpu::Buffer buffer = device.CreateBuffer(&descriptor);
 
-    buffer.SetSubData(0, kDataSize * sizeof(uint32_t), myData.data());
+    queue.WriteBuffer(buffer, 0, myData.data(), kDataSize * sizeof(uint32_t));
 
     const void* mappedData = MapReadAsyncAndWait(buffer);
     ASSERT_EQ(0, memcmp(mappedData, myData.data(), kDataSize * sizeof(uint32_t)));
@@ -233,144 +233,6 @@
 
 DAWN_INSTANTIATE_TEST(BufferMapWriteTests, D3D12Backend(), MetalBackend(), OpenGLBackend(), VulkanBackend());
 
-class BufferSetSubDataTests : public DawnTest {
-};
-
-// Test the simplest set sub data: setting one u32 at offset 0.
-TEST_P(BufferSetSubDataTests, SmallDataAtZero) {
-    wgpu::BufferDescriptor descriptor;
-    descriptor.size = 4;
-    descriptor.usage = wgpu::BufferUsage::CopySrc | wgpu::BufferUsage::CopyDst;
-    wgpu::Buffer buffer = device.CreateBuffer(&descriptor);
-
-    uint32_t value = 0x01020304;
-    buffer.SetSubData(0, sizeof(value), &value);
-
-    EXPECT_BUFFER_U32_EQ(value, buffer, 0);
-}
-
-// Test the simplest set sub data: setting nothing
-TEST_P(BufferSetSubDataTests, ZeroSized) {
-    wgpu::BufferDescriptor descriptor;
-    descriptor.size = 4;
-    descriptor.usage = wgpu::BufferUsage::CopySrc | wgpu::BufferUsage::CopyDst;
-    wgpu::Buffer buffer = device.CreateBuffer(&descriptor);
-
-    uint32_t initialValue = 0x42;
-    buffer.SetSubData(0, sizeof(initialValue), &initialValue);
-
-    buffer.SetSubData(0, 0, nullptr);
-
-    // The content of the buffer isn't changed
-    EXPECT_BUFFER_U32_EQ(initialValue, buffer, 0);
-}
-
-// Call SetSubData at offset 0 via a u32 twice. Test that data is updated accoordingly.
-TEST_P(BufferSetSubDataTests, SetTwice) {
-    wgpu::BufferDescriptor descriptor;
-    descriptor.size = 4;
-    descriptor.usage = wgpu::BufferUsage::CopySrc | wgpu::BufferUsage::CopyDst;
-    wgpu::Buffer buffer = device.CreateBuffer(&descriptor);
-
-    uint32_t value = 0x01020304;
-    buffer.SetSubData(0, sizeof(value), &value);
-
-    EXPECT_BUFFER_U32_EQ(value, buffer, 0);
-
-    value = 0x05060708;
-    buffer.SetSubData(0, sizeof(value), &value);
-
-    EXPECT_BUFFER_U32_EQ(value, buffer, 0);
-}
-
-// Test that SetSubData offset works.
-TEST_P(BufferSetSubDataTests, SmallDataAtOffset) {
-    wgpu::BufferDescriptor descriptor;
-    descriptor.size = 4000;
-    descriptor.usage = wgpu::BufferUsage::CopySrc | wgpu::BufferUsage::CopyDst;
-    wgpu::Buffer buffer = device.CreateBuffer(&descriptor);
-
-    constexpr uint64_t kOffset = 2000;
-    uint32_t value = 0x01020304;
-    buffer.SetSubData(kOffset, sizeof(value), &value);
-
-    EXPECT_BUFFER_U32_EQ(value, buffer, kOffset);
-}
-
-// Stress test for many calls to SetSubData
-TEST_P(BufferSetSubDataTests, ManySetSubData) {
-    // Note: Increasing the size of the buffer will likely cause timeout issues.
-    // In D3D12, timeout detection occurs when the GPU scheduler tries but cannot preempt the task
-    // executing these commands in-flight. If this takes longer than ~2s, a device reset occurs and
-    // fails the test. Since GPUs may or may not complete by then, this test must be disabled OR
-    // modified to be well-below the timeout limit.
-
-    // TODO (jiawei.shao@intel.com): find out why this test fails on Intel Vulkan Linux bots.
-    DAWN_SKIP_TEST_IF(IsIntel() && IsVulkan() && IsLinux());
-    // TODO(https://bugs.chromium.org/p/dawn/issues/detail?id=228): Re-enable
-    // once the issue with Metal on 10.14.6 is fixed.
-    DAWN_SKIP_TEST_IF(IsMacOS() && IsIntel() && IsMetal());
-
-    constexpr uint64_t kSize = 4000 * 1000;
-    constexpr uint32_t kElements = 500 * 500;
-    wgpu::BufferDescriptor descriptor;
-    descriptor.size = kSize;
-    descriptor.usage = wgpu::BufferUsage::CopySrc | wgpu::BufferUsage::CopyDst;
-    wgpu::Buffer buffer = device.CreateBuffer(&descriptor);
-
-    std::vector<uint32_t> expectedData;
-    for (uint32_t i = 0; i < kElements; ++i) {
-        buffer.SetSubData(i * sizeof(uint32_t), sizeof(i), &i);
-        expectedData.push_back(i);
-    }
-
-    EXPECT_BUFFER_U32_RANGE_EQ(expectedData.data(), buffer, 0, kElements);
-}
-
-// Test using SetSubData for lots of data
-TEST_P(BufferSetSubDataTests, LargeSetSubData) {
-    constexpr uint64_t kSize = 4000 * 1000;
-    constexpr uint32_t kElements = 1000 * 1000;
-    wgpu::BufferDescriptor descriptor;
-    descriptor.size = kSize;
-    descriptor.usage = wgpu::BufferUsage::CopySrc | wgpu::BufferUsage::CopyDst;
-    wgpu::Buffer buffer = device.CreateBuffer(&descriptor);
-
-    std::vector<uint32_t> expectedData;
-    for (uint32_t i = 0; i < kElements; ++i) {
-        expectedData.push_back(i);
-    }
-
-    buffer.SetSubData(0, kElements * sizeof(uint32_t), expectedData.data());
-
-    EXPECT_BUFFER_U32_RANGE_EQ(expectedData.data(), buffer, 0, kElements);
-}
-
-// Test using SetSubData for super large data block
-TEST_P(BufferSetSubDataTests, SuperLargeSetSubData) {
-    constexpr uint64_t kSize = 12000 * 1000;
-    constexpr uint64_t kElements = 3000 * 1000;
-    wgpu::BufferDescriptor descriptor;
-    descriptor.size = kSize;
-    descriptor.usage = wgpu::BufferUsage::CopySrc | wgpu::BufferUsage::CopyDst;
-    wgpu::Buffer buffer = device.CreateBuffer(&descriptor);
-
-    std::vector<uint32_t> expectedData;
-    for (uint32_t i = 0; i < kElements; ++i) {
-        expectedData.push_back(i);
-    }
-
-    buffer.SetSubData(0, kElements * sizeof(uint32_t), expectedData.data());
-
-    EXPECT_BUFFER_U32_RANGE_EQ(expectedData.data(), buffer, 0, kElements);
-}
-
-DAWN_INSTANTIATE_TEST(BufferSetSubDataTests,
-                     D3D12Backend(),
-                     MetalBackend(),
-                     OpenGLBackend(),
-                     VulkanBackend());
-
 // TODO(enga): These tests should use the testing toggle to initialize resources to 1.
 class CreateBufferMappedTests : public DawnTest {
     protected:
diff --git a/src/tests/end2end/ComputeCopyStorageBufferTests.cpp b/src/tests/end2end/ComputeCopyStorageBufferTests.cpp
index 569c82b..dcb762e 100644
--- a/src/tests/end2end/ComputeCopyStorageBufferTests.cpp
+++ b/src/tests/end2end/ComputeCopyStorageBufferTests.cpp
@@ -48,7 +48,7 @@
     for (uint32_t i = 0; i < kNumUints; ++i) {
         expected[i] = (i + 1u) * 0x11111111u;
     }
-    src.SetSubData(0, sizeof(expected), expected.data());
+    queue.WriteBuffer(src, 0, expected.data(), sizeof(expected));
     EXPECT_BUFFER_U32_RANGE_EQ(expected.data(), src, 0, kNumUints);
 
     // Set up dst storage buffer
@@ -59,7 +59,7 @@
     wgpu::Buffer dst = device.CreateBuffer(&dstDesc);
 
     std::array<uint32_t, kNumUints> zero{};
-    dst.SetSubData(0, sizeof(zero), zero.data());
+    queue.WriteBuffer(dst, 0, zero.data(), sizeof(zero));
 
     // Set up bind group and issue dispatch
     wgpu::BindGroup bindGroup = utils::MakeBindGroup(device, pipeline.GetBindGroupLayout(0),
diff --git a/src/tests/end2end/ComputeSharedMemoryTests.cpp b/src/tests/end2end/ComputeSharedMemoryTests.cpp
index 980735d..bb33856 100644
--- a/src/tests/end2end/ComputeSharedMemoryTests.cpp
+++ b/src/tests/end2end/ComputeSharedMemoryTests.cpp
@@ -42,7 +42,7 @@
     wgpu::Buffer dst = device.CreateBuffer(&dstDesc);
 
     const uint32_t zero = 0;
-    dst.SetSubData(0, sizeof(zero), &zero);
+    queue.WriteBuffer(dst, 0, &zero, sizeof(zero));
 
     // Set up bind group and issue dispatch
     wgpu::BindGroup bindGroup = utils::MakeBindGroup(device, pipeline.GetBindGroupLayout(0),
diff --git a/src/tests/end2end/CopyTests.cpp b/src/tests/end2end/CopyTests.cpp
index f9d768d..60e76e1 100644
--- a/src/tests/end2end/CopyTests.cpp
+++ b/src/tests/end2end/CopyTests.cpp
@@ -124,8 +124,8 @@
             bufDescriptor.usage = wgpu::BufferUsage::CopySrc | wgpu::BufferUsage::CopyDst;
             wgpu::Buffer buffer = device.CreateBuffer(&bufDescriptor);
             std::vector<RGBA8> emptyData(bufferSpec.size / kBytesPerTexel * textureSpec.arraySize);
-            buffer.SetSubData(0, static_cast<uint32_t>(emptyData.size() * sizeof(RGBA8)),
-                              emptyData.data());
+            queue.WriteBuffer(buffer, 0, emptyData.data(),
+                              static_cast<uint32_t>(emptyData.size() * sizeof(RGBA8)));
 
             uint64_t bufferOffset = bufferSpec.offset;
             for (uint32_t slice = 0; slice < textureSpec.arraySize; ++slice) {
@@ -195,8 +195,8 @@
 
         std::vector<RGBA8> bufferData(bufferSpec.size / kBytesPerTexel);
         FillBufferData(bufferData.data(), bufferData.size());
-        buffer.SetSubData(0, static_cast<uint32_t>(bufferData.size() * sizeof(RGBA8)),
-                          bufferData.data());
+        queue.WriteBuffer(buffer, 0, bufferData.data(),
+                          static_cast<uint32_t>(bufferData.size() * sizeof(RGBA8)));
 
         // Create a texture that is `width` x `height` with (`level` + 1) mip levels.
         wgpu::TextureDescriptor descriptor;
diff --git a/src/tests/end2end/DeprecatedAPITests.cpp b/src/tests/end2end/DeprecatedAPITests.cpp
index 76e9e82..087d182 100644
--- a/src/tests/end2end/DeprecatedAPITests.cpp
+++ b/src/tests/end2end/DeprecatedAPITests.cpp
@@ -58,6 +58,30 @@
         }                                                                        \
     } while (0)
 
+// Test that using SetSubData emits a deprecation warning.
+TEST_P(DeprecationTests, SetSubDataDeprecated) {
+    wgpu::BufferDescriptor descriptor;
+    descriptor.usage = wgpu::BufferUsage::CopyDst;
+    descriptor.size = 4;
+    wgpu::Buffer buffer = device.CreateBuffer(&descriptor);
+
+    EXPECT_DEPRECATION_WARNING(buffer.SetSubData(0, 0, nullptr));
+}
+
+// Test that the deprecated SetSubData still writes the data while emitting a deprecation warning
+TEST_P(DeprecationTests, SetSubDataStillWorks) {
+    DAWN_SKIP_TEST_IF(IsNull());
+
+    wgpu::BufferDescriptor descriptor;
+    descriptor.usage = wgpu::BufferUsage::CopyDst | wgpu::BufferUsage::CopySrc;
+    descriptor.size = 4;
+    wgpu::Buffer buffer = device.CreateBuffer(&descriptor);
+
+    uint32_t data = 2020;
+    EXPECT_DEPRECATION_WARNING(buffer.SetSubData(0, 4, &data));
+    EXPECT_BUFFER_U32_EQ(data, buffer, 0);
+}
+
 DAWN_INSTANTIATE_TEST(DeprecationTests,
                       D3D12Backend(),
                       MetalBackend(),
diff --git a/src/tests/end2end/DepthSamplingTests.cpp b/src/tests/end2end/DepthSamplingTests.cpp
index a8d08df..7c24a69 100644
--- a/src/tests/end2end/DepthSamplingTests.cpp
+++ b/src/tests/end2end/DepthSamplingTests.cpp
@@ -205,7 +205,7 @@
     }
 
     void UpdateInputTexture(wgpu::CommandEncoder commandEncoder, float textureValue) {
-        mTextureUploadBuffer.SetSubData(0, sizeof(float), &textureValue);
+        queue.WriteBuffer(mTextureUploadBuffer, 0, &textureValue, sizeof(float));
 
         wgpu::BufferCopyView bufferCopyView = {};
         bufferCopyView.buffer = mTextureUploadBuffer;
@@ -315,7 +315,7 @@
                           float compareRef,
                           wgpu::CompareFunction compare,
                           std::vector<float> textureValues) {
-        mUniformBuffer.SetSubData(0, sizeof(float), &compareRef);
+        queue.WriteBuffer(mUniformBuffer, 0, &compareRef, sizeof(float));
 
         wgpu::SamplerDescriptor samplerDesc;
         samplerDesc.compare = compare;
@@ -357,7 +357,7 @@
                           float compareRef,
                           wgpu::CompareFunction compare,
                           std::vector<float> textureValues) {
-        mUniformBuffer.SetSubData(0, sizeof(float), &compareRef);
+        queue.WriteBuffer(mUniformBuffer, 0, &compareRef, sizeof(float));
 
         wgpu::SamplerDescriptor samplerDesc;
         samplerDesc.compare = compare;
diff --git a/src/tests/end2end/DeviceLostTests.cpp b/src/tests/end2end/DeviceLostTests.cpp
index 5aa0263..e473533 100644
--- a/src/tests/end2end/DeviceLostTests.cpp
+++ b/src/tests/end2end/DeviceLostTests.cpp
@@ -320,8 +320,8 @@
     SetCallbackAndLoseForTesting();
 }
 
-// Test that SetSubData fails after device is lost
-TEST_P(DeviceLostTest, SetSubDataFails) {
+// Test that WriteBuffer fails after device is lost
+TEST_P(DeviceLostTest, WriteBufferFails) {
     wgpu::BufferDescriptor bufferDescriptor;
     bufferDescriptor.size = sizeof(float);
     bufferDescriptor.usage = wgpu::BufferUsage::MapRead | wgpu::BufferUsage::CopyDst;
@@ -329,8 +329,8 @@
     wgpu::Buffer buffer = device.CreateBuffer(&bufferDescriptor);
 
     SetCallbackAndLoseForTesting();
-    std::array<float, 1> data = {12};
-    ASSERT_DEVICE_ERROR(buffer.SetSubData(0, sizeof(float), data.data()));
+    float data = 12.0f;
+    ASSERT_DEVICE_ERROR(queue.WriteBuffer(buffer, 0, &data, sizeof(data)));
 }
 
 // Test that Command Encoder Finish fails when device lost
diff --git a/src/tests/end2end/GpuMemorySynchronizationTests.cpp b/src/tests/end2end/GpuMemorySynchronizationTests.cpp
index d396a5f..3b4dcd1 100644
--- a/src/tests/end2end/GpuMemorySynchronizationTests.cpp
+++ b/src/tests/end2end/GpuMemorySynchronizationTests.cpp
@@ -29,7 +29,7 @@
         wgpu::Buffer buffer = device.CreateBuffer(&srcDesc);
 
         int myData = 0;
-        buffer.SetSubData(0, sizeof(myData), &myData);
+        queue.WriteBuffer(buffer, 0, &myData, sizeof(myData));
         return buffer;
     }
 
@@ -432,7 +432,7 @@
         wgpu::Buffer buffer = device.CreateBuffer(&srcDesc);
 
         std::vector<uint8_t> zeros(size, 0);
-        buffer.SetSubData(0, size, zeros.data());
+        queue.WriteBuffer(buffer, 0, zeros.data(), size);
 
         return buffer;
     }
diff --git a/src/tests/end2end/QueueTests.cpp b/src/tests/end2end/QueueTests.cpp
index 710caec..bc83a1b 100644
--- a/src/tests/end2end/QueueTests.cpp
+++ b/src/tests/end2end/QueueTests.cpp
@@ -34,3 +34,140 @@
                       NullBackend(),
                       OpenGLBackend(),
                       VulkanBackend());
+
+class QueueWriteBufferTests : public DawnTest {};
+
+// Test the simplest WriteBuffer setting one u32 at offset 0.
+TEST_P(QueueWriteBufferTests, SmallDataAtZero) {
+    wgpu::BufferDescriptor descriptor;
+    descriptor.size = 4;
+    descriptor.usage = wgpu::BufferUsage::CopySrc | wgpu::BufferUsage::CopyDst;
+    wgpu::Buffer buffer = device.CreateBuffer(&descriptor);
+
+    uint32_t value = 0x01020304;
+    queue.WriteBuffer(buffer, 0, &value, sizeof(value));
+
+    EXPECT_BUFFER_U32_EQ(value, buffer, 0);
+}
+
+// Test that an empty WriteBuffer (null data, size 0) leaves the buffer contents unchanged
+TEST_P(QueueWriteBufferTests, ZeroSized) {
+    wgpu::BufferDescriptor descriptor;
+    descriptor.size = 4;
+    descriptor.usage = wgpu::BufferUsage::CopySrc | wgpu::BufferUsage::CopyDst;
+    wgpu::Buffer buffer = device.CreateBuffer(&descriptor);
+
+    uint32_t initialValue = 0x42;
+    queue.WriteBuffer(buffer, 0, &initialValue, sizeof(initialValue));
+
+    queue.WriteBuffer(buffer, 0, nullptr, 0);
+
+    // The content of the buffer isn't changed
+    EXPECT_BUFFER_U32_EQ(initialValue, buffer, 0);
+}
+
+// Call WriteBuffer at offset 0 via a u32 twice. Test that data is updated accordingly.
+TEST_P(QueueWriteBufferTests, SetTwice) {
+    wgpu::BufferDescriptor descriptor;
+    descriptor.size = 4;
+    descriptor.usage = wgpu::BufferUsage::CopySrc | wgpu::BufferUsage::CopyDst;
+    wgpu::Buffer buffer = device.CreateBuffer(&descriptor);
+
+    uint32_t value = 0x01020304;
+    queue.WriteBuffer(buffer, 0, &value, sizeof(value));
+
+    EXPECT_BUFFER_U32_EQ(value, buffer, 0);
+
+    value = 0x05060708;
+    queue.WriteBuffer(buffer, 0, &value, sizeof(value));
+
+    EXPECT_BUFFER_U32_EQ(value, buffer, 0);
+}
+
+// Test that WriteBuffer offset works.
+TEST_P(QueueWriteBufferTests, SmallDataAtOffset) {
+    wgpu::BufferDescriptor descriptor;
+    descriptor.size = 4000;
+    descriptor.usage = wgpu::BufferUsage::CopySrc | wgpu::BufferUsage::CopyDst;
+    wgpu::Buffer buffer = device.CreateBuffer(&descriptor);
+
+    constexpr uint64_t kOffset = 2000;
+    uint32_t value = 0x01020304;
+    queue.WriteBuffer(buffer, kOffset, &value, sizeof(value));
+
+    EXPECT_BUFFER_U32_EQ(value, buffer, kOffset);
+}
+
+// Stress test for many calls to WriteBuffer
+TEST_P(QueueWriteBufferTests, ManyWriteBuffer) {
+    // Note: Increasing the size of the buffer will likely cause timeout issues.
+    // In D3D12, timeout detection occurs when the GPU scheduler tries but cannot preempt the task
+    // executing these commands in-flight. If this takes longer than ~2s, a device reset occurs and
+    // fails the test. Since GPUs may or may not complete by then, this test must be disabled OR
+    // modified to be well-below the timeout limit.
+
+    // TODO (jiawei.shao@intel.com): find out why this test fails on Intel Vulkan Linux bots.
+    DAWN_SKIP_TEST_IF(IsIntel() && IsVulkan() && IsLinux());
+    // TODO(https://bugs.chromium.org/p/dawn/issues/detail?id=228): Re-enable
+    // once the issue with Metal on 10.14.6 is fixed.
+    DAWN_SKIP_TEST_IF(IsMacOS() && IsIntel() && IsMetal());
+
+    constexpr uint64_t kSize = 4000 * 1000;
+    constexpr uint32_t kElements = 500 * 500;
+    wgpu::BufferDescriptor descriptor;
+    descriptor.size = kSize;
+    descriptor.usage = wgpu::BufferUsage::CopySrc | wgpu::BufferUsage::CopyDst;
+    wgpu::Buffer buffer = device.CreateBuffer(&descriptor);
+
+    std::vector<uint32_t> expectedData;
+    for (uint32_t i = 0; i < kElements; ++i) {
+        queue.WriteBuffer(buffer, i * sizeof(uint32_t), &i, sizeof(i));
+        expectedData.push_back(i);
+    }
+
+    EXPECT_BUFFER_U32_RANGE_EQ(expectedData.data(), buffer, 0, kElements);
+}
+
+// Test using WriteBuffer for lots of data
+TEST_P(QueueWriteBufferTests, LargeWriteBuffer) {
+    constexpr uint64_t kSize = 4000 * 1000;
+    constexpr uint32_t kElements = 1000 * 1000;
+    wgpu::BufferDescriptor descriptor;
+    descriptor.size = kSize;
+    descriptor.usage = wgpu::BufferUsage::CopySrc | wgpu::BufferUsage::CopyDst;
+    wgpu::Buffer buffer = device.CreateBuffer(&descriptor);
+
+    std::vector<uint32_t> expectedData;
+    for (uint32_t i = 0; i < kElements; ++i) {
+        expectedData.push_back(i);
+    }
+
+    queue.WriteBuffer(buffer, 0, expectedData.data(), kElements * sizeof(uint32_t));
+
+    EXPECT_BUFFER_U32_RANGE_EQ(expectedData.data(), buffer, 0, kElements);
+}
+
+// Test using WriteBuffer for super large data block
+TEST_P(QueueWriteBufferTests, SuperLargeWriteBuffer) {
+    constexpr uint64_t kSize = 12000 * 1000;
+    constexpr uint64_t kElements = 3000 * 1000;
+    wgpu::BufferDescriptor descriptor;
+    descriptor.size = kSize;
+    descriptor.usage = wgpu::BufferUsage::CopySrc | wgpu::BufferUsage::CopyDst;
+    wgpu::Buffer buffer = device.CreateBuffer(&descriptor);
+
+    std::vector<uint32_t> expectedData;
+    for (uint32_t i = 0; i < kElements; ++i) {
+        expectedData.push_back(i);
+    }
+
+    queue.WriteBuffer(buffer, 0, expectedData.data(), kElements * sizeof(uint32_t));
+
+    EXPECT_BUFFER_U32_RANGE_EQ(expectedData.data(), buffer, 0, kElements);
+}
+
+DAWN_INSTANTIATE_TEST(QueueWriteBufferTests,
+                      D3D12Backend(),
+                      MetalBackend(),
+                      OpenGLBackend(),
+                      VulkanBackend());
diff --git a/src/tests/end2end/TextureFormatTests.cpp b/src/tests/end2end/TextureFormatTests.cpp
index cad0bc9..b6fbdb1 100644
--- a/src/tests/end2end/TextureFormatTests.cpp
+++ b/src/tests/end2end/TextureFormatTests.cpp
@@ -224,7 +224,7 @@
         ASSERT(sampleDataSize % sampleFormatInfo.texelByteSize == 0);
         uint32_t width = sampleDataSize / sampleFormatInfo.texelByteSize;
 
-        // The input data must be a multiple of 4 byte in length for setSubData
+        // The input data must be a multiple of 4 bytes in length for WriteBuffer
         ASSERT(sampleDataSize % 4 == 0);
         ASSERT(expectedRenderDataSize % 4 == 0);
 
diff --git a/src/tests/end2end/TextureZeroInitTests.cpp b/src/tests/end2end/TextureZeroInitTests.cpp
index d6b0df8..40962f5 100644
--- a/src/tests/end2end/TextureZeroInitTests.cpp
+++ b/src/tests/end2end/TextureZeroInitTests.cpp
@@ -586,7 +586,7 @@
     wgpu::Buffer bufferTex = device.CreateBuffer(&bufferDescriptor);
     // Add data to buffer to ensure it is initialized
     uint32_t data = 100;
-    bufferTex.SetSubData(0, sizeof(data), &data);
+    queue.WriteBuffer(bufferTex, 0, &data, sizeof(data));
 
     wgpu::SamplerDescriptor samplerDesc = utils::GetDefaultSamplerDescriptor();
     wgpu::Sampler sampler = device.CreateSampler(&samplerDesc);
diff --git a/src/tests/perf_tests/BufferUploadPerf.cpp b/src/tests/perf_tests/BufferUploadPerf.cpp
index 5849bba..49e52f5 100644
--- a/src/tests/perf_tests/BufferUploadPerf.cpp
+++ b/src/tests/perf_tests/BufferUploadPerf.cpp
@@ -22,7 +22,7 @@
     constexpr unsigned int kNumIterations = 50;
 
     enum class UploadMethod {
-        SetSubData,
+        WriteBuffer,
         CreateBufferMapped,
     };
 
@@ -52,8 +52,8 @@
         ostream << static_cast<const AdapterTestParam&>(param);
 
         switch (param.uploadMethod) {
-            case UploadMethod::SetSubData:
-                ostream << "_SetSubData";
+            case UploadMethod::WriteBuffer:
+                ostream << "_WriteBuffer";
                 break;
             case UploadMethod::CreateBufferMapped:
                 ostream << "_CreateBufferMapped";
@@ -113,11 +113,11 @@
 
 void BufferUploadPerf::Step() {
     switch (GetParam().uploadMethod) {
-        case UploadMethod::SetSubData: {
+        case UploadMethod::WriteBuffer: {
             for (unsigned int i = 0; i < kNumIterations; ++i) {
-                dst.SetSubData(0, data.size(), data.data());
+                queue.WriteBuffer(dst, 0, data.data(), data.size());
             }
-            // Make sure all SetSubData's are flushed.
+            // Make sure all WriteBuffer calls are flushed.
             queue.Submit(0, nullptr);
             break;
         }
@@ -150,7 +150,7 @@
 DAWN_INSTANTIATE_PERF_TEST_SUITE_P(BufferUploadPerf,
                                    {D3D12Backend(), MetalBackend(), OpenGLBackend(),
                                     VulkanBackend()},
-                                   {UploadMethod::SetSubData, UploadMethod::CreateBufferMapped},
+                                   {UploadMethod::WriteBuffer, UploadMethod::CreateBufferMapped},
                                    {UploadSize::BufferSize_1KB, UploadSize::BufferSize_64KB,
                                     UploadSize::BufferSize_1MB, UploadSize::BufferSize_4MB,
                                     UploadSize::BufferSize_16MB});
diff --git a/src/tests/perf_tests/DrawCallPerf.cpp b/src/tests/perf_tests/DrawCallPerf.cpp
index 72e837a..1b1fe58 100644
--- a/src/tests/perf_tests/DrawCallPerf.cpp
+++ b/src/tests/perf_tests/DrawCallPerf.cpp
@@ -568,18 +568,20 @@
         switch (GetParam().bindGroupType) {
             case BindGroup::NoChange:
             case BindGroup::Redundant:
-                mUniformBuffers[0].SetSubData(0, 3 * sizeof(float), mUniformBufferData.data());
+                queue.WriteBuffer(mUniformBuffers[0], 0, mUniformBufferData.data(),
+                                  3 * sizeof(float));
                 break;
             case BindGroup::NoReuse:
             case BindGroup::Multiple:
                 for (uint32_t i = 0; i < kNumDraws; ++i) {
-                    mUniformBuffers[i].SetSubData(
-                        0, 3 * sizeof(float), mUniformBufferData.data() + i * mNumUniformFloats);
+                    queue.WriteBuffer(mUniformBuffers[i], 0,
+                                      mUniformBufferData.data() + i * mNumUniformFloats,
+                                      3 * sizeof(float));
                 }
                 break;
             case BindGroup::Dynamic:
-                mUniformBuffers[0].SetSubData(0, mUniformBufferData.size() * sizeof(float),
-                                              mUniformBufferData.data());
+                queue.WriteBuffer(mUniformBuffers[0], 0, mUniformBufferData.data(),
+                                  mUniformBufferData.size() * sizeof(float));
                 break;
         }
     }
diff --git a/src/tests/unittests/validation/BufferValidationTests.cpp b/src/tests/unittests/validation/BufferValidationTests.cpp
index 1bda448..0f74d66 100644
--- a/src/tests/unittests/validation/BufferValidationTests.cpp
+++ b/src/tests/unittests/validation/BufferValidationTests.cpp
@@ -74,13 +74,6 @@
 
           return device.CreateBuffer(&descriptor);
       }
-      wgpu::Buffer CreateSetSubDataBuffer(uint64_t size) {
-          wgpu::BufferDescriptor descriptor;
-          descriptor.size = size;
-          descriptor.usage = wgpu::BufferUsage::CopyDst;
-
-          return device.CreateBuffer(&descriptor);
-      }
 
       wgpu::CreateBufferMappedResult CreateBufferMapped(uint64_t size, wgpu::BufferUsage usage) {
           wgpu::BufferDescriptor descriptor;
@@ -429,72 +422,6 @@
     queue.Submit(0, nullptr);
 }
 
-// Test the success case for Buffer::SetSubData
-TEST_F(BufferValidationTest, SetSubDataSuccess) {
-    wgpu::Buffer buf = CreateSetSubDataBuffer(4);
-
-    uint32_t foo = 0x01020304;
-    buf.SetSubData(0, sizeof(foo), &foo);
-}
-
-// Test error case for SetSubData out of bounds
-TEST_F(BufferValidationTest, SetSubDataOutOfBounds) {
-    wgpu::Buffer buf = CreateSetSubDataBuffer(1);
-
-    uint8_t foo[2] = {0, 0};
-    ASSERT_DEVICE_ERROR(buf.SetSubData(0, 2, foo));
-}
-
-// Test error case for SetSubData out of bounds with an overflow
-TEST_F(BufferValidationTest, SetSubDataOutOfBoundsOverflow) {
-    wgpu::Buffer buf = CreateSetSubDataBuffer(1000);
-
-    uint8_t foo[2] = {0, 0};
-
-    // An offset that when added to "2" would overflow to be zero and pass validation without
-    // overflow checks.
-    uint64_t offset = uint64_t(int64_t(0) - int64_t(2));
-
-    ASSERT_DEVICE_ERROR(buf.SetSubData(offset, 2, foo));
-}
-
-// Test error case for SetSubData with the wrong usage
-TEST_F(BufferValidationTest, SetSubDataWrongUsage) {
-    wgpu::BufferDescriptor descriptor;
-    descriptor.size = 4;
-    descriptor.usage = wgpu::BufferUsage::Vertex;
-
-    wgpu::Buffer buf = device.CreateBuffer(&descriptor);
-
-    uint8_t foo = 0;
-    ASSERT_DEVICE_ERROR(buf.SetSubData(0, sizeof(foo), &foo));
-}
-
-// Test SetSubData with unaligned size
-TEST_F(BufferValidationTest, SetSubDataWithUnalignedSize) {
-    wgpu::BufferDescriptor descriptor;
-    descriptor.size = 4;
-    descriptor.usage = wgpu::BufferUsage::CopySrc | wgpu::BufferUsage::CopyDst;
-
-    wgpu::Buffer buf = device.CreateBuffer(&descriptor);
-
-    uint8_t value = 123;
-    ASSERT_DEVICE_ERROR(buf.SetSubData(0, sizeof(value), &value));
-}
-
-// Test SetSubData with unaligned offset
-TEST_F(BufferValidationTest, SetSubDataWithUnalignedOffset) {
-    wgpu::BufferDescriptor descriptor;
-    descriptor.size = 4000;
-    descriptor.usage = wgpu::BufferUsage::CopySrc | wgpu::BufferUsage::CopyDst;
-
-    wgpu::Buffer buf = device.CreateBuffer(&descriptor);
-
-    uint64_t kOffset = 2999;
-    uint32_t value = 0x01020304;
-    ASSERT_DEVICE_ERROR(buf.SetSubData(kOffset, sizeof(value), &value));
-}
-
 // Test that it is valid to destroy an unmapped buffer
 TEST_F(BufferValidationTest, DestroyUnmappedBuffer) {
     {
@@ -547,7 +474,7 @@
 
 // Test that it is valid to Destroy a destroyed buffer
 TEST_F(BufferValidationTest, DestroyDestroyedBuffer) {
-    wgpu::Buffer buf = CreateSetSubDataBuffer(4);
+    wgpu::Buffer buf = CreateMapWriteBuffer(4);
     buf.Destroy();
     buf.Destroy();
 }
@@ -580,14 +507,6 @@
     }
 }
 
-// Test that it is invalid to call SetSubData on a destroyed buffer
-TEST_F(BufferValidationTest, SetSubDataDestroyedBuffer) {
-    wgpu::Buffer buf = CreateSetSubDataBuffer(4);
-    buf.Destroy();
-    uint8_t foo = 0;
-    ASSERT_DEVICE_ERROR(buf.SetSubData(0, sizeof(foo), &foo));
-}
-
 // Test that is is invalid to Map a mapped buffer
 TEST_F(BufferValidationTest, MapMappedBuffer) {
     {
@@ -618,24 +537,6 @@
     }
 }
 
-// Test that it is invalid to call SetSubData on a mapped buffer
-TEST_F(BufferValidationTest, SetSubDataMappedBuffer) {
-    {
-        wgpu::Buffer buf = CreateMapReadBuffer(4);
-        buf.MapReadAsync(ToMockBufferMapReadCallback, nullptr);
-        uint8_t foo = 0;
-        ASSERT_DEVICE_ERROR(buf.SetSubData(0, sizeof(foo), &foo));
-        queue.Submit(0, nullptr);
-    }
-    {
-        wgpu::Buffer buf = CreateMapWriteBuffer(4);
-        buf.MapWriteAsync(ToMockBufferMapWriteCallback, nullptr);
-        uint8_t foo = 0;
-        ASSERT_DEVICE_ERROR(buf.SetSubData(0, sizeof(foo), &foo));
-        queue.Submit(0, nullptr);
-    }
-}
-
 // Test that it is valid to submit a buffer in a queue with a map usage if it is unmapped
 TEST_F(BufferValidationTest, SubmitBufferWithMapUsage) {
     wgpu::BufferDescriptor descriptorA;
@@ -732,7 +633,11 @@
 
 // Test that a map usage is required to call Unmap
 TEST_F(BufferValidationTest, UnmapWithoutMapUsage) {
-    wgpu::Buffer buf = CreateSetSubDataBuffer(4);
+    wgpu::BufferDescriptor descriptor;
+    descriptor.size = 4;
+    descriptor.usage = wgpu::BufferUsage::CopyDst;
+    wgpu::Buffer buf = device.CreateBuffer(&descriptor);
+
     ASSERT_DEVICE_ERROR(buf.Unmap());
 }
 
diff --git a/src/tests/unittests/validation/QueueSubmitValidationTests.cpp b/src/tests/unittests/validation/QueueSubmitValidationTests.cpp
index 66e0691..a74dd7e 100644
--- a/src/tests/unittests/validation/QueueSubmitValidationTests.cpp
+++ b/src/tests/unittests/validation/QueueSubmitValidationTests.cpp
@@ -66,4 +66,125 @@
     queue.Submit(1, &commands);
 }
 
+class QueueWriteBufferValidationTest : public ValidationTest {
+  private:
+    void SetUp() override {
+        ValidationTest::SetUp();
+        queue = device.GetDefaultQueue();
+    }
+
+  protected:
+    wgpu::Buffer CreateBuffer(uint64_t size) {
+        wgpu::BufferDescriptor descriptor;
+        descriptor.size = size;
+        descriptor.usage = wgpu::BufferUsage::CopyDst;
+        return device.CreateBuffer(&descriptor);
+    }
+
+    wgpu::Queue queue;
+};
+
+// Test the success case for WriteBuffer
+TEST_F(QueueWriteBufferValidationTest, Success) {
+    wgpu::Buffer buf = CreateBuffer(4);
+
+    uint32_t foo = 0x01020304;
+    queue.WriteBuffer(buf, 0, &foo, sizeof(foo));
+}
+
+// Test error case for WriteBuffer out of bounds
+TEST_F(QueueWriteBufferValidationTest, OutOfBounds) {
+    wgpu::Buffer buf = CreateBuffer(4);
+
+    uint32_t foo[2] = {0, 0};
+    ASSERT_DEVICE_ERROR(queue.WriteBuffer(buf, 0, foo, 8));
+}
+
+// Test error case for WriteBuffer out of bounds with an overflow
+TEST_F(QueueWriteBufferValidationTest, OutOfBoundsOverflow) {
+    wgpu::Buffer buf = CreateBuffer(1024);
+
+    uint32_t foo[2] = {0, 0};
+
+    // An offset that when added to "4" would overflow to be zero and pass validation without
+    // overflow checks.
+    uint64_t offset = uint64_t(int64_t(0) - int64_t(4));
+
+    ASSERT_DEVICE_ERROR(queue.WriteBuffer(buf, offset, foo, 4));
+}
+
+// Test error case for WriteBuffer with the wrong usage
+TEST_F(QueueWriteBufferValidationTest, WrongUsage) {
+    wgpu::BufferDescriptor descriptor;
+    descriptor.size = 4;
+    descriptor.usage = wgpu::BufferUsage::Vertex;
+    wgpu::Buffer buf = device.CreateBuffer(&descriptor);
+
+    uint32_t foo = 0;
+    ASSERT_DEVICE_ERROR(queue.WriteBuffer(buf, 0, &foo, sizeof(foo)));
+}
+
+// Test WriteBuffer with unaligned size
+TEST_F(QueueWriteBufferValidationTest, UnalignedSize) {
+    wgpu::Buffer buf = CreateBuffer(4);
+
+    uint16_t value = 123;
+    ASSERT_DEVICE_ERROR(queue.WriteBuffer(buf, 0, &value, sizeof(value)));
+}
+
+// Test WriteBuffer with unaligned offset
+TEST_F(QueueWriteBufferValidationTest, UnalignedOffset) {
+    wgpu::Buffer buf = CreateBuffer(8);
+
+    uint32_t value = 0x01020304;
+    ASSERT_DEVICE_ERROR(queue.WriteBuffer(buf, 2, &value, sizeof(value)));
+}
+
+// Test WriteBuffer with destroyed buffer
+TEST_F(QueueWriteBufferValidationTest, DestroyedBuffer) {
+    wgpu::Buffer buf = CreateBuffer(4);
+    buf.Destroy();
+
+    uint32_t value = 0;
+    ASSERT_DEVICE_ERROR(queue.WriteBuffer(buf, 0, &value, sizeof(value)));
+}
+
+// Test WriteBuffer with mapped buffer
+TEST_F(QueueWriteBufferValidationTest, MappedBuffer) {
+    // CreateBufferMapped
+    {
+        wgpu::BufferDescriptor descriptor;
+        descriptor.size = 4;
+        descriptor.usage = wgpu::BufferUsage::CopyDst;
+        wgpu::CreateBufferMappedResult result = device.CreateBufferMapped(&descriptor);
+
+        uint32_t value = 0;
+        ASSERT_DEVICE_ERROR(queue.WriteBuffer(result.buffer, 0, &value, sizeof(value)));
+    }
+
+    // MapReadAsync
+    {
+        wgpu::BufferDescriptor descriptor;
+        descriptor.size = 4;
+        descriptor.usage = wgpu::BufferUsage::CopyDst | wgpu::BufferUsage::MapRead;
+        wgpu::Buffer buf = device.CreateBuffer(&descriptor);
+
+        buf.MapReadAsync(nullptr, nullptr);
+        uint32_t value = 0;
+        ASSERT_DEVICE_ERROR(queue.WriteBuffer(buf, 0, &value, sizeof(value)));
+    }
+
+    // MapWriteAsync. NOTE(review): body repeats the MapReadAsync case above; confirm intent.
+    {
+        wgpu::BufferDescriptor descriptor;
+        descriptor.size = 4;
+        descriptor.usage = wgpu::BufferUsage::CopyDst | wgpu::BufferUsage::MapRead;
+        wgpu::Buffer buf = device.CreateBuffer(&descriptor);
+
+        buf.MapReadAsync(nullptr, nullptr);
+        uint32_t value = 0;
+        ASSERT_DEVICE_ERROR(queue.WriteBuffer(buf, 0, &value, sizeof(value)));
+    }
+}
+
 }  // anonymous namespace
diff --git a/src/tests/white_box/D3D12DescriptorHeapTests.cpp b/src/tests/white_box/D3D12DescriptorHeapTests.cpp
index b6eb4a6..9b4ffe7 100644
--- a/src/tests/white_box/D3D12DescriptorHeapTests.cpp
+++ b/src/tests/white_box/D3D12DescriptorHeapTests.cpp
@@ -601,7 +601,7 @@
     // Re-encode the first bindgroup again.
     {
         std::array<float, 4> greenColor = {0, 1, 0, 1};
-        firstUniformBuffer.SetSubData(0, sizeof(greenColor), &greenColor);
+        queue.WriteBuffer(firstUniformBuffer, 0, &greenColor, sizeof(greenColor));
 
         wgpu::CommandEncoder encoder = device.CreateCommandEncoder();
         {
diff --git a/src/tests/white_box/D3D12ResidencyTests.cpp b/src/tests/white_box/D3D12ResidencyTests.cpp
index e58e48a..8d958ab5 100644
--- a/src/tests/white_box/D3D12ResidencyTests.cpp
+++ b/src/tests/white_box/D3D12ResidencyTests.cpp
@@ -217,7 +217,7 @@
     wgpu::Buffer buffer = CreateBuffer(4, kMapReadBufferUsage);
 
     uint32_t data = 12345;
-    buffer.SetSubData(0, sizeof(uint32_t), &data);
+    queue.WriteBuffer(buffer, 0, &data, sizeof(uint32_t));
 
     // The mappable buffer should be resident.
     EXPECT_TRUE(CheckIfBufferIsResident(buffer));
diff --git a/src/utils/WGPUHelpers.cpp b/src/utils/WGPUHelpers.cpp
index 5b016b0..f0d1242 100644
--- a/src/utils/WGPUHelpers.cpp
+++ b/src/utils/WGPUHelpers.cpp
@@ -161,9 +161,9 @@
         wgpu::BufferDescriptor descriptor;
         descriptor.size = size;
         descriptor.usage = usage | wgpu::BufferUsage::CopyDst;
-
         wgpu::Buffer buffer = device.CreateBuffer(&descriptor);
-        buffer.SetSubData(0, size, data);
+
+        device.GetDefaultQueue().WriteBuffer(buffer, 0, data, size);
         return buffer;
     }