Byte counting WriteBuffer/Texture to submit

Only tag to submit when the total size is larger than the threshold,
so that we can make as few submits as possible meanwhile avoiding OOM.

Bug: chromium:1258986
Change-Id: I7190e1bb942bfaffc5cd424ce4743173735b25e3
Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/106418
Reviewed-by: Corentin Wallez <cwallez@chromium.org>
Kokoro: Kokoro <noreply+kokoro@google.com>
Reviewed-by: Austin Eng <enga@chromium.org>
Commit-Queue: Jie A Chen <jie.a.chen@intel.com>
diff --git a/src/dawn/native/Device.cpp b/src/dawn/native/Device.cpp
index 3ec0328..a4e2e17 100644
--- a/src/dawn/native/Device.cpp
+++ b/src/dawn/native/Device.cpp
@@ -1918,4 +1918,28 @@
     return HasPendingCommands() ? GetPendingCommandSerial() : GetLastSubmittedCommandSerial();
 }
 
+MaybeError DeviceBase::CopyFromStagingToBuffer(StagingBufferBase* source,
+                                               uint64_t sourceOffset,
+                                               BufferBase* destination,
+                                               uint64_t destinationOffset,
+                                               uint64_t size) {
+    DAWN_TRY(
+        CopyFromStagingToBufferImpl(source, sourceOffset, destination, destinationOffset, size));
+    if (GetDynamicUploader()->ShouldFlush()) {
+        ForceEventualFlushOfCommands();
+    }
+    return {};
+}
+
+MaybeError DeviceBase::CopyFromStagingToTexture(const StagingBufferBase* source,
+                                                const TextureDataLayout& src,
+                                                TextureCopy* dst,
+                                                const Extent3D& copySizePixels) {
+    DAWN_TRY(CopyFromStagingToTextureImpl(source, src, dst, copySizePixels));
+    if (GetDynamicUploader()->ShouldFlush()) {
+        ForceEventualFlushOfCommands();
+    }
+    return {};
+}
+
 }  // namespace dawn::native
diff --git a/src/dawn/native/Device.h b/src/dawn/native/Device.h
index 28ea5b7..6fd0d5a 100644
--- a/src/dawn/native/Device.h
+++ b/src/dawn/native/Device.h
@@ -298,15 +298,15 @@
     void StoreCachedBlob(const CacheKey& key, const Blob& blob);
 
     virtual ResultOrError<std::unique_ptr<StagingBufferBase>> CreateStagingBuffer(size_t size) = 0;
-    virtual MaybeError CopyFromStagingToBuffer(StagingBufferBase* source,
-                                               uint64_t sourceOffset,
-                                               BufferBase* destination,
-                                               uint64_t destinationOffset,
-                                               uint64_t size) = 0;
-    virtual MaybeError CopyFromStagingToTexture(const StagingBufferBase* source,
-                                                const TextureDataLayout& src,
-                                                TextureCopy* dst,
-                                                const Extent3D& copySizePixels) = 0;
+    MaybeError CopyFromStagingToBuffer(StagingBufferBase* source,
+                                       uint64_t sourceOffset,
+                                       BufferBase* destination,
+                                       uint64_t destinationOffset,
+                                       uint64_t size);
+    MaybeError CopyFromStagingToTexture(const StagingBufferBase* source,
+                                        const TextureDataLayout& src,
+                                        TextureCopy* dst,
+                                        const Extent3D& copySizePixels);
 
     DynamicUploader* GetDynamicUploader() const;
 
@@ -405,6 +405,15 @@
     // The serial by which time all currently submitted or pending operations will be completed.
     ExecutionSerial GetScheduledWorkDoneSerial() const;
 
+    // For the commands being internally recorded in backend, that were not urgent to submit, this
+    // method makes them to be submitted as soon as possbile in next ticks.
+    virtual void ForceEventualFlushOfCommands() = 0;
+
+    // In the 'Normal' mode, currently recorded commands in the backend normally will be actually
+    // submitted in the next Tick. However in the 'Passive' mode, the submission will be postponed
+    // as late as possible, for example, until the client has explictly issued a submission.
+    enum class SubmitMode { Normal, Passive };
+
   protected:
     // Constructor used only for mocking and testing.
     DeviceBase();
@@ -515,6 +524,16 @@
     // Indicates whether the backend has pending commands to be submitted as soon as possible.
     virtual bool HasPendingCommands() const = 0;
 
+    virtual MaybeError CopyFromStagingToBufferImpl(StagingBufferBase* source,
+                                                   uint64_t sourceOffset,
+                                                   BufferBase* destination,
+                                                   uint64_t destinationOffset,
+                                                   uint64_t size) = 0;
+    virtual MaybeError CopyFromStagingToTextureImpl(const StagingBufferBase* source,
+                                                    const TextureDataLayout& src,
+                                                    TextureCopy* dst,
+                                                    const Extent3D& copySizePixels) = 0;
+
     wgpu::ErrorCallback mUncapturedErrorCallback = nullptr;
     void* mUncapturedErrorUserdata = nullptr;
 
diff --git a/src/dawn/native/DynamicUploader.cpp b/src/dawn/native/DynamicUploader.cpp
index bae374f..9dca057 100644
--- a/src/dawn/native/DynamicUploader.cpp
+++ b/src/dawn/native/DynamicUploader.cpp
@@ -126,4 +126,25 @@
     uploadHandle.startOffset += additionalOffset;
     return uploadHandle;
 }
+
+bool DynamicUploader::ShouldFlush() {
+    uint64_t kTotalAllocatedSizeThreshold = 64 * 1024 * 1024;
+    // We use total allocated size instead of pending-upload size to prevent Dawn from allocating
+    // too much GPU memory so that the risk of OOM can be minimized.
+    return GetTotalAllocatedSize() > kTotalAllocatedSizeThreshold;
+}
+
+uint64_t DynamicUploader::GetTotalAllocatedSize() {
+    uint64_t size = 0;
+    for (const auto& buffer : mReleasedStagingBuffers.IterateAll()) {
+        size += buffer->GetSize();
+    }
+    for (const auto& buffer : mRingBuffers) {
+        if (buffer->mStagingBuffer != nullptr) {
+            size += buffer->mStagingBuffer->GetSize();
+        }
+    }
+    return size;
+}
+
 }  // namespace dawn::native
diff --git a/src/dawn/native/DynamicUploader.h b/src/dawn/native/DynamicUploader.h
index 0317e8d..9cc6a83 100644
--- a/src/dawn/native/DynamicUploader.h
+++ b/src/dawn/native/DynamicUploader.h
@@ -49,8 +49,11 @@
                                          uint64_t offsetAlignment);
     void Deallocate(ExecutionSerial lastCompletedSerial);
 
+    bool ShouldFlush();
+
   private:
     static constexpr uint64_t kRingBufferSize = 4 * 1024 * 1024;
+    uint64_t GetTotalAllocatedSize();
 
     struct RingBuffer {
         std::unique_ptr<StagingBufferBase> mStagingBuffer;
diff --git a/src/dawn/native/Queue.cpp b/src/dawn/native/Queue.cpp
index 801e8ed..d1d6025 100644
--- a/src/dawn/native/Queue.cpp
+++ b/src/dawn/native/Queue.cpp
@@ -225,6 +225,7 @@
 }
 
 void QueueBase::TrackTask(std::unique_ptr<TrackTaskCallback> task) {
+    GetDevice()->ForceEventualFlushOfCommands();
     // we can move the task to the callback task manager, as it's ready to be called if there are no
     // scheduled commands.
     if (!GetDevice()->HasScheduledCommands()) {
diff --git a/src/dawn/native/d3d12/BufferD3D12.cpp b/src/dawn/native/d3d12/BufferD3D12.cpp
index f880760..eeb19c3 100644
--- a/src/dawn/native/d3d12/BufferD3D12.cpp
+++ b/src/dawn/native/d3d12/BufferD3D12.cpp
@@ -488,8 +488,8 @@
 
         memset(uploadHandle.mappedBuffer, clearValue, size);
 
-        device->CopyFromStagingToBufferImpl(commandContext, uploadHandle.stagingBuffer,
-                                            uploadHandle.startOffset, this, offset, size);
+        device->CopyFromStagingToBufferHelper(commandContext, uploadHandle.stagingBuffer,
+                                              uploadHandle.startOffset, this, offset, size);
     }
 
     return {};
diff --git a/src/dawn/native/d3d12/CommandRecordingContext.cpp b/src/dawn/native/d3d12/CommandRecordingContext.cpp
index cd44e4a..6a368a0 100644
--- a/src/dawn/native/d3d12/CommandRecordingContext.cpp
+++ b/src/dawn/native/d3d12/CommandRecordingContext.cpp
@@ -60,6 +60,7 @@
     }
 
     mIsOpen = true;
+    mNeedsSubmit = false;
 
     return {};
 }
@@ -128,6 +129,7 @@
         }
 
         mIsOpen = false;
+        mNeedsSubmit = false;
         mSharedTextures.clear();
         mHeapsPendingUsage.clear();
         mTempBuffers.clear();
@@ -162,6 +164,7 @@
     mD3d12CommandList.Reset();
     mD3d12CommandList4.Reset();
     mIsOpen = false;
+    mNeedsSubmit = false;
     mSharedTextures.clear();
     mHeapsPendingUsage.clear();
     mTempBuffers.clear();
@@ -171,6 +174,14 @@
     return mIsOpen;
 }
 
+bool CommandRecordingContext::NeedsSubmit() const {
+    return mNeedsSubmit;
+}
+
+void CommandRecordingContext::SetNeedsSubmit() {
+    mNeedsSubmit = true;
+}
+
 void CommandRecordingContext::AddToTempBuffers(Ref<Buffer> tempBuffer) {
     mTempBuffers.emplace_back(tempBuffer);
 }
diff --git a/src/dawn/native/d3d12/CommandRecordingContext.h b/src/dawn/native/d3d12/CommandRecordingContext.h
index 80b6204..49a41c1 100644
--- a/src/dawn/native/d3d12/CommandRecordingContext.h
+++ b/src/dawn/native/d3d12/CommandRecordingContext.h
@@ -37,6 +37,8 @@
     ID3D12GraphicsCommandList4* GetCommandList4() const;
     void Release();
     bool IsOpen() const;
+    bool NeedsSubmit() const;
+    void SetNeedsSubmit();
 
     MaybeError ExecuteCommandList(Device* device);
 
@@ -48,6 +50,7 @@
     ComPtr<ID3D12GraphicsCommandList> mD3d12CommandList;
     ComPtr<ID3D12GraphicsCommandList4> mD3d12CommandList4;
     bool mIsOpen = false;
+    bool mNeedsSubmit = false;
     std::set<Texture*> mSharedTextures;
     std::vector<Heap*> mHeapsPendingUsage;
 
diff --git a/src/dawn/native/d3d12/DeviceD3D12.cpp b/src/dawn/native/d3d12/DeviceD3D12.cpp
index 73bb13b..d26fefd 100644
--- a/src/dawn/native/d3d12/DeviceD3D12.cpp
+++ b/src/dawn/native/d3d12/DeviceD3D12.cpp
@@ -274,12 +274,16 @@
     return mResidencyManager.get();
 }
 
-ResultOrError<CommandRecordingContext*> Device::GetPendingCommandContext() {
+ResultOrError<CommandRecordingContext*> Device::GetPendingCommandContext(
+    Device::SubmitMode submitMode) {
     // Callers of GetPendingCommandList do so to record commands. Only reserve a command
     // allocator when it is needed so we don't submit empty command lists
     if (!mPendingCommands.IsOpen()) {
         DAWN_TRY(mPendingCommands.Open(mD3d12Device.Get(), mCommandAllocatorManager.get()));
     }
+    if (submitMode == Device::SubmitMode::Normal) {
+        mPendingCommands.SetNeedsSubmit();
+    }
     return &mPendingCommands;
 }
 
@@ -309,9 +313,9 @@
 
         memset(uploadHandle.mappedBuffer, 0u, kZeroBufferSize);
 
-        CopyFromStagingToBufferImpl(commandContext, uploadHandle.stagingBuffer,
-                                    uploadHandle.startOffset, mZeroBuffer.Get(), 0,
-                                    kZeroBufferSize);
+        CopyFromStagingToBufferHelper(commandContext, uploadHandle.stagingBuffer,
+                                      uploadHandle.startOffset, mZeroBuffer.Get(), 0,
+                                      kZeroBufferSize);
 
         mZeroBuffer->SetIsDataInitialized();
     }
@@ -346,7 +350,7 @@
     mDepthStencilViewAllocator->Tick(completedSerial);
     mUsedComObjectRefs.ClearUpTo(completedSerial);
 
-    if (mPendingCommands.IsOpen()) {
+    if (mPendingCommands.IsOpen() && mPendingCommands.NeedsSubmit()) {
         DAWN_TRY(ExecutePendingCommandContext());
         DAWN_TRY(NextSerial());
     }
@@ -401,7 +405,13 @@
 }
 
 bool Device::HasPendingCommands() const {
-    return mPendingCommands.IsOpen();
+    return mPendingCommands.NeedsSubmit();
+}
+
+void Device::ForceEventualFlushOfCommands() {
+    if (mPendingCommands.IsOpen()) {
+        mPendingCommands.SetNeedsSubmit();
+    }
 }
 
 MaybeError Device::ExecutePendingCommandContext() {
@@ -484,13 +494,13 @@
     return std::move(stagingBuffer);
 }
 
-MaybeError Device::CopyFromStagingToBuffer(StagingBufferBase* source,
-                                           uint64_t sourceOffset,
-                                           BufferBase* destination,
-                                           uint64_t destinationOffset,
-                                           uint64_t size) {
+MaybeError Device::CopyFromStagingToBufferImpl(StagingBufferBase* source,
+                                               uint64_t sourceOffset,
+                                               BufferBase* destination,
+                                               uint64_t destinationOffset,
+                                               uint64_t size) {
     CommandRecordingContext* commandRecordingContext;
-    DAWN_TRY_ASSIGN(commandRecordingContext, GetPendingCommandContext());
+    DAWN_TRY_ASSIGN(commandRecordingContext, GetPendingCommandContext(Device::SubmitMode::Passive));
 
     Buffer* dstBuffer = ToBackend(destination);
 
@@ -499,18 +509,18 @@
                                  commandRecordingContext, destinationOffset, size));
     DAWN_UNUSED(cleared);
 
-    CopyFromStagingToBufferImpl(commandRecordingContext, source, sourceOffset, destination,
-                                destinationOffset, size);
+    CopyFromStagingToBufferHelper(commandRecordingContext, source, sourceOffset, destination,
+                                  destinationOffset, size);
 
     return {};
 }
 
-void Device::CopyFromStagingToBufferImpl(CommandRecordingContext* commandContext,
-                                         StagingBufferBase* source,
-                                         uint64_t sourceOffset,
-                                         BufferBase* destination,
-                                         uint64_t destinationOffset,
-                                         uint64_t size) {
+void Device::CopyFromStagingToBufferHelper(CommandRecordingContext* commandContext,
+                                           StagingBufferBase* source,
+                                           uint64_t sourceOffset,
+                                           BufferBase* destination,
+                                           uint64_t destinationOffset,
+                                           uint64_t size) {
     ASSERT(commandContext != nullptr);
     Buffer* dstBuffer = ToBackend(destination);
     StagingBuffer* srcBuffer = ToBackend(source);
@@ -521,12 +531,12 @@
                                                        sourceOffset, size);
 }
 
-MaybeError Device::CopyFromStagingToTexture(const StagingBufferBase* source,
-                                            const TextureDataLayout& src,
-                                            TextureCopy* dst,
-                                            const Extent3D& copySizePixels) {
+MaybeError Device::CopyFromStagingToTextureImpl(const StagingBufferBase* source,
+                                                const TextureDataLayout& src,
+                                                TextureCopy* dst,
+                                                const Extent3D& copySizePixels) {
     CommandRecordingContext* commandContext;
-    DAWN_TRY_ASSIGN(commandContext, GetPendingCommandContext());
+    DAWN_TRY_ASSIGN(commandContext, GetPendingCommandContext(Device::SubmitMode::Passive));
     Texture* texture = ToBackend(dst->texture.Get());
 
     SubresourceRange range = GetSubresourcesAffectedByCopy(*dst, copySizePixels);
diff --git a/src/dawn/native/d3d12/DeviceD3D12.h b/src/dawn/native/d3d12/DeviceD3D12.h
index 70f6fab..ccd5cb0 100644
--- a/src/dawn/native/d3d12/DeviceD3D12.h
+++ b/src/dawn/native/d3d12/DeviceD3D12.h
@@ -77,7 +77,8 @@
     ComPtr<IDxcCompiler> GetDxcCompiler() const;
     ComPtr<IDxcValidator> GetDxcValidator() const;
 
-    ResultOrError<CommandRecordingContext*> GetPendingCommandContext();
+    ResultOrError<CommandRecordingContext*> GetPendingCommandContext(
+        Device::SubmitMode submitMode = Device::SubmitMode::Normal);
 
     MaybeError ClearBufferToZero(CommandRecordingContext* commandContext,
                                  BufferBase* destination,
@@ -91,26 +92,28 @@
 
     void ReferenceUntilUnused(ComPtr<IUnknown> object);
 
+    void ForceEventualFlushOfCommands() override;
+
     MaybeError ExecutePendingCommandContext();
 
     ResultOrError<std::unique_ptr<StagingBufferBase>> CreateStagingBuffer(size_t size) override;
-    MaybeError CopyFromStagingToBuffer(StagingBufferBase* source,
+    MaybeError CopyFromStagingToBufferImpl(StagingBufferBase* source,
+                                           uint64_t sourceOffset,
+                                           BufferBase* destination,
+                                           uint64_t destinationOffset,
+                                           uint64_t size) override;
+
+    void CopyFromStagingToBufferHelper(CommandRecordingContext* commandContext,
+                                       StagingBufferBase* source,
                                        uint64_t sourceOffset,
                                        BufferBase* destination,
                                        uint64_t destinationOffset,
-                                       uint64_t size) override;
+                                       uint64_t size);
 
-    void CopyFromStagingToBufferImpl(CommandRecordingContext* commandContext,
-                                     StagingBufferBase* source,
-                                     uint64_t sourceOffset,
-                                     BufferBase* destination,
-                                     uint64_t destinationOffset,
-                                     uint64_t size);
-
-    MaybeError CopyFromStagingToTexture(const StagingBufferBase* source,
-                                        const TextureDataLayout& src,
-                                        TextureCopy* dst,
-                                        const Extent3D& copySizePixels) override;
+    MaybeError CopyFromStagingToTextureImpl(const StagingBufferBase* source,
+                                            const TextureDataLayout& src,
+                                            TextureCopy* dst,
+                                            const Extent3D& copySizePixels) override;
 
     ResultOrError<ResourceHeapAllocation> AllocateMemory(
         D3D12_HEAP_TYPE heapType,
diff --git a/src/dawn/native/metal/CommandRecordingContext.h b/src/dawn/native/metal/CommandRecordingContext.h
index 025c88b..ca096bb 100644
--- a/src/dawn/native/metal/CommandRecordingContext.h
+++ b/src/dawn/native/metal/CommandRecordingContext.h
@@ -30,6 +30,8 @@
     ~CommandRecordingContext();
 
     id<MTLCommandBuffer> GetCommands();
+    void SetNeedsSubmit();
+    bool NeedsSubmit() const;
     void MarkUsed();
     bool WasUsed() const;
 
@@ -59,6 +61,7 @@
     NSPRef<id<MTLComputeCommandEncoder>> mCompute;
     NSPRef<id<MTLRenderCommandEncoder>> mRender;
     bool mInEncoder = false;
+    bool mNeedsSubmit = false;
     bool mUsed = false;
 };
 
diff --git a/src/dawn/native/metal/CommandRecordingContext.mm b/src/dawn/native/metal/CommandRecordingContext.mm
index 294e53f..ad5ab82 100644
--- a/src/dawn/native/metal/CommandRecordingContext.mm
+++ b/src/dawn/native/metal/CommandRecordingContext.mm
@@ -29,6 +29,13 @@
     return mCommands.Get();
 }
 
+void CommandRecordingContext::SetNeedsSubmit() {
+    mNeedsSubmit = true;
+}
+bool CommandRecordingContext::NeedsSubmit() const {
+    return mNeedsSubmit;
+}
+
 void CommandRecordingContext::MarkUsed() {
     mUsed = true;
 }
@@ -38,6 +45,7 @@
 
 MaybeError CommandRecordingContext::PrepareNextCommandBuffer(id<MTLCommandQueue> queue) {
     ASSERT(mCommands == nil);
+    ASSERT(!mNeedsSubmit);
     ASSERT(!mUsed);
 
     // The MTLCommandBuffer will be autoreleased by default.
@@ -58,6 +66,7 @@
     }
 
     ASSERT(!mInEncoder);
+    mNeedsSubmit = false;
     mUsed = false;
     return std::move(mCommands);
 }
diff --git a/src/dawn/native/metal/DeviceMTL.h b/src/dawn/native/metal/DeviceMTL.h
index e6c14ea..fef04bf 100644
--- a/src/dawn/native/metal/DeviceMTL.h
+++ b/src/dawn/native/metal/DeviceMTL.h
@@ -49,7 +49,8 @@
     id<MTLDevice> GetMTLDevice();
     id<MTLCommandQueue> GetMTLQueue();
 
-    CommandRecordingContext* GetPendingCommandContext();
+    CommandRecordingContext* GetPendingCommandContext(
+        Device::SubmitMode submitMode = Device::SubmitMode::Normal);
     MaybeError SubmitPendingCommandBuffer();
 
     Ref<Texture> CreateTextureWrappingIOSurface(const ExternalImageDescriptor* descriptor,
@@ -57,15 +58,15 @@
     void WaitForCommandsToBeScheduled();
 
     ResultOrError<std::unique_ptr<StagingBufferBase>> CreateStagingBuffer(size_t size) override;
-    MaybeError CopyFromStagingToBuffer(StagingBufferBase* source,
-                                       uint64_t sourceOffset,
-                                       BufferBase* destination,
-                                       uint64_t destinationOffset,
-                                       uint64_t size) override;
-    MaybeError CopyFromStagingToTexture(const StagingBufferBase* source,
-                                        const TextureDataLayout& dataLayout,
-                                        TextureCopy* dst,
-                                        const Extent3D& copySizePixels) override;
+    MaybeError CopyFromStagingToBufferImpl(StagingBufferBase* source,
+                                           uint64_t sourceOffset,
+                                           BufferBase* destination,
+                                           uint64_t destinationOffset,
+                                           uint64_t size) override;
+    MaybeError CopyFromStagingToTextureImpl(const StagingBufferBase* source,
+                                            const TextureDataLayout& dataLayout,
+                                            TextureCopy* dst,
+                                            const Extent3D& copySizePixels) override;
 
     uint32_t GetOptimalBytesPerRowAlignment() const override;
     uint64_t GetOptimalBufferToTextureCopyOffsetAlignment() const override;
@@ -79,6 +80,8 @@
     // single-byte buffer
     id<MTLBuffer> GetDummyBlitMtlBuffer();
 
+    void ForceEventualFlushOfCommands() override;
+
   private:
     Device(AdapterBase* adapter,
            NSPRef<id<MTLDevice>> mtlDevice,
diff --git a/src/dawn/native/metal/DeviceMTL.mm b/src/dawn/native/metal/DeviceMTL.mm
index 5adfefb..ee4952f 100644
--- a/src/dawn/native/metal/DeviceMTL.mm
+++ b/src/dawn/native/metal/DeviceMTL.mm
@@ -343,7 +343,9 @@
 }
 
 MaybeError Device::TickImpl() {
-    DAWN_TRY(SubmitPendingCommandBuffer());
+    if (mCommandContext.NeedsSubmit()) {
+        DAWN_TRY(SubmitPendingCommandBuffer());
+    }
 
     // Just run timestamp period calculation when timestamp feature is enabled and timestamp
     // conversion is not disabled.
@@ -366,17 +368,26 @@
     return mCommandQueue.Get();
 }
 
-CommandRecordingContext* Device::GetPendingCommandContext() {
+CommandRecordingContext* Device::GetPendingCommandContext(Device::SubmitMode submitMode) {
+    if (submitMode == DeviceBase::SubmitMode::Normal) {
+        mCommandContext.SetNeedsSubmit();
+    }
     mCommandContext.MarkUsed();
     return &mCommandContext;
 }
 
 bool Device::HasPendingCommands() const {
-    return mCommandContext.WasUsed();
+    return mCommandContext.NeedsSubmit();
+}
+
+void Device::ForceEventualFlushOfCommands() {
+    if (mCommandContext.WasUsed()) {
+        mCommandContext.SetNeedsSubmit();
+    }
 }
 
 MaybeError Device::SubmitPendingCommandBuffer() {
-    if (!mCommandContext.WasUsed()) {
+    if (!mCommandContext.NeedsSubmit()) {
         return {};
     }
 
@@ -428,42 +439,45 @@
     return std::move(stagingBuffer);
 }
 
-MaybeError Device::CopyFromStagingToBuffer(StagingBufferBase* source,
-                                           uint64_t sourceOffset,
-                                           BufferBase* destination,
-                                           uint64_t destinationOffset,
-                                           uint64_t size) {
+MaybeError Device::CopyFromStagingToBufferImpl(StagingBufferBase* source,
+                                               uint64_t sourceOffset,
+                                               BufferBase* destination,
+                                               uint64_t destinationOffset,
+                                               uint64_t size) {
     // Metal validation layers forbid  0-sized copies, assert it is skipped prior to calling
     // this function.
     ASSERT(size != 0);
 
     ToBackend(destination)
-        ->EnsureDataInitializedAsDestination(GetPendingCommandContext(), destinationOffset, size);
+        ->EnsureDataInitializedAsDestination(
+            GetPendingCommandContext(DeviceBase::SubmitMode::Passive), destinationOffset, size);
 
     id<MTLBuffer> uploadBuffer = ToBackend(source)->GetBufferHandle();
     id<MTLBuffer> buffer = ToBackend(destination)->GetMTLBuffer();
-    [GetPendingCommandContext()->EnsureBlit() copyFromBuffer:uploadBuffer
-                                                sourceOffset:sourceOffset
-                                                    toBuffer:buffer
-                                           destinationOffset:destinationOffset
-                                                        size:size];
+    [GetPendingCommandContext(DeviceBase::SubmitMode::Passive)->EnsureBlit()
+           copyFromBuffer:uploadBuffer
+             sourceOffset:sourceOffset
+                 toBuffer:buffer
+        destinationOffset:destinationOffset
+                     size:size];
     return {};
 }
 
 // In Metal we don't write from the CPU to the texture directly which can be done using the
 // replaceRegion function, because the function requires a non-private storage mode and Dawn
 // sets the private storage mode by default for all textures except IOSurfaces on macOS.
-MaybeError Device::CopyFromStagingToTexture(const StagingBufferBase* source,
-                                            const TextureDataLayout& dataLayout,
-                                            TextureCopy* dst,
-                                            const Extent3D& copySizePixels) {
+MaybeError Device::CopyFromStagingToTextureImpl(const StagingBufferBase* source,
+                                                const TextureDataLayout& dataLayout,
+                                                TextureCopy* dst,
+                                                const Extent3D& copySizePixels) {
     Texture* texture = ToBackend(dst->texture.Get());
-    EnsureDestinationTextureInitialized(GetPendingCommandContext(), texture, *dst, copySizePixels);
+    EnsureDestinationTextureInitialized(GetPendingCommandContext(DeviceBase::SubmitMode::Passive),
+                                        texture, *dst, copySizePixels);
 
-    RecordCopyBufferToTexture(GetPendingCommandContext(), ToBackend(source)->GetBufferHandle(),
-                              source->GetSize(), dataLayout.offset, dataLayout.bytesPerRow,
-                              dataLayout.rowsPerImage, texture, dst->mipLevel, dst->origin,
-                              dst->aspect, copySizePixels);
+    RecordCopyBufferToTexture(GetPendingCommandContext(DeviceBase::SubmitMode::Passive),
+                              ToBackend(source)->GetBufferHandle(), source->GetSize(),
+                              dataLayout.offset, dataLayout.bytesPerRow, dataLayout.rowsPerImage,
+                              texture, dst->mipLevel, dst->origin, dst->aspect, copySizePixels);
     return {};
 }
 
diff --git a/src/dawn/native/null/DeviceNull.cpp b/src/dawn/native/null/DeviceNull.cpp
index 90cc962..e5b84d8 100644
--- a/src/dawn/native/null/DeviceNull.cpp
+++ b/src/dawn/native/null/DeviceNull.cpp
@@ -217,11 +217,11 @@
     return false;
 }
 
-MaybeError Device::CopyFromStagingToBuffer(StagingBufferBase* source,
-                                           uint64_t sourceOffset,
-                                           BufferBase* destination,
-                                           uint64_t destinationOffset,
-                                           uint64_t size) {
+MaybeError Device::CopyFromStagingToBufferImpl(StagingBufferBase* source,
+                                               uint64_t sourceOffset,
+                                               BufferBase* destination,
+                                               uint64_t destinationOffset,
+                                               uint64_t size) {
     if (IsToggleEnabled(Toggle::LazyClearResourceOnFirstUse)) {
         destination->SetIsDataInitialized();
     }
@@ -238,10 +238,10 @@
     return {};
 }
 
-MaybeError Device::CopyFromStagingToTexture(const StagingBufferBase* source,
-                                            const TextureDataLayout& src,
-                                            TextureCopy* dst,
-                                            const Extent3D& copySizePixels) {
+MaybeError Device::CopyFromStagingToTextureImpl(const StagingBufferBase* source,
+                                                const TextureDataLayout& src,
+                                                TextureCopy* dst,
+                                                const Extent3D& copySizePixels) {
     return {};
 }
 
@@ -556,6 +556,8 @@
     return 1.0f;
 }
 
+void Device::ForceEventualFlushOfCommands() {}
+
 Texture::Texture(DeviceBase* device, const TextureDescriptor* descriptor, TextureState state)
     : TextureBase(device, descriptor, state) {}
 
diff --git a/src/dawn/native/null/DeviceNull.h b/src/dawn/native/null/DeviceNull.h
index e2d6036..51274ab 100644
--- a/src/dawn/native/null/DeviceNull.h
+++ b/src/dawn/native/null/DeviceNull.h
@@ -106,15 +106,15 @@
     MaybeError SubmitPendingOperations();
 
     ResultOrError<std::unique_ptr<StagingBufferBase>> CreateStagingBuffer(size_t size) override;
-    MaybeError CopyFromStagingToBuffer(StagingBufferBase* source,
-                                       uint64_t sourceOffset,
-                                       BufferBase* destination,
-                                       uint64_t destinationOffset,
-                                       uint64_t size) override;
-    MaybeError CopyFromStagingToTexture(const StagingBufferBase* source,
-                                        const TextureDataLayout& src,
-                                        TextureCopy* dst,
-                                        const Extent3D& copySizePixels) override;
+    MaybeError CopyFromStagingToBufferImpl(StagingBufferBase* source,
+                                           uint64_t sourceOffset,
+                                           BufferBase* destination,
+                                           uint64_t destinationOffset,
+                                           uint64_t size) override;
+    MaybeError CopyFromStagingToTextureImpl(const StagingBufferBase* source,
+                                            const TextureDataLayout& src,
+                                            TextureCopy* dst,
+                                            const Extent3D& copySizePixels) override;
 
     MaybeError IncrementMemoryUsage(uint64_t bytes);
     void DecrementMemoryUsage(uint64_t bytes);
@@ -124,6 +124,8 @@
 
     float GetTimestampPeriodInNS() const override;
 
+    void ForceEventualFlushOfCommands() override;
+
   private:
     using DeviceBase::DeviceBase;
 
diff --git a/src/dawn/native/opengl/DeviceGL.cpp b/src/dawn/native/opengl/DeviceGL.cpp
index 7ca79ab..b663841 100644
--- a/src/dawn/native/opengl/DeviceGL.cpp
+++ b/src/dawn/native/opengl/DeviceGL.cpp
@@ -418,18 +418,18 @@
     return DAWN_UNIMPLEMENTED_ERROR("Device unable to create staging buffer.");
 }
 
-MaybeError Device::CopyFromStagingToBuffer(StagingBufferBase* source,
-                                           uint64_t sourceOffset,
-                                           BufferBase* destination,
-                                           uint64_t destinationOffset,
-                                           uint64_t size) {
+MaybeError Device::CopyFromStagingToBufferImpl(StagingBufferBase* source,
+                                               uint64_t sourceOffset,
+                                               BufferBase* destination,
+                                               uint64_t destinationOffset,
+                                               uint64_t size) {
     return DAWN_UNIMPLEMENTED_ERROR("Device unable to copy from staging buffer.");
 }
 
-MaybeError Device::CopyFromStagingToTexture(const StagingBufferBase* source,
-                                            const TextureDataLayout& src,
-                                            TextureCopy* dst,
-                                            const Extent3D& copySizePixels) {
+MaybeError Device::CopyFromStagingToTextureImpl(const StagingBufferBase* source,
+                                                const TextureDataLayout& src,
+                                                TextureCopy* dst,
+                                                const Extent3D& copySizePixels) {
     return DAWN_UNIMPLEMENTED_ERROR("Device unable to copy from staging buffer to texture.");
 }
 
@@ -464,6 +464,8 @@
     return 1.0f;
 }
 
+void Device::ForceEventualFlushOfCommands() {}
+
 const OpenGLFunctions& Device::GetGL() const {
     if (mContext) {
         mContext->MakeCurrent();
diff --git a/src/dawn/native/opengl/DeviceGL.h b/src/dawn/native/opengl/DeviceGL.h
index 1831774..8c544f5 100644
--- a/src/dawn/native/opengl/DeviceGL.h
+++ b/src/dawn/native/opengl/DeviceGL.h
@@ -68,21 +68,22 @@
     MaybeError TickImpl() override;
 
     ResultOrError<std::unique_ptr<StagingBufferBase>> CreateStagingBuffer(size_t size) override;
-    MaybeError CopyFromStagingToBuffer(StagingBufferBase* source,
-                                       uint64_t sourceOffset,
-                                       BufferBase* destination,
-                                       uint64_t destinationOffset,
-                                       uint64_t size) override;
+    MaybeError CopyFromStagingToBufferImpl(StagingBufferBase* source,
+                                           uint64_t sourceOffset,
+                                           BufferBase* destination,
+                                           uint64_t destinationOffset,
+                                           uint64_t size) override;
 
-    MaybeError CopyFromStagingToTexture(const StagingBufferBase* source,
-                                        const TextureDataLayout& src,
-                                        TextureCopy* dst,
-                                        const Extent3D& copySizePixels) override;
+    MaybeError CopyFromStagingToTextureImpl(const StagingBufferBase* source,
+                                            const TextureDataLayout& src,
+                                            TextureCopy* dst,
+                                            const Extent3D& copySizePixels) override;
 
     uint32_t GetOptimalBytesPerRowAlignment() const override;
     uint64_t GetOptimalBufferToTextureCopyOffsetAlignment() const override;
 
     float GetTimestampPeriodInNS() const override;
+    void ForceEventualFlushOfCommands() override;
 
     class Context {
       public:
diff --git a/src/dawn/native/vulkan/CommandRecordingContext.h b/src/dawn/native/vulkan/CommandRecordingContext.h
index c8fb544..7948e3b 100644
--- a/src/dawn/native/vulkan/CommandRecordingContext.h
+++ b/src/dawn/native/vulkan/CommandRecordingContext.h
@@ -40,6 +40,7 @@
 
     // For Device state tracking only.
     VkCommandPool commandPool = VK_NULL_HANDLE;
+    bool needsSubmit = false;
     bool used = false;
 
     // In some cases command buffer will need to be split to accomodate driver bug workarounds.
diff --git a/src/dawn/native/vulkan/DeviceVk.cpp b/src/dawn/native/vulkan/DeviceVk.cpp
index be8dc51..ffb058d 100644
--- a/src/dawn/native/vulkan/DeviceVk.cpp
+++ b/src/dawn/native/vulkan/DeviceVk.cpp
@@ -230,7 +230,7 @@
     mDeleter->Tick(completedSerial);
     mDescriptorAllocatorsPendingDeallocation.ClearUpTo(completedSerial);
 
-    if (mRecordingContext.used) {
+    if (mRecordingContext.needsSubmit) {
         DAWN_TRY(SubmitPendingCommands());
     }
 
@@ -282,18 +282,23 @@
     mDescriptorAllocatorsPendingDeallocation.Enqueue(allocator, GetPendingCommandSerial());
 }
 
-CommandRecordingContext* Device::GetPendingRecordingContext() {
+CommandRecordingContext* Device::GetPendingRecordingContext(Device::SubmitMode submitMode) {
     ASSERT(mRecordingContext.commandBuffer != VK_NULL_HANDLE);
+    mRecordingContext.needsSubmit |= (submitMode == DeviceBase::SubmitMode::Normal);
     mRecordingContext.used = true;
     return &mRecordingContext;
 }
 
 bool Device::HasPendingCommands() const {
-    return mRecordingContext.used;
+    return mRecordingContext.needsSubmit;
+}
+
+void Device::ForceEventualFlushOfCommands() {
+    mRecordingContext.needsSubmit |= mRecordingContext.used;
 }
 
 MaybeError Device::SubmitPendingCommands() {
-    if (!mRecordingContext.used) {
+    if (!mRecordingContext.needsSubmit) {
         return {};
     }
 
@@ -705,7 +710,7 @@
 }
 
 MaybeError Device::PrepareRecordingContext() {
-    ASSERT(!mRecordingContext.used);
+    ASSERT(!mRecordingContext.needsSubmit);
     ASSERT(mRecordingContext.commandBuffer == VK_NULL_HANDLE);
     ASSERT(mRecordingContext.commandPool == VK_NULL_HANDLE);
 
@@ -812,16 +817,17 @@
     return std::move(stagingBuffer);
 }
 
-MaybeError Device::CopyFromStagingToBuffer(StagingBufferBase* source,
-                                           uint64_t sourceOffset,
-                                           BufferBase* destination,
-                                           uint64_t destinationOffset,
-                                           uint64_t size) {
+MaybeError Device::CopyFromStagingToBufferImpl(StagingBufferBase* source,
+                                               uint64_t sourceOffset,
+                                               BufferBase* destination,
+                                               uint64_t destinationOffset,
+                                               uint64_t size) {
     // It is a validation error to do a 0-sized copy in Vulkan, check it is skipped prior to
     // calling this function.
     ASSERT(size != 0);
 
-    CommandRecordingContext* recordingContext = GetPendingRecordingContext();
+    CommandRecordingContext* recordingContext =
+        GetPendingRecordingContext(DeviceBase::SubmitMode::Passive);
 
     ToBackend(destination)
         ->EnsureDataInitializedAsDestination(recordingContext, destinationOffset, size);
@@ -845,15 +851,16 @@
     return {};
 }
 
-MaybeError Device::CopyFromStagingToTexture(const StagingBufferBase* source,
-                                            const TextureDataLayout& src,
-                                            TextureCopy* dst,
-                                            const Extent3D& copySizePixels) {
+MaybeError Device::CopyFromStagingToTextureImpl(const StagingBufferBase* source,
+                                                const TextureDataLayout& src,
+                                                TextureCopy* dst,
+                                                const Extent3D& copySizePixels) {
     // There is no need of a barrier to make host writes available and visible to the copy
     // operation for HOST_COHERENT memory. The Vulkan spec for vkQueueSubmit describes that it
     // does an implicit availability, visibility and domain operation.
 
-    CommandRecordingContext* recordingContext = GetPendingRecordingContext();
+    CommandRecordingContext* recordingContext =
+        GetPendingRecordingContext(DeviceBase::SubmitMode::Passive);
 
     VkBufferImageCopy region = ComputeBufferImageCopyRegion(src, *dst, copySizePixels);
     VkImageSubresourceLayers subresource = region.imageSubresource;
@@ -1118,7 +1125,7 @@
     ToBackend(GetAdapter())->GetVulkanInstance()->StopListeningForDeviceMessages(this);
 
     // Immediately tag the recording context as unused so we don't try to submit it in Tick.
-    mRecordingContext.used = false;
+    mRecordingContext.needsSubmit = false;
     if (mRecordingContext.commandPool != VK_NULL_HANDLE) {
         // The VkCommandBuffer memory should be wholly owned by the pool and freed when it is
         // destroyed, but that's not the case in some drivers and the leak memory.
diff --git a/src/dawn/native/vulkan/DeviceVk.h b/src/dawn/native/vulkan/DeviceVk.h
index 963fcaa..d9c01db 100644
--- a/src/dawn/native/vulkan/DeviceVk.h
+++ b/src/dawn/native/vulkan/DeviceVk.h
@@ -65,7 +65,8 @@
     ResourceMemoryAllocator* GetResourceMemoryAllocator() const;
     external_semaphore::Service* GetExternalSemaphoreService() const;
 
-    CommandRecordingContext* GetPendingRecordingContext();
+    CommandRecordingContext* GetPendingRecordingContext(
+        Device::SubmitMode submitMode = Device::SubmitMode::Normal);
     MaybeError SplitRecordingContext(CommandRecordingContext* recordingContext);
     MaybeError SubmitPendingCommands();
 
@@ -89,15 +90,15 @@
     MaybeError TickImpl() override;
 
     ResultOrError<std::unique_ptr<StagingBufferBase>> CreateStagingBuffer(size_t size) override;
-    MaybeError CopyFromStagingToBuffer(StagingBufferBase* source,
-                                       uint64_t sourceOffset,
-                                       BufferBase* destination,
-                                       uint64_t destinationOffset,
-                                       uint64_t size) override;
-    MaybeError CopyFromStagingToTexture(const StagingBufferBase* source,
-                                        const TextureDataLayout& src,
-                                        TextureCopy* dst,
-                                        const Extent3D& copySizePixels) override;
+    MaybeError CopyFromStagingToBufferImpl(StagingBufferBase* source,
+                                           uint64_t sourceOffset,
+                                           BufferBase* destination,
+                                           uint64_t destinationOffset,
+                                           uint64_t size) override;
+    MaybeError CopyFromStagingToTextureImpl(const StagingBufferBase* source,
+                                            const TextureDataLayout& src,
+                                            TextureCopy* dst,
+                                            const Extent3D& copySizePixels) override;
 
     // Return the fixed subgroup size to use for compute shaders on this device or 0 if none
     // needs to be set.
@@ -115,6 +116,8 @@
     // Used to associate this device with validation layer messages.
     const char* GetDebugPrefix() { return mDebugPrefix.c_str(); }
 
+    void ForceEventualFlushOfCommands() override;
+
   private:
     Device(Adapter* adapter,
            const DeviceDescriptor* descriptor,
diff --git a/src/dawn/tests/unittests/native/mocks/DeviceMock.h b/src/dawn/tests/unittests/native/mocks/DeviceMock.h
index 309f2d9..d3a3a84 100644
--- a/src/dawn/tests/unittests/native/mocks/DeviceMock.h
+++ b/src/dawn/tests/unittests/native/mocks/DeviceMock.h
@@ -39,11 +39,11 @@
                 (size_t),
                 (override));
     MOCK_METHOD(MaybeError,
-                CopyFromStagingToBuffer,
+                CopyFromStagingToBufferImpl,
                 (StagingBufferBase*, uint64_t, BufferBase*, uint64_t, uint64_t),
                 (override));
     MOCK_METHOD(MaybeError,
-                CopyFromStagingToTexture,
+                CopyFromStagingToTextureImpl,
                 (const StagingBufferBase*, const TextureDataLayout&, TextureCopy*, const Extent3D&),
                 (override));
 
@@ -51,6 +51,7 @@
     MOCK_METHOD(uint64_t, GetOptimalBufferToTextureCopyOffsetAlignment, (), (const, override));
 
     MOCK_METHOD(float, GetTimestampPeriodInNS, (), (const, override));
+    MOCK_METHOD(void, ForceEventualFlushOfCommands, (), (override));
 
     MOCK_METHOD(ResultOrError<Ref<BindGroupBase>>,
                 CreateBindGroupImpl,