Avoid processing already processed tick

To avoid overly ticking, we only want to tick when:
1. the last submitted serial has moved beyond the completed serial
2. or the completed serial has not reached the future command serial added
by the trackers (MapRequestTracker, FenceSignalTracker, ErrorScopeTracker).

Bug: dawn:400
Change-Id: Ie7c65acc332846ac1a27f9a18f230149d96d2189
Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/19062
Reviewed-by: Austin Eng <enga@chromium.org>
Commit-Queue: Natasha Lee <natlee@microsoft.com>
diff --git a/src/dawn_native/Device.cpp b/src/dawn_native/Device.cpp
index b3f4766..1f41745 100644
--- a/src/dawn_native/Device.cpp
+++ b/src/dawn_native/Device.cpp
@@ -126,7 +126,7 @@
                 // complete before proceeding with destruction.
                 // Assert that errors are device loss so that we can continue with destruction
                 AssertAndIgnoreDeviceLossError(WaitForIdleForDestruction());
-                ASSERT(mCompletedSerial == mLastSubmittedSerial);
+                AssumeCommandsComplete();
                 break;
 
             case State::BeingDisconnected:
@@ -139,6 +139,8 @@
             case State::Disconnected:
                 break;
         }
+        ASSERT(mCompletedSerial == mLastSubmittedSerial);
+        ASSERT(mFutureCallbackSerial <= mCompletedSerial);
 
         // Skip handling device facilities if they haven't even been created (or failed doing so)
         if (mState != State::BeingCreated) {
@@ -163,6 +165,7 @@
         mDynamicUploader = nullptr;
         mMapRequestTracker = nullptr;
 
+        AssumeCommandsComplete();
         // Tell the backend that it can free all the objects now that the GPU timeline is empty.
         ShutDownImpl();
 
@@ -181,8 +184,10 @@
             mState = State::BeingDisconnected;
 
             // Assert that errors are device losses so that we can continue with destruction.
+            // Assume all commands are complete after WaitForIdleForDestruction (because they were)
             AssertAndIgnoreDeviceLossError(WaitForIdleForDestruction());
-            ASSERT(mCompletedSerial == mLastSubmittedSerial);
+            AssumeCommandsComplete();
+            ASSERT(mFutureCallbackSerial <= mCompletedSerial);
             mState = State::Disconnected;
 
             // Now everything is as if the device was lost.
@@ -320,24 +325,30 @@
         return mLastSubmittedSerial;
     }
 
+    Serial DeviceBase::GetFutureCallbackSerial() const {
+        return mFutureCallbackSerial;
+    }
+
     void DeviceBase::IncrementLastSubmittedCommandSerial() {
         mLastSubmittedSerial++;
     }
 
-    void DeviceBase::ArtificiallyIncrementSerials() {
-        mCompletedSerial++;
-        mLastSubmittedSerial++;
-    }
-
     void DeviceBase::AssumeCommandsComplete() {
-        mLastSubmittedSerial++;
-        mCompletedSerial = mLastSubmittedSerial;
+        Serial maxSerial = std::max(mLastSubmittedSerial + 1, mFutureCallbackSerial);
+        mLastSubmittedSerial = maxSerial;
+        mCompletedSerial = maxSerial;
     }
 
     Serial DeviceBase::GetPendingCommandSerial() const {
         return mLastSubmittedSerial + 1;
     }
 
+    void DeviceBase::AddFutureCallbackSerial(Serial serial) {
+        if (serial > mFutureCallbackSerial) {
+            mFutureCallbackSerial = serial;
+        }
+    }
+
     void DeviceBase::CheckPassedSerials() {
         Serial completedSerial = CheckAndUpdateCompletedSerials();
 
@@ -712,17 +723,32 @@
         if (ConsumedError(ValidateIsAlive())) {
             return;
         }
-        if (ConsumedError(TickImpl())) {
-            return;
-        }
+        // to avoid overly ticking, we only want to tick when:
+        // 1. the last submitted serial has moved beyond the completed serial
+        // 2. or the completed serial has not reached the future serial set by the trackers
+        if (mLastSubmittedSerial > mCompletedSerial || mCompletedSerial < mFutureCallbackSerial) {
+            CheckPassedSerials();
 
-        // TODO(cwallez@chromium.org): decouple TickImpl from updating the serial so that we can
-        // tick the dynamic uploader before the backend resource allocators. This would allow
-        // reclaiming resources one tick earlier.
-        mDynamicUploader->Deallocate(GetCompletedCommandSerial());
-        mErrorScopeTracker->Tick(GetCompletedCommandSerial());
-        mFenceSignalTracker->Tick(GetCompletedCommandSerial());
-        mMapRequestTracker->Tick(GetCompletedCommandSerial());
+            if (ConsumedError(TickImpl())) {
+                return;
+            }
+
+            // There is no GPU work in flight, we need to move the serials forward so that
+            // so that CPU operations waiting on GPU completion can know they don't have to wait.
+            // AssumeCommandsComplete will assign the max serial we must tick to in order to
+            // fire the awaiting callbacks.
+            if (mCompletedSerial == mLastSubmittedSerial) {
+                AssumeCommandsComplete();
+            }
+
+            // TODO(cwallez@chromium.org): decouple TickImpl from updating the serial so that we can
+            // tick the dynamic uploader before the backend resource allocators. This would allow
+            // reclaiming resources one tick earlier.
+            mDynamicUploader->Deallocate(mCompletedSerial);
+            mErrorScopeTracker->Tick(mCompletedSerial);
+            mFenceSignalTracker->Tick(mCompletedSerial);
+            mMapRequestTracker->Tick(mCompletedSerial);
+        }
     }
 
     void DeviceBase::Reference() {
diff --git a/src/dawn_native/Device.h b/src/dawn_native/Device.h
index 3aa5920..9e336ac 100644
--- a/src/dawn_native/Device.h
+++ b/src/dawn_native/Device.h
@@ -90,6 +90,7 @@
 
         Serial GetCompletedCommandSerial() const;
         Serial GetLastSubmittedCommandSerial() const;
+        Serial GetFutureCallbackSerial() const;
         Serial GetPendingCommandSerial() const;
         virtual MaybeError TickImpl() = 0;
 
@@ -214,6 +215,7 @@
         size_t GetDeprecationWarningCountForTesting();
         void EmitDeprecationWarning(const char* warning);
         void LoseForTesting();
+        void AddFutureCallbackSerial(Serial serial);
 
       protected:
         void SetToggle(Toggle toggle, bool isEnabled);
@@ -224,13 +226,6 @@
 
         // Incrememt mLastSubmittedSerial when we submit the next serial
         void IncrementLastSubmittedCommandSerial();
-        // If there's no GPU work in flight we still need to artificially increment the serial
-        // so that CPU operations waiting on GPU completion can know they don't have to wait.
-        void ArtificiallyIncrementSerials();
-        // During shut down of device, some operations might have been started since the last submit
-        // and waiting on a serial that doesn't have a corresponding fence enqueued. Fake serials to
-        // make all commands look completed.
-        void AssumeCommandsComplete();
         // Check for passed fences and set the new completed serial
         void CheckPassedSerials();
 
@@ -298,14 +293,21 @@
         // Each backend should implement to check their passed fences if there are any and return a
         // completed serial. Return 0 should indicate no fences to check.
         virtual Serial CheckAndUpdateCompletedSerials() = 0;
+        // During shut down of device, some operations might have been started since the last submit
+        // and waiting on a serial that doesn't have a corresponding fence enqueued. Fake serials to
+        // make all commands look completed.
+        void AssumeCommandsComplete();
         // mCompletedSerial tracks the last completed command serial that the fence has returned.
         // mLastSubmittedSerial tracks the last submitted command serial.
         // During device removal, the serials could be artificially incremented
         // to make it appear as if commands have been compeleted. They can also be artificially
         // incremented when no work is being done in the GPU so CPU operations don't have to wait on
         // stale serials.
+        // mFutureCallbackSerial tracks the largest serial we need to tick to for the callbacks to
+        // fire
         Serial mCompletedSerial = 0;
         Serial mLastSubmittedSerial = 0;
+        Serial mFutureCallbackSerial = 0;
 
         // ShutDownImpl is used to clean up and release resources used by device, does not wait for
         // GPU or check errors.
diff --git a/src/dawn_native/ErrorScopeTracker.cpp b/src/dawn_native/ErrorScopeTracker.cpp
index 409b36b..b110e97 100644
--- a/src/dawn_native/ErrorScopeTracker.cpp
+++ b/src/dawn_native/ErrorScopeTracker.cpp
@@ -37,6 +37,7 @@
 
     void ErrorScopeTracker::TrackUntilLastSubmitComplete(ErrorScope* scope) {
         mScopesInFlight.Enqueue(scope, mDevice->GetLastSubmittedCommandSerial());
+        mDevice->AddFutureCallbackSerial(mDevice->GetPendingCommandSerial());
     }
 
     void ErrorScopeTracker::Tick(Serial completedSerial) {
diff --git a/src/dawn_native/FenceSignalTracker.cpp b/src/dawn_native/FenceSignalTracker.cpp
index 1daf10a..b8243a2 100644
--- a/src/dawn_native/FenceSignalTracker.cpp
+++ b/src/dawn_native/FenceSignalTracker.cpp
@@ -31,6 +31,7 @@
         // the fence completed value once the last submitted serial has passed.
         mFencesInFlight.Enqueue(FenceInFlight{fence, value},
                                 mDevice->GetLastSubmittedCommandSerial());
+        mDevice->AddFutureCallbackSerial(mDevice->GetPendingCommandSerial());
     }
 
     void FenceSignalTracker::Tick(Serial finishedSerial) {
diff --git a/src/dawn_native/MapRequestTracker.cpp b/src/dawn_native/MapRequestTracker.cpp
index fefcabc..8f33e02 100644
--- a/src/dawn_native/MapRequestTracker.cpp
+++ b/src/dawn_native/MapRequestTracker.cpp
@@ -34,6 +34,7 @@
         request.isWrite = isWrite;
 
         mInflightRequests.Enqueue(std::move(request), mDevice->GetPendingCommandSerial());
+        mDevice->AddFutureCallbackSerial(mDevice->GetPendingCommandSerial());
     }
 
     void MapRequestTracker::Tick(Serial finishedSerial) {
diff --git a/src/dawn_native/d3d12/DeviceD3D12.cpp b/src/dawn_native/d3d12/DeviceD3D12.cpp
index 1483cdd..8026bb9 100644
--- a/src/dawn_native/d3d12/DeviceD3D12.cpp
+++ b/src/dawn_native/d3d12/DeviceD3D12.cpp
@@ -212,8 +212,6 @@
     }
 
     MaybeError Device::TickImpl() {
-        CheckPassedSerials();
-
         // Perform cleanup operations to free unused objects
         Serial completedSerial = GetCompletedCommandSerial();
 
@@ -245,6 +243,7 @@
             DAWN_TRY(CheckHRESULT(mFence->SetEventOnCompletion(serial, mFenceEvent),
                                   "D3D12 set event on completion"));
             WaitForSingleObject(mFenceEvent, INFINITE);
+            CheckPassedSerials();
         }
         return {};
     }
@@ -462,9 +461,6 @@
 
         // Call tick one last time so resources are cleaned up.
         DAWN_TRY(TickImpl());
-
-        // Force all operations to look as if they were completed
-        AssumeCommandsComplete();
         return {};
     }
 
@@ -519,11 +515,6 @@
         // Immediately forget about all pending commands
         mPendingCommands.Release();
 
-        // Some operations might have been started since the last submit and waiting
-        // on a serial that doesn't have a corresponding fence enqueued. Force all
-        // operations to look as if they were completed (because they were).
-        AssumeCommandsComplete();
-
         if (mFenceEvent != nullptr) {
             ::CloseHandle(mFenceEvent);
         }
diff --git a/src/dawn_native/metal/DeviceMTL.mm b/src/dawn_native/metal/DeviceMTL.mm
index f396f58..71f2ed7 100644
--- a/src/dawn_native/metal/DeviceMTL.mm
+++ b/src/dawn_native/metal/DeviceMTL.mm
@@ -155,9 +155,9 @@
 
     Serial Device::CheckAndUpdateCompletedSerials() {
         if (GetCompletedCommandSerial() > mCompletedSerial) {
-            // sometimes we artificially increase the serials, in which case the completed serial in
+            // sometimes we increase the serials, in which case the completed serial in
             // the device base will surpass the completed serial we have in the metal backend, so we
-            // must update ours when we see that the completed serial from the frontend has
+            // must update ours when we see that the completed serial from device base has
             // increased.
             mCompletedSerial = GetCompletedCommandSerial();
         }
@@ -166,15 +166,8 @@
     }
 
     MaybeError Device::TickImpl() {
-        CheckPassedSerials();
-        Serial completedSerial = GetCompletedCommandSerial();
-
         if (mCommandContext.GetCommands() != nil) {
             SubmitPendingCommandBuffer();
-        } else if (completedSerial == GetLastSubmittedCommandSerial()) {
-            // If there's no GPU work in flight we still need to artificially increment the serial
-            // so that CPU operations waiting on GPU completion can know they don't have to wait.
-            ArtificiallyIncrementSerials();
         }
 
         return {};
@@ -300,14 +293,8 @@
             CheckPassedSerials();
         }
 
-        // Artificially increase the serials so work that was pending knows it can complete.
-        ArtificiallyIncrementSerials();
-
         DAWN_TRY(TickImpl());
 
-        // Force all operations to look as if they were completed
-        AssumeCommandsComplete();
-
         return {};
     }
 
diff --git a/src/dawn_native/null/DeviceNull.cpp b/src/dawn_native/null/DeviceNull.cpp
index d6d8fff..59f87ff 100644
--- a/src/dawn_native/null/DeviceNull.cpp
+++ b/src/dawn_native/null/DeviceNull.cpp
@@ -185,8 +185,6 @@
     }
 
     MaybeError Device::WaitForIdleForDestruction() {
-        // Fake all commands being completed
-        AssumeCommandsComplete();
         return {};
     }
 
diff --git a/src/dawn_native/opengl/DeviceGL.cpp b/src/dawn_native/opengl/DeviceGL.cpp
index e2e81f2..3f852b3 100644
--- a/src/dawn_native/opengl/DeviceGL.cpp
+++ b/src/dawn_native/opengl/DeviceGL.cpp
@@ -153,12 +153,6 @@
     }
 
     MaybeError Device::TickImpl() {
-        CheckPassedSerials();
-        if (GetCompletedCommandSerial() == GetLastSubmittedCommandSerial()) {
-            // If there's no GPU work in flight we still need to artificially increment the serial
-            // so that CPU operations waiting on GPU completion can know they don't have to wait.
-            ArtificiallyIncrementSerials();
-        }
         return {};
     }
 
@@ -200,11 +194,6 @@
 
     void Device::ShutDownImpl() {
         ASSERT(GetState() == State::Disconnected);
-
-        // Some operations might have been started since the last submit and waiting
-        // on a serial that doesn't have a corresponding fence enqueued. Force all
-        // operations to look as if they were completed (because they were).
-        AssumeCommandsComplete();
     }
 
     MaybeError Device::WaitForIdleForDestruction() {
@@ -213,8 +202,6 @@
         ASSERT(mFencesInFlight.empty());
         Tick();
 
-        // Force all operations to look as if they were completed
-        AssumeCommandsComplete();
         return {};
     }
 
diff --git a/src/dawn_native/vulkan/DeviceVk.cpp b/src/dawn_native/vulkan/DeviceVk.cpp
index 9edefcb..ae62336 100644
--- a/src/dawn_native/vulkan/DeviceVk.cpp
+++ b/src/dawn_native/vulkan/DeviceVk.cpp
@@ -155,7 +155,6 @@
     }
 
     MaybeError Device::TickImpl() {
-        CheckPassedSerials();
         RecycleCompletedCommands();
 
         Serial completedSerial = GetCompletedCommandSerial();
@@ -171,10 +170,6 @@
 
         if (mRecordingContext.used) {
             DAWN_TRY(SubmitPendingCommands());
-        } else if (completedSerial == GetLastSubmittedCommandSerial()) {
-            // If there's no GPU work in flight we still need to artificially increment the serial
-            // so that CPU operations waiting on GPU completion can know they don't have to wait.
-            ArtificiallyIncrementSerials();
         }
 
         return {};
@@ -731,9 +726,6 @@
 
             mFencesInFlight.pop();
         }
-
-        // Force all operations to look as if they were completed
-        AssumeCommandsComplete();
         return {};
     }
 
@@ -773,11 +765,6 @@
         }
         mRecordingContext.signalSemaphores.clear();
 
-        // Some operations might have been started since the last submit and waiting
-        // on a serial that doesn't have a corresponding fence enqueued. Force all
-        // operations to look as if they were completed (because they were).
-        AssumeCommandsComplete();
-
         // Assert that errors are device loss so that we can continue with destruction
         AssertAndIgnoreDeviceLossError(TickImpl());
 
diff --git a/src/tests/end2end/FenceTests.cpp b/src/tests/end2end/FenceTests.cpp
index 20d3e6c..34d4198 100644
--- a/src/tests/end2end/FenceTests.cpp
+++ b/src/tests/end2end/FenceTests.cpp
@@ -143,6 +143,43 @@
     WaitForCompletedValue(fence, 4);
 }
 
+// Test callbacks still occur if Queue::Signal and fence::OnCompletion happens multiple times
+TEST_P(FenceTests, SignalOnCompletionWait) {
+    wgpu::Fence fence = queue.CreateFence();
+
+    queue.Signal(fence, 2);
+    queue.Signal(fence, 6);
+
+    EXPECT_CALL(*mockFenceOnCompletionCallback, Call(WGPUFenceCompletionStatus_Success, this + 2))
+        .Times(1);
+    EXPECT_CALL(*mockFenceOnCompletionCallback, Call(WGPUFenceCompletionStatus_Success, this + 6))
+        .Times(1);
+
+    fence.OnCompletion(1u, ToMockFenceOnCompletionCallback, this + 2);
+    fence.OnCompletion(5u, ToMockFenceOnCompletionCallback, this + 6);
+
+    WaitForCompletedValue(fence, 6);
+}
+
+// Test callbacks still occur if Queue::Signal and fence::OnCompletion happens multiple times
+TEST_P(FenceTests, SignalOnCompletionWaitStaggered) {
+    wgpu::Fence fence = queue.CreateFence();
+
+    queue.Signal(fence, 2);
+
+    EXPECT_CALL(*mockFenceOnCompletionCallback, Call(WGPUFenceCompletionStatus_Success, this + 2))
+        .Times(1);
+    fence.OnCompletion(1u, ToMockFenceOnCompletionCallback, this + 2);
+
+    queue.Signal(fence, 4);
+
+    EXPECT_CALL(*mockFenceOnCompletionCallback, Call(WGPUFenceCompletionStatus_Success, this + 4))
+        .Times(1);
+    fence.OnCompletion(3u, ToMockFenceOnCompletionCallback, this + 4);
+
+    WaitForCompletedValue(fence, 4);
+}
+
 // Test all callbacks are called if they are added for the same fence value
 TEST_P(FenceTests, OnCompletionMultipleCallbacks) {
     wgpu::Fence fence = queue.CreateFence();