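Vulkan: Check for device loss in CheckAndUpdateCompletedSerials

The Vulkan backend previously ASSERTed that GetFenceStatus returned
VK_SUCCESS or VK_NOT_READY, with a TODO to handle device loss. Any other
result (such as VK_ERROR_DEVICE_LOST) is now passed to CheckVkSuccess and
propagated with DAWN_TRY.

To make that possible, CheckAndUpdateCompletedSerials now returns
ResultOrError<ExecutionSerial> and CheckPassedSerials returns MaybeError.
All backends (D3D12, Metal, Null, OpenGL, Vulkan) and their callers are
updated for the new signatures.
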
Bug: chromium:1195645
Bug: chromium:1195693
Change-Id: I3c25a64af87a60f40030094dd73b13a035a7876c
Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/46625
Auto-Submit: Corentin Wallez <cwallez@chromium.org>
Reviewed-by: Jiawei Shao <jiawei.shao@intel.com>
Reviewed-by: Austin Eng <enga@chromium.org>
Commit-Queue: Austin Eng <enga@chromium.org>
diff --git a/src/dawn_native/Device.cpp b/src/dawn_native/Device.cpp
index 9f05587..49898d9 100644
--- a/src/dawn_native/Device.cpp
+++ b/src/dawn_native/Device.cpp
@@ -402,8 +402,9 @@
         }
     }
 
-    void DeviceBase::CheckPassedSerials() {
-        ExecutionSerial completedSerial = CheckAndUpdateCompletedSerials();
+    MaybeError DeviceBase::CheckPassedSerials() {
+        ExecutionSerial completedSerial;
+        DAWN_TRY_ASSIGN(completedSerial, CheckAndUpdateCompletedSerials());
 
         ASSERT(completedSerial <= mLastSubmittedSerial);
         // completedSerial should not be less than mCompletedSerial unless it is 0.
@@ -413,6 +414,8 @@
         if (completedSerial > mCompletedSerial) {
             mCompletedSerial = completedSerial;
         }
+
+        return {};
     }
 
     ResultOrError<const Format*> DeviceBase::GetInternalFormat(wgpu::TextureFormat format) const {
@@ -932,8 +935,7 @@
         // 1. the last submitted serial has moved beyond the completed serial
         // 2. or the completed serial has not reached the future serial set by the trackers
         if (mLastSubmittedSerial > mCompletedSerial || mCompletedSerial < mFutureSerial) {
-            CheckPassedSerials();
-
+            DAWN_TRY(CheckPassedSerials());
             DAWN_TRY(TickImpl());
 
             // There is no GPU work in flight, we need to move the serials forward so that
diff --git a/src/dawn_native/Device.h b/src/dawn_native/Device.h
index 42c684e..1f014f3 100644
--- a/src/dawn_native/Device.h
+++ b/src/dawn_native/Device.h
@@ -243,7 +243,7 @@
         // reaching the serial the work will be executed on.
         void AddFutureSerial(ExecutionSerial serial);
         // Check for passed fences and set the new completed serial
-        void CheckPassedSerials();
+        MaybeError CheckPassedSerials();
 
         MaybeError Tick();
 
@@ -336,7 +336,7 @@
 
         // Each backend should implement to check their passed fences if there are any and return a
         // completed serial. Return 0 should indicate no fences to check.
-        virtual ExecutionSerial CheckAndUpdateCompletedSerials() = 0;
+        virtual ResultOrError<ExecutionSerial> CheckAndUpdateCompletedSerials() = 0;
         // During shut down of device, some operations might have been started since the last submit
         // and waiting on a serial that doesn't have a corresponding fence enqueued. Fake serials to
         // make all commands look completed.
diff --git a/src/dawn_native/d3d12/DeviceD3D12.cpp b/src/dawn_native/d3d12/DeviceD3D12.cpp
index fc56302..4de7654 100644
--- a/src/dawn_native/d3d12/DeviceD3D12.cpp
+++ b/src/dawn_native/d3d12/DeviceD3D12.cpp
@@ -269,17 +269,17 @@
     }
 
     MaybeError Device::WaitForSerial(ExecutionSerial serial) {
-        CheckPassedSerials();
+        DAWN_TRY(CheckPassedSerials());
         if (GetCompletedCommandSerial() < serial) {
             DAWN_TRY(CheckHRESULT(mFence->SetEventOnCompletion(uint64_t(serial), mFenceEvent),
                                   "D3D12 set event on completion"));
             WaitForSingleObject(mFenceEvent, INFINITE);
-            CheckPassedSerials();
+            DAWN_TRY(CheckPassedSerials());
         }
         return {};
     }
 
-    ExecutionSerial Device::CheckAndUpdateCompletedSerials() {
+    ResultOrError<ExecutionSerial> Device::CheckAndUpdateCompletedSerials() {
         ExecutionSerial completeSerial = ExecutionSerial(mFence->GetCompletedValue());
 
         if (completeSerial <= GetCompletedCommandSerial()) {
diff --git a/src/dawn_native/d3d12/DeviceD3D12.h b/src/dawn_native/d3d12/DeviceD3D12.h
index 177c943..4819dd4 100644
--- a/src/dawn_native/d3d12/DeviceD3D12.h
+++ b/src/dawn_native/d3d12/DeviceD3D12.h
@@ -181,7 +181,7 @@
 
         ComPtr<ID3D12Fence> mFence;
         HANDLE mFenceEvent = nullptr;
-        ExecutionSerial CheckAndUpdateCompletedSerials() override;
+        ResultOrError<ExecutionSerial> CheckAndUpdateCompletedSerials() override;
 
         ComPtr<ID3D12Device> mD3d12Device;  // Device is owned by adapter and will not be outlived.
         ComPtr<ID3D12CommandQueue> mCommandQueue;
diff --git a/src/dawn_native/metal/DeviceMTL.h b/src/dawn_native/metal/DeviceMTL.h
index ae97727..04e5f90 100644
--- a/src/dawn_native/metal/DeviceMTL.h
+++ b/src/dawn_native/metal/DeviceMTL.h
@@ -112,7 +112,7 @@
         void InitTogglesFromDriver();
         void ShutDownImpl() override;
         MaybeError WaitForIdleForDestruction() override;
-        ExecutionSerial CheckAndUpdateCompletedSerials() override;
+        ResultOrError<ExecutionSerial> CheckAndUpdateCompletedSerials() override;
 
         NSPRef<id<MTLDevice>> mMtlDevice;
         NSPRef<id<MTLCommandQueue>> mCommandQueue;
diff --git a/src/dawn_native/metal/DeviceMTL.mm b/src/dawn_native/metal/DeviceMTL.mm
index 34c39d2..5a8b3a1 100644
--- a/src/dawn_native/metal/DeviceMTL.mm
+++ b/src/dawn_native/metal/DeviceMTL.mm
@@ -176,7 +176,7 @@
         return TextureView::Create(texture, descriptor);
     }
 
-    ExecutionSerial Device::CheckAndUpdateCompletedSerials() {
+    ResultOrError<ExecutionSerial> Device::CheckAndUpdateCompletedSerials() {
         uint64_t frontendCompletedSerial{GetCompletedCommandSerial()};
         if (frontendCompletedSerial > mCompletedSerial) {
             // sometimes we increase the serials, in which case the completed serial in
@@ -375,12 +375,12 @@
     MaybeError Device::WaitForIdleForDestruction() {
         // Forget all pending commands.
         mCommandContext.AcquireCommands();
-        CheckPassedSerials();
+        DAWN_TRY(CheckPassedSerials());
 
         // Wait for all commands to be finished so we can free resources
         while (GetCompletedCommandSerial() != GetLastSubmittedCommandSerial()) {
             usleep(100);
-            CheckPassedSerials();
+            DAWN_TRY(CheckPassedSerials());
         }
 
         return {};
diff --git a/src/dawn_native/null/DeviceNull.cpp b/src/dawn_native/null/DeviceNull.cpp
index 13d6818..70ac53c 100644
--- a/src/dawn_native/null/DeviceNull.cpp
+++ b/src/dawn_native/null/DeviceNull.cpp
@@ -218,25 +218,27 @@
     }
 
     MaybeError Device::TickImpl() {
-        SubmitPendingOperations();
-        return {};
+        return SubmitPendingOperations();
     }
 
-    ExecutionSerial Device::CheckAndUpdateCompletedSerials() {
+    ResultOrError<ExecutionSerial> Device::CheckAndUpdateCompletedSerials() {
         return GetLastSubmittedCommandSerial();
     }
 
     void Device::AddPendingOperation(std::unique_ptr<PendingOperation> operation) {
         mPendingOperations.emplace_back(std::move(operation));
     }
-    void Device::SubmitPendingOperations() {
+
+    MaybeError Device::SubmitPendingOperations() {
         for (auto& operation : mPendingOperations) {
             operation->Execute();
         }
         mPendingOperations.clear();
 
-        CheckPassedSerials();
+        DAWN_TRY(CheckPassedSerials());
         IncrementLastSubmittedCommandSerial();
+
+        return {};
     }
 
     // BindGroupDataHolder
@@ -342,8 +344,7 @@
         // for testing purposes we should also tick in the null implementation.
         DAWN_TRY(device->Tick());
 
-        device->SubmitPendingOperations();
-        return {};
+        return device->SubmitPendingOperations();
     }
 
     MaybeError Queue::WriteBufferImpl(BufferBase* buffer,
diff --git a/src/dawn_native/null/DeviceNull.h b/src/dawn_native/null/DeviceNull.h
index 815d4a0..6ee1422 100644
--- a/src/dawn_native/null/DeviceNull.h
+++ b/src/dawn_native/null/DeviceNull.h
@@ -98,7 +98,7 @@
         MaybeError TickImpl() override;
 
         void AddPendingOperation(std::unique_ptr<PendingOperation> operation);
-        void SubmitPendingOperations();
+        MaybeError SubmitPendingOperations();
 
         ResultOrError<std::unique_ptr<StagingBufferBase>> CreateStagingBuffer(size_t size) override;
         MaybeError CopyFromStagingToBuffer(StagingBufferBase* source,
@@ -153,7 +153,7 @@
             TextureBase* texture,
             const TextureViewDescriptor* descriptor) override;
 
-        ExecutionSerial CheckAndUpdateCompletedSerials() override;
+        ResultOrError<ExecutionSerial> CheckAndUpdateCompletedSerials() override;
 
         void ShutDownImpl() override;
         MaybeError WaitForIdleForDestruction() override;
diff --git a/src/dawn_native/opengl/DeviceGL.cpp b/src/dawn_native/opengl/DeviceGL.cpp
index a1248d8..2963ad8 100644
--- a/src/dawn_native/opengl/DeviceGL.cpp
+++ b/src/dawn_native/opengl/DeviceGL.cpp
@@ -180,7 +180,7 @@
         return {};
     }
 
-    ExecutionSerial Device::CheckAndUpdateCompletedSerials() {
+    ResultOrError<ExecutionSerial> Device::CheckAndUpdateCompletedSerials() {
         ExecutionSerial fenceSerial{0};
         while (!mFencesInFlight.empty()) {
             GLsync sync = mFencesInFlight.front().first;
@@ -234,7 +234,7 @@
 
     MaybeError Device::WaitForIdleForDestruction() {
         gl.Finish();
-        CheckPassedSerials();
+        DAWN_TRY(CheckPassedSerials());
         ASSERT(mFencesInFlight.empty());
 
         return {};
diff --git a/src/dawn_native/opengl/DeviceGL.h b/src/dawn_native/opengl/DeviceGL.h
index 232fd53..6ee6de5 100644
--- a/src/dawn_native/opengl/DeviceGL.h
+++ b/src/dawn_native/opengl/DeviceGL.h
@@ -109,7 +109,7 @@
             const TextureViewDescriptor* descriptor) override;
 
         void InitTogglesFromDriver();
-        ExecutionSerial CheckAndUpdateCompletedSerials() override;
+        ResultOrError<ExecutionSerial> CheckAndUpdateCompletedSerials() override;
         void ShutDownImpl() override;
         MaybeError WaitForIdleForDestruction() override;
 
diff --git a/src/dawn_native/vulkan/DeviceVk.cpp b/src/dawn_native/vulkan/DeviceVk.cpp
index 2c9d32f..530b5ca 100644
--- a/src/dawn_native/vulkan/DeviceVk.cpp
+++ b/src/dawn_native/vulkan/DeviceVk.cpp
@@ -513,21 +513,22 @@
         return fence;
     }
 
-    ExecutionSerial Device::CheckAndUpdateCompletedSerials() {
+    ResultOrError<ExecutionSerial> Device::CheckAndUpdateCompletedSerials() {
         ExecutionSerial fenceSerial(0);
         while (!mFencesInFlight.empty()) {
             VkFence fence = mFencesInFlight.front().first;
             ExecutionSerial tentativeSerial = mFencesInFlight.front().second;
             VkResult result = VkResult::WrapUnsafe(
                 INJECT_ERROR_OR_RUN(fn.GetFenceStatus(mVkDevice, fence), VK_ERROR_DEVICE_LOST));
-            // TODO: Handle DeviceLost error.
-            ASSERT(result == VK_SUCCESS || result == VK_NOT_READY);
 
             // Fence are added in order, so we can stop searching as soon
             // as we see one that's not ready.
             if (result == VK_NOT_READY) {
                 return fenceSerial;
+            } else {
+                DAWN_TRY(CheckVkSuccess(::VkResult(result), "GetFenceStatus"));
             }
+
             // Update fenceSerial since fence is ready.
             fenceSerial = tentativeSerial;
 
diff --git a/src/dawn_native/vulkan/DeviceVk.h b/src/dawn_native/vulkan/DeviceVk.h
index 3e4bff1..ae6032c 100644
--- a/src/dawn_native/vulkan/DeviceVk.h
+++ b/src/dawn_native/vulkan/DeviceVk.h
@@ -174,7 +174,7 @@
         std::unique_ptr<external_semaphore::Service> mExternalSemaphoreService;
 
         ResultOrError<VkFence> GetUnusedFence();
-        ExecutionSerial CheckAndUpdateCompletedSerials() override;
+        ResultOrError<ExecutionSerial> CheckAndUpdateCompletedSerials() override;
 
         // We track which operations are in flight on the GPU with an increasing serial.
         // This works only because we have a single queue. Each submit to a queue is associated
diff --git a/src/tests/white_box/D3D12DescriptorHeapTests.cpp b/src/tests/white_box/D3D12DescriptorHeapTests.cpp
index 0fb70fb..4f67ca7 100644
--- a/src/tests/white_box/D3D12DescriptorHeapTests.cpp
+++ b/src/tests/white_box/D3D12DescriptorHeapTests.cpp
@@ -245,7 +245,7 @@
         EXPECT_TRUE(std::find(heaps.begin(), heaps.end(), heap) == heaps.end());
         heaps.push_back(heap);
         // CheckPassedSerials() will update the last internally completed serial.
-        mD3DDevice->CheckPassedSerials();
+        EXPECT_TRUE(mD3DDevice->CheckPassedSerials().IsSuccess());
         // NextSerial() will increment the last internally submitted serial.
         EXPECT_TRUE(mD3DDevice->NextSerial().IsSuccess());
     }
@@ -258,7 +258,7 @@
         ComPtr<ID3D12DescriptorHeap> heap = allocator->GetShaderVisibleHeap();
         EXPECT_TRUE(heaps.front() == heap);
         heaps.pop_front();
-        mD3DDevice->CheckPassedSerials();
+        EXPECT_TRUE(mD3DDevice->CheckPassedSerials().IsSuccess());
         EXPECT_TRUE(mD3DDevice->NextSerial().IsSuccess());
     }