Don't forward device lost errors to the uncaptured error callback

This issue was discovered in http://crrev.com/c/2613517 where a
device lost error on page teardown was bubbling up to the Renderer's
uncaptured error callback.

Bug: chromium:1160459
Change-Id: I64b8c7779f4808d5a4b87c131aaf2e041c512bb9
Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/36960
Reviewed-by: Corentin Wallez <cwallez@chromium.org>
Commit-Queue: Austin Eng <enga@chromium.org>
diff --git a/src/dawn_native/Device.cpp b/src/dawn_native/Device.cpp
index 72d7606..390ed57 100644
--- a/src/dawn_native/Device.cpp
+++ b/src/dawn_native/Device.cpp
@@ -212,10 +212,15 @@
     }
 
     void DeviceBase::HandleError(InternalErrorType type, const char* message) {
-        // If we receive an internal error, assume the backend can't recover and proceed with
-        // device destruction. We first wait for all previous commands to be completed so that
-        // backend objects can be freed immediately, before handling the loss.
-        if (type == InternalErrorType::Internal) {
+        if (type == InternalErrorType::DeviceLost) {
+            // A real device lost happened. Set the state to disconnected as the device cannot be
+            // used.
+            mState = State::Disconnected;
+        } else if (type == InternalErrorType::Internal) {
+            // If we receive an internal error, assume the backend can't recover and proceed with
+            // device destruction. We first wait for all previous commands to be completed so that
+            // backend objects can be freed immediately, before handling the loss.
+
             // Move away from the Alive state so that the application cannot use this device
             // anymore.
             // TODO(cwallez@chromium.org): Do we need atomics for this to become visible to other
@@ -240,7 +245,7 @@
             mDeviceLostCallback = nullptr;
         }
 
-        // Still forward device loss and internal errors to the error scopes so they all reject.
+        // Still forward device loss errors to the error scopes so they all reject.
         mCurrentErrorScope->HandleError(ToWGPUErrorType(type), message);
     }
 
@@ -322,7 +327,7 @@
         if (DAWN_LIKELY(mState == State::Alive)) {
             return {};
         }
-        return DAWN_DEVICE_LOST_ERROR("Device is lost");
+        return DAWN_VALIDATION_ERROR("Device is lost");
     }
 
     void DeviceBase::LoseForTesting() {
diff --git a/src/dawn_native/Error.h b/src/dawn_native/Error.h
index 3d5d5c3..4715c83 100644
--- a/src/dawn_native/Error.h
+++ b/src/dawn_native/Error.h
@@ -26,7 +26,6 @@
         Validation,
         DeviceLost,
         Internal,
-        Unimplemented,
         OutOfMemory
     };
 
@@ -72,11 +71,23 @@
 
 #define DAWN_MAKE_ERROR(TYPE, MESSAGE) \
     ::dawn_native::ErrorData::Create(TYPE, MESSAGE, __FILE__, __func__, __LINE__)
+
 #define DAWN_VALIDATION_ERROR(MESSAGE) DAWN_MAKE_ERROR(InternalErrorType::Validation, MESSAGE)
+
+// DAWN_DEVICE_LOST_ERROR means that there was a real unrecoverable native device lost error.
+// We can't even do a graceful shutdown because the Device is gone.
 #define DAWN_DEVICE_LOST_ERROR(MESSAGE) DAWN_MAKE_ERROR(InternalErrorType::DeviceLost, MESSAGE)
+
+// DAWN_INTERNAL_ERROR means Dawn hit an unexpected error in the backend and should try to
+// gracefully shut down.
 #define DAWN_INTERNAL_ERROR(MESSAGE) DAWN_MAKE_ERROR(InternalErrorType::Internal, MESSAGE)
+
 #define DAWN_UNIMPLEMENTED_ERROR(MESSAGE) \
     DAWN_MAKE_ERROR(InternalErrorType::Internal, std::string("Unimplemented: ") + MESSAGE)
+
+// DAWN_OUT_OF_MEMORY_ERROR means we ran out of memory. It may be used as a signal internally in
+// Dawn to free up unused resources. Or, it may bubble up to the application to signal an allocation
+// was too large or they should free some existing resources.
 #define DAWN_OUT_OF_MEMORY_ERROR(MESSAGE) DAWN_MAKE_ERROR(InternalErrorType::OutOfMemory, MESSAGE)
 
 #define DAWN_CONCAT1(x, y) x##y
diff --git a/src/dawn_native/ErrorScope.cpp b/src/dawn_native/ErrorScope.cpp
index 674bb7a..314b422 100644
--- a/src/dawn_native/ErrorScope.cpp
+++ b/src/dawn_native/ErrorScope.cpp
@@ -87,14 +87,23 @@
                     consumed = true;
                     break;
 
-                // Unknown and DeviceLost are fatal. All error scopes capture them.
+                // DeviceLost is fatal. All error scopes capture them.
                 // |consumed| is false because these should bubble to all scopes.
-                case wgpu::ErrorType::Unknown:
                 case wgpu::ErrorType::DeviceLost:
                     consumed = false;
+                    if (currentScope->mErrorType != wgpu::ErrorType::DeviceLost) {
+                        // DeviceLost overrides any other error that is not a DeviceLost.
+                        currentScope->mErrorType = type;
+                        currentScope->mErrorMessage = message;
+                    }
                     break;
 
+                case wgpu::ErrorType::Unknown:
+                    // Means the scope was destroyed before contained work finished.
+                    // This happens when you destroy the device while there's pending work.
+                    // That's handled in ErrorScope::UnlinkForShutdownImpl, not here.
                 case wgpu::ErrorType::NoError:
+                    // Not considered an error, and should never be passed to HandleError.
                     UNREACHABLE();
                     return;
             }
@@ -111,8 +120,10 @@
         }
 
         // The root error scope captures all uncaptured errors.
+        // Except, it should not capture device lost errors since those go to
+        // the device lost callback.
         ASSERT(currentScope->IsRoot());
-        if (currentScope->mCallback) {
+        if (currentScope->mCallback && type != wgpu::ErrorType::DeviceLost) {
             currentScope->mCallback(static_cast<WGPUErrorType>(type), message,
                                     currentScope->mUserdata);
         }
diff --git a/src/tests/BUILD.gn b/src/tests/BUILD.gn
index 258d26e..10ab9c5 100644
--- a/src/tests/BUILD.gn
+++ b/src/tests/BUILD.gn
@@ -264,6 +264,7 @@
 
   sources = [
     "DawnTest.h",
+    "MockCallback.h",
     "end2end/BasicTests.cpp",
     "end2end/BindGroupTests.cpp",
     "end2end/BufferTests.cpp",
diff --git a/src/tests/end2end/DeviceLostTests.cpp b/src/tests/end2end/DeviceLostTests.cpp
index ba75364..d537008 100644
--- a/src/tests/end2end/DeviceLostTests.cpp
+++ b/src/tests/end2end/DeviceLostTests.cpp
@@ -15,6 +15,7 @@
 #include "tests/DawnTest.h"
 
 #include <gmock/gmock.h>
+#include "tests/MockCallback.h"
 #include "utils/ComboRenderPipelineDescriptor.h"
 #include "utils/WGPUHelpers.h"
 
@@ -516,6 +517,19 @@
     device.LoseForTesting();
 }
 
+TEST_P(DeviceLostTest, DeviceLostDoesntCallUncapturedError) {
+    // Set no callback.
+    device.SetDeviceLostCallback(nullptr, nullptr);
+
+    // Set the uncaptured error callback which should not be called on
+    // device lost.
+    MockCallback<WGPUErrorCallback> mockErrorCallback;
+    device.SetUncapturedErrorCallback(mockErrorCallback.Callback(),
+                                      mockErrorCallback.MakeUserdata(nullptr));
+    EXPECT_CALL(mockErrorCallback, Call(_, _, _)).Times(Exactly(0));
+    device.LoseForTesting();
+}
+
 DAWN_INSTANTIATE_TEST(DeviceLostTest,
                       D3D12Backend(),
                       MetalBackend(),