[dawn][emscripten] Make DeviceLost callback pass null on device drop

Per upstream spec, DeviceLost callback should pass a null device if the
device's last ref has been, or is being, dropped.
https://github.com/webgpu-native/webgpu-headers/pull/532

In Emscripten, also fix a leak of the WGPUDeviceImpl when requestDevice
fails, and assert that GetLostFuture can't be called on imported
devices.

Fixed: 415094789
Change-Id: I58b6685874aca09bcd42beeb45337a7e8c4c8016
Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/241396
Commit-Queue: Kai Ninomiya <kainino@chromium.org>
Reviewed-by: Loko Kung <lokokung@google.com>
diff --git a/src/dawn/native/Device.cpp b/src/dawn/native/Device.cpp
index 2d63972..a59e65a 100644
--- a/src/dawn/native/Device.cpp
+++ b/src/dawn/native/Device.cpp
@@ -223,6 +223,7 @@
         mCallback(&device, ToAPI(mReason), ToOutputStringView(mMessage), userdata1, userdata2);
     }
 
+    // Break the ref cycle between DeviceBase and DeviceLostEvent.
     mDevice = nullptr;
 }
 
@@ -473,6 +474,13 @@
         // the lock.
         auto deviceLock(GetScopedLock());
 
+        // Set DeviceLostEvent to pass a null device to the callback (which may happen in Destroy()
+        // depending on the CallbackMode). This also makes DeviceLostEvent skip unregistering the
+        // UncapturedError and Logging callbacks; they'll be unregistered later in this function.
+        if (mLostEvent) {
+            mLostEvent->mDevice = nullptr;
+        }
+
         // DeviceBase uses RefCountedWithExternalCount to break refcycles.
         //
         // DeviceBase holds multiple Refs to various API objects (pipelines, buffers, etc.) which
@@ -502,7 +510,7 @@
 
     auto deviceLock(GetScopedLock());
     // Drop the device's reference to the queue. Because the application dropped the last external
-    // references, they can no longer get the queue from APIGetQueue().
+    // reference, it's UB if they try to get the queue from APIGetQueue().
     mQueue = nullptr;
 
     // Reset callbacks since after dropping the last external reference, the application may have
diff --git a/src/dawn/tests/DawnTest.cpp b/src/dawn/tests/DawnTest.cpp
index fc8343d..ac04e86 100644
--- a/src/dawn/tests/DawnTest.cpp
+++ b/src/dawn/tests/DawnTest.cpp
@@ -1214,9 +1214,10 @@
     DAWN_ASSERT(apiDevice);
 
     // The loss of the device is expected to happen at the end of the test so add it directly.
-    EXPECT_CALL(mDeviceLostCallback,
-                Call(CHandleIs(apiDevice.Get()), wgpu::DeviceLostReason::Destroyed, _))
-        .Times(AtMost(1));
+    // We don't know if the device will be dropped or Destroy()ed, so we can't check device=null.
+    EXPECT_CALL(mDeviceLostCallback, Call(_, wgpu::DeviceLostReason::Destroyed, _))
+        .Times(AtMost(1))
+        .RetiresOnSaturation();
 
     apiDevice.SetLoggingCallback([](wgpu::LoggingType type, wgpu::StringView message) {
         std::string_view view = {message.data, message.length};
diff --git a/src/dawn/tests/unittests/native/mocks/DawnMockTest.cpp b/src/dawn/tests/unittests/native/mocks/DawnMockTest.cpp
index 8077a73..893fbdc 100644
--- a/src/dawn/tests/unittests/native/mocks/DawnMockTest.cpp
+++ b/src/dawn/tests/unittests/native/mocks/DawnMockTest.cpp
@@ -69,8 +69,7 @@
         return;
     }
 
-    EXPECT_CALL(mDeviceLostCallback,
-                Call(CHandleIs(device.Get()), wgpu::DeviceLostReason::Destroyed, _))
+    EXPECT_CALL(mDeviceLostCallback, Call(CHandleIs(nullptr), wgpu::DeviceLostReason::Destroyed, _))
         .Times(AtMost(1));
 
     // Since the device owns the instance in these tests, we need to explicitly verify that the
diff --git a/src/emdawnwebgpu/tests/FuturesTests.cpp b/src/emdawnwebgpu/tests/FuturesTests.cpp
index 296ccea1..b36d1a2 100644
--- a/src/emdawnwebgpu/tests/FuturesTests.cpp
+++ b/src/emdawnwebgpu/tests/FuturesTests.cpp
@@ -116,7 +116,7 @@
 
 TEST_F(AdapterLevelTests, RequestDeviceThenDestroy) {
     wgpu::Device device = nullptr;
-    wgpu::DeviceLostReason reason;
+    wgpu::DeviceLostReason reason{};
 
     wgpu::DeviceDescriptor descriptor = {};
     descriptor.SetDeviceLostCallback(
@@ -134,12 +134,17 @@
 }
 
 TEST_F(AdapterLevelTests, RequestDeviceThenDrop) {
-    wgpu::DeviceLostReason reason;
+    wgpu::DeviceLostReason reason{};
 
     wgpu::DeviceDescriptor descriptor = {};
     descriptor.SetDeviceLostCallback(
         wgpu::CallbackMode::AllowSpontaneous,
-        [&reason](const wgpu::Device&, wgpu::DeviceLostReason r, wgpu::StringView) { reason = r; });
+        [&reason](const wgpu::Device& d, wgpu::DeviceLostReason r, wgpu::StringView) {
+            reason = r;
+            // d should be null even though this is called during wgpuDeviceRelease()
+            // so the allocation hasn't been freed yet.
+            EXPECT_EQ(nullptr, d.Get());
+        });
     wgpu::Device device = RequestDevice(&descriptor);
 
     auto deviceLostFuture = device.GetLostFuture();
diff --git a/third_party/emdawnwebgpu/pkg/webgpu/src/webgpu.cpp b/third_party/emdawnwebgpu/pkg/webgpu/src/webgpu.cpp
index 9ae6dea..badfc9c 100644
--- a/third_party/emdawnwebgpu/pkg/webgpu/src/webgpu.cpp
+++ b/third_party/emdawnwebgpu/pkg/webgpu/src/webgpu.cpp
@@ -168,7 +168,7 @@
     return false;
   }
 
-  bool IsImported() { return mIsImportedFromJS; }
+  bool IsImported() const { return mIsImportedFromJS; }
 
  private:
   std::atomic<uint64_t> mRefCount = 1;
@@ -382,7 +382,6 @@
 };
 
 class EventManager;
-class TrackedEvent;
 
 class TrackedEvent : NonMovable {
  public:
@@ -730,6 +729,8 @@
   WGPUQueueImpl(const EventSource* source);
 };
 
+enum class DropDeviceFromDeviceLostEvent : bool { No, Yes };
+
 // Device is specially implemented in order to handle refcounting the Queue.
 struct WGPUDeviceImpl final : public EventSource,
                               public RefCountedWithExternalCount {
@@ -741,7 +742,7 @@
   // Injection constructor used when we already have a backing Device.
   WGPUDeviceImpl(const EventSource* source, WGPUQueue queue);
 
-  void Destroy();
+  void Destroy(DropDeviceFromDeviceLostEvent dropDevice);
   WGPUQueue GetQueue() const;
   WGPUFuture GetLostFuture() const;
 
@@ -936,7 +937,15 @@
 
   EventType GetType() override { return kType; }
 
-  void ReadyHook(WGPUDeviceLostReason reason, const char* message) {
+  // If dropDevice=Yes, drops the DeviceLostEvent->WGPUDeviceImpl ref so the
+  // device won't be passed to the callback, in preparation to free the device.
+  void ReadyHook(DropDeviceFromDeviceLostEvent dropDevice,
+                 WGPUDeviceLostReason reason,
+                 const char* message) {
+    if (dropDevice == DropDeviceFromDeviceLostEvent::Yes) {
+      mDevice = nullptr;
+    }
+
     mReason = reason;
     if (message) {
       mMessage = message;
@@ -949,9 +958,7 @@
       mMessage = "A valid external Instance reference no longer exists.";
     }
     if (mCallback) {
-      WGPUDevice device = mReason != WGPUDeviceLostReason_FailedCreation
-                              ? mDevice.Get()
-                              : nullptr;
+      WGPUDevice device = mDevice.Get();
       mCallback(&device, mReason, ToOutputStringView(mMessage), mUserdata1,
                 mUserdata2);
     }
@@ -1270,7 +1277,8 @@
 void emwgpuOnDeviceLostCompleted(double futureId,
                                  WGPUDeviceLostReason reason,
                                  const char* message) {
-  GetEventManager().SetFutureReady<DeviceLostEvent>(futureId, reason, message);
+  GetEventManager().SetFutureReady<DeviceLostEvent>(
+      futureId, DropDeviceFromDeviceLostEvent::No, reason, message);
 }
 void emwgpuOnMapAsyncCompleted(double futureId,
                                WGPUMapAsyncStatus status,
@@ -1311,8 +1319,12 @@
     GetEventManager().SetFutureReady<RequestDeviceEvent>(futureId, status,
                                                          nullptr, message);
     GetEventManager().SetFutureReady<DeviceLostEvent>(
-        device->GetLostFuture().id, WGPUDeviceLostReason_FailedCreation,
-        "Device failed at creation.");
+        device->GetLostFuture().id, DropDeviceFromDeviceLostEvent::Yes,
+        WGPUDeviceLostReason_FailedCreation, "Device creation failed.");
+
+    // Free the device now that there should be no pointers to it.
+    [[maybe_unused]] bool deviceFreed = device->Release();
+    assert(deviceFreed);
   }
 }
 void emwgpuOnWorkDoneCompleted(double futureId,
@@ -1489,7 +1501,13 @@
   mQueue.Acquire(queue);
 }
 
-void WGPUDeviceImpl::Destroy() {
+void WGPUDeviceImpl::Destroy(DropDeviceFromDeviceLostEvent dropDevice) {
+  // Ready the DeviceLostEvent now. dropDevice=Yes ensures we can't later
+  // try to pass an invalid device pointer to the callback.
+  GetEventManager().SetFutureReady<DeviceLostEvent>(
+      mDeviceLostFutureId, dropDevice, WGPUDeviceLostReason_Destroyed,
+      "Device was destroyed.");
+
   emwgpuDeviceDestroy(this);
 }
 
@@ -1499,6 +1517,11 @@
 }
 
 WGPUFuture WGPUDeviceImpl::GetLostFuture() const {
+  if (IsImported()) {
+    DEBUG_PRINTF("GetLostFuture cannot be called on an imported device.");
+    assert(false);
+  }
+  assert(mDeviceLostFutureId != kNullFutureId);
   return WGPUFuture{mDeviceLostFutureId};
 }
 
@@ -1515,8 +1538,13 @@
 }
 
 void WGPUDeviceImpl::WillDropLastExternalRef() {
-  if (!IsImported()) {
-    Destroy();
+  if (IsImported()) {
+    // Note Destroy is responsible for readying the DeviceLostEvent. If the
+    // device was imported, that doesn't happen, which is fine because no
+    // callback can have been set.
+    assert(mDeviceLostFutureId == kNullFutureId);
+  } else {
+    Destroy(DropDeviceFromDeviceLostEvent::Yes);
   }
 }
 
@@ -1852,7 +1880,7 @@
 }
 
 void wgpuDeviceDestroy(WGPUDevice device) {
-  device->Destroy();
+  device->Destroy(DropDeviceFromDeviceLostEvent::No);
 }
 
 WGPUFuture wgpuDeviceGetLostFuture(WGPUDevice device) {