[dawn][native] Updates buffer mapping w.r.t device lock.
The crux of this change is to allow users to re-entrantly call
the *MappedRange functions and Unmap inside the MapAsync
callback when the callback reports success. If the callback
did not succeed, re-entrant API usage remains disallowed.
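For illustration only (this snippet is not part of the change), a
minimal readback sketch against webgpu_cpp.h; the buffer, its size,
and its MapRead usage are assumptions of the example:

  #include <cstddef>
  #include <webgpu/webgpu_cpp.h>

  // Sketch: with this change, a *successful* MapAsync callback may
  // re-entrantly call GetConstMappedRange and Unmap on the buffer.
  void ReadbackExample(wgpu::Buffer buffer, size_t size) {
      buffer.MapAsync(
          wgpu::MapMode::Read, 0, size, wgpu::CallbackMode::AllowSpontaneous,
          [buffer, size](wgpu::MapAsyncStatus status, wgpu::StringView) {
              if (status != wgpu::MapAsyncStatus::Success) {
                  return;  // Re-entrant use is still disallowed on failure.
              }
              const void* data = buffer.GetConstMappedRange(0, size);
              // ... consume |data| ...
              buffer.Unmap();
          });
  }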
- Renames GetMappedPointer to GetMappedPointerImpl for the frontend
class, and implements GetMappedPointer to take the staging buffer
into account. This is more in line with how the rest of the
Buffer's *Impl functions are named and handled, and it allows some
cleanup of overly specialized code.
- Coalesces buffer mapping state for clarity.
- Updates |completionSerial| in QueueAndSerial to be atomic
because spontaneous events may read and modify it concurrently
(see the first sketch after this list).
- Stops acquiring the device-wide lock in GetMappedRange and its
variants, now that the atomic buffer state guarantees the relevant
code paths are valid (see the second sketch after this list).
This should also improve the performance of these calls.
- Refactors Unmap a bit to differentiate internal and external
usages.
- Manually scopes the device lock in Unmap to ensure that the
callbacks are triggered in the right places.
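First sketch (illustration only, simplified from the EventManager
change; plain uint64_t stands in for ExecutionSerial): lower the
atomic completion serial to the most recently completed serial
without racing a concurrent update:

  #include <atomic>
  #include <cstdint>

  using Serial = uint64_t;  // stand-in for ExecutionSerial

  // Only lower |completionSerial|; compare_exchange_weak reloads
  // |current| when it fails, so a concurrent update is re-checked.
  void LowerToCompleted(std::atomic<Serial>& completionSerial, Serial completed) {
      Serial current = completionSerial.load(std::memory_order_acquire);
      while (current > completed &&
             !completionSerial.compare_exchange_weak(current, completed,
                                                     std::memory_order_acq_rel)) {
      }
  }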
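Second sketch (illustration only, not Dawn code; all names are
placeholders): the publish/validate ordering that lets the
*MappedRange entry points skip the device-wide lock. The mapping
fields are written before the state is stored with release
semantics, and readers load the state with acquire semantics before
touching those fields:

  #include <atomic>
  #include <cstddef>

  enum class State { Unmapped, Mapped };

  struct BufferSketch {
      std::atomic<State> state{State::Unmapped};
      void* mapData = nullptr;
      size_t mapOffset = 0;
      size_t mapSize = 0;

      // Runs under the device lock when a pending map resolves successfully.
      void SetMapped(void* ptr, size_t offset, size_t size) {
          mapData = ptr;
          mapOffset = offset;
          mapSize = size;
          state.store(State::Mapped, std::memory_order_release);  // publish last
      }

      // May run without the device lock; check the state before the fields.
      void* GetMappedRange(size_t offset, size_t size) {
          if (state.load(std::memory_order_acquire) != State::Mapped) {
              return nullptr;
          }
          if (offset < mapOffset || offset + size > mapOffset + mapSize) {
              return nullptr;
          }
          return static_cast<unsigned char*>(mapData) + offset;
      }
  };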
Bug: 412761228, 418165343, 42241400, 40643114, 417802523
Change-Id: If5ea22f81c6d305d1dee90eb3cf6276061e2f888
Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/242715
Reviewed-by: Corentin Wallez <cwallez@chromium.org>
Reviewed-by: Kai Ninomiya <kainino@chromium.org>
Commit-Queue: Loko Kung <lokokung@google.com>
diff --git a/src/dawn/dawn.json b/src/dawn/dawn.json
index f8aac7e..0ecd471 100644
--- a/src/dawn/dawn.json
+++ b/src/dawn/dawn.json
@@ -594,6 +594,7 @@
},
{
"name": "get mapped range",
+ "no autolock": true,
"returns": "void *",
"args": [
{"name": "offset", "type": "size_t", "default": 0},
@@ -602,6 +603,7 @@
},
{
"name": "get const mapped range",
+ "no autolock": true,
"returns": "void const *",
"args": [
{"name": "offset", "type": "size_t", "default": 0},
@@ -610,6 +612,7 @@
},
{
"name": "write mapped range",
+ "no autolock": true,
"returns": "status",
"args": [
{"name": "offset", "type": "size_t"},
@@ -619,6 +622,7 @@
},
{
"name": "read mapped range",
+ "no autolock": true,
"returns": "status",
"args": [
{"name": "offset", "type": "size_t"},
@@ -648,7 +652,8 @@
"returns": "buffer map state"
},
{
- "name": "unmap"
+ "name": "unmap",
+ "no autolock": true
},
{
"name": "destroy"
diff --git a/src/dawn/native/Buffer.cpp b/src/dawn/native/Buffer.cpp
index 1337cca..ebbd3ab 100644
--- a/src/dawn/native/Buffer.cpp
+++ b/src/dawn/native/Buffer.cpp
@@ -95,7 +95,7 @@
DAWN_UNREACHABLE();
}
- void* GetMappedPointer() override { return mFakeMappedData.get(); }
+ void* GetMappedPointerImpl() override { return mFakeMappedData.get(); }
void UnmapImpl() override { mFakeMappedData.reset(); }
@@ -243,7 +243,6 @@
bool error = false;
BufferErrorData pendingErrorData;
- Ref<MapAsyncEvent> pendingMapEvent;
// Lock the buffer / error. This may race with UnmapEarly which occurs when the buffer is
// unmapped or destroyed.
@@ -254,12 +253,7 @@
error = true;
} else if (auto** buffer = std::get_if<BufferBase*>(&*bufferOrError)) {
// Set the buffer state to Mapped if this pending map succeeded.
- // TODO(crbug.com/dawn/831): in order to be thread safe, mutation of the
- // state and pending map event needs to be atomic w.r.t. UnmapInternal.
- DAWN_ASSERT((*buffer)->mState == BufferState::PendingMap);
- (*buffer)->mState = BufferState::Mapped;
-
- pendingMapEvent = std::move((*buffer)->mPendingMapEvent);
+ (*buffer)->SetMapped(BufferState::Mapped);
}
});
if (error) {
@@ -377,35 +371,52 @@
}
BufferBase::~BufferBase() {
- DAWN_ASSERT(mState == BufferState::Unmapped || mState == BufferState::Destroyed ||
+ BufferState state = mState.load(std::memory_order::acquire);
+ DAWN_ASSERT(state == BufferState::Unmapped || state == BufferState::Destroyed ||
// Happens if the buffer was created mappedAtCreation *after* device destroy.
// TODO(crbug.com/42241190): This shouldn't be needed once the issue above is fixed,
- // because then mState will just be Destroyed.
- (mState == BufferState::MappedAtCreation &&
+ // because then bufferState will just be Destroyed.
+ (state == BufferState::MappedAtCreation &&
GetDevice()->GetState() == DeviceBase::State::Destroyed));
}
void BufferBase::DestroyImpl() {
- // TODO(crbug.com/dawn/831): DestroyImpl is called from two places.
- // - It may be called if the buffer is explicitly destroyed with APIDestroy.
- // This case is NOT thread-safe and needs proper synchronization with other
- // simultaneous uses of the buffer.
- // - It may be called when the last ref to the buffer is dropped and the buffer
- // is implicitly destroyed. This case is thread-safe because there are no
- // other threads using the buffer since there are no other live refs.
- if (mState == BufferState::Mapped || mState == BufferState::PendingMap) {
- UnmapInternal(WGPUMapAsyncStatus_Aborted,
- "Buffer was destroyed before mapping was resolved.");
- } else if (mState == BufferState::MappedAtCreation) {
- if (mStagingBuffer != nullptr) {
- mStagingBuffer = nullptr;
- } else if (mSize != 0) {
- UnmapInternal(WGPUMapAsyncStatus_Aborted,
- "Buffer was destroyed before mapping was resolved.");
- }
- }
+ Ref<MapAsyncEvent> event;
- mState = BufferState::Destroyed;
+ switch (mState.load(std::memory_order::acquire)) {
+ case BufferState::Mapped:
+ case BufferState::PendingMap: {
+ [[maybe_unused]] bool hadError = GetDevice()->ConsumedError(
+ UnmapInternal(WGPUMapAsyncStatus_Aborted,
+ "Buffer was destroyed before mapping was resolved."),
+ &event, "calling %s.Destroy().", this);
+ break;
+ }
+ case BufferState::MappedAtCreation: {
+ if (mStagingBuffer != nullptr) {
+ mStagingBuffer = nullptr;
+ } else if (mSize != 0) {
+ [[maybe_unused]] bool hadError = GetDevice()->ConsumedError(
+ UnmapInternal(WGPUMapAsyncStatus_Aborted,
+ "Buffer was destroyed before mapping was resolved."),
+ &event, "calling %s.Destroy().", this);
+ }
+ break;
+ }
+ default:
+ break;
+ }
+ mState.store(BufferState::Destroyed, std::memory_order::release);
+
+ // This is the error case where re-entrant API calls, specifically Unmap, will fail. This
+ // function is called from two places, Buffer::APIDestroy and Device::Destroy, both of which
+ // currently hold the device-wide lock, and we don't yet have a way to circumvent and release
+ // it before the callback is called (spontaneously). That said, this only happens if a user
+ // calls Unmap in the MapAsync callback even though the callback was Aborted, which is an
+ // invalid use case.
+ if (event) {
+ GetInstance()->GetEventManager()->SetFutureReady(event.Get());
+ }
}
// static
@@ -442,7 +453,7 @@
}
wgpu::BufferMapState BufferBase::APIGetMapState() const {
- switch (mState) {
+ switch (mState.load(std::memory_order::acquire)) {
case BufferState::Mapped:
case BufferState::MappedAtCreation:
return wgpu::BufferMapState::Mapped;
@@ -457,33 +468,42 @@
}
}
-MaybeError BufferBase::MapAtCreation() {
- DAWN_TRY(MapAtCreationInternal());
+void BufferBase::SetMapped(BufferState newState) {
+ // There are only 2 valid transitions:
+ // 1) Nominal: PendingMap -> Mapped
+ // 2) MappedAtCreation case because initial state is unmapped: Unmapped -> MappedAtCreation.
+ BufferState oldState = mState.load(std::memory_order::acquire);
+ DAWN_ASSERT((oldState == BufferState::PendingMap && newState == BufferState::Mapped) ||
+ (oldState == BufferState::Unmapped && newState == BufferState::MappedAtCreation));
- void* ptr;
- size_t size;
- if (mSize == 0) {
- return {};
- } else if (mStagingBuffer != nullptr) {
- // If there is a staging buffer for initialization, clear its contents directly.
- // It should be exactly as large as the buffer allocation.
- ptr = mStagingBuffer->GetMappedPointer();
- size = mStagingBuffer->GetSize();
- DAWN_ASSERT(size == GetAllocatedSize());
+ if (mStagingBuffer) {
+ mMapData = mStagingBuffer->GetMappedPointerImpl();
+ } else if (GetSize() == 0) {
+ mMapData = static_cast<void*>(&sZeroSizedMappingData);
} else {
- // Otherwise, the buffer is directly mappable on the CPU.
- ptr = GetMappedPointer();
- size = GetAllocatedSize();
+ mMapData = GetMappedPointerImpl();
}
+ mState.store(newState, std::memory_order::release);
+}
+
+MaybeError BufferBase::MapAtCreation() {
+ bool usingStagingBuffer = false;
+ DAWN_TRY_ASSIGN(usingStagingBuffer, MapAtCreationInternal());
+
+ if (GetSize() == 0) {
+ return {};
+ }
+ size_t size = GetAllocatedSize();
+ void* ptr = GetMappedPointer();
+
DeviceBase* device = GetDevice();
if (device->IsToggleEnabled(Toggle::LazyClearResourceOnFirstUse) &&
!device->IsToggleEnabled(Toggle::DisableLazyClearForMappedAtCreationBuffer)) {
- // The staging buffer is created with `MappedAtCreation == true` so we don't need to clear
- // it again.
- if (mStagingBuffer != nullptr) {
- DAWN_ASSERT(!mStagingBuffer->NeedsInitialization());
- } else {
+ // The staging buffer is created with `MappedAtCreation == true` and the main buffer will
+ // actually get initialized when the staging data is copied in. (But we mark the main buffer
+ // as initialized now.)
+ if (!usingStagingBuffer) {
memset(ptr, uint8_t(0u), size);
device->IncrementLazyClearCountForTesting();
}
@@ -497,11 +517,9 @@
return {};
}
-MaybeError BufferBase::MapAtCreationInternal() {
- DAWN_ASSERT(mState == BufferState::Unmapped);
-
- mMapOffset = 0;
- mMapSize = mSize;
+ResultOrError<bool> BufferBase::MapAtCreationInternal() {
+ DAWN_ASSERT(mState.load(std::memory_order::acquire) == BufferState::Unmapped);
+ Ref<BufferBase> stagingBuffer;
// 0-sized buffers are not supposed to be written to. Return back any non-null pointer.
// Skip handling 0-sized buffers so we don't try to map them in the backend.
@@ -522,21 +540,24 @@
stagingBufferDesc.size = Align(GetAllocatedSize(), 4);
stagingBufferDesc.mappedAtCreation = true;
stagingBufferDesc.label = "Dawn_MappedAtCreationStaging";
- DAWN_TRY_ASSIGN(mStagingBuffer, GetDevice()->CreateBuffer(&stagingBufferDesc));
+ DAWN_TRY_ASSIGN(stagingBuffer, GetDevice()->CreateBuffer(&stagingBufferDesc));
}
}
- // Only set the state to mapped at creation if we did no fail any point in this helper.
+ // Only set the state to mapped at creation if we did not fail at any point in this helper.
// Otherwise, if we override the default unmapped state before succeeding to create a
// staging buffer, we will have issues when we try to destroy the buffer.
- mState = BufferState::MappedAtCreation;
- return {};
+ mMapOffset = 0;
+ mMapSize = mSize;
+ mStagingBuffer = std::move(stagingBuffer);
+ SetMapped(BufferState::MappedAtCreation);
+ return mStagingBuffer != nullptr;
}
MaybeError BufferBase::ValidateCanUseOnQueueNow() const {
DAWN_ASSERT(!IsError());
- switch (mState) {
+ switch (mState.load(std::memory_order::acquire)) {
case BufferState::Destroyed:
return DAWN_VALIDATION_ERROR("%s used in submit while destroyed.", this);
case BufferState::Mapped:
@@ -561,10 +582,8 @@
// (note, not raise a validation error to the device) and return the null future.
DAWN_ASSERT(callbackInfo.nextInChain == nullptr);
- Ref<EventManager::TrackedEvent> event;
+ Ref<MapAsyncEvent> event;
{
- // TODO(crbug.com/dawn/831) Manually acquire device lock instead of relying on code-gen for
- // re-entrancy.
auto deviceLock(GetDevice()->GetScopedLock());
// Handle the defaulting of size required by WebGPU, even if in webgpu_cpp.h it is not
@@ -576,7 +595,7 @@
WGPUMapAsyncStatus status = WGPUMapAsyncStatus_Error;
MaybeError maybeError = [&]() -> MaybeError {
- DAWN_INVALID_IF(mState == BufferState::PendingMap,
+ DAWN_INVALID_IF(mState.load(std::memory_order::acquire) == BufferState::PendingMap,
"%s already has an outstanding map pending.", this);
DAWN_TRY(ValidateMapAsync(mode, offset, size, &status));
DAWN_TRY(MapAsyncImpl(mode, offset, size));
@@ -594,13 +613,15 @@
mMapMode = mode;
mMapOffset = offset;
mMapSize = size;
- mState = BufferState::PendingMap;
- mPendingMapEvent =
+
+ event =
AcquireRef(new MapAsyncEvent(GetDevice(), this, callbackInfo, mLastUsageSerial));
- event = mPendingMapEvent;
+ mMapData = event;
+ mState.store(BufferState::PendingMap, std::memory_order::release);
}
}
+ DAWN_ASSERT(event);
FutureID futureID = GetInstance()->GetEventManager()->TrackEvent(std::move(event));
return {futureID};
}
@@ -633,17 +654,20 @@
return wgpu::Status::Success;
}
+void* BufferBase::GetMappedPointer() {
+ BufferState state = mState.load(std::memory_order::acquire);
+ if (state != BufferState::MappedAtCreation && state != BufferState::Mapped) {
+ return nullptr;
+ }
+ void** ptr = std::get_if<void*>(&mMapData);
+ DAWN_ASSERT(ptr);
+ return *ptr;
+}
+
void* BufferBase::GetMappedRange(size_t offset, size_t size, bool writable) {
if (!CanGetMappedRange(writable, offset, size)) {
return nullptr;
}
-
- if (mStagingBuffer != nullptr) {
- return static_cast<uint8_t*>(mStagingBuffer->GetMappedPointer()) + offset;
- }
- if (mSize == 0) {
- return &sZeroSizedMappingData;
- }
uint8_t* start = static_cast<uint8_t*>(GetMappedPointer());
return start == nullptr ? nullptr : start + offset;
}
@@ -668,41 +692,32 @@
}
void BufferBase::APIUnmap() {
- if (GetDevice()->ConsumedError(ValidateUnmap(), "calling %s.Unmap().", this)) {
- return;
+ Ref<MapAsyncEvent> event;
+ {
+ auto deviceLock(GetDevice()->GetScopedLock());
+ if (GetDevice()->ConsumedError(ValidateUnmap(), "calling %s.Unmap().", this)) {
+ return;
+ }
+ [[maybe_unused]] bool hadError = GetDevice()->ConsumedError(
+ UnmapInternal(WGPUMapAsyncStatus_Aborted,
+ "Buffer was unmapped before mapping was resolved."),
+ &event, "calling %s.Unmap().", this);
}
- [[maybe_unused]] bool hadError =
- GetDevice()->ConsumedError(Unmap(), "calling %s.Unmap().", this);
+
+ if (event) {
+ GetInstance()->GetEventManager()->SetFutureReady(event.Get());
+ }
}
MaybeError BufferBase::Unmap() {
- if (mState == BufferState::Destroyed) {
- return {};
- }
-
- // Make sure writes are now visible to the GPU if we used a staging buffer.
- if (mState == BufferState::MappedAtCreation && mStagingBuffer != nullptr) {
- DAWN_TRY(CopyFromStagingBuffer());
- }
- UnmapInternal(WGPUMapAsyncStatus_Aborted, "Buffer was unmapped before mapping was resolved.");
- return {};
-}
-
-void BufferBase::UnmapInternal(WGPUMapAsyncStatus status, std::string_view message) {
- // Unmaps resources on the backend.
- switch (mState) {
- case BufferState::PendingMap: {
- // TODO(crbug.com/dawn/831): in order to be thread safe, mutation of the
- // state and pending map event needs to be atomic w.r.t. MapAsyncEvent::Complete.
- Ref<MapAsyncEvent> pendingMapEvent = std::move(mPendingMapEvent);
- pendingMapEvent->UnmapEarly(status, message);
- GetInstance()->GetEventManager()->SetFutureReady(pendingMapEvent.Get());
- UnmapImpl();
- } break;
+ switch (mState.load(std::memory_order::acquire)) {
case BufferState::Mapped:
UnmapImpl();
break;
case BufferState::MappedAtCreation:
+ if (mStagingBuffer != nullptr) {
+ DAWN_TRY(CopyFromStagingBuffer());
+ }
if (mSize != 0 && IsCPUWritableAtCreation()) {
UnmapImpl();
}
@@ -711,11 +726,41 @@
case BufferState::HostMappedPersistent:
case BufferState::SharedMemoryNoAccess:
break;
+ case BufferState::PendingMap:
case BufferState::Destroyed:
+ // Internal code should never be trying to unmap a pending or destroyed buffer.
DAWN_UNREACHABLE();
}
- mState = BufferState::Unmapped;
+ mState.store(BufferState::Unmapped, std::memory_order::release);
+ return {};
+}
+
+ResultOrError<Ref<BufferBase::MapAsyncEvent>> BufferBase::UnmapInternal(WGPUMapAsyncStatus status,
+ std::string_view message) {
+ Ref<MapAsyncEvent> event;
+
+ BufferState state = mState.load(std::memory_order::acquire);
+
+ // If the buffer is already destroyed, we don't need to do anything.
+ if (state == BufferState::Destroyed) {
+ return Ref<MapAsyncEvent>(nullptr);
+ }
+
+ // For pending maps, update the pending event's status and return it. The caller is responsible
+ // for setting the event ready once we are no longer holding the device-wide lock.
+ if (state == BufferState::PendingMap) {
+ // For pending maps, we update the pending event, and only set it to ready after releasing
+ // the buffer state lock so that spontaneous callbacks with re-entrant calls work properly.
+ event = std::get<Ref<MapAsyncEvent>>(std::exchange(mMapData, static_cast<void*>(nullptr)));
+ event->UnmapEarly(status, message);
+ UnmapImpl();
+ mState.store(BufferState::Unmapped, std::memory_order::release);
+ return std::move(event);
+ }
+
+ DAWN_TRY(Unmap());
+ return Ref<MapAsyncEvent>(nullptr);
}
MaybeError BufferBase::ValidateMapAsync(wgpu::MapMode mode,
@@ -738,7 +783,7 @@
"Mapping range (offset:%u, size: %u) doesn't fit in the size (%u) of %s.",
offset, size, mSize, this);
- switch (mState) {
+ switch (mState.load(std::memory_order::acquire)) {
case BufferState::Mapped:
case BufferState::MappedAtCreation:
return DAWN_VALIDATION_ERROR("%s is already mapped.", this);
@@ -775,6 +820,39 @@
}
bool BufferBase::CanGetMappedRange(bool writable, size_t offset, size_t size) const {
+ // Note that:
+ //
+ // - We don't check that the device is alive because the application can ask for the
+ // mapped pointer before it knows, and even Dawn knows, that the device was lost, and
+ // still needs to work properly.
+ // - We don't check that the object is alive because we need to return mapped pointers
+ // for error buffers too.
+
+ switch (mState.load(std::memory_order::acquire)) {
+ // It is never valid to call GetMappedRange on a host-mapped buffer.
+ // TODO(crbug.com/dawn/2018): consider returning the same pointer here.
+ case BufferState::HostMappedPersistent:
+ return false;
+
+ // Writeable Buffer::GetMappedRange is always allowed when mapped at creation.
+ case BufferState::MappedAtCreation:
+ break;
+
+ case BufferState::Mapped:
+ DAWN_ASSERT(bool{mMapMode & wgpu::MapMode::Read} ^
+ bool{mMapMode & wgpu::MapMode::Write});
+ if (!writable || (mMapMode & wgpu::MapMode::Write)) {
+ break;
+ }
+ return false;
+
+ case BufferState::PendingMap:
+ case BufferState::Unmapped:
+ case BufferState::SharedMemoryNoAccess:
+ case BufferState::Destroyed:
+ return false;
+ }
+
if (offset % 8 != 0 || offset < mMapOffset || offset > mSize) {
return false;
}
@@ -790,41 +868,12 @@
return false;
}
- // Note that:
- //
- // - We don't check that the device is alive because the application can ask for the
- // mapped pointer before it knows, and even Dawn knows, that the device was lost, and
- // still needs to work properly.
- // - We don't check that the object is alive because we need to return mapped pointers
- // for error buffers too.
-
- switch (mState) {
- // It is never valid to call GetMappedRange on a host-mapped buffer.
- // TODO(crbug.com/dawn/2018): consider returning the same pointer here.
- case BufferState::HostMappedPersistent:
- return false;
-
- // Writeable Buffer::GetMappedRange is always allowed when mapped at creation.
- case BufferState::MappedAtCreation:
- return true;
-
- case BufferState::Mapped:
- DAWN_ASSERT(bool{mMapMode & wgpu::MapMode::Read} ^
- bool{mMapMode & wgpu::MapMode::Write});
- return !writable || (mMapMode & wgpu::MapMode::Write);
-
- case BufferState::PendingMap:
- case BufferState::Unmapped:
- case BufferState::SharedMemoryNoAccess:
- case BufferState::Destroyed:
- return false;
- }
- DAWN_UNREACHABLE();
+ return true;
}
MaybeError BufferBase::ValidateUnmap() const {
DAWN_TRY(GetDevice()->ValidateIsAlive());
- DAWN_INVALID_IF(mState == BufferState::HostMappedPersistent,
+ DAWN_INVALID_IF(mState.load(std::memory_order::acquire) == BufferState::HostMappedPersistent,
"Persistently mapped buffer cannot be unmapped.");
return {};
}
@@ -857,22 +906,22 @@
}
ExecutionSerial BufferBase::OnEndAccess() {
- mState = BufferState::SharedMemoryNoAccess;
+ mState.store(BufferState::SharedMemoryNoAccess, std::memory_order::release);
ExecutionSerial lastUsageSerial = mLastUsageSerial;
mLastUsageSerial = kBeginningOfGPUTime;
return lastUsageSerial;
}
void BufferBase::OnBeginAccess() {
- mState = BufferState::Unmapped;
+ mState.store(BufferState::Unmapped, std::memory_order::release);
}
bool BufferBase::HasAccess() const {
- return mState != BufferState::SharedMemoryNoAccess;
+ return mState.load(std::memory_order::acquire) != BufferState::SharedMemoryNoAccess;
}
bool BufferBase::IsDestroyed() const {
- return mState == BufferState::Destroyed;
+ return mState.load(std::memory_order::acquire) == BufferState::Destroyed;
}
void BufferBase::SetInitialized(bool initialized) {
diff --git a/src/dawn/native/Buffer.h b/src/dawn/native/Buffer.h
index a6484f1..f85a41a 100644
--- a/src/dawn/native/Buffer.h
+++ b/src/dawn/native/Buffer.h
@@ -28,6 +28,7 @@
#ifndef SRC_DAWN_NATIVE_BUFFER_H_
#define SRC_DAWN_NATIVE_BUFFER_H_
+#include <atomic>
#include <functional>
#include <memory>
@@ -49,8 +50,6 @@
struct CopyTextureToBufferCmd;
class MemoryDump;
-enum class MapType : uint32_t;
-
ResultOrError<UnpackedPtr<BufferDescriptor>> ValidateBufferDescriptor(
DeviceBase* device,
const BufferDescriptor* descriptor);
@@ -118,8 +117,12 @@
void SetInitialized(bool initialized) override;
bool IsInitialized() const override;
- virtual void* GetMappedPointer() = 0;
+ void* GetMappedPointer();
void* GetMappedRange(size_t offset, size_t size, bool writable = true);
+
+ // Internal non-reentrant version of Unmap, used for workarounds or additional copies.
+ // Note that this will fail if a map is still pending, since that should never happen
+ // internally.
MaybeError Unmap();
void DumpMemoryStatistics(dawn::native::MemoryDump* dump, const char* prefix) const;
@@ -147,15 +150,20 @@
~BufferBase() override;
- MaybeError MapAtCreationInternal();
+ // On success, returns true iff a staging buffer was used to implement the map at creation,
+ // and false if backend-specific mapping was used instead.
+ ResultOrError<bool> MapAtCreationInternal();
uint64_t mAllocatedSize = 0;
ExecutionSerial mLastUsageSerial = ExecutionSerial(0);
private:
+ struct MapAsyncEvent;
+
virtual MaybeError MapAtCreationImpl() = 0;
virtual MaybeError MapAsyncImpl(wgpu::MapMode mode, size_t offset, size_t size) = 0;
+ virtual void* GetMappedPointerImpl() = 0;
virtual void UnmapImpl() = 0;
virtual bool IsCPUWritableAtCreation() const = 0;
@@ -167,28 +175,50 @@
WGPUMapAsyncStatus* status) const;
MaybeError ValidateUnmap() const;
bool CanGetMappedRange(bool writable, size_t offset, size_t size) const;
- void UnmapInternal(WGPUMapAsyncStatus status, std::string_view message);
+
+ // Unmaps the buffer and returns a MapAsyncEvent that should be set to ready if the buffer had a
+ // pending map event.
+ ResultOrError<Ref<MapAsyncEvent>> UnmapInternal(WGPUMapAsyncStatus status,
+ std::string_view message);
+
+ // Updates internal state to reflect that the buffer is now mapped.
+ void SetMapped(BufferState newState);
const uint64_t mSize = 0;
const wgpu::BufferUsage mUsage = wgpu::BufferUsage::None;
const wgpu::BufferUsage mInternalUsage = wgpu::BufferUsage::None;
- BufferState mState;
bool mIsDataInitialized = false;
- // mStagingBuffer is used to implement mappedAtCreation for
- // buffers with non-mappable usage. It is transiently allocated
- // and released when the mappedAtCreation-buffer is unmapped.
- // Because `mStagingBuffer` itself is directly mappable, it will
- // not create another staging buffer.
- // i.e. buffer->mStagingBuffer->mStagingBuffer... is not possible.
- Ref<BufferBase> mStagingBuffer;
+ // Mutable state of the buffer w.r.t. mapping. These are kept together in an anonymous struct
+ // since they are all loosely guarded by |mState| via update ordering.
+ struct {
+ // Currently, our API relies on the fact that there is a device level lock that synchronizes
+ // everything. For API*MappedRange calls, however, it is more efficient to not acquire the
+ // device-wide lock since we cannot actually protect against racing w.r.t Unmap, i.e. a user
+ // can call an API*MappedRange function, save the pointer, call Unmap, and now the user is
+ // holding an invalid pointer. While a buffer state change is always guarded by the
+ // device-lock, we can implement the necessary validations for the API*MappedRange calls
+ // without acquiring the lock by ensuring that:
+ // 1) For MapAsync, we only set |mState| = Mapped AFTER we update the other members.
+ // 2) For *MappedRange functions, we always check |mState| = Mapped before checking
+ // other members for validation.
+ // With those assumptions in place, we can guarantee that if *MappedRange is successful,
+ // that MapAsync must have succeeded. We cannot guarantee, however, that Unmap did not race
+ // with *MappedRange, but that is the responsibility of the caller.
+ std::atomic<BufferState> mState = BufferState::Unmapped;
- wgpu::MapMode mMapMode = wgpu::MapMode::None;
- size_t mMapOffset = 0;
- size_t mMapSize = 0;
+ // A staging buffer used to implement mappedAtCreation for buffers with non-mappable
+ // usage. It is transiently allocated and released when the mappedAtCreation-buffer is
+ // unmapped. Because this buffer itself is directly mappable, it will not create another
+ // staging buffer recursively.
+ Ref<BufferBase> mStagingBuffer = nullptr;
- struct MapAsyncEvent;
- Ref<MapAsyncEvent> mPendingMapEvent;
+ // Mapping specific states.
+ wgpu::MapMode mMapMode = wgpu::MapMode::None;
+ size_t mMapOffset = 0;
+ size_t mMapSize = 0;
+ std::variant<void*, Ref<MapAsyncEvent>> mMapData;
+ };
};
} // namespace dawn::native
diff --git a/src/dawn/native/EventManager.cpp b/src/dawn/native/EventManager.cpp
index 4248a2a..50f85f1 100644
--- a/src/dawn/native/EventManager.cpp
+++ b/src/dawn/native/EventManager.cpp
@@ -208,9 +208,11 @@
}
if (const auto* queueAndSerial = future.event->GetIfQueueAndSerial()) {
auto [it, inserted] = queueLowestWaitSerials.insert(
- {queueAndSerial->queue, queueAndSerial->completionSerial});
+ {queueAndSerial->queue,
+ queueAndSerial->completionSerial.load(std::memory_order_acquire)});
if (!inserted) {
- it->second = std::min(it->second, queueAndSerial->completionSerial);
+ it->second = std::min(
+ it->second, queueAndSerial->completionSerial.load(std::memory_order_acquire));
}
}
}
@@ -557,6 +559,9 @@
// QueueAndSerial
+QueueAndSerial::QueueAndSerial(QueueBase* q, ExecutionSerial serial)
+ : queue(GetWeakRef(q)), completionSerial(serial) {}
+
ExecutionSerial QueueAndSerial::GetCompletedSerial() const {
if (auto q = queue.Promote()) {
return q->GetCompletedCommandSerial();
@@ -578,7 +583,7 @@
QueueBase* queue,
ExecutionSerial completionSerial)
: mCallbackMode(callbackMode),
- mCompletionData(QueueAndSerial{GetWeakRef(queue), completionSerial}) {}
+ mCompletionData(std::in_place_type_t<QueueAndSerial>(), queue, completionSerial) {}
EventManager::TrackedEvent::TrackedEvent(wgpu::CallbackMode callbackMode, Completed tag)
: mCallbackMode(callbackMode), mCompletionData(AcquireRef(new WaitListEvent())) {
@@ -620,8 +625,12 @@
if (auto event = GetIfWaitListEvent()) {
event->Signal();
}
- if (auto* queueAndSerial = std::get_if<QueueAndSerial>(&mCompletionData)) {
- queueAndSerial->completionSerial = queueAndSerial->GetCompletedSerial();
+ if (auto* queueAndSerial = GetIfQueueAndSerial()) {
+ ExecutionSerial current = queueAndSerial->completionSerial.load(std::memory_order_acquire);
+ for (auto completed = queueAndSerial->GetCompletedSerial();
+ current > completed && !queueAndSerial->completionSerial.compare_exchange_weak(
+ current, completed, std::memory_order_acq_rel);) {
+ }
}
}
diff --git a/src/dawn/native/EventManager.h b/src/dawn/native/EventManager.h
index 3b79db8..41af2cb 100644
--- a/src/dawn/native/EventManager.h
+++ b/src/dawn/native/EventManager.h
@@ -106,7 +106,9 @@
struct QueueAndSerial {
WeakRef<QueueBase> queue;
- ExecutionSerial completionSerial;
+ std::atomic<ExecutionSerial> completionSerial;
+
+ QueueAndSerial(QueueBase* q, ExecutionSerial serial);
// Returns the most recently completed serial on |queue|. Otherwise, returns |completionSerial|.
ExecutionSerial GetCompletedSerial() const;
@@ -133,6 +135,7 @@
bool IsReadyToComplete() const;
+ QueueAndSerial* GetIfQueueAndSerial() { return std::get_if<QueueAndSerial>(&mCompletionData); }
const QueueAndSerial* GetIfQueueAndSerial() const {
return std::get_if<QueueAndSerial>(&mCompletionData);
}
diff --git a/src/dawn/native/d3d11/BufferD3D11.cpp b/src/dawn/native/d3d11/BufferD3D11.cpp
index fca418e..4d0e128 100644
--- a/src/dawn/native/d3d11/BufferD3D11.cpp
+++ b/src/dawn/native/d3d11/BufferD3D11.cpp
@@ -549,7 +549,7 @@
}
}
-void* Buffer::GetMappedPointer() {
+void* Buffer::GetMappedPointerImpl() {
// The frontend asks that the pointer returned is from the start of the resource
// irrespective of the offset passed in MapAsyncImpl, which is what mMappedData is.
return mMappedData;
diff --git a/src/dawn/native/d3d11/BufferD3D11.h b/src/dawn/native/d3d11/BufferD3D11.h
index a7f04fb..b65594a 100644
--- a/src/dawn/native/d3d11/BufferD3D11.h
+++ b/src/dawn/native/d3d11/BufferD3D11.h
@@ -180,7 +180,7 @@
void UnmapImpl() override;
bool IsCPUWritableAtCreation() const override;
MaybeError MapAtCreationImpl() override;
- void* GetMappedPointer() override;
+ void* GetMappedPointerImpl() override;
MaybeError InitializeToZero(const ScopedCommandRecordingContext* commandContext);
diff --git a/src/dawn/native/d3d12/BufferD3D12.cpp b/src/dawn/native/d3d12/BufferD3D12.cpp
index 09ce7a9..9f1646b 100644
--- a/src/dawn/native/d3d12/BufferD3D12.cpp
+++ b/src/dawn/native/d3d12/BufferD3D12.cpp
@@ -520,7 +520,7 @@
}
}
-void* Buffer::GetMappedPointer() {
+void* Buffer::GetMappedPointerImpl() {
// The frontend asks that the pointer returned is from the start of the resource
// irrespective of the offset passed in MapAsyncImpl, which is what mMappedData is.
return mMappedData;
diff --git a/src/dawn/native/d3d12/BufferD3D12.h b/src/dawn/native/d3d12/BufferD3D12.h
index 4b3c181..072acc6 100644
--- a/src/dawn/native/d3d12/BufferD3D12.h
+++ b/src/dawn/native/d3d12/BufferD3D12.h
@@ -90,7 +90,7 @@
void DestroyImpl() override;
bool IsCPUWritableAtCreation() const override;
MaybeError MapAtCreationImpl() override;
- void* GetMappedPointer() override;
+ void* GetMappedPointerImpl() override;
MaybeError MapInternal(bool isWrite, size_t start, size_t end, const char* contextInfo);
diff --git a/src/dawn/native/metal/BufferMTL.h b/src/dawn/native/metal/BufferMTL.h
index 39f77d9..5837885 100644
--- a/src/dawn/native/metal/BufferMTL.h
+++ b/src/dawn/native/metal/BufferMTL.h
@@ -69,7 +69,7 @@
void UnmapImpl() override;
void DestroyImpl() override;
void SetLabelImpl() override;
- void* GetMappedPointer() override;
+ void* GetMappedPointerImpl() override;
bool IsCPUWritableAtCreation() const override;
MaybeError MapAtCreationImpl() override;
diff --git a/src/dawn/native/metal/BufferMTL.mm b/src/dawn/native/metal/BufferMTL.mm
index a47c0f3..e5ce0c1 100644
--- a/src/dawn/native/metal/BufferMTL.mm
+++ b/src/dawn/native/metal/BufferMTL.mm
@@ -200,7 +200,7 @@
return {};
}
-void* Buffer::GetMappedPointer() {
+void* Buffer::GetMappedPointerImpl() {
return [*mMtlBuffer contents];
}
diff --git a/src/dawn/native/null/DeviceNull.cpp b/src/dawn/native/null/DeviceNull.cpp
index 80d1946..2f69d48 100644
--- a/src/dawn/native/null/DeviceNull.cpp
+++ b/src/dawn/native/null/DeviceNull.cpp
@@ -401,7 +401,7 @@
return {};
}
-void* Buffer::GetMappedPointer() {
+void* Buffer::GetMappedPointerImpl() {
return mBackingData.get();
}
diff --git a/src/dawn/native/null/DeviceNull.h b/src/dawn/native/null/DeviceNull.h
index 482b6df..1f94948 100644
--- a/src/dawn/native/null/DeviceNull.h
+++ b/src/dawn/native/null/DeviceNull.h
@@ -270,7 +270,7 @@
void DestroyImpl() override;
bool IsCPUWritableAtCreation() const override;
MaybeError MapAtCreationImpl() override;
- void* GetMappedPointer() override;
+ void* GetMappedPointerImpl() override;
std::unique_ptr<uint8_t[]> mBackingData;
};
diff --git a/src/dawn/native/opengl/BufferGL.cpp b/src/dawn/native/opengl/BufferGL.cpp
index da282bd..2482712 100644
--- a/src/dawn/native/opengl/BufferGL.cpp
+++ b/src/dawn/native/opengl/BufferGL.cpp
@@ -52,7 +52,8 @@
}
if (descriptor->mappedAtCreation) {
- DAWN_TRY(buffer->MapAtCreationInternal());
+ [[maybe_unused]] bool usingStagingBuffer;
+ DAWN_TRY_ASSIGN(usingStagingBuffer, buffer->MapAtCreationInternal());
}
return std::move(buffer);
@@ -230,7 +231,7 @@
return {};
}
-void* Buffer::GetMappedPointer() {
+void* Buffer::GetMappedPointerImpl() {
// The mapping offset has already been removed.
return mMappedData;
}
diff --git a/src/dawn/native/opengl/BufferGL.h b/src/dawn/native/opengl/BufferGL.h
index 2be1171..8c62342 100644
--- a/src/dawn/native/opengl/BufferGL.h
+++ b/src/dawn/native/opengl/BufferGL.h
@@ -64,7 +64,7 @@
void DestroyImpl() override;
bool IsCPUWritableAtCreation() const override;
MaybeError MapAtCreationImpl() override;
- void* GetMappedPointer() override;
+ void* GetMappedPointerImpl() override;
MaybeError InitializeToZero();
diff --git a/src/dawn/native/vulkan/BufferVk.cpp b/src/dawn/native/vulkan/BufferVk.cpp
index e5b2a03..f12608d 100644
--- a/src/dawn/native/vulkan/BufferVk.cpp
+++ b/src/dawn/native/vulkan/BufferVk.cpp
@@ -546,7 +546,7 @@
// No need to do anything, we keep CPU-visible memory mapped at all time.
}
-void* Buffer::GetMappedPointer() {
+void* Buffer::GetMappedPointerImpl() {
uint8_t* memory = mMemoryAllocation.GetMappedPointer();
DAWN_ASSERT(memory != nullptr);
return memory;
diff --git a/src/dawn/native/vulkan/BufferVk.h b/src/dawn/native/vulkan/BufferVk.h
index 35ef515..c7074ce 100644
--- a/src/dawn/native/vulkan/BufferVk.h
+++ b/src/dawn/native/vulkan/BufferVk.h
@@ -91,7 +91,7 @@
void DestroyImpl() override;
bool IsCPUWritableAtCreation() const override;
MaybeError MapAtCreationImpl() override;
- void* GetMappedPointer() override;
+ void* GetMappedPointerImpl() override;
MaybeError UploadData(uint64_t bufferOffset, const void* data, size_t size) override;
VkBuffer mHandle = VK_NULL_HANDLE;
diff --git a/src/dawn/native/webgpu/BufferWGPU.cpp b/src/dawn/native/webgpu/BufferWGPU.cpp
index 127083e..cccfb0c 100644
--- a/src/dawn/native/webgpu/BufferWGPU.cpp
+++ b/src/dawn/native/webgpu/BufferWGPU.cpp
@@ -119,7 +119,7 @@
return {};
}
-void* Buffer::GetMappedPointer() {
+void* Buffer::GetMappedPointerImpl() {
// The mapping offset has already been removed.
return mMappedData;
}
diff --git a/src/dawn/native/webgpu/BufferWGPU.h b/src/dawn/native/webgpu/BufferWGPU.h
index b171b4b..ab576fb 100644
--- a/src/dawn/native/webgpu/BufferWGPU.h
+++ b/src/dawn/native/webgpu/BufferWGPU.h
@@ -50,7 +50,7 @@
void DestroyImpl() override;
bool IsCPUWritableAtCreation() const override;
MaybeError MapAtCreationImpl() override;
- void* GetMappedPointer() override;
+ void* GetMappedPointerImpl() override;
WGPUBuffer mInnerBuffer = nullptr;
diff --git a/src/dawn/tests/unittests/native/mocks/BufferMock.cpp b/src/dawn/tests/unittests/native/mocks/BufferMock.cpp
index 26a7f91..626792c 100644
--- a/src/dawn/tests/unittests/native/mocks/BufferMock.cpp
+++ b/src/dawn/tests/unittests/native/mocks/BufferMock.cpp
@@ -44,7 +44,7 @@
mBackingData = std::unique_ptr<uint8_t[]>(new uint8_t[mAllocatedSize]);
ON_CALL(*this, DestroyImpl).WillByDefault([this] { this->BufferBase::DestroyImpl(); });
- ON_CALL(*this, GetMappedPointer).WillByDefault(Return(mBackingData.get()));
+ ON_CALL(*this, GetMappedPointerImpl).WillByDefault(Return(mBackingData.get()));
ON_CALL(*this, IsCPUWritableAtCreation).WillByDefault([this] {
return (GetInternalUsage() & (wgpu::BufferUsage::MapRead | wgpu::BufferUsage::MapWrite)) !=
0;
diff --git a/src/dawn/tests/unittests/native/mocks/BufferMock.h b/src/dawn/tests/unittests/native/mocks/BufferMock.h
index 7c6740d..83c351f 100644
--- a/src/dawn/tests/unittests/native/mocks/BufferMock.h
+++ b/src/dawn/tests/unittests/native/mocks/BufferMock.h
@@ -54,8 +54,8 @@
MapAsyncImpl,
(wgpu::MapMode mode, size_t offset, size_t size),
(override));
+ MOCK_METHOD(void*, GetMappedPointerImpl, (), (override));
MOCK_METHOD(void, UnmapImpl, (), (override));
- MOCK_METHOD(void*, GetMappedPointer, (), (override));
MOCK_METHOD(bool, IsCPUWritableAtCreation, (), (const, override));