Implement buffer MapAsyncF

Expands existing buffer map tests to use futures.

Also enforce callback ordering matching the JS spec, which was
necessary to pass existing tests.

Bug: dawn:1987
Fixed: dawn:2066
Change-Id: Iea4bae85fe10bcff88e84a0c73e8d542dd12a0cc
Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/159031
Reviewed-by: Kai Ninomiya <kainino@chromium.org>
Commit-Queue: Austin Eng <enga@chromium.org>
Reviewed-by: Loko Kung <lokokung@google.com>
Kokoro: Kokoro <noreply+kokoro@google.com>
diff --git a/src/dawn/native/Buffer.cpp b/src/dawn/native/Buffer.cpp
index d969b5e..82641ed 100644
--- a/src/dawn/native/Buffer.cpp
+++ b/src/dawn/native/Buffer.cpp
@@ -42,6 +42,8 @@
 #include "dawn/native/Device.h"
 #include "dawn/native/DynamicUploader.h"
 #include "dawn/native/ErrorData.h"
+#include "dawn/native/EventManager.h"
+#include "dawn/native/Instance.h"
 #include "dawn/native/ObjectType_autogen.h"
 #include "dawn/native/PhysicalDevice.h"
 #include "dawn/native/Queue.h"
@@ -122,6 +124,93 @@
 
 }  // anonymous namespace
 
+struct BufferBase::MapAsyncEvent final : public EventManager::TrackedEvent {
+    // MapAsyncEvent stores a raw pointer to the buffer so that it can
+    // update the buffer's map state when it completes.
+    // If the map completes early (error, unmap, destroy), then the buffer
+    // is no longer needed and we store the early status instead.
+    // The raw pointer is safe because the early status is set to destroyed
+    // before the buffer is dropped.
+    // Note: this could be an atomic + spin lock on a sentinel enum if the mutex
+    // cost is high.
+    MutexProtected<std::variant<BufferBase*, wgpu::BufferMapAsyncStatus>> mBufferOrEarlyStatus;
+
+    WGPUBufferMapCallback mCallback;
+    void* mUserdata;
+
+    // Create an event backed by the given queue execution serial.
+    MapAsyncEvent(DeviceBase* device,
+                  BufferBase* buffer,
+                  const BufferMapCallbackInfo& callbackInfo,
+                  ExecutionSerial serial)
+        : TrackedEvent(callbackInfo.mode, device->GetQueue(), serial),
+          mBufferOrEarlyStatus(buffer),
+          mCallback(callbackInfo.callback),
+          mUserdata(callbackInfo.userdata) {
+        TRACE_EVENT_ASYNC_BEGIN0(device->GetPlatform(), General, "Buffer::APIMapAsync",
+                                 uint64_t(serial));
+    }
+
+    // Create an event that's ready at creation (for errors, etc.)
+    MapAsyncEvent(DeviceBase* device,
+                  const BufferMapCallbackInfo& callbackInfo,
+                  wgpu::BufferMapAsyncStatus earlyStatus)
+        : TrackedEvent(callbackInfo.mode, device->GetQueue(), kBeginningOfGPUTime),
+          mBufferOrEarlyStatus(earlyStatus),
+          mCallback(callbackInfo.callback),
+          mUserdata(callbackInfo.userdata) {
+        TRACE_EVENT_ASYNC_BEGIN0(device->GetPlatform(), General, "Buffer::APIMapAsync",
+                                 uint64_t(kBeginningOfGPUTime));
+        CompleteIfSpontaneous();
+    }
+
+    ~MapAsyncEvent() override { EnsureComplete(EventCompletionType::Shutdown); }
+
+    void Complete(EventCompletionType completionType) override {
+        if (const auto* queueAndSerial = std::get_if<QueueAndSerial>(&GetCompletionData())) {
+            TRACE_EVENT_ASYNC_END0(queueAndSerial->queue->GetDevice()->GetPlatform(), General,
+                                   "Buffer::APIMapAsync",
+                                   uint64_t(queueAndSerial->completionSerial));
+        }
+
+        if (completionType == EventCompletionType::Shutdown) {
+            mCallback(ToAPI(wgpu::BufferMapAsyncStatus::Unknown), mUserdata);
+            return;
+        }
+
+        wgpu::BufferMapAsyncStatus status = wgpu::BufferMapAsyncStatus::Success;
+        Ref<MapAsyncEvent> pendingMapEvent;
+
+        // Lock the buffer / early status. This may race with UnmapEarly which occurs
+        // when the buffer is unmapped or destroyed.
+        mBufferOrEarlyStatus.Use([&](auto bufferOrEarlyStatus) {
+            if (auto* earlyStatus =
+                    std::get_if<wgpu::BufferMapAsyncStatus>(&*bufferOrEarlyStatus)) {
+                // Assign the early status, if it was set.
+                status = *earlyStatus;
+            } else if (auto** buffer = std::get_if<BufferBase*>(&*bufferOrEarlyStatus)) {
+                // Set the buffer state to Mapped if this pending map succeeded.
+                // TODO(crbug.com/dawn/831): in order to be thread safe, mutation of the
+                // state and pending map event needs to be atomic w.r.t. UnmapInternal.
+                DAWN_ASSERT((*buffer)->mState == BufferState::PendingMap);
+                (*buffer)->mState = BufferState::Mapped;
+
+                pendingMapEvent = std::move((*buffer)->mPendingMapEvent);
+            }
+        });
+        mCallback(ToAPI(status), mUserdata);
+    }
+
+    // Set the buffer early status because it was unmapped early due to Unmap or Destroy.
+    // This can race with Complete such that the early status is ignored, but this is OK
+    // because we will still unmap the buffer. It will be as-if the application called
+    // Unmap/Destroy just after the map event completed.
+    void UnmapEarly(wgpu::BufferMapAsyncStatus status) {
+        mBufferOrEarlyStatus.Use([&](auto bufferOrEarlyStatus) { *bufferOrEarlyStatus = status; });
+        CompleteIfSpontaneous();
+    }
+};
+
 MaybeError ValidateBufferDescriptor(DeviceBase* device, const BufferDescriptor* descriptor) {
     UnpackedBufferDescriptorChain unpacked;
     DAWN_TRY_ASSIGN(unpacked, ValidateAndUnpackChain(descriptor));
@@ -506,8 +595,49 @@
                                 size_t offset,
                                 size_t size,
                                 const BufferMapCallbackInfo& callbackInfo) {
-    // TODO(dawn:1987) Implement this.
-    DAWN_CHECK(false);
+    // TODO(crbug.com/dawn/2052): Once we always return a future, change this to log to the instance
+    // (note, not raise a validation error to the device) and return the null future.
+    DAWN_ASSERT(callbackInfo.nextInChain == nullptr);
+
+    // Handle the defaulting of size required by WebGPU, even if in webgpu_cpp.h it is not
+    // possible to default the function argument (because there is the callback later in the
+    // argument list)
+    if ((size == wgpu::kWholeMapSize) && (offset <= mSize)) {
+        size = mSize - offset;
+    }
+
+    auto earlyStatus = [&]() -> std::optional<wgpu::BufferMapAsyncStatus> {
+        if (mState == BufferState::PendingMap) {
+            return wgpu::BufferMapAsyncStatus::MappingAlreadyPending;
+        }
+        WGPUBufferMapAsyncStatus status;
+        if (GetDevice()->ConsumedError(ValidateMapAsync(mode, offset, size, &status),
+                                       "calling %s.MapAsync(%s, %u, %u, ...).", this, mode, offset,
+                                       size)) {
+            return static_cast<wgpu::BufferMapAsyncStatus>(status);
+        }
+        if (GetDevice()->ConsumedError(MapAsyncImpl(mode, offset, size))) {
+            return wgpu::BufferMapAsyncStatus::DeviceLost;
+        }
+        return std::nullopt;
+    }();
+
+    Ref<EventManager::TrackedEvent> event;
+    if (earlyStatus) {
+        event = AcquireRef(new MapAsyncEvent(GetDevice(), callbackInfo, *earlyStatus));
+    } else {
+        mMapMode = mode;
+        mMapOffset = offset;
+        mMapSize = size;
+        mState = BufferState::PendingMap;
+        mPendingMapEvent =
+            AcquireRef(new MapAsyncEvent(GetDevice(), this, callbackInfo, mLastUsageSerial));
+        event = mPendingMapEvent;
+    }
+
+    FutureID futureID =
+        GetInstance()->GetEventManager()->TrackEvent(callbackInfo.mode, std::move(event));
+    return {futureID};
 }
 
 void* BufferBase::APIGetMappedRange(size_t offset, size_t size) {
@@ -576,8 +706,15 @@
 void BufferBase::UnmapInternal(WGPUBufferMapAsyncStatus callbackStatus) {
     // Unmaps resources on the backend.
     if (mState == BufferState::PendingMap) {
-        GetDevice()->GetCallbackTaskManager()->AddCallbackTask(
-            PrepareMappingCallback(mLastMapID, callbackStatus));
+        // TODO(crbug.com/dawn/831): in order to be thread safe, mutation of the
+        // state and pending map event needs to be atomic w.r.t. MapAsyncEvent::Complete.
+        Ref<MapAsyncEvent> pendingMapEvent = std::move(mPendingMapEvent);
+        if (pendingMapEvent != nullptr) {
+            pendingMapEvent->UnmapEarly(static_cast<wgpu::BufferMapAsyncStatus>(callbackStatus));
+        } else {
+            GetDevice()->GetCallbackTaskManager()->AddCallbackTask(
+                PrepareMappingCallback(mLastMapID, callbackStatus));
+        }
         UnmapImpl();
     } else if (mState == BufferState::Mapped) {
         UnmapImpl();
diff --git a/src/dawn/native/Buffer.h b/src/dawn/native/Buffer.h
index 66aecd7..fb530c7 100644
--- a/src/dawn/native/Buffer.h
+++ b/src/dawn/native/Buffer.h
@@ -172,6 +172,9 @@
     wgpu::MapMode mMapMode = wgpu::MapMode::None;
     size_t mMapOffset = 0;
     size_t mMapSize = 0;
+
+    struct MapAsyncEvent;
+    Ref<MapAsyncEvent> mPendingMapEvent;
 };
 
 }  // namespace dawn::native
diff --git a/src/dawn/native/EventManager.cpp b/src/dawn/native/EventManager.cpp
index 26fd124..35be9ed 100644
--- a/src/dawn/native/EventManager.cpp
+++ b/src/dawn/native/EventManager.cpp
@@ -229,6 +229,31 @@
     return wgpu::WaitStatus::Success;
 }
 
+// Reorder callbacks to enforce callback ordering required by the spec.
+// Returns an iterator just past the last ready callback.
+auto PrepareReadyCallbacks(std::vector<TrackedFutureWaitInfo>& futures) {
+    // Partition the futures so the following sort looks at fewer elements.
+    auto endOfReady =
+        std::partition(futures.begin(), futures.end(),
+                       [](const TrackedFutureWaitInfo& future) { return future.ready; });
+
+    // Enforce the following rules from https://gpuweb.github.io/gpuweb/#promise-ordering:
+    // 1. For some GPUQueue q, if p1 = q.onSubmittedWorkDone() is called before
+    //    p2 = q.onSubmittedWorkDone(), then p1 must settle before p2.
+    // 2. For some GPUQueue q and GPUBuffer b on the same GPUDevice,
+    //    if p1 = b.mapAsync() is called before p2 = q.onSubmittedWorkDone(),
+    //    then p1 must settle before p2.
+    //
+    // To satisfy the rules, we need only put lower future ids before higher future
+    // ids. Lower future ids were created first.
+    std::sort(futures.begin(), endOfReady,
+              [](const TrackedFutureWaitInfo& a, const TrackedFutureWaitInfo& b) {
+                  return a.futureID < b.futureID;
+              });
+
+    return endOfReady;
+}
+
 }  // namespace
 
 // EventManager
@@ -297,21 +322,20 @@
         DAWN_ASSERT(waitStatus == wgpu::WaitStatus::Success);
     }
 
+    // Enforce callback ordering.
+    auto readyEnd = PrepareReadyCallbacks(futures);
+
     // For all the futures we are about to complete, first ensure they're untracked. It's OK if
     // something actually isn't tracked anymore (because it completed elsewhere while waiting.)
     mEvents->Use([&](auto events) {
-        for (TrackedFutureWaitInfo& future : futures) {
-            if (future.ready) {
-                events->erase(future.futureID);
-            }
+        for (auto it = futures.begin(); it != readyEnd; ++it) {
+            events->erase(it->futureID);
         }
     });
 
     // Finally, call callbacks.
-    for (TrackedFutureWaitInfo& future : futures) {
-        if (future.ready) {
-            future.event->EnsureComplete(EventCompletionType::Ready);
-        }
+    for (auto it = futures.begin(); it != readyEnd; ++it) {
+        it->event->EnsureComplete(EventCompletionType::Ready);
     }
 }
 
@@ -375,24 +399,22 @@
         return waitStatus;
     }
 
+    // Enforce callback ordering
+    auto readyEnd = PrepareReadyCallbacks(futures);
+
     // For any futures that we're about to complete, first ensure they're untracked. It's OK if
     // something actually isn't tracked anymore (because it completed elsewhere while waiting.)
     mEvents->Use([&](auto events) {
-        for (const TrackedFutureWaitInfo& future : futures) {
-            if (future.ready) {
-                events->erase(future.futureID);
-            }
+        for (auto it = futures.begin(); it != readyEnd; ++it) {
+            events->erase(it->futureID);
         }
     });
 
     // Finally, call callbacks and update return values.
-    for (TrackedFutureWaitInfo& future : futures) {
-        if (future.ready) {
-            // Set completed before calling the callback.
-            infos[future.indexInInfos].completed = true;
-            // TODO(crbug.com/dawn/2066): Guarantee the event ordering from the JS spec.
-            future.event->EnsureComplete(EventCompletionType::Ready);
-        }
+    for (auto it = futures.begin(); it != readyEnd; ++it) {
+        // Set completed before calling the callback.
+        infos[it->indexInInfos].completed = true;
+        it->event->EnsureComplete(EventCompletionType::Ready);
     }
 
     return wgpu::WaitStatus::Success;
diff --git a/src/dawn/native/ExecutionQueue.cpp b/src/dawn/native/ExecutionQueue.cpp
index 87de382..7ff5d80 100644
--- a/src/dawn/native/ExecutionQueue.cpp
+++ b/src/dawn/native/ExecutionQueue.cpp
@@ -74,11 +74,6 @@
            HasPendingCommands();
 }
 
-Ref<SystemEvent> ExecutionQueueBase::CreateWorkDoneSystemEvent(ExecutionSerial serial) {
-    // TODO(crbug.com/dawn/2058): Implement this in all backends and remove this default impl
-    DAWN_CHECK(false);
-}
-
 ResultOrError<bool> ExecutionQueueBase::WaitForQueueSerial(ExecutionSerial serial,
                                                            Nanoseconds timeout) {
     // TODO(crbug.com/dawn/2058): Implement this in all backends and remove this default impl
diff --git a/src/dawn/native/ExecutionQueue.h b/src/dawn/native/ExecutionQueue.h
index d6569b0..7485fd9 100644
--- a/src/dawn/native/ExecutionQueue.h
+++ b/src/dawn/native/ExecutionQueue.h
@@ -74,8 +74,6 @@
     // resources.
     virtual MaybeError WaitForIdleForDestruction() = 0;
 
-    // Get or create an event that will be complete after the ExecutionSerial passes.
-    virtual Ref<SystemEvent> CreateWorkDoneSystemEvent(ExecutionSerial serial);
     // Wait at most `timeout` synchronously for the ExecutionSerial to pass. Returns true
     // if the serial passed.
     virtual ResultOrError<bool> WaitForQueueSerial(ExecutionSerial serial, Nanoseconds timeout);
diff --git a/src/dawn/native/metal/QueueMTL.h b/src/dawn/native/metal/QueueMTL.h
index 63cd463..3948e7a 100644
--- a/src/dawn/native/metal/QueueMTL.h
+++ b/src/dawn/native/metal/QueueMTL.h
@@ -52,7 +52,7 @@
     void WaitForCommandsToBeScheduled();
     void ExportLastSignaledEvent(ExternalImageMTLSharedEventDescriptor* desc);
 
-    Ref<SystemEvent> CreateWorkDoneSystemEvent(ExecutionSerial serial) override;
+    Ref<SystemEvent> CreateWorkDoneSystemEvent(ExecutionSerial serial);
     ResultOrError<bool> WaitForQueueSerial(ExecutionSerial serial, Nanoseconds timeout) override;
 
   private:
diff --git a/src/dawn/tests/end2end/BufferTests.cpp b/src/dawn/tests/end2end/BufferTests.cpp
index 600b8d7..68cee82 100644
--- a/src/dawn/tests/end2end/BufferTests.cpp
+++ b/src/dawn/tests/end2end/BufferTests.cpp
@@ -25,6 +25,7 @@
 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
+#include <algorithm>
 #include <array>
 #include <cstring>
 #include <limits>
@@ -39,23 +40,69 @@
 namespace dawn {
 namespace {
 
-class BufferMappingTests : public DawnTest {
+using FutureCallbackMode = std::optional<wgpu::CallbackMode>;
+DAWN_TEST_PARAM_STRUCT(BufferMappingTestParams, FutureCallbackMode);
+
+class BufferMappingTests : public DawnTestWithParams<BufferMappingTestParams> {
   protected:
+    void SetUp() override {
+        DawnTestWithParams<BufferMappingTestParams>::SetUp();
+        // Wire only supports polling / spontaneous futures.
+        DAWN_TEST_UNSUPPORTED_IF(UsesWire() && GetParam().mFutureCallbackMode &&
+                                 *GetParam().mFutureCallbackMode ==
+                                     wgpu::CallbackMode::WaitAnyOnly);
+    }
+
     void MapAsyncAndWait(const wgpu::Buffer& buffer,
                          wgpu::MapMode mode,
                          size_t offset,
-                         size_t size) {
-        bool done = false;
-        buffer.MapAsync(
-            mode, offset, size,
-            [](WGPUBufferMapAsyncStatus status, void* userdata) {
-                ASSERT_EQ(WGPUBufferMapAsyncStatus_Success, status);
-                *static_cast<bool*>(userdata) = true;
-            },
-            &done);
+                         size_t size,
+                         wgpu::BufferMapCallback cb = nullptr,
+                         void* ud = nullptr) {
+        struct Userdata {
+            wgpu::BufferMapCallback cb;
+            void* ud;
+            bool done = false;
+        };
+        Userdata userdata = Userdata{cb, ud};
+        auto callback = [](WGPUBufferMapAsyncStatus status, void* rawUserdata) {
+            auto* userdata = static_cast<Userdata*>(rawUserdata);
+            userdata->done = true;
+            ASSERT_EQ(WGPUBufferMapAsyncStatus_Success, status);
+            auto cb = userdata->cb;
+            auto ud = userdata->ud;
+            if (cb) {
+                cb(status, ud);
+            }
+        };
 
-        while (!done) {
-            WaitABit();
+        // Legacy MapAsync
+        if (!GetParam().mFutureCallbackMode) {
+            buffer.MapAsync(mode, offset, size, callback, &userdata);
+            while (!userdata.done) {
+                // Flush wire and call instance process events.
+                WaitABit();
+            }
+            return;
+        }
+
+        wgpu::Future future = buffer.MapAsyncF(
+            mode, offset, size, {nullptr, *GetParam().mFutureCallbackMode, callback, &userdata});
+        switch (*GetParam().mFutureCallbackMode) {
+            case wgpu::CallbackMode::WaitAnyOnly: {
+                wgpu::FutureWaitInfo waitInfo = {future};
+                GetInstance().WaitAny(1, &waitInfo, UINT64_MAX);
+                ASSERT_TRUE(waitInfo.completed);
+                ASSERT_TRUE(userdata.done);
+                break;
+            }
+            case wgpu::CallbackMode::AllowProcessEvents:
+            case wgpu::CallbackMode::AllowSpontaneous:
+                while (!userdata.done) {
+                    // Flush wire and call instance process events.
+                    WaitABit();
+                }
+                break;
         }
     }
 
@@ -195,34 +242,24 @@
     queue.WriteBuffer(buffer, 0, &myData, kSize);
 
     struct UserData {
-        bool done;
         wgpu::Buffer buffer;
         void* expected;
     };
-    UserData user{false, buffer, &myData};
+    UserData user{buffer, &myData};
 
-    buffer.MapAsync(
-        wgpu::MapMode::Read, 0, kBufferSize,
+    MapAsyncAndWait(
+        buffer, wgpu::MapMode::Read, 0, kBufferSize,
         [](WGPUBufferMapAsyncStatus status, void* userdata) {
             UserData* user = static_cast<UserData*>(userdata);
+            CheckMapping(user->buffer.GetConstMappedRange(), user->expected, kSize);
+            CheckMapping(user->buffer.GetConstMappedRange(0, kSize), user->expected, kSize);
 
-            EXPECT_EQ(WGPUBufferMapAsyncStatus_Success, status);
-            if (status == WGPUBufferMapAsyncStatus_Success) {
-                CheckMapping(user->buffer.GetConstMappedRange(), user->expected, kSize);
-                CheckMapping(user->buffer.GetConstMappedRange(0, kSize), user->expected, kSize);
+            CheckMapping(user->buffer.GetConstMappedRange(8, 4),
+                         static_cast<const uint32_t*>(user->expected) + 2, sizeof(uint32_t));
 
-                CheckMapping(user->buffer.GetConstMappedRange(8, 4),
-                             static_cast<const uint32_t*>(user->expected) + 2, sizeof(uint32_t));
-
-                user->buffer.Unmap();
-            }
-            user->done = true;
+            user->buffer.Unmap();
         },
         &user);
-
-    while (!user.done) {
-        WaitABit();
-    }
 }
 
 // Test that the simplest map write works.
@@ -391,25 +428,71 @@
     std::array<wgpu::Buffer, kBuffers> buffers;
     uint32_t mapCompletedCount = 0;
 
-    // Create buffers and request mapping them.
+    // Create buffers.
     wgpu::BufferDescriptor descriptor;
     descriptor.size = static_cast<uint32_t>(kDataSize * sizeof(uint32_t));
     descriptor.usage = wgpu::BufferUsage::MapWrite | wgpu::BufferUsage::CopySrc;
     for (uint32_t i = 0; i < kBuffers; ++i) {
         buffers[i] = device.CreateBuffer(&descriptor);
-
-        buffers[i].MapAsync(
-            wgpu::MapMode::Write, 0, descriptor.size,
-            [](WGPUBufferMapAsyncStatus status, void* userdata) {
-                ASSERT_EQ(WGPUBufferMapAsyncStatus_Success, status);
-                (*static_cast<uint32_t*>(userdata))++;
-            },
-            &mapCompletedCount);
     }
 
-    // Wait for all mappings to complete
-    while (mapCompletedCount != kBuffers) {
-        WaitABit();
+    // Legacy MapAsync
+    if (!GetParam().mFutureCallbackMode) {
+        // Map all the buffers.
+        for (uint32_t i = 0; i < kBuffers; ++i) {
+            buffers[i].MapAsync(
+                wgpu::MapMode::Write, 0, descriptor.size,
+                [](WGPUBufferMapAsyncStatus status, void* userdata) {
+                    ASSERT_EQ(WGPUBufferMapAsyncStatus_Success, status);
+                    (*static_cast<uint32_t*>(userdata))++;
+                },
+                &mapCompletedCount);
+        }
+
+        // Wait for all mappings to complete
+        while (mapCompletedCount != kBuffers) {
+            WaitABit();
+        }
+    } else {
+        std::array<wgpu::Future, kBuffers> futures;
+        for (uint32_t i = 0; i < kBuffers; ++i) {
+            futures[i] =
+                buffers[i].MapAsyncF(wgpu::MapMode::Write, 0, descriptor.size,
+                                     {nullptr, *GetParam().mFutureCallbackMode,
+                                      [](WGPUBufferMapAsyncStatus status, void* userdata) {
+                                          ASSERT_EQ(WGPUBufferMapAsyncStatus_Success, status);
+                                          (*static_cast<uint32_t*>(userdata))++;
+                                      },
+                                      &mapCompletedCount});
+        }
+
+        switch (*GetParam().mFutureCallbackMode) {
+            case wgpu::CallbackMode::WaitAnyOnly: {
+                std::array<wgpu::FutureWaitInfo, kBuffers> waitInfos;
+                for (uint32_t i = 0; i < kBuffers; ++i) {
+                    waitInfos[i] = {futures[i]};
+                }
+                size_t count = waitInfos.size();
+                wgpu::InstanceFeatures instanceFeatures;
+                wgpu::GetInstanceFeatures(&instanceFeatures);
+                do {
+                    size_t waitCount = std::min(count, instanceFeatures.timedWaitAnyMaxCount);
+                    auto waitInfoStart = waitInfos.begin() + (count - waitCount);
+                    GetInstance().WaitAny(waitCount, &*waitInfoStart, UINT64_MAX);
+                    auto it = std::partition(waitInfoStart, waitInfoStart + waitCount,
+                                             [](const auto& info) { return !info.completed; });
+                    count = std::distance(waitInfos.begin(), it);
+                } while (count > 0);
+                break;
+            }
+            case wgpu::CallbackMode::AllowProcessEvents:
+            case wgpu::CallbackMode::AllowSpontaneous:
+                // Wait for all mappings to complete
+                while (mapCompletedCount != kBuffers) {
+                    WaitABit();
+                }
+                break;
+        }
     }
 
     // All buffers are mapped, write into them and unmap them all.
@@ -430,31 +513,66 @@
     wgpu::Buffer buffer = CreateMapReadBuffer(sizeof(data));
     queue.WriteBuffer(buffer, 0, data, sizeof(data));
 
-    // Map the buffer but do not wait on the result yet.
     bool done1 = false;
     bool done2 = false;
-    buffer.MapAsync(
-        wgpu::MapMode::Read, 8, 4,
-        [](WGPUBufferMapAsyncStatus status, void* userdata) {
-            ASSERT_EQ(WGPUBufferMapAsyncStatus_Success, status);
-            *static_cast<bool*>(userdata) = true;
-        },
-        &done1);
-
-    // Call MapAsync another time, the callback will be rejected with error status
+    auto cb1 = [](WGPUBufferMapAsyncStatus status, void* userdata) {
+        ASSERT_EQ(WGPUBufferMapAsyncStatus_Success, status);
+        *static_cast<bool*>(userdata) = true;
+    };
+    // Calling MapAsync another time, will reject the callback with error status
     // (but doesn't produce a validation error) and mMapOffset is not updated
     // because the buffer is already being mapped and it doesn't allow multiple
     // MapAsync requests.
-    buffer.MapAsync(
-        wgpu::MapMode::Read, 0, 4,
-        [](WGPUBufferMapAsyncStatus status, void* userdata) {
-            ASSERT_EQ(WGPUBufferMapAsyncStatus_MappingAlreadyPending, status);
-            *static_cast<bool*>(userdata) = true;
-        },
-        &done2);
+    auto cb2 = [](WGPUBufferMapAsyncStatus status, void* userdata) {
+        ASSERT_EQ(WGPUBufferMapAsyncStatus_MappingAlreadyPending, status);
+        *static_cast<bool*>(userdata) = true;
+    };
 
-    while (!done1 || !done2) {
-        WaitABit();
+    // Legacy MapAsync
+    if (!GetParam().mFutureCallbackMode) {
+        // Map the buffer but do not wait on the result yet.
+        buffer.MapAsync(wgpu::MapMode::Read, 8, 4, cb1, &done1);
+
+        // Call MapAsync another time, the callback will be rejected with error status
+        // (but doesn't produce a validation error) and mMapOffset is not updated
+        // because the buffer is already being mapped and it doesn't allow multiple
+        // MapAsync requests.
+        buffer.MapAsync(wgpu::MapMode::Read, 0, 4, cb2, &done2);
+
+        while (!done1 || !done2) {
+            WaitABit();
+        }
+    } else {
+        // Map the buffer but do not wait on the result yet.
+        wgpu::Future f1 = buffer.MapAsyncF(wgpu::MapMode::Read, 8, 4,
+                                           {nullptr, *GetParam().mFutureCallbackMode, cb1, &done1});
+
+        // Call MapAsync another time, the callback will be rejected with error status
+        // (but doesn't produce a validation error) and mMapOffset is not updated
+        // because the buffer is already being mapped and it doesn't allow multiple
+        // MapAsync requests.
+        wgpu::Future f2 = buffer.MapAsyncF(wgpu::MapMode::Read, 0, 4,
+                                           {nullptr, *GetParam().mFutureCallbackMode, cb2, &done2});
+
+        switch (*GetParam().mFutureCallbackMode) {
+            case wgpu::CallbackMode::WaitAnyOnly: {
+                wgpu::FutureWaitInfo waitInfo[] = {{f1}, {f2}};
+                GetInstance().WaitAny(2, waitInfo, UINT64_MAX);
+
+                if (!waitInfo[0].completed) {
+                    GetInstance().WaitAny(1, &waitInfo[0], UINT64_MAX);
+                } else if (!waitInfo[1].completed) {
+                    GetInstance().WaitAny(1, &waitInfo[1], UINT64_MAX);
+                }
+                break;
+            }
+            case wgpu::CallbackMode::AllowProcessEvents:
+            case wgpu::CallbackMode::AllowSpontaneous:
+                while (!done1 || !done2) {
+                    WaitABit();
+                }
+                break;
+        }
     }
 
     // mMapOffset has not been updated so it should still be 4, which is data[1]
@@ -468,36 +586,21 @@
     static constexpr uint32_t myData = 2934875;
     static constexpr size_t kSize = sizeof(myData);
 
-    struct UserData {
-        bool done;
-        wgpu::Buffer buffer;
-    };
-    UserData user{false, buffer};
-
-    buffer.MapAsync(
-        wgpu::MapMode::Write, 0, kSize,
+    MapAsyncAndWait(
+        buffer, wgpu::MapMode::Write, 0, kSize,
         [](WGPUBufferMapAsyncStatus status, void* userdata) {
-            UserData* user = static_cast<UserData*>(userdata);
+            wgpu::Buffer* buffer = static_cast<wgpu::Buffer*>(userdata);
 
-            EXPECT_EQ(WGPUBufferMapAsyncStatus_Success, status);
-            if (status == WGPUBufferMapAsyncStatus_Success) {
-                EXPECT_NE(nullptr, user->buffer.GetConstMappedRange());
-                void* ptr = user->buffer.GetMappedRange();
-                EXPECT_NE(nullptr, ptr);
-                if (ptr != nullptr) {
-                    uint32_t data = myData;
-                    memcpy(ptr, &data, kSize);
-                }
-
-                user->buffer.Unmap();
+            EXPECT_NE(nullptr, buffer->GetConstMappedRange());
+            void* ptr = buffer->GetMappedRange();
+            EXPECT_NE(nullptr, ptr);
+            if (ptr != nullptr) {
+                uint32_t data = myData;
+                memcpy(ptr, &data, kSize);
             }
-            user->done = true;
+            buffer->Unmap();
         },
-        &user);
-
-    while (!user.done) {
-        WaitABit();
-    }
+        &buffer);
 
     EXPECT_BUFFER_U32_EQ(myData, buffer, 0);
 }
@@ -509,36 +612,21 @@
     static constexpr uint32_t myData = 2934875;
     static constexpr size_t kSize = sizeof(myData);
 
-    struct UserData {
-        bool done;
-        wgpu::Buffer buffer;
-    };
-    UserData user{false, buffer};
-
-    buffer.MapAsync(
-        wgpu::MapMode::Write, 0, kSize,
+    MapAsyncAndWait(
+        buffer, wgpu::MapMode::Write, 0, kSize,
         [](WGPUBufferMapAsyncStatus status, void* userdata) {
-            UserData* user = static_cast<UserData*>(userdata);
+            wgpu::Buffer* buffer = static_cast<wgpu::Buffer*>(userdata);
 
-            EXPECT_EQ(WGPUBufferMapAsyncStatus_Success, status);
-            if (status == WGPUBufferMapAsyncStatus_Success) {
-                EXPECT_NE(nullptr, user->buffer.GetConstMappedRange(0, kSize));
-                void* ptr = user->buffer.GetMappedRange(0, kSize);
-                EXPECT_NE(nullptr, ptr);
-                if (ptr != nullptr) {
-                    uint32_t data = myData;
-                    memcpy(ptr, &data, kSize);
-                }
-
-                user->buffer.Unmap();
+            EXPECT_NE(nullptr, buffer->GetConstMappedRange(0, kSize));
+            void* ptr = buffer->GetMappedRange(0, kSize);
+            EXPECT_NE(nullptr, ptr);
+            if (ptr != nullptr) {
+                uint32_t data = myData;
+                memcpy(ptr, &data, kSize);
             }
-            user->done = true;
+            buffer->Unmap();
         },
-        &user);
-
-    while (!user.done) {
-        WaitABit();
-    }
+        &buffer);
 
     EXPECT_BUFFER_U32_EQ(myData, buffer, 0);
 }
@@ -572,16 +660,51 @@
     device.Tick();
 }
 
-DAWN_INSTANTIATE_TEST(BufferMappingTests,
-                      D3D11Backend(),
-                      D3D12Backend(),
-                      MetalBackend(),
-                      OpenGLBackend(),
-                      OpenGLESBackend(),
-                      VulkanBackend());
+DAWN_INSTANTIATE_PREFIXED_TEST_P(Legacy,
+                                 BufferMappingTests,
+                                 {D3D11Backend(), D3D12Backend(), MetalBackend(), OpenGLBackend(),
+                                  OpenGLESBackend(), VulkanBackend()},
+                                 {std::nullopt});
+
+DAWN_INSTANTIATE_PREFIXED_TEST_P(Future,
+                                 BufferMappingTests,
+                                 {MetalBackend()},
+                                 std::initializer_list<std::optional<wgpu::CallbackMode>>{
+                                     wgpu::CallbackMode::WaitAnyOnly,
+                                     wgpu::CallbackMode::AllowProcessEvents,
+                                     wgpu::CallbackMode::AllowSpontaneous});
+
+GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(BufferMappingTests);
 
 class BufferMappingCallbackTests : public BufferMappingTests {
   protected:
+    wgpu::Future DoMapAsync(wgpu::Buffer& buffer,
+                            wgpu::MapMode mapMode,
+                            size_t offset,
+                            size_t size,
+                            wgpu::BufferMapCallback callback,
+                            void* userdata) {
+        if (!GetParam().mFutureCallbackMode) {
+            buffer.MapAsync(mapMode, offset, size, callback, userdata);
+            return {0};
+        } else {
+            return buffer.MapAsyncF(mapMode, offset, size,
+                                    {nullptr, *GetParam().mFutureCallbackMode, callback, userdata});
+        }
+    }
+
+    wgpu::Future DoOnSubmittedWorkDone(wgpu::Queue& queueObj,
+                                       wgpu::QueueWorkDoneCallback callback,
+                                       void* userdata) {
+        if (!GetParam().mFutureCallbackMode) {
+            queueObj.OnSubmittedWorkDone(callback, userdata);
+            return {0};
+        } else {
+            return queueObj.OnSubmittedWorkDoneF(
+                {nullptr, *GetParam().mFutureCallbackMode, callback, userdata});
+        }
+    }
+
     void SubmitCommandBuffer(wgpu::Buffer buffer) {
         wgpu::CommandEncoder encoder = device.CreateCommandEncoder();
 
@@ -609,10 +732,26 @@
         queue.Submit(1, &commandBuffer);
     }
 
-    void Wait(std::vector<bool>& done) {
-        do {
-            WaitABit();
-        } while (std::any_of(done.begin(), done.end(), [](bool done) { return !done; }));
+    void WaitAll(std::vector<bool>& done, std::vector<wgpu::Future> futures) {
+        if (GetParam().mFutureCallbackMode &&
+            *GetParam().mFutureCallbackMode == wgpu::CallbackMode::WaitAnyOnly) {
+            std::vector<wgpu::FutureWaitInfo> waitInfos;
+            waitInfos.reserve(futures.size());
+            for (wgpu::Future f : futures) {
+                waitInfos.push_back({f});
+            }
+            size_t count = waitInfos.size();
+            do {
+                GetInstance().WaitAny(count, waitInfos.data(), UINT64_MAX);
+                auto it = std::partition(waitInfos.begin(), waitInfos.end(),
+                                         [](const auto& info) { return !info.completed; });
+                count = std::distance(waitInfos.begin(), it);
+            } while (count > 0);
+        } else {
+            do {
+                WaitABit();
+            } while (std::any_of(done.begin(), done.end(), [](bool done) { return !done; }));
+        }
     }
 };
 
@@ -625,7 +764,8 @@
 
     // 1. submission without using buffer.
     SubmitCommandBuffer({});
-    queue.OnSubmittedWorkDone(
+    wgpu::Future f1 = DoOnSubmittedWorkDone(
+        queue,
         [](WGPUQueueWorkDoneStatus status, void* userdata) {
             EXPECT_EQ(status, WGPUQueueWorkDoneStatus_Success);
             auto& done = *static_cast<std::vector<bool>*>(userdata);
@@ -637,8 +777,8 @@
         &done);
 
     // 2.
-    buffer.MapAsync(
-        wgpu::MapMode::Write, 0, wgpu::kWholeMapSize,
+    wgpu::Future f2 = DoMapAsync(
+        buffer, wgpu::MapMode::Write, 0, wgpu::kWholeMapSize,
         [](WGPUBufferMapAsyncStatus status, void* userdata) {
             EXPECT_EQ(status, WGPUBufferMapAsyncStatus_Success);
             auto& done = *static_cast<std::vector<bool>*>(userdata);
@@ -649,7 +789,7 @@
         },
         &done);
 
-    Wait(done);
+    WaitAll(done, {f1, f2});
 }
 
 TEST_P(BufferMappingCallbackTests, UseTheBufferAndThenMap) {
@@ -661,7 +801,8 @@
 
     // 1. Submit a command buffer which uses the buffer
     SubmitCommandBuffer(buffer);
-    queue.OnSubmittedWorkDone(
+    wgpu::Future f1 = DoOnSubmittedWorkDone(
+        queue,
         [](WGPUQueueWorkDoneStatus status, void* userdata) {
             EXPECT_EQ(status, WGPUQueueWorkDoneStatus_Success);
             auto& done = *static_cast<std::vector<bool>*>(userdata);
@@ -673,8 +814,8 @@
         &done);
 
     // 2.
-    buffer.MapAsync(
-        wgpu::MapMode::Write, 0, wgpu::kWholeMapSize,
+    wgpu::Future f2 = DoMapAsync(
+        buffer, wgpu::MapMode::Write, 0, wgpu::kWholeMapSize,
         [](WGPUBufferMapAsyncStatus status, void* userdata) {
             EXPECT_EQ(status, WGPUBufferMapAsyncStatus_Success);
             auto& done = *static_cast<std::vector<bool>*>(userdata);
@@ -685,7 +826,7 @@
         },
         &done);
 
-    Wait(done);
+    WaitAll(done, {f1, f2});
 
     buffer.Unmap();
 }
@@ -699,7 +840,8 @@
 
     // 1. submission without using buffer.
     SubmitCommandBuffer({});
-    queue.OnSubmittedWorkDone(
+    wgpu::Future f1 = DoOnSubmittedWorkDone(
+        queue,
         [](WGPUQueueWorkDoneStatus status, void* userdata) {
             EXPECT_EQ(status, WGPUQueueWorkDoneStatus_Success);
             auto& done = *static_cast<std::vector<bool>*>(userdata);
@@ -714,8 +856,8 @@
     queue.WriteBuffer(buffer, 0, &data, sizeof(data));
 
     // 2.
-    buffer.MapAsync(
-        wgpu::MapMode::Read, 0, wgpu::kWholeMapSize,
+    wgpu::Future f2 = DoMapAsync(
+        buffer, wgpu::MapMode::Read, 0, wgpu::kWholeMapSize,
         [](WGPUBufferMapAsyncStatus status, void* userdata) {
             EXPECT_EQ(status, WGPUBufferMapAsyncStatus_Success);
             auto& done = *static_cast<std::vector<bool>*>(userdata);
@@ -726,16 +868,25 @@
         },
         &done);
 
-    Wait(done);
+    WaitAll(done, {f1, f2});
 
     buffer.Unmap();
 }
 
-DAWN_INSTANTIATE_TEST(BufferMappingCallbackTests,
-                      D3D11Backend(),
-                      D3D12Backend(),
-                      MetalBackend(),
-                      VulkanBackend());
+DAWN_INSTANTIATE_PREFIXED_TEST_P(Legacy,
+                                 BufferMappingCallbackTests,
+                                 {D3D11Backend(), D3D12Backend(), MetalBackend(), VulkanBackend()},
+                                 {std::nullopt});
+
+DAWN_INSTANTIATE_PREFIXED_TEST_P(Future,
+                                 BufferMappingCallbackTests,
+                                 {MetalBackend()},
+                                 std::initializer_list<std::optional<wgpu::CallbackMode>>{
+                                     wgpu::CallbackMode::WaitAnyOnly,
+                                     wgpu::CallbackMode::AllowProcessEvents,
+                                     wgpu::CallbackMode::AllowSpontaneous});
+
+GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(BufferMappingCallbackTests);
 
 class BufferMappedAtCreationTests : public DawnTest {
   protected:
@@ -1492,13 +1643,10 @@
     ssbo.Unmap();
 }
 
-DAWN_INSTANTIATE_TEST(BufferMapExtendedUsagesTests,
-                      D3D11Backend(),
-                      D3D12Backend(),
-                      MetalBackend(),
-                      OpenGLBackend(),
-                      OpenGLESBackend(),
-                      VulkanBackend());
+DAWN_INSTANTIATE_TEST_P(BufferMapExtendedUsagesTests,
+                        {D3D11Backend(), D3D12Backend(), MetalBackend(), OpenGLBackend(),
+                         OpenGLESBackend(), VulkanBackend()},
+                        {std::nullopt});
 
 }  // anonymous namespace
 }  // namespace dawn