Synchronize SharedBufferMemory on the CPU Before Map Operations

Fixes incorrect synchronization of SharedBufferMemory to ensure we wait
on the CPU for all provided fences to complete before mapping on the CPU.
Fixes a flaky test.

Change-Id: Ib3c853317ef71082ae46f0fe23fe7fde74c0eb36
Bug: chromium:407748576
Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/235597
Reviewed-by: Corentin Wallez <cwallez@chromium.org>
Reviewed-by: Brandon Jones <bajones@chromium.org>
Commit-Queue: Brandon1 Jones <brandon1.jones@intel.com>
Reviewed-by: Antonio Maiorano <amaiorano@google.com>
diff --git a/src/dawn/native/d3d12/BufferD3D12.cpp b/src/dawn/native/d3d12/BufferD3D12.cpp
index ab9b5f8..09ce7a9 100644
--- a/src/dawn/native/d3d12/BufferD3D12.cpp
+++ b/src/dawn/native/d3d12/BufferD3D12.cpp
@@ -453,6 +453,8 @@
 }
 
 MaybeError Buffer::MapInternal(bool isWrite, size_t offset, size_t size, const char* contextInfo) {
+    DAWN_TRY(SynchronizeBufferBeforeMapping());
+
     // The mapped buffer can be accessed at any time, so it must be locked to ensure it is never
     // evicted. This buffer should already have been made resident when it was created.
     TRACE_EVENT0(GetDevice()->GetPlatform(), General, "BufferD3D12::MapInternal");
@@ -493,9 +495,6 @@
 }
 
 MaybeError Buffer::MapAsyncImpl(wgpu::MapMode mode, size_t offset, size_t size) {
-    // Externally allocated buffers must be synchronized before any usage.
-    DAWN_TRY(SynchronizeBufferBeforeUse());
-
     // GetPendingCommandContext() call might create a new commandList. Dawn will handle
     // it in Tick() by execute the commandList and signal a fence for it even it is empty.
     // Skip the unnecessary GetPendingCommandContext() call saves an extra fence.
@@ -630,21 +629,38 @@
     return {};
 }
 
-MaybeError Buffer::SynchronizeBufferBeforeUse() {
+MaybeError Buffer::SynchronizeBufferBeforeMapping() {
     // Buffers imported with the SharedBufferMemory feature can include fences that must finish
-    // before Dawn can use the buffer. We acquire and wait for them here.
+    // before Dawn can use the buffer for mapping operations on the CPU timeline. We acquire and
+    // wait for them here.
+    if (SharedResourceMemoryContents* contents = GetSharedResourceMemoryContents()) {
+        SharedBufferMemoryBase::PendingFenceList fences;
+        contents->AcquirePendingFences(&fences);
+        for (const auto& fence : fences) {
+            HANDLE fenceEvent = 0;
+            ComPtr<ID3D12Fence> d3dFence = ToBackend(fence.object)->GetD3DFence();
+            if (d3dFence->GetCompletedValue() < fence.signaledValue) {
+                d3dFence->SetEventOnCompletion(fence.signaledValue, fenceEvent);
+                WaitForSingleObject(fenceEvent, INFINITE);
+            }
+        }
+    }
+
+    return {};
+}
+
+MaybeError Buffer::SynchronizeBufferBeforeUseOnGPU() {
+    // Buffers imported with the SharedBufferMemory feature can include fences that must finish
+    // before Dawn can use the buffer on the GPU timeline. We acquire and wait for them here.
     if (SharedResourceMemoryContents* contents = GetSharedResourceMemoryContents()) {
         Device* device = ToBackend(GetDevice());
         Queue* queue = ToBackend(device->GetQueue());
 
-        std::vector<FenceAndSignalValue> waitFences;
         SharedBufferMemoryBase::PendingFenceList fences;
         contents->AcquirePendingFences(&fences);
-        waitFences.insert(waitFences.end(), std::make_move_iterator(fences.begin()),
-                          std::make_move_iterator(fences.end()));
 
         ID3D12CommandQueue* commandQueue = queue->GetCommandQueue();
-        for (const auto& fence : waitFences) {
+        for (const auto& fence : fences) {
             DAWN_TRY(CheckHRESULT(commandQueue->Wait(ToBackend(fence.object)->GetD3DFence(),
                                                      fence.signaledValue),
                                   "D3D12 fence wait"););
diff --git a/src/dawn/native/d3d12/BufferD3D12.h b/src/dawn/native/d3d12/BufferD3D12.h
index e73840e..4b3c181 100644
--- a/src/dawn/native/d3d12/BufferD3D12.h
+++ b/src/dawn/native/d3d12/BufferD3D12.h
@@ -71,7 +71,8 @@
     MaybeError EnsureDataInitializedAsDestination(CommandRecordingContext* commandContext,
                                                   const CopyTextureToBufferCmd* copy);
 
-    MaybeError SynchronizeBufferBeforeUse();
+    MaybeError SynchronizeBufferBeforeMapping();
+    MaybeError SynchronizeBufferBeforeUseOnGPU();
 
     // Dawn API
     void SetLabelImpl() override;
diff --git a/src/dawn/native/d3d12/CommandRecordingContext.cpp b/src/dawn/native/d3d12/CommandRecordingContext.cpp
index 2ab8b73..e42d629 100644
--- a/src/dawn/native/d3d12/CommandRecordingContext.cpp
+++ b/src/dawn/native/d3d12/CommandRecordingContext.cpp
@@ -61,7 +61,7 @@
     DAWN_ASSERT(mD3d12CommandList != nullptr);
 
     for (Buffer* buffer : mSharedBuffers) {
-        DAWN_TRY(buffer->SynchronizeBufferBeforeUse());
+        DAWN_TRY(buffer->SynchronizeBufferBeforeUseOnGPU());
     }
 
     for (Texture* texture : mSharedTextures) {
diff --git a/src/dawn/native/d3d12/DeviceD3D12.cpp b/src/dawn/native/d3d12/DeviceD3D12.cpp
index 5cace31..6907fe7 100644
--- a/src/dawn/native/d3d12/DeviceD3D12.cpp
+++ b/src/dawn/native/d3d12/DeviceD3D12.cpp
@@ -493,7 +493,7 @@
         ToBackend(GetQueue())->GetPendingCommandContext(QueueBase::SubmitMode::Passive);
 
     Buffer* dstBuffer = ToBackend(destination);
-    DAWN_TRY(dstBuffer->SynchronizeBufferBeforeUse());
+    DAWN_TRY(dstBuffer->SynchronizeBufferBeforeUseOnGPU());
 
     [[maybe_unused]] bool cleared;
     DAWN_TRY_ASSIGN(cleared, dstBuffer->EnsureDataInitializedAsDestination(
diff --git a/src/dawn/tests/white_box/SharedBufferMemoryTests.cpp b/src/dawn/tests/white_box/SharedBufferMemoryTests.cpp
index 542fdff..a84bc57 100644
--- a/src/dawn/tests/white_box/SharedBufferMemoryTests.cpp
+++ b/src/dawn/tests/white_box/SharedBufferMemoryTests.cpp
@@ -357,7 +357,6 @@
     memory.EndAccess(buffer, &endState);
 
     EXPECT_EQ(endState.initialized, true);
-    EXPECT_GE(endState.fenceCount, 1u);
 
     // Pass fences from the previous operation to the next BeginAccessDescriptor to ensure
     // operations are complete.