D3D11: fix SystemEventQueue's incorrect tracking of completed serials
SystemEventQueue currently recycles events in 2 places:
- CheckAndUpdateCompletedSerials().
- WaitForQueueSerial().
This leads to incorrect update of the completed serial in
CheckAndUpdateCompletedSerials().
Considering the following scenario:
- Queue.Submit() is called 2 times.
- SystemEventQueue will store a pending list of event 1 & 2 associated
with serial 1 & 2.
- WaitQueueSerials(serial=1, timeout=inf) is called.
- WaitForQueueSerial() is called.
- This waits until serial=1 is completed.
- Event 1 will be removed from the pending list.
- CheckAndUpdateCompletedSerials() is called.
- This function attempts to check the status of all pending events.
- Event 2 is the only one in the pending list at this point.
- If the event 2 was not signaled yet, the function would return
GetCompletedCommandSerial() which is zero. Because the actual
completed serial=1 has not been notified to the parent Queue class
yet.
This CL fixes the bug by removing the recycling from
WaitForQueueSerial(). Only CheckAndUpdateCompletedSerials() should
recycle the events. Because it's the place supposed to update the
completed serial.
Bug: 415561579
Change-Id: Ifb9fca831a5ccb7bfc165641179966a6bc4a6203
Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/240374
Reviewed-by: Geoff Lang <geofflang@chromium.org>
Commit-Queue: Quyen Le <lehoangquyen@chromium.org>
Auto-Submit: Quyen Le <lehoangquyen@chromium.org>
Commit-Queue: Loko Kung <lokokung@google.com>
Reviewed-by: Loko Kung <lokokung@google.com>
diff --git a/src/dawn/native/d3d11/QueueD3D11.cpp b/src/dawn/native/d3d11/QueueD3D11.cpp
index a097bf4..677fc52 100644
--- a/src/dawn/native/d3d11/QueueD3D11.cpp
+++ b/src/dawn/native/d3d11/QueueD3D11.cpp
@@ -79,6 +79,9 @@
};
// Events associated with submitted commands. They are in old to recent order.
MutexProtected<std::deque<SerialEventReceiverPair>> mPendingEvents;
+
+ // List of completed events to be recycled in CheckAndUpdateCompletedSerials().
+ MutexProtected<std::vector<SerialEventReceiverPair>> mCompletedEvents;
};
ResultOrError<Ref<Queue>> Queue::Create(Device* device, const QueueDescriptor* descriptor) {
@@ -393,6 +396,7 @@
ResultOrError<ExecutionSerial> SystemEventQueue::CheckAndUpdateCompletedSerials() {
ExecutionSerial completedSerial;
std::vector<SystemEventReceiver> returnedReceivers;
+ // Check for completed events in the pending list.
DAWN_TRY_ASSIGN(
completedSerial,
mPendingEvents.Use([&](auto pendingEvents) -> ResultOrError<ExecutionSerial> {
@@ -437,6 +441,16 @@
return completedSerial;
}));
+ // Also check for completed events processed by WaitForQueueSerial()
+ mCompletedEvents.Use([&](auto completedEvents) {
+ returnedReceivers.reserve(returnedReceivers.size() + completedEvents->size());
+ for (auto& event : *completedEvents) {
+ completedSerial = std::max(completedSerial, event.serial);
+ returnedReceivers.emplace_back(std::move(event.receiver));
+ }
+ completedEvents->clear();
+ });
+
DAWN_TRY(CheckAndMapReadyBuffers(completedSerial));
if (!returnedReceivers.empty()) {
@@ -460,8 +474,8 @@
}
bool didComplete = false;
- std::vector<SystemEventReceiver> returnedReceivers;
- DAWN_TRY_ASSIGN(didComplete, mPendingEvents.Use([&](auto pendingEvents) -> ResultOrError<bool> {
+ DAWN_TRY_ASSIGN(didComplete, mPendingEvents.Use([=, &completedEventsList = mCompletedEvents](
+ auto pendingEvents) -> ResultOrError<bool> {
DAWN_ASSERT(!pendingEvents->empty());
DAWN_ASSERT(serial >= pendingEvents->front().serial);
DAWN_ASSERT(serial <= pendingEvents->back().serial);
@@ -481,19 +495,16 @@
}
// Events before |it| should be signalled as well.
- const size_t completedEvents = std::distance(pendingEvents->begin(), it) + 1;
- returnedReceivers.reserve(completedEvents);
- std::for_each_n(pendingEvents->begin(), completedEvents, [&returnedReceivers](auto& e) {
- returnedReceivers.emplace_back(std::move(e.receiver));
+ completedEventsList.Use([&](auto completedEvList) {
+ completedEvList->insert(completedEvList->end(),
+ std::make_move_iterator(pendingEvents->begin()),
+ std::make_move_iterator(it + 1));
});
- pendingEvents->erase(pendingEvents->begin(), pendingEvents->begin() + completedEvents);
+ pendingEvents->erase(pendingEvents->begin(), it + 1);
+
return true;
}));
- if (!returnedReceivers.empty()) {
- DAWN_TRY(ReturnSystemEventReceivers(std::move(returnedReceivers)));
- }
-
return didComplete;
}
diff --git a/src/dawn/tests/end2end/EventTests.cpp b/src/dawn/tests/end2end/EventTests.cpp
index ff9bc1a..2bab721 100644
--- a/src/dawn/tests/end2end/EventTests.cpp
+++ b/src/dawn/tests/end2end/EventTests.cpp
@@ -37,6 +37,7 @@
#include "dawn/common/FutureUtils.h"
#include "dawn/tests/DawnTest.h"
#include "dawn/utils/SystemUtils.h"
+#include "dawn/utils/WGPUHelpers.h"
namespace dawn {
namespace {
@@ -618,6 +619,72 @@
}
}
+// Test that submitting multiple heavy works then waiting one by one works.
+// This is a regression test for crbug.com/dawn/415561579
+TEST_P(WaitAnyTests, WaitHeavyWorksOneByOne) {
+ // Wire doesn't support timeouts.
+ DAWN_TEST_UNSUPPORTED_IF(UsesWire());
+
+ wgpu::Buffer countBuffer;
+ wgpu::Buffer ssbo;
+ {
+ wgpu::BufferDescriptor descriptor;
+ descriptor.size = 4;
+ descriptor.usage = wgpu::BufferUsage::Storage;
+ ssbo = device.CreateBuffer(&descriptor);
+
+ descriptor.usage = wgpu::BufferUsage::Uniform | wgpu::BufferUsage::CopyDst;
+ countBuffer = device.CreateBuffer(&descriptor);
+ }
+
+ wgpu::ComputePipeline pipeline;
+ {
+ wgpu::ComputePipelineDescriptor csDesc;
+ csDesc.compute.module = utils::CreateShaderModule(device, R"(
+ @group(0) @binding(0) var<uniform> count : u32;
+ @group(0) @binding(1) var<storage, read_write> ssbo : u32;
+
+ @compute @workgroup_size(1) fn main() {
+ for (var i : u32 = 0; i < count; i++) {
+ ssbo += 1u;
+ }
+ })");
+
+ pipeline = device.CreateComputePipeline(&csDesc);
+ }
+
+ wgpu::BindGroup bindGroup = utils::MakeBindGroup(device, pipeline.GetBindGroupLayout(0),
+ {{0, countBuffer, 0, 4}, {1, ssbo, 0, 4}});
+
+ auto HeavySubmit = [&]() {
+ uint32_t count = 1000000;
+ queue.WriteBuffer(countBuffer, 0, &count, 4);
+
+ auto encoder = device.CreateCommandEncoder();
+ wgpu::ComputePassEncoder pass = encoder.BeginComputePass();
+
+ pass.SetBindGroup(0, bindGroup);
+ pass.SetPipeline(pipeline);
+ pass.DispatchWorkgroups(1);
+
+ pass.End();
+ wgpu::CommandBuffer cb = encoder.Finish();
+ queue.Submit(1, &cb);
+ };
+
+ std::vector<wgpu::Future> futures(5);
+ for (auto& future : futures) {
+ HeavySubmit();
+ future = queue.OnSubmittedWorkDone(wgpu::CallbackMode::WaitAnyOnly,
+ [](wgpu::QueueWorkDoneStatus) {});
+ }
+
+ for (const auto& future : futures) {
+ wgpu::WaitStatus status = instance.WaitAny(future, UINT64_MAX);
+ ASSERT_EQ(status, wgpu::WaitStatus::Success);
+ }
+}
+
DAWN_INSTANTIATE_TEST(WaitAnyTests,
D3D11Backend(),
D3D11Backend({"d3d11_use_unmonitored_fence"}),