Fix TSAN errors in WaitAny Bug: 345541614 Change-Id: I10af9e8fc307be2c12dbab35a8816463105c90b7 Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/192721 Commit-Queue: Austin Eng <enga@chromium.org> Reviewed-by: Loko Kung <lokokung@google.com> Reviewed-by: Quyen Le <lehoangquyen@chromium.org>
diff --git a/src/dawn/native/ExecutionQueue.cpp b/src/dawn/native/ExecutionQueue.cpp index adfeed6..75e9822 100644 --- a/src/dawn/native/ExecutionQueue.cpp +++ b/src/dawn/native/ExecutionQueue.cpp
@@ -27,6 +27,8 @@ #include "dawn/native/ExecutionQueue.h" +#include <atomic> + namespace dawn::native { ExecutionSerial ExecutionQueueBase::GetPendingCommandSerial() const { @@ -38,7 +40,7 @@ } ExecutionSerial ExecutionQueueBase::GetCompletedCommandSerial() const { - return mCompletedSerial; + return ExecutionSerial(mCompletedSerial.load(std::memory_order_acquire)); } MaybeError ExecutionQueueBase::CheckPassedSerials() { @@ -47,14 +49,13 @@ DAWN_ASSERT(completedSerial <= ExecutionSerial(mLastSubmittedSerial.load(std::memory_order_acquire))); - // completedSerial should not be less than mCompletedSerial unless it is 0. - // It can be 0 when there's no fences to check. - DAWN_ASSERT(completedSerial >= mCompletedSerial || completedSerial == ExecutionSerial(0)); - if (completedSerial > mCompletedSerial) { - mCompletedSerial = completedSerial; + // Atomically set mCompletedSerial to completedSerial if completedSerial is larger. + uint64_t current = mCompletedSerial.load(std::memory_order_acquire); + while (uint64_t(completedSerial) > current && + !mCompletedSerial.compare_exchange_weak(current, uint64_t(completedSerial), + std::memory_order_acq_rel)) { } - return {}; } @@ -70,8 +71,10 @@ void ExecutionQueueBase::AssumeCommandsComplete() { // Bump serials so any pending callbacks can be fired. + // TODO(crbug.com/dawn/831): This is called during device destroy, which is not + // thread-safe yet. Two threads calling destroy would race setting these serials. uint64_t prev = mLastSubmittedSerial.fetch_add(1u, std::memory_order_release); - mCompletedSerial = ExecutionSerial(prev + 1); + mCompletedSerial.store(prev + 1u, std::memory_order_release); } void ExecutionQueueBase::IncrementLastSubmittedCommandSerial() { @@ -79,8 +82,8 @@ } bool ExecutionQueueBase::HasScheduledCommands() const { - return ExecutionSerial(mLastSubmittedSerial.load(std::memory_order_acquire)) > - mCompletedSerial || + return mLastSubmittedSerial.load(std::memory_order_acquire) > + mCompletedSerial.load(std::memory_order_acquire) || HasPendingCommands(); }
diff --git a/src/dawn/native/ExecutionQueue.h b/src/dawn/native/ExecutionQueue.h index 7901f82..5bd86ff 100644 --- a/src/dawn/native/ExecutionQueue.h +++ b/src/dawn/native/ExecutionQueue.h
@@ -94,7 +94,7 @@ // mLastSubmittedSerial tracks the last submitted command serial. // During device removal, the serials could be artificially incremented // to make it appear as if commands have been compeleted. - ExecutionSerial mCompletedSerial = kBeginningOfGPUTime; + std::atomic<uint64_t> mCompletedSerial = static_cast<uint64_t>(kBeginningOfGPUTime); std::atomic<uint64_t> mLastSubmittedSerial = static_cast<uint64_t>(kBeginningOfGPUTime); // Indicates whether the backend has pending commands to be submitted as soon as possible.
diff --git a/src/dawn/native/vulkan/QueueVk.cpp b/src/dawn/native/vulkan/QueueVk.cpp index 8923720..298a9d9 100644 --- a/src/dawn/native/vulkan/QueueVk.cpp +++ b/src/dawn/native/vulkan/QueueVk.cpp
@@ -27,6 +27,9 @@ #include "dawn/native/vulkan/QueueVk.h" +#include <optional> +#include <utility> + #include "dawn/common/Math.h" #include "dawn/native/Buffer.h" #include "dawn/native/CommandValidation.h" @@ -161,7 +164,7 @@ // Update fenceSerial since fence is ready. fenceSerial = tentativeSerial; - mUnusedFences.push_back(fence); + mUnusedFences->push_back(fence); DAWN_ASSERT(fenceSerial > GetCompletedCommandSerial()); fencesInFlight->pop_front(); @@ -421,7 +424,7 @@ // If submitting to the queue fails, move the fence back into the unused fence // list, as if it were never acquired. Not doing so would leak the fence since // it would be neither in the unused list nor in the in-flight list. - mUnusedFences.push_back(fence); + mUnusedFences->push_back(fence); }); TRACE_EVENT_END0(device->GetPlatform(), Recording, "vkQueueSubmit"); @@ -464,12 +467,21 @@ Device* device = ToBackend(GetDevice()); VkDevice vkDevice = device->GetVkDevice(); - if (!mUnusedFences.empty()) { - VkFence fence = mUnusedFences.back(); - DAWN_TRY(CheckVkSuccess(device->fn.ResetFences(vkDevice, 1, &*fence), "vkResetFences")); + auto result = + mUnusedFences.Use([&](auto unusedFences) -> std::optional<ResultOrError<VkFence>> { + if (!unusedFences->empty()) { + VkFence fence = unusedFences->back(); + DAWN_ASSERT(fence != VK_NULL_HANDLE); + DAWN_TRY( + CheckVkSuccess(device->fn.ResetFences(vkDevice, 1, &*fence), "vkResetFences")); - mUnusedFences.pop_back(); - return fence; + unusedFences->pop_back(); + return fence; + } + return std::nullopt; + }); + if (result) { + return std::move(*result); } VkFenceCreateInfo createInfo; @@ -520,10 +532,12 @@ } }); - for (VkFence fence : mUnusedFences) { - device->fn.DestroyFence(vkDevice, fence, nullptr); - } - mUnusedFences.clear(); + mUnusedFences.Use([&](auto unusedFences) { + for (VkFence fence : *unusedFences) { + device->fn.DestroyFence(vkDevice, fence, nullptr); + } + unusedFences->clear(); + }); QueueBase::DestroyImpl(); }
diff --git a/src/dawn/native/vulkan/QueueVk.h b/src/dawn/native/vulkan/QueueVk.h index 2e4b823..cec9cb6 100644 --- a/src/dawn/native/vulkan/QueueVk.h +++ b/src/dawn/native/vulkan/QueueVk.h
@@ -83,7 +83,7 @@ // have finished. MutexProtected<std::deque<std::pair<VkFence, ExecutionSerial>>> mFencesInFlight; // Fences in the unused list aren't reset yet. - std::vector<VkFence> mUnusedFences; + MutexProtected<std::vector<VkFence>> mUnusedFences; MaybeError PrepareRecordingContext(); ResultOrError<CommandPoolAndBuffer> BeginVkCommandBuffer();
diff --git a/src/dawn/tests/end2end/CreatePipelineAsyncTests.cpp b/src/dawn/tests/end2end/CreatePipelineAsyncTests.cpp index fee0053..317220c 100644 --- a/src/dawn/tests/end2end/CreatePipelineAsyncTests.cpp +++ b/src/dawn/tests/end2end/CreatePipelineAsyncTests.cpp
@@ -172,6 +172,9 @@ TEST_P(CreatePipelineAsyncTest, CreateComputePipelineAsyncStress) { DAWN_TEST_UNSUPPORTED_IF(UsesWire()); + // TODO(crbug.com/dawn/1766): TSAN reported race conditions in NVIDIA's vk driver. + DAWN_SUPPRESS_TEST_IF(IsVulkan() && IsNvidia() && IsTsan()); + for (size_t i = 0; i < 100; i++) { wgpu::ComputePipelineDescriptor csDesc; std::string shader = R"( @@ -204,6 +207,9 @@ // eglMakeCurrent: Context can only be current on one thread DAWN_TEST_UNSUPPORTED_IF(IsOpenGL() || IsOpenGLES()); + // TODO(crbug.com/dawn/1766): TSAN reported race conditions in NVIDIA's vk driver. + DAWN_SUPPRESS_TEST_IF(IsVulkan() && IsNvidia() && IsTsan()); + auto f = [&](size_t t) { wgpu::ComputePipelineDescriptor csDesc; std::string shader = R"( @@ -335,6 +341,9 @@ TEST_P(CreatePipelineAsyncTest, CreateRenderPipelineAsyncStress) { DAWN_TEST_UNSUPPORTED_IF(UsesWire()); + // TODO(crbug.com/dawn/1766): TSAN reported race conditions in NVIDIA's vk driver. + DAWN_SUPPRESS_TEST_IF(IsVulkan() && IsNvidia() && IsTsan()); + for (size_t i = 0; i < 100; i++) { utils::ComboRenderPipelineDescriptor desc; std::string shader = R"( @@ -363,6 +372,9 @@ // eglMakeCurrent: Context can only be current on one thread DAWN_TEST_UNSUPPORTED_IF(IsOpenGL() || IsOpenGLES()); + // TODO(crbug.com/dawn/1766): TSAN reported race conditions in NVIDIA's vk driver. + DAWN_SUPPRESS_TEST_IF(IsVulkan() && IsNvidia() && IsTsan()); + auto f = [&](size_t t) { utils::ComboRenderPipelineDescriptor desc; std::string shader = R"(