[Vulkan] Handle fence wait timeout by retry when max timeout requested
Change-Id: Ie628e47a3556239c4eccad84936eb19ace65e188
Bug: 40073661, 344798087
Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/211014
Commit-Queue: Colin Blundell <blundell@chromium.org>
Reviewed-by: Corentin Wallez <cwallez@chromium.org>
diff --git a/src/dawn/native/vulkan/QueueVk.cpp b/src/dawn/native/vulkan/QueueVk.cpp
index aa87b5a..0e40ea5 100644
--- a/src/dawn/native/vulkan/QueueVk.cpp
+++ b/src/dawn/native/vulkan/QueueVk.cpp
@@ -27,6 +27,7 @@
#include "dawn/native/vulkan/QueueVk.h"
+#include <limits>
#include <optional>
#include <utility>
@@ -520,32 +521,54 @@
ResultOrError<bool> Queue::WaitForQueueSerial(ExecutionSerial serial, Nanoseconds timeout) {
Device* device = ToBackend(GetDevice());
VkDevice vkDevice = device->GetVkDevice();
- VkResult waitResult = mFencesInFlight.Use([&](auto fencesInFlight) {
- // Search from for the first fence >= serial.
- VkFence waitFence = VK_NULL_HANDLE;
- for (auto it = fencesInFlight->begin(); it != fencesInFlight->end(); ++it) {
- if (it->second >= serial) {
- waitFence = it->first;
- break;
+ // If the client has passed a finite timeout, the function will eventually return due to
+ // either (1) the fences being signaled, (2) the timeout being reached, or (3) the device
+ // being lost. If the client has passed an infinite timeout, this function might hang forever
+ // if the fences are never signaled (which matches the semantics that the client has
+ // specified).
+ // TODO(crbug.com/344798087): Handle the issue of timeouts in a more general way further up the
+ // stack.
+ while (1) {
+ VkResult waitResult = mFencesInFlight.Use([&](auto fencesInFlight) {
+ // Search for the first in-flight fence whose serial is >= the requested serial.
+ VkFence waitFence = VK_NULL_HANDLE;
+ for (auto it = fencesInFlight->begin(); it != fencesInFlight->end(); ++it) {
+ if (it->second >= serial) {
+ waitFence = it->first;
+ break;
+ }
}
+ if (waitFence == VK_NULL_HANDLE) {
+ // Fence not found. This serial must have already completed.
+ // Return a VK_SUCCESS status.
+ DAWN_ASSERT(serial <= GetCompletedCommandSerial());
+ return VkResult::WrapUnsafe(VK_SUCCESS);
+ }
+ // Wait for the fence, passing the caller's timeout through to vkWaitForFences.
+ return VkResult::WrapUnsafe(
+ INJECT_ERROR_OR_RUN(device->fn.WaitForFences(vkDevice, 1, &*waitFence, true,
+ static_cast<uint64_t>(timeout)),
+ VK_ERROR_DEVICE_LOST));
+ });
+ if (waitResult == VK_TIMEOUT) {
+ // There is evidence that `VK_TIMEOUT` can get returned even when the
+ // client has specified an infinite timeout (e.g., due to signals). Retry
+ // waiting on the fence in this case in order to satisfy the semantics
+ // that the function should return only when either (a) the fences are
+ // signaled or (b) the passed-in timeout is reached. Note that this can
+ // result in this function busy-looping forever in this case, but the
+ // client has explicitly requested this behavior by passing in an infinite
+ // timeout.
+ // TODO(crbug.com/344798087): Handle the issue of timeouts in a more general way further
+ // up the stack.
+ if (static_cast<uint64_t>(timeout) == std::numeric_limits<uint64_t>::max()) {
+ continue;
+ }
+ return false;
}
- if (waitFence == VK_NULL_HANDLE) {
- // Fence not found. This serial must have already completed.
- // Return a VK_SUCCESS status.
- DAWN_ASSERT(serial <= GetCompletedCommandSerial());
- return VkResult::WrapUnsafe(VK_SUCCESS);
- }
- // Wait for the fence.
- return VkResult::WrapUnsafe(
- INJECT_ERROR_OR_RUN(device->fn.WaitForFences(vkDevice, 1, &*waitFence, true,
- static_cast<uint64_t>(timeout)),
- VK_ERROR_DEVICE_LOST));
- });
- if (waitResult == VK_TIMEOUT) {
- return false;
+ DAWN_TRY(CheckVkSuccess(::VkResult(waitResult), "vkWaitForFences"));
+ return true;
}
- DAWN_TRY(CheckVkSuccess(::VkResult(waitResult), "vkWaitForFences"));
- return true;
}
} // namespace dawn::native::vulkan