Split Vulkan command buffers to work around bug
There's a bug in some Qualcomm devices where using a depth/stencil
texture as a render attachment and then sampling it in a compute pass
causes a crash. This only happens, however, if the two passes occur as
part of the same Vulkan command buffer.
To work around the issue, this change splits the Vulkan command buffer
while recording any time it identifies that the problematic scenario may
occur.
Bug: dawn:1564
Change-Id: Ie137e9118ef9cc41f5908ca32c72c33f3798cd71
Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/104860
Kokoro: Kokoro <noreply+kokoro@google.com>
Commit-Queue: Brandon Jones <bajones@chromium.org>
Reviewed-by: Kai Ninomiya <kainino@chromium.org>
diff --git a/src/dawn/native/Toggles.cpp b/src/dawn/native/Toggles.cpp
index ae64158b..8ae66b7 100644
--- a/src/dawn/native/Toggles.cpp
+++ b/src/dawn/native/Toggles.cpp
@@ -312,6 +312,13 @@
"This toggle is enabled by default on Metal backend where GPU counters cannot be stored to"
"sampleBufferAttachments on empty blit encoder.",
"https://crbug.com/dawn/1473"}},
+ {Toggle::VulkanSplitCommandBufferOnDepthStencilComputeSampleAfterRenderPass,
+ {"vulkan_split_command_buffer_on_depth_stencil_compute_sample_after_render_pass",
+ "Splits any command buffer that samples a depth/stencil texture in a compute pass after that "
+ "texture was used as an attachment for a prior render pass. This toggle is enabled by "
+ "default on Qualcomm GPUs, which have been observed experiencing a driver crash in this "
+ "situation.",
+ "https://crbug.com/dawn/1564"}},
// Comment to separate the }} so it is clearer what to copy-paste to add a toggle.
}};
} // anonymous namespace
diff --git a/src/dawn/native/Toggles.h b/src/dawn/native/Toggles.h
index 942f76a..8cfc9d4 100644
--- a/src/dawn/native/Toggles.h
+++ b/src/dawn/native/Toggles.h
@@ -80,6 +80,7 @@
D3D12UseTempBufferInDepthStencilTextureAndBufferCopyWithNonZeroBufferOffset,
ApplyClearBigIntegerColorValueWithDraw,
MetalUseDummyBlitEncoderForWriteTimestamp,
+ VulkanSplitCommandBufferOnDepthStencilComputeSampleAfterRenderPass,
EnumCount,
InvalidEnum = EnumCount,
diff --git a/src/dawn/native/vulkan/CommandBufferVk.cpp b/src/dawn/native/vulkan/CommandBufferVk.cpp
index fedf6a5..ef88a92 100644
--- a/src/dawn/native/vulkan/CommandBufferVk.cpp
+++ b/src/dawn/native/vulkan/CommandBufferVk.cpp
@@ -24,6 +24,7 @@
#include "dawn/native/DynamicUploader.h"
#include "dawn/native/EnumMaskIterator.h"
#include "dawn/native/RenderBundle.h"
+#include "dawn/native/vulkan/AdapterVk.h"
#include "dawn/native/vulkan/BindGroupVk.h"
#include "dawn/native/vulkan/BufferVk.h"
#include "dawn/native/vulkan/CommandRecordingContext.h"
@@ -887,6 +888,24 @@
const ComputePassResourceUsage& resourceUsages) {
Device* device = ToBackend(GetDevice());
+ // If required, split the command buffer any time we detect a dpeth/stencil attachment is
+ // used in a compute pass after being used as a render pass attachment in the same command
+ // buffer.
+ if (device->IsToggleEnabled(
+ Toggle::VulkanSplitCommandBufferOnDepthStencilComputeSampleAfterRenderPass) &&
+ !mRenderPassDepthStencilAttachments.empty()) {
+ for (auto texture : resourceUsages.referencedTextures) {
+ if (texture->GetFormat().HasDepthOrStencil() &&
+ mRenderPassDepthStencilAttachments.find(texture) !=
+ mRenderPassDepthStencilAttachments.end()) {
+ // Identified a potential crash case, split the command buffer.
+ DAWN_TRY(device->SplitRecordingContext(recordingContext));
+ mRenderPassDepthStencilAttachments.clear();
+ break;
+ }
+ }
+ }
+
// Write timestamp at the beginning of compute pass if it's set
if (computePassCmd->beginTimestamp.querySet.Get() != nullptr) {
RecordWriteTimestampCmd(recordingContext, device,
@@ -1038,6 +1057,14 @@
DAWN_TRY(RecordBeginRenderPass(recordingContext, device, renderPassCmd));
+ // If required, track depth/stencil textures used as render pass attachments.
+ if (device->IsToggleEnabled(
+ Toggle::VulkanSplitCommandBufferOnDepthStencilComputeSampleAfterRenderPass) &&
+ renderPassCmd->attachmentState->HasDepthStencilAttachment()) {
+ mRenderPassDepthStencilAttachments.insert(
+ renderPassCmd->depthStencilAttachment.view->GetTexture());
+ }
+
// Write timestamp at the beginning of render pass if it's set.
if (renderPassCmd->beginTimestamp.querySet.Get() != nullptr) {
RecordWriteTimestampCmd(recordingContext, device,
diff --git a/src/dawn/native/vulkan/CommandBufferVk.h b/src/dawn/native/vulkan/CommandBufferVk.h
index b748f0d..322d17f 100644
--- a/src/dawn/native/vulkan/CommandBufferVk.h
+++ b/src/dawn/native/vulkan/CommandBufferVk.h
@@ -15,6 +15,8 @@
#ifndef SRC_DAWN_NATIVE_VULKAN_COMMANDBUFFERVK_H_
#define SRC_DAWN_NATIVE_VULKAN_COMMANDBUFFERVK_H_
+#include <set>
+
#include "dawn/native/CommandBuffer.h"
#include "dawn/native/Error.h"
@@ -50,6 +52,10 @@
const TextureCopy& srcCopy,
const TextureCopy& dstCopy,
const Extent3D& copySize);
+
+ // Need to track depth/stencil textures used by render passes if the
+ // VulkanSplitCommandBufferOnDepthStencilComputeSampleAfterRenderPass toggle is enabled.
+ std::set<TextureBase*> mRenderPassDepthStencilAttachments;
};
} // namespace dawn::native::vulkan
diff --git a/src/dawn/native/vulkan/CommandRecordingContext.h b/src/dawn/native/vulkan/CommandRecordingContext.h
index ab43ae2..c8fb544 100644
--- a/src/dawn/native/vulkan/CommandRecordingContext.h
+++ b/src/dawn/native/vulkan/CommandRecordingContext.h
@@ -41,6 +41,13 @@
// For Device state tracking only.
VkCommandPool commandPool = VK_NULL_HANDLE;
bool used = false;
+
+ // In some cases command buffer will need to be split to accomodate driver bug workarounds.
+ // See the VulkanSplitCommandBufferOnDepthStencilComputeSampleAfterRenderPass toggle as an
+ // example. This tracks the list of all command buffers used for this recording context,
+ // with commandBuffer always being the last element.
+ std::vector<VkCommandBuffer> commandBufferList;
+ std::vector<VkCommandPool> commandPoolList;
};
} // namespace dawn::native::vulkan
diff --git a/src/dawn/native/vulkan/DeviceVk.cpp b/src/dawn/native/vulkan/DeviceVk.cpp
index 26d638e..d352263 100644
--- a/src/dawn/native/vulkan/DeviceVk.cpp
+++ b/src/dawn/native/vulkan/DeviceVk.cpp
@@ -321,8 +321,8 @@
submitInfo.waitSemaphoreCount = static_cast<uint32_t>(mRecordingContext.waitSemaphores.size());
submitInfo.pWaitSemaphores = AsVkArray(mRecordingContext.waitSemaphores.data());
submitInfo.pWaitDstStageMask = dstStageMasks.data();
- submitInfo.commandBufferCount = 1;
- submitInfo.pCommandBuffers = &mRecordingContext.commandBuffer;
+ submitInfo.commandBufferCount = mRecordingContext.commandBufferList.size();
+ submitInfo.pCommandBuffers = mRecordingContext.commandBufferList.data();
submitInfo.signalSemaphoreCount = (scopedSignalSemaphore.Get() == VK_NULL_HANDLE ? 0 : 1);
submitInfo.pSignalSemaphores = AsVkArray(scopedSignalSemaphore.InitializeInto());
@@ -345,9 +345,11 @@
ExecutionSerial lastSubmittedSerial = GetLastSubmittedCommandSerial();
mFencesInFlight.emplace(fence, lastSubmittedSerial);
- CommandPoolAndBuffer submittedCommands = {mRecordingContext.commandPool,
- mRecordingContext.commandBuffer};
- mCommandsInFlight.Enqueue(submittedCommands, lastSubmittedSerial);
+ for (size_t i = 0; i < mRecordingContext.commandBufferList.size(); ++i) {
+ CommandPoolAndBuffer submittedCommands = {mRecordingContext.commandPoolList[i],
+ mRecordingContext.commandBufferList[i]};
+ mCommandsInFlight.Enqueue(submittedCommands, lastSubmittedSerial);
+ }
if (mRecordingContext.externalTexturesForEagerTransition.size() > 0) {
// Export the signal semaphore.
@@ -610,6 +612,14 @@
// By default try to use S8 if available.
SetToggle(Toggle::VulkanUseS8, true);
+
+ // dawn:1564: Clearing a depth/stencil buffer in a render pass and then sampling it in a
+ // compute pass in the same command buffer causes a crash on Qualcomm GPUs. To work around that
+ // bug, split the command buffer any time we can detect that situation.
+ if (ToBackend(GetAdapter())->IsAndroidQualcomm()) {
+ ForceSetToggle(Toggle::VulkanSplitCommandBufferOnDepthStencilComputeSampleAfterRenderPass,
+ true);
+ }
}
void Device::ApplyDepthStencilFormatToggles() {
@@ -695,9 +705,43 @@
ASSERT(mRecordingContext.commandBuffer == VK_NULL_HANDLE);
ASSERT(mRecordingContext.commandPool == VK_NULL_HANDLE);
+ CommandPoolAndBuffer commands;
+ DAWN_TRY_ASSIGN(commands, BeginVkCommandBuffer());
+
+ mRecordingContext.commandBuffer = commands.commandBuffer;
+ mRecordingContext.commandPool = commands.pool;
+ mRecordingContext.commandBufferList.push_back(commands.commandBuffer);
+ mRecordingContext.commandPoolList.push_back(commands.pool);
+
+ return {};
+}
+
+// Splits the recording context, ending the current command buffer and beginning a new one.
+// This should not be necessary in most cases, and is provided only to work around driver issues
+// on some hardware.
+MaybeError Device::SplitRecordingContext(CommandRecordingContext* recordingContext) {
+ ASSERT(recordingContext->used);
+
+ DAWN_TRY(
+ CheckVkSuccess(fn.EndCommandBuffer(recordingContext->commandBuffer), "vkEndCommandBuffer"));
+
+ CommandPoolAndBuffer commands;
+ DAWN_TRY_ASSIGN(commands, BeginVkCommandBuffer());
+
+ recordingContext->commandBuffer = commands.commandBuffer;
+ recordingContext->commandPool = commands.pool;
+ recordingContext->commandBufferList.push_back(commands.commandBuffer);
+ recordingContext->commandPoolList.push_back(commands.pool);
+
+ return {};
+}
+
+ResultOrError<Device::CommandPoolAndBuffer> Device::BeginVkCommandBuffer() {
+ CommandPoolAndBuffer commands;
+
// First try to recycle unused command pools.
if (!mUnusedCommands.empty()) {
- CommandPoolAndBuffer commands = mUnusedCommands.back();
+ commands = mUnusedCommands.back();
mUnusedCommands.pop_back();
DAWN_TRY_WITH_CLEANUP(
CheckVkSuccess(fn.ResetCommandPool(mVkDevice, commands.pool, 0), "vkResetCommandPool"),
@@ -714,9 +758,6 @@
fn.FreeCommandBuffers(mVkDevice, commands.pool, 1, &commands.commandBuffer);
fn.DestroyCommandPool(mVkDevice, commands.pool, nullptr);
});
-
- mRecordingContext.commandBuffer = commands.commandBuffer;
- mRecordingContext.commandPool = commands.pool;
} else {
// Create a new command pool for our commands and allocate the command buffer.
VkCommandPoolCreateInfo createInfo;
@@ -725,19 +766,19 @@
createInfo.flags = VK_COMMAND_POOL_CREATE_TRANSIENT_BIT;
createInfo.queueFamilyIndex = mQueueFamily;
- DAWN_TRY(CheckVkSuccess(
- fn.CreateCommandPool(mVkDevice, &createInfo, nullptr, &*mRecordingContext.commandPool),
- "vkCreateCommandPool"));
+ DAWN_TRY(
+ CheckVkSuccess(fn.CreateCommandPool(mVkDevice, &createInfo, nullptr, &*commands.pool),
+ "vkCreateCommandPool"));
VkCommandBufferAllocateInfo allocateInfo;
allocateInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO;
allocateInfo.pNext = nullptr;
- allocateInfo.commandPool = mRecordingContext.commandPool;
+ allocateInfo.commandPool = commands.pool;
allocateInfo.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY;
allocateInfo.commandBufferCount = 1;
DAWN_TRY(CheckVkSuccess(
- fn.AllocateCommandBuffers(mVkDevice, &allocateInfo, &mRecordingContext.commandBuffer),
+ fn.AllocateCommandBuffers(mVkDevice, &allocateInfo, &commands.commandBuffer),
"vkAllocateCommandBuffers"));
}
@@ -748,8 +789,10 @@
beginInfo.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT;
beginInfo.pInheritanceInfo = nullptr;
- return CheckVkSuccess(fn.BeginCommandBuffer(mRecordingContext.commandBuffer, &beginInfo),
- "vkBeginCommandBuffer");
+ DAWN_TRY(CheckVkSuccess(fn.BeginCommandBuffer(commands.commandBuffer, &beginInfo),
+ "vkBeginCommandBuffer"));
+
+ return commands;
}
void Device::RecycleCompletedCommands() {
diff --git a/src/dawn/native/vulkan/DeviceVk.h b/src/dawn/native/vulkan/DeviceVk.h
index a3bc2e5..8f04e9c 100644
--- a/src/dawn/native/vulkan/DeviceVk.h
+++ b/src/dawn/native/vulkan/DeviceVk.h
@@ -66,6 +66,7 @@
external_semaphore::Service* GetExternalSemaphoreService() const;
CommandRecordingContext* GetPendingRecordingContext();
+ MaybeError SplitRecordingContext(CommandRecordingContext* recordingContext);
MaybeError SubmitPendingCommands();
void EnqueueDeferredDeallocation(DescriptorSetAllocator* allocator);
@@ -205,13 +206,15 @@
const std::string mDebugPrefix;
std::vector<std::string> mDebugMessages;
- MaybeError PrepareRecordingContext();
- void RecycleCompletedCommands();
-
struct CommandPoolAndBuffer {
VkCommandPool pool = VK_NULL_HANDLE;
VkCommandBuffer commandBuffer = VK_NULL_HANDLE;
};
+
+ MaybeError PrepareRecordingContext();
+ ResultOrError<CommandPoolAndBuffer> BeginVkCommandBuffer();
+ void RecycleCompletedCommands();
+
SerialQueue<ExecutionSerial, CommandPoolAndBuffer> mCommandsInFlight;
// Command pools in the unused list haven't been reset yet.
std::vector<CommandPoolAndBuffer> mUnusedCommands;
diff --git a/src/dawn/tests/end2end/DepthStencilSamplingTests.cpp b/src/dawn/tests/end2end/DepthStencilSamplingTests.cpp
index b2a5028..9ea5600 100644
--- a/src/dawn/tests/end2end/DepthStencilSamplingTests.cpp
+++ b/src/dawn/tests/end2end/DepthStencilSamplingTests.cpp
@@ -625,9 +625,6 @@
// This test fails on ANGLE (both SwiftShader and D3D11).
DAWN_SUPPRESS_TEST_IF(IsANGLE());
- // TODO(dawn:1549) Fails on Qualcomm-based Android devices.
- DAWN_SUPPRESS_TEST_IF(IsAndroid() && IsQualcomm());
-
wgpu::TextureFormat format = GetParam().mTextureFormat;
DoSamplingExtraStencilComponentsRenderTest(TestAspectAndSamplerType::StencilAsUint, format,
@@ -639,9 +636,6 @@
// Test sampling both depth and stencil with a render/compute pipeline works.
TEST_P(DepthStencilSamplingTest, SampleDepthAndStencilRender) {
- // TODO(dawn:1549) Fails on Qualcomm-based Android devices.
- DAWN_SUPPRESS_TEST_IF(IsAndroid() && IsQualcomm());
-
wgpu::TextureFormat format = GetParam().mTextureFormat;
wgpu::SamplerDescriptor samplerDesc;
@@ -760,9 +754,6 @@
// Test that sampling a depth texture with a render/compute pipeline works
TEST_P(DepthSamplingTest, SampleDepthOnly) {
- // TODO(dawn:1549) Fails on Qualcomm-based Android devices.
- DAWN_SUPPRESS_TEST_IF(IsAndroid() && IsQualcomm());
-
wgpu::TextureFormat format = GetParam().mTextureFormat;
float tolerance = format == wgpu::TextureFormat::Depth16Unorm ? 0.001f : 0.0f;
@@ -812,9 +803,6 @@
// This test fails on SwANGLE (although it passes on other ANGLE backends).
DAWN_TEST_UNSUPPORTED_IF(IsANGLE());
- // TODO(dawn:1549) Fails on Qualcomm-based Android devices.
- DAWN_SUPPRESS_TEST_IF(IsAndroid() && IsQualcomm());
-
wgpu::TextureFormat format = GetParam().mTextureFormat;
DoSamplingTest(TestAspectAndSamplerType::StencilAsUint,