Add VulkanAddWorkToEmptyResolvePass Toggle

This toggle is enabled by default on Qualcomm GPUs and adds a small
amount of work (an empty occlusion query) to render passes which
contain no draw or query commands. This works around a driver bug where
the resolve targets of the empty pass don't get resolved.

This case is covered by the CTS with
https://gpuweb.github.io/cts/standalone/?runnow=1&q=webgpu:api,operation,render_pass,resolve:render_pass_resolve:*
which was observed to fail on a Pixel 4 but with this fix now passes.

Bug: 411656647
Change-Id: If89e57df5289e18cf7314a9025d2ac985792ba8f
Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/238334
Reviewed-by: Kai Ninomiya <kainino@chromium.org>
Commit-Queue: Brandon Jones <bajones@chromium.org>
Reviewed-by: Loko Kung <lokokung@google.com>
diff --git a/src/dawn/native/Toggles.cpp b/src/dawn/native/Toggles.cpp
index d85eb16..d1adc69 100644
--- a/src/dawn/native/Toggles.cpp
+++ b/src/dawn/native/Toggles.cpp
@@ -639,6 +639,11 @@
     {Toggle::UseVulkanMemoryModel,
      {"use_vulkan_memory_model", "Use the Vulkan Memory Model if available.",
       "https://crbug.com/392606604", ToggleStage::Adapter}},
+    {Toggle::VulkanAddWorkToEmptyResolvePass,
+     {"vulkan_add_work_to_empty_resolve_pass",
+      "Adds a small amount of work to empty render passes which perform a resolve. This toggle is "
+      "enabled by default on Qualcomm GPUs, where it is needed to force the resolve to complete.",
+      "https://crbug.com/411656647", ToggleStage::Device}},
     {Toggle::NoWorkaroundSampleMaskBecomesZeroForAllButLastColorTarget,
      {"no_workaround_sample_mask_becomes_zero_for_all_but_last_color_target",
       "MacOS 12.0+ Intel has a bug where the sample mask is only applied for the last color "
diff --git a/src/dawn/native/Toggles.h b/src/dawn/native/Toggles.h
index 0d86dce..a761e87 100644
--- a/src/dawn/native/Toggles.h
+++ b/src/dawn/native/Toggles.h
@@ -151,6 +151,7 @@
     D3D12RelaxMinSubgroupSizeTo8,
     D3D12RelaxBufferTextureCopyPitchAndOffsetAlignment,
     UseVulkanMemoryModel,
+    VulkanAddWorkToEmptyResolvePass,
 
     // Unresolved issues.
     NoWorkaroundSampleMaskBecomesZeroForAllButLastColorTarget,
diff --git a/src/dawn/native/vulkan/CommandBufferVk.cpp b/src/dawn/native/vulkan/CommandBufferVk.cpp
index e795dec..cc444b1 100644
--- a/src/dawn/native/vulkan/CommandBufferVk.cpp
+++ b/src/dawn/native/vulkan/CommandBufferVk.cpp
@@ -1261,9 +1261,13 @@
     DescriptorSetTracker descriptorSets = {};
     RenderPipeline* lastPipeline = nullptr;
 
+    // Tracks the number of commands that do significant GPU work (a draw or query write) this pass.
+    uint32_t workCommandCount = 0;
+
     auto EncodeRenderBundleCommand = [&](CommandIterator* iter, Command type) {
         switch (type) {
             case Command::Draw: {
+                workCommandCount++;
                 DrawCmd* draw = iter->NextCommand<DrawCmd>();
 
                 descriptorSets.Apply(device, recordingContext, VK_PIPELINE_BIND_POINT_GRAPHICS);
@@ -1274,6 +1278,7 @@
             }
 
             case Command::DrawIndexed: {
+                workCommandCount++;
                 DrawIndexedCmd* draw = iter->NextCommand<DrawIndexedCmd>();
 
                 descriptorSets.Apply(device, recordingContext, VK_PIPELINE_BIND_POINT_GRAPHICS);
@@ -1284,6 +1289,7 @@
             }
 
             case Command::DrawIndirect: {
+                workCommandCount++;
                 DrawIndirectCmd* draw = iter->NextCommand<DrawIndirectCmd>();
                 Buffer* buffer = ToBackend(draw->indirectBuffer.Get());
 
@@ -1295,6 +1301,7 @@
             }
 
             case Command::DrawIndexedIndirect: {
+                workCommandCount++;
                 DrawIndexedIndirectCmd* draw = iter->NextCommand<DrawIndexedIndirectCmd>();
                 Buffer* buffer = ToBackend(draw->indirectBuffer.Get());
                 DAWN_ASSERT(buffer != nullptr);
@@ -1308,6 +1315,7 @@
             }
 
             case Command::MultiDrawIndirect: {
+                workCommandCount++;
                 MultiDrawIndirectCmd* cmd = iter->NextCommand<MultiDrawIndirectCmd>();
 
                 Buffer* indirectBuffer = ToBackend(cmd->indirectBuffer.Get());
@@ -1333,6 +1341,7 @@
                 break;
             }
             case Command::MultiDrawIndexedIndirect: {
+                workCommandCount++;
                 MultiDrawIndexedIndirectCmd* cmd = iter->NextCommand<MultiDrawIndexedIndirectCmd>();
 
                 Buffer* indirectBuffer = ToBackend(cmd->indirectBuffer.Get());
@@ -1476,6 +1485,17 @@
             case Command::EndRenderPass: {
                 mCommands.NextCommand<EndRenderPassCmd>();
 
+                // If no work-producing commands were executed during the render pass and the
+                // VulkanAddWorkToEmptyResolvePass toggle is enabled, add a small amount of work
+                // in the form of performing an occlusion query before ending the pass. This avoids
+                // a driver bug that fails to resolve render targets in empty passes.
+                if (workCommandCount == 0 &&
+                    device->IsToggleEnabled(Toggle::VulkanAddWorkToEmptyResolvePass)) {
+                    QuerySetBase* querySet = device->GetEmptyPassQuerySet();
+                    device->fn.CmdBeginQuery(commands, ToBackend(querySet)->GetHandle(), 0, 0);
+                    device->fn.CmdEndQuery(commands, ToBackend(querySet)->GetHandle(), 0);
+                }
+
                 device->fn.CmdEndRenderPass(commands);
 
                 // Write timestamp at the end of render pass if it's set.
@@ -1561,6 +1581,7 @@
             }
 
             case Command::BeginOcclusionQuery: {
+                workCommandCount++;
                 BeginOcclusionQueryCmd* cmd = mCommands.NextCommand<BeginOcclusionQueryCmd>();
 
                 device->fn.CmdBeginQuery(commands, ToBackend(cmd->querySet.Get())->GetHandle(),
@@ -1569,6 +1590,7 @@
             }
 
             case Command::EndOcclusionQuery: {
+                workCommandCount++;
                 EndOcclusionQueryCmd* cmd = mCommands.NextCommand<EndOcclusionQueryCmd>();
 
                 device->fn.CmdEndQuery(commands, ToBackend(cmd->querySet.Get())->GetHandle(),
@@ -1577,6 +1599,7 @@
             }
 
             case Command::WriteTimestamp: {
+                workCommandCount++;
                 WriteTimestampCmd* cmd = mCommands.NextCommand<WriteTimestampCmd>();
 
                 RecordWriteTimestampCmd(recordingContext, device, cmd->querySet.Get(),
diff --git a/src/dawn/native/vulkan/DeviceVk.cpp b/src/dawn/native/vulkan/DeviceVk.cpp
index 570c07da..67b7792 100644
--- a/src/dawn/native/vulkan/DeviceVk.cpp
+++ b/src/dawn/native/vulkan/DeviceVk.cpp
@@ -1100,4 +1100,17 @@
     return true;
 }
 
+// Lazily creates and returns the 1-query occlusion QuerySet for VulkanAddWorkToEmptyResolvePass.
+QuerySetBase* Device::GetEmptyPassQuerySet() {
+    DAWN_ASSERT(IsToggleEnabled(Toggle::VulkanAddWorkToEmptyResolvePass));
+
+    if (!mEmptyPassQuerySet) {
+        QuerySetDescriptor descriptor;
+        descriptor.type = wgpu::QueryType::Occlusion;
+        descriptor.count = 1;
+        mEmptyPassQuerySet = APICreateQuerySet(&descriptor);
+    }
+    return mEmptyPassQuerySet.Get();
+}
+
 }  // namespace dawn::native::vulkan
diff --git a/src/dawn/native/vulkan/DeviceVk.h b/src/dawn/native/vulkan/DeviceVk.h
index 99e43dc..764078e 100644
--- a/src/dawn/native/vulkan/DeviceVk.h
+++ b/src/dawn/native/vulkan/DeviceVk.h
@@ -132,6 +132,8 @@
                                                       wgpu::BufferUsage originalUsage,
                                                       size_t bufferSize) const override;
 
+    QuerySetBase* GetEmptyPassQuerySet();
+
   private:
     Device(AdapterBase* adapter,
            const UnpackedPtr<DeviceDescriptor>& descriptor,
@@ -210,6 +212,8 @@
 
     Ref<PipelineCache> mMonolithicPipelineCache;
 
+    Ref<QuerySetBase> mEmptyPassQuerySet;
+
     bool mSupportsMappableStorageBuffer = false;
 
     MaybeError ImportExternalImage(const ExternalImageDescriptorVk* descriptor,
diff --git a/src/dawn/native/vulkan/PhysicalDeviceVk.cpp b/src/dawn/native/vulkan/PhysicalDeviceVk.cpp
index c34da20..1817abb 100644
--- a/src/dawn/native/vulkan/PhysicalDeviceVk.cpp
+++ b/src/dawn/native/vulkan/PhysicalDeviceVk.cpp
@@ -792,6 +792,11 @@
         // texture. Work around it by resolving into a single level texture and then copying into
         // the intended layer.
         deviceToggles->Default(Toggle::AlwaysResolveIntoZeroLevelAndLayer, true);
+
+        // chromium:411656647: Qualcomm devices have a bug where an empty render pass that has a
+        // resolve target doesn't perform the resolve. To work around it, add a small amount of work
+        // to the pass to force it to execute.
+        deviceToggles->Default(Toggle::VulkanAddWorkToEmptyResolvePass, true);
     }
 
     if (IsAndroidARM()) {