Implement AlwaysResolveIntoZeroLevelAndLayer globally

Previously this toggle was implemented only for the Metal backend, but
a need for it was identified on Android as well. This change moves the
implementation to the backend-agnostic command encoder recording so that
it works for all backends. Fundamentally it's still doing the same
thing, however: Swapping resolve targets that point at a non-zero mip
level or layer with a temporary texture and then performing a copy once
the render pass has ended.

Bug: dawn:1569
Change-Id: I292860cc74f653b2880e727d2ef3a7dfa3f10b91
Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/106040
Reviewed-by: Corentin Wallez <cwallez@chromium.org>
Commit-Queue: Brandon Jones <bajones@chromium.org>
Kokoro: Kokoro <noreply+kokoro@google.com>
diff --git a/src/dawn/native/CommandEncoder.cpp b/src/dawn/native/CommandEncoder.cpp
index f082047..36603e7 100644
--- a/src/dawn/native/CommandEncoder.cpp
+++ b/src/dawn/native/CommandEncoder.cpp
@@ -656,6 +656,17 @@
     return true;
 }
 
+// Tracks the temporary resolve attachments used when the AlwaysResolveIntoZeroLevelAndLayer toggle
+// is active so that the results can be copied from the temporary resolve attachment into the
+// intended target after the render pass is complete.
+struct TemporaryResolveAttachment {
+    TemporaryResolveAttachment(Ref<TextureViewBase> src, Ref<TextureViewBase> dst)
+        : copySrc(std::move(src)), copyDst(std::move(dst)) {}
+
+    Ref<TextureViewBase> copySrc;
+    Ref<TextureViewBase> copyDst;
+};
+
 }  // namespace
 
 bool HasDeprecatedColor(const RenderPassColorAttachment& attachment) {
@@ -863,6 +874,8 @@
         return RenderPassEncoder::MakeError(device, this, &mEncodingContext);
     }
 
+    std::function<void()> passEndCallback = nullptr;
+
     bool success = mEncodingContext.TryEncode(
         this,
         [&](CommandAllocator* allocator) -> MaybeError {
@@ -1004,14 +1017,19 @@
                 usageTracker.TrackQueryAvailability(querySet, queryIndex);
             }
 
+            DAWN_TRY_ASSIGN(passEndCallback,
+                            ApplyRenderPassWorkarounds(device, &usageTracker, cmd));
+
             return {};
         },
         "encoding %s.BeginRenderPass(%s).", this, descriptor);
 
     if (success) {
-        Ref<RenderPassEncoder> passEncoder = RenderPassEncoder::Create(
-            device, descriptor, this, &mEncodingContext, std::move(usageTracker),
-            std::move(attachmentState), width, height, depthReadOnly, stencilReadOnly);
+        Ref<RenderPassEncoder> passEncoder =
+            RenderPassEncoder::Create(device, descriptor, this, &mEncodingContext,
+                                      std::move(usageTracker), std::move(attachmentState), width,
+                                      height, depthReadOnly, stencilReadOnly, passEndCallback);
+
         mEncodingContext.EnterPass(passEncoder.Get());
 
         if (ShouldApplyClearBigIntegerColorValueWithDraw(device, descriptor)) {
@@ -1028,6 +1046,101 @@
     return RenderPassEncoder::MakeError(device, this, &mEncodingContext);
 }
 
+// This function handles render pass workarounds. Because some cases may require
+// multiple workarounds, it applies any workarounds one by one and calls itself
+// recursively to handle the next workaround if needed.
+ResultOrError<std::function<void()>> CommandEncoder::ApplyRenderPassWorkarounds(
+    DeviceBase* device,
+    RenderPassResourceUsageTracker* usageTracker,
+    BeginRenderPassCmd* cmd,
+    std::function<void()> passEndCallback) {
+    // dawn:56, dawn:1569
+    // Handle Toggle AlwaysResolveIntoZeroLevelAndLayer. This swaps out the given resolve attachment
+    // for a temporary one that has no layers or mip levels. The results are copied from the
+    // temporary attachment into the given attachment when the render pass ends. (Handled at the
+    // bottom of this function)
+    if (device->IsToggleEnabled(Toggle::AlwaysResolveIntoZeroLevelAndLayer)) {
+        std::vector<TemporaryResolveAttachment> temporaryResolveAttachments;
+
+        for (ColorAttachmentIndex index :
+             IterateBitSet(cmd->attachmentState->GetColorAttachmentsMask())) {
+            TextureViewBase* resolveTarget = cmd->colorAttachments[index].resolveTarget.Get();
+
+            if (resolveTarget != nullptr && (resolveTarget->GetBaseMipLevel() != 0 ||
+                                             resolveTarget->GetBaseArrayLayer() != 0)) {
+                // Create a temporary texture to resolve into
+                // TODO(dawn:1618): Defer allocation of temporary textures till submit time.
+                TextureDescriptor descriptor = {};
+                descriptor.usage =
+                    wgpu::TextureUsage::RenderAttachment | wgpu::TextureUsage::CopySrc;
+                descriptor.format = resolveTarget->GetFormat().format;
+                descriptor.size =
+                    resolveTarget->GetTexture()->GetMipLevelSingleSubresourceVirtualSize(
+                        resolveTarget->GetBaseMipLevel());
+                descriptor.dimension = wgpu::TextureDimension::e2D;
+                descriptor.mipLevelCount = 1;
+
+                Ref<TextureBase> temporaryResolveTexture;
+                DAWN_TRY_ASSIGN(temporaryResolveTexture, device->CreateTexture(&descriptor));
+
+                TextureViewDescriptor viewDescriptor = {};
+                Ref<TextureViewBase> temporaryResolveView;
+                DAWN_TRY_ASSIGN(
+                    temporaryResolveView,
+                    device->CreateTextureView(temporaryResolveTexture.Get(), &viewDescriptor));
+
+                // Save the temporary and given render targets together for copying after
+                // the render pass ends.
+                temporaryResolveAttachments.emplace_back(temporaryResolveView, resolveTarget);
+
+                // Replace the given resolve attachment with the temporary one.
+                usageTracker->TextureViewUsedAs(temporaryResolveView.Get(),
+                                                wgpu::TextureUsage::RenderAttachment);
+                cmd->colorAttachments[index].resolveTarget = temporaryResolveView;
+            }
+        }
+
+        if (temporaryResolveAttachments.size()) {
+            // Check for other workarounds that need to be applied recursively.
+            return ApplyRenderPassWorkarounds(
+                device, usageTracker, cmd,
+                [this, passEndCallback = std::move(passEndCallback),
+                 temporaryResolveAttachments = std::move(temporaryResolveAttachments)]() -> void {
+                    // Called once the render pass has been ended.
+                    // Handle any copies needed for the AlwaysResolveIntoZeroLevelAndLayer
+                    // workaround immediately after the render pass ends and before any additional
+                    // commands are recorded.
+                    for (auto& copyTarget : temporaryResolveAttachments) {
+                        ImageCopyTexture srcImageCopyTexture = {};
+                        srcImageCopyTexture.texture = copyTarget.copySrc->GetTexture();
+                        srcImageCopyTexture.aspect = wgpu::TextureAspect::All;
+                        srcImageCopyTexture.mipLevel = 0;
+                        srcImageCopyTexture.origin = {0, 0, 0};
+
+                        ImageCopyTexture dstImageCopyTexture = {};
+                        dstImageCopyTexture.texture = copyTarget.copyDst->GetTexture();
+                        dstImageCopyTexture.aspect = wgpu::TextureAspect::All;
+                        dstImageCopyTexture.mipLevel = copyTarget.copyDst->GetBaseMipLevel();
+                        dstImageCopyTexture.origin = {0, 0,
+                                                      copyTarget.copyDst->GetBaseArrayLayer()};
+
+                        Extent3D extent3D = copyTarget.copySrc->GetTexture()->GetSize();
+
+                        this->APICopyTextureToTextureInternal(&srcImageCopyTexture,
+                                                              &dstImageCopyTexture, &extent3D);
+                    }
+
+                    // If there were any other callbacks in the workaround stack, call the next one.
+                    if (passEndCallback) {
+                        passEndCallback();
+                    }
+                });
+        }
+    }
+
+    return std::move(passEndCallback);
+}
+
 void CommandEncoder::APICopyBufferToBuffer(BufferBase* source,
                                            uint64_t sourceOffset,
                                            BufferBase* destination,
diff --git a/src/dawn/native/CommandEncoder.h b/src/dawn/native/CommandEncoder.h
index e43c6e7..d1e20f3 100644
--- a/src/dawn/native/CommandEncoder.h
+++ b/src/dawn/native/CommandEncoder.h
@@ -102,6 +102,12 @@
 
     void DestroyImpl() override;
 
+    ResultOrError<std::function<void()>> ApplyRenderPassWorkarounds(
+        DeviceBase* device,
+        RenderPassResourceUsageTracker* usageTracker,
+        BeginRenderPassCmd* cmd,
+        std::function<void()> passEndCallback = nullptr);
+
     // Helper to be able to implement both APICopyTextureToTexture and
     // APICopyTextureToTextureInternal. The only difference between both
     // copies, is that the Internal one will also check internal usage.
diff --git a/src/dawn/native/RenderPassEncoder.cpp b/src/dawn/native/RenderPassEncoder.cpp
index 018081e..0ebc0bf 100644
--- a/src/dawn/native/RenderPassEncoder.cpp
+++ b/src/dawn/native/RenderPassEncoder.cpp
@@ -59,7 +59,8 @@
                                      uint32_t renderTargetWidth,
                                      uint32_t renderTargetHeight,
                                      bool depthReadOnly,
-                                     bool stencilReadOnly)
+                                     bool stencilReadOnly,
+                                     std::function<void()> endCallback)
     : RenderEncoderBase(device,
                         descriptor->label,
                         encodingContext,
@@ -69,7 +70,8 @@
       mCommandEncoder(commandEncoder),
       mRenderTargetWidth(renderTargetWidth),
       mRenderTargetHeight(renderTargetHeight),
-      mOcclusionQuerySet(descriptor->occlusionQuerySet) {
+      mOcclusionQuerySet(descriptor->occlusionQuerySet),
+      mEndCallback(std::move(endCallback)) {
     mUsageTracker = std::move(usageTracker);
     const RenderPassDescriptorMaxDrawCount* maxDrawCountInfo = nullptr;
     FindInChain(descriptor->nextInChain, &maxDrawCountInfo);
@@ -89,11 +91,12 @@
                                                  uint32_t renderTargetWidth,
                                                  uint32_t renderTargetHeight,
                                                  bool depthReadOnly,
-                                                 bool stencilReadOnly) {
+                                                 bool stencilReadOnly,
+                                                 std::function<void()> endCallback) {
     return AcquireRef(new RenderPassEncoder(device, descriptor, commandEncoder, encodingContext,
                                             std::move(usageTracker), std::move(attachmentState),
                                             renderTargetWidth, renderTargetHeight, depthReadOnly,
-                                            stencilReadOnly));
+                                            stencilReadOnly, std::move(endCallback)));
 }
 
 RenderPassEncoder::RenderPassEncoder(DeviceBase* device,
@@ -157,6 +160,10 @@
             return {};
         },
         "encoding %s.End().", this);
+
+    if (mEndCallback) {
+        mEndCallback();
+    }
 }
 
 void RenderPassEncoder::APIEndPass() {
diff --git a/src/dawn/native/RenderPassEncoder.h b/src/dawn/native/RenderPassEncoder.h
index 32199f1..6328852 100644
--- a/src/dawn/native/RenderPassEncoder.h
+++ b/src/dawn/native/RenderPassEncoder.h
@@ -36,7 +36,8 @@
                                          uint32_t renderTargetWidth,
                                          uint32_t renderTargetHeight,
                                          bool depthReadOnly,
-                                         bool stencilReadOnly);
+                                         bool stencilReadOnly,
+                                         std::function<void()> endCallback = nullptr);
     static Ref<RenderPassEncoder> MakeError(DeviceBase* device,
                                             CommandEncoder* commandEncoder,
                                             EncodingContext* encodingContext);
@@ -72,7 +73,8 @@
                       uint32_t renderTargetWidth,
                       uint32_t renderTargetHeight,
                       bool depthReadOnly,
-                      bool stencilReadOnly);
+                      bool stencilReadOnly,
+                      std::function<void()> endCallback = nullptr);
     RenderPassEncoder(DeviceBase* device,
                       CommandEncoder* commandEncoder,
                       EncodingContext* encodingContext,
@@ -97,6 +99,8 @@
 
     // This is the hardcoded value in the WebGPU spec.
     uint64_t mMaxDrawCount = 50000000;
+
+    std::function<void()> mEndCallback;
 };
 
 }  // namespace dawn::native
diff --git a/src/dawn/native/Texture.cpp b/src/dawn/native/Texture.cpp
index 5c525da..c462009 100644
--- a/src/dawn/native/Texture.cpp
+++ b/src/dawn/native/Texture.cpp
@@ -553,6 +553,16 @@
         mInternalUsage |= internalUsageDesc->internalUsage;
     }
     GetObjectTrackingList()->Track(this);
+
+    // dawn:1569: If a texture with multiple array layers or mip levels is specified as a texture
+    // attachment when this toggle is active, it needs to be given CopyDst usage internally.
+    bool applyAlwaysResolveIntoZeroLevelAndLayerToggle =
+        device->IsToggleEnabled(Toggle::AlwaysResolveIntoZeroLevelAndLayer) &&
+        (GetArrayLayers() > 1 || GetNumMipLevels() > 1) &&
+        (GetInternalUsage() & wgpu::TextureUsage::RenderAttachment);
+    if (applyAlwaysResolveIntoZeroLevelAndLayerToggle) {
+        AddInternalUsage(wgpu::TextureUsage::CopyDst);
+    }
 }
 
 TextureBase::~TextureBase() = default;
diff --git a/src/dawn/native/Toggles.cpp b/src/dawn/native/Toggles.cpp
index 1095dec..7371ee1 100644
--- a/src/dawn/native/Toggles.cpp
+++ b/src/dawn/native/Toggles.cpp
@@ -49,7 +49,8 @@
       "a texture, we first resolve into a temporarily 2D texture with only one mipmap level and "
       "one array layer, and copy the result of MSAA resolve into the true resolve target. This "
       "workaround is enabled by default on the Metal drivers that have bugs when setting non-zero "
-      "resolveLevel or resolveSlice.",
+      "resolveLevel or resolveSlice. It is also enabled by default on Qualcomm Vulkan drivers, "
+      "which have similar bugs.",
       "https://crbug.com/dawn/56"}},
     {Toggle::LazyClearResourceOnFirstUse,
      {"lazy_clear_resource_on_first_use",
diff --git a/src/dawn/native/metal/UtilsMetal.mm b/src/dawn/native/metal/UtilsMetal.mm
index 9a091b8..daaa390 100644
--- a/src/dawn/native/metal/UtilsMetal.mm
+++ b/src/dawn/native/metal/UtilsMetal.mm
@@ -111,23 +111,6 @@
     return result;
 }
 
-// Same as PatchAttachmentWithTemporary but for the resolve attachment.
-ResultOrError<SavedMetalAttachment> PatchResolveAttachmentWithTemporary(
-    Device* device,
-    MTLRenderPassAttachmentDescriptor* attachment) {
-    SavedMetalAttachment result;
-    DAWN_TRY_ASSIGN(
-        result, SaveAttachmentCreateTemporary(device, attachment.resolveTexture,
-                                              attachment.resolveLevel, attachment.resolveSlice));
-
-    // Replace the resolve attachment with the tempoary.
-    attachment.resolveTexture = result.temporary.Get();
-    attachment.resolveLevel = 0;
-    attachment.resolveSlice = 0;
-
-    return result;
-}
-
 // Helper function for Toggle EmulateStoreAndMSAAResolve
 void ResolveInAnotherRenderPass(
     CommandRecordingContext* commandContext,
@@ -334,45 +317,6 @@
     // workarounds to happen at the same time, it handles workarounds one by one and calls
     // itself recursively to handle the next workaround if needed.
 
-    // Handle Toggle AlwaysResolveIntoZeroLevelAndLayer. We must handle this before applying
-    // the store + MSAA resolve workaround, otherwise this toggle will never be handled because
-    // the resolve texture is removed when applying the store + MSAA resolve workaround.
-    if (device->IsToggleEnabled(Toggle::AlwaysResolveIntoZeroLevelAndLayer)) {
-        std::array<SavedMetalAttachment, kMaxColorAttachments> trueResolveAttachments = {};
-        bool workaroundUsed = false;
-        for (uint32_t i = 0; i < kMaxColorAttachments; ++i) {
-            if (mtlRenderPass.colorAttachments[i].resolveTexture == nullptr) {
-                continue;
-            }
-
-            if (mtlRenderPass.colorAttachments[i].resolveLevel == 0 &&
-                mtlRenderPass.colorAttachments[i].resolveSlice == 0) {
-                continue;
-            }
-
-            DAWN_TRY_ASSIGN(
-                trueResolveAttachments[i],
-                PatchResolveAttachmentWithTemporary(device, mtlRenderPass.colorAttachments[i]));
-            workaroundUsed = true;
-        }
-
-        // If we need to use a temporary resolve texture we need to copy the result of MSAA
-        // resolve back to the true resolve targets.
-        if (workaroundUsed) {
-            DAWN_TRY(EncodeMetalRenderPass(device, commandContext, mtlRenderPass, width, height,
-                                           std::move(encodeInside), renderPassCmd));
-
-            for (uint32_t i = 0; i < kMaxColorAttachments; ++i) {
-                if (trueResolveAttachments[i].texture == nullptr) {
-                    continue;
-                }
-
-                trueResolveAttachments[i].CopyFromTemporaryToAttachment(commandContext);
-            }
-            return {};
-        }
-    }
-
     // Handles the workaround for r8unorm rg8unorm mipmap rendering being broken on some
     // devices. Render to a temporary texture instead and then copy back to the attachment.
     if (device->IsToggleEnabled(Toggle::MetalRenderR8RG8UnormSmallMipToTempTexture)) {
diff --git a/src/dawn/native/vulkan/DeviceVk.cpp b/src/dawn/native/vulkan/DeviceVk.cpp
index 63c94a2..7a7b61e 100644
--- a/src/dawn/native/vulkan/DeviceVk.cpp
+++ b/src/dawn/native/vulkan/DeviceVk.cpp
@@ -639,12 +639,16 @@
     // By default try to use S8 if available.
     SetToggle(Toggle::VulkanUseS8, true);
 
-    // dawn:1564: Clearing a depth/stencil buffer in a render pass and then sampling it in a
-    // compute pass in the same command buffer causes a crash on Qualcomm GPUs. To work around that
-    // bug, split the command buffer any time we can detect that situation.
     if (ToBackend(GetAdapter())->IsAndroidQualcomm()) {
-        ForceSetToggle(Toggle::VulkanSplitCommandBufferOnDepthStencilComputeSampleAfterRenderPass,
-                       true);
+        // dawn:1564: Clearing a depth/stencil buffer in a render pass and then sampling it in a
+        // compute pass in the same command buffer causes a crash on Qualcomm GPUs. To work around
+        // that bug, split the command buffer any time we can detect that situation.
+        SetToggle(Toggle::VulkanSplitCommandBufferOnDepthStencilComputeSampleAfterRenderPass, true);
+
+        // dawn:1569: Qualcomm devices have a bug resolving into a non-zero level of an array
+        // texture. Work around it by resolving into a single level texture and then copying into
+        // the intended layer.
+        SetToggle(Toggle::AlwaysResolveIntoZeroLevelAndLayer, true);
     }
 }
 
diff --git a/src/dawn/tests/end2end/MultisampledRenderingTests.cpp b/src/dawn/tests/end2end/MultisampledRenderingTests.cpp
index ea7a28d..fb6a7a3 100644
--- a/src/dawn/tests/end2end/MultisampledRenderingTests.cpp
+++ b/src/dawn/tests/end2end/MultisampledRenderingTests.cpp
@@ -521,9 +521,8 @@
     // TODO(dawn:462): Issue in the D3D12 validation layers.
     DAWN_SUPPRESS_TEST_IF(IsD3D12() && IsBackendValidationEnabled());
 
-    // TODO(dawn:1549) Fails on Qualcomm-based Android devices.
     // TODO(dawn:1550) Fails on ARM-based Android devices.
-    DAWN_SUPPRESS_TEST_IF(IsAndroid());
+    DAWN_SUPPRESS_TEST_IF(IsAndroid() && IsARM());
 
     wgpu::TextureView multisampledColorView2 =
         CreateTextureForRenderAttachment(kColorFormat, kSampleCount).CreateView();
@@ -1138,6 +1137,7 @@
                       OpenGLBackend(),
                       OpenGLESBackend(),
                       VulkanBackend(),
+                      VulkanBackend({"always_resolve_into_zero_level_and_layer"}),
                       MetalBackend({"emulate_store_and_msaa_resolve"}),
                       MetalBackend({"always_resolve_into_zero_level_and_layer"}),
                       MetalBackend({"always_resolve_into_zero_level_and_layer",