Vulkan: use one barrier to transition states for all subresources

If a texture follows these rules:
* the old usages of all subresources are the same
* the new usages of all subresources are the same
then we can use one barrier to transition the states of all subresources.
We don't need to use one barrier per subresource.

This patch adds a flag in the frontend that captures many (but not all)
of the situations that follow the rules above, so that we don't need to
loop over every subresource in the Vulkan backend in those cases.

Bug: dawn:441
Change-Id: I6124ee2cb09c9142fefd8f057dc6d2659301e2d4
Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/22702
Commit-Queue: Yunchao He <yunchao.he@intel.com>
Reviewed-by: Corentin Wallez <cwallez@chromium.org>
diff --git a/src/dawn_native/PassResourceUsage.h b/src/dawn_native/PassResourceUsage.h
index 470aad5..9271114 100644
--- a/src/dawn_native/PassResourceUsage.h
+++ b/src/dawn_native/PassResourceUsage.h
@@ -28,11 +28,25 @@
     enum class PassType { Render, Compute };
 
     // Describe the usage of the whole texture and its subresources.
-    // subresourceUsages vector is used to track every subresource's usage within a texture.
-    // usage variable is used the track the whole texture even though it can be deduced from
-    // subresources' usages. This is designed deliberately to track texture usage in a fast path.
+    // - subresourceUsages vector is used to track every subresource's usage within a texture.
+    //
+    // - usage variable is used to track the whole texture even though it can be deduced from
+    // subresources' usages. This is designed deliberately to track texture usage in a fast path
+    // at frontend.
+    //
+    // - sameUsagesAcrossSubresources is used for optimization at backend. If the texture view
+    // we are using covers all subresources, then the texture's usages of all subresources are
+    // the same. Otherwise the texture's usages of all subresources are treated as different,
+    // although we can deliberately design some particular cases in which we have a few texture
+    // views and all of them have the same usages and they cover all subresources of the texture
+    // altogether.
+
+    // TODO(yunchao.he@intel.com): if sameUsagesAcrossSubresources is true, we don't need
+    // the vector to record every single subresource's Usages. The texture usage is enough. And we
+    // can decompress texture usage to a vector if necessary.
     struct PassTextureUsage {
         wgpu::TextureUsage usage;
+        bool sameUsagesAcrossSubresources;
         std::vector<wgpu::TextureUsage> subresourceUsages;
     };
 
diff --git a/src/dawn_native/PassResourceUsageTracker.cpp b/src/dawn_native/PassResourceUsageTracker.cpp
index dd1c022..f5e4a56 100644
--- a/src/dawn_native/PassResourceUsageTracker.cpp
+++ b/src/dawn_native/PassResourceUsageTracker.cpp
@@ -40,11 +40,12 @@
         // TODO (yunchao.he@intel.com): optimize this
         PassTextureUsage& textureUsage = mTextureUsages[texture];
 
-        // Set usage for the whole texture
+        // Set parameters for the whole texture
         textureUsage.usage |= usage;
+        uint32_t subresourceCount = texture->GetSubresourceCount();
+        textureUsage.sameUsagesAcrossSubresources = levelCount * layerCount == subresourceCount;
 
         // Set usages for subresources
-        uint32_t subresourceCount = texture->GetSubresourceCount();
         if (!textureUsage.subresourceUsages.size()) {
             textureUsage.subresourceUsages =
                 std::vector<wgpu::TextureUsage>(subresourceCount, wgpu::TextureUsage::None);
@@ -63,6 +64,7 @@
                                                    const PassTextureUsage& textureUsage) {
         PassTextureUsage& passTextureUsage = mTextureUsages[texture];
         passTextureUsage.usage |= textureUsage.usage;
+        passTextureUsage.sameUsagesAcrossSubresources &= textureUsage.sameUsagesAcrossSubresources;
 
         uint32_t subresourceCount = texture->GetSubresourceCount();
         ASSERT(textureUsage.subresourceUsages.size() == subresourceCount);
diff --git a/src/dawn_native/vulkan/CommandBufferVk.cpp b/src/dawn_native/vulkan/CommandBufferVk.cpp
index 6f55a01..ccd04ea 100644
--- a/src/dawn_native/vulkan/CommandBufferVk.cpp
+++ b/src/dawn_native/vulkan/CommandBufferVk.cpp
@@ -399,8 +399,7 @@
                                                                  texture->GetNumMipLevels(), 0,
                                                                  texture->GetArrayLayers());
                 }
-                texture->TransitionUsageForPass(recordingContext,
-                                                usages.textureUsages[i].subresourceUsages,
+                texture->TransitionUsageForPass(recordingContext, usages.textureUsages[i],
                                                 &imageBarriers, &srcStages, &dstStages);
             }
 
diff --git a/src/dawn_native/vulkan/TextureVk.cpp b/src/dawn_native/vulkan/TextureVk.cpp
index 15d7137..af3b604 100644
--- a/src/dawn_native/vulkan/TextureVk.cpp
+++ b/src/dawn_native/vulkan/TextureVk.cpp
@@ -229,8 +229,10 @@
                                                 const VkImage& image,
                                                 wgpu::TextureUsage lastUsage,
                                                 wgpu::TextureUsage usage,
-                                                uint32_t mipLevel,
-                                                uint32_t arrayLayer) {
+                                                uint32_t baseMipLevel,
+                                                uint32_t levelCount,
+                                                uint32_t baseArrayLayer,
+                                                uint32_t layerCount) {
             VkImageMemoryBarrier barrier;
             barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
             barrier.pNext = nullptr;
@@ -240,10 +242,10 @@
             barrier.newLayout = VulkanImageLayout(usage, format);
             barrier.image = image;
             barrier.subresourceRange.aspectMask = VulkanAspectMask(format);
-            barrier.subresourceRange.baseMipLevel = mipLevel;
-            barrier.subresourceRange.levelCount = 1;
-            barrier.subresourceRange.baseArrayLayer = arrayLayer;
-            barrier.subresourceRange.layerCount = 1;
+            barrier.subresourceRange.baseMipLevel = baseMipLevel;
+            barrier.subresourceRange.levelCount = levelCount;
+            barrier.subresourceRange.baseArrayLayer = baseArrayLayer;
+            barrier.subresourceRange.layerCount = layerCount;
 
             barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
             barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
@@ -683,7 +685,7 @@
             if (barriers->size() == transitionBarrierStart) {
                 barriers->push_back(BuildMemoryBarrier(GetFormat(), mHandle,
                                                        wgpu::TextureUsage::None,
-                                                       wgpu::TextureUsage::None, 0, 0));
+                                                       wgpu::TextureUsage::None, 0, 1, 0, 1));
             }
 
             // Transfer texture from external queue to graphics queue
@@ -697,7 +699,7 @@
             if (barriers->size() == transitionBarrierStart) {
                 barriers->push_back(BuildMemoryBarrier(GetFormat(), mHandle,
                                                        wgpu::TextureUsage::None,
-                                                       wgpu::TextureUsage::None, 0, 0));
+                                                       wgpu::TextureUsage::None, 0, 1, 0, 1));
             }
 
             // Transfer texture from graphics queue to external queue
@@ -715,13 +717,22 @@
         mWaitRequirements.clear();
     }
 
+    bool Texture::CanReuseWithoutBarrier(wgpu::TextureUsage lastUsage, wgpu::TextureUsage usage) {
+        // Reuse the texture directly and avoid encoding barriers when it isn't needed.
+        bool lastReadOnly = (lastUsage & kReadOnlyTextureUsages) == lastUsage;
+        if (lastReadOnly && lastUsage == usage && mLastExternalState == mExternalState) {
+            return true;
+        }
+        return false;
+    }
+
     void Texture::TransitionFullUsage(CommandRecordingContext* recordingContext,
                                       wgpu::TextureUsage usage) {
         TransitionUsageNow(recordingContext, usage, 0, GetNumMipLevels(), 0, GetArrayLayers());
     }
 
     void Texture::TransitionUsageForPass(CommandRecordingContext* recordingContext,
-                                         const std::vector<wgpu::TextureUsage>& subresourceUsages,
+                                         const PassTextureUsage& textureUsages,
                                          std::vector<VkImageMemoryBarrier>* imageBarriers,
                                          VkPipelineStageFlags* srcStages,
                                          VkPipelineStageFlags* dstStages) {
@@ -731,32 +742,49 @@
         wgpu::TextureUsage allUsages = wgpu::TextureUsage::None;
         wgpu::TextureUsage allLastUsages = wgpu::TextureUsage::None;
 
-        ASSERT(subresourceUsages.size() == GetSubresourceCount());
+        uint32_t subresourceCount = GetSubresourceCount();
+        ASSERT(textureUsages.subresourceUsages.size() == subresourceCount);
         // This transitions assume it is a 2D texture
         ASSERT(GetDimension() == wgpu::TextureDimension::e2D);
 
-        for (uint32_t arrayLayer = 0; arrayLayer < GetArrayLayers(); ++arrayLayer) {
-            for (uint32_t mipLevel = 0; mipLevel < GetNumMipLevels(); ++mipLevel) {
-                uint32_t index = GetSubresourceIndex(mipLevel, arrayLayer);
+        // If new usages of all subresources are the same and old usages of all subresources are
+        // the same too, we can use one barrier to do state transition for all subresources.
+        // Note that if the texture has only one mip level and one array slice, it will fall into
+        // this category.
+        if (textureUsages.sameUsagesAcrossSubresources && mSameLastUsagesAcrossSubresources) {
+            if (CanReuseWithoutBarrier(mSubresourceLastUsages[0], textureUsages.usage)) {
+                return;
+            }
 
-                // Avoid encoding barriers when it isn't needed.
-                if (subresourceUsages[index] == wgpu::TextureUsage::None) {
-                    continue;
+            imageBarriers->push_back(BuildMemoryBarrier(format, mHandle, mSubresourceLastUsages[0],
+                                                        textureUsages.usage, 0, GetNumMipLevels(),
+                                                        0, GetArrayLayers()));
+            allLastUsages = mSubresourceLastUsages[0];
+            allUsages = textureUsages.usage;
+            for (uint32_t i = 0; i < subresourceCount; ++i) {
+                mSubresourceLastUsages[i] = textureUsages.usage;
+            }
+        } else {
+            for (uint32_t arrayLayer = 0; arrayLayer < GetArrayLayers(); ++arrayLayer) {
+                for (uint32_t mipLevel = 0; mipLevel < GetNumMipLevels(); ++mipLevel) {
+                    uint32_t index = GetSubresourceIndex(mipLevel, arrayLayer);
+
+                    // Avoid encoding barriers when it isn't needed.
+                    if (textureUsages.subresourceUsages[index] == wgpu::TextureUsage::None) {
+                        continue;
+                    }
+
+                    if (CanReuseWithoutBarrier(mSubresourceLastUsages[index],
+                                               textureUsages.subresourceUsages[index])) {
+                        continue;
+                    }
+                    imageBarriers->push_back(BuildMemoryBarrier(
+                        format, mHandle, mSubresourceLastUsages[index],
+                        textureUsages.subresourceUsages[index], mipLevel, 1, arrayLayer, 1));
+                    allLastUsages |= mSubresourceLastUsages[index];
+                    allUsages |= textureUsages.subresourceUsages[index];
+                    mSubresourceLastUsages[index] = textureUsages.subresourceUsages[index];
                 }
-                bool lastReadOnly = (mLastSubresourceUsages[index] & kReadOnlyTextureUsages) ==
-                                    mLastSubresourceUsages[index];
-                if (lastReadOnly && mLastSubresourceUsages[index] == subresourceUsages[index] &&
-                    mLastExternalState == mExternalState) {
-                    continue;
-                }
-
-                imageBarriers->push_back(
-                    BuildMemoryBarrier(format, mHandle, mLastSubresourceUsages[index],
-                                       subresourceUsages[index], mipLevel, arrayLayer));
-
-                allUsages |= subresourceUsages[index];
-                allLastUsages |= mLastSubresourceUsages[index];
-                mLastSubresourceUsages[index] = subresourceUsages[index];
             }
         }
 
@@ -767,6 +795,7 @@
 
         *srcStages |= VulkanPipelineStage(allLastUsages, format);
         *dstStages |= VulkanPipelineStage(allUsages, format);
+        mSameLastUsagesAcrossSubresources = textureUsages.sameUsagesAcrossSubresources;
     }
 
     void Texture::TransitionUsageNow(CommandRecordingContext* recordingContext,
@@ -783,23 +812,18 @@
         // This transitions assume it is a 2D texture
         ASSERT(GetDimension() == wgpu::TextureDimension::e2D);
 
-        for (uint32_t arrayLayer = 0; arrayLayer < layerCount; ++arrayLayer) {
-            for (uint32_t mipLevel = 0; mipLevel < levelCount; ++mipLevel) {
-                uint32_t index =
-                    GetSubresourceIndex(baseMipLevel + mipLevel, baseArrayLayer + arrayLayer);
-                wgpu::TextureUsage lastUsage = mLastSubresourceUsages[index];
+        for (uint32_t layer = baseArrayLayer; layer < baseArrayLayer + layerCount; ++layer) {
+            for (uint32_t level = baseMipLevel; level < baseMipLevel + levelCount; ++level) {
+                uint32_t index = GetSubresourceIndex(level, layer);
 
-                // Avoid encoding barriers when it isn't needed.
-                bool lastReadOnly = (lastUsage & kReadOnlyTextureUsages) == lastUsage;
-                if (lastReadOnly && lastUsage == usage && mLastExternalState == mExternalState) {
-                    return;
+                if (CanReuseWithoutBarrier(mSubresourceLastUsages[index], usage)) {
+                    continue;
                 }
 
-                barriers.push_back(BuildMemoryBarrier(format, mHandle, lastUsage, usage,
-                                                      baseMipLevel + mipLevel,
-                                                      baseArrayLayer + arrayLayer));
-                allLastUsages |= lastUsage;
-                mLastSubresourceUsages[index] = usage;
+                barriers.push_back(BuildMemoryBarrier(
+                    format, mHandle, mSubresourceLastUsages[index], usage, level, 1, layer, 1));
+                allLastUsages |= mSubresourceLastUsages[index];
+                mSubresourceLastUsages[index] = usage;
             }
         }
 
@@ -812,6 +836,10 @@
         ToBackend(GetDevice())
             ->fn.CmdPipelineBarrier(recordingContext->commandBuffer, srcStages, dstStages, 0, 0,
                                     nullptr, 0, nullptr, barriers.size(), barriers.data());
+
+        // TODO(yunchao.he@intel.com): do the optimization to combine all barriers into a single one
+        // for a texture if possible.
+        mSameLastUsagesAcrossSubresources = levelCount * layerCount == GetSubresourceCount();
     }
 
     MaybeError Texture::ClearTexture(CommandRecordingContext* recordingContext,
diff --git a/src/dawn_native/vulkan/TextureVk.h b/src/dawn_native/vulkan/TextureVk.h
index a1df372..4bb9ab6 100644
--- a/src/dawn_native/vulkan/TextureVk.h
+++ b/src/dawn_native/vulkan/TextureVk.h
@@ -18,6 +18,7 @@
 #include "dawn_native/Texture.h"
 
 #include "common/vulkan_platform.h"
+#include "dawn_native/PassResourceUsage.h"
 #include "dawn_native/ResourceMemoryAllocation.h"
 #include "dawn_native/vulkan/ExternalHandle.h"
 #include "dawn_native/vulkan/external_memory/MemoryService.h"
@@ -73,7 +74,7 @@
                                 uint32_t baseArrayLayer,
                                 uint32_t layerCount);
         void TransitionUsageForPass(CommandRecordingContext* recordingContext,
-                                    const std::vector<wgpu::TextureUsage>& subresourceUsages,
+                                    const PassTextureUsage& textureUsages,
                                     std::vector<VkImageMemoryBarrier>* imageBarriers,
                                     VkPipelineStageFlags* srcStages,
                                     VkPipelineStageFlags* dstStages);
@@ -112,6 +113,7 @@
         void TweakTransitionForExternalUsage(CommandRecordingContext* recordingContext,
                                              std::vector<VkImageMemoryBarrier>* barriers,
                                              size_t transitionBarrierStart);
+        bool CanReuseWithoutBarrier(wgpu::TextureUsage lastUsage, wgpu::TextureUsage usage);
 
         VkImage mHandle = VK_NULL_HANDLE;
         ResourceMemoryAllocation mMemoryAllocation;
@@ -130,9 +132,11 @@
         VkSemaphore mSignalSemaphore = VK_NULL_HANDLE;
         std::vector<VkSemaphore> mWaitRequirements;
 
+        bool mSameLastUsagesAcrossSubresources = true;
+
         // A usage of none will make sure the texture is transitioned before its first use as
         // required by the Vulkan spec.
-        std::vector<wgpu::TextureUsage> mLastSubresourceUsages =
+        std::vector<wgpu::TextureUsage> mSubresourceLastUsages =
             std::vector<wgpu::TextureUsage>(GetSubresourceCount(), wgpu::TextureUsage::None);
     };