Vulkan: Combine all the barriers before dispatch() in one call

This patch combines all the resource barriers recorded before each
dispatch() into a single vkCmdPipelineBarrier() call, reducing the
number of vkCmdPipelineBarrier() calls in the Vulkan command buffer.

BUG=dawn:522

Change-Id: I1b5943e62eb0a09db96de12196fcabb3448e9e4d
Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/28283
Reviewed-by: Corentin Wallez <cwallez@chromium.org>
Reviewed-by: Austin Eng <enga@chromium.org>
Commit-Queue: Jiawei Shao <jiawei.shao@intel.com>
diff --git a/src/dawn_native/vulkan/BufferVk.cpp b/src/dawn_native/vulkan/BufferVk.cpp
index f8e0acd..21f7db2 100644
--- a/src/dawn_native/vulkan/BufferVk.cpp
+++ b/src/dawn_native/vulkan/BufferVk.cpp
@@ -186,56 +186,52 @@
 
     void Buffer::TransitionUsageNow(CommandRecordingContext* recordingContext,
                                     wgpu::BufferUsage usage) {
-        std::vector<VkBufferMemoryBarrier> barriers;
+        VkBufferMemoryBarrier barrier;
         VkPipelineStageFlags srcStages = 0;
         VkPipelineStageFlags dstStages = 0;
 
-        TransitionUsageNow(recordingContext, usage, &barriers, &srcStages, &dstStages);
-
-        if (barriers.size() > 0) {
-            ASSERT(barriers.size() == 1);
+        if (TransitionUsageAndGetResourceBarrier(usage, &barrier, &srcStages, &dstStages)) {
+            ASSERT(srcStages != 0 && dstStages != 0);
             ToBackend(GetDevice())
                 ->fn.CmdPipelineBarrier(recordingContext->commandBuffer, srcStages, dstStages, 0, 0,
-                                        nullptr, barriers.size(), barriers.data(), 0, nullptr);
+                                        nullptr, 1u, &barrier, 0, nullptr);
         }
     }
 
-    void Buffer::TransitionUsageNow(CommandRecordingContext* recordingContext,
-                                    wgpu::BufferUsage usage,
-                                    std::vector<VkBufferMemoryBarrier>* bufferBarriers,
-                                    VkPipelineStageFlags* srcStages,
-                                    VkPipelineStageFlags* dstStages) {
+    bool Buffer::TransitionUsageAndGetResourceBarrier(wgpu::BufferUsage usage,
+                                                      VkBufferMemoryBarrier* barrier,
+                                                      VkPipelineStageFlags* srcStages,
+                                                      VkPipelineStageFlags* dstStages) {
         bool lastIncludesTarget = (mLastUsage & usage) == usage;
         bool lastReadOnly = (mLastUsage & kReadOnlyBufferUsages) == mLastUsage;
 
         // We can skip transitions to already current read-only usages.
         if (lastIncludesTarget && lastReadOnly) {
-            return;
+            return false;
         }
 
         // Special-case for the initial transition: Vulkan doesn't allow access flags to be 0.
         if (mLastUsage == wgpu::BufferUsage::None) {
             mLastUsage = usage;
-            return;
+            return false;
         }
 
         *srcStages |= VulkanPipelineStage(mLastUsage);
         *dstStages |= VulkanPipelineStage(usage);
 
-        VkBufferMemoryBarrier barrier;
-        barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER;
-        barrier.pNext = nullptr;
-        barrier.srcAccessMask = VulkanAccessFlags(mLastUsage);
-        barrier.dstAccessMask = VulkanAccessFlags(usage);
-        barrier.srcQueueFamilyIndex = 0;
-        barrier.dstQueueFamilyIndex = 0;
-        barrier.buffer = mHandle;
-        barrier.offset = 0;
-        barrier.size = GetSize();
-
-        bufferBarriers->push_back(barrier);
+        barrier->sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER;
+        barrier->pNext = nullptr;
+        barrier->srcAccessMask = VulkanAccessFlags(mLastUsage);
+        barrier->dstAccessMask = VulkanAccessFlags(usage);
+        barrier->srcQueueFamilyIndex = 0;
+        barrier->dstQueueFamilyIndex = 0;
+        barrier->buffer = mHandle;
+        barrier->offset = 0;
+        barrier->size = GetSize();
 
         mLastUsage = usage;
+
+        return true;
     }
 
     bool Buffer::IsCPUWritableAtCreation() const {
diff --git a/src/dawn_native/vulkan/BufferVk.h b/src/dawn_native/vulkan/BufferVk.h
index 45922ec..b55cf9b 100644
--- a/src/dawn_native/vulkan/BufferVk.h
+++ b/src/dawn_native/vulkan/BufferVk.h
@@ -37,11 +37,10 @@
         // `commands`.
         // TODO(cwallez@chromium.org): coalesce barriers and do them early when possible.
         void TransitionUsageNow(CommandRecordingContext* recordingContext, wgpu::BufferUsage usage);
-        void TransitionUsageNow(CommandRecordingContext* recordingContext,
-                                wgpu::BufferUsage usage,
-                                std::vector<VkBufferMemoryBarrier>* bufferBarriers,
-                                VkPipelineStageFlags* srcStages,
-                                VkPipelineStageFlags* dstStages);
+        bool TransitionUsageAndGetResourceBarrier(wgpu::BufferUsage usage,
+                                                  VkBufferMemoryBarrier* barrier,
+                                                  VkPipelineStageFlags* srcStages,
+                                                  VkPipelineStageFlags* dstStages);
 
         void EnsureDataInitialized(CommandRecordingContext* recordingContext);
         void EnsureDataInitializedAsDestination(CommandRecordingContext* recordingContext,
diff --git a/src/dawn_native/vulkan/CommandBufferVk.cpp b/src/dawn_native/vulkan/CommandBufferVk.cpp
index aadf46d..240614c 100644
--- a/src/dawn_native/vulkan/CommandBufferVk.cpp
+++ b/src/dawn_native/vulkan/CommandBufferVk.cpp
@@ -146,43 +146,59 @@
                                     mDirtyBindGroupsObjectChangedOrIsDynamic, mBindGroups,
                                     mDynamicOffsetCounts, mDynamicOffsets);
 
-                // TODO(jiawei.shao@intel.com): combine the following barriers in one
-                // vkCmdPipelineBarrier() call.
+                std::vector<VkBufferMemoryBarrier> bufferBarriers;
+                std::vector<VkImageMemoryBarrier> imageBarriers;
+                VkPipelineStageFlags srcStages = 0;
+                VkPipelineStageFlags dstStages = 0;
+
                 for (BindGroupIndex index : IterateBitSet(mBindGroupLayoutsMask)) {
                     BindGroupLayoutBase* layout = mBindGroups[index]->GetLayout();
                     for (BindingIndex binding{0}; binding < layout->GetBindingCount(); ++binding) {
                         switch (layout->GetBindingInfo(binding).type) {
                             case wgpu::BindingType::StorageBuffer:
-                            case wgpu::BindingType::ReadonlyStorageBuffer:
-                                ToBackend(
-                                    mBindGroups[index]->GetBindingAsBufferBinding(binding).buffer)
-                                    ->TransitionUsageNow(recordingContext,
-                                                         wgpu::BufferUsage::Storage);
+                            case wgpu::BindingType::ReadonlyStorageBuffer: {
+                                VkBufferMemoryBarrier bufferBarrier;
+                                if (ToBackend(mBindGroups[index]
+                                                  ->GetBindingAsBufferBinding(binding)
+                                                  .buffer)
+                                        ->TransitionUsageAndGetResourceBarrier(
+                                            wgpu::BufferUsage::Storage, &bufferBarrier, &srcStages,
+                                            &dstStages)) {
+                                    bufferBarriers.push_back(bufferBarrier);
+                                }
                                 break;
+                            }
 
                             case wgpu::BindingType::ReadonlyStorageTexture:
                             case wgpu::BindingType::WriteonlyStorageTexture: {
                                 TextureViewBase* view =
                                     mBindGroups[index]->GetBindingAsTextureView(binding);
                                 ToBackend(view->GetTexture())
-                                    ->TransitionUsageNow(recordingContext,
-                                                         wgpu::TextureUsage::Storage,
-                                                         view->GetSubresourceRange());
+                                    ->TransitionUsageAndGetResourceBarrier(
+                                        wgpu::TextureUsage::Storage, view->GetSubresourceRange(),
+                                        &imageBarriers, &srcStages, &dstStages);
                                 break;
                             }
-                            case wgpu::BindingType::UniformBuffer:
-                                ToBackend(
-                                    mBindGroups[index]->GetBindingAsBufferBinding(binding).buffer)
-                                    ->TransitionUsageNow(recordingContext,
-                                                         wgpu::BufferUsage::Uniform);
+                            case wgpu::BindingType::UniformBuffer: {
+                                VkBufferMemoryBarrier bufferBarrier;
+                                if (ToBackend(mBindGroups[index]
+                                                  ->GetBindingAsBufferBinding(binding)
+                                                  .buffer)
+                                        ->TransitionUsageAndGetResourceBarrier(
+                                            wgpu::BufferUsage::Uniform, &bufferBarrier, &srcStages,
+                                            &dstStages)) {
+                                    bufferBarriers.push_back(bufferBarrier);
+                                }
                                 break;
+                            }
+
                             case wgpu::BindingType::SampledTexture: {
                                 TextureViewBase* view =
                                     mBindGroups[index]->GetBindingAsTextureView(binding);
                                 ToBackend(view->GetTexture())
-                                    ->TransitionUsageNow(recordingContext,
-                                                         wgpu::TextureUsage::Sampled,
-                                                         view->GetSubresourceRange());
+                                    ->TransitionUsageAndGetResourceBarrier(
+                                        wgpu::TextureUsage::Sampled, view->GetSubresourceRange(),
+                                        &imageBarriers, &srcStages, &dstStages);
                                 break;
                             }
 
@@ -200,6 +216,15 @@
                         }
                     }
                 }
+
+                if (!bufferBarriers.empty() || !imageBarriers.empty()) {
+                    ASSERT(srcStages != 0 && dstStages != 0);
+                    device->fn.CmdPipelineBarrier(recordingContext->commandBuffer, srcStages,
+                                                  dstStages, 0, 0, nullptr, bufferBarriers.size(),
+                                                  bufferBarriers.data(), imageBarriers.size(),
+                                                  imageBarriers.data());
+                }
+
                 DidApply();
             }
         };
@@ -459,8 +484,12 @@
             for (size_t i = 0; i < usages.buffers.size(); ++i) {
                 Buffer* buffer = ToBackend(usages.buffers[i]);
                 buffer->EnsureDataInitialized(recordingContext);
-                buffer->TransitionUsageNow(recordingContext, usages.bufferUsages[i],
-                                           &bufferBarriers, &srcStages, &dstStages);
+
+                VkBufferMemoryBarrier bufferBarrier;
+                if (buffer->TransitionUsageAndGetResourceBarrier(
+                        usages.bufferUsages[i], &bufferBarrier, &srcStages, &dstStages)) {
+                    bufferBarriers.push_back(bufferBarrier);
+                }
             }
 
             for (size_t i = 0; i < usages.textures.size(); ++i) {
diff --git a/src/dawn_native/vulkan/TextureVk.cpp b/src/dawn_native/vulkan/TextureVk.cpp
index d234b23..f7e6dfb 100644
--- a/src/dawn_native/vulkan/TextureVk.cpp
+++ b/src/dawn_native/vulkan/TextureVk.cpp
@@ -818,6 +818,32 @@
                                      wgpu::TextureUsage usage,
                                      const SubresourceRange& range) {
         std::vector<VkImageMemoryBarrier> barriers;
+
+        VkPipelineStageFlags srcStages = 0;
+        VkPipelineStageFlags dstStages = 0;
+
+        TransitionUsageAndGetResourceBarrier(usage, range, &barriers, &srcStages, &dstStages);
+
+        if (mExternalState != ExternalState::InternalOnly) {
+            TweakTransitionForExternalUsage(recordingContext, &barriers, 0);
+        }
+
+        if (!barriers.empty()) {
+            ASSERT(srcStages != 0 && dstStages != 0);
+            ToBackend(GetDevice())
+                ->fn.CmdPipelineBarrier(recordingContext->commandBuffer, srcStages, dstStages, 0, 0,
+                                        nullptr, 0, nullptr, barriers.size(), barriers.data());
+        }
+    }
+
+    void Texture::TransitionUsageAndGetResourceBarrier(
+        wgpu::TextureUsage usage,
+        const SubresourceRange& range,
+        std::vector<VkImageMemoryBarrier>* imageBarriers,
+        VkPipelineStageFlags* srcStages,
+        VkPipelineStageFlags* dstStages) {
+        ASSERT(imageBarriers != nullptr);
+
         const Format& format = GetFormat();
 
         wgpu::TextureUsage allLastUsages = wgpu::TextureUsage::None;
@@ -837,7 +863,7 @@
             if (CanReuseWithoutBarrier(mSubresourceLastUsages[0], usage)) {
                 return;
             }
-            barriers.push_back(
+            imageBarriers->push_back(
                 BuildMemoryBarrier(format, mHandle, mSubresourceLastUsages[0], usage, range));
             allLastUsages = mSubresourceLastUsages[0];
             for (uint32_t i = 0; i < GetSubresourceCount(); ++i) {
@@ -868,22 +894,15 @@
                         mSubresourceLastUsages[index] = usage;
                     }
 
-                    barriers.push_back(BuildMemoryBarrier(
+                    imageBarriers->push_back(BuildMemoryBarrier(
                         format, mHandle, lastUsage, usage,
                         SubresourceRange::SingleMipAndLayer(level, layer, format.aspects)));
                 }
             }
         }
 
-        if (mExternalState != ExternalState::InternalOnly) {
-            TweakTransitionForExternalUsage(recordingContext, &barriers, 0);
-        }
-
-        VkPipelineStageFlags srcStages = VulkanPipelineStage(allLastUsages, format);
-        VkPipelineStageFlags dstStages = VulkanPipelineStage(usage, format);
-        ToBackend(GetDevice())
-            ->fn.CmdPipelineBarrier(recordingContext->commandBuffer, srcStages, dstStages, 0, 0,
-                                    nullptr, 0, nullptr, barriers.size(), barriers.data());
+        *srcStages |= VulkanPipelineStage(allLastUsages, format);
+        *dstStages |= VulkanPipelineStage(usage, format);
 
         mSameLastUsagesAcrossSubresources = areAllSubresourcesCovered;
     }
diff --git a/src/dawn_native/vulkan/TextureVk.h b/src/dawn_native/vulkan/TextureVk.h
index 6748ebd..6cf52f5 100644
--- a/src/dawn_native/vulkan/TextureVk.h
+++ b/src/dawn_native/vulkan/TextureVk.h
@@ -70,6 +70,11 @@
         void TransitionUsageNow(CommandRecordingContext* recordingContext,
                                 wgpu::TextureUsage usage,
                                 const SubresourceRange& range);
+        void TransitionUsageAndGetResourceBarrier(wgpu::TextureUsage usage,
+                                                  const SubresourceRange& range,
+                                                  std::vector<VkImageMemoryBarrier>* imageBarriers,
+                                                  VkPipelineStageFlags* srcStages,
+                                                  VkPipelineStageFlags* dstStages);
         void TransitionUsageForPass(CommandRecordingContext* recordingContext,
                                     const PassTextureUsage& textureUsages,
                                     std::vector<VkImageMemoryBarrier>* imageBarriers,