D3D12: use one barrier to transition states for all subresources

If all subresources of a texture share the same old state, and they
also share the same new state, then we can use a single barrier to
transition all subresources at once. We don't need one barrier per
subresource.

This change reduces the number of barriers we dispatch, which improves
performance in particular situations.

Bug: dawn:441

Change-Id: I9fe9dabda725e05d4ce5a8e69ee7b40e6724a22a
Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/23145
Commit-Queue: Yunchao He <yunchao.he@intel.com>
Reviewed-by: Austin Eng <enga@chromium.org>
diff --git a/src/dawn_native/d3d12/CommandBufferD3D12.cpp b/src/dawn_native/d3d12/CommandBufferD3D12.cpp
index 4771cde..f813b56 100644
--- a/src/dawn_native/d3d12/CommandBufferD3D12.cpp
+++ b/src/dawn_native/d3d12/CommandBufferD3D12.cpp
@@ -506,8 +506,8 @@
 
             for (size_t i = 0; i < usages.textures.size(); ++i) {
                 ToBackend(usages.textures[i])
-                    ->TrackUsageAndGetResourceBarrierForPass(
-                        commandContext, &barriers, usages.textureUsages[i].subresourceUsages);
+                    ->TrackUsageAndGetResourceBarrierForPass(commandContext, &barriers,
+                                                             usages.textureUsages[i]);
                 textureUsages |= usages.textureUsages[i].usage;
             }
 
diff --git a/src/dawn_native/d3d12/TextureD3D12.cpp b/src/dawn_native/d3d12/TextureD3D12.cpp
index f9bd295..5a6b600 100644
--- a/src/dawn_native/d3d12/TextureD3D12.cpp
+++ b/src/dawn_native/d3d12/TextureD3D12.cpp
@@ -582,12 +582,14 @@
         }
     }
 
-    void Texture::TransitionSingleSubresource(std::vector<D3D12_RESOURCE_BARRIER>* barriers,
-                                              D3D12_RESOURCE_STATES newState,
-                                              uint32_t index,
-                                              const Serial pendingCommandSerial) {
+    void Texture::TransitionSingleOrAllSubresources(std::vector<D3D12_RESOURCE_BARRIER>* barriers,
+                                                    uint32_t index,
+                                                    D3D12_RESOURCE_STATES newState,
+                                                    const Serial pendingCommandSerial,
+                                                    bool allSubresources) {
         StateAndDecay* state = &mSubresourceStateAndDecay[index];
-        // Avoid transitioning the texture when it isn't needed.
+        // Reuse the subresource(s) directly and return early, without recording a
+        // barrier, when no transition is needed.
         // TODO(cwallez@chromium.org): Need some form of UAV barriers at some point.
         if (state->lastState == newState) {
             return;
@@ -647,7 +649,8 @@
         barrier.Transition.pResource = GetD3D12Resource();
         barrier.Transition.StateBefore = lastState;
         barrier.Transition.StateAfter = newState;
-        barrier.Transition.Subresource = index;
+        barrier.Transition.Subresource =
+            allSubresources ? D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES : index;
         barriers->push_back(barrier);
         // TODO(yunchao.he@intel.com): support subresource for depth/stencil. Depth stencil
         // texture has different plane slices. While the current implementation only has differernt
@@ -656,7 +659,8 @@
         // correctly. We force the transition to be the same for all planes to match what the
         // frontend validation checks for. This hack might be incorrect for stencil-only texture
         // because we always set transition barrier for depth plane.
-        if (newState == D3D12_RESOURCE_STATE_DEPTH_WRITE && GetFormat().HasStencil()) {
+        if (!allSubresources && newState == D3D12_RESOURCE_STATE_DEPTH_WRITE &&
+            GetFormat().HasStencil()) {
             D3D12_RESOURCE_BARRIER barrierStencil = barrier;
             barrierStencil.Transition.Subresource += GetArrayLayers() * GetNumMipLevels();
             barriers->push_back(barrierStencil);
@@ -682,38 +686,85 @@
         HandleTransitionSpecialCases(commandContext);
 
         const Serial pendingCommandSerial = ToBackend(GetDevice())->GetPendingCommandSerial();
+        uint32_t subresourceCount = GetSubresourceCount();
+
+        // These transitions assume it is a 2D texture
+        ASSERT(GetDimension() == wgpu::TextureDimension::e2D);
+
+        // If the usages transitions can cover all subresources, and old usages of all subresources
+        // are the same, then we can use one barrier to do state transition for all subresources.
+        // Note that if the texture has only one mip level and one array slice, it will fall into
+        // this category.
+        bool areAllSubresourcesCovered = range.levelCount * range.layerCount == subresourceCount;
+        if (mSameLastUsagesAcrossSubresources && areAllSubresourcesCovered) {
+            TransitionSingleOrAllSubresources(barriers, 0, newState, pendingCommandSerial, true);
+
+            // TODO(yunchao.he@intel.com): compress and decompress if all subresources have the
+            // same states. We may need to retain mSubresourceStateAndDecay[0] only.
+            for (uint32_t i = 1; i < subresourceCount; ++i) {
+                mSubresourceStateAndDecay[i] = mSubresourceStateAndDecay[0];
+            }
+
+            return;
+        }
         for (uint32_t arrayLayer = 0; arrayLayer < range.layerCount; ++arrayLayer) {
             for (uint32_t mipLevel = 0; mipLevel < range.levelCount; ++mipLevel) {
                 uint32_t index = GetSubresourceIndex(range.baseMipLevel + mipLevel,
                                                      range.baseArrayLayer + arrayLayer);
 
-                TransitionSingleSubresource(barriers, newState, index, pendingCommandSerial);
+                TransitionSingleOrAllSubresources(barriers, index, newState, pendingCommandSerial,
+                                                  false);
             }
         }
+        mSameLastUsagesAcrossSubresources = areAllSubresourcesCovered;
     }
 
     void Texture::TrackUsageAndGetResourceBarrierForPass(
         CommandRecordingContext* commandContext,
         std::vector<D3D12_RESOURCE_BARRIER>* barriers,
-        const std::vector<wgpu::TextureUsage>& subresourceUsages) {
+        const PassTextureUsage& textureUsages) {
         HandleTransitionSpecialCases(commandContext);
 
         const Serial pendingCommandSerial = ToBackend(GetDevice())->GetPendingCommandSerial();
+        uint32_t subresourceCount = GetSubresourceCount();
+        ASSERT(textureUsages.subresourceUsages.size() == subresourceCount);
+        // These transitions assume it is a 2D texture
+        ASSERT(GetDimension() == wgpu::TextureDimension::e2D);
+
+        // If new usages of all subresources are the same and old usages of all subresources are
+        // the same too, we can use one barrier to do state transition for all subresources.
+        // Note that if the texture has only one mip level and one array slice, it will fall into
+        // this category.
+        if (textureUsages.sameUsagesAcrossSubresources && mSameLastUsagesAcrossSubresources) {
+            D3D12_RESOURCE_STATES newState = D3D12TextureUsage(textureUsages.usage, GetFormat());
+            TransitionSingleOrAllSubresources(barriers, 0, newState, pendingCommandSerial, true);
+
+            // TODO(yunchao.he@intel.com): compress and decompress if all subresources have the
+            // same states. We may need to retain mSubresourceStateAndDecay[0] only.
+            for (uint32_t i = 1; i < subresourceCount; ++i) {
+                mSubresourceStateAndDecay[i] = mSubresourceStateAndDecay[0];
+            }
+
+            return;
+        }
+
         for (uint32_t arrayLayer = 0; arrayLayer < GetArrayLayers(); ++arrayLayer) {
             for (uint32_t mipLevel = 0; mipLevel < GetNumMipLevels(); ++mipLevel) {
                 uint32_t index = GetSubresourceIndex(mipLevel, arrayLayer);
 
                 // Skip if this subresource is not used during the current pass
-                if (subresourceUsages[index] == wgpu::TextureUsage::None) {
+                if (textureUsages.subresourceUsages[index] == wgpu::TextureUsage::None) {
                     continue;
                 }
 
                 D3D12_RESOURCE_STATES newState =
-                    D3D12TextureUsage(subresourceUsages[index], GetFormat());
+                    D3D12TextureUsage(textureUsages.subresourceUsages[index], GetFormat());
 
-                TransitionSingleSubresource(barriers, newState, index, pendingCommandSerial);
+                TransitionSingleOrAllSubresources(barriers, index, newState, pendingCommandSerial,
+                                                  false);
             }
         }
+        mSameLastUsagesAcrossSubresources = textureUsages.sameUsagesAcrossSubresources;
     }
 
     D3D12_RENDER_TARGET_VIEW_DESC Texture::GetRTVDescriptor(uint32_t mipLevel,
diff --git a/src/dawn_native/d3d12/TextureD3D12.h b/src/dawn_native/d3d12/TextureD3D12.h
index e152836..ef17bf4 100644
--- a/src/dawn_native/d3d12/TextureD3D12.h
+++ b/src/dawn_native/d3d12/TextureD3D12.h
@@ -19,6 +19,7 @@
 #include "dawn_native/Texture.h"
 
 #include "dawn_native/DawnNative.h"
+#include "dawn_native/PassResourceUsage.h"
 #include "dawn_native/d3d12/ResourceHeapAllocationD3D12.h"
 #include "dawn_native/d3d12/d3d12_platform.h"
 
@@ -57,10 +58,9 @@
         void EnsureSubresourceContentInitialized(CommandRecordingContext* commandContext,
                                                  const SubresourceRange& range);
 
-        void TrackUsageAndGetResourceBarrierForPass(
-            CommandRecordingContext* commandContext,
-            std::vector<D3D12_RESOURCE_BARRIER>* barrier,
-            const std::vector<wgpu::TextureUsage>& subresourceUsages);
+        void TrackUsageAndGetResourceBarrierForPass(CommandRecordingContext* commandContext,
+                                                    std::vector<D3D12_RESOURCE_BARRIER>* barrier,
+                                                    const PassTextureUsage& textureUsages);
         void TrackUsageAndTransitionNow(CommandRecordingContext* commandContext,
                                         wgpu::TextureUsage usage,
                                         const SubresourceRange& range);
@@ -94,12 +94,15 @@
                                                   D3D12_RESOURCE_STATES newState,
                                                   const SubresourceRange& range);
 
-        void TransitionSingleSubresource(std::vector<D3D12_RESOURCE_BARRIER>* barriers,
-                                         D3D12_RESOURCE_STATES subresourceNewState,
-                                         uint32_t index,
-                                         const Serial pendingCommandSerial);
+        void TransitionSingleOrAllSubresources(std::vector<D3D12_RESOURCE_BARRIER>* barriers,
+                                               uint32_t index,
+                                               D3D12_RESOURCE_STATES subresourceNewState,
+                                               const Serial pendingCommandSerial,
+                                               bool allSubresources);
         void HandleTransitionSpecialCases(CommandRecordingContext* commandContext);
 
+        bool mSameLastUsagesAcrossSubresources = true;
+
         struct StateAndDecay {
             D3D12_RESOURCE_STATES lastState;
             Serial lastDecaySerial;
diff --git a/src/dawn_native/vulkan/TextureVk.cpp b/src/dawn_native/vulkan/TextureVk.cpp
index 110d618..eb502f8 100644
--- a/src/dawn_native/vulkan/TextureVk.cpp
+++ b/src/dawn_native/vulkan/TextureVk.cpp
@@ -810,8 +810,8 @@
         // are the same, then we can use one barrier to do state transition for all subresources.
         // Note that if the texture has only one mip level and one array slice, it will fall into
         // this category.
-        bool isAllSubresourcesCovered = range.levelCount * range.layerCount == subresourceCount;
-        if (mSameLastUsagesAcrossSubresources && isAllSubresourcesCovered) {
+        bool areAllSubresourcesCovered = range.levelCount * range.layerCount == subresourceCount;
+        if (mSameLastUsagesAcrossSubresources && areAllSubresourcesCovered) {
             ASSERT(range.baseMipLevel == 0 && range.baseArrayLayer == 0);
             if (CanReuseWithoutBarrier(mSubresourceLastUsages[0], usage)) {
                 return;
@@ -852,7 +852,7 @@
             ->fn.CmdPipelineBarrier(recordingContext->commandBuffer, srcStages, dstStages, 0, 0,
                                     nullptr, 0, nullptr, barriers.size(), barriers.data());
 
-        mSameLastUsagesAcrossSubresources = isAllSubresourcesCovered;
+        mSameLastUsagesAcrossSubresources = areAllSubresourcesCovered;
     }
 
     MaybeError Texture::ClearTexture(CommandRecordingContext* recordingContext,
diff --git a/src/fuzzers/BUILD.gn b/src/fuzzers/BUILD.gn
index 6d3cc01..f6400b4 100644
--- a/src/fuzzers/BUILD.gn
+++ b/src/fuzzers/BUILD.gn
@@ -175,6 +175,6 @@
   ]
 
   if (is_win) {
-    deps += [":dawn_wire_server_and_d3d12_backend_fuzzer"]
+    deps += [ ":dawn_wire_server_and_d3d12_backend_fuzzer" ]
   }
 }