Transition bind group resource states before dispatch in compute pass

This patch fixes a crash issue in both the D3D12 and Vulkan backends.
Previously on D3D12 and Vulkan before a compute pass we transitioned
the states of all the resources used in the pass, and before each
dispatch call we only checked if the states of the storage buffers,
read-only storage textures and write-only storage textures need to
be transitioned. This behavior causes two issues:

1. In a compute pass a buffer or texture can be used with both read-only
and writable usages in different dispatch calls (e.g. as a storage
buffer in the first dispatch, and as a uniform buffer in the next
dispatch), but this is an invalid state combination on D3D12 and isn't
allowed by the D3D12 validation layer.
2. In the above case, the state of the buffer is not transitioned into
UNIFORM, which does not match the required state in the next dispatch.

This patch fixes these issues by transitioning all the states in the
current bind group before each dispatch() call instead of at the
beginning of the compute pass.

BUG=dawn:522
TEST=dawn_end2end_tests
Change-Id: Ibeb6c41dc493ee1068b43bd89ed5a15f2331ef75
Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/27942
Reviewed-by: Corentin Wallez <cwallez@chromium.org>
Reviewed-by: Austin Eng <enga@chromium.org>
Commit-Queue: Jiawei Shao <jiawei.shao@intel.com>
diff --git a/src/dawn_native/BUILD.gn b/src/dawn_native/BUILD.gn
index 6f9dbd5..6b70082 100644
--- a/src/dawn_native/BUILD.gn
+++ b/src/dawn_native/BUILD.gn
@@ -153,7 +153,6 @@
     "BackendConnection.h",
     "BindGroup.cpp",
     "BindGroup.h",
-    "BindGroupAndStorageBarrierTracker.h",
     "BindGroupLayout.cpp",
     "BindGroupLayout.h",
     "BindGroupTracker.h",
diff --git a/src/dawn_native/BindGroupAndStorageBarrierTracker.h b/src/dawn_native/BindGroupAndStorageBarrierTracker.h
deleted file mode 100644
index e34a165..0000000
--- a/src/dawn_native/BindGroupAndStorageBarrierTracker.h
+++ /dev/null
@@ -1,112 +0,0 @@
-// Copyright 2019 The Dawn Authors
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#ifndef DAWNNATIVE_BINDGROUPANDSTORAGEBARRIERTRACKER_H_
-#define DAWNNATIVE_BINDGROUPANDSTORAGEBARRIERTRACKER_H_
-
-#include "common/ityp_bitset.h"
-#include "common/ityp_stack_vec.h"
-#include "dawn_native/BindGroup.h"
-#include "dawn_native/BindGroupTracker.h"
-#include "dawn_native/Buffer.h"
-#include "dawn_native/Texture.h"
-
-namespace dawn_native {
-
-    // Extends BindGroupTrackerBase to also keep track of resources that need a usage transition.
-    template <bool CanInheritBindGroups, typename DynamicOffset>
-    class BindGroupAndStorageBarrierTrackerBase
-        : public BindGroupTrackerBase<CanInheritBindGroups, DynamicOffset> {
-        using Base = BindGroupTrackerBase<CanInheritBindGroups, DynamicOffset>;
-
-      public:
-        BindGroupAndStorageBarrierTrackerBase() = default;
-
-        void OnSetBindGroup(BindGroupIndex index,
-                            BindGroupBase* bindGroup,
-                            uint32_t dynamicOffsetCount,
-                            uint32_t* dynamicOffsets) {
-            ASSERT(index < kMaxBindGroupsTyped);
-
-            if (this->mBindGroups[index] != bindGroup) {
-                const BindGroupLayoutBase* layout = bindGroup->GetLayout();
-
-                mBindings[index].resize(layout->GetBindingCount());
-                mBindingTypes[index].resize(layout->GetBindingCount());
-                mBindingsNeedingBarrier[index] = {};
-
-                for (BindingIndex bindingIndex{0}; bindingIndex < layout->GetBindingCount();
-                     ++bindingIndex) {
-                    const BindingInfo& bindingInfo = layout->GetBindingInfo(bindingIndex);
-
-                    if ((bindingInfo.visibility & wgpu::ShaderStage::Compute) == 0) {
-                        continue;
-                    }
-
-                    mBindingTypes[index][bindingIndex] = bindingInfo.type;
-                    switch (bindingInfo.type) {
-                        case wgpu::BindingType::UniformBuffer:
-                        case wgpu::BindingType::ReadonlyStorageBuffer:
-                        case wgpu::BindingType::Sampler:
-                        case wgpu::BindingType::ComparisonSampler:
-                        case wgpu::BindingType::SampledTexture:
-                            // Don't require barriers.
-                            break;
-
-                        case wgpu::BindingType::StorageBuffer:
-                            mBindingsNeedingBarrier[index].set(bindingIndex);
-                            mBindings[index][bindingIndex] = static_cast<ObjectBase*>(
-                                bindGroup->GetBindingAsBufferBinding(bindingIndex).buffer);
-                            break;
-
-                        // Read-only and write-only storage textures must use general layout
-                        // because load and store operations on storage images can only be done on
-                        // the images in VK_IMAGE_LAYOUT_GENERAL layout.
-                        case wgpu::BindingType::ReadonlyStorageTexture:
-                        case wgpu::BindingType::WriteonlyStorageTexture:
-                            mBindingsNeedingBarrier[index].set(bindingIndex);
-                            mBindings[index][bindingIndex] = static_cast<ObjectBase*>(
-                                bindGroup->GetBindingAsTextureView(bindingIndex));
-                            break;
-
-                        case wgpu::BindingType::StorageTexture:
-                            // Not implemented.
-                        default:
-                            UNREACHABLE();
-                            break;
-                    }
-                }
-            }
-
-            Base::OnSetBindGroup(index, bindGroup, dynamicOffsetCount, dynamicOffsets);
-        }
-
-      protected:
-        ityp::array<BindGroupIndex,
-                    ityp::bitset<BindingIndex, kMaxBindingsPerPipelineLayout>,
-                    kMaxBindGroups>
-            mBindingsNeedingBarrier = {};
-        ityp::array<BindGroupIndex,
-                    ityp::stack_vec<BindingIndex, wgpu::BindingType, kMaxOptimalBindingsPerGroup>,
-                    kMaxBindGroups>
-            mBindingTypes = {};
-        ityp::array<BindGroupIndex,
-                    ityp::stack_vec<BindingIndex, ObjectBase*, kMaxOptimalBindingsPerGroup>,
-                    kMaxBindGroups>
-            mBindings = {};
-    };
-
-}  // namespace dawn_native
-
-#endif  // DAWNNATIVE_BINDGROUPANDSTORAGEBARRIERTRACKER_H_
diff --git a/src/dawn_native/CMakeLists.txt b/src/dawn_native/CMakeLists.txt
index feb9dc0..1b34402 100644
--- a/src/dawn_native/CMakeLists.txt
+++ b/src/dawn_native/CMakeLists.txt
@@ -31,7 +31,6 @@
     "BackendConnection.h"
     "BindGroup.cpp"
     "BindGroup.h"
-    "BindGroupAndStorageBarrierTracker.h"
     "BindGroupLayout.cpp"
     "BindGroupLayout.h"
     "BindGroupTracker.h"
diff --git a/src/dawn_native/d3d12/CommandBufferD3D12.cpp b/src/dawn_native/d3d12/CommandBufferD3D12.cpp
index 82ad2d8..4efb9cf 100644
--- a/src/dawn_native/d3d12/CommandBufferD3D12.cpp
+++ b/src/dawn_native/d3d12/CommandBufferD3D12.cpp
@@ -15,7 +15,7 @@
 #include "dawn_native/d3d12/CommandBufferD3D12.h"
 
 #include "common/Assert.h"
-#include "dawn_native/BindGroupAndStorageBarrierTracker.h"
+#include "dawn_native/BindGroupTracker.h"
 #include "dawn_native/CommandEncoder.h"
 #include "dawn_native/CommandValidation.h"
 #include "dawn_native/Commands.h"
@@ -143,12 +143,12 @@
         }
     }  // anonymous namespace
 
-    class BindGroupStateTracker : public BindGroupAndStorageBarrierTrackerBase<false, uint64_t> {
-        using Base = BindGroupAndStorageBarrierTrackerBase;
+    class BindGroupStateTracker : public BindGroupTrackerBase<false, uint64_t> {
+        using Base = BindGroupTrackerBase;
 
       public:
         BindGroupStateTracker(Device* device)
-            : BindGroupAndStorageBarrierTrackerBase(),
+            : BindGroupTrackerBase(),
               mDevice(device),
               mViewAllocator(device->GetViewShaderVisibleDescriptorAllocator()),
               mSamplerAllocator(device->GetSamplerShaderVisibleDescriptorAllocator()) {
@@ -225,12 +225,14 @@
             if (mInCompute) {
                 std::vector<D3D12_RESOURCE_BARRIER> barriers;
                 for (BindGroupIndex index : IterateBitSet(mBindGroupLayoutsMask)) {
-                    for (BindingIndex binding : IterateBitSet(mBindingsNeedingBarrier[index])) {
-                        wgpu::BindingType bindingType = mBindingTypes[index][binding];
-                        switch (bindingType) {
+                    BindGroupLayoutBase* layout = mBindGroups[index]->GetLayout();
+                    for (BindingIndex binding{0}; binding < layout->GetBindingCount(); ++binding) {
+                        switch (layout->GetBindingInfo(binding).type) {
                             case wgpu::BindingType::StorageBuffer: {
                                 D3D12_RESOURCE_BARRIER barrier;
-                                if (static_cast<Buffer*>(mBindings[index][binding])
+                                if (ToBackend(mBindGroups[index]
+                                                  ->GetBindingAsBufferBinding(binding)
+                                                  .buffer)
                                         ->TrackUsageAndGetResourceBarrier(
                                             commandContext, &barrier, wgpu::BufferUsage::Storage)) {
                                     barriers.push_back(barrier);
@@ -240,7 +242,7 @@
 
                             case wgpu::BindingType::ReadonlyStorageTexture: {
                                 TextureViewBase* view =
-                                    static_cast<TextureViewBase*>(mBindings[index][binding]);
+                                    mBindGroups[index]->GetBindingAsTextureView(binding);
                                 ToBackend(view->GetTexture())
                                     ->TransitionUsageAndGetResourceBarrier(
                                         commandContext, &barriers, kReadonlyStorageTexture,
@@ -249,22 +251,52 @@
                             }
                             case wgpu::BindingType::WriteonlyStorageTexture: {
                                 TextureViewBase* view =
-                                    static_cast<TextureViewBase*>(mBindings[index][binding]);
+                                    mBindGroups[index]->GetBindingAsTextureView(binding);
                                 ToBackend(view->GetTexture())
                                     ->TransitionUsageAndGetResourceBarrier(
                                         commandContext, &barriers, wgpu::TextureUsage::Storage,
                                         view->GetSubresourceRange());
                                 break;
                             }
-                            case wgpu::BindingType::StorageTexture:
-                                // Not implemented.
+                            case wgpu::BindingType::ReadonlyStorageBuffer: {
+                                D3D12_RESOURCE_BARRIER barrier;
+                                if (ToBackend(mBindGroups[index]
+                                                  ->GetBindingAsBufferBinding(binding)
+                                                  .buffer)
+                                        ->TrackUsageAndGetResourceBarrier(commandContext, &barrier,
+                                                                          kReadOnlyStorageBuffer)) {
+                                    barriers.push_back(barrier);
+                                }
+                                break;
+                            }
+                            case wgpu::BindingType::SampledTexture: {
+                                TextureViewBase* view =
+                                    mBindGroups[index]->GetBindingAsTextureView(binding);
+                                ToBackend(view->GetTexture())
+                                    ->TransitionUsageAndGetResourceBarrier(
+                                        commandContext, &barriers, wgpu::TextureUsage::Sampled,
+                                        view->GetSubresourceRange());
+                                break;
+                            }
+                            case wgpu::BindingType::UniformBuffer: {
+                                D3D12_RESOURCE_BARRIER barrier;
+                                if (ToBackend(mBindGroups[index]
+                                                  ->GetBindingAsBufferBinding(binding)
+                                                  .buffer)
+                                        ->TrackUsageAndGetResourceBarrier(
+                                            commandContext, &barrier, wgpu::BufferUsage::Uniform)) {
+                                    barriers.push_back(barrier);
+                                }
+                                break;
+                            }
 
-                            case wgpu::BindingType::UniformBuffer:
-                            case wgpu::BindingType::ReadonlyStorageBuffer:
                             case wgpu::BindingType::Sampler:
                             case wgpu::BindingType::ComparisonSampler:
-                            case wgpu::BindingType::SampledTexture:
                                 // Don't require barriers.
+                                break;
+
+                            case wgpu::BindingType::StorageTexture:
+                                // Not implemented.
 
                             default:
                                 UNREACHABLE();
@@ -572,7 +604,7 @@
         bindingTracker.SetID3D12DescriptorHeaps(commandList);
 
         // Records the necessary barriers for the resource usage pre-computed by the frontend
-        auto PrepareResourcesForSubmission = [](CommandRecordingContext* commandContext,
+        auto PrepareResourcesForRenderPass = [](CommandRecordingContext* commandContext,
                                                 const PassResourceUsage& usages) -> bool {
             std::vector<D3D12_RESOURCE_BARRIER> barriers;
 
@@ -595,6 +627,8 @@
                 bufferUsages |= usages.bufferUsages[i];
             }
 
+            wgpu::TextureUsage textureUsages = wgpu::TextureUsage::None;
+
             for (size_t i = 0; i < usages.textures.size(); ++i) {
                 Texture* texture = ToBackend(usages.textures[i]);
                 // Clear textures that are not output attachments. Output attachments will be
@@ -604,11 +638,7 @@
                     texture->EnsureSubresourceContentInitialized(commandContext,
                                                                  texture->GetAllSubresources());
                 }
-            }
 
-            wgpu::TextureUsage textureUsages = wgpu::TextureUsage::None;
-
-            for (size_t i = 0; i < usages.textures.size(); ++i) {
                 ToBackend(usages.textures[i])
                     ->TrackUsageAndGetResourceBarrierForPass(commandContext, &barriers,
                                                              usages.textureUsages[i]);
@@ -623,6 +653,25 @@
                     textureUsages & wgpu::TextureUsage::Storage);
         };
 
+        // TODO(jiawei.shao@intel.com): move the resource lazy clearing inside the barrier tracking
+        // for compute passes.
+        auto PrepareResourcesForComputePass = [](CommandRecordingContext* commandContext,
+                                                 const PassResourceUsage& usages) -> void {
+            for (size_t i = 0; i < usages.buffers.size(); ++i) {
+                Buffer* buffer = ToBackend(usages.buffers[i]);
+
+                // TODO(jiawei.shao@intel.com): clear storage buffers with
+                // ClearUnorderedAccessView*().
+                buffer->GetDevice()->ConsumedError(buffer->EnsureDataInitialized(commandContext));
+            }
+
+            for (size_t i = 0; i < usages.textures.size(); ++i) {
+                Texture* texture = ToBackend(usages.textures[i]);
+                texture->EnsureSubresourceContentInitialized(commandContext,
+                                                             texture->GetAllSubresources());
+            }
+        };
+
         const std::vector<PassResourceUsage>& passResourceUsages = GetResourceUsages().perPass;
         uint32_t nextPassNumber = 0;
 
@@ -632,8 +681,8 @@
                 case Command::BeginComputePass: {
                     mCommands.NextCommand<BeginComputePassCmd>();
 
-                    PrepareResourcesForSubmission(commandContext,
-                                                  passResourceUsages[nextPassNumber]);
+                    PrepareResourcesForComputePass(commandContext,
+                                                   passResourceUsages[nextPassNumber]);
                     bindingTracker.SetInComputePass(true);
                     DAWN_TRY(RecordComputePass(commandContext, &bindingTracker));
 
@@ -645,7 +694,7 @@
                     BeginRenderPassCmd* beginRenderPassCmd =
                         mCommands.NextCommand<BeginRenderPassCmd>();
 
-                    const bool passHasUAV = PrepareResourcesForSubmission(
+                    const bool passHasUAV = PrepareResourcesForRenderPass(
                         commandContext, passResourceUsages[nextPassNumber]);
                     bindingTracker.SetInComputePass(false);
 
@@ -892,6 +941,7 @@
 
                     DAWN_TRY(bindingTracker->Apply(commandContext));
                     Buffer* buffer = ToBackend(dispatch->indirectBuffer.Get());
+                    buffer->TrackUsageAndTransitionNow(commandContext, wgpu::BufferUsage::Indirect);
                     ComPtr<ID3D12CommandSignature> signature =
                         ToBackend(GetDevice())->GetDispatchIndirectSignature();
                     commandList->ExecuteIndirect(signature.Get(), 1, buffer->GetD3D12Resource(),
diff --git a/src/dawn_native/vulkan/CommandBufferVk.cpp b/src/dawn_native/vulkan/CommandBufferVk.cpp
index aad8d51..aadf46d 100644
--- a/src/dawn_native/vulkan/CommandBufferVk.cpp
+++ b/src/dawn_native/vulkan/CommandBufferVk.cpp
@@ -14,7 +14,7 @@
 
 #include "dawn_native/vulkan/CommandBufferVk.h"
 
-#include "dawn_native/BindGroupAndStorageBarrierTracker.h"
+#include "dawn_native/BindGroupTracker.h"
 #include "dawn_native/CommandEncoder.h"
 #include "dawn_native/CommandValidation.h"
 #include "dawn_native/Commands.h"
@@ -134,8 +134,7 @@
             }
         };
 
-        class ComputeDescriptorSetTracker
-            : public BindGroupAndStorageBarrierTrackerBase<true, uint32_t> {
+        class ComputeDescriptorSetTracker : public BindGroupTrackerBase<true, uint32_t> {
           public:
             ComputeDescriptorSetTracker() = default;
 
@@ -147,12 +146,16 @@
                                     mDirtyBindGroupsObjectChangedOrIsDynamic, mBindGroups,
                                     mDynamicOffsetCounts, mDynamicOffsets);
 
+                // TODO(jiawei.shao@intel.com): combine the following barriers in one
+                // vkCmdPipelineBarrier() call.
                 for (BindGroupIndex index : IterateBitSet(mBindGroupLayoutsMask)) {
-                    for (BindingIndex bindingIndex :
-                         IterateBitSet(mBindingsNeedingBarrier[index])) {
-                        switch (mBindingTypes[index][bindingIndex]) {
+                    BindGroupLayoutBase* layout = mBindGroups[index]->GetLayout();
+                    for (BindingIndex binding{0}; binding < layout->GetBindingCount(); ++binding) {
+                        switch (layout->GetBindingInfo(binding).type) {
                             case wgpu::BindingType::StorageBuffer:
-                                static_cast<Buffer*>(mBindings[index][bindingIndex])
+                            case wgpu::BindingType::ReadonlyStorageBuffer:
+                                ToBackend(
+                                    mBindGroups[index]->GetBindingAsBufferBinding(binding).buffer)
                                     ->TransitionUsageNow(recordingContext,
                                                          wgpu::BufferUsage::Storage);
                                 break;
@@ -160,22 +163,36 @@
                             case wgpu::BindingType::ReadonlyStorageTexture:
                             case wgpu::BindingType::WriteonlyStorageTexture: {
                                 TextureViewBase* view =
-                                    static_cast<TextureViewBase*>(mBindings[index][bindingIndex]);
+                                    mBindGroups[index]->GetBindingAsTextureView(binding);
                                 ToBackend(view->GetTexture())
                                     ->TransitionUsageNow(recordingContext,
                                                          wgpu::TextureUsage::Storage,
                                                          view->GetSubresourceRange());
                                 break;
                             }
-                            case wgpu::BindingType::StorageTexture:
-                                // Not implemented.
-
                             case wgpu::BindingType::UniformBuffer:
-                            case wgpu::BindingType::ReadonlyStorageBuffer:
+                                ToBackend(
+                                    mBindGroups[index]->GetBindingAsBufferBinding(binding).buffer)
+                                    ->TransitionUsageNow(recordingContext,
+                                                         wgpu::BufferUsage::Uniform);
+                                break;
+                            case wgpu::BindingType::SampledTexture: {
+                                TextureViewBase* view =
+                                    mBindGroups[index]->GetBindingAsTextureView(binding);
+                                ToBackend(view->GetTexture())
+                                    ->TransitionUsageNow(recordingContext,
+                                                         wgpu::TextureUsage::Sampled,
+                                                         view->GetSubresourceRange());
+                                break;
+                            }
+
                             case wgpu::BindingType::Sampler:
                             case wgpu::BindingType::ComparisonSampler:
-                            case wgpu::BindingType::SampledTexture:
                                 // Don't require barriers.
+                                break;
+
+                            case wgpu::BindingType::StorageTexture:
+                                // Not implemented.
 
                             default:
                                 UNREACHABLE();
@@ -431,8 +448,9 @@
         VkCommandBuffer commands = recordingContext->commandBuffer;
 
         // Records the necessary barriers for the resource usage pre-computed by the frontend
-        auto TransitionForPass = [](Device* device, CommandRecordingContext* recordingContext,
-                                    const PassResourceUsage& usages) {
+        auto PrepareResourcesForRenderPass = [](Device* device,
+                                                CommandRecordingContext* recordingContext,
+                                                const PassResourceUsage& usages) {
             std::vector<VkBufferMemoryBarrier> bufferBarriers;
             std::vector<VkImageMemoryBarrier> imageBarriers;
             VkPipelineStageFlags srcStages = 0;
@@ -466,6 +484,23 @@
             }
         };
 
+        // TODO(jiawei.shao@intel.com): move the resource lazy clearing inside the barrier tracking
+        // for compute passes.
+        auto PrepareResourcesForComputePass = [](Device* device,
+                                                 CommandRecordingContext* recordingContext,
+                                                 const PassResourceUsage& usages) {
+            for (size_t i = 0; i < usages.buffers.size(); ++i) {
+                Buffer* buffer = ToBackend(usages.buffers[i]);
+                buffer->EnsureDataInitialized(recordingContext);
+            }
+
+            for (size_t i = 0; i < usages.textures.size(); ++i) {
+                Texture* texture = ToBackend(usages.textures[i]);
+                texture->EnsureSubresourceContentInitialized(recordingContext,
+                                                             texture->GetAllSubresources());
+            }
+        };
+
         const std::vector<PassResourceUsage>& passResourceUsages = GetResourceUsages().perPass;
         size_t nextPassNumber = 0;
 
@@ -644,7 +679,8 @@
                 case Command::BeginRenderPass: {
                     BeginRenderPassCmd* cmd = mCommands.NextCommand<BeginRenderPassCmd>();
 
-                    TransitionForPass(device, recordingContext, passResourceUsages[nextPassNumber]);
+                    PrepareResourcesForRenderPass(device, recordingContext,
+                                                  passResourceUsages[nextPassNumber]);
 
                     LazyClearRenderPassAttachments(cmd);
                     DAWN_TRY(RecordRenderPass(recordingContext, cmd));
@@ -656,7 +692,8 @@
                 case Command::BeginComputePass: {
                     mCommands.NextCommand<BeginComputePassCmd>();
 
-                    TransitionForPass(device, recordingContext, passResourceUsages[nextPassNumber]);
+                    PrepareResourcesForComputePass(device, recordingContext,
+                                                   passResourceUsages[nextPassNumber]);
                     DAWN_TRY(RecordComputePass(recordingContext));
 
                     nextPassNumber++;
@@ -705,6 +742,8 @@
 
                 case Command::DispatchIndirect: {
                     DispatchIndirectCmd* dispatch = mCommands.NextCommand<DispatchIndirectCmd>();
+                    ToBackend(dispatch->indirectBuffer)
+                        ->TransitionUsageNow(recordingContext, wgpu::BufferUsage::Indirect);
                     VkBuffer indirectBuffer = ToBackend(dispatch->indirectBuffer)->GetHandle();
 
                     descriptorSets.Apply(device, recordingContext, VK_PIPELINE_BIND_POINT_COMPUTE);
diff --git a/src/tests/end2end/ComputeStorageBufferBarrierTests.cpp b/src/tests/end2end/ComputeStorageBufferBarrierTests.cpp
index 31e66e2..e5b2586 100644
--- a/src/tests/end2end/ComputeStorageBufferBarrierTests.cpp
+++ b/src/tests/end2end/ComputeStorageBufferBarrierTests.cpp
@@ -127,6 +127,70 @@
     EXPECT_BUFFER_U32_RANGE_EQ(expectedB.data(), bufferB, 0, kNumValues);
 }
 
+// Test that multiple dispatches to increment values by ping-ponging between storage buffers and
+// read-only storage buffers are synchronized in one compute pass.
+TEST_P(ComputeStorageBufferBarrierTests, StorageAndReadonlyStoragePingPongInOnePass) {
+    std::vector<uint32_t> data(kNumValues, 0);
+    std::vector<uint32_t> expectedA(kNumValues, 0x1234 * kIterations);
+    std::vector<uint32_t> expectedB(kNumValues, 0x1234 * (kIterations - 1));
+
+    uint64_t bufferSize = static_cast<uint64_t>(data.size() * sizeof(uint32_t));
+
+    wgpu::Buffer bufferA = utils::CreateBufferFromData(
+        device, data.data(), bufferSize, wgpu::BufferUsage::Storage | wgpu::BufferUsage::CopySrc);
+
+    wgpu::Buffer bufferB = utils::CreateBufferFromData(
+        device, data.data(), bufferSize, wgpu::BufferUsage::Storage | wgpu::BufferUsage::CopySrc);
+
+    wgpu::ShaderModule module =
+        utils::CreateShaderModule(device, utils::SingleShaderStage::Compute, R"(
+        #version 450
+        #define kNumValues 100
+        layout(std430, set = 0, binding = 0) readonly buffer Src { uint src[kNumValues]; };
+        layout(std430, set = 0, binding = 1) buffer Dst { uint dst[kNumValues]; };
+        void main() {
+            uint index = gl_GlobalInvocationID.x;
+            dst[index] = src[index] + 0x1234;
+        }
+    )");
+
+    wgpu::ComputePipelineDescriptor pipelineDesc = {};
+    pipelineDesc.computeStage.module = module;
+    pipelineDesc.computeStage.entryPoint = "main";
+    wgpu::ComputePipeline pipeline = device.CreateComputePipeline(&pipelineDesc);
+
+    wgpu::BindGroup bindGroupA = utils::MakeBindGroup(device, pipeline.GetBindGroupLayout(0),
+                                                      {
+                                                          {0, bufferA, 0, bufferSize},
+                                                          {1, bufferB, 0, bufferSize},
+                                                      });
+
+    wgpu::BindGroup bindGroupB = utils::MakeBindGroup(device, pipeline.GetBindGroupLayout(0),
+                                                      {
+                                                          {0, bufferB, 0, bufferSize},
+                                                          {1, bufferA, 0, bufferSize},
+                                                      });
+
+    wgpu::BindGroup bindGroups[2] = {bindGroupA, bindGroupB};
+
+    wgpu::CommandEncoder encoder = device.CreateCommandEncoder();
+    wgpu::ComputePassEncoder pass = encoder.BeginComputePass();
+    pass.SetPipeline(pipeline);
+
+    for (uint32_t i = 0; i < kIterations / 2; ++i) {
+        pass.SetBindGroup(0, bindGroups[0]);
+        pass.Dispatch(kNumValues);
+        pass.SetBindGroup(0, bindGroups[1]);
+        pass.Dispatch(kNumValues);
+    }
+    pass.EndPass();
+    wgpu::CommandBuffer commands = encoder.Finish();
+    queue.Submit(1, &commands);
+
+    EXPECT_BUFFER_U32_RANGE_EQ(expectedA.data(), bufferA, 0, kNumValues);
+    EXPECT_BUFFER_U32_RANGE_EQ(expectedB.data(), bufferB, 0, kNumValues);
+}
+
 // Test that Storage to Uniform buffer transitions work and synchronize correctly
 // by ping-ponging between Storage/Uniform usage in sequential compute passes.
 TEST_P(ComputeStorageBufferBarrierTests, UniformToStorageAddPingPong) {
@@ -192,6 +256,70 @@
     EXPECT_BUFFER_U32_RANGE_EQ(expectedB.data(), bufferB, 0, kNumValues);
 }
 
+// Test that Storage to Uniform buffer transitions work and synchronize correctly
+// by ping-ponging between Storage/Uniform usage in one compute pass.
+TEST_P(ComputeStorageBufferBarrierTests, UniformToStorageAddPingPongInOnePass) {
+    std::vector<uint32_t> data(kNumValues, 0);
+    std::vector<uint32_t> expectedA(kNumValues, 0x1234 * kIterations);
+    std::vector<uint32_t> expectedB(kNumValues, 0x1234 * (kIterations - 1));
+
+    uint64_t bufferSize = static_cast<uint64_t>(data.size() * sizeof(uint32_t));
+
+    wgpu::Buffer bufferA = utils::CreateBufferFromData(
+        device, data.data(), bufferSize,
+        wgpu::BufferUsage::Storage | wgpu::BufferUsage::Uniform | wgpu::BufferUsage::CopySrc);
+
+    wgpu::Buffer bufferB = utils::CreateBufferFromData(
+        device, data.data(), bufferSize,
+        wgpu::BufferUsage::Storage | wgpu::BufferUsage::Uniform | wgpu::BufferUsage::CopySrc);
+
+    wgpu::ShaderModule module =
+        utils::CreateShaderModule(device, utils::SingleShaderStage::Compute, R"(
+        #version 450
+        #define kNumValues 100
+        layout(std140, set = 0, binding = 0) uniform Src { uvec4 src[kNumValues / 4]; };
+        layout(std430, set = 0, binding = 1) buffer Dst { uvec4 dst[kNumValues / 4]; };
+        void main() {
+            uint index = gl_GlobalInvocationID.x;
+            dst[index] = src[index] + 0x1234;
+        }
+    )");
+
+    wgpu::ComputePipelineDescriptor pipelineDesc = {};
+    pipelineDesc.computeStage.module = module;
+    pipelineDesc.computeStage.entryPoint = "main";
+    wgpu::ComputePipeline pipeline = device.CreateComputePipeline(&pipelineDesc);
+
+    wgpu::BindGroup bindGroupA = utils::MakeBindGroup(device, pipeline.GetBindGroupLayout(0),
+                                                      {
+                                                          {0, bufferA, 0, bufferSize},
+                                                          {1, bufferB, 0, bufferSize},
+                                                      });
+
+    wgpu::BindGroup bindGroupB = utils::MakeBindGroup(device, pipeline.GetBindGroupLayout(0),
+                                                      {
+                                                          {0, bufferB, 0, bufferSize},
+                                                          {1, bufferA, 0, bufferSize},
+                                                      });
+
+    wgpu::BindGroup bindGroups[2] = {bindGroupA, bindGroupB};
+
+    wgpu::CommandEncoder encoder = device.CreateCommandEncoder();
+    wgpu::ComputePassEncoder pass = encoder.BeginComputePass();
+    for (uint32_t i = 0, b = 0; i < kIterations; ++i, b = 1 - b) {
+        pass.SetPipeline(pipeline);
+        pass.SetBindGroup(0, bindGroups[b]);
+        pass.Dispatch(kNumValues / 4);
+    }
+    pass.EndPass();
+
+    wgpu::CommandBuffer commands = encoder.Finish();
+    queue.Submit(1, &commands);
+
+    EXPECT_BUFFER_U32_RANGE_EQ(expectedA.data(), bufferA, 0, kNumValues);
+    EXPECT_BUFFER_U32_RANGE_EQ(expectedB.data(), bufferB, 0, kNumValues);
+}
+
 DAWN_INSTANTIATE_TEST(ComputeStorageBufferBarrierTests,
                       D3D12Backend(),
                       MetalBackend(),
diff --git a/src/tests/end2end/StorageTextureTests.cpp b/src/tests/end2end/StorageTextureTests.cpp
index cef7c94..441ec0f 100644
--- a/src/tests/end2end/StorageTextureTests.cpp
+++ b/src/tests/end2end/StorageTextureTests.cpp
@@ -944,6 +944,163 @@
     CheckOutputStorageTexture(writeonlyStorageTexture, kTextureFormat, kArrayLayerCount);
 }
 
+// Test that multiple dispatches to increment values by ping-ponging between a read-only storage
+// texture and a write-only storage texture are synchronized in one pass.
+TEST_P(StorageTextureTests, ReadonlyAndWriteonlyStorageTexturePingPong) {
+    constexpr wgpu::TextureFormat kTextureFormat = wgpu::TextureFormat::R32Uint;
+    wgpu::Texture storageTexture1 = CreateTexture(
+        kTextureFormat, wgpu::TextureUsage::Storage | wgpu::TextureUsage::CopySrc, 1u, 1u);
+    wgpu::Texture storageTexture2 = CreateTexture(
+        kTextureFormat, wgpu::TextureUsage::Storage | wgpu::TextureUsage::CopySrc, 1u, 1u);
+
+    wgpu::ShaderModule module =
+        utils::CreateShaderModule(device, utils::SingleShaderStage::Compute, R"(
+        #version 450
+        layout(set = 0, binding = 0, r32ui) uniform readonly uimage2D Src;
+        layout(set = 0, binding = 1, r32ui) uniform writeonly uimage2D Dst;
+        void main() {
+            uvec4 srcValue = imageLoad(Src, ivec2(0, 0));
+            ++srcValue.x;
+            imageStore(Dst, ivec2(0, 0), srcValue);
+        }
+    )");
+
+    wgpu::ComputePipelineDescriptor pipelineDesc = {};
+    pipelineDesc.computeStage.module = module;
+    pipelineDesc.computeStage.entryPoint = "main";
+    wgpu::ComputePipeline pipeline = device.CreateComputePipeline(&pipelineDesc);
+
+    // In bindGroupA storageTexture1 is bound as read-only storage texture and storageTexture2 is
+    // bound as write-only storage texture.
+    wgpu::BindGroup bindGroupA = utils::MakeBindGroup(device, pipeline.GetBindGroupLayout(0),
+                                                      {
+                                                          {0, storageTexture1.CreateView()},
+                                                          {1, storageTexture2.CreateView()},
+                                                      });
+
+    // In bindGroupB storageTexture2 is bound as read-only storage texture and storageTexture1 is
+    // bound as write-only storage texture.
+    wgpu::BindGroup bindGroupB = utils::MakeBindGroup(device, pipeline.GetBindGroupLayout(0),
+                                                      {
+                                                          {0, storageTexture2.CreateView()},
+                                                          {1, storageTexture1.CreateView()},
+                                                      });
+
+    wgpu::CommandEncoder encoder = device.CreateCommandEncoder();
+    wgpu::ComputePassEncoder pass = encoder.BeginComputePass();
+    pass.SetPipeline(pipeline);
+
+    // After the first dispatch the value in storageTexture2 should be 1u.
+    pass.SetBindGroup(0, bindGroupA);
+    pass.Dispatch(1);
+
+    // After the second dispatch the value in storageTexture1 should be 2u.
+    pass.SetBindGroup(0, bindGroupB);
+    pass.Dispatch(1);
+
+    pass.EndPass();
+
+    wgpu::BufferDescriptor bufferDescriptor;
+    bufferDescriptor.size = sizeof(uint32_t);
+    bufferDescriptor.usage = wgpu::BufferUsage::CopySrc | wgpu::BufferUsage::CopyDst;
+    wgpu::Buffer resultBuffer = device.CreateBuffer(&bufferDescriptor);
+
+    wgpu::TextureCopyView textureCopyView;
+    textureCopyView.texture = storageTexture1;
+
+    wgpu::BufferCopyView bufferCopyView = utils::CreateBufferCopyView(resultBuffer, 0, 256, 1);
+    wgpu::Extent3D extent3D = {1, 1, 1};
+    encoder.CopyTextureToBuffer(&textureCopyView, &bufferCopyView, &extent3D);
+
+    wgpu::CommandBuffer commands = encoder.Finish();
+    queue.Submit(1, &commands);
+
+    constexpr uint32_t kFinalPixelValueInTexture1 = 2u;
+    EXPECT_BUFFER_U32_EQ(kFinalPixelValueInTexture1, resultBuffer, 0);
+}
+
+// Test that multiple dispatches to increment values by ping-ponging between a sampled texture and
+// a write-only storage texture are synchronized in one pass.
+TEST_P(StorageTextureTests, SampledAndWriteonlyStorageTexturePingPong) {
+    constexpr wgpu::TextureFormat kTextureFormat = wgpu::TextureFormat::R32Uint;
+    wgpu::Texture storageTexture1 = CreateTexture(
+        kTextureFormat,
+        wgpu::TextureUsage::Sampled | wgpu::TextureUsage::Storage | wgpu::TextureUsage::CopySrc, 1u,
+        1u);
+    wgpu::Texture storageTexture2 = CreateTexture(
+        kTextureFormat, wgpu::TextureUsage::Sampled | wgpu::TextureUsage::Storage, 1u, 1u);
+    wgpu::SamplerDescriptor samplerDesc;
+    wgpu::Sampler sampler = device.CreateSampler(&samplerDesc);
+
+    wgpu::ShaderModule module =
+        utils::CreateShaderModule(device, utils::SingleShaderStage::Compute, R"(
+        #version 450
+        layout(set = 0, binding = 0) uniform sampler mySampler;
+        layout(set = 0, binding = 1) uniform utexture2D Src;
+        layout(set = 0, binding = 2, r32ui) uniform writeonly uimage2D Dst;
+        void main() {
+            uvec4 srcValue = texelFetch(usampler2D(Src, mySampler), ivec2(0, 0), 0);
+            ++srcValue.x;
+            imageStore(Dst, ivec2(0, 0), srcValue);
+        }
+    )");
+
+    wgpu::ComputePipelineDescriptor pipelineDesc = {};
+    pipelineDesc.computeStage.module = module;
+    pipelineDesc.computeStage.entryPoint = "main";
+    wgpu::ComputePipeline pipeline = device.CreateComputePipeline(&pipelineDesc);
+
+    // In bindGroupA storageTexture1 is bound as sampled texture and storageTexture2 is bound as
+    // write-only storage texture.
+    wgpu::BindGroup bindGroupA = utils::MakeBindGroup(device, pipeline.GetBindGroupLayout(0),
+                                                      {
+                                                          {0, sampler},
+                                                          {1, storageTexture1.CreateView()},
+                                                          {2, storageTexture2.CreateView()},
+                                                      });
+
+    // In bindGroupB storageTexture2 is bound as sampled texture and storageTexture1 is bound as
+    // write-only storage texture.
+    wgpu::BindGroup bindGroupB = utils::MakeBindGroup(device, pipeline.GetBindGroupLayout(0),
+                                                      {
+                                                          {0, sampler},
+                                                          {1, storageTexture2.CreateView()},
+                                                          {2, storageTexture1.CreateView()},
+                                                      });
+
+    wgpu::CommandEncoder encoder = device.CreateCommandEncoder();
+    wgpu::ComputePassEncoder pass = encoder.BeginComputePass();
+    pass.SetPipeline(pipeline);
+
+    // After the first dispatch the value in storageTexture2 should be 1u.
+    pass.SetBindGroup(0, bindGroupA);
+    pass.Dispatch(1);
+
+    // After the second dispatch the value in storageTexture1 should be 2u.
+    pass.SetBindGroup(0, bindGroupB);
+    pass.Dispatch(1);
+
+    pass.EndPass();
+
+    wgpu::BufferDescriptor bufferDescriptor;
+    bufferDescriptor.size = sizeof(uint32_t);
+    bufferDescriptor.usage = wgpu::BufferUsage::CopySrc | wgpu::BufferUsage::CopyDst;
+    wgpu::Buffer resultBuffer = device.CreateBuffer(&bufferDescriptor);
+
+    wgpu::TextureCopyView textureCopyView;
+    textureCopyView.texture = storageTexture1;
+
+    wgpu::BufferCopyView bufferCopyView = utils::CreateBufferCopyView(resultBuffer, 0, 256, 1);
+    wgpu::Extent3D extent3D = {1, 1, 1};
+    encoder.CopyTextureToBuffer(&textureCopyView, &bufferCopyView, &extent3D);
+
+    wgpu::CommandBuffer commands = encoder.Finish();
+    queue.Submit(1, &commands);
+
+    constexpr uint32_t kFinalPixelValueInTexture1 = 2u;
+    EXPECT_BUFFER_U32_EQ(kFinalPixelValueInTexture1, resultBuffer, 0);
+}
+
 DAWN_INSTANTIATE_TEST(StorageTextureTests,
                       D3D12Backend(),
                       MetalBackend(),