Vulkan: Fix layout of Sampled+ROStorage texture.

Vulkan requires that storage images be in the GENERAL layout, and requires
that we choose a layout at VkDescriptorSet creation. This means that
since Sampled+ROStorage texture may sometimes be used as both usages in
the same pass, they must always be in the GENERAL layout even for
SampledTexture bindings.

Fix this by looking at the texture's creation usage in VulkanImageLayout
for wgpu::TextureUsage::Sampled.

Also add a regression test that triggers a Vulkan Validation Layer error
without this fix.

Bug: dawn:635

Change-Id: I4a5b94e1af20839b3b8cc080d36fca59d79f09bb
Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/38107
Commit-Queue: Corentin Wallez <cwallez@chromium.org>
Auto-Submit: Corentin Wallez <cwallez@chromium.org>
Reviewed-by: Austin Eng <enga@chromium.org>
diff --git a/src/dawn_native/vulkan/BindGroupVk.cpp b/src/dawn_native/vulkan/BindGroupVk.cpp
index b993090..e80f6e2 100644
--- a/src/dawn_native/vulkan/BindGroupVk.cpp
+++ b/src/dawn_native/vulkan/BindGroupVk.cpp
@@ -84,10 +84,10 @@
                     TextureView* view = ToBackend(GetBindingAsTextureView(bindingIndex));
 
                     writeImageInfo[numWrites].imageView = view->GetHandle();
-                    // TODO(cwallez@chromium.org): This isn't true in general: if the image has
-                    // two read-only usages one of which is Sampled. Works for now though :)
-                    writeImageInfo[numWrites].imageLayout =
-                        VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
+                    // The layout may be GENERAL here because of interactions between the Sampled
+                    // and ReadOnlyStorage usages. See the logic in VulkanImageLayout.
+                    writeImageInfo[numWrites].imageLayout = VulkanImageLayout(
+                        ToBackend(view->GetTexture()), wgpu::TextureUsage::Sampled);
 
                     write.pImageInfo = &writeImageInfo[numWrites];
                     break;
diff --git a/src/dawn_native/vulkan/TextureVk.cpp b/src/dawn_native/vulkan/TextureVk.cpp
index 218e60b..d145104 100644
--- a/src/dawn_native/vulkan/TextureVk.cpp
+++ b/src/dawn_native/vulkan/TextureVk.cpp
@@ -105,48 +105,6 @@
             return flags;
         }
 
-        // Chooses which Vulkan image layout should be used for the given Dawn usage
-        VkImageLayout VulkanImageLayout(wgpu::TextureUsage usage, const Format& format) {
-            if (usage == wgpu::TextureUsage::None) {
-                return VK_IMAGE_LAYOUT_UNDEFINED;
-            }
-
-            if (!wgpu::HasZeroOrOneBits(usage)) {
-                return VK_IMAGE_LAYOUT_GENERAL;
-            }
-
-            // Usage has a single bit so we can switch on its value directly.
-            switch (usage) {
-                case wgpu::TextureUsage::CopyDst:
-                    return VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
-                case wgpu::TextureUsage::Sampled:
-                    return VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
-                // Vulkan texture copy functions require the image to be in _one_  known layout.
-                // Depending on whether parts of the texture have been transitioned to only
-                // CopySrc or a combination with something else, the texture could be in a
-                // combination of GENERAL and TRANSFER_SRC_OPTIMAL. This would be a problem, so we
-                // make CopySrc use GENERAL.
-                case wgpu::TextureUsage::CopySrc:
-                // Read-only and write-only storage textures must use general layout because load
-                // and store operations on storage images can only be done on the images in
-                // VK_IMAGE_LAYOUT_GENERAL layout.
-                case wgpu::TextureUsage::Storage:
-                case kReadOnlyStorageTexture:
-                    return VK_IMAGE_LAYOUT_GENERAL;
-                case wgpu::TextureUsage::RenderAttachment:
-                    if (format.HasDepthOrStencil()) {
-                        return VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;
-                    } else {
-                        return VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
-                    }
-                case kPresentTextureUsage:
-                    return VK_IMAGE_LAYOUT_PRESENT_SRC_KHR;
-
-                case wgpu::TextureUsage::None:
-                    UNREACHABLE();
-            }
-        }
-
         // Computes which Vulkan pipeline stage can access a texture in the given Dawn usage
         VkPipelineStageFlags VulkanPipelineStage(wgpu::TextureUsage usage, const Format& format) {
             VkPipelineStageFlags flags = 0;
@@ -205,19 +163,18 @@
             return flags;
         }
 
-        VkImageMemoryBarrier BuildMemoryBarrier(const Format& format,
-                                                const VkImage& image,
+        VkImageMemoryBarrier BuildMemoryBarrier(const Texture* texture,
                                                 wgpu::TextureUsage lastUsage,
                                                 wgpu::TextureUsage usage,
                                                 const SubresourceRange& range) {
             VkImageMemoryBarrier barrier;
             barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
             barrier.pNext = nullptr;
-            barrier.srcAccessMask = VulkanAccessFlags(lastUsage, format);
-            barrier.dstAccessMask = VulkanAccessFlags(usage, format);
-            barrier.oldLayout = VulkanImageLayout(lastUsage, format);
-            barrier.newLayout = VulkanImageLayout(usage, format);
-            barrier.image = image;
+            barrier.srcAccessMask = VulkanAccessFlags(lastUsage, texture->GetFormat());
+            barrier.dstAccessMask = VulkanAccessFlags(usage, texture->GetFormat());
+            barrier.oldLayout = VulkanImageLayout(texture, lastUsage);
+            barrier.newLayout = VulkanImageLayout(texture, usage);
+            barrier.image = texture->GetHandle();
             barrier.subresourceRange.aspectMask = VulkanAspectMask(range.aspects);
             barrier.subresourceRange.baseMipLevel = range.baseMipLevel;
             barrier.subresourceRange.levelCount = range.levelCount;
@@ -409,6 +366,68 @@
         return flags;
     }
 
+    // Chooses which Vulkan image layout should be used for the given Dawn usage. Note that this
+    // layout must match the layout given to various Vulkan operations as well as the layout given
+    // to descriptor set writes.
+    VkImageLayout VulkanImageLayout(const Texture* texture, wgpu::TextureUsage usage) {
+        if (usage == wgpu::TextureUsage::None) {
+            return VK_IMAGE_LAYOUT_UNDEFINED;
+        }
+
+        if (!wgpu::HasZeroOrOneBits(usage)) {
+            // Sampled | ReadOnlyStorage is the only possible multi-bit usage, if more appear  we
+            // might need additional special-casing.
+            ASSERT(usage == (wgpu::TextureUsage::Sampled | kReadOnlyStorageTexture));
+            return VK_IMAGE_LAYOUT_GENERAL;
+        }
+
+        // Usage has a single bit so we can switch on its value directly.
+        switch (usage) {
+            case wgpu::TextureUsage::CopyDst:
+                return VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
+
+                // A texture that's sampled and storage may be used as both usages in the same pass.
+                // When that happens, the layout must be GENERAL because that's a requirement for
+                // the storage usage. We can't know at bindgroup creation time if that case will
+                // happen so we must prepare for the pessimistic case and always use the GENERAL
+                // layout.
+            case wgpu::TextureUsage::Sampled:
+                if (texture->GetUsage() & wgpu::TextureUsage::Storage) {
+                    return VK_IMAGE_LAYOUT_GENERAL;
+                } else {
+                    return VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
+                }
+
+                // Vulkan texture copy functions require the image to be in _one_  known layout.
+                // Depending on whether parts of the texture have been transitioned to only CopySrc
+                // or a combination with something else, the texture could be in a combination of
+                // GENERAL and TRANSFER_SRC_OPTIMAL. This would be a problem, so we make CopySrc use
+                // GENERAL.
+                // TODO(cwallez@chromium.org): We no longer need to transition resources all at
+                // once and can instead track subresources so we should lift this limitation.
+            case wgpu::TextureUsage::CopySrc:
+                // Read-only and write-only storage textures must use general layout because load
+                // and store operations on storage images can only be done on the images in
+                // VK_IMAGE_LAYOUT_GENERAL layout.
+            case wgpu::TextureUsage::Storage:
+            case kReadOnlyStorageTexture:
+                return VK_IMAGE_LAYOUT_GENERAL;
+
+            case wgpu::TextureUsage::RenderAttachment:
+                if (texture->GetFormat().HasDepthOrStencil()) {
+                    return VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;
+                } else {
+                    return VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
+                }
+
+            case kPresentTextureUsage:
+                return VK_IMAGE_LAYOUT_PRESENT_SRC_KHR;
+
+            case wgpu::TextureUsage::None:
+                UNREACHABLE();
+        }
+    }
+
     VkSampleCountFlagBits VulkanSampleCount(uint32_t sampleCount) {
         switch (sampleCount) {
             case 1:
@@ -650,7 +669,7 @@
         barrier.dstAccessMask = 0;  // The barrier must be paired with another barrier that will
                                     // specify the dst access mask on the importing queue.
 
-        barrier.oldLayout = VulkanImageLayout(usage, GetFormat());
+        barrier.oldLayout = VulkanImageLayout(this, usage);
         if (desiredLayout == VK_IMAGE_LAYOUT_UNDEFINED) {
             // VK_IMAGE_LAYOUT_UNDEFINED is invalid here. We use it as a
             // special value to indicate no layout transition should be done.
@@ -748,7 +767,7 @@
         if (mExternalState == ExternalState::PendingAcquire) {
             if (barriers->size() == transitionBarrierStart) {
                 barriers->push_back(BuildMemoryBarrier(
-                    GetFormat(), mHandle, wgpu::TextureUsage::None, wgpu::TextureUsage::None,
+                    this, wgpu::TextureUsage::None, wgpu::TextureUsage::None,
                     SubresourceRange::SingleMipAndLayer(0, 0, GetFormat().aspects)));
             }
 
@@ -887,8 +906,7 @@
                     return;
                 }
 
-                imageBarriers->push_back(
-                    BuildMemoryBarrier(format, mHandle, *lastUsage, newUsage, range));
+                imageBarriers->push_back(BuildMemoryBarrier(this, *lastUsage, newUsage, range));
 
                 allLastUsages |= *lastUsage;
                 allUsages |= newUsage;
@@ -963,17 +981,17 @@
         ASSERT(GetDimension() == wgpu::TextureDimension::e2D);
 
         wgpu::TextureUsage allLastUsages = wgpu::TextureUsage::None;
-        mSubresourceLastUsages.Update(range, [&](const SubresourceRange& range,
-                                                 wgpu::TextureUsage* lastUsage) {
-            if (CanReuseWithoutBarrier(*lastUsage, usage)) {
-                return;
-            }
+        mSubresourceLastUsages.Update(
+            range, [&](const SubresourceRange& range, wgpu::TextureUsage* lastUsage) {
+                if (CanReuseWithoutBarrier(*lastUsage, usage)) {
+                    return;
+                }
 
-            imageBarriers->push_back(BuildMemoryBarrier(format, mHandle, *lastUsage, usage, range));
+                imageBarriers->push_back(BuildMemoryBarrier(this, *lastUsage, usage, range));
 
-            allLastUsages |= *lastUsage;
-            *lastUsage = usage;
-        });
+                allLastUsages |= *lastUsage;
+                *lastUsage = usage;
+            });
 
         *srcStages |= VulkanPipelineStage(allLastUsages, format);
         *dstStages |= VulkanPipelineStage(usage, format);
@@ -1131,7 +1149,7 @@
     }
 
     VkImageLayout Texture::GetCurrentLayoutForSwapChain() const {
-        return VulkanImageLayout(mSubresourceLastUsages.Get(Aspect::Color, 0, 0), GetFormat());
+        return VulkanImageLayout(this, mSubresourceLastUsages.Get(Aspect::Color, 0, 0));
     }
 
     // static
diff --git a/src/dawn_native/vulkan/TextureVk.h b/src/dawn_native/vulkan/TextureVk.h
index 2e4f79d..9af6701 100644
--- a/src/dawn_native/vulkan/TextureVk.h
+++ b/src/dawn_native/vulkan/TextureVk.h
@@ -27,9 +27,11 @@
 
     struct CommandRecordingContext;
     class Device;
+    class Texture;
 
     VkFormat VulkanImageFormat(const Device* device, wgpu::TextureFormat format);
     VkImageUsageFlags VulkanImageUsage(wgpu::TextureUsage usage, const Format& format);
+    VkImageLayout VulkanImageLayout(const Texture* texture, wgpu::TextureUsage usage);
     VkSampleCountFlagBits VulkanSampleCount(uint32_t sampleCount);
 
     MaybeError ValidateVulkanImageCanBeWrapped(const DeviceBase* device,
diff --git a/src/tests/end2end/GpuMemorySynchronizationTests.cpp b/src/tests/end2end/GpuMemorySynchronizationTests.cpp
index 4db671c..e5f97f8 100644
--- a/src/tests/end2end/GpuMemorySynchronizationTests.cpp
+++ b/src/tests/end2end/GpuMemorySynchronizationTests.cpp
@@ -234,6 +234,75 @@
     EXPECT_PIXEL_RGBA8_EQ(RGBA8(2, 0, 0, 255), renderPass.color, 0, 0);
 }
 
+// Use an image as both sampled and readonly storage in a compute pass. This is a regression test
+// for the Vulkan backend choosing different layouts for Sampled and ReadOnlyStorage.
+TEST_P(GpuMemorySyncTests, SampledAndROStorageTextureInComputePass) {
+    // TODO(dawn:483): Test is skipped on OpenGL because it uses WriteTexture which is
+    // unimplemented.
+    DAWN_SKIP_TEST_IF(IsOpenGL() || IsOpenGLES());
+
+    // Create a storage + sampled texture of one texel initialized to 1
+    wgpu::TextureDescriptor texDesc;
+    texDesc.format = wgpu::TextureFormat::R32Uint;
+    texDesc.size = {1, 1, 1};
+    texDesc.usage =
+        wgpu::TextureUsage::Storage | wgpu::TextureUsage::Sampled | wgpu::TextureUsage::CopyDst;
+    wgpu::Texture tex = device.CreateTexture(&texDesc);
+
+    wgpu::TextureCopyView copyView;
+    copyView.texture = tex;
+    wgpu::TextureDataLayout layout;
+    wgpu::Extent3D copySize = {1, 1, 1};
+    uint32_t kOne = 1;
+    queue.WriteTexture(&copyView, &kOne, sizeof(kOne), &layout, &copySize);
+
+    // Create a pipeline that loads the texture from both the sampled and storage paths.
+    wgpu::ComputePipelineDescriptor pipelineDesc;
+    pipelineDesc.computeStage.entryPoint = "main";
+    pipelineDesc.computeStage.module = utils::CreateShaderModuleFromWGSL(device, R"(
+        [[block]] struct Output {
+            [[offset(0)]] sampledOut: u32;
+            [[offset(4)]] storageOut: u32;
+        };
+        [[group(0), binding(0)]] var<storage_buffer> output : [[access(write)]] Output;
+        [[group(0), binding(1)]] var<uniform_constant> sampledTex : texture_2d<u32>;
+        [[group(0), binding(2)]] var<uniform_constant> storageTex : [[access(read)]] texture_storage_2d<r32uint>;
+
+        [[stage(compute)]] fn main() -> void {
+            output.sampledOut = textureLoad(sampledTex, vec2<i32>(0, 0), 0).x;
+            output.storageOut = textureLoad(storageTex, vec2<i32>(0, 0)).x;
+        }
+    )");
+    wgpu::ComputePipeline pipeline = device.CreateComputePipeline(&pipelineDesc);
+
+    // Run the compute pipeline and store the result in the buffer.
+    wgpu::BufferDescriptor outputDesc;
+    outputDesc.size = 8;
+    outputDesc.usage = wgpu::BufferUsage::Storage | wgpu::BufferUsage::CopySrc;
+    wgpu::Buffer outputBuffer = device.CreateBuffer(&outputDesc);
+
+    wgpu::BindGroup bg = utils::MakeBindGroup(device, pipeline.GetBindGroupLayout(0),
+                                              {
+                                                  {0, outputBuffer},
+                                                  {1, tex.CreateView()},
+                                                  {2, tex.CreateView()},
+                                              });
+
+    wgpu::CommandEncoder encoder = device.CreateCommandEncoder();
+    wgpu::ComputePassEncoder pass = encoder.BeginComputePass();
+    pass.SetBindGroup(0, bg);
+    pass.SetPipeline(pipeline);
+    pass.Dispatch(1);
+    pass.EndPass();
+
+    wgpu::CommandBuffer commands = encoder.Finish();
+    queue.Submit(1, &commands);
+
+    // Check the buffer's content is what we expect.
+    EXPECT_BUFFER_U32_EQ(1, outputBuffer, 0);
+    EXPECT_BUFFER_U32_EQ(1, outputBuffer, 4);
+}
+
 DAWN_INSTANTIATE_TEST(GpuMemorySyncTests,
                       D3D12Backend(),
                       MetalBackend(),