Support PLS on Metal tiler GPUs.

This is implemented by making storage attachments (and implicit PLS
slots) render attachments that are packed alongside the other color
attachments.
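
For example, with regular color attachments bound at indices 0 and 2,
two storage attachment slots get packed into color attachment indices 1
and 3. A minimal standalone sketch of that packing (the real logic is
AttachmentState::ComputeStorageAttachmentPackingInColorAttachments in
the diff below; names and the fixed bitset width are illustrative):

    #include <bitset>
    #include <cstdint>
    #include <vector>

    // Assign each PLS storage slot the lowest color attachment index not
    // already taken by a regular color attachment. Assumes enough free
    // indices exist (validation guarantees this elsewhere).
    std::vector<uint8_t> PackStorageSlots(std::bitset<8> colorSlotsUsed,
                                          size_t storageSlotCount) {
        std::vector<uint8_t> packing;
        for (uint8_t i = 0; packing.size() < storageSlotCount; i++) {
            if (!colorSlotsUsed[i]) {
                packing.push_back(i);
            }
        }
        return packing;
    }

    // PackStorageSlots(0b0101, 2) == {1, 3}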

End-to-end tests are added that check a variety of configurations,
including mixed implicit vs explicit slots, copying to a render
attachment, etc.

Bug: dawn:1704

Change-Id: I45f3b2b4e67d24a1fdc1111161f7de56da7ea9fc
Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/150060
Reviewed-by: Austin Eng <enga@chromium.org>
Commit-Queue: Corentin Wallez <cwallez@chromium.org>
Kokoro: Kokoro <noreply+kokoro@google.com>
diff --git a/src/dawn/native/AttachmentState.cpp b/src/dawn/native/AttachmentState.cpp
index f783b74..c7ba823 100644
--- a/src/dawn/native/AttachmentState.cpp
+++ b/src/dawn/native/AttachmentState.cpp
@@ -279,4 +279,25 @@
 const std::vector<wgpu::TextureFormat>& AttachmentState::GetStorageAttachmentSlots() const {
     return mStorageAttachmentSlots;
 }
+
+std::vector<ColorAttachmentIndex>
+AttachmentState::ComputeStorageAttachmentPackingInColorAttachments() const {
+    // TODO(dawn:1704): Consider caching this on AttachmentState creation, but would it then need
+    // to become part of the hashing and comparison operators?
+    // Fill the result with garbage data to more easily detect cases where an incorrect slot is
+    // accessed.
+    std::vector<ColorAttachmentIndex> result(
+        mStorageAttachmentSlots.size(), ColorAttachmentIndex(uint8_t(kMaxColorAttachments + 1)));
+
+    // Iterate over the empty bits of mColorAttachmentsSet to pack storage attachments into them.
+    auto availableSlots = ~mColorAttachmentsSet;
+    for (size_t i = 0; i < mStorageAttachmentSlots.size(); i++) {
+        DAWN_ASSERT(!availableSlots.none());
+        auto slot = ColorAttachmentIndex(uint8_t(ScanForward(availableSlots.to_ulong())));
+        availableSlots.reset(slot);
+        result[i] = slot;
+    }
+
+    return result;
+}
+
 }  // namespace dawn::native
diff --git a/src/dawn/native/AttachmentState.h b/src/dawn/native/AttachmentState.h
index 94383c6..a1ca097 100644
--- a/src/dawn/native/AttachmentState.h
+++ b/src/dawn/native/AttachmentState.h
@@ -68,6 +68,7 @@
     bool IsMSAARenderToSingleSampledEnabled() const;
     bool HasPixelLocalStorage() const;
     const std::vector<wgpu::TextureFormat>& GetStorageAttachmentSlots() const;
+    std::vector<ColorAttachmentIndex> ComputeStorageAttachmentPackingInColorAttachments() const;
 
     struct EqualityFunc {
         bool operator()(const AttachmentState* a, const AttachmentState* b) const;
diff --git a/src/dawn/native/CommandBuffer.cpp b/src/dawn/native/CommandBuffer.cpp
index 1e189bb..93bdc4d 100644
--- a/src/dawn/native/CommandBuffer.cpp
+++ b/src/dawn/native/CommandBuffer.cpp
@@ -224,6 +224,41 @@
         view->GetTexture()->SetIsSubresourceContentInitialized(
             attachmentInfo.stencilStoreOp == wgpu::StoreOp::Store, stencilRange);
     }
+
+    if (renderPass->attachmentState->HasPixelLocalStorage()) {
+        for (auto& attachmentInfo : renderPass->storageAttachments) {
+            TextureViewBase* view = attachmentInfo.storage.Get();
+
+            if (view == nullptr) {
+                continue;
+            }
+
+            DAWN_ASSERT(view->GetLayerCount() == 1);
+            DAWN_ASSERT(view->GetLevelCount() == 1);
+            const SubresourceRange& range = view->GetSubresourceRange();
+
+            // If the loadOp is Load, but the subresource is not initialized, use Clear instead.
+            if (attachmentInfo.loadOp == wgpu::LoadOp::Load &&
+                !view->GetTexture()->IsSubresourceContentInitialized(range)) {
+                attachmentInfo.loadOp = wgpu::LoadOp::Clear;
+                attachmentInfo.clearColor = {0.f, 0.f, 0.f, 0.f};
+            }
+
+            switch (attachmentInfo.storeOp) {
+                case wgpu::StoreOp::Store:
+                    view->GetTexture()->SetIsSubresourceContentInitialized(true, range);
+                    break;
+
+                case wgpu::StoreOp::Discard:
+                    view->GetTexture()->SetIsSubresourceContentInitialized(false, range);
+                    break;
+
+                case wgpu::StoreOp::Undefined:
+                    DAWN_UNREACHABLE();
+                    break;
+            }
+        }
+    }
 }
 
 bool IsFullBufferOverwrittenInTextureToBufferCopy(const CopyTextureToBufferCmd* copy) {
diff --git a/src/dawn/native/CommandEncoder.cpp b/src/dawn/native/CommandEncoder.cpp
index bf638d2..ee12458 100644
--- a/src/dawn/native/CommandEncoder.cpp
+++ b/src/dawn/native/CommandEncoder.cpp
@@ -1185,7 +1185,17 @@
             FindInChain(descriptor->nextInChain, &pls);
             if (pls != nullptr) {
                 for (size_t i = 0; i < pls->storageAttachmentCount; i++) {
-                    usageTracker.TextureViewUsedAs(pls->storageAttachments[i].storage,
+                    const RenderPassStorageAttachment& apiAttachment = pls->storageAttachments[i];
+                    RenderPassStorageAttachmentInfo* attachmentInfo =
+                        &cmd->storageAttachments[apiAttachment.offset / kPLSSlotByteSize];
+
+                    attachmentInfo->storage = apiAttachment.storage;
+                    attachmentInfo->loadOp = apiAttachment.loadOp;
+                    attachmentInfo->storeOp = apiAttachment.storeOp;
+                    attachmentInfo->clearColor = ClampClearColorValueToLegalRange(
+                        apiAttachment.clearValue, apiAttachment.storage->GetFormat());
+
+                    usageTracker.TextureViewUsedAs(apiAttachment.storage,
                                                    wgpu::TextureUsage::StorageAttachment);
                 }
             }
diff --git a/src/dawn/native/Commands.cpp b/src/dawn/native/Commands.cpp
index 1392ca9..ac9adc2 100644
--- a/src/dawn/native/Commands.cpp
+++ b/src/dawn/native/Commands.cpp
@@ -396,6 +396,9 @@
 RenderPassColorAttachmentInfo::RenderPassColorAttachmentInfo() = default;
 RenderPassColorAttachmentInfo::~RenderPassColorAttachmentInfo() = default;
 
+RenderPassStorageAttachmentInfo::RenderPassStorageAttachmentInfo() = default;
+RenderPassStorageAttachmentInfo::~RenderPassStorageAttachmentInfo() = default;
+
 RenderPassDepthStencilAttachmentInfo::RenderPassDepthStencilAttachmentInfo() = default;
 RenderPassDepthStencilAttachmentInfo::~RenderPassDepthStencilAttachmentInfo() = default;
 
diff --git a/src/dawn/native/Commands.h b/src/dawn/native/Commands.h
index 9602b81..4204880 100644
--- a/src/dawn/native/Commands.h
+++ b/src/dawn/native/Commands.h
@@ -122,6 +122,16 @@
     dawn::native::Color clearColor;
 };
 
+struct RenderPassStorageAttachmentInfo {
+    RenderPassStorageAttachmentInfo();
+    ~RenderPassStorageAttachmentInfo();
+
+    Ref<TextureViewBase> storage;
+    wgpu::LoadOp loadOp;
+    wgpu::StoreOp storeOp;
+    dawn::native::Color clearColor;
+};
+
 struct RenderPassDepthStencilAttachmentInfo {
     RenderPassDepthStencilAttachmentInfo();
     ~RenderPassDepthStencilAttachmentInfo();
@@ -146,6 +156,8 @@
         colorAttachments;
     RenderPassDepthStencilAttachmentInfo depthStencilAttachment;
 
+    std::array<RenderPassStorageAttachmentInfo, kMaxPLSSlots> storageAttachments;
+
     // Cache the width and height of all attachments for convenience
     uint32_t width;
     uint32_t height;
diff --git a/src/dawn/native/Device.h b/src/dawn/native/Device.h
index e084016..0b628d2 100644
--- a/src/dawn/native/Device.h
+++ b/src/dawn/native/Device.h
@@ -463,7 +463,7 @@
     // DAWN_ASSERT(device.IsLockedByCurrentThread())
     bool IsLockedByCurrentThreadIfNeeded() const;
 
-    // TODO(dawn:XXX): remove this enum forwarding once no longer necessary.
+    // TODO(dawn:1413): remove this enum forwarding once no longer necessary.
     using SubmitMode = ExecutionQueueBase::SubmitMode;
 
     // TODO(dawn:1413): Remove this proxy methods in favor of using the ExecutionQueue directly.
diff --git a/src/dawn/native/metal/BackendMTL.mm b/src/dawn/native/metal/BackendMTL.mm
index c543b4d..eea91c1 100644
--- a/src/dawn/native/metal/BackendMTL.mm
+++ b/src/dawn/native/metal/BackendMTL.mm
@@ -564,6 +564,14 @@
             }
         }
 
+        if (@available(macOS 11.0, iOS 10.0, *)) {
+            // Image block functionality is available starting from the Apple4 family.
+            if ([*mDevice supportsFamily:MTLGPUFamilyApple4]) {
+                EnableFeature(Feature::PixelLocalStorageCoherent);
+                EnableFeature(Feature::PixelLocalStorageNonCoherent);
+            }
+        }
+
         EnableFeature(Feature::IndirectFirstInstance);
         EnableFeature(Feature::ShaderF16);
         EnableFeature(Feature::RG11B10UfloatRenderable);
diff --git a/src/dawn/native/metal/CommandBufferMTL.mm b/src/dawn/native/metal/CommandBufferMTL.mm
index 3bf552c..c17f524 100644
--- a/src/dawn/native/metal/CommandBufferMTL.mm
+++ b/src/dawn/native/metal/CommandBufferMTL.mm
@@ -328,6 +328,80 @@
         }
     }
 
+    if (renderPass->attachmentState->HasPixelLocalStorage()) {
+        const std::vector<wgpu::TextureFormat>& storageAttachmentSlots =
+            renderPass->attachmentState->GetStorageAttachmentSlots();
+        std::vector<ColorAttachmentIndex> storageAttachmentPacking =
+            renderPass->attachmentState->ComputeStorageAttachmentPackingInColorAttachments();
+
+        for (size_t attachment = 0; attachment < storageAttachmentSlots.size(); attachment++) {
+            uint8_t i = static_cast<uint8_t>(storageAttachmentPacking[attachment]);
+            MTLRenderPassColorAttachmentDescriptor* mtlAttachment = descriptor.colorAttachments[i];
+
+            // For implicit pixel local storage slots, use transient memoryless textures.
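+            // Memoryless textures live only in on-chip tile memory on Apple GPUs and never get a
+            // system-memory allocation, which fits PLS slots whose contents don't outlive the
+            // render pass.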
+            if (storageAttachmentSlots[attachment] == wgpu::TextureFormat::Undefined) {
+                NSRef<MTLTextureDescriptor> texDescRef = AcquireNSRef([MTLTextureDescriptor new]);
+                MTLTextureDescriptor* texDesc = texDescRef.Get();
+                texDesc.textureType = MTLTextureType2D;
+                texDesc.width = renderPass->width;
+                texDesc.height = renderPass->height;
+                texDesc.usage = MTLTextureUsageRenderTarget;
+                if (@available(macOS 11.0, iOS 10.0, *)) {
+                    texDesc.storageMode = MTLStorageModeMemoryless;
+                } else {
+                    DAWN_UNREACHABLE();
+                }
+                texDesc.pixelFormat =
+                    MetalPixelFormat(device, RenderPipeline::kImplicitPLSSlotFormat);
+
+                NSPRef<id<MTLTexture>> implicitAttachment =
+                    AcquireNSPRef([device->GetMTLDevice() newTextureWithDescriptor:texDesc]);
+
+                mtlAttachment.loadAction = MTLLoadActionClear;
+                mtlAttachment.clearColor = MTLClearColorMake(0, 0, 0, 0);
+                mtlAttachment.storeAction = MTLStoreActionDontCare;
+                mtlAttachment.texture = *implicitAttachment;
+                continue;
+            }
+
+            auto& attachmentInfo = renderPass->storageAttachments[attachment];
+
+            switch (attachmentInfo.loadOp) {
+                case wgpu::LoadOp::Clear:
+                    mtlAttachment.loadAction = MTLLoadActionClear;
+                    mtlAttachment.clearColor =
+                        MTLClearColorMake(attachmentInfo.clearColor.r, attachmentInfo.clearColor.g,
+                                          attachmentInfo.clearColor.b, attachmentInfo.clearColor.a);
+                    break;
+
+                case wgpu::LoadOp::Load:
+                    mtlAttachment.loadAction = MTLLoadActionLoad;
+                    break;
+
+                case wgpu::LoadOp::Undefined:
+                    DAWN_UNREACHABLE();
+                    break;
+            }
+
+            switch (attachmentInfo.storeOp) {
+                case wgpu::StoreOp::Store:
+                    mtlAttachment.storeAction = MTLStoreActionStore;
+                    break;
+                case wgpu::StoreOp::Discard:
+                    mtlAttachment.storeAction = MTLStoreActionDontCare;
+                    break;
+                case wgpu::StoreOp::Undefined:
+                    DAWN_UNREACHABLE();
+                    break;
+            }
+
+            auto storageAttachment = ToBackend(attachmentInfo.storage)->GetAttachmentInfo();
+            mtlAttachment.texture = storageAttachment.texture.Get();
+            mtlAttachment.level = storageAttachment.baseMipLevel;
+            mtlAttachment.slice = storageAttachment.baseArrayLayer;
+        }
+    }
+
     return descriptorRef;
 }
 
@@ -746,12 +820,13 @@
         for (size_t i = 0; i < scope.textures.size(); ++i) {
             Texture* texture = ToBackend(scope.textures[i]);
 
-            // Clear subresources that are not render attachments. Render attachments will be
-            // cleared in RecordBeginRenderPass by setting the loadop to clear when the texture
-            // subresource has not been initialized before the render pass.
+            // Clear subresources that are not attachments. Attachments will be cleared in
+            // RecordBeginRenderPass by setting the loadop to clear when the texture subresource
+            // has not been initialized before the render pass.
             DAWN_TRY(scope.textureUsages[i].Iterate([&](const SubresourceRange& range,
                                                         wgpu::TextureUsage usage) -> MaybeError {
-                if (usage & ~wgpu::TextureUsage::RenderAttachment) {
+                if (usage & ~(wgpu::TextureUsage::RenderAttachment |
+                              wgpu::TextureUsage::StorageAttachment)) {
                     DAWN_TRY(texture->EnsureSubresourceContentInitialized(commandContext, range));
                 }
                 return {};
diff --git a/src/dawn/native/metal/DeviceMTL.h b/src/dawn/native/metal/DeviceMTL.h
index 6e29a98..62ba2e3 100644
--- a/src/dawn/native/metal/DeviceMTL.h
+++ b/src/dawn/native/metal/DeviceMTL.h
@@ -60,7 +60,7 @@
 
     MaybeError TickImpl() override;
 
-    id<MTLDevice> GetMTLDevice();
+    id<MTLDevice> GetMTLDevice() const;
 
     // TODO(dawn:1413) Use the metal::Queue directly instead of this proxy method.
     CommandRecordingContext* GetPendingCommandContext(
diff --git a/src/dawn/native/metal/DeviceMTL.mm b/src/dawn/native/metal/DeviceMTL.mm
index cd45955..30b1645 100644
--- a/src/dawn/native/metal/DeviceMTL.mm
+++ b/src/dawn/native/metal/DeviceMTL.mm
@@ -305,7 +305,7 @@
     return {};
 }
 
-id<MTLDevice> Device::GetMTLDevice() {
+id<MTLDevice> Device::GetMTLDevice() const {
     return mMtlDevice.Get();
 }
 
diff --git a/src/dawn/native/metal/RenderPipelineMTL.h b/src/dawn/native/metal/RenderPipelineMTL.h
index 0033dd6..01bbcb7 100644
--- a/src/dawn/native/metal/RenderPipelineMTL.h
+++ b/src/dawn/native/metal/RenderPipelineMTL.h
@@ -62,6 +62,7 @@
     uint32_t GetMtlVertexBufferIndex(VertexBufferSlot slot) const;
 
     wgpu::ShaderStage GetStagesRequiringStorageBufferLength() const;
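+    // Each implicit PLS slot is 4 bytes (kPLSSlotByteSize), so back it with a 32-bit
+    // single-channel format.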
+    static constexpr wgpu::TextureFormat kImplicitPLSSlotFormat = wgpu::TextureFormat::R32Uint;
 
     MaybeError Initialize() override;
 
diff --git a/src/dawn/native/metal/RenderPipelineMTL.mm b/src/dawn/native/metal/RenderPipelineMTL.mm
index e5028a5..990fe00 100644
--- a/src/dawn/native/metal/RenderPipelineMTL.mm
+++ b/src/dawn/native/metal/RenderPipelineMTL.mm
@@ -392,7 +392,8 @@
         ShaderModule::MetalFunctionData fragmentData;
         DAWN_TRY(ToBackend(fragmentStage.module.Get())
                      ->CreateFunction(SingleShaderStage::Fragment, fragmentStage,
-                                      ToBackend(GetLayout()), &fragmentData, GetSampleMask()));
+                                      ToBackend(GetLayout()), &fragmentData, GetSampleMask(),
+                                      this));
 
         descriptorMTL.fragmentFunction = fragmentData.function.Get();
         if (fragmentData.needsStorageBufferLength) {
@@ -407,6 +408,25 @@
             ComputeBlendDesc(descriptorMTL.colorAttachments[static_cast<uint8_t>(i)], descriptor,
                              fragmentOutputsWritten[i]);
         }
+
+        if (GetAttachmentState()->HasPixelLocalStorage()) {
+            const std::vector<wgpu::TextureFormat>& storageAttachmentSlots =
+                GetAttachmentState()->GetStorageAttachmentSlots();
+            std::vector<ColorAttachmentIndex> storageAttachmentPacking =
+                GetAttachmentState()->ComputeStorageAttachmentPackingInColorAttachments();
+
+            for (size_t i = 0; i < storageAttachmentSlots.size(); i++) {
+                uint8_t index = static_cast<uint8_t>(storageAttachmentPacking[i]);
+
+                if (storageAttachmentSlots[i] == wgpu::TextureFormat::Undefined) {
+                    descriptorMTL.colorAttachments[index].pixelFormat =
+                        MetalPixelFormat(GetDevice(), kImplicitPLSSlotFormat);
+                } else {
+                    descriptorMTL.colorAttachments[index].pixelFormat =
+                        MetalPixelFormat(GetDevice(), storageAttachmentSlots[i]);
+                }
+            }
+        }
     }
 
     if (HasDepthStencilAttachment()) {
diff --git a/src/dawn/native/metal/ShaderModuleMTL.mm b/src/dawn/native/metal/ShaderModuleMTL.mm
index c200a1e..acebf23 100644
--- a/src/dawn/native/metal/ShaderModuleMTL.mm
+++ b/src/dawn/native/metal/ShaderModuleMTL.mm
@@ -189,6 +189,19 @@
         substituteOverrideConfig = BuildSubstituteOverridesTransformConfig(programmableStage);
     }
 
+    tint::PixelLocalOptions pixelLocal;
+    if (stage == SingleShaderStage::Fragment && layout->HasPixelLocalStorage()) {
+        const AttachmentState* attachmentState = renderPipeline->GetAttachmentState();
+        const std::vector<wgpu::TextureFormat>& storageAttachmentSlots =
+            attachmentState->GetStorageAttachmentSlots();
+        std::vector<ColorAttachmentIndex> storageAttachmentPacking =
+            attachmentState->ComputeStorageAttachmentPackingInColorAttachments();
+
+        for (size_t i = 0; i < storageAttachmentSlots.size(); i++) {
+            pixelLocal.attachments[i] = uint8_t(storageAttachmentPacking[i]);
+        }
+    }
+
     MslCompilationRequest req = {};
     req.stage = stage;
     req.inputProgram = programmableStage.module->GetTintProgram();
@@ -208,6 +221,7 @@
     req.tintOptions.array_length_from_uniform = std::move(arrayLengthFromUniform);
     req.tintOptions.binding_remapper_options = std::move(bindingRemapper);
     req.tintOptions.external_texture_options = BuildExternalTextureTransformBindings(layout);
+    req.tintOptions.pixel_local_options = std::move(pixelLocal);
 
     const CombinedLimits& limits = device->GetLimits();
     req.limits = LimitsForCompilationRequest::Create(limits.v1);
diff --git a/src/dawn/native/metal/TextureMTL.mm b/src/dawn/native/metal/TextureMTL.mm
index 32377bf..2365b60 100644
--- a/src/dawn/native/metal/TextureMTL.mm
+++ b/src/dawn/native/metal/TextureMTL.mm
@@ -70,6 +70,11 @@
         result |= MTLTextureUsageRenderTarget;
     }
 
+    if (usage & wgpu::TextureUsage::StorageAttachment) {
+        // TODO(dawn:1704): Support PLS on non-tiler Metal devices.
+        result |= MTLTextureUsageRenderTarget;
+    }
+
     return result;
 }
 
@@ -98,8 +103,9 @@
                                     const TextureViewDescriptor* textureViewDescriptor) {
     constexpr wgpu::TextureUsage kShaderUsageNeedsView =
         wgpu::TextureUsage::StorageBinding | wgpu::TextureUsage::TextureBinding;
-    constexpr wgpu::TextureUsage kUsageNeedsView =
-        kShaderUsageNeedsView | wgpu::TextureUsage::RenderAttachment;
+    constexpr wgpu::TextureUsage kUsageNeedsView = kShaderUsageNeedsView |
+                                                   wgpu::TextureUsage::RenderAttachment |
+                                                   wgpu::TextureUsage::StorageAttachment;
     if ((texture->GetInternalUsage() & kUsageNeedsView) == 0) {
         return false;
     }
@@ -1369,7 +1375,8 @@
 }
 
 TextureView::AttachmentInfo TextureView::GetAttachmentInfo() const {
-    DAWN_ASSERT(GetTexture()->GetInternalUsage() & wgpu::TextureUsage::RenderAttachment);
+    DAWN_ASSERT(GetTexture()->GetInternalUsage() &
+                (wgpu::TextureUsage::RenderAttachment | wgpu::TextureUsage::StorageAttachment));
     // Use our own view if the formats do not match.
     // If the formats do not match, format reinterpretation will be required.
     // Note: Depth/stencil formats don't support reinterpretation.
diff --git a/src/dawn/tests/BUILD.gn b/src/dawn/tests/BUILD.gn
index 2491bae..1b22bb2 100644
--- a/src/dawn/tests/BUILD.gn
+++ b/src/dawn/tests/BUILD.gn
@@ -573,6 +573,7 @@
     "end2end/OpArrayLengthTests.cpp",
     "end2end/PipelineCachingTests.cpp",
     "end2end/PipelineLayoutTests.cpp",
+    "end2end/PixelLocalStorageTests.cpp",
     "end2end/PrimitiveStateTests.cpp",
     "end2end/PrimitiveTopologyTests.cpp",
     "end2end/QueryTests.cpp",
diff --git a/src/dawn/tests/end2end/PixelLocalStorageTests.cpp b/src/dawn/tests/end2end/PixelLocalStorageTests.cpp
new file mode 100644
index 0000000..4522e4f
--- /dev/null
+++ b/src/dawn/tests/end2end/PixelLocalStorageTests.cpp
@@ -0,0 +1,630 @@
+// Copyright 2023 The Dawn & Tint Authors
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// 1. Redistributions of source code must retain the above copyright notice, this
+//    list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright notice,
+//    this list of conditions and the following disclaimer in the documentation
+//    and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the copyright holder nor the names of its
+//    contributors may be used to endorse or promote products derived from
+//    this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include <vector>
+
+#include "dawn/tests/DawnTest.h"
+#include "dawn/utils/ComboRenderPipelineDescriptor.h"
+#include "dawn/utils/WGPUHelpers.h"
+
+namespace dawn {
+namespace {
+
+class PixelLocalStorageTests : public DawnTest {
+  protected:
+    void SetUp() override {
+        DawnTest::SetUp();
+        DAWN_TEST_UNSUPPORTED_IF(
+            !device.HasFeature(wgpu::FeatureName::PixelLocalStorageCoherent) &&
+            !device.HasFeature(wgpu::FeatureName::PixelLocalStorageNonCoherent));
+
+        supportsCoherent = device.HasFeature(wgpu::FeatureName::PixelLocalStorageCoherent);
+    }
+
+    std::vector<wgpu::FeatureName> GetRequiredFeatures() override {
+        std::vector<wgpu::FeatureName> requiredFeatures = {};
+        if (SupportsFeatures({wgpu::FeatureName::PixelLocalStorageCoherent})) {
+            requiredFeatures.push_back(wgpu::FeatureName::PixelLocalStorageCoherent);
+            supportsCoherent = true;
+        }
+        if (SupportsFeatures({wgpu::FeatureName::PixelLocalStorageNonCoherent})) {
+            requiredFeatures.push_back(wgpu::FeatureName::PixelLocalStorageNonCoherent);
+        }
+        return requiredFeatures;
+    }
+
+    struct StorageSpec {
+        uint64_t offset;
+        wgpu::TextureFormat format;
+        wgpu::LoadOp loadOp = wgpu::LoadOp::Clear;
+        wgpu::StoreOp storeOp = wgpu::StoreOp::Store;
+        wgpu::Color clearValue = {0, 0, 0, 0};
+        bool discardAfterInit = false;
+    };
+
+    enum class CheckMethod {
+        StorageBuffer,
+        ReadStorageAttachments,
+        RenderAttachment,
+    };
+
+    struct PLSSpec {
+        uint64_t totalSize;
+        std::vector<StorageSpec> attachments;
+        CheckMethod checkMethod = CheckMethod::ReadStorageAttachments;
+    };
+
+    // Builds a shader module with multiple entry points used for testing PLS.
+    //
+    //  - A trivial vertex entrypoint to render a point.
+    //  - Various fragment entrypoints using a pixel_local block matching the `spec`.
+    //    - An accumulator entrypoint adding (slot + 1) to each pls slot so we can check that
+    //      access to the PLS is correctly synchronized.
+    //    - An entrypoint copying the PLS data to a storage buffer for readback.
+    //    - An entrypoint copying the PLS data to a render attachment for readback.
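+    //
+    // For example, an 8-byte PLS with a u32 at offset 0 and an f32 at offset 4 would produce a
+    // pixel-local block like (illustrative, reflowed on one line):
+    //
+    //   struct PLS { a0 : u32, a1 : f32, }
+    //   var<pixel_local> pls : PLS;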
+    wgpu::ShaderModule MakeTestModule(const PLSSpec& spec) const {
+        std::vector<const char*> plsTypes;
+        plsTypes.resize(spec.totalSize / kPLSSlotByteSize, "u32");
+        for (const auto& attachment : spec.attachments) {
+            switch (attachment.format) {
+                case wgpu::TextureFormat::R32Uint:
+                    plsTypes[attachment.offset / kPLSSlotByteSize] = "u32";
+                    break;
+                case wgpu::TextureFormat::R32Sint:
+                    plsTypes[attachment.offset / kPLSSlotByteSize] = "i32";
+                    break;
+                case wgpu::TextureFormat::R32Float:
+                    plsTypes[attachment.offset / kPLSSlotByteSize] = "f32";
+                    break;
+                default:
+                    DAWN_UNREACHABLE();
+            }
+        }
+
+        std::ostringstream o;
+        o << R"(
+            enable chromium_experimental_pixel_local;
+
+            @vertex fn vs() -> @builtin(position) vec4f {
+                return vec4f(0, 0, 0, 0.5);
+            }
+
+        )";
+        o << "struct PLS {\n";
+        for (size_t i = 0; i < plsTypes.size(); i++) {
+            // e.g.: a0 : u32,
+            o << "  a" << i << " : " << plsTypes[i] << ",\n";
+        }
+        o << "}\n";
+        o << "var<pixel_local> pls : PLS;\n";
+        o << "@fragment fn accumulator() {\n";
+        for (size_t i = 0; i < plsTypes.size(); i++) {
+            // e.g.: pls.a0 = pls.a0 + 1;
+            o << "    pls.a" << i << " = pls.a" << i << " + " << (i + 1) << ";\n";
+        }
+        o << "}\n";
+        o << "\n";
+        o << "@group(0) @binding(0) var<storage, read_write> readbackStorageBuffer : array<u32>;\n";
+        o << "@fragment fn readbackToStorageBuffer() {\n";
+        for (size_t i = 0; i < plsTypes.size(); i++) {
+            // e.g.: readbackStorageBuffer[0] = u32(pls.a0);
+            o << "    readbackStorageBuffer[" << i << "] = u32(pls.a" << i << ");\n";
+        }
+        o << "}\n";
+        o << "\n";
+        o << "@fragment fn copyToColorAttachment() -> @location(0) vec4f {\n";
+        o << "    var result : vec4f;\n";
+        for (size_t i = 0; i < plsTypes.size(); i++) {
+            // e.g.: result[0] = f32(pls.a0) / 255.0;
+            o << "    result[" << i << "] = f32(pls.a" << i << ") / 255.0;\n";
+        }
+        o << "    return result;";
+        o << "}\n";
+
+        return utils::CreateShaderModule(device, o.str().c_str());
+    }
+
+    wgpu::PipelineLayout MakeTestLayout(const PLSSpec& spec, wgpu::BindGroupLayout bgl = {}) const {
+        std::vector<wgpu::PipelineLayoutStorageAttachment> storageAttachments;
+        for (const auto& attachmentSpec : spec.attachments) {
+            wgpu::PipelineLayoutStorageAttachment attachment;
+            attachment.format = attachmentSpec.format;
+            attachment.offset = attachmentSpec.offset;
+            storageAttachments.push_back(attachment);
+        }
+
+        wgpu::PipelineLayoutPixelLocalStorage pls;
+        pls.totalPixelLocalStorageSize = spec.totalSize;
+        pls.storageAttachmentCount = storageAttachments.size();
+        pls.storageAttachments = storageAttachments.data();
+
+        wgpu::PipelineLayoutDescriptor plDesc;
+        plDesc.nextInChain = &pls;
+        plDesc.bindGroupLayoutCount = 0;
+        if (bgl != nullptr) {
+            plDesc.bindGroupLayoutCount = 1;
+            plDesc.bindGroupLayouts = &bgl;
+        }
+
+        return device.CreatePipelineLayout(&plDesc);
+    }
+
+    std::vector<wgpu::Texture> MakeTestStorageAttachments(const PLSSpec& spec) const {
+        std::vector<wgpu::Texture> attachments;
+        for (size_t i = 0; i < spec.attachments.size(); i++) {
+            const StorageSpec& attachmentSpec = spec.attachments[i];
+
+            wgpu::TextureDescriptor desc;
+            desc.format = attachmentSpec.format;
+            desc.size = {1, 1};
+            desc.usage = wgpu::TextureUsage::StorageAttachment | wgpu::TextureUsage::CopySrc |
+                         wgpu::TextureUsage::CopyDst;
+            if (attachmentSpec.discardAfterInit) {
+                desc.usage |= wgpu::TextureUsage::RenderAttachment;
+            }
+
+            wgpu::Texture attachment = device.CreateTexture(&desc);
+
+            // If LoadOp is Load, initialize the attachment with the clear value by copying from
+            // another texture, so that we avoid adding an extra RenderAttachment usage to the
+            // storage attachment itself.
+            if (attachmentSpec.loadOp == wgpu::LoadOp::Load) {
+                desc.usage = wgpu::TextureUsage::RenderAttachment | wgpu::TextureUsage::CopySrc;
+                wgpu::Texture clearedTexture = device.CreateTexture(&desc);
+
+                wgpu::CommandEncoder encoder = device.CreateCommandEncoder();
+
+                // The pass that clears clearedTexture.
+                utils::ComboRenderPassDescriptor rpDesc({clearedTexture.CreateView()});
+                rpDesc.cColorAttachments[0].loadOp = wgpu::LoadOp::Clear;
+                rpDesc.cColorAttachments[0].clearValue = attachmentSpec.clearValue;
+                wgpu::RenderPassEncoder pass = encoder.BeginRenderPass(&rpDesc);
+                pass.End();
+
+                // Copy clearedTexture -> attachment.
+                wgpu::ImageCopyTexture src = utils::CreateImageCopyTexture(clearedTexture);
+                wgpu::ImageCopyTexture dst = utils::CreateImageCopyTexture(attachment);
+                wgpu::Extent3D copySize = {1, 1, 1};
+                encoder.CopyTextureToTexture(&src, &dst, &copySize);
+
+                wgpu::CommandBuffer commands = encoder.Finish();
+                queue.Submit(1, &commands);
+            }
+
+            // Discard after initialization to check that the lazy zero init is actually triggered
+            // (and it's not just that the resource happened to be zeroes already).
+            if (attachmentSpec.discardAfterInit) {
+                utils::ComboRenderPassDescriptor rpDesc({attachment.CreateView()});
+                rpDesc.cColorAttachments[0].loadOp = wgpu::LoadOp::Load;
+                rpDesc.cColorAttachments[0].storeOp = wgpu::StoreOp::Discard;
+
+                wgpu::CommandEncoder encoder = device.CreateCommandEncoder();
+                wgpu::RenderPassEncoder pass = encoder.BeginRenderPass(&rpDesc);
+                pass.End();
+                wgpu::CommandBuffer commands = encoder.Finish();
+                queue.Submit(1, &commands);
+            }
+
+            attachments.push_back(attachment);
+        }
+
+        return attachments;
+    }
+
+    wgpu::RenderPassEncoder BeginTestRenderPass(
+        const PLSSpec& spec,
+        const wgpu::CommandEncoder& encoder,
+        const std::vector<wgpu::Texture>& storageAttachments,
+        wgpu::Texture colorAttachment) const {
+        std::vector<wgpu::RenderPassStorageAttachment> attachmentDescs;
+        for (size_t i = 0; i < spec.attachments.size(); i++) {
+            const StorageSpec& attachmentSpec = spec.attachments[i];
+
+            wgpu::RenderPassStorageAttachment attachment;
+            attachment.storage = storageAttachments[i].CreateView();
+            attachment.offset = attachmentSpec.offset;
+            attachment.loadOp = attachmentSpec.loadOp;
+            attachment.storeOp = attachmentSpec.storeOp;
+            attachment.clearValue = attachmentSpec.clearValue;
+            attachmentDescs.push_back(attachment);
+        }
+
+        wgpu::RenderPassPixelLocalStorage rpPlsDesc;
+        rpPlsDesc.totalPixelLocalStorageSize = spec.totalSize;
+        rpPlsDesc.storageAttachmentCount = attachmentDescs.size();
+        rpPlsDesc.storageAttachments = attachmentDescs.data();
+
+        wgpu::RenderPassDescriptor rpDesc;
+        rpDesc.nextInChain = &rpPlsDesc;
+        rpDesc.colorAttachmentCount = 0;
+        rpDesc.depthStencilAttachment = nullptr;
+
+        wgpu::RenderPassColorAttachment rpColor;
+        if (colorAttachment != nullptr) {
+            rpColor.view = colorAttachment.CreateView();
+            rpColor.loadOp = wgpu::LoadOp::Clear;
+            rpColor.clearValue = {0, 0, 0, 0};
+            rpColor.storeOp = wgpu::StoreOp::Store;
+
+            rpDesc.colorAttachments = &rpColor;
+            rpDesc.colorAttachmentCount = 1;
+        }
+
+        return encoder.BeginRenderPass(&rpDesc);
+    }
+
+    uint32_t ComputeExpectedValue(const PLSSpec& spec, size_t slot) {
+        for (const StorageSpec& attachment : spec.attachments) {
+            if (attachment.offset / kPLSSlotByteSize != slot) {
+                continue;
+            }
+
+            // Compute the expected value depending on load/store ops by "replaying" the operations
+            // that would be done.
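+            // For example with clearValue.r == 18 on slot 0 and kIterations == 10, the
+            // accumulator adds (0 + 1) * 10 to the initial 18, giving 28 (or 0 if the attachment
+            // is discarded).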
+            uint32_t expectedValue = 0;
+            if (!attachment.discardAfterInit) {
+                expectedValue = attachment.clearValue.r;
+            }
+            expectedValue += (slot + 1) * kIterations;
+
+            if (attachment.storeOp == wgpu::StoreOp::Discard) {
+                expectedValue = 0;
+            }
+
+            return expectedValue;
+        }
+
+        // This is not an explicit storage attachment.
+        return (slot + 1) * kIterations;
+    }
+
+    void CheckByReadingStorageAttachments(const PLSSpec& spec,
+                                          const std::vector<wgpu::Texture>& storageAttachments) {
+        for (size_t i = 0; i < spec.attachments.size(); i++) {
+            const StorageSpec& attachmentSpec = spec.attachments[i];
+            uint32_t slot = attachmentSpec.offset / kPLSSlotByteSize;
+
+            uint32_t expectedValue = ComputeExpectedValue(spec, slot);
+
+            switch (spec.attachments[i].format) {
+                case wgpu::TextureFormat::R32Float:
+                    EXPECT_TEXTURE_EQ(static_cast<float>(expectedValue), storageAttachments[i],
+                                      {0, 0});
+                    break;
+                case wgpu::TextureFormat::R32Uint:
+                case wgpu::TextureFormat::R32Sint:
+                    EXPECT_TEXTURE_EQ(expectedValue, storageAttachments[i], {0, 0});
+                    break;
+                default:
+                    DAWN_UNREACHABLE();
+            }
+        }
+    }
+
+    void CheckByReadingColorAttachment(const PLSSpec& spec, wgpu::Texture color) {
+        std::array<uint32_t, 4> expected = {0, 0, 0, 0};
+        for (size_t slot = 0; slot < spec.totalSize / kPLSSlotByteSize; slot++) {
+            expected[slot] = ComputeExpectedValue(spec, slot);
+        }
+
+        utils::RGBA8 expectedColor(expected[0], expected[1], expected[2], expected[3]);
+        EXPECT_TEXTURE_EQ(expectedColor, color, {0, 0});
+    }
+
+    void CheckByReadingStorageBuffer(const PLSSpec& spec, wgpu::Buffer buffer) {
+        for (size_t slot = 0; slot < spec.totalSize / kPLSSlotByteSize; slot++) {
+            uint32_t expectedValue = ComputeExpectedValue(spec, slot);
+            EXPECT_BUFFER_U32_EQ(expectedValue, buffer, slot * kPLSSlotByteSize);
+        }
+    }
+
+    bool RequiresColorAttachment(const PLSSpec& spec) {
+        return spec.attachments.empty() || spec.checkMethod == CheckMethod::RenderAttachment;
+    }
+
+    void SetColorTargets(const PLSSpec& spec,
+                         utils::ComboRenderPipelineDescriptor* desc,
+                         bool writesColor) {
+        if (RequiresColorAttachment(spec)) {
+            desc->cFragment.targetCount = 1;
+            desc->cTargets[0].format = wgpu::TextureFormat::RGBA8Unorm;
+            desc->cTargets[0].writeMask =
+                writesColor ? wgpu::ColorWriteMask::All : wgpu::ColorWriteMask::None;
+        } else {
+            desc->cFragment.targetCount = 0;
+        }
+    }
+
+    void DoTest(const PLSSpec& spec) {
+        wgpu::ShaderModule module = MakeTestModule(spec);
+
+        // Make the pipeline that will draw a point that adds (i + 1) to the i-th slot of the PLS.
+        wgpu::RenderPipeline accumulatorPipeline;
+        {
+            utils::ComboRenderPipelineDescriptor desc;
+            desc.layout = MakeTestLayout(spec);
+            desc.vertex.module = module;
+            desc.vertex.entryPoint = "vs";
+            desc.cFragment.module = module;
+            desc.cFragment.entryPoint = "accumulator";
+            desc.primitive.topology = wgpu::PrimitiveTopology::PointList;
+            SetColorTargets(spec, &desc, false);
+            accumulatorPipeline = device.CreateRenderPipeline(&desc);
+        }
+
+        wgpu::RenderPipeline checkPipeline;
+        wgpu::BindGroup checkBindGroup;
+        wgpu::Buffer readbackStorageBuffer;
+
+        if (spec.checkMethod == CheckMethod::StorageBuffer) {
+            // Make the pipeline copying the PLS to the storage buffer.
+            wgpu::BindGroupLayout bgl = utils::MakeBindGroupLayout(
+                device, {{0, wgpu::ShaderStage::Fragment, wgpu::BufferBindingType::Storage}});
+
+            utils::ComboRenderPipelineDescriptor desc;
+            desc.layout = MakeTestLayout(spec, bgl);
+            desc.vertex.module = module;
+            desc.vertex.entryPoint = "vs";
+            desc.cFragment.module = module;
+            desc.cFragment.entryPoint = "readbackToStorageBuffer";
+            desc.primitive.topology = wgpu::PrimitiveTopology::PointList;
+            SetColorTargets(spec, &desc, false);
+            checkPipeline = device.CreateRenderPipeline(&desc);
+
+            wgpu::BufferDescriptor bufDesc;
+            bufDesc.size = spec.totalSize;
+            bufDesc.usage = wgpu::BufferUsage::Storage | wgpu::BufferUsage::CopySrc;
+            readbackStorageBuffer = device.CreateBuffer(&bufDesc);
+
+            checkBindGroup = utils::MakeBindGroup(device, bgl, {{0, readbackStorageBuffer}});
+        }
+        if (spec.checkMethod == CheckMethod::RenderAttachment) {
+            // Make the pipeline copying the PLS to the render attachment.
+            utils::ComboRenderPipelineDescriptor desc;
+            desc.layout = MakeTestLayout(spec);
+            desc.vertex.module = module;
+            desc.vertex.entryPoint = "vs";
+            desc.cFragment.module = module;
+            desc.cFragment.entryPoint = "copyToColorAttachment";
+            desc.primitive.topology = wgpu::PrimitiveTopology::PointList;
+            SetColorTargets(spec, &desc, true);
+            checkPipeline = device.CreateRenderPipeline(&desc);
+        }
+
+        // Make all the attachments.
+        std::vector<wgpu::Texture> storageAttachments = MakeTestStorageAttachments(spec);
+
+        wgpu::Texture colorAttachment;
+        if (RequiresColorAttachment(spec)) {
+            wgpu::TextureDescriptor desc;
+            desc.size = {1, 1};
+            desc.format = wgpu::TextureFormat::RGBA8Unorm;
+            desc.usage = wgpu::TextureUsage::RenderAttachment | wgpu::TextureUsage::CopySrc;
+            colorAttachment = device.CreateTexture(&desc);
+        }
+
+        {
+            // Build the render pass with the specified storage attachments
+            wgpu::CommandEncoder encoder = device.CreateCommandEncoder();
+            wgpu::RenderPassEncoder pass =
+                BeginTestRenderPass(spec, encoder, storageAttachments, colorAttachment);
+
+            // Draw the points accumulating to PLS, with a PLS barrier if needed.
+            pass.SetPipeline(accumulatorPipeline);
+            if (supportsCoherent) {
+                pass.Draw(kIterations);
+            } else {
+                for (uint32_t i = 0; i < kIterations; i++) {
+                    pass.Draw(1);
+                    pass.PixelLocalStorageBarrier();
+                }
+            }
+
+            // Run the checkPipeline, if any.
+            if (checkPipeline != nullptr) {
+                pass.SetPipeline(checkPipeline);
+                if (checkBindGroup != nullptr) {
+                    pass.SetBindGroup(0, checkBindGroup);
+                }
+                pass.Draw(1);
+            }
+
+            pass.End();
+            wgpu::CommandBuffer commands = encoder.Finish();
+            queue.Submit(1, &commands);
+        }
+
+        switch (spec.checkMethod) {
+            case CheckMethod::StorageBuffer:
+                CheckByReadingStorageBuffer(spec, readbackStorageBuffer);
+                break;
+            case CheckMethod::ReadStorageAttachments:
+                CheckByReadingStorageAttachments(spec, storageAttachments);
+                break;
+            case CheckMethod::RenderAttachment:
+                CheckByReadingColorAttachment(spec, colorAttachment);
+                break;
+        }
+
+        // Yay!
+    }
+
+    static constexpr uint32_t kIterations = 10;
+    bool supportsCoherent;
+};
+
+// Test that the various supported PLS formats work for accumulation.
+TEST_P(PixelLocalStorageTests, Formats) {
+    for (const auto format : {wgpu::TextureFormat::R32Uint, wgpu::TextureFormat::R32Sint,
+                              wgpu::TextureFormat::R32Float}) {
+        PLSSpec spec = {4, {{0, format}}};
+        DoTest(spec);
+    }
+}
+
+// Test the storage attachment load ops.
+TEST_P(PixelLocalStorageTests, LoadOp) {
+    // Test LoadOp::Clear with a couple values.
+    {
+        PLSSpec spec = {4, {{0, wgpu::TextureFormat::R32Uint}}};
+        spec.attachments[0].loadOp = wgpu::LoadOp::Clear;
+
+        spec.attachments[0].clearValue.r = 42;
+        DoTest(spec);
+
+        spec.attachments[0].clearValue.r = 38;
+        DoTest(spec);
+    }
+
+    // Test LoadOp::Load (the test helper clears the texture to clearValue).
+    {
+        PLSSpec spec = {4, {{0, wgpu::TextureFormat::R32Uint}}};
+        spec.attachments[0].clearValue.r = 18;
+        spec.attachments[0].loadOp = wgpu::LoadOp::Load;
+        DoTest(spec);
+    }
+}
+
+// Test the storage attachment store ops.
+TEST_P(PixelLocalStorageTests, StoreOp) {
+    // Test StoreOp::Store.
+    {
+        PLSSpec spec = {4, {{0, wgpu::TextureFormat::R32Uint}}};
+        spec.attachments[0].storeOp = wgpu::StoreOp::Store;
+        DoTest(spec);
+    }
+
+    // Test StoreOp::Discard.
+    {
+        PLSSpec spec = {4, {{0, wgpu::TextureFormat::R32Uint}}};
+        spec.attachments[0].storeOp = wgpu::StoreOp::Discard;
+        DoTest(spec);
+    }
+}
+
+// Test lazy zero initialization of the storage attachments.
+TEST_P(PixelLocalStorageTests, ZeroInit) {
+    // Discard causes the storage attachment to be lazy zeroed.
+    {
+        PLSSpec spec = {4, {{0, wgpu::TextureFormat::R32Uint}}};
+        spec.attachments[0].storeOp = wgpu::StoreOp::Discard;
+        DoTest(spec);
+    }
+
+    // Discard before using as a storage attachment; it should be lazy-cleared.
+    {
+        PLSSpec spec = {4, {{0, wgpu::TextureFormat::R32Uint}}};
+        spec.attachments[0].loadOp = wgpu::LoadOp::Load;
+        spec.attachments[0].clearValue.r = 18;
+        spec.attachments[0].discardAfterInit = true;
+        DoTest(spec);
+    }
+}
+
+// Test many explicit storage attachments.
+TEST_P(PixelLocalStorageTests, MultipleStorageAttachments) {
+    PLSSpec spec = {16,
+                    {
+                        {0, wgpu::TextureFormat::R32Sint},
+                        {4, wgpu::TextureFormat::R32Uint},
+                        {8, wgpu::TextureFormat::R32Float},
+                        {12, wgpu::TextureFormat::R32Sint},
+                    }};
+    DoTest(spec);
+}
+
+// Test explicit storage attachments in reverse offset order.
+TEST_P(PixelLocalStorageTests, InvertedOffsetOrder) {
+    PLSSpec spec = {8,
+                    {
+                        {4, wgpu::TextureFormat::R32Uint},
+                        {0, wgpu::TextureFormat::R32Sint},
+                    }};
+    DoTest(spec);
+}
+
+// Test a single implicit pixel local slot.
+TEST_P(PixelLocalStorageTests, ImplicitSlot) {
+    PLSSpec spec = {4, {}, CheckMethod::StorageBuffer};
+    DoTest(spec);
+}
+
+// Test multiple implicit pixel local slots.
+TEST_P(PixelLocalStorageTests, MultipleImplicitSlot) {
+    PLSSpec spec = {16, {}, CheckMethod::StorageBuffer};
+    DoTest(spec);
+}
+
+// Test mixed implicit / explicit pixel local slots.
+TEST_P(PixelLocalStorageTests, MixedImplicitExplicit) {
+    {
+        PLSSpec spec = {16,
+                        {{4, wgpu::TextureFormat::R32Uint}, {8, wgpu::TextureFormat::R32Float}},
+                        CheckMethod::StorageBuffer};
+        DoTest(spec);
+    }
+    {
+        PLSSpec spec = {16,
+                        {{4, wgpu::TextureFormat::R32Uint}, {12, wgpu::TextureFormat::R32Float}},
+                        CheckMethod::StorageBuffer};
+        DoTest(spec);
+    }
+    {
+        PLSSpec spec = {16,
+                        {{0, wgpu::TextureFormat::R32Uint}, {12, wgpu::TextureFormat::R32Float}},
+                        CheckMethod::StorageBuffer};
+        DoTest(spec);
+    }
+}
+
+// Test using PLS and then copying it to a render attachment, fully implicit version.
+TEST_P(PixelLocalStorageTests, CopyToRenderAttachment) {
+    {
+        PLSSpec spec = {4, {}, CheckMethod::RenderAttachment};
+        DoTest(spec);
+    }
+    {
+        PLSSpec spec = {16, {}, CheckMethod::RenderAttachment};
+        DoTest(spec);
+    }
+}
+
+// Test using PLS and then copying it to a render attachment, with explicit storage attachments.
+TEST_P(PixelLocalStorageTests, CopyToRenderAttachmentWithStorageAttachments) {
+    {
+        PLSSpec spec = {4, {{0, wgpu::TextureFormat::R32Float}}, CheckMethod::RenderAttachment};
+        DoTest(spec);
+    }
+    {
+        PLSSpec spec = {16, {{8, wgpu::TextureFormat::R32Uint}}, CheckMethod::RenderAttachment};
+        DoTest(spec);
+    }
+}
+
+DAWN_INSTANTIATE_TEST(PixelLocalStorageTests, MetalBackend());
+
+}  // anonymous namespace
+}  // namespace dawn