D3D11: Implement Pixel Local Storage

This patch implements Pixel Local Storage (PLS) on D3D11 backends.
- The PLS UAVs are always allocated at the end of all RTV and UAV
  slots when creating the render pipeline.
- The PLS UAVs are treated as fixed UAVs in BindGroupTracker and
  will always be set together with other UAVs in BindGroups. The
  UAV slots in OMSetRenderTargetsAndUnorderedAccessViews() are
  allocated as below:
  (RTVs) , null, null, ..., UAVs-from-bindgroups, UAVs-for-PLS
- Internal texture views are created to implement implicit PLS.

Bug: dawn:1704
Test: dawn_end2end_tests

Change-Id: I70a46f48d35c26a87a251076c03b28179ba54fb9
Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/161601
Kokoro: Kokoro <noreply+kokoro@google.com>
Reviewed-by: Corentin Wallez <cwallez@chromium.org>
Commit-Queue: Jiawei Shao <jiawei.shao@intel.com>
Reviewed-by: Peng Huang <penghuang@chromium.org>
Reviewed-by: Austin Eng <enga@chromium.org>
Reviewed-by: Peng Huang <penghuang@google.com>
diff --git a/src/dawn/native/RenderPipeline.h b/src/dawn/native/RenderPipeline.h
index 6dcb2f6..9ba6f0c 100644
--- a/src/dawn/native/RenderPipeline.h
+++ b/src/dawn/native/RenderPipeline.h
@@ -142,6 +142,8 @@
         bool operator()(const RenderPipelineBase* a, const RenderPipelineBase* b) const;
     };
 
+    static constexpr wgpu::TextureFormat kImplicitPLSSlotFormat = wgpu::TextureFormat::R32Uint;
+
   protected:
     void DestroyImpl() override;
 
diff --git a/src/dawn/native/d3d11/BindGroupTrackerD3D11.cpp b/src/dawn/native/d3d11/BindGroupTrackerD3D11.cpp
index 819a1b9..92fad27 100644
--- a/src/dawn/native/d3d11/BindGroupTrackerD3D11.cpp
+++ b/src/dawn/native/d3d11/BindGroupTrackerD3D11.cpp
@@ -130,12 +130,15 @@
 
 }  // namespace
 
-BindGroupTracker::BindGroupTracker(const ScopedSwapStateCommandRecordingContext* commandContext,
-                                   bool isRenderPass)
+BindGroupTracker::BindGroupTracker(
+    const ScopedSwapStateCommandRecordingContext* commandContext,
+    bool isRenderPass,
+    std::vector<ComPtr<ID3D11UnorderedAccessView>> pixelLocalStorageUAVs)
     : mCommandContext(commandContext),
       mIsRenderPass(isRenderPass),
       mVisibleStages(isRenderPass ? wgpu::ShaderStage::Vertex | wgpu::ShaderStage::Fragment
-                                  : wgpu::ShaderStage::Compute) {
+                                  : wgpu::ShaderStage::Compute),
+      mPixelLocalStorageUAVs(std::move(pixelLocalStorageUAVs)) {
     mLastAppliedPipelineLayout = mCommandContext->GetDevice()->GetEmptyPipelineLayout();
 }
 
@@ -160,7 +163,7 @@
         // all UAV slot assignments in the bind groups, and then bind them all together.
         const BindGroupLayoutMask uavBindGroups =
             ToBackend(mPipelineLayout)->GetUAVBindGroupLayoutsMask();
-        std::vector<ComPtr<ID3D11UnorderedAccessView>> d3d11UAVs;
+        std::vector<ComPtr<ID3D11UnorderedAccessView>> uavsInBindGroup;
         for (BindGroupIndex index : IterateBitSet(uavBindGroups)) {
             BindGroupBase* group = mBindGroups[index];
             const ityp::vector<BindingIndex, uint64_t>& dynamicOffsets = mDynamicOffsets[index];
@@ -189,7 +192,8 @@
                                                               ->CreateD3D11UnorderedAccessView1(
                                                                   offset, binding.size));
                                 ToBackend(binding.buffer)->MarkMutated();
-                                d3d11UAVs.insert(d3d11UAVs.begin(), std::move(d3d11UAV));
+                                uavsInBindGroup.insert(uavsInBindGroup.begin(),
+                                                       std::move(d3d11UAV));
                                 break;
                             }
                             case wgpu::BufferBindingType::Uniform:
@@ -210,7 +214,8 @@
                                     ToBackend(group->GetBindingAsTextureView(bindingIndex));
                                 DAWN_TRY_ASSIGN(d3d11UAV,
                                                 view->GetOrCreateD3D11UnorderedAccessView());
-                                d3d11UAVs.insert(d3d11UAVs.begin(), std::move(d3d11UAV));
+                                uavsInBindGroup.insert(uavsInBindGroup.begin(),
+                                                       std::move(d3d11UAV));
                                 break;
                             }
                             case wgpu::StorageTextureAccess::ReadOnly:
@@ -229,15 +234,20 @@
                 }
             }
         }
+
         uint32_t uavSlotCount = ToBackend(mPipelineLayout->GetDevice())->GetUAVSlotCount();
         std::vector<ID3D11UnorderedAccessView*> views;
-        for (auto& uav : d3d11UAVs) {
+        views.reserve(uavsInBindGroup.size() + mPixelLocalStorageUAVs.size());
+        for (auto& uav : uavsInBindGroup) {
             views.push_back(uav.Get());
         }
+        for (auto& uav : mPixelLocalStorageUAVs) {
+            views.push_back(uav.Get());
+        }
+        DAWN_ASSERT(uavSlotCount >= views.size());
         mCommandContext->GetD3D11DeviceContext4()->OMSetRenderTargetsAndUnorderedAccessViews(
             D3D11_KEEP_RENDER_TARGETS_AND_DEPTH_STENCIL, nullptr, nullptr,
-            uavSlotCount - d3d11UAVs.size(), d3d11UAVs.size(), views.data(), nullptr);
-        d3d11UAVs.clear();
+            uavSlotCount - views.size(), views.size(), views.data(), nullptr);
     } else {
         BindGroupLayoutMask inheritedGroups =
             mPipelineLayout->InheritedGroupsMask(mLastAppliedPipelineLayout);
diff --git a/src/dawn/native/d3d11/BindGroupTrackerD3D11.h b/src/dawn/native/d3d11/BindGroupTrackerD3D11.h
index ba70285..56e461a 100644
--- a/src/dawn/native/d3d11/BindGroupTrackerD3D11.h
+++ b/src/dawn/native/d3d11/BindGroupTrackerD3D11.h
@@ -28,6 +28,8 @@
 #ifndef SRC_DAWN_NATIVE_D3D11_BINDGROUPTRACKERD3D11_H_
 #define SRC_DAWN_NATIVE_D3D11_BINDGROUPTRACKERD3D11_H_
 
+#include <vector>
+
 #include "dawn/native/BindGroupTracker.h"
 #include "dawn/native/d3d/d3d_platform.h"
 
@@ -36,11 +38,16 @@
 class ScopedSwapStateCommandRecordingContext;
 
 // We need convert WebGPU bind slot to d3d11 bind slot according a map in PipelineLayout, so we
-// cannot inherit BindGroupTrackerGroups.
+// cannot inherit BindGroupTrackerGroups. Currently we arrange all the RTVs and UAVs when calling
+// OMSetRenderTargetsAndUnorderedAccessViews() with below rules:
+// - RTVs from the first register (r0)
+// - UAVs in bind groups
+// - Pixel Local Storage UAVs
 class BindGroupTracker : public BindGroupTrackerBase</*CanInheritBindGroups=*/true, uint64_t> {
   public:
     BindGroupTracker(const ScopedSwapStateCommandRecordingContext* commandContext,
-                     bool isRenderPass);
+                     bool isRenderPass,
+                     std::vector<ComPtr<ID3D11UnorderedAccessView>> pixelLocalStorageUAVs = {});
     ~BindGroupTracker();
     MaybeError Apply();
 
@@ -51,6 +58,8 @@
     const ScopedSwapStateCommandRecordingContext* mCommandContext;
     const bool mIsRenderPass;
     const wgpu::ShaderStage mVisibleStages;
+    // All the pixel local storage UAVs
+    const std::vector<ComPtr<ID3D11UnorderedAccessView>> mPixelLocalStorageUAVs;
 };
 
 }  // namespace dawn::native::d3d11
diff --git a/src/dawn/native/d3d11/CommandBufferD3D11.cpp b/src/dawn/native/d3d11/CommandBufferD3D11.cpp
index 2f5367d..89903ed 100644
--- a/src/dawn/native/d3d11/CommandBufferD3D11.cpp
+++ b/src/dawn/native/d3d11/CommandBufferD3D11.cpp
@@ -30,6 +30,7 @@
 #include <algorithm>
 #include <array>
 #include <string>
+#include <utility>
 #include <vector>
 
 #include "dawn/common/WindowsUtils.h"
@@ -128,6 +129,101 @@
     return {};
 }
 
+// Create a render-pass-sized 2D StorageAttachment texture for implicit PLS; return a view of it.
+ResultOrError<Ref<TextureViewBase>> CreateImplicitPixelLocalAttachment(
+    DeviceBase* device,
+    const BeginRenderPassCmd* renderPass) {
+    TextureDescriptor desc;
+    desc.dimension = wgpu::TextureDimension::e2D;
+    desc.format = RenderPipelineBase::kImplicitPLSSlotFormat;
+    desc.usage = wgpu::TextureUsage::StorageAttachment;
+    desc.size = {renderPass->width, renderPass->height, 1};
+    Ref<TextureBase> texture;
+    DAWN_TRY_ASSIGN(texture, device->CreateTexture(&desc));
+
+    Ref<TextureViewBase> textureView;
+    DAWN_TRY_ASSIGN(textureView, texture->CreateView());
+
+    return textureView;
+}
+
+// Handle pixel local storage attachments and return a vector of all pixel local storage UAVs.
+// - For implicit attachments, create the texture and clear it to 0.
+// - For explicit attachments, clear them to the specified clear color if their load operation is
+//   `clear`. Integer clear colors are converted via int64_t so negative R32Sint values wrap.
+ResultOrError<std::vector<ComPtr<ID3D11UnorderedAccessView>>>
+HandlePixelLocalStorageAndGetPixelLocalStorageUAVs(
+    DeviceBase* device,
+    const BeginRenderPassCmd* renderPass,
+    const ScopedSwapStateCommandRecordingContext* commandContext) {
+    std::vector<ComPtr<ID3D11UnorderedAccessView>> pixelLocalStorageUAVs;
+    auto d3d11DeviceContext = commandContext->GetD3D11DeviceContext4();
+
+    const std::vector<wgpu::TextureFormat>& storageAttachmentSlots =
+        renderPass->attachmentState->GetStorageAttachmentSlots();
+    for (size_t attachment = 0; attachment < storageAttachmentSlots.size(); attachment++) {
+        ComPtr<ID3D11UnorderedAccessView> pixelLocalStorageUAV;
+        if (storageAttachmentSlots[attachment] == wgpu::TextureFormat::Undefined) {
+            // Create the texture as implicit pixel local storage attachment
+            // TODO(dawn:1704): Optimize this by creating a single 2D array texture and reusing it
+            // across different render passes.
+            Ref<TextureViewBase> implicitPixelLocalStorageTextureView;
+            DAWN_TRY_ASSIGN(implicitPixelLocalStorageTextureView,
+                            CreateImplicitPixelLocalAttachment(device, renderPass));
+
+            // Get and clear the UAV of the implicit pixel local storage attachment
+            DAWN_TRY_ASSIGN(pixelLocalStorageUAV,
+                            ToBackend(implicitPixelLocalStorageTextureView.Get())
+                                ->GetOrCreateD3D11UnorderedAccessView());
+
+            uint32_t clearValue[4] = {0, 0, 0, 0};
+            d3d11DeviceContext->ClearUnorderedAccessViewUint(pixelLocalStorageUAV.Get(),
+                                                             clearValue);
+        } else {
+            // Get the UAV of the explicit pixel local storage attachment
+            auto& attachmentInfo = renderPass->storageAttachments[attachment];
+            DAWN_TRY_ASSIGN(
+                pixelLocalStorageUAV,
+                ToBackend(attachmentInfo.storage.Get())->GetOrCreateD3D11UnorderedAccessView());
+
+            // Execute the load operation of the pixel local storage attachment
+            switch (attachmentInfo.loadOp) {
+                case wgpu::LoadOp::Clear: {
+                    switch (attachmentInfo.storage->GetFormat().format) {
+                        case wgpu::TextureFormat::R32Float: {
+                            float clearValue[4] = {static_cast<float>(attachmentInfo.clearColor.r),
+                                                   0, 0, 0};
+                            d3d11DeviceContext->ClearUnorderedAccessViewFloat(
+                                pixelLocalStorageUAV.Get(), clearValue);
+                            break;
+                        }
+                        case wgpu::TextureFormat::R32Sint:
+                        case wgpu::TextureFormat::R32Uint: {
+                            const int64_t red = static_cast<int64_t>(attachmentInfo.clearColor.r);
+                            uint32_t clearValue[4] = {static_cast<uint32_t>(red), 0, 0, 0};
+                            d3d11DeviceContext->ClearUnorderedAccessViewUint(
+                                pixelLocalStorageUAV.Get(), clearValue);
+                            break;
+                        }
+                        default:
+                            DAWN_UNREACHABLE();
+                            break;
+                    }
+                    break;
+                }
+                case wgpu::LoadOp::Load:
+                    break;
+                case wgpu::LoadOp::Undefined:
+                    DAWN_UNREACHABLE();
+                    break;
+            }
+        }
+        pixelLocalStorageUAVs.push_back(pixelLocalStorageUAV);
+    }
+
+    return pixelLocalStorageUAVs;
+}
+
 }  // namespace
 
 // Create CommandBuffer
@@ -141,12 +237,16 @@
         for (size_t i = 0; i < scope.textures.size(); i++) {
             Texture* texture = ToBackend(scope.textures[i]);
 
-            // Clear subresources that are not render attachments. Render attachments will be
-            // cleared in RecordBeginRenderPass by setting the loadop to clear when the texture
-            // subresource has not been initialized before the render pass.
+            // Clear subresources that are not render attachments or storage attachments. Render
+            // attachments will be cleared in RecordBeginRenderPass by setting the loadop to clear
+            // when the texture subresource has not been initialized before the render pass. Storage
+            // attachments will also be cleared in RecordBeginRenderPass by
+            // ClearUnorderedAccessView*() when the texture subresource has not been initialized
+            // before the render pass.
             DAWN_TRY(scope.textureSyncInfos[i].Iterate([&](const SubresourceRange& range,
                                                            TextureSyncInfo syncInfo) -> MaybeError {
-                if (syncInfo.usage & ~wgpu::TextureUsage::RenderAttachment) {
+                if (syncInfo.usage & ~(wgpu::TextureUsage::RenderAttachment |
+                                       wgpu::TextureUsage::StorageAttachment)) {
                     DAWN_TRY(texture->EnsureSubresourceContentInitialized(commandContext, range));
                 }
                 return {};
@@ -530,6 +630,12 @@
     d3d11DeviceContext->OMSetRenderTargets(static_cast<uint8_t>(attachmentCount),
                                            d3d11RenderTargetViews.data(), d3d11DepthStencilView);
 
+    std::vector<ComPtr<ID3D11UnorderedAccessView>> pixelLocalStorageUAVs;
+    if (renderPass->attachmentState->HasPixelLocalStorage()) {
+        DAWN_TRY_ASSIGN(pixelLocalStorageUAVs, HandlePixelLocalStorageAndGetPixelLocalStorageUAVs(
+                                                   GetDevice(), renderPass, commandContext));
+    }
+
     // Set viewport
     D3D11_VIEWPORT defautViewport;
     defautViewport.TopLeftX = 0;
@@ -549,7 +655,8 @@
     d3d11DeviceContext->RSSetScissorRects(1, &scissor);
 
     RenderPipeline* lastPipeline = nullptr;
-    BindGroupTracker bindGroupTracker(commandContext, /*isRenderPass=*/true);
+    BindGroupTracker bindGroupTracker(commandContext, /*isRenderPass=*/true,
+                                      std::move(pixelLocalStorageUAVs));
     VertexBufferTracker vertexBufferTracker(commandContext);
     std::array<float, 4> blendColor = {0.0f, 0.0f, 0.0f, 0.0f};
     uint32_t stencilReference = 0;
diff --git a/src/dawn/native/d3d11/DeviceInfoD3D11.cpp b/src/dawn/native/d3d11/DeviceInfoD3D11.cpp
index a09a830..ec1deed 100644
--- a/src/dawn/native/d3d11/DeviceInfoD3D11.cpp
+++ b/src/dawn/native/d3d11/DeviceInfoD3D11.cpp
@@ -44,6 +44,7 @@
         "D3D11_FEATURE_D3D11_OPTIONS2"));
 
     info.isUMA = options2.UnifiedMemoryArchitecture;
+    info.supportsROV = options2.ROVsSupported;
 
     info.shaderModel = 50;
     // Profiles are always <stage>s_<minor>_<major> so we build the s_<minor>_major and add
diff --git a/src/dawn/native/d3d11/DeviceInfoD3D11.h b/src/dawn/native/d3d11/DeviceInfoD3D11.h
index 4129802..83423f1 100644
--- a/src/dawn/native/d3d11/DeviceInfoD3D11.h
+++ b/src/dawn/native/d3d11/DeviceInfoD3D11.h
@@ -44,6 +44,7 @@
     uint32_t shaderModel;
     PerStage<std::wstring> shaderProfiles;
     bool supportsSharedResourceCapabilityTier2;
+    bool supportsROV;
     size_t dedicatedVideoMemory;
     size_t sharedSystemMemory;
 };
diff --git a/src/dawn/native/d3d11/PhysicalDeviceD3D11.cpp b/src/dawn/native/d3d11/PhysicalDeviceD3D11.cpp
index 86fa5f8..f1d8bda 100644
--- a/src/dawn/native/d3d11/PhysicalDeviceD3D11.cpp
+++ b/src/dawn/native/d3d11/PhysicalDeviceD3D11.cpp
@@ -194,6 +194,9 @@
         EnableFeature(Feature::DawnMultiPlanarFormats);
         EnableFeature(Feature::MultiPlanarFormatP010);
     }
+    if (mDeviceInfo.supportsROV) {
+        EnableFeature(Feature::PixelLocalStorageCoherent);
+    }
 
     EnableFeature(Feature::SharedTextureMemoryDXGISharedHandle);
     EnableFeature(Feature::SharedTextureMemoryD3D11Texture2D);
diff --git a/src/dawn/native/d3d11/PipelineLayoutD3D11.cpp b/src/dawn/native/d3d11/PipelineLayoutD3D11.cpp
index abca72d..b1e5f9a 100644
--- a/src/dawn/native/d3d11/PipelineLayoutD3D11.cpp
+++ b/src/dawn/native/d3d11/PipelineLayoutD3D11.cpp
@@ -50,9 +50,11 @@
     // resource slots when being written out. So we assign UAV binding index decreasingly here.
     // https://learn.microsoft.com/en-us/windows/win32/api/d3d11/nf-d3d11-id3d11devicecontext-omsetrendertargetsandunorderedaccessviews
     // TODO(dawn:1818): Support testing on both FL11_0 and FL11_1.
-    uint32_t unorderedAccessViewIndex = device->GetUAVSlotCount();
-    mTotalUAVBindingCount = unorderedAccessViewIndex;
+    mTotalUAVBindingCount = device->GetUAVSlotCount();
 
+    // Reserve last several UAV slots for Pixel Local Storage attachments.
+    uint32_t unorderedAccessViewIndex =
+        mTotalUAVBindingCount - static_cast<uint32_t>(GetStorageAttachmentSlots().size());
     for (BindGroupIndex group : IterateBitSet(GetBindGroupLayoutsMask())) {
         const BindGroupLayoutInternalBase* bgl = GetBindGroupLayout(group);
         mIndexInfo[group].resize(bgl->GetBindingCount());
diff --git a/src/dawn/native/d3d11/RenderPipelineD3D11.cpp b/src/dawn/native/d3d11/RenderPipelineD3D11.cpp
index 8ef9ca4..dc0f260 100644
--- a/src/dawn/native/d3d11/RenderPipelineD3D11.cpp
+++ b/src/dawn/native/d3d11/RenderPipelineD3D11.cpp
@@ -462,13 +462,57 @@
         mUsesInstanceIndex = compiledShader[SingleShaderStage::Vertex].usesInstanceIndex;
     }
 
+    std::optional<tint::PixelLocalOptions> pixelLocalOptions;
     if (GetStageMask() & wgpu::ShaderStage::Fragment) {
+        pixelLocalOptions = tint::PixelLocalOptions();
+        // HLSL SM5.0 doesn't support groups, so we set group index to 0.
+        pixelLocalOptions->pixel_local_group_index = 0;
+
+        if (GetAttachmentState()->HasPixelLocalStorage()) {
+            const std::vector<wgpu::TextureFormat>& storageAttachmentSlots =
+                GetAttachmentState()->GetStorageAttachmentSlots();
+            DAWN_ASSERT(ToBackend(GetLayout())->GetTotalUAVBindingCount() >
+                        storageAttachmentSlots.size());
+            // Currently all the pixel local storage UAVs are allocated at the last several UAV
+            // slots. For example, when there are 4 pixel local storage attachments, we will
+            // allocate register u60 to u63 for them.
+            uint32_t basePixelLocalAttachmentIndex =
+                ToBackend(GetLayout())->GetTotalUAVBindingCount() -
+                static_cast<uint32_t>(storageAttachmentSlots.size());
+            for (size_t i = 0; i < storageAttachmentSlots.size(); i++) {
+                pixelLocalOptions->attachments[i] = basePixelLocalAttachmentIndex + i;
+
+                static_assert(
+                    RenderPipelineBase::kImplicitPLSSlotFormat == wgpu::TextureFormat::R32Uint,
+                    "The implicit Pixel Local Storage format should be R32Uint.");
+                switch (storageAttachmentSlots[i]) {
+                        // We use R32Uint as default pixel local storage attachment format
+                    case wgpu::TextureFormat::Undefined:
+                    case wgpu::TextureFormat::R32Uint:
+                        pixelLocalOptions->attachment_formats[i] =
+                            tint::PixelLocalOptions::TexelFormat::kR32Uint;
+                        break;
+                    case wgpu::TextureFormat::R32Sint:
+                        pixelLocalOptions->attachment_formats[i] =
+                            tint::PixelLocalOptions::TexelFormat::kR32Sint;
+                        break;
+                    case wgpu::TextureFormat::R32Float:
+                        pixelLocalOptions->attachment_formats[i] =
+                            tint::PixelLocalOptions::TexelFormat::kR32Float;
+                        break;
+                    default:
+                        DAWN_UNREACHABLE();
+                        break;
+                }
+            }
+        }
+
         const ProgrammableStage& programmableStage = GetStage(SingleShaderStage::Fragment);
         DAWN_TRY_ASSIGN(
             compiledShader[SingleShaderStage::Fragment],
             ToBackend(programmableStage.module)
                 ->Compile(programmableStage, SingleShaderStage::Fragment, ToBackend(GetLayout()),
-                          compileFlags, usedInterstageVariables));
+                          compileFlags, usedInterstageVariables, pixelLocalOptions));
         DAWN_TRY(CheckHRESULT(device->GetD3D11Device()->CreatePixelShader(
                                   compiledShader[SingleShaderStage::Fragment].shaderBlob.Data(),
                                   compiledShader[SingleShaderStage::Fragment].shaderBlob.Size(),
diff --git a/src/dawn/native/d3d11/ShaderModuleD3D11.cpp b/src/dawn/native/d3d11/ShaderModuleD3D11.cpp
index 6a1efdf..e5a573a 100644
--- a/src/dawn/native/d3d11/ShaderModuleD3D11.cpp
+++ b/src/dawn/native/d3d11/ShaderModuleD3D11.cpp
@@ -77,8 +77,8 @@
     SingleShaderStage stage,
     const PipelineLayout* layout,
     uint32_t compileFlags,
-    const std::optional<dawn::native::d3d::InterStageShaderVariablesMask>&
-        usedInterstageVariables) {
+    const std::optional<dawn::native::d3d::InterStageShaderVariablesMask>& usedInterstageVariables,
+    const std::optional<tint::PixelLocalOptions>& pixelLocalOptions) {
     Device* device = ToBackend(GetDevice());
     TRACE_EVENT0(device->GetPlatform(), General, "ShaderModuleD3D11::Compile");
     DAWN_ASSERT(!IsError());
@@ -201,6 +201,10 @@
             req.hlsl.tintOptions.interstage_locations = *usedInterstageVariables;
         }
         req.hlsl.tintOptions.truncate_interstage_variables = true;
+    } else if (stage == SingleShaderStage::Fragment) {
+        if (pixelLocalOptions.has_value()) {
+            req.hlsl.tintOptions.pixel_local_options = *pixelLocalOptions;
+        }
     }
 
     // TODO(dawn:1705): do we need to support it?
diff --git a/src/dawn/native/d3d11/ShaderModuleD3D11.h b/src/dawn/native/d3d11/ShaderModuleD3D11.h
index e14526c..6e8b431 100644
--- a/src/dawn/native/d3d11/ShaderModuleD3D11.h
+++ b/src/dawn/native/d3d11/ShaderModuleD3D11.h
@@ -59,7 +59,8 @@
         const PipelineLayout* layout,
         uint32_t compileFlags,
         const std::optional<dawn::native::d3d::InterStageShaderVariablesMask>&
-            usedInterstageVariables = {});
+            usedInterstageVariables = {},
+        const std::optional<tint::PixelLocalOptions>& pixelLocalOptions = {});
 
   private:
     ShaderModule(Device* device, const ShaderModuleDescriptor* descriptor);
diff --git a/src/dawn/native/d3d11/TextureD3D11.cpp b/src/dawn/native/d3d11/TextureD3D11.cpp
index 014bdde..c680b91 100644
--- a/src/dawn/native/d3d11/TextureD3D11.cpp
+++ b/src/dawn/native/d3d11/TextureD3D11.cpp
@@ -62,6 +62,9 @@
     if (usage & wgpu::TextureUsage::RenderAttachment) {
         bindFlags |= isDepthOrStencilFormat ? D3D11_BIND_DEPTH_STENCIL : D3D11_BIND_RENDER_TARGET;
     }
+    if (usage & wgpu::TextureUsage::StorageAttachment) {
+        bindFlags |= D3D11_BIND_UNORDERED_ACCESS;
+    }
     return bindFlags;
 }
 
diff --git a/src/dawn/native/metal/CommandBufferMTL.mm b/src/dawn/native/metal/CommandBufferMTL.mm
index 15fa969..62c4032 100644
--- a/src/dawn/native/metal/CommandBufferMTL.mm
+++ b/src/dawn/native/metal/CommandBufferMTL.mm
@@ -352,7 +352,7 @@
                     DAWN_UNREACHABLE();
                 }
                 texDesc.pixelFormat =
-                    MetalPixelFormat(device, RenderPipeline::kImplicitPLSSlotFormat);
+                    MetalPixelFormat(device, RenderPipelineBase::kImplicitPLSSlotFormat);
 
                 NSPRef<id<MTLTexture>> implicitAttachment =
                     AcquireNSPRef([device->GetMTLDevice() newTextureWithDescriptor:texDesc]);
diff --git a/src/dawn/native/metal/RenderPipelineMTL.h b/src/dawn/native/metal/RenderPipelineMTL.h
index 01bbcb7..0033dd6 100644
--- a/src/dawn/native/metal/RenderPipelineMTL.h
+++ b/src/dawn/native/metal/RenderPipelineMTL.h
@@ -62,7 +62,6 @@
     uint32_t GetMtlVertexBufferIndex(VertexBufferSlot slot) const;
 
     wgpu::ShaderStage GetStagesRequiringStorageBufferLength() const;
-    static constexpr wgpu::TextureFormat kImplicitPLSSlotFormat = wgpu::TextureFormat::R32Uint;
 
     MaybeError Initialize() override;
 
diff --git a/src/dawn/tests/end2end/PixelLocalStorageTests.cpp b/src/dawn/tests/end2end/PixelLocalStorageTests.cpp
index 4522e4f..b7ac7ac 100644
--- a/src/dawn/tests/end2end/PixelLocalStorageTests.cpp
+++ b/src/dawn/tests/end2end/PixelLocalStorageTests.cpp
@@ -499,6 +499,28 @@
         DoTest(spec);
     }
 
+    {
+        PLSSpec spec = {4, {{0, wgpu::TextureFormat::R32Sint}}};
+        spec.attachments[0].loadOp = wgpu::LoadOp::Clear;
+
+        spec.attachments[0].clearValue.r = 42;
+        DoTest(spec);
+
+        spec.attachments[0].clearValue.r = -38;
+        DoTest(spec);
+    }
+
+    {
+        PLSSpec spec = {4, {{0, wgpu::TextureFormat::R32Float}}};
+        spec.attachments[0].loadOp = wgpu::LoadOp::Clear;
+
+        spec.attachments[0].clearValue.r = 4.0;
+        DoTest(spec);
+
+        spec.attachments[0].clearValue.r = -3.0;
+        DoTest(spec);
+    }
+
     // Test LoadOp::Load (the test helper clears the texture to clearValue).
     {
         PLSSpec spec = {4, {{0, wgpu::TextureFormat::R32Uint}}};
@@ -624,7 +646,7 @@
     }
 }
 
-DAWN_INSTANTIATE_TEST(PixelLocalStorageTests, MetalBackend());
+DAWN_INSTANTIATE_TEST(PixelLocalStorageTests, D3D11Backend(), MetalBackend());
 
 }  // anonymous namespace
 }  // namespace dawn