// Copyright 2023 The Dawn & Tint Authors
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// 1. Redistributions of source code must retain the above copyright notice, this
// list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// 3. Neither the name of the copyright holder nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "dawn/native/d3d11/BufferD3D11.h"
#include <algorithm>
#include <memory>
#include <utility>
#include <vector>
#include "dawn/common/Alloc.h"
#include "dawn/common/Assert.h"
#include "dawn/common/Constants.h"
#include "dawn/common/Math.h"
#include "dawn/native/ChainUtils.h"
#include "dawn/native/CommandBuffer.h"
#include "dawn/native/DynamicUploader.h"
#include "dawn/native/d3d/D3DError.h"
#include "dawn/native/d3d11/DeviceD3D11.h"
#include "dawn/native/d3d11/PhysicalDeviceD3D11.h"
#include "dawn/native/d3d11/QueueD3D11.h"
#include "dawn/native/d3d11/UtilsD3D11.h"
#include "dawn/platform/DawnPlatform.h"
#include "dawn/platform/tracing/TraceEvent.h"
namespace dawn::native::d3d11 {
class ScopedCommandRecordingContext;
namespace {
// Max size for a CPU upload buffer.
constexpr uint64_t kMaxCPUUploadBufferSize = 64 * 1024;
constexpr wgpu::BufferUsage kCopyUsages =
wgpu::BufferUsage::CopySrc | wgpu::BufferUsage::CopyDst | kInternalCopySrcBuffer;
constexpr wgpu::BufferUsage kStagingUsages = kMappableBufferUsages | kCopyUsages;
constexpr wgpu::BufferUsage kD3D11GPUWriteUsages =
wgpu::BufferUsage::Storage | kInternalStorageBuffer | wgpu::BufferUsage::Indirect;
// Resource usage     Default    Dynamic    Immutable    Staging
// ------------------------------------------------------------
// GPU-read            Yes        Yes        Yes          Yes[1]
// GPU-write           Yes        No         No           Yes[1]
// CPU-read            No         No         No           Yes[1]
// CPU-write           No         Yes        No           Yes[1]
// ------------------------------------------------------------
// [1] GPU read or write of a resource with the D3D11_USAGE_STAGING usage is restricted to copy
// operations. You use ID3D11DeviceContext::CopySubresourceRegion and
// ID3D11DeviceContext::CopyResource for these copy operations.
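// As a rough guide to how this table maps onto the storages created later in this file: staging
// storages use D3D11_USAGE_STAGING, CPU writable storages use D3D11_USAGE_DYNAMIC, and GPU
// writable or copy-destination storages use D3D11_USAGE_DEFAULT.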
bool IsMappable(wgpu::BufferUsage usage) {
return usage & kMappableBufferUsages;
}
bool IsUpload(wgpu::BufferUsage usage) {
return usage & wgpu::BufferUsage::MapWrite &&
IsSubset(usage, kInternalCopySrcBuffer | wgpu::BufferUsage::CopySrc |
wgpu::BufferUsage::MapWrite);
}
bool IsStaging(wgpu::BufferUsage usage) {
// Must have at least MapWrite or MapRead bit
return IsMappable(usage) && IsSubset(usage, kStagingUsages);
}
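// For illustration, with only the listed usages set:
// - MapWrite | CopySrc          -> IsUpload() and IsStaging() are both true.
// - MapRead | CopyDst           -> IsStaging() is true, IsUpload() is false.
// - MapWrite | CopySrc | Vertex -> neither, because Vertex is outside the upload/staging usage
//                                  subsets.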
UINT D3D11BufferBindFlags(wgpu::BufferUsage usage) {
UINT bindFlags = 0;
if (usage & (wgpu::BufferUsage::Vertex)) {
bindFlags |= D3D11_BIND_VERTEX_BUFFER;
}
if (usage & wgpu::BufferUsage::Index) {
bindFlags |= D3D11_BIND_INDEX_BUFFER;
}
if (usage & (wgpu::BufferUsage::Uniform)) {
bindFlags |= D3D11_BIND_CONSTANT_BUFFER;
}
if (usage & (wgpu::BufferUsage::Storage | kInternalStorageBuffer)) {
DAWN_ASSERT(!IsMappable(usage));
bindFlags |= D3D11_BIND_UNORDERED_ACCESS;
}
if (usage & kReadOnlyStorageBuffer) {
bindFlags |= D3D11_BIND_SHADER_RESOURCE;
}
// A buffer that only has CopySrc and CopyDst usages is used as a staging buffer for copies.
// Because D3D11 doesn't allow copying directly between a buffer and a texture, we use a compute
// shader to copy data between them, so the buffer needs to be bindable as an unordered access
// view.
if (IsSubset(usage, kCopyUsages)) {
bindFlags |= D3D11_BIND_UNORDERED_ACCESS;
}
return bindFlags;
}
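// For example, Storage | CopyDst yields D3D11_BIND_UNORDERED_ACCESS, a buffer with only
// CopySrc | CopyDst also yields D3D11_BIND_UNORDERED_ACCESS (so the compute-shader copy path can
// write it), and Vertex | CopyDst yields only D3D11_BIND_VERTEX_BUFFER.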
UINT D3D11BufferMiscFlags(wgpu::BufferUsage usage) {
UINT miscFlags = 0;
if (usage & (wgpu::BufferUsage::Storage | kInternalStorageBuffer | kReadOnlyStorageBuffer)) {
miscFlags |= D3D11_RESOURCE_MISC_BUFFER_ALLOW_RAW_VIEWS;
}
if (usage & wgpu::BufferUsage::Indirect) {
miscFlags |= D3D11_RESOURCE_MISC_DRAWINDIRECT_ARGS;
}
return miscFlags;
}
size_t D3D11BufferSizeAlignment(wgpu::BufferUsage usage) {
if (usage & wgpu::BufferUsage::Uniform) {
// https://learn.microsoft.com/en-us/windows/win32/api/d3d11_1/nf-d3d11_1-id3d11devicecontext1-vssetconstantbuffers1
// Each range of constants must be a multiple of 16 shader constants (sizeof(float) * 4 * 16 =
// 256 bytes).
return sizeof(float) * 4 * 16;
}
if (usage & (wgpu::BufferUsage::Storage | kInternalStorageBuffer | kReadOnlyStorageBuffer |
wgpu::BufferUsage::CopyDst | wgpu::BufferUsage::CopySrc)) {
// Unordered access buffers must be 4-byte aligned.
// We also align CopyDst buffers to 4 bytes since they may be written by the T2B compute shader,
// which writes 4-byte chunks. Similarly, CopySrc buffers are aligned to 4 bytes since they may
// be read by the B2T compute shader, which reads 4-byte chunks.
return sizeof(uint32_t);
}
return 1;
}
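// For example, combined with the Align() in Buffer::Initialize() below, a 20-byte Uniform buffer
// is allocated as 256 bytes and a 6-byte Storage or CopyDst buffer as 8 bytes (Initialize() also
// enforces a 4-byte minimum size).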
bool CanUseCPUUploadBuffer(const Device* device, wgpu::BufferUsage usage, size_t bufferSize) {
return IsUpload(usage) && bufferSize <= kMaxCPUUploadBufferSize &&
!device->IsToggleEnabled(Toggle::D3D11DisableCPUUploadBuffers);
}
constexpr size_t kConstantBufferUpdateAlignment = 16;
wgpu::MapMode GetAutoMapMode(DeviceBase* device, wgpu::BufferUsage usage) {
if (!device->IsToggleEnabled(Toggle::AutoMapBackendBuffer) || !IsMappable(usage)) {
return wgpu::MapMode::None;
}
wgpu::MapMode mode = wgpu::MapMode::None;
if (usage & wgpu::BufferUsage::MapWrite) {
mode |= wgpu::MapMode::Write;
}
if (usage & wgpu::BufferUsage::MapRead) {
mode |= wgpu::MapMode::Read;
}
return mode;
}
} // namespace
// CPU-to-GPU upload buffers (CopySrc|MapWrite) can be emulated in system memory and then written
// into the destination GPU buffer via ID3D11DeviceContext::UpdateSubresource.
class UploadBuffer final : public Buffer {
public:
UploadBuffer(DeviceBase* device, const UnpackedPtr<BufferDescriptor>& descriptor)
: Buffer(device,
descriptor,
/*internalMappableFlags=*/kMappableBufferUsages,
/*autoMapMode=*/wgpu::MapMode::None) {}
~UploadBuffer() override = default;
private:
// BufferBase implementations
MaybeError MapAtCreationImpl() override {
mMappedData = mUploadData.get();
// MapAtCreation does the zeroization on the front-end side.
return {};
}
MaybeError MapAsyncImpl(wgpu::MapMode mode, size_t offset, size_t size) override {
mMappedData = mUploadData.get();
return EnsureDataInitialized(nullptr);
}
void UnmapImpl(BufferState oldState, BufferState newState) override { mMappedData = nullptr; }
// d3d11::Buffer implementations
MaybeError InitializeInternal() override {
mUploadData = std::unique_ptr<uint8_t[]>(AllocNoThrow<uint8_t>(GetAllocatedSize()));
if (mUploadData == nullptr) {
return DAWN_OUT_OF_MEMORY_ERROR("Failed to allocate memory for buffer uploading.");
}
return {};
}
MaybeError MapInternal(const ScopedCommandRecordingContext*, wgpu::MapMode) override {
mMappedData = mUploadData.get();
return {};
}
void UnmapInternal(const ScopedCommandRecordingContext*) override { mMappedData = nullptr; }
MaybeError ClearInternal(const ScopedCommandRecordingContext* commandContext,
uint8_t clearValue,
uint64_t offset,
uint64_t size) override {
memset(mUploadData.get() + offset, clearValue, size);
return {};
}
MaybeError CopyToInternal(const ScopedCommandRecordingContext* commandContext,
uint64_t sourceOffset,
size_t size,
Buffer* destination,
uint64_t destinationOffset) override {
return destination->WriteInternal(commandContext, destinationOffset,
mUploadData.get() + sourceOffset, size,
/*isInitialWrite=*/false);
}
MaybeError CopyFromD3DInternal(const ScopedCommandRecordingContext* commandContext,
ID3D11Buffer* srcD3D11Buffer,
uint64_t sourceOffset,
size_t size,
uint64_t destinationOffset) override {
// Upload buffers shouldn't be copied to.
DAWN_UNREACHABLE();
return {};
}
MaybeError WriteInternal(const ScopedCommandRecordingContext* commandContext,
uint64_t offset,
const void* data,
size_t size,
bool isInitialWrite) override {
const auto* src = static_cast<const uint8_t*>(data);
std::copy(src, src + size, mUploadData.get() + offset);
return {};
}
std::unique_ptr<uint8_t[]> mUploadData;
};
bool CanAddStorageUsageToBufferWithoutSideEffects(const Device* device,
wgpu::BufferUsage storageUsage,
wgpu::BufferUsage originalUsage,
size_t bufferSize) {
// Don't support using uniform buffers as storage buffers, because D3D11 constant buffers cannot
// be bound as SRVs or UAVs. Allowing this would require workarounds, including extra copies, so
// we prefer not to do it.
if (originalUsage & wgpu::BufferUsage::Uniform) {
return false;
}
// If the buffer is small, we prefer a CPU buffer for uploading, so don't allow adding storage
// usage.
if (CanUseCPUUploadBuffer(device, originalUsage, bufferSize)) {
return false;
}
const bool requiresUAV = storageUsage & (wgpu::BufferUsage::Storage | kInternalStorageBuffer);
// Check support for writable storage usage:
if (requiresUAV) {
// D3D11 mappable buffers cannot natively be used as UAVs, so avoid that.
return !(originalUsage & kMappableBufferUsages);
}
// A read-only storage buffer cannot natively be mapped for reading, so avoid that.
DAWN_ASSERT(storageUsage == kReadOnlyStorageBuffer);
return !(originalUsage & wgpu::BufferUsage::MapRead);
}
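// For example, adding kReadOnlyStorageBuffer to a MapWrite | CopySrc | Vertex buffer is allowed
// (the buffer is neither uniform nor mappable for read), but adding wgpu::BufferUsage::Storage to
// a MapWrite | Vertex buffer is not, because a D3D11 mappable buffer cannot also be a UAV.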
// static
ResultOrError<Ref<Buffer>> Buffer::Create(Device* device,
const UnpackedPtr<BufferDescriptor>& descriptor,
const ScopedCommandRecordingContext* commandContext,
bool allowUploadBufferEmulation) {
const auto actualUsage =
ComputeInternalBufferUsages(device, descriptor->usage, descriptor->size);
bool useUploadBuffer = allowUploadBufferEmulation;
useUploadBuffer &= CanUseCPUUploadBuffer(device, actualUsage, descriptor->size);
Ref<Buffer> buffer;
if (useUploadBuffer) {
buffer = AcquireRef(new UploadBuffer(device, descriptor));
} else {
buffer = AcquireRef(new GPUUsableBuffer(device, descriptor));
}
DAWN_TRY(buffer->Initialize(descriptor->mappedAtCreation, commandContext));
return buffer;
}
Buffer::Buffer(DeviceBase* device,
const UnpackedPtr<BufferDescriptor>& descriptor,
wgpu::BufferUsage internalMappableFlags,
wgpu::MapMode autoMapMode)
: BufferBase(device, descriptor),
mInternalMappableFlags(internalMappableFlags),
mAutoMapMode(autoMapMode) {}
MaybeError Buffer::Initialize(bool mappedAtCreation,
const ScopedCommandRecordingContext* commandContext) {
// TODO(dawn:1705): handle mappedAtCreation for NonzeroClearResourcesOnCreationForTesting
// Allocate at least 4 bytes so clamped accesses are always in bounds.
uint64_t size = std::max(GetSize(), uint64_t(4u));
// The validation layer requires:
// ByteWidth must be 12 or larger to be used with D3D11_RESOURCE_MISC_DRAWINDIRECT_ARGS.
if (GetInternalUsage() & wgpu::BufferUsage::Indirect) {
size = std::max(size, uint64_t(12u));
}
size_t alignment = D3D11BufferSizeAlignment(GetInternalUsage());
// Check for overflow, bufferDescriptor.ByteWidth is a UINT.
if (size > std::numeric_limits<UINT>::max() - alignment) {
// Aligning the size would overflow.
return DAWN_OUT_OF_MEMORY_ERROR("Buffer allocation is too large");
}
mAllocatedSize = Align(size, alignment);
DAWN_TRY(InitializeInternal());
SetLabelImpl();
// The buffers with mappedAtCreation == true will be initialized in
// BufferBase::MapAtCreation().
if (!mappedAtCreation &&
GetDevice()->IsToggleEnabled(Toggle::NonzeroClearResourcesOnCreationForTesting)) {
auto scopedUseDuringCreation = UseInternal();
if (commandContext) {
DAWN_TRY(ClearWholeBuffer(commandContext, 1u));
} else {
auto tmpCommandContext =
ToBackend(GetDevice()->GetQueue())
->GetScopedPendingCommandContext(QueueBase::SubmitMode::Normal);
DAWN_TRY(ClearWholeBuffer(&tmpCommandContext, 1u));
}
}
// Mark padding as cleared if there's no padding.
if (GetAllocatedSize() == GetSize()) {
mPaddingCleared = true;
}
return {};
}
Buffer::~Buffer() = default;
bool Buffer::IsCPUWritableAtCreation() const {
return IsCPUWritable();
}
bool Buffer::IsCPUWritable() const {
return mInternalMappableFlags & wgpu::BufferUsage::MapWrite;
}
bool Buffer::IsCPUReadable() const {
return mInternalMappableFlags & wgpu::BufferUsage::MapRead;
}
MaybeError Buffer::MapAtCreationImpl() {
DAWN_ASSERT(IsCPUWritableAtCreation());
// Use Try variant to avoid blocking if the CommandContext lock is already held (e.g., by
// another thread or during Queue::Submit). MapAtCreation must return immediately with a
// mappable pointer, so if the lock isn't available, we fall back to temporary storage and
// defer the actual D3D11 buffer mapping until UnmapIfNeeded. At that point, the
// CommandContext will already be acquired (e.g., during TrackUsage before GPU submission),
// and we can safely map the real D3D11 buffer and transfer the temporary storage contents
// to it via memcpy.
std::optional<ScopedCommandRecordingContext> maybeCommandContext =
ToBackend(GetDevice()->GetQueue())
->TryGetScopedPendingCommandContext(QueueBase::SubmitMode::Normal);
if (maybeCommandContext.has_value()) {
return MapInternal(&maybeCommandContext.value(), mAutoMapMode | wgpu::MapMode::Write);
}
// Lock could not be acquired, use temporary storage instead
mMapAtCreationData = std::unique_ptr<uint8_t[]>(AllocNoThrow<uint8_t>(GetAllocatedSize()));
mMappedData = mMapAtCreationData.get();
return {};
}
MaybeError Buffer::MapInternal(const ScopedCommandRecordingContext* commandContext,
wgpu::MapMode mode) {
DAWN_UNREACHABLE();
return {};
}
void Buffer::UnmapInternal(const ScopedCommandRecordingContext* commandContext) {
DAWN_UNREACHABLE();
}
MaybeError Buffer::UnmapIfNeeded(const ScopedCommandRecordingContext* commandContext) {
if (mMappedData == nullptr) {
return {};
}
if (mMapAtCreationData) {
// We used temporary storage for MapAtCreation, now copy it to the actual buffer
mMappedData = nullptr;
ScopedMap scopedMap;
DAWN_TRY_ASSIGN(scopedMap, ScopedMap::Create(commandContext, this, wgpu::MapMode::Write));
DAWN_ASSERT(scopedMap.GetMappedData());
memcpy(scopedMap.GetMappedData(), mMapAtCreationData.get(), GetAllocatedSize());
mMapAtCreationData.reset();
return {};
}
UnmapInternal(commandContext);
return {};
}
MaybeError Buffer::TrackUsage(const ScopedCommandRecordingContext* commandContext,
ExecutionSerial pendingSerial) {
if (GetLastUsageSerial() == pendingSerial) {
return {};
}
// We need to unmap buffer before it can be used in the queue.
DAWN_TRY(UnmapIfNeeded(commandContext));
MarkUsedInPendingCommands(pendingSerial);
// If automatic mapping is enabled, schedule the buffer to be re-mapped after GPU usage.
if (mAutoMapMode != wgpu::MapMode::None) {
mMapRequest.mode = mAutoMapMode;
ToBackend(GetDevice()->GetQueue())->ScheduleBufferMapping(&mMapRequest, pendingSerial);
}
return {};
}
MaybeError Buffer::MapAsyncImpl(wgpu::MapMode mode, size_t offset, size_t size) {
DAWN_ASSERT((mode == wgpu::MapMode::Write && IsCPUWritable()) ||
(mode == wgpu::MapMode::Read && IsCPUReadable()));
// With automatic mapping, the buffer will be actually mapped/unmapped at queue
// boundaries, so MapAsync is a no-op.
if (mAutoMapMode != wgpu::MapMode::None) {
// Lazily do the 1st map if the buffer is not used in any queue yet.
if (GetLastUsageSerial() == kBeginningOfGPUTime && !mMappedData) {
DAWN_TRY(MapAtCreationImpl());
}
return {};
}
auto deviceGuard = GetDevice()->GetGuard();
const ExecutionSerial lastUsageSerial = GetLastUsageSerial();
const ExecutionSerial completedSerial = GetDevice()->GetQueue()->GetCompletedCommandSerial();
// We may run into a map stall if the buffer is still being used by previously submitted
// commands. To avoid that, we ask the Queue to do the map later, once the last usage serial has
// passed.
if (lastUsageSerial > completedSerial) {
mMapRequest.mode = mode;
ToBackend(GetDevice()->GetQueue())->ScheduleBufferMapping(&mMapRequest, lastUsageSerial);
} else {
auto commandContext = ToBackend(GetDevice()->GetQueue())
->GetScopedPendingCommandContext(QueueBase::SubmitMode::Normal);
DAWN_TRY(TryMapNow(&commandContext, completedSerial, mode));
}
return {};
}
// The difference between FinalizeMapImpl and TryMapNow is that:
// - FinalizeMapImpl() is triggered by front-end's serial control.
// - It's called after Queue::CheckAndUpdateCompletedSerials() and before user's mapping callback.
// - FinalizeMapImpl() will always be called for each MapAsync regardless of whether automatic
// mapping is enabled or not.
// - TryMapNow() is triggered by Queue::CheckAndUpdateCompletedSerials().
// - If automatic mapping is disabled, it's scheduled for each MapAsync.
// - else, it's scheduled once after the queue finishes using the buffer.
MaybeError Buffer::TryMapNow(ScopedCommandRecordingContext* commandContext,
ExecutionSerial completedSerial,
wgpu::MapMode mode) {
// If the buffer was used again after the remap was scheduled, skip this remap.
// This should only happen with automatic mapping where the buffer is scheduled to be
// re-mapped, but then used again in a subsequent submit before the original serial completed.
if (completedSerial < GetLastUsageSerial()) {
DAWN_ASSERT(mAutoMapMode != wgpu::MapMode::None);
return {};
}
DAWN_ASSERT(GetDevice()->IsLockedByCurrentThreadIfNeeded());
// Trigger any deferred unmaps.
// TODO(crbug.com/345471009): Consider reusing the mapped pointer and skipping the map if the
// previous map mode is the same as the current map mode.
DAWN_TRY(UnmapIfNeeded(commandContext));
// Map then initialize data using mapped pointer.
// The mapped pointer is always writable because:
// - If mode is Write, then it's already writable.
// - If mode is Read, it's only possible to map staging buffer. In that case,
// D3D11_MAP_READ_WRITE will be used, hence the mapped pointer will also be writable.
// TODO(dawn:1705): make sure the map call is not blocked by the GPU operations.
DAWN_TRY(MapInternal(commandContext, mode));
return {};
}
MaybeError Buffer::FinalizeMapImpl(BufferState newState) {
if (newState == BufferState::MappedAtCreation) {
return {};
}
DAWN_ASSERT(mMappedData);
// Ensure data is initialized after a MapAsync event completes.
DAWN_TRY(EnsureDataInitialized(nullptr));
return {};
}
void Buffer::UnmapImpl(BufferState oldState, BufferState newState) {
DAWN_ASSERT(IsMappable(GetInternalUsage()));
// With automatic mapping, the buffer stays mapped, so Unmap is a no-op.
if (mAutoMapMode != wgpu::MapMode::None) {
return;
}
// Cancel any pending scheduled map. Note we don't cancel here if newState is Destroyed, since
// it should be handled in DestroyImpl instead. DestroyImpl knows whether the reason is early
// destroy or dtor, and can decide to call CancelScheduledBufferMapping accordingly.
if (newState != BufferState::Destroyed) {
ToBackend(GetDevice()->GetQueue())->CancelScheduledBufferMapping(this);
}
// The actual unmap will be deferred until the buffer is used by the queue or we need to map
// again. This avoids the need to lock the CommandContext here just to call D3D11's Unmap
// function, and instead defers the call to a moment where the CommandContext is already
// acquired.
}
void* Buffer::GetMappedPointerImpl() {
// The frontend asks that the pointer returned is from the start of the resource
// irrespective of the offset passed in MapAsyncImpl, which is what mMappedData is.
return mMappedData;
}
void Buffer::DestroyImpl(DestroyReason reason) {
// TODO(crbug.com/dawn/831): DestroyImpl is called from two places.
// - It may be called if the buffer is explicitly destroyed with APIDestroy.
// This case is NOT thread-safe and needs proper synchronization with other
// simultaneous uses of the buffer.
// - It may be called when the last ref to the buffer is dropped and the buffer
// is implicitly destroyed. This case is thread-safe because there are no
// other threads using the buffer since there are no other live refs.
BufferBase::DestroyImpl(reason);
// Cancel any pending map schedule. Even though front-end guarantees that Destroy() cannot run
// in parallel with Queue operations, it doesn't do the same for Device::Tick(),
// Instance::ProcessEvents() or WaitAny(). Thus a scheduled map triggered by those functions
// would race with Destroy() if we don't do a cancel here.
if (reason != DestroyReason::CppDestructor && IsMappable(GetInternalUsage())) {
ToBackend(GetDevice()->GetQueue())->CancelScheduledBufferMapping(this);
}
// If buffer is still mapped, we need to unmap it before releasing the D3D11 resource. If we
// don't do that, there might be some issues on certain drivers such as Intel's.
// Note: The front-end guarantees that DestroyImpl cannot run concurrently with MapAsync,
// UnmapImpl, or Queue operations, so accessing mMappedData here is safe. Additionally, since
// no Queue operation can use this buffer anymore, it won't be scheduled for a remap after a
// cancel above.
if (mMappedData != nullptr && !mMapAtCreationData) {
// We don't need to unmap if the mapping was done on a shadow copy because no real
// buffer is mapped yet.
ToBackend(GetDevice())->DeferUnmapDestroyedBuffer(GetD3D11MappedBuffer());
mMappedData = nullptr;
}
}
std::optional<DeviceGuard> Buffer::UseDeviceGuardForDestroy() {
return std::nullopt;
}
MaybeError Buffer::EnsureDataInitialized(const ScopedCommandRecordingContext* commandContext) {
// Clear padding on first use, regardless of initialization state.
DAWN_TRY(EnsurePaddingInitialized(commandContext));
if (!NeedsInitialization()) {
return {};
}
DAWN_TRY(InitializeToZero(commandContext));
return {};
}
MaybeError Buffer::EnsureDataInitializedAsDestination(
const ScopedCommandRecordingContext* commandContext,
uint64_t offset,
uint64_t size) {
// Clear padding on first use as destination, regardless of initialization state.
DAWN_TRY(EnsurePaddingInitialized(commandContext));
if (!NeedsInitialization()) {
return {};
}
if (IsFullBufferRange(offset, size)) {
SetInitialized(true);
return {};
}
DAWN_TRY(InitializeToZero(commandContext));
return {};
}
MaybeError Buffer::EnsureDataInitializedAsDestination(
const ScopedCommandRecordingContext* commandContext,
const CopyTextureToBufferCmd* copy) {
// Clear padding on first use as destination, regardless of initialization state.
DAWN_TRY(EnsurePaddingInitialized(commandContext));
if (!NeedsInitialization()) {
return {};
}
if (IsFullBufferOverwrittenInTextureToBufferCopy(copy)) {
SetInitialized(true);
} else {
DAWN_TRY(InitializeToZero(commandContext));
}
return {};
}
MaybeError Buffer::InitializeToZero(const ScopedCommandRecordingContext* commandContext) {
DAWN_ASSERT(NeedsInitialization());
DAWN_TRY(ClearWholeBuffer(commandContext, uint8_t(0u)));
SetInitialized(true);
GetDevice()->IncrementLazyClearCountForTesting();
return {};
}
MaybeError Buffer::PredicatedClear(const ScopedSwapStateCommandRecordingContext* commandContext,
ID3D11Predicate* predicate,
uint8_t clearValue,
uint64_t offset,
uint64_t size) {
DAWN_UNREACHABLE();
return {};
}
MaybeError Buffer::Clear(const ScopedCommandRecordingContext* commandContext,
uint8_t clearValue,
uint64_t offset,
uint64_t size) {
DAWN_ASSERT(!mMappedData);
if (size == 0) {
return {};
}
// For non-staging buffers, we can use UpdateSubresource to write the data.
DAWN_TRY(EnsureDataInitializedAsDestination(commandContext, offset, size));
return ClearInternal(commandContext, clearValue, offset, size);
}
MaybeError Buffer::ClearWholeBuffer(const ScopedCommandRecordingContext* commandContext,
uint8_t clearValue) {
return ClearInternal(commandContext, clearValue, 0, GetAllocatedSize());
}
MaybeError Buffer::ClearInternal(const ScopedCommandRecordingContext* commandContext,
uint8_t clearValue,
uint64_t offset,
uint64_t size) {
DAWN_ASSERT(size != 0);
// TODO(dawn:1705): use a reusable zero staging buffer to clear the buffer to avoid this CPU to
// GPU copy.
std::vector<uint8_t> clearData(size, clearValue);
return WriteInternal(commandContext, offset, clearData.data(), size,
/*isInitialWrite=*/true);
}
MaybeError Buffer::EnsurePaddingInitialized(const ScopedCommandRecordingContext* commandContext) {
if (mPaddingCleared) [[likely]] {
return {};
}
DAWN_TRY(ClearPaddingInternal(commandContext));
mPaddingCleared = true;
return {};
}
MaybeError Buffer::ClearPaddingInternal(const ScopedCommandRecordingContext* commandContext) {
uint32_t paddingBytes = GetAllocatedSize() - GetSize();
if (paddingBytes == 0) {
return {};
}
uint32_t clearSize = paddingBytes;
uint64_t clearOffset = GetSize();
DAWN_TRY(ClearInternal(commandContext, 0, clearOffset, clearSize));
return {};
}
ComPtr<ID3D11Buffer> Buffer::GetD3D11MappedBuffer() {
return nullptr;
}
MaybeError Buffer::Write(const ScopedCommandRecordingContext* commandContext,
uint64_t offset,
const void* data,
size_t size) {
DAWN_ASSERT(size != 0);
// For non-staging buffers, we can use UpdateSubresource to write the data.
DAWN_TRY(EnsureDataInitializedAsDestination(commandContext, offset, size));
return WriteInternal(commandContext, offset, data, size, /*isInitialWrite=*/false);
}
// static
MaybeError Buffer::Copy(const ScopedCommandRecordingContext* commandContext,
Buffer* source,
uint64_t sourceOffset,
size_t size,
Buffer* destination,
uint64_t destinationOffset) {
DAWN_ASSERT(size != 0);
DAWN_TRY(source->EnsureDataInitialized(commandContext));
DAWN_TRY(
destination->EnsureDataInitializedAsDestination(commandContext, destinationOffset, size));
return source->CopyToInternal(commandContext, sourceOffset, size, destination,
destinationOffset);
}
ResultOrError<Buffer::ScopedMap> Buffer::ScopedMap::Create(
const ScopedCommandRecordingContext* commandContext,
Buffer* buffer,
wgpu::MapMode mode) {
if (mode == wgpu::MapMode::Write && !buffer->IsCPUWritable()) {
return ScopedMap();
}
if (mode == wgpu::MapMode::Read && !buffer->IsCPUReadable()) {
return ScopedMap();
}
if (buffer->mMappedData) {
return ScopedMap(commandContext, buffer, /*needsUnmap=*/false);
}
DAWN_TRY(buffer->MapInternal(commandContext, mode));
return ScopedMap(commandContext, buffer, /*needsUnmap=*/true);
}
// ScopedMap
Buffer::ScopedMap::ScopedMap() = default;
Buffer::ScopedMap::ScopedMap(const ScopedCommandRecordingContext* commandContext,
Buffer* buffer,
bool needsUnmap)
: mCommandContext(commandContext), mBuffer(buffer), mNeedsUnmap(needsUnmap) {}
Buffer::ScopedMap::~ScopedMap() {
Reset();
}
Buffer::ScopedMap::ScopedMap(Buffer::ScopedMap&& other) {
this->operator=(std::move(other));
}
Buffer::ScopedMap& Buffer::ScopedMap::operator=(Buffer::ScopedMap&& other) {
Reset();
mCommandContext = other.mCommandContext;
mBuffer = other.mBuffer;
mNeedsUnmap = other.mNeedsUnmap;
other.mBuffer = nullptr;
other.mNeedsUnmap = false;
return *this;
}
void Buffer::ScopedMap::Reset() {
if (mNeedsUnmap) {
mBuffer->UnmapInternal(mCommandContext);
}
mCommandContext = nullptr;
mBuffer = nullptr;
mNeedsUnmap = false;
}
uint8_t* Buffer::ScopedMap::GetMappedData() const {
return mBuffer ? static_cast<uint8_t*>(mBuffer->mMappedData) : nullptr;
}
// GPUUsableBuffer::Storage
class GPUUsableBuffer::Storage : public RefCounted, NonCopyable {
public:
explicit Storage(ComPtr<ID3D11Buffer> d3d11Buffer) : mD3d11Buffer(std::move(d3d11Buffer)) {
D3D11_BUFFER_DESC desc;
mD3d11Buffer->GetDesc(&desc);
mD3d11Usage = desc.Usage;
mMappableCopyableFlags = wgpu::BufferUsage::CopySrc;
switch (mD3d11Usage) {
case D3D11_USAGE_STAGING:
mMappableCopyableFlags |= kMappableBufferUsages | wgpu::BufferUsage::CopyDst;
break;
case D3D11_USAGE_DYNAMIC:
mMappableCopyableFlags |= wgpu::BufferUsage::MapWrite;
break;
case D3D11_USAGE_DEFAULT:
mMappableCopyableFlags |= wgpu::BufferUsage::CopyDst;
break;
default:
break;
}
mIsConstantBuffer = desc.BindFlags & D3D11_BIND_CONSTANT_BUFFER;
}
ID3D11Buffer* GetD3D11Buffer() { return mD3d11Buffer.Get(); }
uint64_t GetRevision() const { return mRevision; }
void SetRevision(uint64_t revision) { mRevision = revision; }
bool IsFirstRevision() const { return mRevision == 0; }
bool IsConstantBuffer() const { return mIsConstantBuffer; }
bool IsCPUWritable() const { return mMappableCopyableFlags & wgpu::BufferUsage::MapWrite; }
bool IsCPUReadable() const { return mMappableCopyableFlags & wgpu::BufferUsage::MapRead; }
bool IsDynamic() const { return mD3d11Usage == D3D11_USAGE_DYNAMIC; }
bool SupportsCopyDst() const { return mMappableCopyableFlags & wgpu::BufferUsage::CopyDst; }
bool IsGPUWritable() const { return mD3d11Usage == D3D11_USAGE_DEFAULT; }
private:
ComPtr<ID3D11Buffer> mD3d11Buffer;
uint64_t mRevision = 0;
D3D11_USAGE mD3d11Usage;
bool mIsConstantBuffer = false;
wgpu::BufferUsage mMappableCopyableFlags;
};
// GPUUsableBuffer
GPUUsableBuffer::GPUUsableBuffer(DeviceBase* device,
const UnpackedPtr<BufferDescriptor>& descriptor)
: Buffer(
device,
descriptor,
/*internalMappableFlags=*/
[](const UnpackedPtr<BufferDescriptor>& descriptor) {
wgpu::BufferUsage mappableFlags = descriptor->usage & kMappableBufferUsages;
if (descriptor->usage & wgpu::BufferUsage::MapRead) {
// Staging buffer can be both mapped read & write.
mappableFlags |= wgpu::BufferUsage::MapWrite;
}
return mappableFlags;
}(descriptor),
/*autoMapMode=*/
GetAutoMapMode(device, descriptor->usage)) {}
GPUUsableBuffer::~GPUUsableBuffer() = default;
void GPUUsableBuffer::DestroyImpl(DestroyReason reason) {
// TODO(crbug.com/dawn/831): DestroyImpl is called from two places.
// - It may be called if the buffer is explicitly destroyed with APIDestroy.
// This case is NOT thread-safe and needs proper synchronization with other
// simultaneous uses of the buffer.
// - It may be called when the last ref to the buffer is dropped and the buffer
// is implicitly destroyed. This case is thread-safe because there are no
// other threads using the buffer since there are no other live refs.
Buffer::DestroyImpl(reason);
mSRVCache.clear();
mUAVCache.clear();
mLastUpdatedStorage = nullptr;
mMappableStorage = nullptr;
mStorages = {};
}
void GPUUsableBuffer::SetLabelImpl() {
for (auto ite = mStorages.begin(); ite != mStorages.end(); ++ite) {
auto storageType = static_cast<StorageType>(std::distance(mStorages.begin(), ite));
SetStorageLabel(storageType);
}
}
void GPUUsableBuffer::SetStorageLabel(StorageType storageType) {
static constexpr ityp::array<GPUUsableBuffer::StorageType, const char*,
static_cast<uint8_t>(StorageType::Count)>
kStorageTypeStrings = {
"Dawn_CPUWritableConstantBuffer",
"Dawn_GPUCopyDstConstantBuffer",
"Dawn_CPUWritableNonConstantBuffer",
"Dawn_GPUWritableNonConstantBuffer",
"Dawn_Staging",
};
if (!mStorages[storageType]) {
return;
}
SetDebugName(ToBackend(GetDevice()), mStorages[storageType]->GetD3D11Buffer(),
kStorageTypeStrings[storageType], GetLabel());
}
MaybeError GPUUsableBuffer::InitializeInternal() {
mStorages = {};
wgpu::BufferUsage usagesToHandle = GetInternalUsage();
// If the buffer has MapRead usage, allocate a staging storage for both MapRead & MapWrite.
// Example: If user creates a buffer with MapWrite + MapRead + Vertex usages:
// - We will allocate:
// - A storage for both MapWrite + MapRead.
// - A storage for Vertex usage.
// - This will require a copy for vertex input whenever the user maps the buffer (either for
// writing or reading).
// - This is acceptable since a buffer with both MapWrite + MapRead is rare.
if ((usagesToHandle & wgpu::BufferUsage::MapRead) || IsStaging(usagesToHandle)) {
DAWN_TRY_ASSIGN(mLastUpdatedStorage, GetOrCreateStorage(StorageType::Staging));
mMappableStorage = mLastUpdatedStorage;
usagesToHandle &= ~kStagingUsages;
}
// We need to create a separate storage for uniform usage, because D3D11 doesn't allow constant
// buffer to be used for other purposes.
if (usagesToHandle & wgpu::BufferUsage::Uniform) {
usagesToHandle &=
~(wgpu::BufferUsage::Uniform | wgpu::BufferUsage::CopySrc | kInternalCopySrcBuffer);
// Since D3D11 doesn't allow both CPU & GPU to write to a buffer, we need separate
// storages for CPU writes and GPU writes.
if (usagesToHandle & wgpu::BufferUsage::MapWrite) {
// Note: we favor CPU write over GPU write if MapWrite is present. If buffer has GPU
// writable usages, the GPU writable storage will be lazily created later.
usagesToHandle &= ~wgpu::BufferUsage::MapWrite;
DAWN_TRY_ASSIGN(mLastUpdatedStorage,
GetOrCreateStorage(StorageType::CPUWritableConstantBuffer));
mMappableStorage = mLastUpdatedStorage;
} else {
// For constant buffer, the only supported GPU op is copy. So create one storage for
// that.
usagesToHandle &= ~wgpu::BufferUsage::CopyDst;
DAWN_TRY_ASSIGN(mLastUpdatedStorage,
GetOrCreateStorage(StorageType::GPUCopyDstConstantBuffer));
}
}
if (usagesToHandle == wgpu::BufferUsage::None) {
return {};
}
// Create separate storage for non-constant buffer usages if required.
if (usagesToHandle & wgpu::BufferUsage::MapWrite) {
// Note: we only need one CPU writable storage. If there are both constant buffer and
// non-constant buffer usages, we favor the CPU writable constant buffer first, since the most
// common use case is users updating a constant buffer on the CPU.
DAWN_ASSERT(mMappableStorage == nullptr);
usagesToHandle &= ~wgpu::BufferUsage::MapWrite;
// If a buffer is created with both Storage and MapWrite usages, then
// we will lazily create a GPU writable storage later. Note: we favor CPU writable
// over GPU writable when creating non-constant buffer storage. This is to optimize
// the most common cases where MapWrite buffers are mostly updated by CPU.
DAWN_TRY_ASSIGN(mLastUpdatedStorage,
GetOrCreateStorage(StorageType::CPUWritableNonConstantBuffer));
mMappableStorage = mLastUpdatedStorage;
} else {
usagesToHandle &= ~wgpu::BufferUsage::CopyDst;
DAWN_TRY_ASSIGN(mLastUpdatedStorage,
GetOrCreateStorage(StorageType::GPUWritableNonConstantBuffer));
}
return {};
}
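// As a rough example of the above: a buffer created with Uniform | Storage usages typically ends
// up with a GPUCopyDstConstantBuffer storage and a GPUWritableNonConstantBuffer storage, while a
// MapWrite | CopySrc | Uniform buffer gets a single CPUWritableConstantBuffer storage.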
ResultOrError<GPUUsableBuffer::Storage*> GPUUsableBuffer::GetOrCreateStorage(
StorageType storageType) {
if (mStorages[storageType]) {
return mStorages[storageType].Get();
}
D3D11_BUFFER_DESC bufferDescriptor;
bufferDescriptor.ByteWidth = GetAllocatedSize();
bufferDescriptor.StructureByteStride = 0;
switch (storageType) {
case StorageType::CPUWritableConstantBuffer:
bufferDescriptor.Usage = D3D11_USAGE_DYNAMIC;
bufferDescriptor.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE;
bufferDescriptor.BindFlags = D3D11_BIND_CONSTANT_BUFFER;
bufferDescriptor.MiscFlags = 0;
break;
case StorageType::GPUCopyDstConstantBuffer:
bufferDescriptor.Usage = D3D11_USAGE_DEFAULT;
bufferDescriptor.CPUAccessFlags = 0;
bufferDescriptor.BindFlags = D3D11_BIND_CONSTANT_BUFFER;
bufferDescriptor.MiscFlags = 0;
break;
case StorageType::CPUWritableNonConstantBuffer: {
// Need to exclude GPU writable usages because CPU writable buffer is not GPU writable
// in D3D11.
auto nonUniformUsage =
GetInternalUsage() & ~(kD3D11GPUWriteUsages | wgpu::BufferUsage::Uniform);
bufferDescriptor.Usage = D3D11_USAGE_DYNAMIC;
bufferDescriptor.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE;
bufferDescriptor.BindFlags = D3D11BufferBindFlags(nonUniformUsage);
bufferDescriptor.MiscFlags = D3D11BufferMiscFlags(nonUniformUsage);
if (bufferDescriptor.BindFlags == 0) {
// Dynamic buffer requires at least one binding flag. If no binding flag is needed
// (one example is MapWrite | QueryResolve), then use D3D11_BIND_INDEX_BUFFER.
bufferDescriptor.BindFlags = D3D11_BIND_INDEX_BUFFER;
DAWN_ASSERT(bufferDescriptor.MiscFlags == 0);
}
} break;
case StorageType::GPUWritableNonConstantBuffer: {
// Need to exclude mapping usages.
const auto nonUniformUsage =
GetInternalUsage() & ~(kMappableBufferUsages | wgpu::BufferUsage::Uniform);
bufferDescriptor.Usage = D3D11_USAGE_DEFAULT;
bufferDescriptor.CPUAccessFlags = 0;
bufferDescriptor.BindFlags = D3D11BufferBindFlags(nonUniformUsage);
bufferDescriptor.MiscFlags = D3D11BufferMiscFlags(nonUniformUsage);
} break;
case StorageType::Staging: {
bufferDescriptor.Usage = D3D11_USAGE_STAGING;
bufferDescriptor.CPUAccessFlags = D3D11_CPU_ACCESS_READ | D3D11_CPU_ACCESS_WRITE;
bufferDescriptor.BindFlags = 0;
bufferDescriptor.MiscFlags = 0;
} break;
case StorageType::Count:
DAWN_UNREACHABLE();
}
ComPtr<ID3D11Buffer> buffer;
DAWN_TRY(CheckOutOfMemoryHRESULT(
ToBackend(GetDevice())->GetD3D11Device()->CreateBuffer(&bufferDescriptor, nullptr, &buffer),
"ID3D11Device::CreateBuffer"));
mStorages[storageType] = AcquireRef(new Storage(std::move(buffer)));
SetStorageLabel(storageType);
return mStorages[storageType].Get();
}
ResultOrError<GPUUsableBuffer::Storage*> GPUUsableBuffer::GetOrCreateDstCopyableStorage() {
if (mStorages[StorageType::GPUCopyDstConstantBuffer]) {
return mStorages[StorageType::GPUCopyDstConstantBuffer].Get();
}
if (mStorages[StorageType::GPUWritableNonConstantBuffer]) {
return mStorages[StorageType::GPUWritableNonConstantBuffer].Get();
}
if (GetInternalUsage() & wgpu::BufferUsage::Uniform) {
return GetOrCreateStorage(StorageType::GPUCopyDstConstantBuffer);
}
return GetOrCreateStorage(StorageType::GPUWritableNonConstantBuffer);
}
MaybeError GPUUsableBuffer::SyncStorage(const ScopedCommandRecordingContext* commandContext,
Storage* dstStorage) {
DAWN_ASSERT(mLastUpdatedStorage);
DAWN_ASSERT(dstStorage);
if (mLastUpdatedStorage->GetRevision() == dstStorage->GetRevision()) {
return {};
}
DAWN_ASSERT(commandContext);
// Must not have pending unmap.
DAWN_CHECK(!mMappedData);
if (dstStorage->SupportsCopyDst()) {
commandContext->CopyResource(dstStorage->GetD3D11Buffer(),
mLastUpdatedStorage->GetD3D11Buffer());
dstStorage->SetRevision(mLastUpdatedStorage->GetRevision());
return {};
}
// TODO(42241146): This is a slow path. It's usually used by uncommon use cases:
// - GPU writes a CPU writable buffer.
DAWN_ASSERT(dstStorage->IsCPUWritable());
Storage* stagingStorage;
DAWN_TRY_ASSIGN(stagingStorage, GetOrCreateStorage(StorageType::Staging));
DAWN_TRY(SyncStorage(commandContext, stagingStorage));
D3D11_MAPPED_SUBRESOURCE mappedSrcResource;
DAWN_TRY(CheckHRESULT(commandContext->Map(stagingStorage->GetD3D11Buffer(),
/*Subresource=*/0, D3D11_MAP_READ,
/*MapFlags=*/0, &mappedSrcResource),
"ID3D11DeviceContext::Map src"));
auto MapAndCopy = [](const ScopedCommandRecordingContext* commandContext, ID3D11Buffer* dst,
const void* srcData, size_t size) -> MaybeError {
D3D11_MAPPED_SUBRESOURCE mappedDstResource;
DAWN_TRY(CheckHRESULT(commandContext->Map(dst,
/*Subresource=*/0, D3D11_MAP_WRITE_DISCARD,
/*MapFlags=*/0, &mappedDstResource),
"ID3D11DeviceContext::Map dst"));
memcpy(mappedDstResource.pData, srcData, size);
commandContext->Unmap(dst,
/*Subresource=*/0);
return {};
};
auto result = MapAndCopy(commandContext, dstStorage->GetD3D11Buffer(), mappedSrcResource.pData,
GetAllocatedSize());
commandContext->Unmap(stagingStorage->GetD3D11Buffer(),
/*Subresource=*/0);
if (result.IsError()) {
return result;
}
dstStorage->SetRevision(mLastUpdatedStorage->GetRevision());
return {};
}
void GPUUsableBuffer::IncrStorageRevAndMakeLatest(
const ScopedCommandRecordingContext* commandContext,
Storage* dstStorage) {
DAWN_ASSERT(dstStorage->GetRevision() == mLastUpdatedStorage->GetRevision());
dstStorage->SetRevision(dstStorage->GetRevision() + 1);
mLastUpdatedStorage = dstStorage;
if (dstStorage->IsGPUWritable() && IsMappable(GetInternalUsage())) {
// If this buffer is mappable and the last updated storage is GPU writable, we need to
// update the staging storage when the command buffer is flushed.
// This is to make sure the staging storage will contain the up-to-date GPU modified data.
commandContext->AddBufferForSyncingWithCPU(this);
}
}
MaybeError GPUUsableBuffer::SyncGPUWritesToStaging(
const ScopedCommandRecordingContext* commandContext) {
DAWN_ASSERT(IsMappable(GetInternalUsage()));
// Only sync staging storage. Later other CPU writable storages can be updated by
// copying from staging storage with Map(MAP_WRITE_DISCARD) which won't stall the CPU.
// Otherwise, since CPU writable storages don't support CopyDst, it would require a CPU
// stall in order to sync them here.
Storage* stagingStorage;
DAWN_TRY_ASSIGN(stagingStorage, GetOrCreateStorage(StorageType::Staging));
return SyncStorage(commandContext, stagingStorage);
}
MaybeError GPUUsableBuffer::MapInternal(const ScopedCommandRecordingContext* commandContext,
wgpu::MapMode mode) {
DAWN_ASSERT(!mMappedData);
DAWN_ASSERT(mMappableStorage);
if (mMappableStorage->IsDynamic()) {
DAWN_ASSERT(mode == wgpu::MapMode::Write);
// Dynamic buffer can only use D3D11_MAP_WRITE_NO_OVERWRITE
mD3DMapTypeUsed = D3D11_MAP_WRITE_NO_OVERWRITE;
} else {
if (NeedsInitialization() || mode == (wgpu::MapMode::Read | wgpu::MapMode::Write)) {
// Map buffer with D3D11_MAP_READ_WRITE because we need write permission to initialize
// the buffer.
// TODO(dawn:1705): investigate the performance impact of mapping with
// D3D11_MAP_READ_WRITE.
mD3DMapTypeUsed = D3D11_MAP_READ_WRITE;
} else {
if (mode & wgpu::MapMode::Read) {
mD3DMapTypeUsed = D3D11_MAP_READ;
} else {
mD3DMapTypeUsed = D3D11_MAP_WRITE;
}
}
}
// Sync previously modified content before mapping.
DAWN_TRY(SyncStorage(commandContext, mMappableStorage));
D3D11_MAPPED_SUBRESOURCE mappedResource;
DAWN_TRY(CheckHRESULT(commandContext->Map(mMappableStorage->GetD3D11Buffer(),
/*Subresource=*/0, mD3DMapTypeUsed,
/*MapFlags=*/0, &mappedResource),
"ID3D11DeviceContext::Map"));
mMappedData = static_cast<uint8_t*>(mappedResource.pData);
return {};
}
void GPUUsableBuffer::UnmapInternal(const ScopedCommandRecordingContext* commandContext) {
DAWN_ASSERT(mMappedData);
DAWN_ASSERT(mMappableStorage);
commandContext->Unmap(mMappableStorage->GetD3D11Buffer(),
/*Subresource=*/0);
mMappedData = nullptr;
// Only increment revision if the buffer was mapped for writing.
if (mD3DMapTypeUsed != D3D11_MAP_READ) {
IncrStorageRevAndMakeLatest(commandContext, mMappableStorage);
}
}
ResultOrError<ID3D11Buffer*> GPUUsableBuffer::GetD3D11ConstantBuffer(
const ScopedCommandRecordingContext* commandContext) {
auto* storage = mStorages[StorageType::CPUWritableConstantBuffer].Get();
if (storage && storage->GetRevision() == mLastUpdatedStorage->GetRevision()) {
// The CPUWritableConstantBuffer is up to date, use it directly.
return storage->GetD3D11Buffer();
}
// In all other cases we are going to use the GPUCopyDstConstantBuffer because either it is up to
// date, or we need to update the constant buffer data, and doing a CopyResource on the GPU is
// always more efficient than paths involving a memcpy (or potentially a stall).
DAWN_TRY_ASSIGN(storage, GetOrCreateStorage(StorageType::GPUCopyDstConstantBuffer));
DAWN_TRY(SyncStorage(commandContext, storage));
return storage->GetD3D11Buffer();
}
ResultOrError<ID3D11Buffer*> GPUUsableBuffer::GetD3D11NonConstantBuffer(
const ScopedCommandRecordingContext* commandContext) {
auto* storage = mStorages[StorageType::CPUWritableNonConstantBuffer].Get();
if (storage && storage->GetRevision() == mLastUpdatedStorage->GetRevision()) {
// The CPUWritableNonConstantBuffer is up to date, use it directly.
return storage->GetD3D11Buffer();
}
// In all other cases we are going to use the GPUWritableNonConstantBuffer because either it is
// up to date, or we need to update the non-constant buffer data, and doing a CopyResource on the
// GPU is always more efficient than paths involving a memcpy (or potentially a stall).
DAWN_TRY_ASSIGN(storage, GetOrCreateStorage(StorageType::GPUWritableNonConstantBuffer));
DAWN_TRY(SyncStorage(commandContext, storage));
return storage->GetD3D11Buffer();
}
ID3D11Buffer* GPUUsableBuffer::GetD3D11ConstantBufferForTesting() {
if (!mStorages[StorageType::CPUWritableConstantBuffer] &&
!mStorages[StorageType::GPUCopyDstConstantBuffer]) {
return nullptr;
}
auto tempCommandContext = ToBackend(GetDevice()->GetQueue())
->GetScopedPendingCommandContext(QueueBase::SubmitMode::Normal);
ID3D11Buffer* buffer;
if (GetDevice()->ConsumedError(GetD3D11ConstantBuffer(&tempCommandContext), &buffer)) {
return nullptr;
}
return buffer;
}
ID3D11Buffer* GPUUsableBuffer::GetD3D11NonConstantBufferForTesting() {
if (!mStorages[StorageType::CPUWritableNonConstantBuffer] &&
!mStorages[StorageType::GPUWritableNonConstantBuffer]) {
return nullptr;
}
auto tempCommandContext = ToBackend(GetDevice()->GetQueue())
->GetScopedPendingCommandContext(QueueBase::SubmitMode::Normal);
ID3D11Buffer* buffer;
if (GetDevice()->ConsumedError(GetD3D11NonConstantBuffer(&tempCommandContext), &buffer)) {
return nullptr;
}
return buffer;
}
ResultOrError<ComPtr<ID3D11ShaderResourceView>>
GPUUsableBuffer::CreateD3D11ShaderResourceViewFromD3DBuffer(ID3D11Buffer* d3d11Buffer,
uint64_t offset,
uint64_t originalSize) {
uint64_t size = Align(originalSize, 4);
DAWN_ASSERT(IsAligned(offset, 4u));
DAWN_ASSERT(size <= GetAllocatedSize());
UINT firstElement = static_cast<UINT>(offset / 4);
UINT numElements = static_cast<UINT>(size / 4);
D3D11_SHADER_RESOURCE_VIEW_DESC desc;
desc.Format = DXGI_FORMAT_R32_TYPELESS;
desc.ViewDimension = D3D11_SRV_DIMENSION_BUFFEREX;
desc.BufferEx.FirstElement = firstElement;
desc.BufferEx.NumElements = numElements;
desc.BufferEx.Flags = D3D11_BUFFEREX_SRV_FLAG_RAW;
ComPtr<ID3D11ShaderResourceView> srv;
DAWN_TRY(CheckHRESULT(ToBackend(GetDevice())
->GetD3D11Device()
->CreateShaderResourceView(d3d11Buffer, &desc, &srv),
"ShaderResourceView creation"));
return std::move(srv);
}
ResultOrError<ComPtr<ID3D11UnorderedAccessView1>>
GPUUsableBuffer::CreateD3D11UnorderedAccessViewFromD3DBuffer(ID3D11Buffer* d3d11Buffer,
uint64_t offset,
uint64_t originalSize) {
uint64_t size = Align(originalSize, 4);
DAWN_ASSERT(IsAligned(offset, 4u));
DAWN_ASSERT(size <= GetAllocatedSize());
UINT firstElement = static_cast<UINT>(offset / 4);
UINT numElements = static_cast<UINT>(size / 4);
D3D11_UNORDERED_ACCESS_VIEW_DESC1 desc;
desc.Format = DXGI_FORMAT_R32_TYPELESS;
desc.ViewDimension = D3D11_UAV_DIMENSION_BUFFER;
desc.Buffer.FirstElement = firstElement;
desc.Buffer.NumElements = numElements;
desc.Buffer.Flags = D3D11_BUFFER_UAV_FLAG_RAW;
ComPtr<ID3D11UnorderedAccessView1> uav;
DAWN_TRY(CheckHRESULT(ToBackend(GetDevice())
->GetD3D11Device3()
->CreateUnorderedAccessView1(d3d11Buffer, &desc, &uav),
"UnorderedAccessView creation"));
return std::move(uav);
}
ResultOrError<ComPtr<ID3D11ShaderResourceView>> GPUUsableBuffer::UseAsSRV(
const ScopedCommandRecordingContext* commandContext,
uint64_t offset,
uint64_t size) {
ID3D11Buffer* d3dBuffer;
DAWN_TRY_ASSIGN(d3dBuffer, GetD3D11NonConstantBuffer(commandContext));
auto key = std::make_tuple(d3dBuffer, offset, size);
auto ite = mSRVCache.find(key);
if (ite != mSRVCache.end()) {
return ite->second;
}
ComPtr<ID3D11ShaderResourceView> srv;
DAWN_TRY_ASSIGN(srv, CreateD3D11ShaderResourceViewFromD3DBuffer(d3dBuffer, offset, size));
mSRVCache[key] = srv;
return std::move(srv);
}
ResultOrError<ComPtr<ID3D11UnorderedAccessView>> GPUUsableBuffer::UseAsUAV(
const ScopedCommandRecordingContext* commandContext,
uint64_t offset,
uint64_t size) {
Storage* storage = nullptr;
DAWN_TRY_ASSIGN(storage, GetOrCreateStorage(StorageType::GPUWritableNonConstantBuffer));
DAWN_TRY(SyncStorage(commandContext, storage));
ComPtr<ID3D11UnorderedAccessView1> uav;
{
auto key = std::make_tuple(storage->GetD3D11Buffer(), offset, size);
auto ite = mUAVCache.find(key);
if (ite != mUAVCache.end()) {
uav = ite->second;
} else {
DAWN_TRY_ASSIGN(uav, CreateD3D11UnorderedAccessViewFromD3DBuffer(
storage->GetD3D11Buffer(), offset, size));
mUAVCache[key] = uav;
}
}
// Since UAV will modify the storage's content, increment its revision.
IncrStorageRevAndMakeLatest(commandContext, storage);
return ComPtr<ID3D11UnorderedAccessView>(std::move(uav));
}
MaybeError GPUUsableBuffer::UpdateD3D11ConstantBuffer(
const ScopedCommandRecordingContext* commandContext,
ID3D11Buffer* d3d11Buffer,
bool firstTimeUpdate,
uint64_t offset,
const void* data,
size_t size) {
DAWN_ASSERT(size > 0);
// For a full size write, UpdateSubresource1(D3D11_COPY_DISCARD) can be used to update
// constant buffer.
// WriteInternal() can be called with GetAllocatedSize(). We treat it as a full buffer write
// as well.
const bool fullSizeUpdate = size >= GetSize() && offset == 0;
const bool canPartialUpdate =
ToBackend(GetDevice())->GetDeviceInfo().supportsPartialConstantBufferUpdate;
if (fullSizeUpdate || firstTimeUpdate) {
const bool requiresFullAllocatedSizeWrite = !canPartialUpdate && !firstTimeUpdate;
// Offset and size must be 16-byte aligned to use UpdateSubresource1() on a constant
// buffer.
size_t alignedOffset;
if (offset < kConstantBufferUpdateAlignment - 1) {
alignedOffset = 0;
} else {
DAWN_ASSERT(firstTimeUpdate);
// For the offset, align down to a lower value (<= offset).
alignedOffset = Align(offset - (kConstantBufferUpdateAlignment - 1),
kConstantBufferUpdateAlignment);
}
size_t alignedEnd;
if (requiresFullAllocatedSizeWrite) {
alignedEnd = GetAllocatedSize();
} else {
alignedEnd = Align(offset + size, kConstantBufferUpdateAlignment);
}
size_t alignedSize = alignedEnd - alignedOffset;
DAWN_ASSERT((alignedSize % kConstantBufferUpdateAlignment) == 0);
DAWN_ASSERT(alignedSize <= GetAllocatedSize());
DAWN_ASSERT(offset >= alignedOffset);
// Extra bytes to the left of the offset that we may also write to. This is only valid if
// firstTimeUpdate is true.
size_t leftExtraBytes = offset - alignedOffset;
DAWN_ASSERT(leftExtraBytes == 0 || firstTimeUpdate);
// The layout of the buffer is like this:
// |..........................| leftExtraBytes | data | ............... |
// |<----------------- offset ---------------->|<-- size -->|
// |<----- alignedOffset ---->|<--------- alignedSize --------->|
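// For example, a firstTimeUpdate with offset = 20 and size = 8 gives alignedOffset = 16,
// alignedEnd = Align(28, 16) = 32, alignedSize = 16 and leftExtraBytes = 4, so 16 bytes are
// written starting at byte 16 with the caller's 8 bytes placed at byte 20.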
std::unique_ptr<uint8_t[]> alignedBuffer;
if (size != alignedSize) {
alignedBuffer.reset(new uint8_t[alignedSize]);
std::memcpy(alignedBuffer.get() + leftExtraBytes, data, size);
data = alignedBuffer.get();
}
D3D11_BOX dstBox;
dstBox.left = static_cast<UINT>(alignedOffset);
dstBox.top = 0;
dstBox.front = 0;
dstBox.right = static_cast<UINT>(alignedOffset + alignedSize);
dstBox.bottom = 1;
dstBox.back = 1;
// For full buffer write, D3D11_COPY_DISCARD is used to avoid GPU CPU synchronization.
commandContext->UpdateSubresource1(d3d11Buffer, /*DstSubresource=*/0,
requiresFullAllocatedSizeWrite ? nullptr : &dstBox, data,
/*SrcRowPitch=*/0,
/*SrcDepthPitch=*/0,
/*CopyFlags=*/D3D11_COPY_DISCARD);
return {};
}
// If the copy offset and size are not 16-byte aligned, we have to create a staging buffer to
// transfer the data to the constant buffer.
Ref<BufferBase> stagingBuffer;
DAWN_TRY_ASSIGN(stagingBuffer, ToBackend(GetDevice())->GetStagingBuffer(commandContext, size));
{
auto scopedUseStaging = stagingBuffer->UseInternal();
DAWN_TRY(ToBackend(stagingBuffer)
->WriteInternal(commandContext, 0, data, size,
/*isInitialWrite=*/true));
DAWN_TRY(ToBackend(stagingBuffer.Get())
->CopyToInternal(commandContext,
/*sourceOffset=*/0,
/*size=*/size, this, offset));
}
ToBackend(GetDevice())->ReturnStagingBuffer(std::move(stagingBuffer));
return {};
}
MaybeError GPUUsableBuffer::WriteInternal(const ScopedCommandRecordingContext* commandContext,
uint64_t offset,
const void* data,
size_t size,
bool isInitialWrite) {
if (size == 0) {
return {};
}
// Map the buffer if it is possible, so WriteInternal() can write to the mapped memory
// directly.
// TODO(crbug.com/345471009): Consider mapping the buffer for non-clearing writes when
// it's not in use by the GPU. This would avoid allocating additional GPU storage.
// However, checking GetLastUsageSerial() is unreliable here because Queue::Submit()
// may have already updated it before entering this function. In practice, this is
// uncommon for mappable buffers since users typically update them via MapAsync when
// they know the buffer is idle.
const bool alreadyMappedForWrite = mMappedData && mD3DMapTypeUsed != D3D11_MAP_READ;
if ((IsCPUWritable() && isInitialWrite) || alreadyMappedForWrite) {
// If buffer is already mapped, creating ScopedMap is a no-op.
ScopedMap scopedMap;
DAWN_TRY_ASSIGN(scopedMap, ScopedMap::Create(commandContext, this, wgpu::MapMode::Write));
DAWN_ASSERT(scopedMap.GetMappedData());
memcpy(scopedMap.GetMappedData() + offset, data, size);
return {};
}
// Mark the buffer as used in pending commands if the mapping path above wasn't taken.
// Mapped writes complete synchronously and don't require tracking.
DAWN_TRY(TrackUsage(commandContext, GetDevice()->GetQueue()->GetPendingCommandSerial()));
// WriteInternal() can be called with GetAllocatedSize(). We treat it as a full buffer write
// as well.
bool fullSizeWrite = size >= GetSize() && offset == 0;
// Mapping buffer at this point would stall the CPU. We will create a GPU copyable
// storage and use UpdateSubresource on it below instead. Note if we have both const buffer &
// non-const buffer, we favor writing to non-const buffer, because it has no alignment
// requirement.
Storage* gpuCopyableStorage = mStorages[StorageType::GPUWritableNonConstantBuffer].Get();
if (!gpuCopyableStorage) {
DAWN_TRY_ASSIGN(gpuCopyableStorage, GetOrCreateDstCopyableStorage());
}
if (fullSizeWrite) {
// If this is a full overwrite, no need to copy the old content.
// Just need to copy the revision number.
DAWN_ASSERT(mLastUpdatedStorage);
gpuCopyableStorage->SetRevision(mLastUpdatedStorage->GetRevision());
} else {
DAWN_TRY(SyncStorage(commandContext, gpuCopyableStorage));
}
const bool firstTimeUpdate = gpuCopyableStorage->IsFirstRevision();
// We are going to write to the storage in all code paths, so update the revision now.
IncrStorageRevAndMakeLatest(commandContext, gpuCopyableStorage);
if (gpuCopyableStorage->IsConstantBuffer()) {
return UpdateD3D11ConstantBuffer(commandContext, gpuCopyableStorage->GetD3D11Buffer(),
firstTimeUpdate, offset, data, size);
}
D3D11_BOX box;
box.left = static_cast<UINT>(offset);
box.top = 0;
box.front = 0;
box.right = static_cast<UINT>(offset + size);
box.bottom = 1;
box.back = 1;
commandContext->UpdateSubresource1(gpuCopyableStorage->GetD3D11Buffer(),
/*DstSubresource=*/0,
/*pDstBox=*/&box, data,
/*SrcRowPitch=*/0,
/*SrcDepthPitch=*/0,
/*CopyFlags=*/0);
// No need to update the constant buffer at this point. When the command buffer wants to bind
// the constant buffer in a render/compute pass, it will call GetD3D11ConstantBuffer() and the
// constant buffer will be synced there. WriteBuffer() cannot be called inside a render/compute
// pass, so there is no need to sync here.
return {};
}
MaybeError GPUUsableBuffer::CopyToInternal(const ScopedCommandRecordingContext* commandContext,
uint64_t sourceOffset,
size_t size,
Buffer* destination,
uint64_t destinationOffset) {
DAWN_TRY(TrackUsage(commandContext, GetDevice()->GetQueue()->GetPendingCommandSerial()));
ID3D11Buffer* d3d11SourceBuffer = mLastUpdatedStorage->GetD3D11Buffer();
return destination->CopyFromD3DInternal(commandContext, d3d11SourceBuffer, sourceOffset, size,
destinationOffset);
}
MaybeError GPUUsableBuffer::CopyFromD3DInternal(const ScopedCommandRecordingContext* commandContext,
ID3D11Buffer* d3d11SourceBuffer,
uint64_t sourceOffset,
size_t size,
uint64_t destinationOffset) {
DAWN_TRY(TrackUsage(commandContext, GetDevice()->GetQueue()->GetPendingCommandSerial()));
D3D11_BOX srcBox;
srcBox.left = static_cast<UINT>(sourceOffset);
srcBox.top = 0;
srcBox.front = 0;
srcBox.right = static_cast<UINT>(sourceOffset + size);
srcBox.bottom = 1;
srcBox.back = 1;
Storage* gpuCopyableStorage;
if (mLastUpdatedStorage->SupportsCopyDst()) {
gpuCopyableStorage = mLastUpdatedStorage;
} else {
DAWN_TRY_ASSIGN(gpuCopyableStorage, GetOrCreateDstCopyableStorage());
DAWN_TRY(SyncStorage(commandContext, gpuCopyableStorage));
}
commandContext->CopySubresourceRegion(
gpuCopyableStorage->GetD3D11Buffer(), /*DstSubresource=*/0,
/*DstX=*/destinationOffset,
/*DstY=*/0,
/*DstZ=*/0, d3d11SourceBuffer, /*SrcSubresource=*/0, &srcBox);
IncrStorageRevAndMakeLatest(commandContext, gpuCopyableStorage);
return {};
}
MaybeError GPUUsableBuffer::PredicatedClear(
const ScopedSwapStateCommandRecordingContext* commandContext,
ID3D11Predicate* predicate,
uint8_t clearValue,
uint64_t offset,
uint64_t size) {
DAWN_ASSERT(size != 0);
// Don't use mapping; mapping is not affected by ID3D11Predicate.
// Allocate GPU writable storage and sync it. Note: we don't SetPredication() yet otherwise
// it would affect the syncing.
Storage* gpuWritableStorage;
DAWN_TRY_ASSIGN(gpuWritableStorage,
GetOrCreateStorage(StorageType::GPUWritableNonConstantBuffer));
DAWN_TRY(SyncStorage(commandContext, gpuWritableStorage));
// SetPredication() and clear the storage with UpdateSubresource1().
D3D11_BOX box;
box.left = static_cast<UINT>(offset);
box.top = 0;
box.front = 0;
box.right = static_cast<UINT>(offset + size);
box.bottom = 1;
box.back = 1;
// TODO(350493305): Change the function signature to accept a single uint64_t value so that we
// don't need to allocate a vector here.
absl::InlinedVector<uint8_t, sizeof(uint64_t)> clearData(size, clearValue);
// The update will *NOT* be performed if the predicate's data is false.
commandContext->GetD3D11DeviceContext3()->SetPredication(predicate, false);
commandContext->UpdateSubresource1(gpuWritableStorage->GetD3D11Buffer(),
/*DstSubresource=*/0,
/*pDstBox=*/&box, clearData.data(),
/*SrcRowPitch=*/0,
/*SrcDepthPitch=*/0,
/*CopyFlags=*/0);
commandContext->GetD3D11DeviceContext3()->SetPredication(nullptr, false);
IncrStorageRevAndMakeLatest(commandContext, gpuWritableStorage);
return {};
}
ComPtr<ID3D11Buffer> GPUUsableBuffer::GetD3D11MappedBuffer() {
return mMappedData ? mMappableStorage->GetD3D11Buffer() : nullptr;
}
} // namespace dawn::native::d3d11