| // Copyright 2023 The Dawn & Tint Authors |
| // |
| // Redistribution and use in source and binary forms, with or without |
| // modification, are permitted provided that the following conditions are met: |
| // |
| // 1. Redistributions of source code must retain the above copyright notice, this |
| // list of conditions and the following disclaimer. |
| // |
| // 2. Redistributions in binary form must reproduce the above copyright notice, |
| // this list of conditions and the following disclaimer in the documentation |
| // and/or other materials provided with the distribution. |
| // |
| // 3. Neither the name of the copyright holder nor the names of its |
| // contributors may be used to endorse or promote products derived from |
| // this software without specific prior written permission. |
| // |
| // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
| // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
| // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE |
| // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE |
| // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
| // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR |
| // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER |
| // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, |
| // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
| // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| |
| #include "dawn/native/d3d11/BufferD3D11.h" |
| |
| #include <algorithm> |
| #include <memory> |
| #include <utility> |
| #include <vector> |
| |
| #include "dawn/common/Alloc.h" |
| #include "dawn/common/Assert.h" |
| #include "dawn/common/Constants.h" |
| #include "dawn/common/Math.h" |
| #include "dawn/native/ChainUtils.h" |
| #include "dawn/native/CommandBuffer.h" |
| #include "dawn/native/DynamicUploader.h" |
| #include "dawn/native/d3d/D3DError.h" |
| #include "dawn/native/d3d11/DeviceD3D11.h" |
| #include "dawn/native/d3d11/PhysicalDeviceD3D11.h" |
| #include "dawn/native/d3d11/QueueD3D11.h" |
| #include "dawn/native/d3d11/UtilsD3D11.h" |
| #include "dawn/platform/DawnPlatform.h" |
| #include "dawn/platform/tracing/TraceEvent.h" |
| |
| namespace dawn::native::d3d11 { |
| |
| class ScopedCommandRecordingContext; |
| |
| namespace { |
| |
// Max size for an upload buffer that is emulated in CPU memory.
| constexpr uint64_t kMaxCPUUploadBufferSize = 64 * 1024; |
| |
| constexpr wgpu::BufferUsage kCopyUsages = |
| wgpu::BufferUsage::CopySrc | wgpu::BufferUsage::CopyDst | kInternalCopySrcBuffer; |
| |
| constexpr wgpu::BufferUsage kStagingUsages = kMappableBufferUsages | kCopyUsages; |
| |
| constexpr wgpu::BufferUsage kD3D11GPUWriteUsages = |
| wgpu::BufferUsage::Storage | kInternalStorageBuffer | wgpu::BufferUsage::Indirect; |
| |
| // Resource usage Default Dynamic Immutable Staging |
| // ------------------------------------------------------------ |
| // GPU-read Yes Yes Yes Yes[1] |
| // GPU-write Yes No No Yes[1] |
| // CPU-read No No No Yes[1] |
| // CPU-write No Yes No Yes[1] |
| // ------------------------------------------------------------ |
| // [1] GPU read or write of a resource with the D3D11_USAGE_STAGING usage is restricted to copy |
| // operations. You use ID3D11DeviceContext::CopySubresourceRegion and |
| // ID3D11DeviceContext::CopyResource for these copy operations. |
| |
| bool IsMappable(wgpu::BufferUsage usage) { |
| return usage & kMappableBufferUsages; |
| } |
| |
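// An upload buffer has MapWrite usage and, beyond that, at most copy-source usages.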
| bool IsUpload(wgpu::BufferUsage usage) { |
| return usage & wgpu::BufferUsage::MapWrite && |
| IsSubset(usage, kInternalCopySrcBuffer | wgpu::BufferUsage::CopySrc | |
| wgpu::BufferUsage::MapWrite); |
| } |
| |
| bool IsStaging(wgpu::BufferUsage usage) { |
| // Must have at least MapWrite or MapRead bit |
| return IsMappable(usage) && IsSubset(usage, kStagingUsages); |
| } |
| |
| UINT D3D11BufferBindFlags(wgpu::BufferUsage usage) { |
| UINT bindFlags = 0; |
| |
    if (usage & wgpu::BufferUsage::Vertex) {
| bindFlags |= D3D11_BIND_VERTEX_BUFFER; |
| } |
| if (usage & wgpu::BufferUsage::Index) { |
| bindFlags |= D3D11_BIND_INDEX_BUFFER; |
| } |
    if (usage & wgpu::BufferUsage::Uniform) {
| bindFlags |= D3D11_BIND_CONSTANT_BUFFER; |
| } |
| if (usage & (wgpu::BufferUsage::Storage | kInternalStorageBuffer)) { |
| DAWN_ASSERT(!IsMappable(usage)); |
| bindFlags |= D3D11_BIND_UNORDERED_ACCESS; |
| } |
| if (usage & kReadOnlyStorageBuffer) { |
| bindFlags |= D3D11_BIND_SHADER_RESOURCE; |
| } |
| |
    // Buffers whose usages are limited to CopySrc and CopyDst are used as staging buffers for
    // copies. Because D3D11 doesn't allow copying directly between a buffer and a texture, we
    // use a compute shader to copy data between them, so the buffer needs to be bindable as an
    // unordered access view.
| if (IsSubset(usage, kCopyUsages)) { |
| bindFlags |= D3D11_BIND_UNORDERED_ACCESS; |
| } |
| |
| return bindFlags; |
| } |
| |
| UINT D3D11BufferMiscFlags(wgpu::BufferUsage usage) { |
| UINT miscFlags = 0; |
| if (usage & (wgpu::BufferUsage::Storage | kInternalStorageBuffer | kReadOnlyStorageBuffer)) { |
| miscFlags |= D3D11_RESOURCE_MISC_BUFFER_ALLOW_RAW_VIEWS; |
| } |
| if (usage & wgpu::BufferUsage::Indirect) { |
| miscFlags |= D3D11_RESOURCE_MISC_DRAWINDIRECT_ARGS; |
| } |
| return miscFlags; |
| } |
| |
| size_t D3D11BufferSizeAlignment(wgpu::BufferUsage usage) { |
| if (usage & wgpu::BufferUsage::Uniform) { |
| // https://learn.microsoft.com/en-us/windows/win32/api/d3d11_1/nf-d3d11_1-id3d11devicecontext1-vssetconstantbuffers1 |
        // The offset and number of constants passed to *SSetConstantBuffers1 must each be a
        // multiple of 16 shader constants, where one constant is 16 bytes (sizeof(float) * 4),
        // i.e. an alignment of 256 bytes.
| return sizeof(float) * 4 * 16; |
| } |
| |
| if (usage & (wgpu::BufferUsage::Storage | kInternalStorageBuffer | kReadOnlyStorageBuffer | |
| wgpu::BufferUsage::CopyDst | wgpu::BufferUsage::CopySrc)) { |
        // Unordered access buffers must be 4-byte aligned. We also align CopyDst buffers to
        // 4 bytes since they may be written by the texture-to-buffer (T2B) compute shader,
        // which writes 4-byte chunks. Similarly, we align CopySrc buffers to 4 bytes since
        // they may be read by the buffer-to-texture (B2T) shader, which reads 4-byte chunks.
| return sizeof(uint32_t); |
| } |
| return 1; |
| } |
| |
| bool CanUseCPUUploadBuffer(const Device* device, wgpu::BufferUsage usage, size_t bufferSize) { |
| return IsUpload(usage) && bufferSize <= kMaxCPUUploadBufferSize && |
| !device->IsToggleEnabled(Toggle::D3D11DisableCPUUploadBuffers); |
| } |
| |
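// UpdateSubresource1() on a constant buffer requires the destination region's offset and size
// to be aligned to a whole shader constant (16 bytes).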
| constexpr size_t kConstantBufferUpdateAlignment = 16; |
| |
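// Computes the map mode to keep the buffer persistently mapped with when the
// AutoMapBackendBuffer toggle is enabled. Returns MapMode::None when automatic mapping
// doesn't apply.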
| wgpu::MapMode GetAutoMapMode(DeviceBase* device, wgpu::BufferUsage usage) { |
| if (!device->IsToggleEnabled(Toggle::AutoMapBackendBuffer) || !IsMappable(usage)) { |
| return wgpu::MapMode::None; |
| } |
| |
| wgpu::MapMode mode = wgpu::MapMode::None; |
| if (usage & wgpu::BufferUsage::MapWrite) { |
| mode |= wgpu::MapMode::Write; |
| } |
| if (usage & wgpu::BufferUsage::MapRead) { |
| mode |= wgpu::MapMode::Read; |
| } |
| return mode; |
| } |
| |
| } // namespace |
| |
// CPU-to-GPU upload buffers (CopySrc|MapWrite) can be emulated in system memory and then
// written into the destination GPU buffer via ID3D11DeviceContext::UpdateSubresource.
| class UploadBuffer final : public Buffer { |
| public: |
| UploadBuffer(DeviceBase* device, const UnpackedPtr<BufferDescriptor>& descriptor) |
| : Buffer(device, |
| descriptor, |
| /*internalMappableFlags=*/kMappableBufferUsages, |
| /*autoMapMode=*/wgpu::MapMode::None) {} |
| ~UploadBuffer() override = default; |
| |
| private: |
| // BufferBase implementations |
| MaybeError MapAtCreationImpl() override { |
| mMappedData = mUploadData.get(); |
| // MapAtCreation does the zeroization on the front-end side. |
| return {}; |
| } |
| |
| MaybeError MapAsyncImpl(wgpu::MapMode mode, size_t offset, size_t size) override { |
| mMappedData = mUploadData.get(); |
| return EnsureDataInitialized(nullptr); |
| } |
| void UnmapImpl(BufferState oldState, BufferState newState) override { mMappedData = nullptr; } |
| |
| // d3d11::Buffer implementations |
| MaybeError InitializeInternal() override { |
| mUploadData = std::unique_ptr<uint8_t[]>(AllocNoThrow<uint8_t>(GetAllocatedSize())); |
| if (mUploadData == nullptr) { |
| return DAWN_OUT_OF_MEMORY_ERROR("Failed to allocate memory for buffer uploading."); |
| } |
| return {}; |
| } |
| |
| MaybeError MapInternal(const ScopedCommandRecordingContext*, wgpu::MapMode) override { |
| mMappedData = mUploadData.get(); |
| return {}; |
| } |
| |
| void UnmapInternal(const ScopedCommandRecordingContext*) override { mMappedData = nullptr; } |
| |
| MaybeError ClearInternal(const ScopedCommandRecordingContext* commandContext, |
| uint8_t clearValue, |
| uint64_t offset, |
| uint64_t size) override { |
| memset(mUploadData.get() + offset, clearValue, size); |
| return {}; |
| } |
| |
| MaybeError CopyToInternal(const ScopedCommandRecordingContext* commandContext, |
| uint64_t sourceOffset, |
| size_t size, |
| Buffer* destination, |
| uint64_t destinationOffset) override { |
| return destination->WriteInternal(commandContext, destinationOffset, |
| mUploadData.get() + sourceOffset, size, |
| /*isInitialWrite=*/false); |
| } |
| |
| MaybeError CopyFromD3DInternal(const ScopedCommandRecordingContext* commandContext, |
| ID3D11Buffer* srcD3D11Buffer, |
| uint64_t sourceOffset, |
| size_t size, |
| uint64_t destinationOffset) override { |
| // Upload buffers shouldn't be copied to. |
| DAWN_UNREACHABLE(); |
| return {}; |
| } |
| |
| MaybeError WriteInternal(const ScopedCommandRecordingContext* commandContext, |
| uint64_t offset, |
| const void* data, |
| size_t size, |
| bool isInitialWrite) override { |
| const auto* src = static_cast<const uint8_t*>(data); |
| std::copy(src, src + size, mUploadData.get() + offset); |
| return {}; |
| } |
| |
| std::unique_ptr<uint8_t[]> mUploadData; |
| }; |
| |
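// Returns true if `storageUsage` can be added to a buffer created with `originalUsage` without
// side effects such as extra copies or losing a more efficient allocation strategy.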
| bool CanAddStorageUsageToBufferWithoutSideEffects(const Device* device, |
| wgpu::BufferUsage storageUsage, |
| wgpu::BufferUsage originalUsage, |
| size_t bufferSize) { |
    // Don't allow uniform buffers to be used as storage buffers: D3D11 constant buffers cannot
    // be bound as SRVs or UAVs, so allowing this would require workarounds, including extra
    // copies. We prefer not to do that.
| if (originalUsage & wgpu::BufferUsage::Uniform) { |
| return false; |
| } |
| |
    // If the buffer is small, we prefer a CPU buffer for uploading, so don't allow adding
    // storage usage.
| if (CanUseCPUUploadBuffer(device, originalUsage, bufferSize)) { |
| return false; |
| } |
| |
| const bool requiresUAV = storageUsage & (wgpu::BufferUsage::Storage | kInternalStorageBuffer); |
    // Check support for writable storage usage:
| if (requiresUAV) { |
| // D3D11 mappable buffers cannot be used as UAV natively. So avoid that. |
| return !(originalUsage & kMappableBufferUsages); |
| } |
| |
    // A read-only storage buffer cannot natively be mapped for reading. Avoid that.
| DAWN_ASSERT(storageUsage == kReadOnlyStorageBuffer); |
| return !(originalUsage & wgpu::BufferUsage::MapRead); |
| } |
| |
| // static |
| ResultOrError<Ref<Buffer>> Buffer::Create(Device* device, |
| const UnpackedPtr<BufferDescriptor>& descriptor, |
| const ScopedCommandRecordingContext* commandContext, |
| bool allowUploadBufferEmulation) { |
| const auto actualUsage = |
| ComputeInternalBufferUsages(device, descriptor->usage, descriptor->size); |
| bool useUploadBuffer = allowUploadBufferEmulation; |
| useUploadBuffer &= CanUseCPUUploadBuffer(device, actualUsage, descriptor->size); |
| Ref<Buffer> buffer; |
| if (useUploadBuffer) { |
| buffer = AcquireRef(new UploadBuffer(device, descriptor)); |
| } else { |
| buffer = AcquireRef(new GPUUsableBuffer(device, descriptor)); |
| } |
| DAWN_TRY(buffer->Initialize(descriptor->mappedAtCreation, commandContext)); |
| return buffer; |
| } |
| |
| Buffer::Buffer(DeviceBase* device, |
| const UnpackedPtr<BufferDescriptor>& descriptor, |
| wgpu::BufferUsage internalMappableFlags, |
| wgpu::MapMode autoMapMode) |
| : BufferBase(device, descriptor), |
| mInternalMappableFlags(internalMappableFlags), |
| mAutoMapMode(autoMapMode) {} |
| |
| MaybeError Buffer::Initialize(bool mappedAtCreation, |
| const ScopedCommandRecordingContext* commandContext) { |
| // TODO(dawn:1705): handle mappedAtCreation for NonzeroClearResourcesOnCreationForTesting |
| |
| // Allocate at least 4 bytes so clamped accesses are always in bounds. |
| uint64_t size = std::max(GetSize(), uint64_t(4u)); |
| // The validation layer requires: |
| // ByteWidth must be 12 or larger to be used with D3D11_RESOURCE_MISC_DRAWINDIRECT_ARGS. |
| if (GetInternalUsage() & wgpu::BufferUsage::Indirect) { |
| size = std::max(size, uint64_t(12u)); |
| } |
| size_t alignment = D3D11BufferSizeAlignment(GetInternalUsage()); |
| // Check for overflow, bufferDescriptor.ByteWidth is a UINT. |
| if (size > std::numeric_limits<UINT>::max() - alignment) { |
        // Alignment would overflow.
| return DAWN_OUT_OF_MEMORY_ERROR("Buffer allocation is too large"); |
| } |
| mAllocatedSize = Align(size, alignment); |
| |
| DAWN_TRY(InitializeInternal()); |
| |
| SetLabelImpl(); |
| |
| // The buffers with mappedAtCreation == true will be initialized in |
| // BufferBase::MapAtCreation(). |
| if (!mappedAtCreation && |
| GetDevice()->IsToggleEnabled(Toggle::NonzeroClearResourcesOnCreationForTesting)) { |
| auto scopedUseDuringCreation = UseInternal(); |
| if (commandContext) { |
| DAWN_TRY(ClearWholeBuffer(commandContext, 1u)); |
| } else { |
| auto tmpCommandContext = |
| ToBackend(GetDevice()->GetQueue()) |
| ->GetScopedPendingCommandContext(QueueBase::SubmitMode::Normal); |
| DAWN_TRY(ClearWholeBuffer(&tmpCommandContext, 1u)); |
| } |
| } |
| |
| // Mark padding as cleared if there's no padding. |
| if (GetAllocatedSize() == GetSize()) { |
| mPaddingCleared = true; |
| } |
| |
| return {}; |
| } |
| |
| Buffer::~Buffer() = default; |
| |
| bool Buffer::IsCPUWritableAtCreation() const { |
| return IsCPUWritable(); |
| } |
| |
| bool Buffer::IsCPUWritable() const { |
| return mInternalMappableFlags & wgpu::BufferUsage::MapWrite; |
| } |
| |
| bool Buffer::IsCPUReadable() const { |
| return mInternalMappableFlags & wgpu::BufferUsage::MapRead; |
| } |
| |
| MaybeError Buffer::MapAtCreationImpl() { |
| DAWN_ASSERT(IsCPUWritableAtCreation()); |
| // Use Try variant to avoid blocking if the CommandContext lock is already held (e.g., by |
| // another thread or during Queue::Submit). MapAtCreation must return immediately with a |
| // mappable pointer, so if the lock isn't available, we fall back to temporary storage and |
| // defer the actual D3D11 buffer mapping until UnmapIfNeeded. At that point, the |
| // CommandContext will already be acquired (e.g., during TrackUsage before GPU submission), |
| // and we can safely map the real D3D11 buffer and transfer the temporary storage contents |
| // to it via memcpy. |
| std::optional<ScopedCommandRecordingContext> maybeCommandContext = |
| ToBackend(GetDevice()->GetQueue()) |
| ->TryGetScopedPendingCommandContext(QueueBase::SubmitMode::Normal); |
| if (maybeCommandContext.has_value()) { |
| return MapInternal(&maybeCommandContext.value(), mAutoMapMode | wgpu::MapMode::Write); |
| } |
| |
| // Lock could not be acquired, use temporary storage instead |
| mMapAtCreationData = std::unique_ptr<uint8_t[]>(AllocNoThrow<uint8_t>(GetAllocatedSize())); |
| mMappedData = mMapAtCreationData.get(); |
| return {}; |
| } |
| |
| MaybeError Buffer::MapInternal(const ScopedCommandRecordingContext* commandContext, |
| wgpu::MapMode mode) { |
| DAWN_UNREACHABLE(); |
| |
| return {}; |
| } |
| |
| void Buffer::UnmapInternal(const ScopedCommandRecordingContext* commandContext) { |
| DAWN_UNREACHABLE(); |
| } |
| |
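// Performs any unmap deferred by UnmapImpl(). Also finalizes a MapAtCreation that fell back to
// temporary CPU storage by copying that storage's contents into the actually mapped buffer.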
| MaybeError Buffer::UnmapIfNeeded(const ScopedCommandRecordingContext* commandContext) { |
| if (mMappedData == nullptr) { |
| return {}; |
| } |
| |
| if (mMapAtCreationData) { |
| // We used temporary storage for MapAtCreation, now copy it to the actual buffer |
| mMappedData = nullptr; |
| ScopedMap scopedMap; |
| DAWN_TRY_ASSIGN(scopedMap, ScopedMap::Create(commandContext, this, wgpu::MapMode::Write)); |
| DAWN_ASSERT(scopedMap.GetMappedData()); |
| memcpy(scopedMap.GetMappedData(), mMapAtCreationData.get(), GetAllocatedSize()); |
| mMapAtCreationData.reset(); |
| return {}; |
| } |
| |
| UnmapInternal(commandContext); |
| return {}; |
| } |
| |
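// Records the buffer as used by the pending commands. Since a buffer must not remain mapped
// while the GPU uses it, any deferred unmap is performed first. With automatic mapping, a
// re-map is also scheduled for when the pending serial completes.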
| MaybeError Buffer::TrackUsage(const ScopedCommandRecordingContext* commandContext, |
| ExecutionSerial pendingSerial) { |
| if (GetLastUsageSerial() == pendingSerial) { |
| return {}; |
| } |
    // We need to unmap the buffer before it can be used by the queue.
| DAWN_TRY(UnmapIfNeeded(commandContext)); |
| MarkUsedInPendingCommands(pendingSerial); |
| |
| // If automatic mapping is enabled, schedule the buffer to be re-mapped after GPU usage. |
| if (mAutoMapMode != wgpu::MapMode::None) { |
| mMapRequest.mode = mAutoMapMode; |
| ToBackend(GetDevice()->GetQueue())->ScheduleBufferMapping(&mMapRequest, pendingSerial); |
| } |
| |
| return {}; |
| } |
| |
| MaybeError Buffer::MapAsyncImpl(wgpu::MapMode mode, size_t offset, size_t size) { |
| DAWN_ASSERT((mode == wgpu::MapMode::Write && IsCPUWritable()) || |
| (mode == wgpu::MapMode::Read && IsCPUReadable())); |
| |
    // With automatic mapping, the buffer is actually mapped/unmapped at queue
    // boundaries, so MapAsync is a no-op.
| if (mAutoMapMode != wgpu::MapMode::None) { |
        // Lazily do the first map if the buffer hasn't been used in any queue yet.
| if (GetLastUsageSerial() == kBeginningOfGPUTime && !mMappedData) { |
| DAWN_TRY(MapAtCreationImpl()); |
| } |
| return {}; |
| } |
| |
| auto deviceGuard = GetDevice()->GetGuard(); |
| |
| const ExecutionSerial lastUsageSerial = GetLastUsageSerial(); |
| const ExecutionSerial completedSerial = GetDevice()->GetQueue()->GetCompletedCommandSerial(); |
    // The map could stall if the buffer is still being used by previously submitted commands.
    // To avoid that, we instead ask the Queue to do the map later, once the last usage serial
    // has passed.
| if (lastUsageSerial > completedSerial) { |
| mMapRequest.mode = mode; |
| ToBackend(GetDevice()->GetQueue())->ScheduleBufferMapping(&mMapRequest, lastUsageSerial); |
| } else { |
| auto commandContext = ToBackend(GetDevice()->GetQueue()) |
| ->GetScopedPendingCommandContext(QueueBase::SubmitMode::Normal); |
| DAWN_TRY(TryMapNow(&commandContext, completedSerial, mode)); |
| } |
| |
| return {}; |
| } |
| |
| // The difference between FinalizeMapImpl and TryMapNow is that: |
| // - FinalizeMapImpl() is triggered by front-end's serial control. |
| // - It's called after Queue::CheckAndUpdateCompletedSerials() and before user's mapping callback. |
| // - FinalizeMapImpl() will always be called for each MapAsync regardless of whether automatic |
| // mapping is enabled or not. |
| // - TryMapNow() is triggered by Queue::CheckAndUpdateCompletedSerials(). |
| // - If automatic mapping is disabled, it's scheduled for each MapAsync. |
| // - else, it's scheduled once after the queue finishes using the buffer. |
| MaybeError Buffer::TryMapNow(ScopedCommandRecordingContext* commandContext, |
| ExecutionSerial completedSerial, |
| wgpu::MapMode mode) { |
| // If the buffer was used again after the remap was scheduled, skip this remap. |
| // This should only happen with automatic mapping where the buffer is scheduled to be |
| // re-mapped, but then used again in a subsequent submit before the original serial completed. |
| if (completedSerial < GetLastUsageSerial()) { |
| DAWN_ASSERT(mAutoMapMode != wgpu::MapMode::None); |
| return {}; |
| } |
| |
| DAWN_ASSERT(GetDevice()->IsLockedByCurrentThreadIfNeeded()); |
| |
| // Trigger any deferred unmaps. |
    // TODO(crbug.com/345471009): Consider reusing the mapped pointer and skipping the remap if
    // the previous map mode is the same as the current one.
| DAWN_TRY(UnmapIfNeeded(commandContext)); |
| |
| // Map then initialize data using mapped pointer. |
| // The mapped pointer is always writable because: |
| // - If mode is Write, then it's already writable. |
| // - If mode is Read, it's only possible to map staging buffer. In that case, |
| // D3D11_MAP_READ_WRITE will be used, hence the mapped pointer will also be writable. |
| // TODO(dawn:1705): make sure the map call is not blocked by the GPU operations. |
| DAWN_TRY(MapInternal(commandContext, mode)); |
| |
| return {}; |
| } |
| |
| MaybeError Buffer::FinalizeMapImpl(BufferState newState) { |
| if (newState == BufferState::MappedAtCreation) { |
| return {}; |
| } |
| |
| DAWN_ASSERT(mMappedData); |
| |
| // Ensure data is initialized after a MapAsync event completes. |
| DAWN_TRY(EnsureDataInitialized(nullptr)); |
| |
| return {}; |
| } |
| |
| void Buffer::UnmapImpl(BufferState oldState, BufferState newState) { |
| DAWN_ASSERT(IsMappable(GetInternalUsage())); |
| |
| // With automatic mapping, the buffer stays mapped, so Unmap is a no-op. |
| if (mAutoMapMode != wgpu::MapMode::None) { |
| return; |
| } |
| |
| // Cancel any pending scheduled map. Note we don't cancel here if newState is Destroyed, since |
| // it should be handled in DestroyImpl instead. DestroyImpl knows whether the reason is early |
| // destroy or dtor, and can decide to call CancelScheduledBufferMapping accordingly. |
| if (newState != BufferState::Destroyed) { |
| ToBackend(GetDevice()->GetQueue())->CancelScheduledBufferMapping(this); |
| } |
| |
| // The actual unmap will be deferred until the buffer is used by the queue or we need to map |
| // again. This avoids the need to lock the CommandContext here just to call D3D11's Unmap |
| // function, and instead defers the call to a moment where the CommandContext is already |
| // acquired. |
| } |
| |
| void* Buffer::GetMappedPointerImpl() { |
| // The frontend asks that the pointer returned is from the start of the resource |
| // irrespective of the offset passed in MapAsyncImpl, which is what mMappedData is. |
| return mMappedData; |
| } |
| |
| void Buffer::DestroyImpl(DestroyReason reason) { |
| // TODO(crbug.com/dawn/831): DestroyImpl is called from two places. |
| // - It may be called if the buffer is explicitly destroyed with APIDestroy. |
| // This case is NOT thread-safe and needs proper synchronization with other |
| // simultaneous uses of the buffer. |
| // - It may be called when the last ref to the buffer is dropped and the buffer |
| // is implicitly destroyed. This case is thread-safe because there are no |
| // other threads using the buffer since there are no other live refs. |
| BufferBase::DestroyImpl(reason); |
| |
| // Cancel any pending map schedule. Even though front-end guarantees that Destroy() cannot run |
| // in parallel with Queue operations, it doesn't do the same for Device::Tick(), |
| // Instance::ProcessEvents() or WaitAny(). Thus a scheduled map triggered by those functions |
| // would race with Destroy() if we don't do a cancel here. |
| if (reason != DestroyReason::CppDestructor && IsMappable(GetInternalUsage())) { |
| ToBackend(GetDevice()->GetQueue())->CancelScheduledBufferMapping(this); |
| } |
| |
    // If the buffer is still mapped, we need to unmap it before releasing the D3D11 resource.
    // Otherwise there might be issues on certain drivers, such as Intel's.
| // Note: The front-end guarantees that DestroyImpl cannot run concurrently with MapAsync, |
| // UnmapImpl, or Queue operations, so accessing mMappedData here is safe. Additionally, since |
| // no Queue operation can use this buffer anymore, it won't be scheduled for a remap after a |
| // cancel above. |
| if (mMappedData != nullptr && !mMapAtCreationData) { |
| // We don't need to unmap if the mapping was done on a shadow copy because no real |
| // buffer is mapped yet. |
| ToBackend(GetDevice())->DeferUnmapDestroyedBuffer(GetD3D11MappedBuffer()); |
| mMappedData = nullptr; |
| } |
| } |
| |
| std::optional<DeviceGuard> Buffer::UseDeviceGuardForDestroy() { |
| return std::nullopt; |
| } |
| |
| MaybeError Buffer::EnsureDataInitialized(const ScopedCommandRecordingContext* commandContext) { |
| // Clear padding on first use, regardless of initialization state. |
| DAWN_TRY(EnsurePaddingInitialized(commandContext)); |
| |
| if (!NeedsInitialization()) { |
| return {}; |
| } |
| |
| DAWN_TRY(InitializeToZero(commandContext)); |
| return {}; |
| } |
| |
| MaybeError Buffer::EnsureDataInitializedAsDestination( |
| const ScopedCommandRecordingContext* commandContext, |
| uint64_t offset, |
| uint64_t size) { |
| // Clear padding on first use as destination, regardless of initialization state. |
| DAWN_TRY(EnsurePaddingInitialized(commandContext)); |
| |
| if (!NeedsInitialization()) { |
| return {}; |
| } |
| |
| if (IsFullBufferRange(offset, size)) { |
| SetInitialized(true); |
| return {}; |
| } |
| |
| DAWN_TRY(InitializeToZero(commandContext)); |
| return {}; |
| } |
| |
| MaybeError Buffer::EnsureDataInitializedAsDestination( |
| const ScopedCommandRecordingContext* commandContext, |
| const CopyTextureToBufferCmd* copy) { |
| // Clear padding on first use as destination, regardless of initialization state. |
| DAWN_TRY(EnsurePaddingInitialized(commandContext)); |
| |
| if (!NeedsInitialization()) { |
| return {}; |
| } |
| |
| if (IsFullBufferOverwrittenInTextureToBufferCopy(copy)) { |
| SetInitialized(true); |
| } else { |
| DAWN_TRY(InitializeToZero(commandContext)); |
| } |
| |
| return {}; |
| } |
| |
| MaybeError Buffer::InitializeToZero(const ScopedCommandRecordingContext* commandContext) { |
| DAWN_ASSERT(NeedsInitialization()); |
| |
| DAWN_TRY(ClearWholeBuffer(commandContext, uint8_t(0u))); |
| SetInitialized(true); |
| GetDevice()->IncrementLazyClearCountForTesting(); |
| |
| return {}; |
| } |
| |
| MaybeError Buffer::PredicatedClear(const ScopedSwapStateCommandRecordingContext* commandContext, |
| ID3D11Predicate* predicate, |
| uint8_t clearValue, |
| uint64_t offset, |
| uint64_t size) { |
| DAWN_UNREACHABLE(); |
| return {}; |
| } |
| |
| MaybeError Buffer::Clear(const ScopedCommandRecordingContext* commandContext, |
| uint8_t clearValue, |
| uint64_t offset, |
| uint64_t size) { |
| DAWN_ASSERT(!mMappedData); |
| |
| if (size == 0) { |
| return {}; |
| } |
| |
| // For non-staging buffers, we can use UpdateSubresource to write the data. |
| DAWN_TRY(EnsureDataInitializedAsDestination(commandContext, offset, size)); |
| |
| return ClearInternal(commandContext, clearValue, offset, size); |
| } |
| |
| MaybeError Buffer::ClearWholeBuffer(const ScopedCommandRecordingContext* commandContext, |
| uint8_t clearValue) { |
| return ClearInternal(commandContext, clearValue, 0, GetAllocatedSize()); |
| } |
| |
| MaybeError Buffer::ClearInternal(const ScopedCommandRecordingContext* commandContext, |
| uint8_t clearValue, |
| uint64_t offset, |
| uint64_t size) { |
| DAWN_ASSERT(size != 0); |
| |
    // TODO(dawn:1705): use a reusable zero-filled staging buffer to clear the buffer, avoiding
    // this CPU-to-GPU copy.
| std::vector<uint8_t> clearData(size, clearValue); |
| return WriteInternal(commandContext, offset, clearData.data(), size, |
| /*isInitialWrite=*/true); |
| } |
| |
| MaybeError Buffer::EnsurePaddingInitialized(const ScopedCommandRecordingContext* commandContext) { |
| if (mPaddingCleared) [[likely]] { |
| return {}; |
| } |
| DAWN_TRY(ClearPaddingInternal(commandContext)); |
| mPaddingCleared = true; |
| return {}; |
| } |
| |
| MaybeError Buffer::ClearPaddingInternal(const ScopedCommandRecordingContext* commandContext) { |
| uint32_t paddingBytes = GetAllocatedSize() - GetSize(); |
| if (paddingBytes == 0) { |
| return {}; |
| } |
| uint32_t clearSize = paddingBytes; |
| uint64_t clearOffset = GetSize(); |
| DAWN_TRY(ClearInternal(commandContext, 0, clearOffset, clearSize)); |
| |
| return {}; |
| } |
| |
| ComPtr<ID3D11Buffer> Buffer::GetD3D11MappedBuffer() { |
| return nullptr; |
| } |
| |
| MaybeError Buffer::Write(const ScopedCommandRecordingContext* commandContext, |
| uint64_t offset, |
| const void* data, |
| size_t size) { |
| DAWN_ASSERT(size != 0); |
| |
| // For non-staging buffers, we can use UpdateSubresource to write the data. |
| DAWN_TRY(EnsureDataInitializedAsDestination(commandContext, offset, size)); |
| |
| return WriteInternal(commandContext, offset, data, size, /*isInitialWrite=*/false); |
| } |
| |
| // static |
| MaybeError Buffer::Copy(const ScopedCommandRecordingContext* commandContext, |
| Buffer* source, |
| uint64_t sourceOffset, |
| size_t size, |
| Buffer* destination, |
| uint64_t destinationOffset) { |
| DAWN_ASSERT(size != 0); |
| |
| DAWN_TRY(source->EnsureDataInitialized(commandContext)); |
| DAWN_TRY( |
| destination->EnsureDataInitializedAsDestination(commandContext, destinationOffset, size)); |
| return source->CopyToInternal(commandContext, sourceOffset, size, destination, |
| destinationOffset); |
| } |
| |
| ResultOrError<Buffer::ScopedMap> Buffer::ScopedMap::Create( |
| const ScopedCommandRecordingContext* commandContext, |
| Buffer* buffer, |
| wgpu::MapMode mode) { |
| if (mode == wgpu::MapMode::Write && !buffer->IsCPUWritable()) { |
| return ScopedMap(); |
| } |
| if (mode == wgpu::MapMode::Read && !buffer->IsCPUReadable()) { |
| return ScopedMap(); |
| } |
| |
| if (buffer->mMappedData) { |
| return ScopedMap(commandContext, buffer, /*needsUnmap=*/false); |
| } |
| |
| DAWN_TRY(buffer->MapInternal(commandContext, mode)); |
| return ScopedMap(commandContext, buffer, /*needsUnmap=*/true); |
| } |
| |
| // ScopedMap |
| Buffer::ScopedMap::ScopedMap() = default; |
| |
| Buffer::ScopedMap::ScopedMap(const ScopedCommandRecordingContext* commandContext, |
| Buffer* buffer, |
| bool needsUnmap) |
| : mCommandContext(commandContext), mBuffer(buffer), mNeedsUnmap(needsUnmap) {} |
| |
| Buffer::ScopedMap::~ScopedMap() { |
| Reset(); |
| } |
| |
| Buffer::ScopedMap::ScopedMap(Buffer::ScopedMap&& other) { |
| this->operator=(std::move(other)); |
| } |
| |
| Buffer::ScopedMap& Buffer::ScopedMap::operator=(Buffer::ScopedMap&& other) { |
| Reset(); |
| mCommandContext = other.mCommandContext; |
| mBuffer = other.mBuffer; |
| mNeedsUnmap = other.mNeedsUnmap; |
| other.mBuffer = nullptr; |
| other.mNeedsUnmap = false; |
| return *this; |
| } |
| |
| void Buffer::ScopedMap::Reset() { |
| if (mNeedsUnmap) { |
| mBuffer->UnmapInternal(mCommandContext); |
| } |
| mCommandContext = nullptr; |
| mBuffer = nullptr; |
| mNeedsUnmap = false; |
| } |
| |
| uint8_t* Buffer::ScopedMap::GetMappedData() const { |
| return mBuffer ? static_cast<uint8_t*>(mBuffer->mMappedData) : nullptr; |
| } |
| |
| // GPUUsableBuffer::Storage |
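// A Storage wraps one ID3D11Buffer allocation backing a GPUUsableBuffer. Each Storage carries
// a revision number: a storage whose revision matches mLastUpdatedStorage's revision is up to
// date, while stale storages are refreshed on demand via SyncStorage().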
| class GPUUsableBuffer::Storage : public RefCounted, NonCopyable { |
| public: |
| explicit Storage(ComPtr<ID3D11Buffer> d3d11Buffer) : mD3d11Buffer(std::move(d3d11Buffer)) { |
| D3D11_BUFFER_DESC desc; |
| mD3d11Buffer->GetDesc(&desc); |
| mD3d11Usage = desc.Usage; |
| |
| mMappableCopyableFlags = wgpu::BufferUsage::CopySrc; |
| |
| switch (mD3d11Usage) { |
| case D3D11_USAGE_STAGING: |
| mMappableCopyableFlags |= kMappableBufferUsages | wgpu::BufferUsage::CopyDst; |
| break; |
| case D3D11_USAGE_DYNAMIC: |
| mMappableCopyableFlags |= wgpu::BufferUsage::MapWrite; |
| break; |
| case D3D11_USAGE_DEFAULT: |
| mMappableCopyableFlags |= wgpu::BufferUsage::CopyDst; |
| break; |
| default: |
| break; |
| } |
| |
| mIsConstantBuffer = desc.BindFlags & D3D11_BIND_CONSTANT_BUFFER; |
| } |
| |
| ID3D11Buffer* GetD3D11Buffer() { return mD3d11Buffer.Get(); } |
| |
| uint64_t GetRevision() const { return mRevision; } |
| void SetRevision(uint64_t revision) { mRevision = revision; } |
| bool IsFirstRevision() const { return mRevision == 0; } |
| |
| bool IsConstantBuffer() const { return mIsConstantBuffer; } |
| |
| bool IsCPUWritable() const { return mMappableCopyableFlags & wgpu::BufferUsage::MapWrite; } |
| bool IsCPUReadable() const { return mMappableCopyableFlags & wgpu::BufferUsage::MapRead; } |
| bool IsDynamic() const { return mD3d11Usage == D3D11_USAGE_DYNAMIC; } |
| bool SupportsCopyDst() const { return mMappableCopyableFlags & wgpu::BufferUsage::CopyDst; } |
| bool IsGPUWritable() const { return mD3d11Usage == D3D11_USAGE_DEFAULT; } |
| |
| private: |
| ComPtr<ID3D11Buffer> mD3d11Buffer; |
| uint64_t mRevision = 0; |
| D3D11_USAGE mD3d11Usage; |
| bool mIsConstantBuffer = false; |
| wgpu::BufferUsage mMappableCopyableFlags; |
| }; |
| |
| // GPUUsableBuffer |
| GPUUsableBuffer::GPUUsableBuffer(DeviceBase* device, |
| const UnpackedPtr<BufferDescriptor>& descriptor) |
| : Buffer( |
| device, |
| descriptor, |
| /*internalMappableFlags=*/ |
| [](const UnpackedPtr<BufferDescriptor>& descriptor) { |
| wgpu::BufferUsage mappableFlags = descriptor->usage & kMappableBufferUsages; |
| if (descriptor->usage & wgpu::BufferUsage::MapRead) { |
| // Staging buffer can be both mapped read & write. |
| mappableFlags |= wgpu::BufferUsage::MapWrite; |
| } |
| return mappableFlags; |
| }(descriptor), |
| /*autoMapMode=*/ |
| GetAutoMapMode(device, descriptor->usage)) {} |
| |
| GPUUsableBuffer::~GPUUsableBuffer() = default; |
| |
| void GPUUsableBuffer::DestroyImpl(DestroyReason reason) { |
| // TODO(crbug.com/dawn/831): DestroyImpl is called from two places. |
| // - It may be called if the buffer is explicitly destroyed with APIDestroy. |
| // This case is NOT thread-safe and needs proper synchronization with other |
| // simultaneous uses of the buffer. |
| // - It may be called when the last ref to the buffer is dropped and the buffer |
| // is implicitly destroyed. This case is thread-safe because there are no |
| // other threads using the buffer since there are no other live refs. |
| Buffer::DestroyImpl(reason); |
| |
| mSRVCache.clear(); |
| mUAVCache.clear(); |
| |
| mLastUpdatedStorage = nullptr; |
| mMappableStorage = nullptr; |
| |
| mStorages = {}; |
| } |
| |
| void GPUUsableBuffer::SetLabelImpl() { |
| for (auto ite = mStorages.begin(); ite != mStorages.end(); ++ite) { |
| auto storageType = static_cast<StorageType>(std::distance(mStorages.begin(), ite)); |
| SetStorageLabel(storageType); |
| } |
| } |
| |
| void GPUUsableBuffer::SetStorageLabel(StorageType storageType) { |
| static constexpr ityp::array<GPUUsableBuffer::StorageType, const char*, |
| static_cast<uint8_t>(StorageType::Count)> |
| kStorageTypeStrings = { |
| "Dawn_CPUWritableConstantBuffer", |
| "Dawn_GPUCopyDstConstantBuffer", |
| "Dawn_CPUWritableNonConstantBuffer", |
| "Dawn_GPUWritableNonConstantBuffer", |
| "Dawn_Staging", |
| }; |
| |
| if (!mStorages[storageType]) { |
| return; |
| } |
| |
| SetDebugName(ToBackend(GetDevice()), mStorages[storageType]->GetD3D11Buffer(), |
| kStorageTypeStrings[storageType], GetLabel()); |
| } |
| |
| MaybeError GPUUsableBuffer::InitializeInternal() { |
| mStorages = {}; |
| |
| wgpu::BufferUsage usagesToHandle = GetInternalUsage(); |
| |
| // If the buffer has MapRead usage, allocate a staging storage for both MapRead & MapWrite. |
    // Example: If the user creates a buffer with MapWrite + MapRead + Vertex usages:
| // - We will allocate: |
| // - A storage for both MapWrite + MapRead. |
| // - A storage for Vertex usage. |
| // - This will require a copy for vertex input whenever the user maps the buffer (either for |
| // writing or reading). |
| // - This is acceptable since a buffer with both MapWrite + MapRead is rare. |
| if ((usagesToHandle & wgpu::BufferUsage::MapRead) || IsStaging(usagesToHandle)) { |
| DAWN_TRY_ASSIGN(mLastUpdatedStorage, GetOrCreateStorage(StorageType::Staging)); |
| mMappableStorage = mLastUpdatedStorage; |
| usagesToHandle &= ~kStagingUsages; |
| } |
| |
    // We need to create a separate storage for uniform usage, because D3D11 doesn't allow a
    // constant buffer to be used for other purposes.
| if (usagesToHandle & wgpu::BufferUsage::Uniform) { |
| usagesToHandle &= |
| ~(wgpu::BufferUsage::Uniform | wgpu::BufferUsage::CopySrc | kInternalCopySrcBuffer); |
| |
| // Since D3D11 doesn't allow both CPU & GPU to write to a buffer, we need separate |
| // storages for CPU writes and GPU writes. |
| if (usagesToHandle & wgpu::BufferUsage::MapWrite) { |
| // Note: we favor CPU write over GPU write if MapWrite is present. If buffer has GPU |
| // writable usages, the GPU writable storage will be lazily created later. |
| usagesToHandle &= ~wgpu::BufferUsage::MapWrite; |
| DAWN_TRY_ASSIGN(mLastUpdatedStorage, |
| GetOrCreateStorage(StorageType::CPUWritableConstantBuffer)); |
| mMappableStorage = mLastUpdatedStorage; |
| } else { |
| // For constant buffer, the only supported GPU op is copy. So create one storage for |
| // that. |
| usagesToHandle &= ~wgpu::BufferUsage::CopyDst; |
| DAWN_TRY_ASSIGN(mLastUpdatedStorage, |
| GetOrCreateStorage(StorageType::GPUCopyDstConstantBuffer)); |
| } |
| } |
| |
| if (usagesToHandle == wgpu::BufferUsage::None) { |
| return {}; |
| } |
| |
| // Create separate storage for non-constant buffer usages if required. |
| if (usagesToHandle & wgpu::BufferUsage::MapWrite) { |
        // Note: we only need one CPU writable storage. If there are both constant buffer and
        // non-constant buffer usages, we favor the CPU writable constant buffer first, since
        // the most common use case is users updating constant buffers on the CPU.
| DAWN_ASSERT(mMappableStorage == nullptr); |
| usagesToHandle &= ~wgpu::BufferUsage::MapWrite; |
| // If a buffer is created with both Storage and MapWrite usages, then |
| // we will lazily create a GPU writable storage later. Note: we favor CPU writable |
| // over GPU writable when creating non-constant buffer storage. This is to optimize |
| // the most common cases where MapWrite buffers are mostly updated by CPU. |
| DAWN_TRY_ASSIGN(mLastUpdatedStorage, |
| GetOrCreateStorage(StorageType::CPUWritableNonConstantBuffer)); |
| mMappableStorage = mLastUpdatedStorage; |
| } else { |
| usagesToHandle &= ~wgpu::BufferUsage::CopyDst; |
| DAWN_TRY_ASSIGN(mLastUpdatedStorage, |
| GetOrCreateStorage(StorageType::GPUWritableNonConstantBuffer)); |
| } |
| |
| return {}; |
| } |
| |
| ResultOrError<GPUUsableBuffer::Storage*> GPUUsableBuffer::GetOrCreateStorage( |
| StorageType storageType) { |
| if (mStorages[storageType]) { |
| return mStorages[storageType].Get(); |
| } |
| D3D11_BUFFER_DESC bufferDescriptor; |
| bufferDescriptor.ByteWidth = GetAllocatedSize(); |
| bufferDescriptor.StructureByteStride = 0; |
| |
| switch (storageType) { |
| case StorageType::CPUWritableConstantBuffer: |
| bufferDescriptor.Usage = D3D11_USAGE_DYNAMIC; |
| bufferDescriptor.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE; |
| bufferDescriptor.BindFlags = D3D11_BIND_CONSTANT_BUFFER; |
| bufferDescriptor.MiscFlags = 0; |
| break; |
| case StorageType::GPUCopyDstConstantBuffer: |
| bufferDescriptor.Usage = D3D11_USAGE_DEFAULT; |
| bufferDescriptor.CPUAccessFlags = 0; |
| bufferDescriptor.BindFlags = D3D11_BIND_CONSTANT_BUFFER; |
| bufferDescriptor.MiscFlags = 0; |
| break; |
| case StorageType::CPUWritableNonConstantBuffer: { |
            // Need to exclude GPU writable usages because a CPU writable buffer cannot also be
            // GPU writable in D3D11.
| auto nonUniformUsage = |
| GetInternalUsage() & ~(kD3D11GPUWriteUsages | wgpu::BufferUsage::Uniform); |
| bufferDescriptor.Usage = D3D11_USAGE_DYNAMIC; |
| bufferDescriptor.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE; |
| bufferDescriptor.BindFlags = D3D11BufferBindFlags(nonUniformUsage); |
| bufferDescriptor.MiscFlags = D3D11BufferMiscFlags(nonUniformUsage); |
| if (bufferDescriptor.BindFlags == 0) { |
| // Dynamic buffer requires at least one binding flag. If no binding flag is needed |
| // (one example is MapWrite | QueryResolve), then use D3D11_BIND_INDEX_BUFFER. |
| bufferDescriptor.BindFlags = D3D11_BIND_INDEX_BUFFER; |
| DAWN_ASSERT(bufferDescriptor.MiscFlags == 0); |
| } |
| } break; |
| case StorageType::GPUWritableNonConstantBuffer: { |
| // Need to exclude mapping usages. |
| const auto nonUniformUsage = |
| GetInternalUsage() & ~(kMappableBufferUsages | wgpu::BufferUsage::Uniform); |
| bufferDescriptor.Usage = D3D11_USAGE_DEFAULT; |
| bufferDescriptor.CPUAccessFlags = 0; |
| bufferDescriptor.BindFlags = D3D11BufferBindFlags(nonUniformUsage); |
| bufferDescriptor.MiscFlags = D3D11BufferMiscFlags(nonUniformUsage); |
| } break; |
| case StorageType::Staging: { |
| bufferDescriptor.Usage = D3D11_USAGE_STAGING; |
| bufferDescriptor.CPUAccessFlags = D3D11_CPU_ACCESS_READ | D3D11_CPU_ACCESS_WRITE; |
| bufferDescriptor.BindFlags = 0; |
| bufferDescriptor.MiscFlags = 0; |
| } break; |
| case StorageType::Count: |
| DAWN_UNREACHABLE(); |
| } |
| |
| ComPtr<ID3D11Buffer> buffer; |
| DAWN_TRY(CheckOutOfMemoryHRESULT( |
| ToBackend(GetDevice())->GetD3D11Device()->CreateBuffer(&bufferDescriptor, nullptr, &buffer), |
| "ID3D11Device::CreateBuffer")); |
| |
| mStorages[storageType] = AcquireRef(new Storage(std::move(buffer))); |
| |
| SetStorageLabel(storageType); |
| |
| return mStorages[storageType].Get(); |
| } |
| |
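// Returns a storage that can be the destination of a GPU copy, reusing an existing one when
// possible and otherwise creating the variant matching the buffer's usage.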
| ResultOrError<GPUUsableBuffer::Storage*> GPUUsableBuffer::GetOrCreateDstCopyableStorage() { |
| if (mStorages[StorageType::GPUCopyDstConstantBuffer]) { |
| return mStorages[StorageType::GPUCopyDstConstantBuffer].Get(); |
| } |
| if (mStorages[StorageType::GPUWritableNonConstantBuffer]) { |
| return mStorages[StorageType::GPUWritableNonConstantBuffer].Get(); |
| } |
| |
| if (GetInternalUsage() & wgpu::BufferUsage::Uniform) { |
| return GetOrCreateStorage(StorageType::GPUCopyDstConstantBuffer); |
| } |
| |
| return GetOrCreateStorage(StorageType::GPUWritableNonConstantBuffer); |
| } |
| |
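// Brings dstStorage up to date with mLastUpdatedStorage when their revisions differ, using
// CopyResource when dstStorage supports CopyDst and otherwise falling back to a staging-buffer
// round trip through the CPU.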
| MaybeError GPUUsableBuffer::SyncStorage(const ScopedCommandRecordingContext* commandContext, |
| Storage* dstStorage) { |
| DAWN_ASSERT(mLastUpdatedStorage); |
| DAWN_ASSERT(dstStorage); |
| if (mLastUpdatedStorage->GetRevision() == dstStorage->GetRevision()) { |
| return {}; |
| } |
| |
| DAWN_ASSERT(commandContext); |
| |
| // Must not have pending unmap. |
| DAWN_CHECK(!mMappedData); |
| |
| if (dstStorage->SupportsCopyDst()) { |
| commandContext->CopyResource(dstStorage->GetD3D11Buffer(), |
| mLastUpdatedStorage->GetD3D11Buffer()); |
| dstStorage->SetRevision(mLastUpdatedStorage->GetRevision()); |
| return {}; |
| } |
| |
| // TODO(42241146): This is a slow path. It's usually used by uncommon use cases: |
| // - GPU writes a CPU writable buffer. |
| DAWN_ASSERT(dstStorage->IsCPUWritable()); |
| Storage* stagingStorage; |
| DAWN_TRY_ASSIGN(stagingStorage, GetOrCreateStorage(StorageType::Staging)); |
| DAWN_TRY(SyncStorage(commandContext, stagingStorage)); |
| D3D11_MAPPED_SUBRESOURCE mappedSrcResource; |
| DAWN_TRY(CheckHRESULT(commandContext->Map(stagingStorage->GetD3D11Buffer(), |
| /*Subresource=*/0, D3D11_MAP_READ, |
| /*MapFlags=*/0, &mappedSrcResource), |
| "ID3D11DeviceContext::Map src")); |
| |
| auto MapAndCopy = [](const ScopedCommandRecordingContext* commandContext, ID3D11Buffer* dst, |
| const void* srcData, size_t size) -> MaybeError { |
| D3D11_MAPPED_SUBRESOURCE mappedDstResource; |
| DAWN_TRY(CheckHRESULT(commandContext->Map(dst, |
| /*Subresource=*/0, D3D11_MAP_WRITE_DISCARD, |
| /*MapFlags=*/0, &mappedDstResource), |
| "ID3D11DeviceContext::Map dst")); |
| memcpy(mappedDstResource.pData, srcData, size); |
| commandContext->Unmap(dst, |
| /*Subresource=*/0); |
| return {}; |
| }; |
| |
| auto result = MapAndCopy(commandContext, dstStorage->GetD3D11Buffer(), mappedSrcResource.pData, |
| GetAllocatedSize()); |
| |
| commandContext->Unmap(stagingStorage->GetD3D11Buffer(), |
| /*Subresource=*/0); |
| |
| if (result.IsError()) { |
| return result; |
| } |
| |
| dstStorage->SetRevision(mLastUpdatedStorage->GetRevision()); |
| |
| return {}; |
| } |
| |
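// Marks dstStorage as holding the newest content by bumping its revision past all other
// storages' revisions.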
| void GPUUsableBuffer::IncrStorageRevAndMakeLatest( |
| const ScopedCommandRecordingContext* commandContext, |
| Storage* dstStorage) { |
| DAWN_ASSERT(dstStorage->GetRevision() == mLastUpdatedStorage->GetRevision()); |
| dstStorage->SetRevision(dstStorage->GetRevision() + 1); |
| mLastUpdatedStorage = dstStorage; |
| |
| if (dstStorage->IsGPUWritable() && IsMappable(GetInternalUsage())) { |
| // If this buffer is mappable and the last updated storage is GPU writable, we need to |
| // update the staging storage when the command buffer is flushed. |
| // This is to make sure the staging storage will contain the up-to-date GPU modified data. |
| commandContext->AddBufferForSyncingWithCPU(this); |
| } |
| } |
| |
| MaybeError GPUUsableBuffer::SyncGPUWritesToStaging( |
| const ScopedCommandRecordingContext* commandContext) { |
| DAWN_ASSERT(IsMappable(GetInternalUsage())); |
| |
| // Only sync staging storage. Later other CPU writable storages can be updated by |
| // copying from staging storage with Map(MAP_WRITE_DISCARD) which won't stall the CPU. |
| // Otherwise, since CPU writable storages don't support CopyDst, it would require a CPU |
| // stall in order to sync them here. |
| Storage* stagingStorage; |
| DAWN_TRY_ASSIGN(stagingStorage, GetOrCreateStorage(StorageType::Staging)); |
| |
| return SyncStorage(commandContext, stagingStorage); |
| } |
| |
| MaybeError GPUUsableBuffer::MapInternal(const ScopedCommandRecordingContext* commandContext, |
| wgpu::MapMode mode) { |
| DAWN_ASSERT(!mMappedData); |
| DAWN_ASSERT(mMappableStorage); |
| |
| if (mMappableStorage->IsDynamic()) { |
| DAWN_ASSERT(mode == wgpu::MapMode::Write); |
        // Dynamic buffers can only be mapped with D3D11_MAP_WRITE_DISCARD or
        // D3D11_MAP_WRITE_NO_OVERWRITE; use NO_OVERWRITE so existing contents are preserved.
| mD3DMapTypeUsed = D3D11_MAP_WRITE_NO_OVERWRITE; |
| } else { |
| if (NeedsInitialization() || mode == (wgpu::MapMode::Read | wgpu::MapMode::Write)) { |
| // Map buffer with D3D11_MAP_READ_WRITE because we need write permission to initialize |
| // the buffer. |
| // TODO(dawn:1705): investigate the performance impact of mapping with |
| // D3D11_MAP_READ_WRITE. |
| mD3DMapTypeUsed = D3D11_MAP_READ_WRITE; |
| } else { |
| if (mode & wgpu::MapMode::Read) { |
| mD3DMapTypeUsed = D3D11_MAP_READ; |
| } else { |
| mD3DMapTypeUsed = D3D11_MAP_WRITE; |
| } |
| } |
| } |
| |
| // Sync previously modified content before mapping. |
| DAWN_TRY(SyncStorage(commandContext, mMappableStorage)); |
| |
| D3D11_MAPPED_SUBRESOURCE mappedResource; |
| DAWN_TRY(CheckHRESULT(commandContext->Map(mMappableStorage->GetD3D11Buffer(), |
| /*Subresource=*/0, mD3DMapTypeUsed, |
| /*MapFlags=*/0, &mappedResource), |
| "ID3D11DeviceContext::Map")); |
| mMappedData = static_cast<uint8_t*>(mappedResource.pData); |
| |
| return {}; |
| } |
| |
| void GPUUsableBuffer::UnmapInternal(const ScopedCommandRecordingContext* commandContext) { |
| DAWN_ASSERT(mMappedData); |
| DAWN_ASSERT(mMappableStorage); |
| commandContext->Unmap(mMappableStorage->GetD3D11Buffer(), |
| /*Subresource=*/0); |
| mMappedData = nullptr; |
| // Only increment revision if the buffer was mapped for writing. |
| if (mD3DMapTypeUsed != D3D11_MAP_READ) { |
| IncrStorageRevAndMakeLatest(commandContext, mMappableStorage); |
| } |
| } |
| |
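// Returns an up-to-date ID3D11Buffer that can be bound as a constant buffer, preferring the
// CPU writable storage when its content is already current.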
| ResultOrError<ID3D11Buffer*> GPUUsableBuffer::GetD3D11ConstantBuffer( |
| const ScopedCommandRecordingContext* commandContext) { |
| auto* storage = mStorages[StorageType::CPUWritableConstantBuffer].Get(); |
| if (storage && storage->GetRevision() == mLastUpdatedStorage->GetRevision()) { |
| // The CPUWritableConstantBuffer is up to date, use it directly. |
| return storage->GetD3D11Buffer(); |
| } |
| |
    // In all other cases we are going to use the GPUCopyDstConstantBuffer because either it is
    // up to date, or we need to update the ConstantBuffer data, and doing a CopyResource on the
    // GPU is always more efficient than paths involving a memcpy (or potentially a stall).
| DAWN_TRY_ASSIGN(storage, GetOrCreateStorage(StorageType::GPUCopyDstConstantBuffer)); |
| DAWN_TRY(SyncStorage(commandContext, storage)); |
| return storage->GetD3D11Buffer(); |
| } |
| |
| ResultOrError<ID3D11Buffer*> GPUUsableBuffer::GetD3D11NonConstantBuffer( |
| const ScopedCommandRecordingContext* commandContext) { |
| auto* storage = mStorages[StorageType::CPUWritableNonConstantBuffer].Get(); |
| if (storage && storage->GetRevision() == mLastUpdatedStorage->GetRevision()) { |
| // The CPUWritableNonConstantBuffer is up to date, use it directly. |
| return storage->GetD3D11Buffer(); |
| } |
| |
    // In all other cases we are going to use the GPUWritableNonConstantBuffer because either it
    // is up to date, or we need to update the non-ConstantBuffer data, and doing a CopyResource
    // on the GPU is always more efficient than paths involving a memcpy (or potentially a
    // stall).
| DAWN_TRY_ASSIGN(storage, GetOrCreateStorage(StorageType::GPUWritableNonConstantBuffer)); |
| DAWN_TRY(SyncStorage(commandContext, storage)); |
| return storage->GetD3D11Buffer(); |
| } |
| |
| ID3D11Buffer* GPUUsableBuffer::GetD3D11ConstantBufferForTesting() { |
| if (!mStorages[StorageType::CPUWritableConstantBuffer] && |
| !mStorages[StorageType::GPUCopyDstConstantBuffer]) { |
| return nullptr; |
| } |
| auto tempCommandContext = ToBackend(GetDevice()->GetQueue()) |
| ->GetScopedPendingCommandContext(QueueBase::SubmitMode::Normal); |
| ID3D11Buffer* buffer; |
| if (GetDevice()->ConsumedError(GetD3D11ConstantBuffer(&tempCommandContext), &buffer)) { |
| return nullptr; |
| } |
| |
| return buffer; |
| } |
| |
| ID3D11Buffer* GPUUsableBuffer::GetD3D11NonConstantBufferForTesting() { |
| if (!mStorages[StorageType::CPUWritableNonConstantBuffer] && |
| !mStorages[StorageType::GPUWritableNonConstantBuffer]) { |
| return nullptr; |
| } |
| auto tempCommandContext = ToBackend(GetDevice()->GetQueue()) |
| ->GetScopedPendingCommandContext(QueueBase::SubmitMode::Normal); |
| ID3D11Buffer* buffer; |
| if (GetDevice()->ConsumedError(GetD3D11NonConstantBuffer(&tempCommandContext), &buffer)) { |
| return nullptr; |
| } |
| |
| return buffer; |
| } |
| |
| ResultOrError<ComPtr<ID3D11ShaderResourceView>> |
| GPUUsableBuffer::CreateD3D11ShaderResourceViewFromD3DBuffer(ID3D11Buffer* d3d11Buffer, |
| uint64_t offset, |
| uint64_t originalSize) { |
| uint64_t size = Align(originalSize, 4); |
| DAWN_ASSERT(IsAligned(offset, 4u)); |
| DAWN_ASSERT(size <= GetAllocatedSize()); |
| UINT firstElement = static_cast<UINT>(offset / 4); |
| UINT numElements = static_cast<UINT>(size / 4); |
| |
| D3D11_SHADER_RESOURCE_VIEW_DESC desc; |
| desc.Format = DXGI_FORMAT_R32_TYPELESS; |
| desc.ViewDimension = D3D11_SRV_DIMENSION_BUFFEREX; |
| desc.BufferEx.FirstElement = firstElement; |
| desc.BufferEx.NumElements = numElements; |
| desc.BufferEx.Flags = D3D11_BUFFEREX_SRV_FLAG_RAW; |
| ComPtr<ID3D11ShaderResourceView> srv; |
| DAWN_TRY(CheckHRESULT(ToBackend(GetDevice()) |
| ->GetD3D11Device() |
| ->CreateShaderResourceView(d3d11Buffer, &desc, &srv), |
| "ShaderResourceView creation")); |
| |
| return std::move(srv); |
| } |
| |
| ResultOrError<ComPtr<ID3D11UnorderedAccessView1>> |
| GPUUsableBuffer::CreateD3D11UnorderedAccessViewFromD3DBuffer(ID3D11Buffer* d3d11Buffer, |
| uint64_t offset, |
| uint64_t originalSize) { |
| uint64_t size = Align(originalSize, 4); |
| DAWN_ASSERT(IsAligned(offset, 4u)); |
| DAWN_ASSERT(size <= GetAllocatedSize()); |
| |
| UINT firstElement = static_cast<UINT>(offset / 4); |
| UINT numElements = static_cast<UINT>(size / 4); |
| |
| D3D11_UNORDERED_ACCESS_VIEW_DESC1 desc; |
| desc.Format = DXGI_FORMAT_R32_TYPELESS; |
| desc.ViewDimension = D3D11_UAV_DIMENSION_BUFFER; |
| desc.Buffer.FirstElement = firstElement; |
| desc.Buffer.NumElements = numElements; |
| desc.Buffer.Flags = D3D11_BUFFER_UAV_FLAG_RAW; |
| |
| ComPtr<ID3D11UnorderedAccessView1> uav; |
| DAWN_TRY(CheckHRESULT(ToBackend(GetDevice()) |
| ->GetD3D11Device3() |
| ->CreateUnorderedAccessView1(d3d11Buffer, &desc, &uav), |
| "UnorderedAccessView creation")); |
| |
| return std::move(uav); |
| } |
| |
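// Returns a raw-buffer SRV over the non-constant-buffer storage, creating and caching the view
// on first use for a given (buffer, offset, size) key.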
| ResultOrError<ComPtr<ID3D11ShaderResourceView>> GPUUsableBuffer::UseAsSRV( |
| const ScopedCommandRecordingContext* commandContext, |
| uint64_t offset, |
| uint64_t size) { |
| ID3D11Buffer* d3dBuffer; |
| |
| DAWN_TRY_ASSIGN(d3dBuffer, GetD3D11NonConstantBuffer(commandContext)); |
| |
| auto key = std::make_tuple(d3dBuffer, offset, size); |
| auto ite = mSRVCache.find(key); |
| if (ite != mSRVCache.end()) { |
| return ite->second; |
| } |
| |
| ComPtr<ID3D11ShaderResourceView> srv; |
| DAWN_TRY_ASSIGN(srv, CreateD3D11ShaderResourceViewFromD3DBuffer(d3dBuffer, offset, size)); |
| |
| mSRVCache[key] = srv; |
| |
| return std::move(srv); |
| } |
| |
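// Returns a raw-buffer UAV over the GPU writable storage. Since binding a UAV may modify the
// storage's content, its revision is incremented and it becomes the latest storage.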
| ResultOrError<ComPtr<ID3D11UnorderedAccessView>> GPUUsableBuffer::UseAsUAV( |
| const ScopedCommandRecordingContext* commandContext, |
| uint64_t offset, |
| uint64_t size) { |
| Storage* storage = nullptr; |
| DAWN_TRY_ASSIGN(storage, GetOrCreateStorage(StorageType::GPUWritableNonConstantBuffer)); |
| DAWN_TRY(SyncStorage(commandContext, storage)); |
| |
| ComPtr<ID3D11UnorderedAccessView1> uav; |
| { |
| auto key = std::make_tuple(storage->GetD3D11Buffer(), offset, size); |
| auto ite = mUAVCache.find(key); |
| if (ite != mUAVCache.end()) { |
| uav = ite->second; |
| } else { |
| DAWN_TRY_ASSIGN(uav, CreateD3D11UnorderedAccessViewFromD3DBuffer( |
| storage->GetD3D11Buffer(), offset, size)); |
| mUAVCache[key] = uav; |
| } |
| } |
| |
| // Since UAV will modify the storage's content, increment its revision. |
| IncrStorageRevAndMakeLatest(commandContext, storage); |
| |
| return ComPtr<ID3D11UnorderedAccessView>(std::move(uav)); |
| } |
| |
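// Writes `size` bytes at `offset` into a constant buffer. When the write is a full-size or
// first-time update, the region is expanded to the 16-byte alignment UpdateSubresource1()
// requires; otherwise the data is staged through a staging buffer and copied on the GPU.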
| MaybeError GPUUsableBuffer::UpdateD3D11ConstantBuffer( |
| const ScopedCommandRecordingContext* commandContext, |
| ID3D11Buffer* d3d11Buffer, |
| bool firstTimeUpdate, |
| uint64_t offset, |
| const void* data, |
| size_t size) { |
| DAWN_ASSERT(size > 0); |
| |
    // For a full-size write, UpdateSubresource1(D3D11_COPY_DISCARD) can be used to update the
    // constant buffer.
    // WriteInternal() can be called with GetAllocatedSize(). We treat that as a full buffer
    // write as well.
| const bool fullSizeUpdate = size >= GetSize() && offset == 0; |
| const bool canPartialUpdate = |
| ToBackend(GetDevice())->GetDeviceInfo().supportsPartialConstantBufferUpdate; |
| if (fullSizeUpdate || firstTimeUpdate) { |
| const bool requiresFullAllocatedSizeWrite = !canPartialUpdate && !firstTimeUpdate; |
| |
        // The offset and size must be 16-byte aligned to use UpdateSubresource1() on a
        // constant buffer.
| size_t alignedOffset; |
| if (offset < kConstantBufferUpdateAlignment - 1) { |
| alignedOffset = 0; |
| } else { |
| DAWN_ASSERT(firstTimeUpdate); |
| // For offset we align to lower value (<= offset). |
| alignedOffset = Align(offset - (kConstantBufferUpdateAlignment - 1), |
| kConstantBufferUpdateAlignment); |
| } |
| size_t alignedEnd; |
| if (requiresFullAllocatedSizeWrite) { |
| alignedEnd = GetAllocatedSize(); |
| } else { |
| alignedEnd = Align(offset + size, kConstantBufferUpdateAlignment); |
| } |
| size_t alignedSize = alignedEnd - alignedOffset; |
| |
| DAWN_ASSERT((alignedSize % kConstantBufferUpdateAlignment) == 0); |
| DAWN_ASSERT(alignedSize <= GetAllocatedSize()); |
| DAWN_ASSERT(offset >= alignedOffset); |
| |
        // Extra bytes to the left of `offset` that we may write to. This is only valid when
        // firstTimeUpdate is true.
| size_t leftExtraBytes = offset - alignedOffset; |
| DAWN_ASSERT(leftExtraBytes == 0 || firstTimeUpdate); |
| |
| // The layout of the buffer is like this: |
| // |..........................| leftExtraBytes | data | ............... | |
| // |<----------------- offset ---------------->|<-- size -->| |
| // |<----- alignedOffset ---->|<--------- alignedSize --------->| |
| std::unique_ptr<uint8_t[]> alignedBuffer; |
| if (size != alignedSize) { |
| alignedBuffer.reset(new uint8_t[alignedSize]); |
| std::memcpy(alignedBuffer.get() + leftExtraBytes, data, size); |
| data = alignedBuffer.get(); |
| } |
| |
| D3D11_BOX dstBox; |
| dstBox.left = static_cast<UINT>(alignedOffset); |
| dstBox.top = 0; |
| dstBox.front = 0; |
| dstBox.right = static_cast<UINT>(alignedOffset + alignedSize); |
| dstBox.bottom = 1; |
| dstBox.back = 1; |
        // For a full buffer write, D3D11_COPY_DISCARD is used to avoid GPU/CPU synchronization.
| commandContext->UpdateSubresource1(d3d11Buffer, /*DstSubresource=*/0, |
| requiresFullAllocatedSizeWrite ? nullptr : &dstBox, data, |
| /*SrcRowPitch=*/0, |
| /*SrcDepthPitch=*/0, |
| /*CopyFlags=*/D3D11_COPY_DISCARD); |
| return {}; |
| } |
| |
    // If the copy offset and size are not 16-byte aligned, we have to use a staging buffer to
    // transfer the data to the constant buffer.
| Ref<BufferBase> stagingBuffer; |
| DAWN_TRY_ASSIGN(stagingBuffer, ToBackend(GetDevice())->GetStagingBuffer(commandContext, size)); |
| { |
| auto scopedUseStaging = stagingBuffer->UseInternal(); |
| DAWN_TRY(ToBackend(stagingBuffer) |
| ->WriteInternal(commandContext, 0, data, size, |
| /*isInitialWrite=*/true)); |
| DAWN_TRY(ToBackend(stagingBuffer.Get()) |
| ->CopyToInternal(commandContext, |
| /*sourceOffset=*/0, |
| /*size=*/size, this, offset)); |
| } |
| ToBackend(GetDevice())->ReturnStagingBuffer(std::move(stagingBuffer)); |
| |
| return {}; |
| } |
| |
| MaybeError GPUUsableBuffer::WriteInternal(const ScopedCommandRecordingContext* commandContext, |
| uint64_t offset, |
| const void* data, |
| size_t size, |
| bool isInitialWrite) { |
| if (size == 0) { |
| return {}; |
| } |
| |
    // Map the buffer if possible, so WriteInternal() can write to the mapped memory
    // directly.
| // TODO(crbug.com/345471009): Consider mapping the buffer for non-clearing writes when |
| // it's not in use by the GPU. This would avoid allocating additional GPU storage. |
| // However, checking GetLastUsageSerial() is unreliable here because Queue::Submit() |
| // may have already updated it before entering this function. In practice, this is |
| // uncommon for mappable buffers since users typically update them via MapAsync when |
| // they know the buffer is idle. |
| const bool alreadyMappedForWrite = mMappedData && mD3DMapTypeUsed != D3D11_MAP_READ; |
| if ((IsCPUWritable() && isInitialWrite) || alreadyMappedForWrite) { |
        // If the buffer is already mapped, creating a ScopedMap is a no-op.
| ScopedMap scopedMap; |
| DAWN_TRY_ASSIGN(scopedMap, ScopedMap::Create(commandContext, this, wgpu::MapMode::Write)); |
| |
| DAWN_ASSERT(scopedMap.GetMappedData()); |
| memcpy(scopedMap.GetMappedData() + offset, data, size); |
| |
| return {}; |
| } |
| |
| // Mark the buffer as used in pending commands if the mapping path above wasn't taken. |
| // Mapped writes complete synchronously and don't require tracking. |
| DAWN_TRY(TrackUsage(commandContext, GetDevice()->GetQueue()->GetPendingCommandSerial())); |
| |
    // WriteInternal() can be called with GetAllocatedSize() instead of GetSize(); we treat
    // that as a full buffer write as well.
| bool fullSizeWrite = size >= GetSize() && offset == 0; |
| |
    // Mapping the buffer at this point would stall the CPU, so instead we create a
    // GPU-copyable storage and write to it with UpdateSubresource1() below. Note: if we have
    // both a constant buffer and a non-constant buffer, we favor writing to the non-constant
    // buffer because it has no alignment requirement.
| Storage* gpuCopyableStorage = mStorages[StorageType::GPUWritableNonConstantBuffer].Get(); |
| if (!gpuCopyableStorage) { |
| DAWN_TRY_ASSIGN(gpuCopyableStorage, GetOrCreateDstCopyableStorage()); |
| } |
| |
| if (fullSizeWrite) { |
        // If this is a full overwrite, there is no need to copy the old content over; only
        // the revision number needs to be copied.
| DAWN_ASSERT(mLastUpdatedStorage); |
| gpuCopyableStorage->SetRevision(mLastUpdatedStorage->GetRevision()); |
| } else { |
| DAWN_TRY(SyncStorage(commandContext, gpuCopyableStorage)); |
| } |
| |
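    // Capture whether this is the very first write to this storage before the revision is
    // bumped below; UpdateD3D11ConstantBuffer() relies on it to decide whether a partial
    // discard write is safe.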
| const bool firstTimeUpdate = gpuCopyableStorage->IsFirstRevision(); |
| |
    // We are going to write to the storage in every code path below, so update the revision
    // now.
| IncrStorageRevAndMakeLatest(commandContext, gpuCopyableStorage); |
| |
| if (gpuCopyableStorage->IsConstantBuffer()) { |
| return UpdateD3D11ConstantBuffer(commandContext, gpuCopyableStorage->GetD3D11Buffer(), |
| firstTimeUpdate, offset, data, size); |
| } |
| |
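    // Write the data into the non-constant-buffer storage with a partial box. CopyFlags = 0
    // preserves the bytes outside the box, which were brought up to date by SyncStorage()
    // above when this is not a full-size write.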
| D3D11_BOX box; |
| box.left = static_cast<UINT>(offset); |
| box.top = 0; |
| box.front = 0; |
| box.right = static_cast<UINT>(offset + size); |
| box.bottom = 1; |
| box.back = 1; |
| commandContext->UpdateSubresource1(gpuCopyableStorage->GetD3D11Buffer(), |
| /*DstSubresource=*/0, |
| /*pDstBox=*/&box, data, |
| /*SrcRowPitch=*/0, |
| /*SrcDepthPitch=*/0, |
| /*CopyFlags=*/0); |
| |
    // There is no need to update the constant buffer at this point. When a command buffer
    // wants to bind the constant buffer in a render/compute pass, it will call
    // GetD3D11ConstantBuffer() and the constant buffer will be synced there. WriteBuffer()
    // cannot be called inside a render/compute pass, so no sync is needed here.
| return {}; |
| } |
| |
| MaybeError GPUUsableBuffer::CopyToInternal(const ScopedCommandRecordingContext* commandContext, |
| uint64_t sourceOffset, |
| size_t size, |
| Buffer* destination, |
| uint64_t destinationOffset) { |
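    // The source buffer is read by a GPU copy below, so track its usage against the pending
    // command serial.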
| DAWN_TRY(TrackUsage(commandContext, GetDevice()->GetQueue()->GetPendingCommandSerial())); |
| |
| ID3D11Buffer* d3d11SourceBuffer = mLastUpdatedStorage->GetD3D11Buffer(); |
| |
| return destination->CopyFromD3DInternal(commandContext, d3d11SourceBuffer, sourceOffset, size, |
| destinationOffset); |
| } |
| |
| MaybeError GPUUsableBuffer::CopyFromD3DInternal(const ScopedCommandRecordingContext* commandContext, |
| ID3D11Buffer* d3d11SourceBuffer, |
| uint64_t sourceOffset, |
| size_t size, |
| uint64_t destinationOffset) { |
| DAWN_TRY(TrackUsage(commandContext, GetDevice()->GetQueue()->GetPendingCommandSerial())); |
| |
| D3D11_BOX srcBox; |
| srcBox.left = static_cast<UINT>(sourceOffset); |
| srcBox.top = 0; |
| srcBox.front = 0; |
| srcBox.right = static_cast<UINT>(sourceOffset + size); |
| srcBox.bottom = 1; |
| srcBox.back = 1; |
| |
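    // Pick a destination storage that can be written with CopySubresourceRegion(). If the
    // most up-to-date storage cannot be a copy destination, fall back to a dst-copyable
    // storage and sync it first so that this partial copy lands on current data.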
| Storage* gpuCopyableStorage; |
| if (mLastUpdatedStorage->SupportsCopyDst()) { |
| gpuCopyableStorage = mLastUpdatedStorage; |
| } else { |
| DAWN_TRY_ASSIGN(gpuCopyableStorage, GetOrCreateDstCopyableStorage()); |
| DAWN_TRY(SyncStorage(commandContext, gpuCopyableStorage)); |
| } |
| |
| commandContext->CopySubresourceRegion( |
| gpuCopyableStorage->GetD3D11Buffer(), /*DstSubresource=*/0, |
| /*DstX=*/destinationOffset, |
| /*DstY=*/0, |
| /*DstZ=*/0, d3d11SourceBuffer, /*SrcSubresource=*/0, &srcBox); |
| |
| IncrStorageRevAndMakeLatest(commandContext, gpuCopyableStorage); |
| |
| return {}; |
| } |
| |
| MaybeError GPUUsableBuffer::PredicatedClear( |
| const ScopedSwapStateCommandRecordingContext* commandContext, |
| ID3D11Predicate* predicate, |
| uint8_t clearValue, |
| uint64_t offset, |
| uint64_t size) { |
| DAWN_ASSERT(size != 0); |
| |
    // Don't use mapping: mapped writes are not affected by ID3D11Predicate.
    // Allocate a GPU-writable storage and sync it. Note: we don't call SetPredication() yet,
    // otherwise it would affect the syncing.
| Storage* gpuWritableStorage; |
| DAWN_TRY_ASSIGN(gpuWritableStorage, |
| GetOrCreateStorage(StorageType::GPUWritableNonConstantBuffer)); |
| DAWN_TRY(SyncStorage(commandContext, gpuWritableStorage)); |
| |
    // Call SetPredication() and clear the storage with UpdateSubresource1().
| D3D11_BOX box; |
| box.left = static_cast<UINT>(offset); |
| box.top = 0; |
| box.front = 0; |
| box.right = static_cast<UINT>(offset + size); |
| box.bottom = 1; |
| box.back = 1; |
| |
    // TODO(350493305): Change the function signature to accept a single uint64_t value so
    // that we don't need to allocate a vector here.
| absl::InlinedVector<uint8_t, sizeof(uint64_t)> clearData(size, clearValue); |
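    // The inline capacity covers small clears (per the TODO above, a single uint64_t would
    // suffice); larger sizes fall back to a heap allocation.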
| |
| // The update will *NOT* be performed if the predicate's data is false. |
| commandContext->GetD3D11DeviceContext3()->SetPredication(predicate, false); |
| commandContext->UpdateSubresource1(gpuWritableStorage->GetD3D11Buffer(), |
| /*DstSubresource=*/0, |
| /*pDstBox=*/&box, clearData.data(), |
| /*SrcRowPitch=*/0, |
| /*SrcDepthPitch=*/0, |
| /*CopyFlags=*/0); |
| commandContext->GetD3D11DeviceContext3()->SetPredication(nullptr, false); |
| |
| IncrStorageRevAndMakeLatest(commandContext, gpuWritableStorage); |
| |
| return {}; |
| } |
| |
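// Returns the D3D11 buffer backing the current mapping, or nullptr if the buffer is not
// mapped.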
| ComPtr<ID3D11Buffer> GPUUsableBuffer::GetD3D11MappedBuffer() { |
| return mMappedData ? mMappableStorage->GetD3D11Buffer() : nullptr; |
| } |
| |
| } // namespace dawn::native::d3d11 |