// Copyright 2023 The Dawn & Tint Authors
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// 1. Redistributions of source code must retain the above copyright notice, this
// list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// 3. Neither the name of the copyright holder nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "dawn/native/d3d11/BufferD3D11.h"
#include <algorithm>
#include <memory>
#include <utility>
#include <vector>
#include "dawn/common/Alloc.h"
#include "dawn/common/Assert.h"
#include "dawn/common/Constants.h"
#include "dawn/common/Math.h"
#include "dawn/native/ChainUtils.h"
#include "dawn/native/CommandBuffer.h"
#include "dawn/native/DynamicUploader.h"
#include "dawn/native/d3d/D3DError.h"
#include "dawn/native/d3d11/DeviceD3D11.h"
#include "dawn/native/d3d11/PhysicalDeviceD3D11.h"
#include "dawn/native/d3d11/QueueD3D11.h"
#include "dawn/native/d3d11/UtilsD3D11.h"
#include "dawn/platform/DawnPlatform.h"
#include "dawn/platform/tracing/TraceEvent.h"
namespace dawn::native::d3d11 {
class ScopedCommandRecordingContext;
namespace {
// Max size for a CPU upload buffer.
constexpr uint64_t kMaxCPUUploadBufferSize = 64 * 1024;
constexpr wgpu::BufferUsage kCopyUsages =
wgpu::BufferUsage::CopySrc | wgpu::BufferUsage::CopyDst | kInternalCopySrcBuffer;
constexpr wgpu::BufferUsage kStagingUsages = kMappableBufferUsages | kCopyUsages;
constexpr wgpu::BufferUsage kD3D11GPUWriteUsages =
wgpu::BufferUsage::Storage | kInternalStorageBuffer | wgpu::BufferUsage::Indirect;
// Resource usage     Default    Dynamic    Immutable    Staging
// ------------------------------------------------------------
// GPU-read            Yes        Yes        Yes          Yes[1]
// GPU-write           Yes        No         No           Yes[1]
// CPU-read            No         No         No           Yes[1]
// CPU-write           No         Yes        No           Yes[1]
// ------------------------------------------------------------
// [1] GPU read or write of a resource with the D3D11_USAGE_STAGING usage is restricted to copy
// operations. You use ID3D11DeviceContext::CopySubresourceRegion and
// ID3D11DeviceContext::CopyResource for these copy operations.
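// As a rough guide to how this table maps onto the storages created later in this file: staging
// storages use D3D11_USAGE_STAGING, CPU writable storages use D3D11_USAGE_DYNAMIC, and GPU
// writable or copy-destination storages use D3D11_USAGE_DEFAULT.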
bool IsMappable(wgpu::BufferUsage usage) {
return usage & kMappableBufferUsages;
}
bool IsUpload(wgpu::BufferUsage usage) {
return usage & wgpu::BufferUsage::MapWrite &&
IsSubset(usage, kInternalCopySrcBuffer | wgpu::BufferUsage::CopySrc |
wgpu::BufferUsage::MapWrite);
}
bool IsStaging(wgpu::BufferUsage usage) {
// Must have at least MapWrite or MapRead bit
return IsMappable(usage) && IsSubset(usage, kStagingUsages);
}
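// For illustration, with only the listed usages set:
// - MapWrite | CopySrc          -> IsUpload() and IsStaging() are both true.
// - MapRead | CopyDst           -> IsStaging() is true, IsUpload() is false.
// - MapWrite | CopySrc | Vertex -> neither, because Vertex is outside the upload/staging usage
//                                  subsets.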
UINT D3D11BufferBindFlags(wgpu::BufferUsage usage) {
UINT bindFlags = 0;
if (usage & (wgpu::BufferUsage::Vertex)) {
bindFlags |= D3D11_BIND_VERTEX_BUFFER;
}
if (usage & wgpu::BufferUsage::Index) {
bindFlags |= D3D11_BIND_INDEX_BUFFER;
}
if (usage & (wgpu::BufferUsage::Uniform)) {
bindFlags |= D3D11_BIND_CONSTANT_BUFFER;
}
if (usage & (wgpu::BufferUsage::Storage | kInternalStorageBuffer)) {
DAWN_ASSERT(!IsMappable(usage));
bindFlags |= D3D11_BIND_UNORDERED_ACCESS;
}
if (usage & kReadOnlyStorageBuffer) {
bindFlags |= D3D11_BIND_SHADER_RESOURCE;
}
// A buffer that only has CopySrc and CopyDst usages is used as a staging buffer for copies.
// Because D3D11 doesn't allow copying directly between a buffer and a texture, we use a compute
// shader to copy data between them, so the buffer needs to be bindable as an unordered access
// view.
if (IsSubset(usage, kCopyUsages)) {
bindFlags |= D3D11_BIND_UNORDERED_ACCESS;
}
return bindFlags;
}
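// For example, Storage | CopyDst yields D3D11_BIND_UNORDERED_ACCESS, a buffer with only
// CopySrc | CopyDst also yields D3D11_BIND_UNORDERED_ACCESS (so the compute-shader copy path can
// write it), and Vertex | CopyDst yields only D3D11_BIND_VERTEX_BUFFER.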
UINT D3D11BufferMiscFlags(wgpu::BufferUsage usage) {
UINT miscFlags = 0;
if (usage & (wgpu::BufferUsage::Storage | kInternalStorageBuffer | kReadOnlyStorageBuffer)) {
miscFlags |= D3D11_RESOURCE_MISC_BUFFER_ALLOW_RAW_VIEWS;
}
if (usage & wgpu::BufferUsage::Indirect) {
miscFlags |= D3D11_RESOURCE_MISC_DRAWINDIRECT_ARGS;
}
return miscFlags;
}
size_t D3D11BufferSizeAlignment(wgpu::BufferUsage usage) {
if (usage & wgpu::BufferUsage::Uniform) {
// https://learn.microsoft.com/en-us/windows/win32/api/d3d11_1/nf-d3d11_1-id3d11devicecontext1-vssetconstantbuffers1
// Each range of constants must be a multiple of 16 shader constants (sizeof(float) * 4 * 16 =
// 256 bytes).
return sizeof(float) * 4 * 16;
}
if (usage & (wgpu::BufferUsage::Storage | kInternalStorageBuffer | kReadOnlyStorageBuffer |
wgpu::BufferUsage::CopyDst | wgpu::BufferUsage::CopySrc)) {
// Unordered access buffers must be 4-byte aligned.
// We also align CopyDst buffers to 4 bytes since they may be written by the T2B compute shader,
// which writes 4-byte chunks. Similarly, CopySrc buffers are aligned to 4 bytes since they may
// be read by the B2T compute shader, which reads 4-byte chunks.
return sizeof(uint32_t);
}
return 1;
}
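// For example, combined with the Align() in Buffer::Initialize() below, a 20-byte Uniform buffer
// is allocated as 256 bytes and a 6-byte Storage or CopyDst buffer as 8 bytes (Initialize() also
// enforces a 4-byte minimum size).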
bool CanUseCPUUploadBuffer(const Device* device, wgpu::BufferUsage usage, size_t bufferSize) {
return IsUpload(usage) && bufferSize <= kMaxCPUUploadBufferSize &&
!device->IsToggleEnabled(Toggle::D3D11DisableCPUUploadBuffers);
}
constexpr size_t kConstantBufferUpdateAlignment = 16;
wgpu::MapMode GetAutoMapMode(DeviceBase* device, wgpu::BufferUsage usage) {
if (!device->IsToggleEnabled(Toggle::AutoMapBackendBuffer) || !IsMappable(usage)) {
return wgpu::MapMode::None;
}
wgpu::MapMode mode = wgpu::MapMode::None;
if (usage & wgpu::BufferUsage::MapWrite) {
mode |= wgpu::MapMode::Write;
}
if (usage & wgpu::BufferUsage::MapRead) {
mode |= wgpu::MapMode::Read;
}
return mode;
}
} // namespace
// CPU-to-GPU upload buffers (CopySrc|MapWrite) can be emulated in system memory and then written
// into the destination GPU buffer via ID3D11DeviceContext::UpdateSubresource.
class UploadBuffer final : public Buffer {
public:
UploadBuffer(DeviceBase* device, const UnpackedPtr<BufferDescriptor>& descriptor)
: Buffer(device,
descriptor,
/*internalMappableFlags=*/kMappableBufferUsages,
/*autoMapMode=*/wgpu::MapMode::None) {}
~UploadBuffer() override = default;
private:
// BufferBase implementations
MaybeError MapAtCreationImpl() override {
mMappedData = mUploadData.get();
// MapAtCreation does the zeroization on the front-end side.
return {};
}
MaybeError MapAsyncImpl(wgpu::MapMode mode, size_t offset, size_t size) override {
mMappedData = mUploadData.get();
return EnsureDataInitialized(nullptr);
}
void UnmapImpl(BufferState oldState, BufferState newState) override { mMappedData = nullptr; }
// d3d11::Buffer implementations
MaybeError InitializeInternal() override {
mUploadData = std::unique_ptr<uint8_t[]>(AllocNoThrow<uint8_t>(GetAllocatedSize()));
if (mUploadData == nullptr) {
return DAWN_OUT_OF_MEMORY_ERROR("Failed to allocate memory for buffer uploading.");
}
return {};
}
MaybeError MapInternal(const ScopedCommandRecordingContext*, wgpu::MapMode) override {
mMappedData = mUploadData.get();
return {};
}
void UnmapInternal(const ScopedCommandRecordingContext*) override { mMappedData = nullptr; }
MaybeError ClearInternal(const ScopedCommandRecordingContext* commandContext,
uint8_t clearValue,
uint64_t offset,
uint64_t size) override {
memset(mUploadData.get() + offset, clearValue, size);
return {};
}
MaybeError CopyToInternal(const ScopedCommandRecordingContext* commandContext,
uint64_t sourceOffset,
size_t size,
Buffer* destination,
uint64_t destinationOffset) override {
return destination->WriteInternal(commandContext, destinationOffset,
mUploadData.get() + sourceOffset, size,
/*isInitialWrite=*/false);
}
MaybeError CopyFromD3DInternal(const ScopedCommandRecordingContext* commandContext,
ID3D11Buffer* srcD3D11Buffer,
uint64_t sourceOffset,
size_t size,
uint64_t destinationOffset) override {
// Upload buffers shouldn't be copied to.
DAWN_UNREACHABLE();
return {};
}
MaybeError WriteInternal(const ScopedCommandRecordingContext* commandContext,
uint64_t offset,
const void* data,
size_t size,
bool isInitialWrite) override {
const auto* src = static_cast<const uint8_t*>(data);
std::copy(src, src + size, mUploadData.get() + offset);
return {};
}
std::unique_ptr<uint8_t[]> mUploadData;
};
bool CanAddStorageUsageToBufferWithoutSideEffects(const Device* device,
wgpu::BufferUsage storageUsage,
wgpu::BufferUsage originalUsage,
size_t bufferSize) {
// Don't support using uniform buffers as storage buffers, because D3D11 constant buffers cannot
// be bound as SRVs or UAVs. Allowing this would require workarounds, including extra copies, so
// we prefer not to do it.
if (originalUsage & wgpu::BufferUsage::Uniform) {
return false;
}
// If the buffer is small, we prefer a CPU buffer for uploading, so don't allow adding storage
// usage.
if (CanUseCPUUploadBuffer(device, originalUsage, bufferSize)) {
return false;
}
const bool requiresUAV = storageUsage & (wgpu::BufferUsage::Storage | kInternalStorageBuffer);
// Check support for writable storage usage:
if (requiresUAV) {
// D3D11 mappable buffers cannot natively be used as UAVs, so avoid that.
return !(originalUsage & kMappableBufferUsages);
}
// A read-only storage buffer cannot natively be mapped for reading, so avoid that.
DAWN_ASSERT(storageUsage == kReadOnlyStorageBuffer);
return !(originalUsage & wgpu::BufferUsage::MapRead);
}
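// For example, adding kReadOnlyStorageBuffer to a MapWrite | CopySrc | Vertex buffer is allowed
// (the buffer is neither uniform nor mappable for read), but adding wgpu::BufferUsage::Storage to
// a MapWrite | Vertex buffer is not, because a D3D11 mappable buffer cannot also be a UAV.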
// static
ResultOrError<Ref<Buffer>> Buffer::Create(Device* device,
const UnpackedPtr<BufferDescriptor>& descriptor,
const ScopedCommandRecordingContext* commandContext,
bool allowUploadBufferEmulation) {
const auto actualUsage =
ComputeInternalBufferUsages(device, descriptor->usage, descriptor->size);
bool useUploadBuffer = allowUploadBufferEmulation;
useUploadBuffer &= CanUseCPUUploadBuffer(device, actualUsage, descriptor->size);
Ref<Buffer> buffer;
if (useUploadBuffer) {
buffer = AcquireRef(new UploadBuffer(device, descriptor));
} else {
buffer = AcquireRef(new GPUUsableBuffer(device, descriptor));
}
DAWN_TRY(buffer->Initialize(descriptor->mappedAtCreation, commandContext));
return buffer;
}
Buffer::Buffer(DeviceBase* device,
const UnpackedPtr<BufferDescriptor>& descriptor,
wgpu::BufferUsage internalMappableFlags,
wgpu::MapMode autoMapMode)
: BufferBase(device, descriptor),
mInternalMappableFlags(internalMappableFlags),
mAutoMapMode(autoMapMode) {}
MaybeError Buffer::Initialize(bool mappedAtCreation,
const ScopedCommandRecordingContext* commandContext) {
// TODO(dawn:1705): handle mappedAtCreation for NonzeroClearResourcesOnCreationForTesting
// Allocate at least 4 bytes so clamped accesses are always in bounds.
uint64_t size = std::max(GetSize(), uint64_t(4u));
// The validation layer requires:
// ByteWidth must be 12 or larger to be used with D3D11_RESOURCE_MISC_DRAWINDIRECT_ARGS.
if (GetInternalUsage() & wgpu::BufferUsage::Indirect) {
size = std::max(size, uint64_t(12u));
}
size_t alignment = D3D11BufferSizeAlignment(GetInternalUsage());
// Check for overflow, bufferDescriptor.ByteWidth is a UINT.
if (size > std::numeric_limits<UINT>::max() - alignment) {
// Aligning the size would overflow.
return DAWN_OUT_OF_MEMORY_ERROR("Buffer allocation is too large");
}
mAllocatedSize = Align(size, alignment);
DAWN_TRY(InitializeInternal());
SetLabelImpl();
// The buffers with mappedAtCreation == true will be initialized in
// BufferBase::MapAtCreation().
if (!mappedAtCreation &&
GetDevice()->IsToggleEnabled(Toggle::NonzeroClearResourcesOnCreationForTesting)) {
auto scopedUseDuringCreation = UseInternal();
if (commandContext) {
DAWN_TRY(ClearWholeBuffer(commandContext, 1u));
} else {
auto tmpCommandContext =
ToBackend(GetDevice()->GetQueue())
->GetScopedPendingCommandContext(QueueBase::SubmitMode::Normal);
DAWN_TRY(ClearWholeBuffer(&tmpCommandContext, 1u));
}
}
// Mark padding as cleared if there's no padding.
if (GetAllocatedSize() == GetSize()) {
mPaddingCleared = true;
}
return {};
}
Buffer::~Buffer() = default;
bool Buffer::IsCPUWritableAtCreation() const {
return IsCPUWritable();
}
bool Buffer::IsCPUWritable() const {
return mInternalMappableFlags & wgpu::BufferUsage::MapWrite;
}
bool Buffer::IsCPUReadable() const {
return mInternalMappableFlags & wgpu::BufferUsage::MapRead;
}
MaybeError Buffer::MapAtCreationImpl() {
DAWN_ASSERT(IsCPUWritableAtCreation());
// Use Try variant to avoid blocking if the CommandContext lock is already held (e.g., by
// another thread or during Queue::Submit). MapAtCreation must return immediately with a
// mappable pointer, so if the lock isn't available, we fall back to temporary storage and
// defer the actual D3D11 buffer mapping until UnmapIfNeeded. At that point, the
// CommandContext will already be acquired (e.g., during TrackUsage before GPU submission),
// and we can safely map the real D3D11 buffer and transfer the temporary storage contents
// to it via memcpy.
std::optional<ScopedCommandRecordingContext> maybeCommandContext =
ToBackend(GetDevice()->GetQueue())
->TryGetScopedPendingCommandContext(QueueBase::SubmitMode::Normal);
if (maybeCommandContext.has_value()) {
return MapInternal(&maybeCommandContext.value(), mAutoMapMode | wgpu::MapMode::Write);
}
// Lock could not be acquired, use temporary storage instead
mMapAtCreationData = std::unique_ptr<uint8_t[]>(AllocNoThrow<uint8_t>(GetAllocatedSize()));
mMappedData = mMapAtCreationData.get();
return {};
}
MaybeError Buffer::MapInternal(const ScopedCommandRecordingContext* commandContext,
wgpu::MapMode mode) {
DAWN_UNREACHABLE();
return {};
}
void Buffer::UnmapInternal(const ScopedCommandRecordingContext* commandContext) {
DAWN_UNREACHABLE();
}
MaybeError Buffer::UnmapIfNeeded(const ScopedCommandRecordingContext* commandContext) {
if (mMappedData == nullptr) {
return {};
}
if (mMapAtCreationData) {
// We used temporary storage for MapAtCreation, now copy it to the actual buffer
mMappedData = nullptr;
ScopedMap scopedMap;
DAWN_TRY_ASSIGN(scopedMap, ScopedMap::Create(commandContext, this, wgpu::MapMode::Write));
DAWN_ASSERT(scopedMap.GetMappedData());
memcpy(scopedMap.GetMappedData(), mMapAtCreationData.get(), GetAllocatedSize());
mMapAtCreationData.reset();
return {};
}
UnmapInternal(commandContext);
return {};
}
MaybeError Buffer::TrackUsage(const ScopedCommandRecordingContext* commandContext,
ExecutionSerial pendingSerial) {
if (GetLastUsageSerial() == pendingSerial) {
return {};
}
// We need to unmap buffer before it can be used in the queue.
DAWN_TRY(UnmapIfNeeded(commandContext));
MarkUsedInPendingCommands(pendingSerial);
// If automatic mapping is enabled, schedule the buffer to be re-mapped after GPU usage.
if (mAutoMapMode != wgpu::MapMode::None) {
mMapRequest.mode = mAutoMapMode;
ToBackend(GetDevice()->GetQueue())->ScheduleBufferMapping(&mMapRequest, pendingSerial);
}
return {};
}
MaybeError Buffer::MapAsyncImpl(wgpu::MapMode mode, size_t offset, size_t size) {
DAWN_ASSERT((mode == wgpu::MapMode::Write && IsCPUWritable()) ||
(mode == wgpu::MapMode::Read && IsCPUReadable()));
// With automatic mapping, the buffer will be actually mapped/unmapped at queue
// boundaries, so MapAsync is a no-op.
if (mAutoMapMode != wgpu::MapMode::None) {
// Lazily do the 1st map if the buffer is not used in any queue yet.
if (GetLastUsageSerial() == kBeginningOfGPUTime && !mMappedData) {
DAWN_TRY(MapAtCreationImpl());
}
return {};
}
auto deviceGuard = GetDevice()->GetGuard();
const ExecutionSerial lastUsageSerial = GetLastUsageSerial();
const ExecutionSerial completedSerial = GetDevice()->GetQueue()->GetCompletedCommandSerial();
// We may run into a map stall if the buffer is still being used by previously submitted
// commands. To avoid that, we ask the Queue to do the map later, once the last usage serial has
// passed.
if (lastUsageSerial > completedSerial) {
mMapRequest.mode = mode;
ToBackend(GetDevice()->GetQueue())->ScheduleBufferMapping(&mMapRequest, lastUsageSerial);
} else {
auto commandContext = ToBackend(GetDevice()->GetQueue())
->GetScopedPendingCommandContext(QueueBase::SubmitMode::Normal);
DAWN_TRY(TryMapNow(&commandContext, completedSerial, mode));
}
return {};
}
// The difference between FinalizeMapImpl and TryMapNow is that:
// - FinalizeMapImpl() is triggered by front-end's serial control.
// - It's called after Queue::CheckAndUpdateCompletedSerials() and before user's mapping callback.
// - FinalizeMapImpl() will always be called for each MapAsync regardless of whether automatic
// mapping is enabled or not.
// - TryMapNow() is triggered by Queue::CheckAndUpdateCompletedSerials().
// - If automatic mapping is disabled, it's scheduled for each MapAsync.
// - else, it's scheduled once after the queue finishes using the buffer.
MaybeError Buffer::TryMapNow(ScopedCommandRecordingContext* commandContext,
ExecutionSerial completedSerial,
wgpu::MapMode mode) {
// If the buffer was used again after the remap was scheduled, skip this remap.
// This should only happen with automatic mapping where the buffer is scheduled to be
// re-mapped, but then used again in a subsequent submit before the original serial completed.
if (completedSerial < GetLastUsageSerial()) {
DAWN_ASSERT(mAutoMapMode != wgpu::MapMode::None);
return {};
}
DAWN_ASSERT(GetDevice()->IsLockedByCurrentThreadIfNeeded());
// Trigger any deferred unmaps.
// TODO(crbug.com/345471009): Consider reusing the mapped pointer and skipping the map if the
// previous map mode is the same as the current map mode.
DAWN_TRY(UnmapIfNeeded(commandContext));
// Map then initialize data using mapped pointer.
// The mapped pointer is always writable because:
// - If mode is Write, then it's already writable.
// - If mode is Read, it's only possible to map staging buffer. In that case,
// D3D11_MAP_READ_WRITE will be used, hence the mapped pointer will also be writable.
// TODO(dawn:1705): make sure the map call is not blocked by the GPU operations.
DAWN_TRY(MapInternal(commandContext, mode));
return {};
}
MaybeError Buffer::FinalizeMapImpl(BufferState newState) {
if (newState == BufferState::MappedAtCreation) {
return {};
}
DAWN_ASSERT(mMappedData);
// Ensure data is initialized after a MapAsync event completes.
DAWN_TRY(EnsureDataInitialized(nullptr));
return {};
}
void Buffer::UnmapImpl(BufferState oldState, BufferState newState) {
DAWN_ASSERT(IsMappable(GetInternalUsage()));
// With automatic mapping, the buffer stays mapped, so Unmap is a no-op.
if (mAutoMapMode != wgpu::MapMode::None) {
return;
}
// Cancel any pending scheduled map. Note we don't cancel here if newState is Destroyed, since
// it should be handled in DestroyImpl instead. DestroyImpl knows whether the reason is early
// destroy or dtor, and can decide to call CancelScheduledBufferMapping accordingly.
if (newState != BufferState::Destroyed) {
ToBackend(GetDevice()->GetQueue())->CancelScheduledBufferMapping(this);
}
// The actual unmap will be deferred until the buffer is used by the queue or we need to map
// again. This avoids the need to lock the CommandContext here just to call D3D11's Unmap
// function, and instead defers the call to a moment where the CommandContext is already
// acquired.
}
void* Buffer::GetMappedPointerImpl() {
// The frontend asks that the pointer returned is from the start of the resource
// irrespective of the offset passed in MapAsyncImpl, which is what mMappedData is.
return mMappedData;
}
void Buffer::DestroyImpl(DestroyReason reason) {
// TODO(crbug.com/dawn/831): DestroyImpl is called from two places.
// - It may be called if the buffer is explicitly destroyed with APIDestroy.
// This case is NOT thread-safe and needs proper synchronization with other
// simultaneous uses of the buffer.
// - It may be called when the last ref to the buffer is dropped and the buffer
// is implicitly destroyed. This case is thread-safe because there are no
// other threads using the buffer since there are no other live refs.
BufferBase::DestroyImpl(reason);
// Cancel any pending map schedule. Even though front-end guarantees that Destroy() cannot run
// in parallel with Queue operations, it doesn't do the same for Device::Tick(),
// Instance::ProcessEvents() or WaitAny(). Thus a scheduled map triggered by those functions
// would race with Destroy() if we don't do a cancel here.
if (reason != DestroyReason::CppDestructor && IsMappable(GetInternalUsage())) {
ToBackend(GetDevice()->GetQueue())->CancelScheduledBufferMapping(this);
}
// If buffer is still mapped, we need to unmap it before releasing the D3D11 resource. If we
// don't do that, there might be some issues on certain drivers such as Intel's.
// Note: The front-end guarantees that DestroyImpl cannot run concurrently with MapAsync,
// UnmapImpl, or Queue operations, so accessing mMappedData here is safe. Additionally, since
// no Queue operation can use this buffer anymore, it won't be scheduled for a remap after a
// cancel above.
if (mMappedData != nullptr && !mMapAtCreationData) {
// We don't need to unmap if the mapping was done on a shadow copy because no real
// buffer is mapped yet.
ToBackend(GetDevice())->DeferUnmapDestroyedBuffer(GetD3D11MappedBuffer());
mMappedData = nullptr;
}
}
std::optional<DeviceGuard> Buffer::UseDeviceGuardForDestroy() {
return std::nullopt;
}
MaybeError Buffer::EnsureDataInitialized(const ScopedCommandRecordingContext* commandContext) {
// Clear padding on first use, regardless of initialization state.
DAWN_TRY(EnsurePaddingInitialized(commandContext));
if (!NeedsInitialization()) {
return {};
}
DAWN_TRY(InitializeToZero(commandContext));
return {};
}
MaybeError Buffer::EnsureDataInitializedAsDestination(
const ScopedCommandRecordingContext* commandContext,
uint64_t offset,
uint64_t size) {
// Clear padding on first use as destination, regardless of initialization state.
DAWN_TRY(EnsurePaddingInitialized(commandContext));
if (!NeedsInitialization()) {
return {};
}
if (IsFullBufferRange(offset, size)) {
SetInitialized(true);
return {};
}
DAWN_TRY(InitializeToZero(commandContext));
return {};
}
MaybeError Buffer::EnsureDataInitializedAsDestination(
const ScopedCommandRecordingContext* commandContext,
const CopyTextureToBufferCmd* copy) {
// Clear padding on first use as destination, regardless of initialization state.
DAWN_TRY(EnsurePaddingInitialized(commandContext));
if (!NeedsInitialization()) {
return {};
}
if (IsFullBufferOverwrittenInTextureToBufferCopy(copy)) {
SetInitialized(true);
} else {
DAWN_TRY(InitializeToZero(commandContext));
}
return {};
}
MaybeError Buffer::InitializeToZero(const ScopedCommandRecordingContext* commandContext) {
DAWN_ASSERT(NeedsInitialization());
DAWN_TRY(ClearWholeBuffer(commandContext, uint8_t(0u)));
SetInitialized(true);
GetDevice()->IncrementLazyClearCountForTesting();
return {};
}
MaybeError Buffer::PredicatedClear(const ScopedSwapStateCommandRecordingContext* commandContext,
ID3D11Predicate* predicate,
uint8_t clearValue,
uint64_t offset,
uint64_t size) {
DAWN_UNREACHABLE();
return {};
}
MaybeError Buffer::Clear(const ScopedCommandRecordingContext* commandContext,
uint8_t clearValue,
uint64_t offset,
uint64_t size) {
DAWN_ASSERT(!mMappedData);
if (size == 0) {
return {};
}
// For non-staging buffers, we can use UpdateSubresource to write the data.
DAWN_TRY(EnsureDataInitializedAsDestination(commandContext, offset, size));
return ClearInternal(commandContext, clearValue, offset, size);
}
MaybeError Buffer::ClearWholeBuffer(const ScopedCommandRecordingContext* commandContext,
uint8_t clearValue) {
return ClearInternal(commandContext, clearValue, 0, GetAllocatedSize());
}
MaybeError Buffer::ClearInternal(const ScopedCommandRecordingContext* commandContext,
uint8_t clearValue,
uint64_t offset,
uint64_t size) {
DAWN_ASSERT(size != 0);
// TODO(dawn:1705): use a reusable zero staging buffer to clear the buffer to avoid this CPU to
// GPU copy.
std::vector<uint8_t> clearData(size, clearValue);
return WriteInternal(commandContext, offset, clearData.data(), size,
/*isInitialWrite=*/true);
}
MaybeError Buffer::EnsurePaddingInitialized(const ScopedCommandRecordingContext* commandContext) {
if (mPaddingCleared) [[likely]] {
return {};
}
DAWN_TRY(ClearPaddingInternal(commandContext));
mPaddingCleared = true;
return {};
}
MaybeError Buffer::ClearPaddingInternal(const ScopedCommandRecordingContext* commandContext) {
uint32_t paddingBytes = GetAllocatedSize() - GetSize();
if (paddingBytes == 0) {
return {};
}
uint32_t clearSize = paddingBytes;
uint64_t clearOffset = GetSize();
DAWN_TRY(ClearInternal(commandContext, 0, clearOffset, clearSize));
return {};
}
ComPtr<ID3D11Buffer> Buffer::GetD3D11MappedBuffer() {
return nullptr;
}
MaybeError Buffer::Write(const ScopedCommandRecordingContext* commandContext,
uint64_t offset,
const void* data,
size_t size) {
DAWN_ASSERT(size != 0);
// For non-staging buffers, we can use UpdateSubresource to write the data.
DAWN_TRY(EnsureDataInitializedAsDestination(commandContext, offset, size));
return WriteInternal(commandContext, offset, data, size, /*isInitialWrite=*/false);
}
// static
MaybeError Buffer::Copy(const ScopedCommandRecordingContext* commandContext,
Buffer* source,
uint64_t sourceOffset,
size_t size,
Buffer* destination,
uint64_t destinationOffset) {
DAWN_ASSERT(size != 0);
DAWN_TRY(source->EnsureDataInitialized(commandContext));
DAWN_TRY(
destination->EnsureDataInitializedAsDestination(commandContext, destinationOffset, size));
return source->CopyToInternal(commandContext, sourceOffset, size, destination,
destinationOffset);
}
ResultOrError<Buffer::ScopedMap> Buffer::ScopedMap::Create(
const ScopedCommandRecordingContext* commandContext,
Buffer* buffer,
wgpu::MapMode mode) {
if (mode == wgpu::MapMode::Write && !buffer->IsCPUWritable()) {
return ScopedMap();
}
if (mode == wgpu::MapMode::Read && !buffer->IsCPUReadable()) {
return ScopedMap();
}
if (buffer->mMappedData) {
return ScopedMap(commandContext, buffer, /*needsUnmap=*/false);
}
DAWN_TRY(buffer->MapInternal(commandContext, mode));
return ScopedMap(commandContext, buffer, /*needsUnmap=*/true);
}
// ScopedMap
Buffer::ScopedMap::ScopedMap() = default;
Buffer::ScopedMap::ScopedMap(const ScopedCommandRecordingContext* commandContext,
Buffer* buffer,
bool needsUnmap)
: mCommandContext(commandContext), mBuffer(buffer), mNeedsUnmap(needsUnmap) {}
Buffer::ScopedMap::~ScopedMap() {
Reset();
}
Buffer::ScopedMap::ScopedMap(Buffer::ScopedMap&& other) {
this->operator=(std::move(other));
}
Buffer::ScopedMap& Buffer::ScopedMap::operator=(Buffer::ScopedMap&& other) {
Reset();
mCommandContext = other.mCommandContext;
mBuffer = other.mBuffer;
mNeedsUnmap = other.mNeedsUnmap;
other.mBuffer = nullptr;
other.mNeedsUnmap = false;
return *this;
}
void Buffer::ScopedMap::Reset() {
if (mNeedsUnmap) {
mBuffer->UnmapInternal(mCommandContext);
}
mCommandContext = nullptr;
mBuffer = nullptr;
mNeedsUnmap = false;
}
uint8_t* Buffer::ScopedMap::GetMappedData() const {
return mBuffer ? static_cast<uint8_t*>(mBuffer->mMappedData) : nullptr;
}
// GPUUsableBuffer::Storage
class GPUUsableBuffer::Storage : public RefCounted, NonCopyable {
public:
explicit Storage(ComPtr<ID3D11Buffer> d3d11Buffer) : mD3d11Buffer(std::move(d3d11Buffer)) {
D3D11_BUFFER_DESC desc;
mD3d11Buffer->GetDesc(&desc);
mD3d11Usage = desc.Usage;
mMappableCopyableFlags = wgpu::BufferUsage::CopySrc;
switch (mD3d11Usage) {
case D3D11_USAGE_STAGING:
mMappableCopyableFlags |= kMappableBufferUsages | wgpu::BufferUsage::CopyDst;
break;
case D3D11_USAGE_DYNAMIC:
mMappableCopyableFlags |= wgpu::BufferUsage::MapWrite;
break;
case D3D11_USAGE_DEFAULT:
mMappableCopyableFlags |= wgpu::BufferUsage::CopyDst;
break;
default:
break;
}
mIsConstantBuffer = desc.BindFlags & D3D11_BIND_CONSTANT_BUFFER;
}
ID3D11Buffer* GetD3D11Buffer() { return mD3d11Buffer.Get(); }
uint64_t GetRevision() const { return mRevision; }
void SetRevision(uint64_t revision) { mRevision = revision; }
bool IsFirstRevision() const { return mRevision == 0; }
bool IsConstantBuffer() const { return mIsConstantBuffer; }
bool IsCPUWritable() const { return mMappableCopyableFlags & wgpu::BufferUsage::MapWrite; }
bool IsCPUReadable() const { return mMappableCopyableFlags & wgpu::BufferUsage::MapRead; }
bool IsDynamic() const { return mD3d11Usage == D3D11_USAGE_DYNAMIC; }
bool SupportsCopyDst() const { return mMappableCopyableFlags & wgpu::BufferUsage::CopyDst; }
bool IsGPUWritable() const { return mD3d11Usage == D3D11_USAGE_DEFAULT; }
private:
ComPtr<ID3D11Buffer> mD3d11Buffer;
uint64_t mRevision = 0;
D3D11_USAGE mD3d11Usage;
bool mIsConstantBuffer = false;
wgpu::BufferUsage mMappableCopyableFlags;
};
// GPUUsableBuffer
GPUUsableBuffer::GPUUsableBuffer(DeviceBase* device,
const UnpackedPtr<BufferDescriptor>& descriptor)
: Buffer(
device,
descriptor,
/*internalMappableFlags=*/
[](const UnpackedPtr<BufferDescriptor>& descriptor) {
wgpu::BufferUsage mappableFlags = descriptor->usage & kMappableBufferUsages;
if (descriptor->usage & wgpu::BufferUsage::MapRead) {
// Staging buffer can be both mapped read & write.
mappableFlags |= wgpu::BufferUsage::MapWrite;
}
return mappableFlags;
}(descriptor),
/*autoMapMode=*/
GetAutoMapMode(device, descriptor->usage)) {}
GPUUsableBuffer::~GPUUsableBuffer() = default;
void GPUUsableBuffer::DestroyImpl(DestroyReason reason) {
// TODO(crbug.com/dawn/831): DestroyImpl is called from two places.
// - It may be called if the buffer is explicitly destroyed with APIDestroy.
// This case is NOT thread-safe and needs proper synchronization with other
// simultaneous uses of the buffer.
// - It may be called when the last ref to the buffer is dropped and the buffer
// is implicitly destroyed. This case is thread-safe because there are no
// other threads using the buffer since there are no other live refs.
Buffer::DestroyImpl(reason);
mSRVCache.clear();
mUAVCache.clear();
mLastUpdatedStorage = nullptr;
mMappableStorage = nullptr;
mStorages = {};
}
void GPUUsableBuffer::SetLabelImpl() {
for (auto ite = mStorages.begin(); ite != mStorages.end(); ++ite) {
auto storageType = static_cast<StorageType>(std::distance(mStorages.begin(), ite));
SetStorageLabel(storageType);
}
}
void GPUUsableBuffer::SetStorageLabel(StorageType storageType) {
static constexpr ityp::array<GPUUsableBuffer::StorageType, const char*,
static_cast<uint8_t>(StorageType::Count)>
kStorageTypeStrings = {
"Dawn_CPUWritableConstantBuffer",
"Dawn_GPUCopyDstConstantBuffer",
"Dawn_CPUWritableNonConstantBuffer",
"Dawn_GPUWritableNonConstantBuffer",
"Dawn_Staging",
};
if (!mStorages[storageType]) {
return;
}
SetDebugName(ToBackend(GetDevice()), mStorages[storageType]->GetD3D11Buffer(),
kStorageTypeStrings[storageType], GetLabel());
}
MaybeError GPUUsableBuffer::InitializeInternal() {
mStorages = {};
wgpu::BufferUsage usagesToHandle = GetInternalUsage();
// If the buffer has MapRead usage, allocate a staging storage for both MapRead & MapWrite.
// Example: If user creates a buffer with MapWrite + MapRead + Vertex usages:
// - We will allocate:
// - A storage for both MapWrite + MapRead.
// - A storage for Vertex usage.
// - This will require a copy for vertex input whenever the user maps the buffer (either for
// writing or reading).
// - This is acceptable since a buffer with both MapWrite + MapRead is rare.
if ((usagesToHandle & wgpu::BufferUsage::MapRead) || IsStaging(usagesToHandle)) {
DAWN_TRY_ASSIGN(mLastUpdatedStorage, GetOrCreateStorage(StorageType::Staging));
mMappableStorage = mLastUpdatedStorage;
usagesToHandle &= ~kStagingUsages;
}
// We need to create a separate storage for uniform usage, because D3D11 doesn't allow constant
// buffer to be used for other purposes.
if (usagesToHandle & wgpu::BufferUsage::Uniform) {
usagesToHandle &=
~(wgpu::BufferUsage::Uniform | wgpu::BufferUsage::CopySrc | kInternalCopySrcBuffer);
// Since D3D11 doesn't allow both CPU & GPU to write to a buffer, we need separate
// storages for CPU writes and GPU writes.
if (usagesToHandle & wgpu::BufferUsage::MapWrite) {
// Note: we favor CPU write over GPU write if MapWrite is present. If buffer has GPU
// writable usages, the GPU writable storage will be lazily created later.
usagesToHandle &= ~wgpu::BufferUsage::MapWrite;
DAWN_TRY_ASSIGN(mLastUpdatedStorage,
GetOrCreateStorage(StorageType::CPUWritableConstantBuffer));
mMappableStorage = mLastUpdatedStorage;
} else {
// For constant buffer, the only supported GPU op is copy. So create one storage for
// that.
usagesToHandle &= ~wgpu::BufferUsage::CopyDst;
DAWN_TRY_ASSIGN(mLastUpdatedStorage,
GetOrCreateStorage(StorageType::GPUCopyDstConstantBuffer));
}
}
if (usagesToHandle == wgpu::BufferUsage::None) {
return {};
}
// Create separate storage for non-constant buffer usages if required.
if (usagesToHandle & wgpu::BufferUsage::MapWrite) {
// Note: we only need one CPU writable storage. If there are both constant buffer and
// non-constant buffer usages, we favor the CPU writable constant buffer first, since the most
// common use case is users updating a constant buffer on the CPU.
DAWN_ASSERT(mMappableStorage == nullptr);
usagesToHandle &= ~wgpu::BufferUsage::MapWrite;
// If a buffer is created with both Storage and MapWrite usages, then
// we will lazily create a GPU writable storage later. Note: we favor CPU writable
// over GPU writable when creating non-constant buffer storage. This is to optimize
// the most common cases where MapWrite buffers are mostly updated by CPU.
DAWN_TRY_ASSIGN(mLastUpdatedStorage,
GetOrCreateStorage(StorageType::CPUWritableNonConstantBuffer));
mMappableStorage = mLastUpdatedStorage;
} else {
usagesToHandle &= ~wgpu::BufferUsage::CopyDst;
DAWN_TRY_ASSIGN(mLastUpdatedStorage,
GetOrCreateStorage(StorageType::GPUWritableNonConstantBuffer));
}
return {};
}
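// As a rough example of the above: a buffer created with Uniform | Storage usages typically ends
// up with a GPUCopyDstConstantBuffer storage and a GPUWritableNonConstantBuffer storage, while a
// MapWrite | CopySrc | Uniform buffer gets a single CPUWritableConstantBuffer storage.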
ResultOrError<GPUUsableBuffer::Storage*> GPUUsableBuffer::GetOrCreateStorage(
StorageType storageType) {
if (mStorages[storageType]) {
return mStorages[storageType].Get();
}
D3D11_BUFFER_DESC bufferDescriptor;
bufferDescriptor.ByteWidth = GetAllocatedSize();
bufferDescriptor.StructureByteStride = 0;
switch (storageType) {
case StorageType::CPUWritableConstantBuffer:
bufferDescriptor.Usage = D3D11_USAGE_DYNAMIC;
bufferDescriptor.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE;
bufferDescriptor.BindFlags = D3D11_BIND_CONSTANT_BUFFER;
bufferDescriptor.MiscFlags = 0;
break;
case StorageType::GPUCopyDstConstantBuffer:
bufferDescriptor.Usage = D3D11_USAGE_DEFAULT;
bufferDescriptor.CPUAccessFlags = 0;
bufferDescriptor.BindFlags = D3D11_BIND_CONSTANT_BUFFER;
bufferDescriptor.MiscFlags = 0;
break;
case StorageType::CPUWritableNonConstantBuffer: {
// Need to exclude GPU writable usages because CPU writable buffer is not GPU writable
// in D3D11.
auto nonUniformUsage =
GetInternalUsage() & ~(kD3D11GPUWriteUsages | wgpu::BufferUsage::Uniform);
bufferDescriptor.Usage = D3D11_USAGE_DYNAMIC;
bufferDescriptor.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE;
bufferDescriptor.BindFlags = D3D11BufferBindFlags(nonUniformUsage);
bufferDescriptor.MiscFlags = D3D11BufferMiscFlags(nonUniformUsage);
if (bufferDescriptor.BindFlags == 0) {
// Dynamic buffer requires at least one binding flag. If no binding flag is needed
// (one example is MapWrite | QueryResolve), then use D3D11_BIND_INDEX_BUFFER.
bufferDescriptor.BindFlags = D3D11_BIND_INDEX_BUFFER;
DAWN_ASSERT(bufferDescriptor.MiscFlags == 0);
}
} break;
case StorageType::GPUWritableNonConstantBuffer: {
// Need to exclude mapping usages.
const auto nonUniformUsage =
GetInternalUsage() & ~(kMappableBufferUsages | wgpu::BufferUsage::Uniform);
bufferDescriptor.Usage = D3D11_USAGE_DEFAULT;
bufferDescriptor.CPUAccessFlags = 0;
bufferDescriptor.BindFlags = D3D11BufferBindFlags(nonUniformUsage);
bufferDescriptor.MiscFlags = D3D11BufferMiscFlags(nonUniformUsage);
} break;
case StorageType::Staging: {
bufferDescriptor.Usage = D3D11_USAGE_STAGING;
bufferDescriptor.CPUAccessFlags = D3D11_CPU_ACCESS_READ | D3D11_CPU_ACCESS_WRITE;
bufferDescriptor.BindFlags = 0;
bufferDescriptor.MiscFlags = 0;
} break;
case StorageType::Count:
DAWN_UNREACHABLE();
}
ComPtr<ID3D11Buffer> buffer;
DAWN_TRY(CheckOutOfMemoryHRESULT(
ToBackend(GetDevice())->GetD3D11Device()->CreateBuffer(&bufferDescriptor, nullptr, &buffer),
"ID3D11Device::CreateBuffer"));
mStorages[storageType] = AcquireRef(new Storage(std::move(buffer)));
SetStorageLabel(storageType);
return mStorages[storageType].Get();
}
ResultOrError<GPUUsableBuffer::Storage*> GPUUsableBuffer::GetOrCreateDstCopyableStorage() {
if (mStorages[StorageType::GPUCopyDstConstantBuffer]) {
return mStorages[StorageType::GPUCopyDstConstantBuffer].Get();
}
if (mStorages[StorageType::GPUWritableNonConstantBuffer]) {
return mStorages[StorageType::GPUWritableNonConstantBuffer].Get();
}
if (GetInternalUsage() & wgpu::BufferUsage::Uniform) {
return GetOrCreateStorage(StorageType::GPUCopyDstConstantBuffer);
}
return GetOrCreateStorage(StorageType::GPUWritableNonConstantBuffer);
}
MaybeError GPUUsableBuffer::SyncStorage(const ScopedCommandRecordingContext* commandContext,
Storage* dstStorage) {
DAWN_ASSERT(mLastUpdatedStorage);
DAWN_ASSERT(dstStorage);
if (mLastUpdatedStorage->GetRevision() == dstStorage->GetRevision()) {
return {};
}
DAWN_ASSERT(commandContext);
// Must not have pending unmap.
DAWN_CHECK(!mMappedData);
if (dstStorage->SupportsCopyDst()) {
commandContext->CopyResource(dstStorage->GetD3D11Buffer(),
mLastUpdatedStorage->GetD3D11Buffer());
dstStorage->SetRevision(mLastUpdatedStorage->GetRevision());
return {};
}
// TODO(42241146): This is a slow path. It's usually used by uncommon use cases:
// - GPU writes a CPU writable buffer.
DAWN_ASSERT(dstStorage->IsCPUWritable());
Storage* stagingStorage;
DAWN_TRY_ASSIGN(stagingStorage, GetOrCreateStorage(StorageType::Staging));
DAWN_TRY(SyncStorage(commandContext, stagingStorage));
D3D11_MAPPED_SUBRESOURCE mappedSrcResource;
DAWN_TRY(CheckHRESULT(commandContext->Map(stagingStorage->GetD3D11Buffer(),
/*Subresource=*/0, D3D11_MAP_READ,
/*MapFlags=*/0, &mappedSrcResource),
"ID3D11DeviceContext::Map src"));
auto MapAndCopy = [](const ScopedCommandRecordingContext* commandContext, ID3D11Buffer* dst,
const void* srcData, size_t size) -> MaybeError {
D3D11_MAPPED_SUBRESOURCE mappedDstResource;
DAWN_TRY(CheckHRESULT(commandContext->Map(dst,
/*Subresource=*/0, D3D11_MAP_WRITE_DISCARD,
/*MapFlags=*/0, &mappedDstResource),
"ID3D11DeviceContext::Map dst"));
memcpy(mappedDstResource.pData, srcData, size);
commandContext->Unmap(dst,
/*Subresource=*/0);
return {};
};
auto result = MapAndCopy(commandContext, dstStorage->GetD3D11Buffer(), mappedSrcResource.pData,
GetAllocatedSize());
commandContext->Unmap(stagingStorage->GetD3D11Buffer(),
/*Subresource=*/0);
if (result.IsError()) {
return result;
}
dstStorage->SetRevision(mLastUpdatedStorage->GetRevision());
return {};
}
void GPUUsableBuffer::IncrStorageRevAndMakeLatest(
const ScopedCommandRecordingContext* commandContext,
Storage* dstStorage) {
DAWN_ASSERT(dstStorage->GetRevision() == mLastUpdatedStorage->GetRevision());
dstStorage->SetRevision(dstStorage->GetRevision() + 1);
mLastUpdatedStorage = dstStorage;
if (dstStorage->IsGPUWritable() && IsMappable(GetInternalUsage())) {
// If this buffer is mappable and the last updated storage is GPU writable, we need to
// update the staging storage when the command buffer is flushed.
// This is to make sure the staging storage will contain the up-to-date GPU modified data.
commandContext->AddBufferForSyncingWithCPU(this);
}
}
MaybeError GPUUsableBuffer::SyncGPUWritesToStaging(
const ScopedCommandRecordingContext* commandContext) {
DAWN_ASSERT(IsMappable(GetInternalUsage()));
// Only sync staging storage. Later other CPU writable storages can be updated by
// copying from staging storage with Map(MAP_WRITE_DISCARD) which won't stall the CPU.
// Otherwise, since CPU writable storages don't support CopyDst, it would require a CPU
// stall in order to sync them here.
Storage* stagingStorage;
DAWN_TRY_ASSIGN(stagingStorage, GetOrCreateStorage(StorageType::Staging));
return SyncStorage(commandContext, stagingStorage);
}
MaybeError GPUUsableBuffer::MapInternal(const ScopedCommandRecordingContext* commandContext,
wgpu::MapMode mode) {
DAWN_ASSERT(!mMappedData);
DAWN_ASSERT(mMappableStorage);
if (mMappableStorage->IsDynamic()) {
DAWN_ASSERT(mode == wgpu::MapMode::Write);
// Dynamic buffer can only use D3D11_MAP_WRITE_NO_OVERWRITE
mD3DMapTypeUsed = D3D11_MAP_WRITE_NO_OVERWRITE;
} else {
if (NeedsInitialization() || mode == (wgpu::MapMode::Read | wgpu::MapMode::Write)) {
// Map buffer with D3D11_MAP_READ_WRITE because we need write permission to initialize
// the buffer.
// TODO(dawn:1705): investigate the performance impact of mapping with
// D3D11_MAP_READ_WRITE.
mD3DMapTypeUsed = D3D11_MAP_READ_WRITE;
} else {
if (mode & wgpu::MapMode::Read) {
mD3DMapTypeUsed = D3D11_MAP_READ;
} else {
mD3DMapTypeUsed = D3D11_MAP_WRITE;
}
}
}
// Sync previously modified content before mapping.
DAWN_TRY(SyncStorage(commandContext, mMappableStorage));
D3D11_MAPPED_SUBRESOURCE mappedResource;
DAWN_TRY(CheckHRESULT(commandContext->Map(mMappableStorage->GetD3D11Buffer(),
/*Subresource=*/0, mD3DMapTypeUsed,
/*MapFlags=*/0, &mappedResource),
"ID3D11DeviceContext::Map"));
mMappedData = static_cast<uint8_t*>(mappedResource.pData);
return {};
}
void GPUUsableBuffer::UnmapInternal(const ScopedCommandRecordingContext* commandContext) {
DAWN_ASSERT(mMappedData);
DAWN_ASSERT(mMappableStorage);
commandContext->Unmap(mMappableStorage->GetD3D11Buffer(),
/*Subresource=*/0);
mMappedData = nullptr;
// Only increment revision if the buffer was mapped for writing.
if (mD3DMapTypeUsed != D3D11_MAP_READ) {
IncrStorageRevAndMakeLatest(commandContext, mMappableStorage);
}
}
ResultOrError<ID3D11Buffer*> GPUUsableBuffer::GetD3D11ConstantBuffer(
const ScopedCommandRecordingContext* commandContext) {
auto* storage = mStorages[StorageType::CPUWritableConstantBuffer].Get();
if (storage && storage->GetRevision() == mLastUpdatedStorage->GetRevision()) {
// The CPUWritableConstantBuffer is up to date, use it directly.
return storage->GetD3D11Buffer();
}
// In all other cases we are going to use the GPUCopyDstConstantBuffer because either it is up to
// date, or we need to update the constant buffer data, and doing a CopyResource on the GPU is
// always more efficient than paths involving a memcpy (or potentially a stall).
DAWN_TRY_ASSIGN(storage, GetOrCreateStorage(StorageType::GPUCopyDstConstantBuffer));
DAWN_TRY(SyncStorage(commandContext, storage));
return storage->GetD3D11Buffer();
}
ResultOrError<ID3D11Buffer*> GPUUsableBuffer::GetD3D11NonConstantBuffer(
const ScopedCommandRecordingContext* commandContext) {
auto* storage = mStorages[StorageType::CPUWritableNonConstantBuffer].Get();
if (storage && storage->GetRevision() == mLastUpdatedStorage->GetRevision()) {
// The CPUWritableNonConstantBuffer is up to date, use it directly.
return storage->GetD3D11Buffer();
}
// In all other cases we are going to use the GPUWritableNonConstantBuffer because either it is
// up to date, or we need to update the non-constant buffer data, and doing a CopyResource on the
// GPU is always more efficient than paths involving a memcpy (or potentially a stall).
DAWN_TRY_ASSIGN(storage, GetOrCreateStorage(StorageType::GPUWritableNonConstantBuffer));
DAWN_TRY(SyncStorage(commandContext, storage));
return storage->GetD3D11Buffer();
}
ID3D11Buffer* GPUUsableBuffer::GetD3D11ConstantBufferForTesting() {
if (!mStorages[StorageType::CPUWritableConstantBuffer] &&
!mStorages[StorageType::GPUCopyDstConstantBuffer]) {
return nullptr;
}
auto tempCommandContext = ToBackend(GetDevice()->GetQueue())
->GetScopedPendingCommandContext(QueueBase::SubmitMode::Normal);
ID3D11Buffer* buffer;
if (GetDevice()->ConsumedError(GetD3D11ConstantBuffer(&tempCommandContext), &buffer)) {
return nullptr;
}
return buffer;
}
ID3D11Buffer* GPUUsableBuffer::GetD3D11NonConstantBufferForTesting() {
if (!mStorages[StorageType::CPUWritableNonConstantBuffer] &&
!mStorages[StorageType::GPUWritableNonConstantBuffer]) {
return nullptr;
}
auto tempCommandContext = ToBackend(GetDevice()->GetQueue())
->GetScopedPendingCommandContext(QueueBase::SubmitMode::Normal);
ID3D11Buffer* buffer;
if (GetDevice()->ConsumedError(GetD3D11NonConstantBuffer(&tempCommandContext), &buffer)) {
return nullptr;
}
return buffer;
}
ResultOrError<ComPtr<ID3D11ShaderResourceView>>
GPUUsableBuffer::CreateD3D11ShaderResourceViewFromD3DBuffer(ID3D11Buffer* d3d11Buffer,
uint64_t offset,
uint64_t originalSize) {
uint64_t size = Align(originalSize, 4);
DAWN_ASSERT(IsAligned(offset, 4u));
DAWN_ASSERT(size <= GetAllocatedSize());
UINT firstElement = static_cast<UINT>(offset / 4);
UINT numElements = static_cast<UINT>(size / 4);
D3D11_SHADER_RESOURCE_VIEW_DESC desc;
desc.Format = DXGI_FORMAT_R32_TYPELESS;
desc.ViewDimension = D3D11_SRV_DIMENSION_BUFFEREX;
desc.BufferEx.FirstElement = firstElement;
desc.BufferEx.NumElements = numElements;
desc.BufferEx.Flags = D3D11_BUFFEREX_SRV_FLAG_RAW;
ComPtr<ID3D11ShaderResourceView> srv;
DAWN_TRY(CheckHRESULT(ToBackend(GetDevice())
->GetD3D11Device()
->CreateShaderResourceView(d3d11Buffer, &desc, &srv),
"ShaderResourceView creation"));
return std::move(srv);
}
ResultOrError<ComPtr<ID3D11UnorderedAccessView1>>
GPUUsableBuffer::CreateD3D11UnorderedAccessViewFromD3DBuffer(ID3D11Buffer* d3d11Buffer,
uint64_t offset,
uint64_t originalSize) {
uint64_t size = Align(originalSize, 4);
DAWN_ASSERT(IsAligned(offset, 4u));
DAWN_ASSERT(size <= GetAllocatedSize());
UINT firstElement = static_cast<UINT>(offset / 4);
UINT numElements = static_cast<UINT>(size / 4);
D3D11_UNORDERED_ACCESS_VIEW_DESC1 desc;
desc.Format = DXGI_FORMAT_R32_TYPELESS;
desc.ViewDimension = D3D11_UAV_DIMENSION_BUFFER;
desc.Buffer.FirstElement = firstElement;
desc.Buffer.NumElements = numElements;
desc.Buffer.Flags = D3D11_BUFFER_UAV_FLAG_RAW;
ComPtr<ID3D11UnorderedAccessView1> uav;
DAWN_TRY(CheckHRESULT(ToBackend(GetDevice())
->GetD3D11Device3()
->CreateUnorderedAccessView1(d3d11Buffer, &desc, &uav),
"UnorderedAccessView creation"));
return std::move(uav);
}
ResultOrError<ComPtr<ID3D11ShaderResourceView>> GPUUsableBuffer::UseAsSRV(
const ScopedCommandRecordingContext* commandContext,
uint64_t offset,
uint64_t size) {
ID3D11Buffer* d3dBuffer;
DAWN_TRY_ASSIGN(d3dBuffer, GetD3D11NonConstantBuffer(commandContext));
auto key = std::make_tuple(d3dBuffer, offset, size);
auto ite = mSRVCache.find(key);
if (ite != mSRVCache.end()) {
return ite->second;
}
ComPtr<ID3D11ShaderResourceView> srv;
DAWN_TRY_ASSIGN(srv, CreateD3D11ShaderResourceViewFromD3DBuffer(d3dBuffer, offset, size));
mSRVCache[key] = srv;
return std::move(srv);
}
ResultOrError<ComPtr<ID3D11UnorderedAccessView>> GPUUsableBuffer::UseAsUAV(
const ScopedCommandRecordingContext* commandContext,
uint64_t offset,
uint64_t size) {
Storage* storage = nullptr;
DAWN_TRY_ASSIGN(storage, GetOrCreateStorage(StorageType::GPUWritableNonConstantBuffer));
DAWN_TRY(SyncStorage(commandContext, storage));
ComPtr<ID3D11UnorderedAccessView1> uav;
{
auto key = std::make_tuple(storage->GetD3D11Buffer(), offset, size);
auto ite = mUAVCache.find(key);
if (ite != mUAVCache.end()) {
uav = ite->second;
} else {
DAWN_TRY_ASSIGN(uav, CreateD3D11UnorderedAccessViewFromD3DBuffer(
storage->GetD3D11Buffer(), offset, size));
mUAVCache[key] = uav;
}
}
// Since UAV will modify the storage's content, increment its revision.
IncrStorageRevAndMakeLatest(commandContext, storage);
return ComPtr<ID3D11UnorderedAccessView>(std::move(uav));
}
MaybeError GPUUsableBuffer::UpdateD3D11ConstantBuffer(
const ScopedCommandRecordingContext* commandContext,
ID3D11Buffer* d3d11Buffer,
bool firstTimeUpdate,
uint64_t offset,
const void* data,
size_t size) {
DAWN_ASSERT(size > 0);
// For a full size write, UpdateSubresource1(D3D11_COPY_DISCARD) can be used to update
// constant buffer.
// WriteInternal() can be called with GetAllocatedSize(). We treat it as a full buffer write
// as well.
const bool fullSizeUpdate = size >= GetSize() && offset == 0;
const bool canPartialUpdate =
ToBackend(GetDevice())->GetDeviceInfo().supportsPartialConstantBufferUpdate;
if (fullSizeUpdate || firstTimeUpdate) {
const bool requiresFullAllocatedSizeWrite = !canPartialUpdate && !firstTimeUpdate;
// Offset and size must be 16-byte aligned to use UpdateSubresource1() on a constant
// buffer.
size_t alignedOffset;
if (offset < kConstantBufferUpdateAlignment - 1) {
alignedOffset = 0;
} else {
DAWN_ASSERT(firstTimeUpdate);
// For the offset, align down to a lower value (<= offset).
alignedOffset = Align(offset - (kConstantBufferUpdateAlignment - 1),
kConstantBufferUpdateAlignment);
}
size_t alignedEnd;
if (requiresFullAllocatedSizeWrite) {
alignedEnd = GetAllocatedSize();
} else {
alignedEnd = Align(offset + size, kConstantBufferUpdateAlignment);
}
size_t alignedSize = alignedEnd - alignedOffset;
DAWN_ASSERT((alignedSize % kConstantBufferUpdateAlignment) == 0);
DAWN_ASSERT(alignedSize <= GetAllocatedSize());
DAWN_ASSERT(offset >= alignedOffset);
// Extra bytes to the left of the offset that we may also write to. This is only valid if
// firstTimeUpdate is true.
size_t leftExtraBytes = offset - alignedOffset;
DAWN_ASSERT(leftExtraBytes == 0 || firstTimeUpdate);
// The layout of the buffer is like this:
// |..........................| leftExtraBytes | data | ............... |
// |<----------------- offset ---------------->|<-- size -->|
// |<----- alignedOffset ---->|<--------- alignedSize --------->|
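// For example, a firstTimeUpdate with offset = 20 and size = 8 gives alignedOffset = 16,
// alignedEnd = Align(28, 16) = 32, alignedSize = 16 and leftExtraBytes = 4, so 16 bytes are
// written starting at byte 16 with the caller's 8 bytes placed at byte 20.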
std::unique_ptr<uint8_t[]> alignedBuffer;
if (size != alignedSize) {
alignedBuffer.reset(new uint8_t[alignedSize]);
std::memcpy(alignedBuffer.get() + leftExtraBytes, data, size);
data = alignedBuffer.get();
}
D3D11_BOX dstBox;
dstBox.left = static_cast<UINT>(alignedOffset);
dstBox.top = 0;
dstBox.front = 0;
dstBox.right = static_cast<UINT>(alignedOffset + alignedSize);
dstBox.bottom = 1;
dstBox.back = 1;
// For full buffer write, D3D11_COPY_DISCARD is used to avoid GPU CPU synchronization.
commandContext->UpdateSubresource1(d3d11Buffer, /*DstSubresource=*/0,
requiresFullAllocatedSizeWrite ? nullptr : &dstBox, data,
/*SrcRowPitch=*/0,
/*SrcDepthPitch=*/0,
/*CopyFlags=*/D3D11_COPY_DISCARD);
return {};
}
// If the copy offset and size are not 16-byte aligned, we have to create a staging buffer to
// transfer the data to the constant buffer.
Ref<BufferBase> stagingBuffer;
DAWN_TRY_ASSIGN(stagingBuffer, ToBackend(GetDevice())->GetStagingBuffer(commandContext, size));
{
auto scopedUseStaging = stagingBuffer->UseInternal();
DAWN_TRY(ToBackend(stagingBuffer)
->WriteInternal(commandContext, 0, data, size,
/*isInitialWrite=*/true));
DAWN_TRY(ToBackend(stagingBuffer.Get())
->CopyToInternal(commandContext,
/*sourceOffset=*/0,
/*size=*/size, this, offset));
}
ToBackend(GetDevice())->ReturnStagingBuffer(std::move(stagingBuffer));
return {};
}
MaybeError GPUUsableBuffer::WriteInternal(const ScopedCommandRecordingContext* commandContext,
uint64_t offset,
const void* data,
size_t size,
bool isInitialWrite) {
if (size == 0) {
return {};
}
// Map the buffer if it is possible, so WriteInternal() can write to the mapped memory
// directly.
// TODO(crbug.com/345471009): Consider mapping the buffer for non-clearing writes when
// it's not in use by the GPU. This would avoid allocating additional GPU storage.
// However, checking GetLastUsageSerial() is unreliable here because Queue::Submit()
// may have already updated it before entering this function. In practice, this is
// uncommon for mappable buffers since users typically update them via MapAsync when
// they know the buffer is idle.
const bool alreadyMappedForWrite = mMappedData && mD3DMapTypeUsed != D3D11_MAP_READ;
if ((IsCPUWritable() && isInitialWrite) || alreadyMappedForWrite) {
// If buffer is already mapped, creating ScopedMap is a no-op.
ScopedMap scopedMap;
DAWN_TRY_ASSIGN(scopedMap, ScopedMap::Create(commandContext, this, wgpu::MapMode::Write));
DAWN_ASSERT(scopedMap.GetMappedData());
memcpy(scopedMap.GetMappedData() + offset, data, size);
return {};
}
// Mark the buffer as used in pending commands if the mapping path above wasn't taken.
// Mapped writes complete synchronously and don't require tracking.
DAWN_TRY(TrackUsage(commandContext, GetDevice()->GetQueue()->GetPendingCommandSerial()));
// WriteInternal() can be called with GetAllocatedSize(). We treat it as a full buffer write
// as well.
bool fullSizeWrite = size >= GetSize() && offset == 0;
// Mapping buffer at this point would stall the CPU. We will create a GPU copyable
// storage and use UpdateSubresource on it below instead. Note if we have both const buffer &
// non-const buffer, we favor writing to non-const buffer, because it has no alignment
// requirement.
Storage* gpuCopyableStorage = mStorages[StorageType::GPUWritableNonConstantBuffer].Get();
if (!gpuCopyableStorage) {
DAWN_TRY_ASSIGN(gpuCopyableStorage, GetOrCreateDstCopyableStorage());
}
if (fullSizeWrite) {
// If this is a full overwrite, no need to copy the old content.
// Just need to copy the revision number.
DAWN_ASSERT(mLastUpdatedStorage);
gpuCopyableStorage->SetRevision(mLastUpdatedStorage->GetRevision());
} else {
DAWN_TRY(SyncStorage(commandContext, gpuCopyableStorage));
}
const bool firstTimeUpdate = gpuCopyableStorage->IsFirstRevision();
// We are going to write to the storage in all code paths, so update the revision now.
IncrStorageRevAndMakeLatest(commandContext, gpuCopyableStorage);
if (gpuCopyableStorage->IsConstantBuffer()) {
return UpdateD3D11ConstantBuffer(commandContext, gpuCopyableStorage->GetD3D11Buffer(),
firstTimeUpdate, offset, data, size);
}
D3D11_BOX box;
box.left = static_cast<UINT>(offset);
box.top = 0;
box.front = 0;
box.right = static_cast<UINT>(offset + size);
box.bottom = 1;
box.back = 1;
commandContext->UpdateSubresource1(gpuCopyableStorage->GetD3D11Buffer(),
/*DstSubresource=*/0,
/*pDstBox=*/&box, data,
/*SrcRowPitch=*/0,
/*SrcDepthPitch=*/0,
/*CopyFlags=*/0);
// No need to update the constant buffer at this point. When the command buffer wants to bind
// the constant buffer in a render/compute pass, it will call GetD3D11ConstantBuffer() and the
// constant buffer will be synced there. WriteBuffer() cannot be called inside a render/compute
// pass, so there is no need to sync here.
return {};
}
MaybeError GPUUsableBuffer::CopyToInternal(const ScopedCommandRecordingContext* commandContext,
uint64_t sourceOffset,
size_t size,
Buffer* destination,
uint64_t destinationOffset) {
DAWN_TRY(TrackUsage(commandContext, GetDevice()->GetQueue()->GetPendingCommandSerial()));
ID3D11Buffer* d3d11SourceBuffer = mLastUpdatedStorage->GetD3D11Buffer();
return destination->CopyFromD3DInternal(commandContext, d3d11SourceBuffer, sourceOffset, size,
destinationOffset);
}
MaybeError GPUUsableBuffer::CopyFromD3DInternal(const ScopedCommandRecordingContext* commandContext,
ID3D11Buffer* d3d11SourceBuffer,
uint64_t sourceOffset,
size_t size,
uint64_t destinationOffset) {
DAWN_TRY(TrackUsage(commandContext, GetDevice()->GetQueue()->GetPendingCommandSerial()));
D3D11_BOX srcBox;
srcBox.left = static_cast<UINT>(sourceOffset);
srcBox.top = 0;
srcBox.front = 0;
srcBox.right = static_cast<UINT>(sourceOffset + size);
srcBox.bottom = 1;
srcBox.back = 1;
Storage* gpuCopyableStorage;
if (mLastUpdatedStorage->SupportsCopyDst()) {
gpuCopyableStorage = mLastUpdatedStorage;
} else {
DAWN_TRY_ASSIGN(gpuCopyableStorage, GetOrCreateDstCopyableStorage());
DAWN_TRY(SyncStorage(commandContext, gpuCopyableStorage));
}
commandContext->CopySubresourceRegion(
gpuCopyableStorage->GetD3D11Buffer(), /*DstSubresource=*/0,
/*DstX=*/destinationOffset,
/*DstY=*/0,
/*DstZ=*/0, d3d11SourceBuffer, /*SrcSubresource=*/0, &srcBox);
IncrStorageRevAndMakeLatest(commandContext, gpuCopyableStorage);
return {};
}
MaybeError GPUUsableBuffer::PredicatedClear(
const ScopedSwapStateCommandRecordingContext* commandContext,
ID3D11Predicate* predicate,
uint8_t clearValue,
uint64_t offset,
uint64_t size) {
DAWN_ASSERT(size != 0);
// Don't use mapping; mapping is not affected by ID3D11Predicate.
// Allocate GPU writable storage and sync it. Note: we don't SetPredication() yet otherwise
// it would affect the syncing.
Storage* gpuWritableStorage;
DAWN_TRY_ASSIGN(gpuWritableStorage,
GetOrCreateStorage(StorageType::GPUWritableNonConstantBuffer));
DAWN_TRY(SyncStorage(commandContext, gpuWritableStorage));
// SetPredication() and clear the storage with UpdateSubresource1().
D3D11_BOX box;
box.left = static_cast<UINT>(offset);
box.top = 0;
box.front = 0;
box.right = static_cast<UINT>(offset + size);
box.bottom = 1;
box.back = 1;
// TODO(350493305): Change the function signature to accept a single uint64_t value so that we
// don't need to allocate a vector here.
absl::InlinedVector<uint8_t, sizeof(uint64_t)> clearData(size, clearValue);
// The update will *NOT* be performed if the predicate's data is false.
commandContext->GetD3D11DeviceContext3()->SetPredication(predicate, false);
commandContext->UpdateSubresource1(gpuWritableStorage->GetD3D11Buffer(),
/*DstSubresource=*/0,
/*pDstBox=*/&box, clearData.data(),
/*SrcRowPitch=*/0,
/*SrcDepthPitch=*/0,
/*CopyFlags=*/0);
commandContext->GetD3D11DeviceContext3()->SetPredication(nullptr, false);
IncrStorageRevAndMakeLatest(commandContext, gpuWritableStorage);
return {};
}
ComPtr<ID3D11Buffer> GPUUsableBuffer::GetD3D11MappedBuffer() {
return mMappedData ? mMappableStorage->GetD3D11Buffer() : nullptr;
}
} // namespace dawn::native::d3d11