Implement BufferMapExtendedUsages on D3D11
The new GPUUsableBuffer supports mapping usages in combination with any
other usages. This is achieved by managing several copies of the buffer,
each with its own ID3D11Buffer storage for a specific usage.
For example, a buffer that has MapWrite + Storage usage will have at
least two copies:
- One copy with D3D11_USAGE_DYNAMIC for mapping on CPU.
- One copy with D3D11_USAGE_DEFAULT for writing on GPU.
Internally this class synchronizes the content between the copies so that
when the buffer is mapped or used by the GPU, the appropriate copy has the
up-to-date content. The synchronizations are done in a way that minimizes
CPU stalls as much as possible.
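
The sync works by tracking a revision number per storage. A rough sketch
(simplified from SyncStorage() in this change; the slow CPU-mapped fallback
for storages without CopyDst support is omitted):

  MaybeError GPUUsableBuffer::SyncStorage(
      const ScopedCommandRecordingContext* ctx, Storage* dst) {
      // A storage is stale iff its revision is behind the last updated one.
      if (mLastUpdatedStorage->GetRevision() == dst->GetRevision()) {
          return {};  // Already up to date, no copy needed.
      }
      // Fast path: GPU-side copy between the two ID3D11Buffer objects.
      ctx->CopyResource(dst->GetD3D11Buffer(),
                        mLastUpdatedStorage->GetD3D11Buffer());
      dst->SetRevision(mLastUpdatedStorage->GetRevision());
      return {};
  }
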
Bug: 345471009
Change-Id: I78c51a2bed5adcbdb65d05b5fc0f41706a53d9d7
Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/191980
Commit-Queue: Quyen Le <lehoangquyen@chromium.org>
Reviewed-by: Corentin Wallez <cwallez@chromium.org>
Reviewed-by: Austin Eng <enga@chromium.org>
diff --git a/src/dawn/native/d3d11/BufferD3D11.cpp b/src/dawn/native/d3d11/BufferD3D11.cpp
index 307e905..af37447 100644
--- a/src/dawn/native/d3d11/BufferD3D11.cpp
+++ b/src/dawn/native/d3d11/BufferD3D11.cpp
@@ -52,13 +52,13 @@
namespace {
-constexpr wgpu::BufferUsage kD3D11GPUOnlyUniformBufferUsages =
- wgpu::BufferUsage::Uniform | wgpu::BufferUsage::CopyDst | wgpu::BufferUsage::CopySrc;
-
constexpr wgpu::BufferUsage kCopyUsages = wgpu::BufferUsage::CopySrc | wgpu::BufferUsage::CopyDst;
constexpr wgpu::BufferUsage kStagingUsages = kMappableBufferUsages | kCopyUsages;
+constexpr wgpu::BufferUsage kD3D11GPUWriteUsages =
+ wgpu::BufferUsage::Storage | kInternalStorageBuffer | wgpu::BufferUsage::Indirect;
+
// Resource usage Default Dynamic Immutable Staging
// ------------------------------------------------------------
// GPU-read Yes Yes Yes Yes[1]
@@ -341,65 +341,6 @@
ComPtr<ID3D11Buffer> mD3d11Buffer;
};
-// Buffer that can only be written/read by GPU.
-class GPUOnlyBuffer final : public GPUUsableBuffer {
- public:
- GPUOnlyBuffer(DeviceBase* device, const UnpackedPtr<BufferDescriptor>& descriptor)
- : GPUUsableBuffer(device, descriptor, /*internalMappableFlags=*/wgpu::BufferUsage::None) {}
-
- ResultOrError<ID3D11Buffer*> GetD3D11ConstantBuffer(
- const ScopedCommandRecordingContext* commandContext) override;
- ResultOrError<ID3D11Buffer*> GetD3D11NonConstantBuffer(
- const ScopedCommandRecordingContext* commandContext) override;
-
- ResultOrError<ComPtr<ID3D11ShaderResourceView>> UseAsSRV(
- const ScopedCommandRecordingContext* commandContext,
- uint64_t offset,
- uint64_t size) override;
- ResultOrError<ComPtr<ID3D11UnorderedAccessView1>> UseAsUAV(
- const ScopedCommandRecordingContext* commandContext,
- uint64_t offset,
- uint64_t size) override;
-
- MaybeError PredicatedClear(const ScopedSwapStateCommandRecordingContext* commandContext,
- ID3D11Predicate* predicate,
- uint8_t clearValue,
- uint64_t offset,
- uint64_t size) override;
-
- private:
- // Dawn API
- void DestroyImpl() override;
- void SetLabelImpl() override;
-
- MaybeError InitializeInternal() override;
-
- MaybeError CopyToInternal(const ScopedCommandRecordingContext* commandContext,
- uint64_t sourceOffset,
- size_t size,
- Buffer* destination,
- uint64_t destinationOffset) override;
- MaybeError CopyFromD3DInternal(const ScopedCommandRecordingContext* commandContext,
- ID3D11Buffer* srcD3D11Buffer,
- uint64_t sourceOffset,
- size_t size,
- uint64_t destinationOffset) override;
-
- MaybeError WriteInternal(const ScopedCommandRecordingContext* commandContext,
- uint64_t bufferOffset,
- const void* data,
- size_t size) override;
-
- MaybeError ClearPaddingInternal(const ScopedCommandRecordingContext* commandContext) override;
-
- // The buffer object for constant buffer usage.
- ComPtr<ID3D11Buffer> mD3d11ConstantBuffer;
- // The buffer object for non-constant buffer usages(e.g. storage buffer, vertex buffer, etc.)
- ComPtr<ID3D11Buffer> mD3d11NonConstantBuffer;
-
- bool mConstantBufferIsUpdated = true;
-};
-
// static
ResultOrError<Ref<Buffer>> Buffer::Create(Device* device,
const UnpackedPtr<BufferDescriptor>& descriptor,
@@ -415,7 +356,7 @@
} else if (IsStaging(descriptor->usage)) {
buffer = AcquireRef(new StagingBuffer(device, descriptor));
} else {
- buffer = AcquireRef(new GPUOnlyBuffer(device, descriptor));
+ buffer = AcquireRef(new GPUUsableBuffer(device, descriptor));
}
DAWN_TRY(buffer->Initialize(descriptor->mappedAtCreation, commandContext));
return buffer;
@@ -532,6 +473,11 @@
wgpu::MapMode mode) {
// Needn't map the buffer if this is for a previous mapAsync that was cancelled.
if (completedSerial >= mMapReadySerial) {
+ // Map the buffer, then initialize its data through the mapped pointer.
+ // The mapped pointer is always writable because:
+ // - If mode is Write, then it's already writable.
+ // - If mode is Read, only a staging buffer can be mapped. In that case,
+ // D3D11_MAP_READ_WRITE is used, so the mapped pointer is also writable.
// TODO(dawn:1705): make sure the map call is not blocked by the GPU operations.
DAWN_TRY(MapInternal(commandContext, mode));
@@ -772,10 +718,447 @@
return mBuffer ? mBuffer->mMappedData.get() : nullptr;
}
+// GPUUsableBuffer::Storage
+class GPUUsableBuffer::Storage : public RefCounted, NonCopyable {
+ public:
+ explicit Storage(ComPtr<ID3D11Buffer> d3d11Buffer) : mD3d11Buffer(std::move(d3d11Buffer)) {
+ D3D11_BUFFER_DESC desc;
+ mD3d11Buffer->GetDesc(&desc);
+ mD3d11Usage = desc.Usage;
+
+ mMappableCopyableFlags = wgpu::BufferUsage::CopySrc;
+
+ switch (mD3d11Usage) {
+ case D3D11_USAGE_STAGING:
+ mMappableCopyableFlags |= kMappableBufferUsages | wgpu::BufferUsage::CopyDst;
+ break;
+ case D3D11_USAGE_DYNAMIC:
+ mMappableCopyableFlags |= wgpu::BufferUsage::MapWrite;
+ break;
+ case D3D11_USAGE_DEFAULT:
+ mMappableCopyableFlags |= wgpu::BufferUsage::CopyDst;
+ break;
+ default:
+ break;
+ }
+
+ mIsConstantBuffer = desc.BindFlags & D3D11_BIND_CONSTANT_BUFFER;
+ }
+
+ ID3D11Buffer* GetD3D11Buffer() { return mD3d11Buffer.Get(); }
+
+ uint64_t GetRevision() const { return mRevision; }
+ void SetRevision(uint64_t revision) { mRevision = revision; }
+ bool IsFirstRevision() const { return mRevision == 0; }
+
+ bool IsConstantBuffer() const { return mIsConstantBuffer; }
+
+ bool IsCPUWritable() const { return mMappableCopyableFlags & wgpu::BufferUsage::MapWrite; }
+ bool IsCPUReadable() const { return mMappableCopyableFlags & wgpu::BufferUsage::MapRead; }
+ bool IsStaging() const { return IsCPUReadable(); }
+ bool SupportsCopyDst() const { return mMappableCopyableFlags & wgpu::BufferUsage::CopyDst; }
+ bool IsGPUWritable() const { return mD3d11Usage == D3D11_USAGE_DEFAULT; }
+
+ private:
+ ComPtr<ID3D11Buffer> mD3d11Buffer;
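+ // Monotonically increasing revision. A storage whose revision is behind the
+ // last updated storage's revision is stale and must be synced before use.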
+ uint64_t mRevision = 0;
+ D3D11_USAGE mD3d11Usage;
+ bool mIsConstantBuffer = false;
+ wgpu::BufferUsage mMappableCopyableFlags;
+};
+
// GPUUsableBuffer
+GPUUsableBuffer::GPUUsableBuffer(DeviceBase* device,
+ const UnpackedPtr<BufferDescriptor>& descriptor)
+ : Buffer(device,
+ descriptor,
+ /*internalMappableFlags=*/descriptor->usage & kMappableBufferUsages) {}
+
+GPUUsableBuffer::~GPUUsableBuffer() = default;
+
+void GPUUsableBuffer::DestroyImpl() {
+ // TODO(crbug.com/dawn/831): DestroyImpl is called from two places.
+ // - It may be called if the buffer is explicitly destroyed with APIDestroy.
+ // This case is NOT thread-safe and needs proper synchronization with other
+ // simultaneous uses of the buffer.
+ // - It may be called when the last ref to the buffer is dropped and the buffer
+ // is implicitly destroyed. This case is thread-safe because there are no
+ // other threads using the buffer since there are no other live refs.
+ Buffer::DestroyImpl();
+
+ mLastUpdatedStorage = nullptr;
+ mCPUWritableStorage = nullptr;
+ mMappedStorage = nullptr;
+
+ mStorages = {};
+}
+
+void GPUUsableBuffer::SetLabelImpl() {
+ for (auto ite = mStorages.begin(); ite != mStorages.end(); ++ite) {
+ auto storageType = static_cast<StorageType>(std::distance(mStorages.begin(), ite));
+ SetStorageLabel(storageType);
+ }
+}
+
+void GPUUsableBuffer::SetStorageLabel(StorageType storageType) {
+ static constexpr ityp::array<GPUUsableBuffer::StorageType, const char*,
+ static_cast<uint8_t>(StorageType::Count)>
+ kStorageTypeStrings = {
+ "Dawn_CPUWritableConstantBuffer",
+ "Dawn_GPUCopyDstConstantBuffer",
+ "Dawn_CPUWritableNonConstantBuffer",
+ "Dawn_GPUWritableNonConstantBuffer",
+ "Dawn_Staging",
+ };
+
+ if (!mStorages[storageType]) {
+ return;
+ }
+
+ SetDebugName(ToBackend(GetDevice()), mStorages[storageType]->GetD3D11Buffer(),
+ kStorageTypeStrings[storageType], GetLabel());
+}
+
+MaybeError GPUUsableBuffer::InitializeInternal() {
+ DAWN_ASSERT(!IsStaging(GetUsage()));
+
+ mStorages = {};
+
+ wgpu::BufferUsage usagesToHandle = GetUsage();
+
+ // We need to create a separate storage for uniform usage, because D3D11 doesn't allow
+ // a constant buffer to be used for other purposes.
+ if (usagesToHandle & wgpu::BufferUsage::Uniform) {
+ usagesToHandle &= ~(wgpu::BufferUsage::Uniform | wgpu::BufferUsage::CopySrc);
+
+ // Since D3D11 doesn't allow both CPU & GPU to write to a buffer, we need separate
+ // storages for CPU writes and GPU writes.
+ if (usagesToHandle & wgpu::BufferUsage::MapWrite) {
+ // Note: we favor CPU write over GPU write if MapWrite is present. If the buffer has
+ // GPU writable usages, the GPU writable storage will be lazily created later.
+ usagesToHandle &= ~wgpu::BufferUsage::MapWrite;
+ DAWN_TRY_ASSIGN(mLastUpdatedStorage,
+ GetOrCreateStorage(StorageType::CPUWritableConstantBuffer));
+ mCPUWritableStorage = mLastUpdatedStorage;
+ } else {
+ // For a constant buffer, the only supported GPU write operation is copy. So create
+ // one storage for that.
+ usagesToHandle &= ~wgpu::BufferUsage::CopyDst;
+ DAWN_TRY_ASSIGN(mLastUpdatedStorage,
+ GetOrCreateStorage(StorageType::GPUCopyDstConstantBuffer));
+ }
+ }
+
+ if (usagesToHandle == wgpu::BufferUsage::None) {
+ return {};
+ }
+
+ // Create separate storage for non-constant buffer usages if required.
+ if (!IsStaging(usagesToHandle)) {
+ if (usagesToHandle & wgpu::BufferUsage::MapWrite) {
+ // Note: we only need one CPU writable storage. If there are both constant buffer and
+ // non-constant buffer usages, we favor the CPU writable constant buffer, since that's
+ // most likely the common case where users want to update the constant buffer on CPU.
+ DAWN_ASSERT(mCPUWritableStorage == nullptr);
+ usagesToHandle &= ~wgpu::BufferUsage::MapWrite;
+ // If a buffer is created with both Storage and MapWrite usages, then
+ // we will lazily create a GPU writable storage later. Note: we favor CPU writable
+ // over GPU writable when creating non-constant buffer storage. This optimizes
+ // the most common cases, where MapWrite buffers are mostly updated by the CPU.
+ DAWN_TRY_ASSIGN(mLastUpdatedStorage,
+ GetOrCreateStorage(StorageType::CPUWritableNonConstantBuffer));
+ mCPUWritableStorage = mLastUpdatedStorage;
+ } else {
+ usagesToHandle &= ~wgpu::BufferUsage::CopyDst;
+ DAWN_TRY_ASSIGN(mLastUpdatedStorage,
+ GetOrCreateStorage(StorageType::GPUWritableNonConstantBuffer));
+ }
+ }
+
+ // Special storage for staging.
+ if (IsMappable(usagesToHandle)) {
+ DAWN_TRY_ASSIGN(mLastUpdatedStorage, GetOrCreateStorage(StorageType::Staging));
+ }
+
+ return {};
+}
+
+ResultOrError<GPUUsableBuffer::Storage*> GPUUsableBuffer::GetOrCreateStorage(
+ StorageType storageType) {
+ if (mStorages[storageType]) {
+ return mStorages[storageType].Get();
+ }
+ D3D11_BUFFER_DESC bufferDescriptor;
+ bufferDescriptor.ByteWidth = GetAllocatedSize();
+ bufferDescriptor.StructureByteStride = 0;
+
+ switch (storageType) {
+ case StorageType::CPUWritableConstantBuffer:
+ bufferDescriptor.Usage = D3D11_USAGE_DYNAMIC;
+ bufferDescriptor.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE;
+ bufferDescriptor.BindFlags = D3D11_BIND_CONSTANT_BUFFER;
+ bufferDescriptor.MiscFlags = 0;
+ break;
+ case StorageType::GPUCopyDstConstantBuffer:
+ bufferDescriptor.Usage = D3D11_USAGE_DEFAULT;
+ bufferDescriptor.CPUAccessFlags = 0;
+ bufferDescriptor.BindFlags = D3D11_BIND_CONSTANT_BUFFER;
+ bufferDescriptor.MiscFlags = 0;
+ break;
+ case StorageType::CPUWritableNonConstantBuffer: {
+ // Need to exclude GPU writable usages because a CPU writable buffer cannot be
+ // GPU writable in D3D11.
+ auto nonUniformUsage =
+ GetUsage() & ~(kD3D11GPUWriteUsages | wgpu::BufferUsage::Uniform);
+ bufferDescriptor.Usage = D3D11_USAGE_DYNAMIC;
+ bufferDescriptor.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE;
+ bufferDescriptor.BindFlags = D3D11BufferBindFlags(nonUniformUsage);
+ bufferDescriptor.MiscFlags = D3D11BufferMiscFlags(nonUniformUsage);
+ if (bufferDescriptor.BindFlags == 0) {
+ // Dynamic buffer requires at least one binding flag. If no binding flag is needed
+ // (one example is MapWrite | QueryResolve), then use D3D11_BIND_INDEX_BUFFER.
+ bufferDescriptor.BindFlags = D3D11_BIND_INDEX_BUFFER;
+ DAWN_ASSERT(bufferDescriptor.MiscFlags == 0);
+ }
+ } break;
+ case StorageType::GPUWritableNonConstantBuffer: {
+ // Need to exclude mapping usages.
+ const auto nonUniformUsage =
+ GetUsage() & ~(kMappableBufferUsages | wgpu::BufferUsage::Uniform);
+ bufferDescriptor.Usage = D3D11_USAGE_DEFAULT;
+ bufferDescriptor.CPUAccessFlags = 0;
+ bufferDescriptor.BindFlags = D3D11BufferBindFlags(nonUniformUsage);
+ bufferDescriptor.MiscFlags = D3D11BufferMiscFlags(nonUniformUsage);
+ } break;
+ case StorageType::Staging: {
+ bufferDescriptor.Usage = D3D11_USAGE_STAGING;
+ bufferDescriptor.CPUAccessFlags = D3D11_CPU_ACCESS_READ | D3D11_CPU_ACCESS_WRITE;
+ bufferDescriptor.BindFlags = 0;
+ bufferDescriptor.MiscFlags = 0;
+ } break;
+ case StorageType::Count:
+ DAWN_UNREACHABLE();
+ }
+
+ ComPtr<ID3D11Buffer> buffer;
+ DAWN_TRY(CheckOutOfMemoryHRESULT(
+ ToBackend(GetDevice())->GetD3D11Device()->CreateBuffer(&bufferDescriptor, nullptr, &buffer),
+ "ID3D11Device::CreateBuffer"));
+
+ mStorages[storageType] = AcquireRef(new Storage(std::move(buffer)));
+
+ SetStorageLabel(storageType);
+
+ return mStorages[storageType].Get();
+}
+
+ResultOrError<GPUUsableBuffer::Storage*> GPUUsableBuffer::GetOrCreateDstCopyableStorage() {
+ if (mStorages[StorageType::GPUCopyDstConstantBuffer]) {
+ return mStorages[StorageType::GPUCopyDstConstantBuffer].Get();
+ }
+ if (mStorages[StorageType::GPUWritableNonConstantBuffer]) {
+ return mStorages[StorageType::GPUWritableNonConstantBuffer].Get();
+ }
+
+ if (GetUsage() & wgpu::BufferUsage::Uniform) {
+ return GetOrCreateStorage(StorageType::GPUCopyDstConstantBuffer);
+ }
+
+ return GetOrCreateStorage(StorageType::GPUWritableNonConstantBuffer);
+}
+
+MaybeError GPUUsableBuffer::SyncStorage(const ScopedCommandRecordingContext* commandContext,
+ Storage* dstStorage) {
+ DAWN_ASSERT(mLastUpdatedStorage);
+ DAWN_ASSERT(dstStorage);
+ if (mLastUpdatedStorage->GetRevision() == dstStorage->GetRevision()) {
+ return {};
+ }
+
+ DAWN_ASSERT(commandContext);
+
+ if (dstStorage->SupportsCopyDst()) {
+ commandContext->CopyResource(dstStorage->GetD3D11Buffer(),
+ mLastUpdatedStorage->GetD3D11Buffer());
+ dstStorage->SetRevision(mLastUpdatedStorage->GetRevision());
+ return {};
+ }
+
+ // TODO(42241146): This is a slow path, usually only hit in an uncommon case:
+ // - GPU writes to a CPU writable buffer.
+ DAWN_ASSERT(dstStorage->IsCPUWritable());
+ Storage* stagingStorage;
+ DAWN_TRY_ASSIGN(stagingStorage, GetOrCreateStorage(StorageType::Staging));
+ DAWN_TRY(SyncStorage(commandContext, stagingStorage));
+ D3D11_MAPPED_SUBRESOURCE mappedSrcResource;
+ DAWN_TRY(CheckHRESULT(commandContext->Map(stagingStorage->GetD3D11Buffer(),
+ /*Subresource=*/0, D3D11_MAP_READ,
+ /*MapFlags=*/0, &mappedSrcResource),
+ "ID3D11DeviceContext::Map src"));
+
+ auto MapAndCopy = [](const ScopedCommandRecordingContext* commandContext, ID3D11Buffer* dst,
+ const void* srcData, size_t size) -> MaybeError {
+ D3D11_MAPPED_SUBRESOURCE mappedDstResource;
+ DAWN_TRY(CheckHRESULT(commandContext->Map(dst,
+ /*Subresource=*/0, D3D11_MAP_WRITE_DISCARD,
+ /*MapFlags=*/0, &mappedDstResource),
+ "ID3D11DeviceContext::Map dst"));
+ memcpy(mappedDstResource.pData, srcData, size);
+ commandContext->Unmap(dst,
+ /*Subresource=*/0);
+ return {};
+ };
+
+ auto result = MapAndCopy(commandContext, dstStorage->GetD3D11Buffer(), mappedSrcResource.pData,
+ GetAllocatedSize());
+
+ commandContext->Unmap(stagingStorage->GetD3D11Buffer(),
+ /*Subresource=*/0);
+
+ if (result.IsError()) {
+ return result;
+ }
+
+ dstStorage->SetRevision(mLastUpdatedStorage->GetRevision());
+
+ return {};
+}
+
+void GPUUsableBuffer::IncrStorageRevAndMakeLatest(
+ const ScopedCommandRecordingContext* commandContext,
+ Storage* dstStorage) {
+ DAWN_ASSERT(dstStorage->GetRevision() == mLastUpdatedStorage->GetRevision());
+ dstStorage->SetRevision(dstStorage->GetRevision() + 1);
+ mLastUpdatedStorage = dstStorage;
+
+ if (dstStorage->IsGPUWritable() && IsMappable(GetUsage())) {
+ // If this buffer is mappable and the last updated storage is GPU writable, we need to
+ // update the staging storage when the command buffer is flushed.
+ // This is to make sure the staging storage will contain the up-to-date GPU modified data.
+ commandContext->AddBufferForSyncingWithCPU(this);
+ }
+}
+
+MaybeError GPUUsableBuffer::SyncGPUWritesToStaging(
+ const ScopedCommandRecordingContext* commandContext) {
+ DAWN_ASSERT(IsMappable(GetUsage()));
+
+ // Only sync the staging storage. Other CPU writable storages can later be updated by
+ // copying from the staging storage with Map(MAP_WRITE_DISCARD), which won't stall the
+ // CPU. Syncing them here instead would require a CPU stall, since CPU writable
+ // storages don't support CopyDst.
+ Storage* stagingStorage;
+ DAWN_TRY_ASSIGN(stagingStorage, GetOrCreateStorage(StorageType::Staging));
+
+ return SyncStorage(commandContext, stagingStorage);
+}
+
+MaybeError GPUUsableBuffer::MapInternal(const ScopedCommandRecordingContext* commandContext,
+ wgpu::MapMode mode) {
+ DAWN_ASSERT(!mMappedData);
+
+ D3D11_MAP mapType;
+ Storage* storage;
+ if (mode == wgpu::MapMode::Write) {
+ DAWN_ASSERT(!mCPUWritableStorage->IsStaging());
+ // Use D3D11_MAP_WRITE_NO_OVERWRITE to guarantee to the driver that we won't
+ // overwrite data still in use by the GPU. MapAsync() already ensures that any GPU
+ // commands using this buffer have finished. In return, the driver won't stall the
+ // CPU for map access.
+ mapType = D3D11_MAP_WRITE_NO_OVERWRITE;
+ storage = mCPUWritableStorage;
+ } else {
+ // Always map the buffer with D3D11_MAP_READ_WRITE if possible, even for
+ // wgpu::MapMode::Read, because we need write permission to initialize the buffer.
+ // TODO(dawn:1705): investigate the performance impact of mapping with
+ // D3D11_MAP_READ_WRITE.
+ mapType = D3D11_MAP_READ_WRITE;
+ // If the buffer has MapRead usage, a staging storage should already have been
+ // created in InitializeInternal().
+ storage = mStorages[StorageType::Staging].Get();
+ }
+
+ DAWN_ASSERT(storage);
+
+ // Sync previously modified content before mapping.
+ DAWN_TRY(SyncStorage(commandContext, storage));
+
+ D3D11_MAPPED_SUBRESOURCE mappedResource;
+ DAWN_TRY(CheckHRESULT(commandContext->Map(storage->GetD3D11Buffer(),
+ /*Subresource=*/0, mapType,
+ /*MapFlags=*/0, &mappedResource),
+ "ID3D11DeviceContext::Map"));
+ mMappedData = static_cast<uint8_t*>(mappedResource.pData);
+ mMappedStorage = storage;
+
+ return {};
+}
+
+void GPUUsableBuffer::UnmapInternal(const ScopedCommandRecordingContext* commandContext) {
+ DAWN_ASSERT(mMappedData);
+ commandContext->Unmap(mMappedStorage->GetD3D11Buffer(),
+ /*Subresource=*/0);
+ mMappedData = nullptr;
+ // Since D3D11_MAP_READ_WRITE is used even for MapMode::Read, we need to increment the
+ // revision.
+ IncrStorageRevAndMakeLatest(commandContext, mMappedStorage);
+
+ auto* stagingStorage = mStorages[StorageType::Staging].Get();
+
+ if (stagingStorage && mLastUpdatedStorage != stagingStorage) {
+ // If we have a staging storage (for MapRead), it has to be updated so that a later
+ // Map + Read on this buffer can avoid a stall. Note: this is an uncommon case where
+ // the buffer is created with both MapRead & MapWrite. Technically it's impossible for
+ // the following code to return an error, because in the staging storage case only
+ // CopyResource() is used and no extra allocation is needed.
+ [[maybe_unused]] bool hasError =
+ GetDevice()->ConsumedError(SyncStorage(commandContext, stagingStorage));
+ DAWN_ASSERT(!hasError);
+ }
+
+ mMappedStorage = nullptr;
+}
+
+ResultOrError<ID3D11Buffer*> GPUUsableBuffer::GetD3D11ConstantBuffer(
+ const ScopedCommandRecordingContext* commandContext) {
+ auto* storage = mStorages[StorageType::CPUWritableConstantBuffer].Get();
+ if (storage && storage->GetRevision() == mLastUpdatedStorage->GetRevision()) {
+ // The CPUWritableConstantBuffer is up to date, use it directly.
+ return storage->GetD3D11Buffer();
+ }
+
+ // In all other cases we are going to use the GPUCopyDstConstantBuffer because either
+ // it is up to date, or we need to update the constant buffer data, and doing a
+ // CopyResource on the GPU is always more efficient than paths involving a memcpy (or
+ // potentially a stall).
+ DAWN_TRY_ASSIGN(storage, GetOrCreateStorage(StorageType::GPUCopyDstConstantBuffer));
+ DAWN_TRY(SyncStorage(commandContext, storage));
+ return storage->GetD3D11Buffer();
+}
+
+ResultOrError<ID3D11Buffer*> GPUUsableBuffer::GetD3D11NonConstantBuffer(
+ const ScopedCommandRecordingContext* commandContext) {
+ auto* storage = mStorages[StorageType::CPUWritableNonConstantBuffer].Get();
+ if (storage && storage->GetRevision() == mLastUpdatedStorage->GetRevision()) {
+ // The CPUWritableNonConstantBuffer is up to date, use it directly.
+ return storage->GetD3D11Buffer();
+ }
+
+ // In all other cases we are going to use the GPUWritableNonConstantBuffer because
+ // either it is up to date, or we need to update the non-constant buffer data, and
+ // doing a CopyResource on the GPU is always more efficient than paths involving a
+ // memcpy (or potentially a stall).
+ DAWN_TRY_ASSIGN(storage, GetOrCreateStorage(StorageType::GPUWritableNonConstantBuffer));
+ DAWN_TRY(SyncStorage(commandContext, storage));
+ return storage->GetD3D11Buffer();
+}
+
ID3D11Buffer* GPUUsableBuffer::GetD3D11ConstantBufferForTesting() {
+ if (!mStorages[StorageType::CPUWritableConstantBuffer] &&
+ !mStorages[StorageType::GPUCopyDstConstantBuffer]) {
+ return nullptr;
+ }
+ auto tempCommandContext = ToBackend(GetDevice()->GetQueue())
+ ->GetScopedPendingCommandContext(QueueBase::SubmitMode::Normal);
ID3D11Buffer* buffer;
- if (GetDevice()->ConsumedError(GetD3D11ConstantBuffer(nullptr), &buffer)) {
+ if (GetDevice()->ConsumedError(GetD3D11ConstantBuffer(&tempCommandContext), &buffer)) {
return nullptr;
}
@@ -783,8 +1166,14 @@
}
ID3D11Buffer* GPUUsableBuffer::GetD3D11NonConstantBufferForTesting() {
+ if (!mStorages[StorageType::CPUWritableNonConstantBuffer] &&
+ !mStorages[StorageType::GPUWritableNonConstantBuffer]) {
+ return nullptr;
+ }
+ auto tempCommandContext = ToBackend(GetDevice()->GetQueue())
+ ->GetScopedPendingCommandContext(QueueBase::SubmitMode::Normal);
ID3D11Buffer* buffer;
- if (GetDevice()->ConsumedError(GetD3D11NonConstantBuffer(nullptr), &buffer)) {
+ if (GetDevice()->ConsumedError(GetD3D11NonConstantBuffer(&tempCommandContext), &buffer)) {
return nullptr;
}
@@ -841,6 +1230,35 @@
return uav;
}
+ResultOrError<ComPtr<ID3D11ShaderResourceView>> GPUUsableBuffer::UseAsSRV(
+ const ScopedCommandRecordingContext* commandContext,
+ uint64_t offset,
+ uint64_t size) {
+ ID3D11Buffer* d3dBuffer;
+
+ DAWN_TRY_ASSIGN(d3dBuffer, GetD3D11NonConstantBuffer(commandContext));
+
+ return CreateD3D11ShaderResourceViewFromD3DBuffer(d3dBuffer, offset, size);
+}
+
+ResultOrError<ComPtr<ID3D11UnorderedAccessView1>> GPUUsableBuffer::UseAsUAV(
+ const ScopedCommandRecordingContext* commandContext,
+ uint64_t offset,
+ uint64_t size) {
+ Storage* storage = nullptr;
+ DAWN_TRY_ASSIGN(storage, GetOrCreateStorage(StorageType::GPUWritableNonConstantBuffer));
+ DAWN_TRY(SyncStorage(commandContext, storage));
+
+ ComPtr<ID3D11UnorderedAccessView1> uav;
+ DAWN_TRY_ASSIGN(
+ uav, CreateD3D11UnorderedAccessViewFromD3DBuffer(storage->GetD3D11Buffer(), offset, size));
+
+ // Since UAV will modify the storage's content, increment its revision.
+ IncrStorageRevAndMakeLatest(commandContext, storage);
+
+ return uav;
+}
+
MaybeError GPUUsableBuffer::UpdateD3D11ConstantBuffer(
const ScopedCommandRecordingContext* commandContext,
ID3D11Buffer* d3d11Buffer,
@@ -852,21 +1270,31 @@
// For a full size write, UpdateSubresource1(D3D11_COPY_DISCARD) can be used to update
// constant buffer.
- // WriteInternal() can be called with GetAllocatedSize(). We treat it as a full buffer write as
- // well.
- bool fullSizeUpdate = size >= GetSize() && offset == 0;
+ // WriteInternal() can be called with GetAllocatedSize(). We treat it as a full buffer write
+ // as well.
+ const bool fullSizeUpdate = size >= GetSize() && offset == 0;
+ const bool canPartialUpdate =
+ ToBackend(GetDevice())->GetDeviceInfo().supportsPartialConstantBufferUpdate;
if (fullSizeUpdate || firstTimeUpdate) {
+ const bool requiresFullAllocatedSizeWrite = !canPartialUpdate && !firstTimeUpdate;
+
// Offset and size must be aligned with 16 for using UpdateSubresource1() on constant
// buffer.
size_t alignedOffset;
if (offset < kConstantBufferUpdateAlignment - 1) {
alignedOffset = 0;
} else {
- // For offset we align to value <= offset.
+ DAWN_ASSERT(firstTimeUpdate);
+ // For offset we align down (to a value <= offset).
alignedOffset = Align(offset - (kConstantBufferUpdateAlignment - 1),
kConstantBufferUpdateAlignment);
}
- size_t alignedEnd = Align(offset + size, kConstantBufferUpdateAlignment);
+ size_t alignedEnd;
+ if (requiresFullAllocatedSizeWrite) {
+ alignedEnd = GetAllocatedSize();
+ } else {
+ alignedEnd = Align(offset + size, kConstantBufferUpdateAlignment);
+ }
size_t alignedSize = alignedEnd - alignedOffset;
DAWN_ASSERT((alignedSize % kConstantBufferUpdateAlignment) == 0);
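+ // Example (first-time update): offset=20 and size=8 with 16-byte alignment give
+ // alignedOffset=16, alignedEnd=32 and alignedSize=16.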
@@ -897,7 +1325,8 @@
dstBox.bottom = 1;
dstBox.back = 1;
// For full buffer write, D3D11_COPY_DISCARD is used to avoid GPU CPU synchronization.
- commandContext->UpdateSubresource1(d3d11Buffer, /*DstSubresource=*/0, &dstBox, data,
+ commandContext->UpdateSubresource1(d3d11Buffer, /*DstSubresource=*/0,
+ requiresFullAllocatedSizeWrite ? nullptr : &dstBox, data,
/*SrcRowPitch=*/0,
/*SrcDepthPitch=*/0,
/*CopyFlags=*/D3D11_COPY_DISCARD);
@@ -919,195 +1348,90 @@
return {};
}
-// GPUOnlyBuffer
-void GPUOnlyBuffer::DestroyImpl() {
- // TODO(crbug.com/dawn/831): DestroyImpl is called from two places.
- // - It may be called if the buffer is explicitly destroyed with APIDestroy.
- // This case is NOT thread-safe and needs proper synchronization with other
- // simultaneous uses of the buffer.
- // - It may be called when the last ref to the buffer is dropped and the buffer
- // is implicitly destroyed. This case is thread-safe because there are no
- // other threads using the buffer since there are no other live refs.
- GPUUsableBuffer::DestroyImpl();
-
- mD3d11ConstantBuffer = nullptr;
- mD3d11NonConstantBuffer = nullptr;
-}
-
-void GPUOnlyBuffer::SetLabelImpl() {
- SetDebugName(ToBackend(GetDevice()), mD3d11NonConstantBuffer.Get(), "Dawn_Buffer", GetLabel());
- SetDebugName(ToBackend(GetDevice()), mD3d11ConstantBuffer.Get(), "Dawn_ConstantBuffer",
- GetLabel());
-}
-
-MaybeError GPUOnlyBuffer::InitializeInternal() {
- DAWN_ASSERT(!IsMappable(GetUsage()));
-
- bool needsConstantBuffer = GetUsage() & wgpu::BufferUsage::Uniform;
- bool onlyNeedsConstantBuffer =
- needsConstantBuffer && IsSubset(GetUsage(), kD3D11GPUOnlyUniformBufferUsages);
-
- if (!onlyNeedsConstantBuffer) {
- // Create mD3d11NonConstantBuffer
- wgpu::BufferUsage nonUniformUsage = GetUsage() & ~wgpu::BufferUsage::Uniform;
- D3D11_BUFFER_DESC bufferDescriptor;
- bufferDescriptor.ByteWidth = mAllocatedSize;
- bufferDescriptor.Usage = D3D11_USAGE_DEFAULT;
- bufferDescriptor.BindFlags = D3D11BufferBindFlags(nonUniformUsage);
- bufferDescriptor.CPUAccessFlags = 0;
- bufferDescriptor.MiscFlags = D3D11BufferMiscFlags(nonUniformUsage);
- bufferDescriptor.StructureByteStride = 0;
-
- DAWN_TRY(CheckOutOfMemoryHRESULT(
- ToBackend(GetDevice())
- ->GetD3D11Device()
- ->CreateBuffer(&bufferDescriptor, nullptr, &mD3d11NonConstantBuffer),
- "ID3D11Device::CreateBuffer"));
- }
-
- if (needsConstantBuffer) {
- // Create mD3d11ConstantBuffer
- D3D11_BUFFER_DESC bufferDescriptor;
- bufferDescriptor.ByteWidth = mAllocatedSize;
- bufferDescriptor.Usage = D3D11_USAGE_DEFAULT;
- bufferDescriptor.BindFlags = D3D11_BIND_CONSTANT_BUFFER;
- bufferDescriptor.CPUAccessFlags = 0;
- bufferDescriptor.MiscFlags = 0;
- bufferDescriptor.StructureByteStride = 0;
-
- DAWN_TRY(CheckOutOfMemoryHRESULT(
- ToBackend(GetDevice())
- ->GetD3D11Device()
- ->CreateBuffer(&bufferDescriptor, nullptr, &mD3d11ConstantBuffer),
- "ID3D11Device::CreateBuffer"));
- }
-
- DAWN_ASSERT(mD3d11NonConstantBuffer || mD3d11ConstantBuffer);
-
- return {};
-}
-
-MaybeError GPUOnlyBuffer::PredicatedClear(
- const ScopedSwapStateCommandRecordingContext* commandContext,
- ID3D11Predicate* predicate,
- uint8_t clearValue,
- uint64_t offset,
- uint64_t size) {
- // The clear will *NOT* be performed if the predicate's data is false.
- commandContext->GetD3D11DeviceContext4()->SetPredication(predicate, false);
- auto result = Clear(commandContext, clearValue, offset, size);
- commandContext->GetD3D11DeviceContext4()->SetPredication(nullptr, false);
- return result;
-}
-
-ResultOrError<ID3D11Buffer*> GPUOnlyBuffer::GetD3D11ConstantBuffer(
- const ScopedCommandRecordingContext* commandContext) {
- if (mConstantBufferIsUpdated) {
- return mD3d11ConstantBuffer.Get();
- }
-
- DAWN_ASSERT(mD3d11NonConstantBuffer);
- DAWN_ASSERT(mD3d11ConstantBuffer);
- if (commandContext) {
- commandContext->CopyResource(mD3d11ConstantBuffer.Get(), mD3d11NonConstantBuffer.Get());
- } else {
- auto tempCommandContext =
- ToBackend(GetDevice()->GetQueue())
- ->GetScopedPendingCommandContext(QueueBase::SubmitMode::Normal);
- tempCommandContext.CopyResource(mD3d11ConstantBuffer.Get(), mD3d11NonConstantBuffer.Get());
- }
- mConstantBufferIsUpdated = true;
-
- return mD3d11ConstantBuffer.Get();
-}
-
-ResultOrError<ID3D11Buffer*> GPUOnlyBuffer::GetD3D11NonConstantBuffer(
- const ScopedCommandRecordingContext*) {
- return mD3d11NonConstantBuffer.Get();
-}
-
-ResultOrError<ComPtr<ID3D11ShaderResourceView>>
-GPUOnlyBuffer::UseAsSRV(const ScopedCommandRecordingContext*, uint64_t offset, uint64_t size) {
- return CreateD3D11ShaderResourceViewFromD3DBuffer(mD3d11NonConstantBuffer.Get(), offset, size);
-}
-
-ResultOrError<ComPtr<ID3D11UnorderedAccessView1>>
-GPUOnlyBuffer::UseAsUAV(const ScopedCommandRecordingContext*, uint64_t offset, uint64_t size) {
- ComPtr<ID3D11UnorderedAccessView1> uav;
- DAWN_TRY_ASSIGN(uav, CreateD3D11UnorderedAccessViewFromD3DBuffer(mD3d11NonConstantBuffer.Get(),
- offset, size));
-
- // Since UAV will modify the non-constant buffer's content, the constant buffer's content would
- // also need to be updated afterwards.
- mConstantBufferIsUpdated = false;
-
- return uav;
-}
-
-MaybeError GPUOnlyBuffer::WriteInternal(const ScopedCommandRecordingContext* commandContext,
- uint64_t offset,
- const void* data,
- size_t size) {
+MaybeError GPUUsableBuffer::WriteInternal(const ScopedCommandRecordingContext* commandContext,
+ uint64_t offset,
+ const void* data,
+ size_t size) {
if (size == 0) {
return {};
}
- if (mD3d11NonConstantBuffer) {
- D3D11_BOX box;
- box.left = static_cast<UINT>(offset);
- box.top = 0;
- box.front = 0;
- box.right = static_cast<UINT>(offset + size);
- box.bottom = 1;
- box.back = 1;
- commandContext->UpdateSubresource1(mD3d11NonConstantBuffer.Get(), /*DstSubresource=*/0,
- /*pDstBox=*/&box, data,
- /*SrcRowPitch=*/0,
- /*SrcDepthPitch=*/0,
- /*CopyFlags=*/0);
- if (!mD3d11ConstantBuffer) {
- return {};
- }
+ // Map the buffer if possible, so WriteInternal() can write to the mapped memory
+ // directly.
+ if (IsCPUWritable() &&
+ mLastUsageSerial <= GetDevice()->GetQueue()->GetCompletedCommandSerial()) {
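+ // All GPU commands using this buffer have completed, so mapping here won't stall.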
+ ScopedMap scopedMap;
+ DAWN_TRY_ASSIGN(scopedMap, ScopedMap::Create(commandContext, this, wgpu::MapMode::Write));
- // if mConstantBufferIsUpdated is false, the content of mD3d11ConstantBuffer will be
- // updated by EnsureConstantBufferIsUpdated() when the constant buffer is about to be used.
- if (!mConstantBufferIsUpdated) {
- return {};
- }
-
- // Copy the modified part of the mD3d11NonConstantBuffer to mD3d11ConstantBuffer.
- commandContext->CopySubresourceRegion(
- mD3d11ConstantBuffer.Get(), /*DstSubresource=*/0, /*DstX=*/offset,
- /*DstY=*/0,
- /*DstZ=*/0, mD3d11NonConstantBuffer.Get(), /*SrcSubresource=*/0, /*pSrcBux=*/&box);
-
+ DAWN_ASSERT(scopedMap.GetMappedData());
+ memcpy(scopedMap.GetMappedData() + offset, data, size);
return {};
}
- DAWN_ASSERT(mD3d11ConstantBuffer);
+ // WriteInternal() can be called with GetAllocatedSize(). We treat it as a full buffer write
+ // as well.
+ bool fullSizeWrite = size >= GetSize() && offset == 0;
- return UpdateD3D11ConstantBuffer(commandContext, mD3d11ConstantBuffer.Get(),
- /*firstUpdate=*/false, offset, data, size);
+ // Mapping the buffer at this point would stall the CPU, so instead create a GPU
+ // copyable storage and use UpdateSubresource1 on it below. Note: if we have both
+ // constant buffer and non-constant buffer storages, we favor writing to the
+ // non-constant buffer because it has no alignment requirement.
+ Storage* gpuCopyableStorage = mStorages[StorageType::GPUWritableNonConstantBuffer].Get();
+ if (!gpuCopyableStorage) {
+ DAWN_TRY_ASSIGN(gpuCopyableStorage, GetOrCreateDstCopyableStorage());
+ }
+
+ if (!fullSizeWrite) {
+ DAWN_TRY(SyncStorage(commandContext, gpuCopyableStorage));
+ }
+
+ const bool firstTimeUpdate = gpuCopyableStorage->IsFirstRevision();
+
+ // We are going to write to the storage in all code paths, so update the revision now.
+ IncrStorageRevAndMakeLatest(commandContext, gpuCopyableStorage);
+
+ if (gpuCopyableStorage->IsConstantBuffer()) {
+ return UpdateD3D11ConstantBuffer(commandContext, gpuCopyableStorage->GetD3D11Buffer(),
+ firstTimeUpdate, offset, data, size);
+ }
+
+ D3D11_BOX box;
+ box.left = static_cast<UINT>(offset);
+ box.top = 0;
+ box.front = 0;
+ box.right = static_cast<UINT>(offset + size);
+ box.bottom = 1;
+ box.back = 1;
+ commandContext->UpdateSubresource1(gpuCopyableStorage->GetD3D11Buffer(),
+ /*DstSubresource=*/0,
+ /*pDstBox=*/&box, data,
+ /*SrcRowPitch=*/0,
+ /*SrcDepthPitch=*/0,
+ /*CopyFlags=*/0);
+
+ // No need to update the constant buffer at this point. When a command buffer wants
+ // to bind the constant buffer in a render/compute pass, it will call
+ // GetD3D11ConstantBuffer() and the constant buffer will be synced there. WriteBuffer()
+ // cannot be called inside a render/compute pass, so no sync is needed here.
+ return {};
}
-MaybeError GPUOnlyBuffer::CopyToInternal(const ScopedCommandRecordingContext* commandContext,
- uint64_t sourceOffset,
- size_t size,
- Buffer* destination,
- uint64_t destinationOffset) {
- ID3D11Buffer* d3d11SourceBuffer =
- mD3d11NonConstantBuffer ? mD3d11NonConstantBuffer.Get() : mD3d11ConstantBuffer.Get();
- DAWN_ASSERT(d3d11SourceBuffer);
+MaybeError GPUUsableBuffer::CopyToInternal(const ScopedCommandRecordingContext* commandContext,
+ uint64_t sourceOffset,
+ size_t size,
+ Buffer* destination,
+ uint64_t destinationOffset) {
+ ID3D11Buffer* d3d11SourceBuffer = mLastUpdatedStorage->GetD3D11Buffer();
return destination->CopyFromD3DInternal(commandContext, d3d11SourceBuffer, sourceOffset, size,
destinationOffset);
}
-MaybeError GPUOnlyBuffer::CopyFromD3DInternal(const ScopedCommandRecordingContext* commandContext,
- ID3D11Buffer* d3d11SourceBuffer,
- uint64_t sourceOffset,
- size_t size,
- uint64_t destinationOffset) {
+MaybeError GPUUsableBuffer::CopyFromD3DInternal(const ScopedCommandRecordingContext* commandContext,
+ ID3D11Buffer* d3d11SourceBuffer,
+ uint64_t sourceOffset,
+ size_t size,
+ uint64_t destinationOffset) {
D3D11_BOX srcBox;
srcBox.left = static_cast<UINT>(sourceOffset);
srcBox.top = 0;
@@ -1116,53 +1440,61 @@
srcBox.bottom = 1;
srcBox.back = 1;
- if (mD3d11NonConstantBuffer) {
- commandContext->CopySubresourceRegion(mD3d11NonConstantBuffer.Get(), /*DstSubresource=*/0,
- /*DstX=*/destinationOffset,
- /*DstY=*/0,
- /*DstZ=*/0, d3d11SourceBuffer, /*SrcSubresource=*/0,
- &srcBox);
- }
+ Storage* gpuCopyableStorage;
+ DAWN_TRY_ASSIGN(gpuCopyableStorage, GetOrCreateDstCopyableStorage());
+ DAWN_TRY(SyncStorage(commandContext, gpuCopyableStorage));
- // if mConstantBufferIsUpdated is false, the content of mD3d11ConstantBuffer will be
- // updated by EnsureConstantBufferIsUpdated() when the constant buffer is about to be used.
- if (!mConstantBufferIsUpdated) {
- return {};
- }
+ commandContext->CopySubresourceRegion(
+ gpuCopyableStorage->GetD3D11Buffer(), /*DstSubresource=*/0,
+ /*DstX=*/destinationOffset,
+ /*DstY=*/0,
+ /*DstZ=*/0, d3d11SourceBuffer, /*SrcSubresource=*/0, &srcBox);
- if (mD3d11ConstantBuffer) {
- commandContext->CopySubresourceRegion(mD3d11ConstantBuffer.Get(), /*DstSubresource=*/0,
- /*DstX=*/destinationOffset,
- /*DstY=*/0,
- /*DstZ=*/0, d3d11SourceBuffer, /*SrcSubresource=*/0,
- &srcBox);
- }
+ IncrStorageRevAndMakeLatest(commandContext, gpuCopyableStorage);
return {};
}
-MaybeError GPUOnlyBuffer::ClearPaddingInternal(
- const ScopedCommandRecordingContext* commandContext) {
- uint32_t paddingBytes = GetAllocatedSize() - GetSize();
- if (paddingBytes == 0) {
- return {};
- }
+MaybeError GPUUsableBuffer::PredicatedClear(
+ const ScopedSwapStateCommandRecordingContext* commandContext,
+ ID3D11Predicate* predicate,
+ uint8_t clearValue,
+ uint64_t offset,
+ uint64_t size) {
+ DAWN_ASSERT(size != 0);
- uint32_t clearSize = paddingBytes;
- uint64_t clearOffset = GetSize();
- // 'UpdateSubresource1' is more preferable for updating uniform buffers, as it incurs no
- // GPU stall.
- if (mD3d11ConstantBuffer && !mD3d11NonConstantBuffer) {
- clearSize = Align(paddingBytes, kConstantBufferUpdateAlignment);
- clearOffset = GetAllocatedSize() - clearSize;
+ // Don't use mapping: mapping is not affected by ID3D11Predicate.
+ // Allocate a GPU writable storage and sync it. Note: we don't call SetPredication()
+ // yet, otherwise it would affect the syncing.
+ Storage* gpuWritableStorage;
+ DAWN_TRY_ASSIGN(gpuWritableStorage,
+ GetOrCreateStorage(StorageType::GPUWritableNonConstantBuffer));
+ DAWN_TRY(SyncStorage(commandContext, gpuWritableStorage));
- std::vector<uint8_t> clearData(clearSize, 0);
- DAWN_TRY(UpdateD3D11ConstantBuffer(commandContext, mD3d11ConstantBuffer.Get(),
- /*firstTimeUpdate=*/true, clearOffset, clearData.data(),
- clearSize));
- } else {
- DAWN_TRY(ClearInternal(commandContext, 0, clearOffset, paddingBytes));
- }
+ // SetPredication() and clear the storage with UpdateSubresource1().
+ D3D11_BOX box;
+ box.left = static_cast<UINT>(offset);
+ box.top = 0;
+ box.front = 0;
+ box.right = static_cast<UINT>(offset + size);
+ box.bottom = 1;
+ box.back = 1;
+
+ // TODO(350493305): Change the function signature to accept a single uint64_t value,
+ // so that we don't need to allocate a vector here.
+ absl::InlinedVector<uint8_t, sizeof(uint64_t)> clearData(size, clearValue);
+
+ // The update will *NOT* be performed if the predicate's data is false.
+ commandContext->GetD3D11DeviceContext4()->SetPredication(predicate, false);
+ commandContext->UpdateSubresource1(gpuWritableStorage->GetD3D11Buffer(),
+ /*DstSubresource=*/0,
+ /*pDstBox=*/&box, clearData.data(),
+ /*SrcRowPitch=*/0,
+ /*SrcDepthPitch=*/0,
+ /*CopyFlags=*/0);
+ commandContext->GetD3D11DeviceContext4()->SetPredication(nullptr, false);
+
+ IncrStorageRevAndMakeLatest(commandContext, gpuWritableStorage);
return {};
}
diff --git a/src/dawn/native/d3d11/BufferD3D11.h b/src/dawn/native/d3d11/BufferD3D11.h
index 268ebbd9..e37504a 100644
--- a/src/dawn/native/d3d11/BufferD3D11.h
+++ b/src/dawn/native/d3d11/BufferD3D11.h
@@ -32,6 +32,7 @@
#include <memory>
#include <utility>
+#include "dawn/common/ityp_array.h"
#include "dawn/native/Buffer.h"
#include "dawn/native/d3d/d3d_platform.h"
#include "dawn/native/d3d11/Forward.h"
@@ -181,29 +182,70 @@
ExecutionSerial mMapReadySerial = kMaxExecutionSerial;
};
-// Buffer that can be used by GPU.
-class GPUUsableBuffer : public Buffer {
+// Buffer that can be used by the GPU. It manages several copies of the buffer, each with its
+// own ID3D11Buffer storage for a specific usage. For example, a buffer that has MapWrite +
+// Storage usage will have at least two copies:
+// - One copy with D3D11_USAGE_DYNAMIC for mapping on CPU.
+// - One copy with D3D11_USAGE_DEFAULT for writing on GPU.
+// Internally this class synchronizes the content between the copies so that when the buffer is
+// mapped or used by the GPU, the appropriate copy has the up-to-date content. The
+// synchronizations are done in a way that minimizes CPU stalls as much as possible.
+// TODO(349848481): Consider making this the only Buffer class since it could cover all use cases.
+class GPUUsableBuffer final : public Buffer {
public:
- virtual ResultOrError<ID3D11Buffer*> GetD3D11ConstantBuffer(
- const ScopedCommandRecordingContext* commandContext) = 0;
- virtual ResultOrError<ID3D11Buffer*> GetD3D11NonConstantBuffer(
- const ScopedCommandRecordingContext* commandContext) = 0;
+ GPUUsableBuffer(DeviceBase* device, const UnpackedPtr<BufferDescriptor>& descriptor);
+ ~GPUUsableBuffer() override;
+
+ ResultOrError<ID3D11Buffer*> GetD3D11ConstantBuffer(
+ const ScopedCommandRecordingContext* commandContext);
+ ResultOrError<ID3D11Buffer*> GetD3D11NonConstantBuffer(
+ const ScopedCommandRecordingContext* commandContext);
+
ID3D11Buffer* GetD3D11ConstantBufferForTesting();
ID3D11Buffer* GetD3D11NonConstantBufferForTesting();
- virtual ResultOrError<ComPtr<ID3D11ShaderResourceView>> UseAsSRV(
- const ScopedCommandRecordingContext* commandContext,
- uint64_t offset,
- uint64_t size) = 0;
+ ResultOrError<ComPtr<ID3D11ShaderResourceView>>
+ UseAsSRV(const ScopedCommandRecordingContext* commandContext, uint64_t offset, uint64_t size);
+ ResultOrError<ComPtr<ID3D11UnorderedAccessView1>>
+ UseAsUAV(const ScopedCommandRecordingContext* commandContext, uint64_t offset, uint64_t size);
- // Use this buffer as UAV and mark it as being mutated by shader.
- virtual ResultOrError<ComPtr<ID3D11UnorderedAccessView1>> UseAsUAV(
- const ScopedCommandRecordingContext* commandContext,
- uint64_t offset,
- uint64_t size) = 0;
+ MaybeError PredicatedClear(const ScopedSwapStateCommandRecordingContext* commandContext,
+ ID3D11Predicate* predicate,
+ uint8_t clearValue,
+ uint64_t offset,
+ uint64_t size) override;
- protected:
- using Buffer::Buffer;
+ // Make sure CPU accessible storages are up-to-date. This is usually called at the end of a
+ // command buffer after the buffer was modified on the GPU.
+ MaybeError SyncGPUWritesToStaging(const ScopedCommandRecordingContext* commandContext);
+
+ private:
+ class Storage;
+
+ // Dawn API
+ void DestroyImpl() override;
+ void SetLabelImpl() override;
+
+ MaybeError InitializeInternal() override;
+ MaybeError MapInternal(const ScopedCommandRecordingContext* commandContext,
+ wgpu::MapMode mode) override;
+ void UnmapInternal(const ScopedCommandRecordingContext* commandContext) override;
+
+ MaybeError CopyToInternal(const ScopedCommandRecordingContext* commandContext,
+ uint64_t sourceOffset,
+ size_t size,
+ Buffer* destination,
+ uint64_t destinationOffset) override;
+ MaybeError CopyFromD3DInternal(const ScopedCommandRecordingContext* commandContext,
+ ID3D11Buffer* srcD3D11Buffer,
+ uint64_t sourceOffset,
+ size_t size,
+ uint64_t destinationOffset) override;
+
+ MaybeError WriteInternal(const ScopedCommandRecordingContext* commandContext,
+ uint64_t bufferOffset,
+ const void* data,
+ size_t size) override;
ResultOrError<ComPtr<ID3D11ShaderResourceView>> CreateD3D11ShaderResourceViewFromD3DBuffer(
ID3D11Buffer* d3d11Buffer,
@@ -220,6 +262,68 @@
uint64_t bufferOffset,
const void* data,
size_t size);
+
+ // Storage types for different usages.
+ // - Since D3D11 doesn't allow both CPU and GPU to write to a buffer, we need separate storages
+ // for CPU writing and GPU writing usages.
+ // - Since a D3D11 constant buffer cannot be bound for other purposes (e.g. vertex, storage,
+ // etc.), we also need separate storages for constant buffer and non-constant buffer
+ // usages. Note: a constant buffer's only supported GPU write operation is CopyDst.
+ // - Lastly, we need a separate storage for MapRead because only D3D11 staging buffer can be
+ // read by CPU.
+ //
+ // One example of a buffer being created with MapWrite | Uniform | Storage and being used:
+ // - Map + CPU write: `CPUWritableConstantBuffer` gets updated.
+ // - write on GPU:
+ // - buffer->UseAsUAV: `CPUWritableConstantBuffer` is copied to
+ // `GPUWritableNonConstantBuffer`
+ // - GPU modifies `GPUWritableNonConstantBuffer`.
+ // - commandContext->AddBufferForSyncingWithCPU.
+ // - Queue::Submit
+ // - commandContext->FlushBuffersForSyncingWithCPU
+ // - buffer->SyncGPUWritesToStaging: `GPUWritableNonConstantBuffer` is copied to
+ // `Staging`.
+ // - Map again:
+ // - `Staging` is copied to `CPUWritableConstantBuffer` with DISCARD flag
+ enum class StorageType : uint8_t {
+ // Storage for write mapping with constant buffer usage.
+ CPUWritableConstantBuffer,
+ // Storage for CopyB2B with the destination having constant buffer usage.
+ GPUCopyDstConstantBuffer,
+ // Storage for write mapping with other (non-constant buffer) usages.
+ CPUWritableNonConstantBuffer,
+ // Storage for GPU writing with other (non-constant buffer) usages.
+ GPUWritableNonConstantBuffer,
+ // Storage for staging usage.
+ Staging,
+
+ Count,
+ };
+
+ ResultOrError<Storage*> GetOrCreateStorage(StorageType storageType);
+ // Get or create storage supporting CopyDst usage.
+ ResultOrError<Storage*> GetOrCreateDstCopyableStorage();
+
+ void SetStorageLabel(StorageType storageType);
+
+ // Update dstStorage to the latest revision.
+ MaybeError SyncStorage(const ScopedCommandRecordingContext* commandContext,
+ Storage* dstStorage);
+ // Increment the dstStorage's revision and make it the latest updated storage.
+ void IncrStorageRevAndMakeLatest(const ScopedCommandRecordingContext* commandContext,
+ Storage* dstStorage);
+
+ using StorageMap =
+ ityp::array<StorageType, Ref<Storage>, static_cast<uint8_t>(StorageType::Count)>;
+
+ StorageMap mStorages;
+
+ // The storage that contains the most up-to-date content.
+ raw_ptr<Storage> mLastUpdatedStorage;
+ // This points to either the CPU writable constant buffer or the CPU writable non-constant
+ // buffer storage. We don't need both to exist.
+ raw_ptr<Storage> mCPUWritableStorage;
+ raw_ptr<Storage> mMappedStorage;
};
static inline GPUUsableBuffer* ToGPUUsableBuffer(BufferBase* buffer) {
diff --git a/src/dawn/native/d3d11/CommandBufferD3D11.cpp b/src/dawn/native/d3d11/CommandBufferD3D11.cpp
index 571289d..6d419af 100644
--- a/src/dawn/native/d3d11/CommandBufferD3D11.cpp
+++ b/src/dawn/native/d3d11/CommandBufferD3D11.cpp
@@ -227,6 +227,7 @@
for (BufferBase* buffer : scope.buffers) {
DAWN_TRY(ToBackend(buffer)->EnsureDataInitialized(commandContext));
+ buffer->MarkUsedInPendingCommands();
}
return {};
@@ -240,6 +241,10 @@
switch (type) {
case Command::BeginComputePass: {
mCommands.NextCommand<BeginComputePassCmd>();
+ for (BufferBase* buffer :
+ GetResourceUsages().computePasses[nextComputePassNumber].referencedBuffers) {
+ buffer->MarkUsedInPendingCommands();
+ }
for (TextureBase* texture :
GetResourceUsages().computePasses[nextComputePassNumber].referencedTextures) {
DAWN_TRY(ToBackend(texture)->SynchronizeTextureBeforeUse(commandContext));
diff --git a/src/dawn/native/d3d11/CommandRecordingContextD3D11.cpp b/src/dawn/native/d3d11/CommandRecordingContextD3D11.cpp
index 130e7ae..6cff29a 100644
--- a/src/dawn/native/d3d11/CommandRecordingContextD3D11.cpp
+++ b/src/dawn/native/d3d11/CommandRecordingContextD3D11.cpp
@@ -146,6 +146,18 @@
Get()->mNeedsFence = true;
}
+void ScopedCommandRecordingContext::AddBufferForSyncingWithCPU(GPUUsableBuffer* buffer) const {
+ Get()->mBuffersToSyncWithCPU.push_back(buffer);
+}
+
+MaybeError ScopedCommandRecordingContext::FlushBuffersForSyncingWithCPU() const {
+ for (auto buffer : Get()->mBuffersToSyncWithCPU) {
+ DAWN_TRY(buffer->SyncGPUWritesToStaging(this));
+ }
+ Get()->mBuffersToSyncWithCPU.clear();
+ return {};
+}
+
ScopedSwapStateCommandRecordingContext::ScopedSwapStateCommandRecordingContext(
CommandRecordingContextGuard&& guard)
: ScopedCommandRecordingContext(std::move(guard)),
diff --git a/src/dawn/native/d3d11/CommandRecordingContextD3D11.h b/src/dawn/native/d3d11/CommandRecordingContextD3D11.h
index 6b4f2c9..c0e432f 100644
--- a/src/dawn/native/d3d11/CommandRecordingContextD3D11.h
+++ b/src/dawn/native/d3d11/CommandRecordingContextD3D11.h
@@ -29,6 +29,7 @@
#define SRC_DAWN_NATIVE_D3D11_COMMANDRECORDINGCONTEXT_D3D11_H_
#include "absl/container/flat_hash_set.h"
+#include "absl/container/inlined_vector.h"
#include "dawn/common/MutexProtected.h"
#include "dawn/common/NonCopyable.h"
#include "dawn/common/Ref.h"
@@ -115,6 +116,10 @@
bool mNeedsFence = false;
+ // List of buffers whose CPU accessible storages need to be synced.
+ // Use an inlined vector to avoid heap allocation when the list is empty or has a
+ // single buffer.
+ absl::InlinedVector<GPUUsableBuffer*, 1> mBuffersToSyncWithCPU;
+
Ref<Device> mDevice;
};
@@ -165,6 +170,11 @@
MaybeError AcquireKeyedMutex(Ref<d3d::KeyedMutex> keyedMutex) const;
void SetNeedsFence() const;
+
+ // Add a buffer to a pending list for syncing CPU storages. The list is typically processed at
+ // the end of a command buffer when it is about to be submitted.
+ void AddBufferForSyncingWithCPU(GPUUsableBuffer* buffer) const;
+ MaybeError FlushBuffersForSyncingWithCPU() const;
};
// For using ID3D11DeviceContext directly. It swaps and resets ID3DDeviceContextState of
diff --git a/src/dawn/native/d3d11/DeviceInfoD3D11.cpp b/src/dawn/native/d3d11/DeviceInfoD3D11.cpp
index 0138bc9..9d04721 100644
--- a/src/dawn/native/d3d11/DeviceInfoD3D11.cpp
+++ b/src/dawn/native/d3d11/DeviceInfoD3D11.cpp
@@ -38,6 +38,15 @@
const ComPtr<ID3D11Device>& device) {
DeviceInfo info = {};
+ D3D11_FEATURE_DATA_D3D11_OPTIONS options;
+ DAWN_TRY(CheckHRESULT(
+ device->CheckFeatureSupport(D3D11_FEATURE_D3D11_OPTIONS, &options, sizeof(options)),
+ "D3D11_FEATURE_D3D11_OPTIONS"));
+ info.supportsMapNoOverwriteDynamicBuffers =
+ options.MapNoOverwriteOnDynamicBufferSRV && options.MapNoOverwriteOnDynamicConstantBuffer;
+
+ info.supportsPartialConstantBufferUpdate = options.ConstantBufferPartialUpdate;
+
D3D11_FEATURE_DATA_D3D11_OPTIONS2 options2;
DAWN_TRY(CheckHRESULT(
device->CheckFeatureSupport(D3D11_FEATURE_D3D11_OPTIONS2, &options2, sizeof(options2)),
diff --git a/src/dawn/native/d3d11/DeviceInfoD3D11.h b/src/dawn/native/d3d11/DeviceInfoD3D11.h
index ec6388e..2ce2e5d 100644
--- a/src/dawn/native/d3d11/DeviceInfoD3D11.h
+++ b/src/dawn/native/d3d11/DeviceInfoD3D11.h
@@ -49,6 +49,8 @@
size_t sharedSystemMemory;
bool supportsMonitoredFence;
bool supportsNonMonitoredFence;
+ bool supportsMapNoOverwriteDynamicBuffers;
+ bool supportsPartialConstantBufferUpdate;
};
ResultOrError<DeviceInfo> GatherDeviceInfo(const ComPtr<IDXGIAdapter4>& adapter,
diff --git a/src/dawn/native/d3d11/PhysicalDeviceD3D11.cpp b/src/dawn/native/d3d11/PhysicalDeviceD3D11.cpp
index ffc1f5d..130c7b0 100644
--- a/src/dawn/native/d3d11/PhysicalDeviceD3D11.cpp
+++ b/src/dawn/native/d3d11/PhysicalDeviceD3D11.cpp
@@ -151,6 +151,10 @@
EnableFeature(Feature::R8UnormStorage);
EnableFeature(Feature::ShaderModuleCompilationOptions);
EnableFeature(Feature::DawnLoadResolveTexture);
+ if (mDeviceInfo.isUMA && mDeviceInfo.supportsMapNoOverwriteDynamicBuffers) {
+ // With UMA we should allow mapping usages on more types of buffers.
+ EnableFeature(Feature::BufferMapExtendedUsages);
+ }
// Multi planar formats are always supported since Feature Level 11.0
// https://learn.microsoft.com/en-us/windows/win32/direct3ddxgi/format-support-for-direct3d-11-0-feature-level-hardware
diff --git a/src/dawn/native/d3d11/QueueD3D11.cpp b/src/dawn/native/d3d11/QueueD3D11.cpp
index 0b83880..0ac8457 100644
--- a/src/dawn/native/d3d11/QueueD3D11.cpp
+++ b/src/dawn/native/d3d11/QueueD3D11.cpp
@@ -294,6 +294,8 @@
MaybeError MonitoredQueue::NextSerial() {
auto commandContext = GetScopedPendingCommandContext(SubmitMode::Passive);
+ DAWN_TRY(commandContext.FlushBuffersForSyncingWithCPU());
+
IncrementLastSubmittedCommandSerial();
TRACE_EVENT1(GetDevice()->GetPlatform(), General, "D3D11Device::SignalFence", "serial",
uint64_t(GetLastSubmittedCommandSerial()));
@@ -339,6 +341,8 @@
MaybeError UnmonitoredQueue::NextSerial() {
auto commandContext = GetScopedPendingCommandContext(SubmitMode::Passive);
+ DAWN_TRY(commandContext.FlushBuffersForSyncingWithCPU());
+
IncrementLastSubmittedCommandSerial();
ExecutionSerial lastSubmittedSerial = GetLastSubmittedCommandSerial();
if (commandContext->AcquireNeedsFence()) {
diff --git a/src/dawn/tests/end2end/BufferTests.cpp b/src/dawn/tests/end2end/BufferTests.cpp
index 4746259..738a9cb 100644
--- a/src/dawn/tests/end2end/BufferTests.cpp
+++ b/src/dawn/tests/end2end/BufferTests.cpp
@@ -31,11 +31,13 @@
#include <limits>
#include <sstream>
#include <string>
+#include <utility>
#include <vector>
#include "dawn/tests/DawnTest.h"
#include "dawn/tests/MockCallback.h"
#include "dawn/utils/ComboRenderPipelineDescriptor.h"
+#include "dawn/utils/TestUtils.h"
#include "dawn/utils/WGPUHelpers.h"
#include "partition_alloc/pointers/raw_ptr.h"
@@ -48,6 +50,7 @@
using MockMapAsyncCallback = MockCppCallback<void (*)(wgpu::MapAsyncStatus, const char*)>;
using FutureCallbackMode = std::optional<wgpu::CallbackMode>;
+
DAWN_TEST_PARAM_STRUCT(BufferMappingTestParams, FutureCallbackMode);
class BufferMappingTests : public DawnTestWithParams<BufferMappingTestParams> {
@@ -1384,13 +1387,13 @@
OpenGLESBackend({"disable_resource_suballocation"}),
VulkanBackend({"disable_resource_suballocation"}));
-class BufferMapExtendedUsagesTests : public BufferMappingTests {
+class BufferMapExtendedUsagesTests : public DawnTest {
protected:
void SetUp() override {
- BufferMappingTests::SetUp();
+ DawnTest::SetUp();
DAWN_TEST_UNSUPPORTED_IF(UsesWire());
- // Skip all tests if the BufferMapExtendedUsages feature is not supported.
+ // Skip all tests if the required feature is not supported.
DAWN_TEST_UNSUPPORTED_IF(!SupportsFeatures({wgpu::FeatureName::BufferMapExtendedUsages}));
}
@@ -1402,7 +1405,47 @@
return requiredFeatures;
}
- wgpu::RenderPipeline CreateRenderPipelineForTest(bool colorFromUniformBuffer) {
+ void MapAsyncAndWait(const wgpu::Buffer& buffer,
+ wgpu::MapMode mode,
+ size_t offset,
+ size_t size) {
+ wgpu::Future future = buffer.MapAsync(mode, offset, size, wgpu::CallbackMode::WaitAnyOnly,
+ [](wgpu::MapAsyncStatus status, const char*) {
+ ASSERT_EQ(wgpu::MapAsyncStatus::Success, status);
+ });
+ wgpu::FutureWaitInfo waitInfo = {future};
+ GetInstance().WaitAny(1, &waitInfo, UINT64_MAX);
+ ASSERT_TRUE(waitInfo.completed);
+ }
+
+ wgpu::Buffer CreateBufferFromData(const void* data, uint64_t size, wgpu::BufferUsage usage) {
+ if (!(usage & wgpu::BufferUsage::MapWrite)) {
+ return utils::CreateBufferFromData(device, data, size, usage);
+ }
+
+ wgpu::BufferDescriptor descriptor;
+ descriptor.size = size;
+ descriptor.usage = usage;
+
+ wgpu::Buffer buffer = device.CreateBuffer(&descriptor);
+
+ MapAsyncAndWait(buffer, wgpu::MapMode::Write, 0, size);
+ memcpy(buffer.GetMappedRange(), data, size);
+ buffer.Unmap();
+
+ return buffer;
+ }
+
+ enum class ColorSrc {
+ UniformBuffer,
+ VertexBuffer,
+ StorageBuffer,
+ };
+
+ wgpu::RenderPipeline CreateRenderPipelineForTest(
+ ColorSrc colorSource,
+ wgpu::VertexStepMode vertexBufferStepMode = wgpu::VertexStepMode::Vertex,
+ wgpu::VertexFormat vertexBufferFormat = wgpu::VertexFormat::Unorm8x4) {
utils::ComboRenderPipelineDescriptor pipelineDescriptor;
std::ostringstream vs;
@@ -1418,38 +1461,57 @@
vec2f(-1.0, 3.0));
)";
- if (colorFromUniformBuffer) {
- // Color is from uniform buffer.
- vs << R"(
- struct Uniforms {
- color : vec4f,
- }
- @binding(0) @group(0) var<uniform> uniforms : Uniforms;
+ switch (colorSource) {
+ case ColorSrc::UniformBuffer:
+ // Color is from uniform buffer.
+ vs << R"(
+ struct Uniforms {
+ color : vec4f,
+ }
+ @binding(0) @group(0) var<uniform> uniforms : Uniforms;
- @vertex
- fn main(@builtin(vertex_index) vertexIndex : u32) -> VertexOut {
- var output : VertexOut;
- output.position = vec4f(vertexPos[vertexIndex % 3], 0.0, 1.0);
- output.color = uniforms.color;
- return output;
- })";
- } else {
- // Color is from vertex buffer.
- vs << R"(
- @vertex
- fn main(@location(0) vertexColor : vec4f,
- @builtin(vertex_index) vertexIndex : u32) -> VertexOut {
- var output : VertexOut;
- output.position = vec4f(vertexPos[vertexIndex % 3], 0.0, 1.0);
- output.color = vertexColor;
- return output;
- })";
+ @vertex
+ fn main(@builtin(vertex_index) vertexIndex : u32) -> VertexOut {
+ var output : VertexOut;
+ output.position = vec4f(vertexPos[vertexIndex % 3], 0.0, 1.0);
+ output.color = uniforms.color;
+ return output;
+ })";
+ break;
+ case ColorSrc::VertexBuffer:
+ // Color is from vertex buffer.
+ vs << R"(
+ @vertex
+ fn main(@location(0) vertexColor : vec4f,
+ @builtin(vertex_index) vertexIndex : u32) -> VertexOut {
+ var output : VertexOut;
+ output.position = vec4f(vertexPos[vertexIndex % 3], 0.0, 1.0);
+ output.color = vertexColor;
+ return output;
+ })";
- pipelineDescriptor.vertex.bufferCount = 1;
- pipelineDescriptor.cBuffers[0].arrayStride = 4;
- pipelineDescriptor.cBuffers[0].attributeCount = 1;
- pipelineDescriptor.cBuffers[0].stepMode = wgpu::VertexStepMode::Vertex;
- pipelineDescriptor.cAttributes[0].format = wgpu::VertexFormat::Unorm8x4;
+ pipelineDescriptor.vertex.bufferCount = 1;
+ pipelineDescriptor.cBuffers[0].arrayStride =
+ utils::VertexFormatSize(vertexBufferFormat);
+ pipelineDescriptor.cBuffers[0].attributeCount = 1;
+ pipelineDescriptor.cBuffers[0].stepMode = vertexBufferStepMode;
+ pipelineDescriptor.cAttributes[0].format = vertexBufferFormat;
+ break;
+ case ColorSrc::StorageBuffer:
+ vs << R"(
+ struct Uniforms {
+ color : vec4f,
+ }
+ @binding(0) @group(0) var<storage, read> ssbo : Uniforms;
+
+ @vertex
+ fn main(@builtin(vertex_index) vertexIndex : u32) -> VertexOut {
+ var output : VertexOut;
+ output.position = vec4f(vertexPos[vertexIndex % 3], 0.0, 1.0);
+ output.color = ssbo.color;
+ return output;
+ })";
+ break;
}
constexpr char fs[] = R"(
@fragment
@@ -1471,8 +1533,10 @@
wgpu::RenderPipeline pipeline,
wgpu::Buffer vertexBuffer,
wgpu::Buffer indexBuffer,
- wgpu::BindGroup uniformsBindGroup) {
- wgpu::CommandEncoder commandEncoder = device.CreateCommandEncoder();
+ wgpu::BindGroup uniformsBindGroup,
+ wgpu::CommandEncoder existingEncoder = nullptr) {
+ wgpu::CommandEncoder commandEncoder =
+ existingEncoder ? std::move(existingEncoder) : device.CreateCommandEncoder();
wgpu::RenderPassEncoder renderPassEncoder = commandEncoder.BeginRenderPass(&renderPass);
renderPassEncoder.SetPipeline(pipeline);
if (uniformsBindGroup) {
@@ -1494,7 +1558,9 @@
queue.Submit(1, &commands);
}
- static constexpr wgpu::BufferUsage kNonMapUsages[] = {
+ void MixMapWriteAndGPUWriteBufferThenDraw(ColorSrc colorSrc);
+
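+ // Usages that may be combined with MapRead/MapWrite when BufferMapExtendedUsages is enabled.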
+ static constexpr wgpu::BufferUsage kMapExtendedUsages[] = {
wgpu::BufferUsage::CopySrc, wgpu::BufferUsage::CopyDst, wgpu::BufferUsage::Index,
wgpu::BufferUsage::Vertex, wgpu::BufferUsage::Uniform, wgpu::BufferUsage::Storage,
wgpu::BufferUsage::Indirect, wgpu::BufferUsage::QueryResolve,
@@ -1506,7 +1572,7 @@
wgpu::BufferDescriptor descriptor;
descriptor.size = 4;
- for (const auto otherUsage : kNonMapUsages) {
+ for (const auto otherUsage : kMapExtendedUsages) {
descriptor.usage = wgpu::BufferUsage::MapRead | wgpu::BufferUsage::CopyDst | otherUsage;
wgpu::Buffer buffer = device.CreateBuffer(&descriptor);
@@ -1526,7 +1592,7 @@
wgpu::BufferDescriptor descriptor;
descriptor.size = 4;
- for (const auto otherUsage : kNonMapUsages) {
+ for (const auto otherUsage : kMapExtendedUsages) {
descriptor.usage = wgpu::BufferUsage::MapWrite | wgpu::BufferUsage::CopySrc | otherUsage;
wgpu::Buffer buffer = device.CreateBuffer(&descriptor);
@@ -1548,11 +1614,10 @@
utils::RGBA8::kGreen};
// Create buffer with initial red color data.
- wgpu::Buffer vertexBuffer = utils::CreateBufferFromData(
- device, kReds, sizeof(kReds), wgpu::BufferUsage::MapWrite | wgpu::BufferUsage::Vertex);
+ wgpu::Buffer vertexBuffer = CreateBufferFromData(
+ kReds, sizeof(kReds), wgpu::BufferUsage::MapWrite | wgpu::BufferUsage::Vertex);
- wgpu::RenderPipeline renderPipeline =
- CreateRenderPipelineForTest(/*colorFromUniformBuffer=*/false);
+ wgpu::RenderPipeline renderPipeline = CreateRenderPipelineForTest(ColorSrc::VertexBuffer);
auto redRenderPass = utils::CreateBasicRenderPass(device, 1, 1);
auto greenRenderPass = utils::CreateBasicRenderPass(device, 1, 1);
@@ -1584,14 +1649,12 @@
const uint16_t kRedIndices[] = {0, 1, 2, 0};
const uint16_t kGreenIndices[] = {3, 4, 5, 3};
- wgpu::Buffer vertexBuffer = utils::CreateBufferFromData(
- device, kVertexColors, sizeof(kVertexColors), wgpu::BufferUsage::Vertex);
- wgpu::Buffer indexBuffer =
- utils::CreateBufferFromData(device, kRedIndices, sizeof(kRedIndices),
- wgpu::BufferUsage::MapWrite | wgpu::BufferUsage::Index);
+ wgpu::Buffer vertexBuffer =
+ CreateBufferFromData(kVertexColors, sizeof(kVertexColors), wgpu::BufferUsage::Vertex);
+ wgpu::Buffer indexBuffer = CreateBufferFromData(
+ kRedIndices, sizeof(kRedIndices), wgpu::BufferUsage::MapWrite | wgpu::BufferUsage::Index);
- wgpu::RenderPipeline renderPipeline =
- CreateRenderPipelineForTest(/*colorFromUniformBuffer=*/false);
+ wgpu::RenderPipeline renderPipeline = CreateRenderPipelineForTest(ColorSrc::VertexBuffer);
auto redRenderPass = utils::CreateBasicRenderPass(device, 1, 1);
auto greenRenderPass = utils::CreateBasicRenderPass(device, 1, 1);
@@ -1613,17 +1676,97 @@
EXPECT_PIXEL_RGBA8_EQ(utils::RGBA8::kGreen, greenRenderPass.color, 0, 0);
}
+// Test that mapping an occlusion QueryResolve buffer, drawing with it, then mapping it again
+// works.
+TEST_P(BufferMapExtendedUsagesTests, MapWriteQueryBufferThenDrawThenMapWrite) {
+ constexpr uint64_t kExpectedVal2 = 1;
+ constexpr uint64_t kExpectedVal3 = 2;
+ constexpr size_t kQueryResolveBufferSize = 3 * sizeof(uint64_t);
+ const utils::RGBA8 kReds[] = {utils::RGBA8::kRed, utils::RGBA8::kRed, utils::RGBA8::kRed};
+
+ // Create buffer with initial red color data.
+ wgpu::Buffer vertexBuffer =
+ CreateBufferFromData(kReds, sizeof(kReds), wgpu::BufferUsage::Vertex);
+
+ wgpu::RenderPipeline renderPipeline = CreateRenderPipelineForTest(ColorSrc::VertexBuffer);
+
+ // Create Occlusion Query Set
+ wgpu::QuerySet querySet;
+ {
+ wgpu::QuerySetDescriptor descriptor;
+ descriptor.count = 1;
+ descriptor.type = wgpu::QueryType::Occlusion;
+ querySet = device.CreateQuerySet(&descriptor);
+ }
+
+ // Create a QueryResolve buffer with the 2nd expected value written to the 2nd uint64_t element.
+ wgpu::Buffer queryBuffer;
+ {
+ constexpr uint64_t kInitialData[] = {0, kExpectedVal2, 0};
+ queryBuffer =
+ CreateBufferFromData(kInitialData, sizeof(kInitialData),
+ wgpu::BufferUsage::QueryResolve | wgpu::BufferUsage::MapWrite |
+ wgpu::BufferUsage::CopySrc);
+ }
+
+ // Draw with an occlusion query and resolve it into the 1st uint64_t element.
+ {
+ auto renderPass = utils::CreateBasicRenderPass(device, 1, 1);
+ renderPass.renderPassInfo.occlusionQuerySet = querySet;
+ wgpu::CommandEncoder commandEncoder = device.CreateCommandEncoder();
+ wgpu::RenderPassEncoder renderPassEncoder =
+ commandEncoder.BeginRenderPass(&renderPass.renderPassInfo);
+ renderPassEncoder.SetPipeline(renderPipeline);
+ renderPassEncoder.SetVertexBuffer(0, vertexBuffer);
+ renderPassEncoder.BeginOcclusionQuery(0);
+ renderPassEncoder.Draw(3);
+ renderPassEncoder.EndOcclusionQuery();
+ renderPassEncoder.End();
+
+ commandEncoder.ResolveQuerySet(querySet, 0, 1, queryBuffer, 0);
+
+ wgpu::CommandBuffer commands = commandEncoder.Finish();
+ queue.Submit(1, &commands);
+ }
+
+ // Map-write the 3rd uint64_t element.
+ {
+ constexpr size_t k3rdElemOffset = 2 * sizeof(uint64_t);
+ MapAsyncAndWait(queryBuffer, wgpu::MapMode::Write, 0, kQueryResolveBufferSize);
+ ASSERT_NE(nullptr, queryBuffer.GetMappedRange());
+ memcpy(queryBuffer.GetMappedRange(k3rdElemOffset), &kExpectedVal3, sizeof(kExpectedVal3));
+ queryBuffer.Unmap();
+ }
+
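+ // The exact resolved occlusion value is implementation-defined, so only check that the draw
+ // produced a non-zero result in the 1st element.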
+ class NonZeroExpectation : public detail::Expectation {
+ public:
+ testing::AssertionResult Check(const void* data, size_t size) override {
+ DAWN_ASSERT(size % sizeof(uint64_t) == 0);
+ const uint64_t* actual = static_cast<const uint64_t*>(data);
+
+ if (actual[0] == 0) {
+ return testing::AssertionFailure() << "Expected data[0] to be non-zero.\n";
+ }
+
+ return testing::AssertionSuccess();
+ }
+ };
+
+ EXPECT_BUFFER(queryBuffer, 0, sizeof(uint64_t), new NonZeroExpectation());
+ EXPECT_BUFFER_U64_EQ(kExpectedVal2, queryBuffer, sizeof(uint64_t));
+ EXPECT_BUFFER_U64_EQ(kExpectedVal3, queryBuffer, 2 * sizeof(uint64_t));
+}
+
// Test that mapping a uniform buffer, modifying the data, then drawing with the buffer works.
TEST_P(BufferMapExtendedUsagesTests, MapWriteUniformBufferAndDraw) {
const float kRed[] = {1.0f, 0.0f, 0.0f, 1.0f};
const float kGreen[] = {0.0f, 1.0f, 0.0f, 1.0f};
// Create buffer with initial red color data.
- wgpu::Buffer uniformBuffer = utils::CreateBufferFromData(
- device, &kRed, sizeof(kRed), wgpu::BufferUsage::MapWrite | wgpu::BufferUsage::Uniform);
+ wgpu::Buffer uniformBuffer = CreateBufferFromData(
+ &kRed, sizeof(kRed), wgpu::BufferUsage::MapWrite | wgpu::BufferUsage::Uniform);
- wgpu::RenderPipeline renderPipeline =
- CreateRenderPipelineForTest(/*colorFromUniformBuffer=*/true);
+ wgpu::RenderPipeline renderPipeline = CreateRenderPipelineForTest(ColorSrc::UniformBuffer);
wgpu::BindGroup uniformsBindGroup = utils::MakeBindGroup(
device, renderPipeline.GetBindGroupLayout(0), {{0, uniformBuffer, 0, sizeof(kRed)}});
@@ -1647,6 +1790,39 @@
EXPECT_PIXEL_RGBA8_EQ(utils::RGBA8::kGreen, greenRenderPass.color, 0, 0);
}
+// Test that mapping a storage buffer, modifying the data, then drawing with the buffer works.
+TEST_P(BufferMapExtendedUsagesTests, MapWriteStorageBufferAndDraw) {
+ const float kRed[] = {1.0f, 0.0f, 0.0f, 1.0f};
+ const float kGreen[] = {0.0f, 1.0f, 0.0f, 1.0f};
+
+ // Create buffer with initial red color data.
+ wgpu::Buffer storageBuffer = CreateBufferFromData(
+ &kRed, sizeof(kRed), wgpu::BufferUsage::MapWrite | wgpu::BufferUsage::Storage);
+
+ wgpu::RenderPipeline renderPipeline = CreateRenderPipelineForTest(ColorSrc::StorageBuffer);
+ wgpu::BindGroup ssboBindGroup = utils::MakeBindGroup(
+ device, renderPipeline.GetBindGroupLayout(0), {{0, storageBuffer, 0, sizeof(kRed)}});
+
+ auto redRenderPass = utils::CreateBasicRenderPass(device, 1, 1);
+ auto greenRenderPass = utils::CreateBasicRenderPass(device, 1, 1);
+
+ // First render pass: draw with the red color storage buffer.
+ EncodeAndSubmitRenderPassForTest(redRenderPass.renderPassInfo, renderPipeline, nullptr, nullptr,
+ ssboBindGroup);
+
+ // Second render pass: draw with the green color storage buffer.
+ MapAsyncAndWait(storageBuffer, wgpu::MapMode::Write, 0, sizeof(kGreen));
+ ASSERT_NE(nullptr, storageBuffer.GetMappedRange());
+ memcpy(storageBuffer.GetMappedRange(), &kGreen, sizeof(kGreen));
+ storageBuffer.Unmap();
+
+ EncodeAndSubmitRenderPassForTest(greenRenderPass.renderPassInfo, renderPipeline, nullptr,
+ nullptr, ssboBindGroup);
+
+ EXPECT_PIXEL_RGBA8_EQ(utils::RGBA8::kRed, redRenderPass.color, 0, 0);
+ EXPECT_PIXEL_RGBA8_EQ(utils::RGBA8::kGreen, greenRenderPass.color, 0, 0);
+}
+
// Test that map write a storage buffer, modifying it on GPU, then map read it on CPU works.
TEST_P(BufferMapExtendedUsagesTests, MapWriteThenGPUWriteStorageBufferThenMapRead) {
const uint32_t kInitialValue = 1;
@@ -1711,10 +1887,215 @@
ssbo.Unmap();
}
-DAWN_INSTANTIATE_TEST_P(BufferMapExtendedUsagesTests,
- {D3D11Backend(), D3D12Backend(), MetalBackend(), OpenGLBackend(),
- OpenGLESBackend(), VulkanBackend()},
- {std::nullopt});
+// Test the following scenario:
+// - map-write a buffer.
+// - modify it on the GPU.
+// - map-write it again.
+// - draw using the buffer.
+void BufferMapExtendedUsagesTests::MixMapWriteAndGPUWriteBufferThenDraw(ColorSrc colorSrc) {
+ const float kRed[] = {1.0f, 0.0f, 0.0f, 1.0f};
+ const float kFinalColor[] = {1.0f, 1.0f, 1.0f, 1.0f};
+ constexpr size_t kSize = sizeof(kFinalColor);
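+ // Red comes from initialization, green from the compute pass, and blue from the map-write;
+ // together they produce the final white color.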
+
+ // Create buffer with initial red color data.
+ wgpu::Buffer ssbo;
+ {
+ wgpu::BufferUsage usage =
+ wgpu::BufferUsage::Storage | wgpu::BufferUsage::MapRead | wgpu::BufferUsage::MapWrite;
+
+ switch (colorSrc) {
+ case ColorSrc::UniformBuffer:
+ usage |= wgpu::BufferUsage::Uniform;
+ break;
+ case ColorSrc::VertexBuffer:
+ usage |= wgpu::BufferUsage::Vertex;
+ break;
+ case ColorSrc::StorageBuffer:
+ // Storage usage is already included.
+ break;
+ }
+ ssbo = CreateBufferFromData(&kRed, kSize, usage);
+ }
+
+ // Compute pipeline that writes 1.0 to the buffer's green channel.
+ wgpu::ComputePipeline pipeline;
+ {
+ wgpu::ComputePipelineDescriptor csDesc;
+ csDesc.compute.module = utils::CreateShaderModule(device, R"(
+ @group(0) @binding(0) var<storage, read_write> ssbo : vec4f;
+ @compute @workgroup_size(1) fn main() {
+ ssbo.g = 1.0;
+ })");
+
+ pipeline = device.CreateComputePipeline(&csDesc);
+ }
+
+ // Modify the buffer's green channel in compute shader.
+ {
+ wgpu::CommandEncoder encoder = device.CreateCommandEncoder();
+ wgpu::ComputePassEncoder pass = encoder.BeginComputePass();
+
+ ASSERT_NE(nullptr, pipeline.Get());
+ wgpu::BindGroup ssboWriteBindGroup =
+ utils::MakeBindGroup(device, pipeline.GetBindGroupLayout(0),
+ {
+ {0, ssbo, 0, kSize},
+ });
+ pass.SetBindGroup(0, ssboWriteBindGroup);
+ pass.SetPipeline(pipeline);
+ pass.DispatchWorkgroups(1);
+ pass.End();
+
+ wgpu::CommandBuffer commands = encoder.Finish();
+
+ queue.Submit(1, &commands);
+ }
+
+ // Map-write and modify the buffer's blue channel.
+ {
+ const float kOne = 1.0f;
+ constexpr size_t kBlueChannelOffset = 2 * sizeof(float);
+ MapAsyncAndWait(ssbo, wgpu::MapMode::Write, 0, kSize);
+ ASSERT_NE(nullptr, ssbo.GetMappedRange());
+ memcpy(ssbo.GetMappedRange(kBlueChannelOffset), &kOne, sizeof(kOne));
+ ssbo.Unmap();
+ }
+
+ // Draw using the color from the buffer.
+ {
+ // Render pipeline; the single vec4f color is fetched per-instance when used as a vertex buffer.
+ wgpu::RenderPipeline renderPipeline = CreateRenderPipelineForTest(
+ colorSrc, wgpu::VertexStepMode::Instance, wgpu::VertexFormat::Float32x4);
+
+ wgpu::Buffer vertexBuffer;
+ wgpu::BindGroup ssboReadBindGroup;
+ switch (colorSrc) {
+ case ColorSrc::VertexBuffer:
+ vertexBuffer = ssbo;
+ break;
+ case ColorSrc::UniformBuffer:
+ case ColorSrc::StorageBuffer:
+ ssboReadBindGroup = utils::MakeBindGroup(
+ device, renderPipeline.GetBindGroupLayout(0), {{0, ssbo, 0, kSize}});
+ break;
+ }
+
+ auto finalRenderPass = utils::CreateBasicRenderPass(device, 1, 1);
+ EncodeAndSubmitRenderPassForTest(finalRenderPass.renderPassInfo, renderPipeline,
+ vertexBuffer, nullptr, ssboReadBindGroup);
+
+ EXPECT_PIXEL_RGBA8_EQ(utils::RGBA8::kWhite, finalRenderPass.color, 0, 0);
+ }
+
+ // Read the final value.
+ MapAsyncAndWait(ssbo, wgpu::MapMode::Read, 0, kSize);
+ CheckMapping(ssbo.GetConstMappedRange(0, kSize), &kFinalColor, kSize);
+ ssbo.Unmap();
+}
+
+TEST_P(BufferMapExtendedUsagesTests, MixMapWriteAndGPUWriteVertexBufferThenDraw) {
+ MixMapWriteAndGPUWriteBufferThenDraw(ColorSrc::VertexBuffer);
+}
+
+TEST_P(BufferMapExtendedUsagesTests, MixMapWriteAndGPUWriteUniformBufferThenDraw) {
+ MixMapWriteAndGPUWriteBufferThenDraw(ColorSrc::UniformBuffer);
+}
+
+TEST_P(BufferMapExtendedUsagesTests, MixMapWriteAndGPUWriteStorageBufferThenDraw) {
+ MixMapWriteAndGPUWriteBufferThenDraw(ColorSrc::StorageBuffer);
+}
+
+// Test the following scenario:
+// - map-write a storage buffer.
+// - modify it on the GPU.
+// - copy another buffer to the storage buffer.
+// - draw using the storage buffer.
+TEST_P(BufferMapExtendedUsagesTests,
+ MapWriteThenGPUWriteStorageBufferThenCopyFromAnotherBufferThenDraw) {
+ const float kRed[] = {1.0f, 0.0f, 0.0f, 1.0f};
+ const float kBlue[] = {0.0f, 0.0f, 1.0f, 1.0f};
+ const float kFinalColor[] = {1.0f, 1.0f, 1.0f, 1.0f};
+ constexpr size_t kSize = sizeof(kFinalColor);
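+ // Red comes from the map-write, green from the compute pass, and blue from the copy;
+ // together they produce the final white color.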
+
+ // Create buffer with initial red color data.
+ wgpu::Buffer ssbo;
+ {
+ wgpu::BufferDescriptor descriptor;
+ descriptor.size = kSize;
+
+ descriptor.usage =
+ wgpu::BufferUsage::Storage | wgpu::BufferUsage::MapWrite | wgpu::BufferUsage::CopyDst;
+ ssbo = device.CreateBuffer(&descriptor);
+
+ MapAsyncAndWait(ssbo, wgpu::MapMode::Write, 0, kSize);
+ ASSERT_NE(nullptr, ssbo.GetMappedRange());
+ memcpy(ssbo.GetMappedRange(), &kRed, kSize);
+ ssbo.Unmap();
+ }
+ // Create buffer with blue color data.
+ wgpu::Buffer blueBuffer =
+ utils::CreateBufferFromData(device, &kBlue, sizeof(kBlue), wgpu::BufferUsage::CopySrc);
+
+ // Compute pipeline that writes 1.0 to the buffer's green channel.
+ wgpu::ComputePipeline pipeline;
+ {
+ wgpu::ComputePipelineDescriptor csDesc;
+ csDesc.compute.module = utils::CreateShaderModule(device, R"(
+ @group(0) @binding(0) var<storage, read_write> ssbo : vec4f;
+ @compute @workgroup_size(1) fn main() {
+ ssbo.g = 1.0;
+ })");
+
+ pipeline = device.CreateComputePipeline(&csDesc);
+ }
+
+ wgpu::CommandEncoder encoder = device.CreateCommandEncoder();
+
+ // Modify the buffer's green channel in a compute shader.
+ {
+ wgpu::ComputePassEncoder pass = encoder.BeginComputePass();
+
+ ASSERT_NE(nullptr, pipeline.Get());
+ wgpu::BindGroup ssboWriteBindGroup =
+ utils::MakeBindGroup(device, pipeline.GetBindGroupLayout(0),
+ {
+ {0, ssbo, 0, kSize},
+ });
+ pass.SetBindGroup(0, ssboWriteBindGroup);
+ pass.SetPipeline(pipeline);
+ pass.DispatchWorkgroups(1);
+ pass.End();
+ }
+
+ // Modify the buffer's blue channel with a buffer-to-buffer copy.
+ {
+ constexpr size_t kBlueChannelOffset = 2 * sizeof(float);
+ encoder.CopyBufferToBuffer(blueBuffer, kBlueChannelOffset, ssbo, kBlueChannelOffset,
+ sizeof(float));
+ }
+
+ // Draw using the color from the buffer.
+ {
+ // Render pipeline
+ wgpu::RenderPipeline renderPipeline = CreateRenderPipelineForTest(ColorSrc::StorageBuffer);
+ wgpu::BindGroup ssboReadBindGroup = utils::MakeBindGroup(
+ device, renderPipeline.GetBindGroupLayout(0), {{0, ssbo, 0, kSize}});
+
+ auto finalRenderPass = utils::CreateBasicRenderPass(device, 1, 1);
+ EncodeAndSubmitRenderPassForTest(finalRenderPass.renderPassInfo, renderPipeline, nullptr,
+ nullptr, ssboReadBindGroup, std::move(encoder));
+
+ EXPECT_PIXEL_RGBA8_EQ(utils::RGBA8::kWhite, finalRenderPass.color, 0, 0);
+ }
+}
+
+DAWN_INSTANTIATE_TEST(BufferMapExtendedUsagesTests,
+ D3D11Backend(),
+ D3D12Backend(),
+ MetalBackend(),
+ OpenGLBackend(),
+ OpenGLESBackend(),
+ VulkanBackend());
} // anonymous namespace
} // namespace dawn
diff --git a/src/dawn/tests/perf_tests/BufferUploadPerf.cpp b/src/dawn/tests/perf_tests/BufferUploadPerf.cpp
index 9eaaeee..bc8a5ab 100644
--- a/src/dawn/tests/perf_tests/BufferUploadPerf.cpp
+++ b/src/dawn/tests/perf_tests/BufferUploadPerf.cpp
@@ -38,6 +38,8 @@
enum class UploadMethod {
WriteBuffer,
MappedAtCreation,
+ MapWithExtendedUsages,
+ StagingBuffer,
};
// Perf delta exists between ranges [0, 1MB] vs [1MB, MAX_SIZE).
@@ -71,6 +73,12 @@
case UploadMethod::MappedAtCreation:
ostream << "_MappedAtCreation";
break;
+ case UploadMethod::MapWithExtendedUsages:
+ ostream << "_MapWithExtendedUsages";
+ break;
+ case UploadMethod::StagingBuffer:
+ ostream << "_StagingBuffer";
+ break;
}
switch (param.uploadSize) {
@@ -151,6 +159,9 @@
queue.Submit(1, &commands);
break;
}
+
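+ // The new upload methods are exercised by BufferMapExtendedUsagesPerf below.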
+ default:
+ DAWN_UNREACHABLE();
}
}
@@ -158,6 +169,147 @@
RunTest();
}
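+
+// Compares WriteBuffer, MappedAtCreation, an explicit staging buffer, and direct mapping via
+// BufferMapExtendedUsages as upload paths into a storage buffer.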
+class BufferMapExtendedUsagesPerf : public DawnPerfTestWithParams<BufferUploadParams> {
+ public:
+ BufferMapExtendedUsagesPerf()
+ : DawnPerfTestWithParams(kNumIterations, 1),
+ data(static_cast<size_t>(GetParam().uploadSize)) {}
+ ~BufferMapExtendedUsagesPerf() override = default;
+
+ void SetUp() override;
+
+ private:
+ void Step() override;
+
+ std::vector<wgpu::FeatureName> GetRequiredFeatures() override;
+
+ void MapAsyncAndWait(const wgpu::Buffer& buffer,
+ wgpu::MapMode mode,
+ size_t offset,
+ size_t size);
+
+ wgpu::Buffer buffers[kNumIterations];
+ wgpu::Buffer stagingBuffers[kNumIterations];
+ std::vector<uint8_t> data;
+};
+
+std::vector<wgpu::FeatureName> BufferMapExtendedUsagesPerf::GetRequiredFeatures() {
+ std::vector<wgpu::FeatureName> requiredFeatures = DawnPerfTestWithParams::GetRequiredFeatures();
+ if (!UsesWire() && GetParam().uploadMethod == UploadMethod::MapWithExtendedUsages &&
+ SupportsFeatures({wgpu::FeatureName::BufferMapExtendedUsages})) {
+ requiredFeatures.push_back(wgpu::FeatureName::BufferMapExtendedUsages);
+ }
+ return requiredFeatures;
+}
+
+void BufferMapExtendedUsagesPerf::SetUp() {
+ DawnPerfTestWithParams<BufferUploadParams>::SetUp();
+
+ // Skip the test if it needs the BufferMapExtendedUsages feature but the device lacks it.
+ DAWN_TEST_UNSUPPORTED_IF(GetParam().uploadMethod == UploadMethod::MapWithExtendedUsages &&
+ !device.HasFeature(wgpu::FeatureName::BufferMapExtendedUsages));
+
+ for (auto& buffer : buffers) {
+ wgpu::BufferDescriptor desc = {};
+ desc.size = data.size();
+
+ if (GetParam().uploadMethod == UploadMethod::MapWithExtendedUsages) {
+ desc.usage = wgpu::BufferUsage::MapWrite;
+ } else {
+ desc.usage = wgpu::BufferUsage::CopyDst;
+ }
+
+ desc.usage |= wgpu::BufferUsage::Storage;
+
+ buffer = device.CreateBuffer(&desc);
+ }
+
+ if (GetParam().uploadMethod == UploadMethod::StagingBuffer) {
+ for (auto& buffer : stagingBuffers) {
+ wgpu::BufferDescriptor desc = {};
+ desc.size = data.size();
+
+ desc.usage = wgpu::BufferUsage::MapWrite | wgpu::BufferUsage::CopySrc;
+
+ buffer = device.CreateBuffer(&desc);
+ }
+ }
+}
+
+void BufferMapExtendedUsagesPerf::Step() {
+ switch (GetParam().uploadMethod) {
+ case UploadMethod::WriteBuffer: {
+ for (unsigned int i = 0; i < kNumIterations; ++i) {
+ queue.WriteBuffer(buffers[i], 0, data.data(), data.size());
+ }
+ // Make sure all WriteBuffer calls are flushed.
+ queue.Submit(0, nullptr);
+ break;
+ }
+
+ case UploadMethod::MappedAtCreation: {
+ wgpu::BufferDescriptor desc = {};
+ desc.size = data.size();
+ desc.usage = wgpu::BufferUsage::CopySrc | wgpu::BufferUsage::MapWrite;
+ desc.mappedAtCreation = true;
+
+ wgpu::CommandEncoder encoder = device.CreateCommandEncoder();
+
+ for (unsigned int i = 0; i < kNumIterations; ++i) {
+ stagingBuffers[i] = device.CreateBuffer(&desc);
+ memcpy(stagingBuffers[i].GetMappedRange(0, data.size()), data.data(), data.size());
+ stagingBuffers[i].Unmap();
+ encoder.CopyBufferToBuffer(stagingBuffers[i], 0, buffers[i], 0, data.size());
+ }
+
+ wgpu::CommandBuffer commands = encoder.Finish();
+ queue.Submit(1, &commands);
+ break;
+ }
+
+ case UploadMethod::StagingBuffer: {
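+ // Map a dedicated staging buffer, write the data, then copy into the GPU buffer; this is
+ // the baseline the direct-map path is compared against.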
+ wgpu::CommandEncoder encoder = device.CreateCommandEncoder();
+
+ for (unsigned int i = 0; i < kNumIterations; ++i) {
+ MapAsyncAndWait(stagingBuffers[i], wgpu::MapMode::Write, 0, data.size());
+ memcpy(stagingBuffers[i].GetMappedRange(0, data.size()), data.data(), data.size());
+ stagingBuffers[i].Unmap();
+ encoder.CopyBufferToBuffer(stagingBuffers[i], 0, buffers[i], 0, data.size());
+ }
+
+ wgpu::CommandBuffer commands = encoder.Finish();
+ queue.Submit(1, &commands);
+ break;
+ }
+
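+ // Write directly into the GPU buffer; no staging copy or queue submission is needed.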
+ case UploadMethod::MapWithExtendedUsages: {
+ for (unsigned int i = 0; i < kNumIterations; ++i) {
+ MapAsyncAndWait(buffers[i], wgpu::MapMode::Write, 0, data.size());
+ memcpy(buffers[i].GetMappedRange(0, data.size()), data.data(), data.size());
+ buffers[i].Unmap();
+ }
+ break;
+ }
+ }
+}
+
+void BufferMapExtendedUsagesPerf::MapAsyncAndWait(const wgpu::Buffer& buffer,
+ wgpu::MapMode mode,
+ size_t offset,
+ size_t size) {
+ wgpu::Future future = buffer.MapAsync(mode, offset, size, wgpu::CallbackMode::WaitAnyOnly,
+ [](wgpu::MapAsyncStatus status, const char*) {
+ ASSERT_EQ(wgpu::MapAsyncStatus::Success, status);
+ });
+ wgpu::FutureWaitInfo waitInfo = {future};
+ GetInstance().WaitAny(1, &waitInfo, UINT64_MAX);
+ ASSERT_TRUE(waitInfo.completed);
+}
+
+TEST_P(BufferMapExtendedUsagesPerf, Run) {
+ RunTest();
+}
+
DAWN_INSTANTIATE_TEST_P(BufferUploadPerf,
{D3D12Backend(), MetalBackend(), OpenGLBackend(), VulkanBackend()},
{UploadMethod::WriteBuffer, UploadMethod::MappedAtCreation},
@@ -165,5 +317,14 @@
UploadSize::BufferSize_1MB, UploadSize::BufferSize_4MB,
UploadSize::BufferSize_16MB});
+DAWN_INSTANTIATE_TEST_P(BufferMapExtendedUsagesPerf,
+ {D3D12Backend(), D3D11Backend(), MetalBackend(), OpenGLBackend(),
+ VulkanBackend()},
+ {UploadMethod::WriteBuffer, UploadMethod::MappedAtCreation,
+ UploadMethod::MapWithExtendedUsages, UploadMethod::StagingBuffer},
+ {UploadSize::BufferSize_1KB, UploadSize::BufferSize_64KB,
+ UploadSize::BufferSize_1MB, UploadSize::BufferSize_4MB,
+ UploadSize::BufferSize_16MB});
+
} // anonymous namespace
} // namespace dawn
diff --git a/src/dawn/tests/perf_tests/UniformBufferUpdatePerf.cpp b/src/dawn/tests/perf_tests/UniformBufferUpdatePerf.cpp
index 401da1e..2e1f25f 100644
--- a/src/dawn/tests/perf_tests/UniformBufferUpdatePerf.cpp
+++ b/src/dawn/tests/perf_tests/UniformBufferUpdatePerf.cpp
@@ -69,6 +69,8 @@
SingleStagingBuffer,
// Map and copy to a specific staging buffer first for each uniform buffer to then copy from.
MultipleStagingBuffer,
+ // Map the uniform buffer directly and write the data through the mapped pointer.
+ MapWithExtendedUsages,
};
enum class UploadSize {
@@ -109,6 +111,9 @@
case UploadMethod::MultipleStagingBuffer:
ostream << "_MultipleStagingBuffer";
break;
+ case UploadMethod::MapWithExtendedUsages:
+ ostream << "_MapWithExtendedUsages";
+ break;
}
switch (param.uploadSize) {
@@ -147,6 +152,7 @@
wgpu::Buffer buffer;
};
void Step() override;
+ std::vector<wgpu::FeatureName> GetRequiredFeatures() override;
size_t GetBufferSize();
wgpu::Buffer FindOrCreateUniformBuffer();
@@ -169,6 +175,15 @@
MutexProtected<std::queue<wgpu::Buffer>> mMultipleStagingBuffers;
};
+std::vector<wgpu::FeatureName> UniformBufferUpdatePerf::GetRequiredFeatures() {
+ std::vector<wgpu::FeatureName> requiredFeatures = DawnPerfTestWithParams::GetRequiredFeatures();
+ if (!UsesWire() && GetParam().uploadMethod == UploadMethod::MapWithExtendedUsages &&
+ SupportsFeatures({wgpu::FeatureName::BufferMapExtendedUsages})) {
+ requiredFeatures.push_back(wgpu::FeatureName::BufferMapExtendedUsages);
+ }
+ return requiredFeatures;
+}
+
size_t UniformBufferUpdatePerf::GetBufferSize() {
// The actual data size and the buffer creation size should be the same for a full upload.
return GetParam().uploadSize == UploadSize::Full ? kUniformDataSize : kUniformBufferSize;
@@ -182,7 +197,15 @@
return buffer;
}
wgpu::BufferDescriptor descriptor;
- descriptor.usage = wgpu::BufferUsage::Uniform | wgpu::BufferUsage::CopyDst;
+ descriptor.usage = wgpu::BufferUsage::Uniform;
+
+ if (GetParam().uploadMethod == UploadMethod::MapWithExtendedUsages) {
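+ // Create the buffer already mapped so the first write needs no MapAsync round trip;
+ // recycled buffers are re-mapped asynchronously before reuse.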
+ descriptor.usage |= wgpu::BufferUsage::MapWrite;
+ descriptor.mappedAtCreation = true;
+ } else {
+ descriptor.usage |= wgpu::BufferUsage::CopyDst;
+ }
+
descriptor.size = GetBufferSize();
return device.CreateBuffer(&descriptor);
}
@@ -214,6 +237,10 @@
void UniformBufferUpdatePerf::SetUp() {
DawnPerfTestWithParams<UniformBufferUpdateParams>::SetUp();
+ // Skip the test if it needs the BufferMapExtendedUsages feature but the device lacks it.
+ DAWN_TEST_UNSUPPORTED_IF(GetParam().uploadMethod == UploadMethod::MapWithExtendedUsages &&
+ !device.HasFeature(wgpu::FeatureName::BufferMapExtendedUsages));
+
// Create the color / depth stencil attachments.
wgpu::TextureDescriptor descriptor = {};
descriptor.dimension = wgpu::TextureDimension::e2D;
@@ -273,6 +300,14 @@
mSingleStagingBuffer = FindOrCreateStagingBuffer();
memcpy(mSingleStagingBuffer.GetMappedRange(0, data.size()), data.data(), data.size());
mSingleStagingBuffer.Unmap();
+
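+ // For the single-buffer direct-map case the data is written here; the Step() upload switch
+ // skips it.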
+ if (GetParam().uploadMethod == UploadMethod::MapWithExtendedUsages &&
+ GetParam().uniformBuffer == UniformBuffer::Single) {
+ auto buffer = FindOrCreateUniformBuffer();
+ memcpy(buffer.GetMappedRange(0, data.size()), data.data(), data.size());
+ buffer.Unmap();
+ ReturnUniformBuffer(buffer);
+ }
}
void UniformBufferUpdatePerf::Step() {
@@ -294,6 +329,12 @@
stagingBuffer.Unmap();
commands.CopyBufferToBuffer(stagingBuffer, 0, uniformBuffer, 0, data.size());
break;
+ case UploadMethod::MapWithExtendedUsages:
+ if (GetParam().uniformBuffer == UniformBuffer::Multiple) {
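+ // The buffer is already mapped, either at creation or by the MapAsync issued when it
+ // was recycled.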
+ memcpy(uniformBuffer.GetMappedRange(0, data.size()), data.data(), data.size());
+ uniformBuffer.Unmap();
+ }
+ break;
}
utils::ComboRenderPassDescriptor renderPass({mColorAttachmentTextureView},
mDepthStencilAttachment);
@@ -326,12 +367,24 @@
break;
case UniformBuffer::Multiple:
// Return the uniform buffer once it's done with the last submit.
- queue.OnSubmittedWorkDone(wgpu::CallbackMode::AllowProcessEvents,
- [this, uniformBuffer](wgpu::QueueWorkDoneStatus status) {
- if (status == wgpu::QueueWorkDoneStatus::Success) {
- this->ReturnUniformBuffer(uniformBuffer);
- }
- });
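+ // For the direct-map path, MapAsync doubles as the completion signal: the buffer is
+ // returned to the pool only once it is writable again.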
+ if (GetParam().uploadMethod == UploadMethod::MapWithExtendedUsages) {
+ uniformBuffer.MapAsync(
+ wgpu::MapMode::Write, 0, GetBufferSize(),
+ wgpu::CallbackMode::AllowProcessEvents,
+ [this, uniformBuffer](wgpu::MapAsyncStatus status, const char*) {
+ if (status == wgpu::MapAsyncStatus::Success) {
+ this->ReturnUniformBuffer(uniformBuffer);
+ }
+ });
+ } else {
+ queue.OnSubmittedWorkDone(
+ wgpu::CallbackMode::AllowProcessEvents,
+ [this, uniformBuffer](wgpu::QueueWorkDoneStatus status) {
+ if (status == wgpu::QueueWorkDoneStatus::Success) {
+ this->ReturnUniformBuffer(uniformBuffer);
+ }
+ });
+ }
break;
}
@@ -357,7 +410,7 @@
{D3D11Backend(), D3D12Backend(), MetalBackend(), OpenGLBackend(),
OpenGLESBackend(), VulkanBackend()},
{UploadMethod::WriteBuffer, UploadMethod::SingleStagingBuffer,
- UploadMethod::MultipleStagingBuffer},
+ UploadMethod::MultipleStagingBuffer, UploadMethod::MapWithExtendedUsages},
{UploadSize::Partial, UploadSize::Full},
{UniformBuffer::Single, UniformBuffer::Multiple});