// blob: c5ee6fb3aa999ed59623e705a1e139d38ca8f3d7 [file] [log] [blame]
// Copyright 2025 The Dawn & Tint Authors
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// 1. Redistributions of source code must retain the above copyright notice, this
// list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// 3. Neither the name of the copyright holder nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "dawn/native/webgpu/BufferWGPU.h"
#include <algorithm>
#include <string>
#include <utility>
#include "dawn/common/StringViewUtils.h"
#include "dawn/native/Buffer.h"
#include "dawn/native/webgpu/CaptureContext.h"
#include "dawn/native/webgpu/DeviceWGPU.h"
#include "dawn/native/webgpu/QueueWGPU.h"
#include "dawn/native/webgpu/Serialization.h"
namespace dawn::native::webgpu {
// static
// Creates a backend buffer by creating a matching buffer on the inner WebGPU device.
//
// The usage passed to the inner device starts from ComputeInternalBufferUsages(); CopySrc is
// added unless the buffer is MapRead, and internal usage bits are replaced by regular ones.
// When the inner device hands back a null handle, an error buffer is returned instead.
ResultOrError<Ref<Buffer>> Buffer::Create(Device* device,
                                          const UnpackedPtr<BufferDescriptor>& descriptor) {
    auto innerUsage = ComputeInternalBufferUsages(device, descriptor->usage, descriptor->size);

    // Make the inner buffer copyable for readback if possible.
    if (!(innerUsage & wgpu::BufferUsage::MapRead)) {
        innerUsage |= wgpu::BufferUsage::CopySrc;
    }

    // Resolve internal usages to regular ones: clear the internal bit, set the regular one.
    auto resolveInternalUsage = [&innerUsage](auto internalBit, auto regularBit) {
        if (innerUsage & internalBit) {
            innerUsage &= ~internalBit;
            innerUsage |= regularBit;
        }
    };
    resolveInternalUsage(kInternalStorageBuffer, wgpu::BufferUsage::Storage);
    resolveInternalUsage(kReadOnlyStorageBuffer, wgpu::BufferUsage::Storage);
    resolveInternalUsage(kInternalCopySrcBuffer, wgpu::BufferUsage::CopySrc);

    WGPUBufferDescriptor innerDesc = WGPU_BUFFER_DESCRIPTOR_INIT;
    innerDesc.label = ToOutputStringView(descriptor->label);
    innerDesc.usage = ToAPI(innerUsage);
    innerDesc.size = descriptor->size;
    innerDesc.mappedAtCreation = descriptor->mappedAtCreation;

    WGPUBuffer innerHandle =
        device->wgpu.deviceCreateBuffer(device->GetInnerHandle(), &innerDesc);
    if (innerHandle != nullptr) {
        return AcquireRef(new Buffer(device, descriptor, innerHandle));
    }

    // The inner handle can be nullptr when mappedAtCreation == true and fails.
    // Return an error buffer.
    const BufferDescriptor* plainDescriptor = *descriptor;
    return ToBackend(BufferBase::MakeError(device, plainDescriptor));
}
// Constructs the backend buffer around an inner WGPUBuffer handle that has already been created
// (see Buffer::Create). The object is tagged as schema::ObjectType::Buffer for recording and
// hands the inner device's bufferRelease proc to ObjectWGPU.
// NOTE(review): presumably ObjectWGPU uses that proc to release mInnerHandle — confirm there.
Buffer::Buffer(Device* device,
               const UnpackedPtr<BufferDescriptor>& descriptor,
               WGPUBuffer innerBuffer)
    : BufferBase(device, descriptor),
      RecordableObject(schema::ObjectType::Buffer),
      ObjectWGPU(device->wgpu.bufferRelease) {
    mInnerHandle = innerBuffer;
    // The allocated size is recorded as exactly the buffer's size.
    mAllocatedSize = GetSize();
}
// Returns true when the inner WebGPU implementation reports the inner buffer's map state as
// Mapped.
bool Buffer::IsCPUWritableAtCreation() const {
    const auto innerMapState = ToBackend(GetDevice())->wgpu.bufferGetMapState(mInnerHandle);
    return innerMapState == WGPUBufferMapState_Mapped;
}
// Fetches the mapped range covering the whole inner buffer (offset 0, GetSize() bytes) and
// stores it as the mapped data pointer. Always returns success.
MaybeError Buffer::MapAtCreationImpl() {
    auto& wgpu = ToBackend(GetDevice())->wgpu;
    mMappedData = wgpu.bufferGetMappedRange(mInnerHandle, 0, GetSize());
    return {};
}
// Maps the inner buffer and blocks until the inner map operation has completed.
//
// On success mMappedData is set to the mapped pointer with `offset` subtracted, so that it is
// relative to the start of the buffer. If the inner map does not report success, an internal
// error carrying the inner message is returned.
MaybeError Buffer::MapAsyncImpl(wgpu::MapMode mode, size_t offset, size_t size) {
    // Filled in by the inner map callback.
    struct MapAsyncResult {
        WGPUMapAsyncStatus status;
        std::string message;
    };
    MapAsyncResult outcome = {};

    // Copies the inner status and message into the MapAsyncResult passed as first userdata.
    auto onInnerMapDone = [](WGPUMapAsyncStatus status, WGPUStringView message, void* userdata1,
                             void* userdata2) {
        auto* out = static_cast<MapAsyncResult*>(userdata1);
        out->status = status;
        out->message = ToString(message);
    };

    WGPUBufferMapCallbackInfo callbackInfo = {};
    callbackInfo.mode = WGPUCallbackMode_WaitAnyOnly;
    callbackInfo.callback = onInnerMapDone;
    callbackInfo.userdata1 = &outcome;
    callbackInfo.userdata2 = this;

    Device* backendDevice = ToBackend(GetDevice());
    auto& wgpu = backendDevice->wgpu;

    // TODO(crbug.com/413053623): We do not have a way to efficiently process the async event
    // on the inner webgpu layer. For now we simply wait on the future.
    WGPUFutureWaitInfo pending = {};
    pending.future = wgpu.bufferMapAsync(mInnerHandle, static_cast<WGPUMapMode>(mode), offset,
                                         size, callbackInfo);
    wgpu.instanceWaitAny(backendDevice->GetInnerInstance(), 1, &pending, UINT64_MAX);

    if (outcome.status != WGPUMapAsyncStatus_Success) {
        return DAWN_INTERNAL_ERROR(outcome.message);
    }

    // Writable mappings use the mutable accessor; read-only mappings use the const accessor.
    uint8_t* rangeStart = nullptr;
    if (bool{mode & wgpu::MapMode::Write}) {
        rangeStart = static_cast<uint8_t*>(wgpu.bufferGetMappedRange(mInnerHandle, offset, size));
    } else if (bool{mode & wgpu::MapMode::Read}) {
        const void* readOnlyRange = wgpu.bufferGetConstMappedRange(mInnerHandle, offset, size);
        rangeStart = static_cast<uint8_t*>(const_cast<void*>(readOnlyRange));
    } else {
        DAWN_UNREACHABLE();
    }

    // The frontend asks that the pointer returned by GetMappedPointer is from the start of
    // the resource but WGPU gives us the pointer at offset. Remove the offset.
    mMappedData = rangeStart - offset;
    return {};
}
// Nothing to finalize in this backend: always reports success and ignores the new state.
MaybeError Buffer::FinalizeMapImpl([[maybe_unused]] BufferState newState) {
    return {};
}
// Returns the stored mapped data pointer. MapAsyncImpl stores it with the map offset already
// subtracted and MapAtCreationImpl stores the range starting at offset 0, so no adjustment is
// needed here.
void* Buffer::GetMappedPointerImpl() {
    void* bufferStart = mMappedData;
    return bufferStart;
}
// Unmaps the inner buffer and clears the mapped data pointer.
//
// Before unmapping, if the buffer was mapped for writing and is not being destroyed, the mapped
// range is handed to the queue's capture context (when there is one).
void Buffer::UnmapImpl(BufferState oldState, BufferState newState) {
    auto deviceGuard = GetDevice()->GetGuard();

    const bool leavingWriteMapping = IsMappedState(oldState) &&
                                     MapMode() == wgpu::MapMode::Write &&
                                     newState != BufferState::Destroyed;
    if (leavingWriteMapping) {
        if (CaptureContext* capture = ToBackend(GetDevice()->GetQueue())->GetCaptureContext();
            capture != nullptr) {
            // The outcome of the capture is intentionally not acted upon here.
            [[maybe_unused]] auto captureOutcome =
                capture->CaptureUnmapBuffer(this, MapOffset(), mMappedData, MapSize());
        }
    }

    if (mInnerHandle) {
        auto& wgpu = ToBackend(GetDevice())->wgpu;
        wgpu.bufferUnmap(mInnerHandle);
    }
    mMappedData = nullptr;
}
// Runs the base-class destroy logic first, then destroys the inner buffer.
void Buffer::DestroyImpl(DestroyReason reason) {
    BufferBase::DestroyImpl(reason);
    ToBackend(GetDevice())->wgpu.bufferDestroy(mInnerHandle);
}
void Buffer::SetLabelImpl() {
ToBackend(GetDevice())->CaptureSetLabel(this, GetLabel());
}
// Buffers do not reference other objects, so there is nothing to add; always succeeds.
MaybeError Buffer::AddReferenced([[maybe_unused]] CaptureContext& captureContext) {
    return {};
}
// Serializes the buffer's creation parameters (its size and usage) into the capture.
MaybeError Buffer::CaptureCreationParameters(CaptureContext& captureContext) {
    schema::Buffer creationParams{{
        .size = GetSize(),
        .usage = GetUsage(),
    }};
    Serialize(captureContext, creationParams);
    return {};
}
// Adds the buffer's content to the capture only when it is needed: the buffer must be a new
// resource and must have neither MapWrite nor MapRead usage. Otherwise this is a successful
// no-op.
MaybeError Buffer::CaptureContentIfNeeded(CaptureContext& captureContext,
                                          [[maybe_unused]] schema::ObjectId id,
                                          bool newResource) {
    // TODO(451338754): If it's a new resource and we know the buffer is all zero then don't
    // capture.
    const wgpu::BufferUsage bufferUsage = GetUsage();

    // MapWrite buffers are treated as unwritable on playback.
    const bool hasMapWrite = bufferUsage & wgpu::BufferUsage::MapWrite;
    // A MapRead buffer is never used as input since it's only allowed CopyDst
    // so we don't need its contents.
    const bool hasMapRead = bufferUsage & wgpu::BufferUsage::MapRead;

    const bool contentNeeded = newResource && !hasMapWrite && !hasMapRead;
    if (!contentNeeded) {
        return {};
    }
    return AddContentToCapture(captureContext);
}
// Writes the buffer's current content into the capture.
//
// A RootCommandWriteBufferCmd covering [0, copyableSize) is serialized first. The content is
// then read back in chunks of at most CaptureContext::kCopyBufferSize bytes: each chunk is
// copied into the capture context's copy buffer, that buffer is mapped for reading (blocking
// until the map completes), and the mapped bytes are handed to the content writer.
//
// Returns an internal error carrying the inner message if mapping the copy buffer fails.
//
// TODO(451650604): We currently get at most 1mb at a time to keep memory usage down.
// Revisit for speed later.
MaybeError Buffer::AddContentToCapture(CaptureContext& captureContext) {
    // TODO(473593119): Handle the unaligned trailing bytes.
    // TODO(473568230): Support copies with unaligned size.
    // copyBufferToBuffer requires 4 byte alignment for both size and offset, which prevents
    // us from copying the trailing bytes. writeBuffer has the same alignment requirements,
    // so the user can't put bytes in via writeBuffer. mapAsync requires the offset to be 8 byte
    // aligned and the size to be a multiple of 4 bytes, so the user cannot set those last bytes
    // with mapAsync either. We can still access those bytes with copyBufferToTexture and
    // copyTextureToBuffer though. For now, we just ignore the last 3 bytes.
    uint64_t copyableSize = AlignDown(GetSize(), 4);
    if (copyableSize == 0) {
        return {};
    }

    // Filled in by the inner map callback.
    struct MapAsyncResult {
        WGPUMapAsyncStatus status;
        std::string message;
    };

    schema::RootCommandWriteBufferCmd cmd{{
        .data = {{
            .bufferId = captureContext.GetId(this),
            .bufferOffset = 0,
            .size = copyableSize,
        }},
    }};
    Serialize(captureContext, cmd);

    WGPUBuffer srcBuffer = GetInnerHandle();
    WGPUBuffer copyBuffer = captureContext.GetCopyBuffer();
    WGPUQueue queue = ToBackend(GetDevice()->GetQueue())->GetInnerHandle();
    Device* device = ToBackend(GetDevice());
    WGPUDevice innerDevice = device->GetInnerHandle();
    auto& wgpu = device->wgpu;

    CaptureContext::ScopedContentWriter writer(captureContext);
    for (uint64_t offset = 0; offset < copyableSize; offset += CaptureContext::kCopyBufferSize) {
        uint64_t copySize = std::min(CaptureContext::kCopyBufferSize, copyableSize - offset);

        // Copy this chunk of the source buffer to the start of the copy buffer.
        WGPUCommandEncoder encoder = wgpu.deviceCreateCommandEncoder(innerDevice, nullptr);
        wgpu.commandEncoderCopyBufferToBuffer(encoder, srcBuffer, offset, copyBuffer, 0, copySize);
        WGPUCommandBuffer commandBuffer = wgpu.commandEncoderFinish(encoder, nullptr);
        wgpu.queueSubmit(queue, 1, &commandBuffer);
        wgpu.commandBufferRelease(commandBuffer);
        wgpu.commandEncoderRelease(encoder);

        // Use a fresh result for every chunk. If the wait below ever returns without the
        // callback having run, the status must not be a stale Success left over from a previous
        // chunk, otherwise we would read the copy buffer without it being mapped.
        MapAsyncResult mapAsyncResult = {};

        // Map the buffer to read back the content.
        WGPUBufferMapCallbackInfo innerCallbackInfo = {};
        innerCallbackInfo.mode = WGPUCallbackMode_WaitAnyOnly;
        innerCallbackInfo.callback = [](WGPUMapAsyncStatus status, WGPUStringView message,
                                        void* result_param, void* userdata_param) {
            MapAsyncResult* result = reinterpret_cast<MapAsyncResult*>(result_param);
            result->status = status;
            result->message = ToString(message);
        };
        innerCallbackInfo.userdata1 = &mapAsyncResult;
        innerCallbackInfo.userdata2 = this;

        // We read this back synchronously. I'm not sure we could do much more.
        WGPUFutureWaitInfo waitInfo = {};
        waitInfo.future =
            wgpu.bufferMapAsync(copyBuffer, WGPUMapMode_Read, 0, copySize, innerCallbackInfo);
        wgpu.instanceWaitAny(device->GetInnerInstance(), 1, &waitInfo, UINT64_MAX);
        DAWN_ASSERT(mapAsyncResult.status == WGPUMapAsyncStatus_Success);
        if (mapAsyncResult.status != WGPUMapAsyncStatus_Success) {
            return DAWN_INTERNAL_ERROR(mapAsyncResult.message);
        }

        const void* data = wgpu.bufferGetConstMappedRange(copyBuffer, 0, copySize);
        writer.WriteContentBytes(data, copySize);
        wgpu.bufferUnmap(copyBuffer);
    }
    return {};
}
} // namespace dawn::native::webgpu