Plumbs alignment offset through to RingBufferAllocator.
- By plumbing the offset through, the allocator can be smarter and only
allocate extra space for padding when necessary.
Bug: dawn:1985
Change-Id: I25f6762683cdd787e9a9d5ef3519231955a24914
Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/151280
Reviewed-by: Corentin Wallez <cwallez@chromium.org>
Commit-Queue: Loko Kung <lokokung@google.com>
Kokoro: Kokoro <noreply+kokoro@google.com>
diff --git a/src/dawn/native/DynamicUploader.cpp b/src/dawn/native/DynamicUploader.cpp
index f1de07b..980c441 100644
--- a/src/dawn/native/DynamicUploader.cpp
+++ b/src/dawn/native/DynamicUploader.cpp
@@ -32,7 +32,8 @@
}
ResultOrError<UploadHandle> DynamicUploader::AllocateInternal(uint64_t allocationSize,
- ExecutionSerial serial) {
+ ExecutionSerial serial,
+ uint64_t offsetAlignment) {
// Disable further sub-allocation should the request be too large.
if (allocationSize > kRingBufferSize) {
BufferDescriptor bufferDesc = {};
@@ -54,25 +55,20 @@
}
// Note: Validation ensures size is already aligned.
- // First-fit: find next smallest buffer large enough to satisfy the allocation request.
+ // First-fit: find next buffer large enough to satisfy the allocation request.
+ uint64_t startOffset = RingBufferAllocator::kInvalidOffset;
RingBuffer* targetRingBuffer = mRingBuffers.back().get();
for (auto& ringBuffer : mRingBuffers) {
- const RingBufferAllocator& ringBufferAllocator = ringBuffer->mAllocator;
+ RingBufferAllocator& ringBufferAllocator = ringBuffer->mAllocator;
// Prevent overflow.
DAWN_ASSERT(ringBufferAllocator.GetSize() >= ringBufferAllocator.GetUsedSize());
- const uint64_t remainingSize =
- ringBufferAllocator.GetSize() - ringBufferAllocator.GetUsedSize();
- if (allocationSize <= remainingSize) {
+ startOffset = ringBufferAllocator.Allocate(allocationSize, serial, offsetAlignment);
+ if (startOffset != RingBufferAllocator::kInvalidOffset) {
targetRingBuffer = ringBuffer.get();
break;
}
}
- uint64_t startOffset = RingBufferAllocator::kInvalidOffset;
- if (targetRingBuffer != nullptr) {
- startOffset = targetRingBuffer->mAllocator.Allocate(allocationSize, serial);
- }
-
// Upon failure, append a newly created ring buffer to fulfill the
// request.
if (startOffset == RingBufferAllocator::kInvalidOffset) {
@@ -126,19 +122,11 @@
mReleasedStagingBuffers.ClearUpTo(lastCompletedSerial);
}
-// TODO(dawn:512): Optimize this function so that it doesn't allocate additional memory
-// when it's not necessary.
ResultOrError<UploadHandle> DynamicUploader::Allocate(uint64_t allocationSize,
ExecutionSerial serial,
uint64_t offsetAlignment) {
DAWN_ASSERT(offsetAlignment > 0);
- UploadHandle uploadHandle;
- DAWN_TRY_ASSIGN(uploadHandle, AllocateInternal(allocationSize + offsetAlignment - 1, serial));
- uint64_t additionalOffset =
- Align(uploadHandle.startOffset, offsetAlignment) - uploadHandle.startOffset;
- uploadHandle.mappedBuffer = static_cast<uint8_t*>(uploadHandle.mappedBuffer) + additionalOffset;
- uploadHandle.startOffset += additionalOffset;
- return uploadHandle;
+ return AllocateInternal(allocationSize, serial, offsetAlignment);
}
bool DynamicUploader::ShouldFlush() {
diff --git a/src/dawn/native/DynamicUploader.h b/src/dawn/native/DynamicUploader.h
index e49ae0c..80b8f11 100644
--- a/src/dawn/native/DynamicUploader.h
+++ b/src/dawn/native/DynamicUploader.h
@@ -63,7 +63,9 @@
RingBufferAllocator mAllocator;
};
- ResultOrError<UploadHandle> AllocateInternal(uint64_t allocationSize, ExecutionSerial serial);
+ ResultOrError<UploadHandle> AllocateInternal(uint64_t allocationSize,
+ ExecutionSerial serial,
+ uint64_t offsetAlignment);
std::vector<std::unique_ptr<RingBuffer>> mRingBuffers;
SerialQueue<ExecutionSerial, Ref<BufferBase>> mReleasedStagingBuffers;
diff --git a/src/dawn/native/RingBufferAllocator.cpp b/src/dawn/native/RingBufferAllocator.cpp
index 3d02699..828d04dd 100644
--- a/src/dawn/native/RingBufferAllocator.cpp
+++ b/src/dawn/native/RingBufferAllocator.cpp
@@ -16,6 +16,8 @@
#include <utility>
+#include "dawn/common/Math.h"
+
// Note: Current RingBufferAllocator implementation uses two indices (start and end) to implement a
// circular queue. However, this approach defines a full queue when one element is still unused.
//
@@ -70,7 +72,9 @@
// queue, which identifies an existing (or new) frames-worth of resources. Internally, the
// ring-buffer maintains offsets of 3 "memory" states: Free, Reclaimed, and Used. This is done
// in FIFO order as older frames would free resources before newer ones.
-uint64_t RingBufferAllocator::Allocate(uint64_t allocationSize, ExecutionSerial serial) {
+uint64_t RingBufferAllocator::Allocate(uint64_t allocationSize,
+ ExecutionSerial serial,
+ uint64_t offsetAlignment) {
// Check if the buffer is full by comparing the used size.
// If the buffer is not split where waste occurs (e.g. cannot fit new sub-alloc in front), a
// subsequent sub-alloc could fail where the used size was previously adjusted to include
@@ -86,43 +90,45 @@
}
uint64_t startOffset = kInvalidOffset;
+ uint64_t currentRequestSize = 0u;
+
+ // Compute an alignment offset for the buffer if allocating at the end.
+ const uint64_t alignmentOffset = Align(mUsedEndOffset, offsetAlignment) - mUsedEndOffset;
+ const uint64_t alignedUsedEndOffset = mUsedEndOffset + alignmentOffset;
// Check if the buffer is NOT split (i.e sub-alloc on ends)
if (mUsedStartOffset <= mUsedEndOffset) {
// Order is important (try to sub-alloc at end first).
// This is due to FIFO order where sub-allocs are inserted from left-to-right (when not
// wrapped).
- if (mUsedEndOffset + allocationSize <= mMaxBlockSize) {
- startOffset = mUsedEndOffset;
- mUsedEndOffset += allocationSize;
- mUsedSize += allocationSize;
- mCurrentRequestSize += allocationSize;
+ if (alignedUsedEndOffset + allocationSize <= mMaxBlockSize) {
+ startOffset = alignedUsedEndOffset;
+ mUsedSize += allocationSize + alignmentOffset;
+ currentRequestSize = allocationSize + alignmentOffset;
} else if (allocationSize <= mUsedStartOffset) { // Try to sub-alloc at front.
// Count the space at the end so that a subsequent
// sub-alloc cannot fail when the buffer is full.
const uint64_t requestSize = (mMaxBlockSize - mUsedEndOffset) + allocationSize;
startOffset = 0;
- mUsedEndOffset = allocationSize;
mUsedSize += requestSize;
- mCurrentRequestSize += requestSize;
+ currentRequestSize = requestSize;
}
- } else if (mUsedEndOffset + allocationSize <=
- mUsedStartOffset) { // Otherwise, buffer is split where sub-alloc must be
- // in-between.
- startOffset = mUsedEndOffset;
- mUsedEndOffset += allocationSize;
- mUsedSize += allocationSize;
- mCurrentRequestSize += allocationSize;
+ } else if (alignedUsedEndOffset + allocationSize <= mUsedStartOffset) {
+ // Otherwise, buffer is split where sub-alloc must be in-between.
+ startOffset = alignedUsedEndOffset;
+ mUsedSize += allocationSize + alignmentOffset;
+ currentRequestSize = allocationSize + alignmentOffset;
}
if (startOffset != kInvalidOffset) {
+ mUsedEndOffset = startOffset + allocationSize;
+
Request request;
request.endOffset = mUsedEndOffset;
- request.size = mCurrentRequestSize;
+ request.size = currentRequestSize;
mInflightRequests.Enqueue(std::move(request), serial);
- mCurrentRequestSize = 0; // reset
}
return startOffset;
diff --git a/src/dawn/native/RingBufferAllocator.h b/src/dawn/native/RingBufferAllocator.h
index 6aeb142..e3b6840 100644
--- a/src/dawn/native/RingBufferAllocator.h
+++ b/src/dawn/native/RingBufferAllocator.h
@@ -33,7 +33,9 @@
RingBufferAllocator& operator=(const RingBufferAllocator&);
- uint64_t Allocate(uint64_t allocationSize, ExecutionSerial serial);
+ uint64_t Allocate(uint64_t allocationSize,
+ ExecutionSerial serial,
+ uint64_t offsetAlignment = 1);
void Deallocate(ExecutionSerial lastCompletedSerial);
uint64_t GetSize() const;
@@ -55,8 +57,6 @@
uint64_t mUsedStartOffset = 0; // Head of used sub-alloc requests (in bytes).
uint64_t mMaxBlockSize = 0; // Max size of the ring buffer (in bytes).
uint64_t mUsedSize = 0; // Size of the sub-alloc requests (in bytes) of the ring buffer.
- uint64_t mCurrentRequestSize =
- 0; // Size of the sub-alloc requests (in bytes) of the current serial.
};
} // namespace dawn::native
diff --git a/src/dawn/tests/end2end/QueueTests.cpp b/src/dawn/tests/end2end/QueueTests.cpp
index 0cbd7c7..f38fdb4 100644
--- a/src/dawn/tests/end2end/QueueTests.cpp
+++ b/src/dawn/tests/end2end/QueueTests.cpp
@@ -174,6 +174,19 @@
EXPECT_BUFFER_U32_RANGE_EQ(expectedData.data(), buffer, 0, kElements);
}
+// Test using the max buffer size. Regression test for dawn:1985. We don't bother validating the
+// results for this case since that would take a lot longer, just that there are no errors.
+TEST_P(QueueWriteBufferTests, MaxBufferSizeWriteBuffer) {
+ uint32_t maxBufferSize = GetSupportedLimits().limits.maxBufferSize;
+ wgpu::BufferDescriptor descriptor;
+ descriptor.size = maxBufferSize;
+ descriptor.usage = wgpu::BufferUsage::CopySrc | wgpu::BufferUsage::CopyDst;
+ wgpu::Buffer buffer = device.CreateBuffer(&descriptor);
+
+ std::vector<uint8_t> data(maxBufferSize);
+ queue.WriteBuffer(buffer, 0, data.data(), maxBufferSize);
+}
+
// Test a special code path: writing when dynamic uploader already contatins some unaligned
// data, it might be necessary to use a ring buffer with properly aligned offset.
TEST_P(QueueWriteBufferTests, UnalignedDynamicUploader) {