Add method to reduce memory usage by dropping scratch buffers
Add dawn::native::ReduceMemoryUsage() to be called by Chromium when
going idle or under memory pressure. Currently, this only drops scratch
buffers, e.g. those held by DynamicUploader, InternalPipelineStore, and
the device's temporary uniform buffer.
Bug: 357139493
Change-Id: Ida06b851f19eb95982980f1649c118ec69fea43b
Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/202277
Reviewed-by: Loko Kung <lokokung@google.com>
Auto-Submit: Sunny Sachanandani <sunnyps@chromium.org>
Reviewed-by: Austin Eng <enga@chromium.org>
Commit-Queue: Austin Eng <enga@chromium.org>
diff --git a/include/dawn/native/DawnNative.h b/include/dawn/native/DawnNative.h
index 594c387..50e4ccf 100644
--- a/include/dawn/native/DawnNative.h
+++ b/include/dawn/native/DawnNative.h
@@ -302,6 +302,9 @@
// total estimated memory usage, and is intended for background tracing for UMA.
DAWN_NATIVE_EXPORT uint64_t ComputeEstimatedMemoryUsage(WGPUDevice device);
+// Free any unused GPU memory like staging buffers, cached resources, etc.
+DAWN_NATIVE_EXPORT void ReduceMemoryUsage(WGPUDevice device);
+
} // namespace dawn::native
#endif // INCLUDE_DAWN_NATIVE_DAWNNATIVE_H_
diff --git a/src/dawn/native/DawnNative.cpp b/src/dawn/native/DawnNative.cpp
index 885e62c..3b84d98 100644
--- a/src/dawn/native/DawnNative.cpp
+++ b/src/dawn/native/DawnNative.cpp
@@ -306,11 +306,18 @@
}
void DumpMemoryStatistics(WGPUDevice device, MemoryDump* dump) {
+ auto deviceLock(FromAPI(device)->GetScopedLock());
FromAPI(device)->DumpMemoryStatistics(dump);
}
uint64_t ComputeEstimatedMemoryUsage(WGPUDevice device) {
+ auto deviceLock(FromAPI(device)->GetScopedLock());
return FromAPI(device)->ComputeEstimatedMemoryUsage();
}
+void ReduceMemoryUsage(WGPUDevice device) {
+ auto deviceLock(FromAPI(device)->GetScopedLock());
+ FromAPI(device)->ReduceMemoryUsage();
+}
+
} // namespace dawn::native
diff --git a/src/dawn/native/Device.cpp b/src/dawn/native/Device.cpp
index e58623f..7a9238a 100644
--- a/src/dawn/native/Device.cpp
+++ b/src/dawn/native/Device.cpp
@@ -2588,6 +2588,7 @@
}
void DeviceBase::DumpMemoryStatistics(dawn::native::MemoryDump* dump) const {
+ DAWN_ASSERT(IsLockedByCurrentThreadIfNeeded());
std::string prefix = absl::StrFormat("device_%p", static_cast<const void*>(this));
GetObjectTrackingList(ObjectType::Texture)->ForEach([&](const ApiObjectBase* texture) {
static_cast<const TextureBase*>(texture)->DumpMemoryStatistics(dump, prefix.c_str());
@@ -2598,6 +2599,7 @@
}
uint64_t DeviceBase::ComputeEstimatedMemoryUsage() const {
+ DAWN_ASSERT(IsLockedByCurrentThreadIfNeeded());
uint64_t size = 0;
GetObjectTrackingList(ObjectType::Texture)->ForEach([&](const ApiObjectBase* texture) {
size += static_cast<const TextureBase*>(texture)->ComputeEstimatedByteSize();
@@ -2608,6 +2610,16 @@
return size;
}
+void DeviceBase::ReduceMemoryUsage() {
+ DAWN_ASSERT(IsLockedByCurrentThreadIfNeeded());
+ if (ConsumedError(GetQueue()->CheckPassedSerials())) {
+ return;
+ }
+ GetDynamicUploader()->Deallocate(GetQueue()->GetCompletedCommandSerial(), /*freeAll=*/true);
+ mInternalPipelineStore->ResetScratchBuffers();
+ mTemporaryUniformBuffer = nullptr;
+}
+
ResultOrError<Ref<BufferBase>> DeviceBase::GetOrCreateTemporaryUniformBuffer(size_t size) {
if (!mTemporaryUniformBuffer || mTemporaryUniformBuffer->GetSize() != size) {
BufferDescriptor desc;
diff --git a/src/dawn/native/Device.h b/src/dawn/native/Device.h
index 3dc04ec..a731f60 100644
--- a/src/dawn/native/Device.h
+++ b/src/dawn/native/Device.h
@@ -456,6 +456,7 @@
void DumpMemoryStatistics(dawn::native::MemoryDump* dump) const;
uint64_t ComputeEstimatedMemoryUsage() const;
+ void ReduceMemoryUsage();
ResultOrError<Ref<BufferBase>> GetOrCreateTemporaryUniformBuffer(size_t size);
diff --git a/src/dawn/native/DynamicUploader.cpp b/src/dawn/native/DynamicUploader.cpp
index 0922df2..77930ac 100644
--- a/src/dawn/native/DynamicUploader.cpp
+++ b/src/dawn/native/DynamicUploader.cpp
@@ -122,7 +122,7 @@
return uploadHandle;
}
-void DynamicUploader::Deallocate(ExecutionSerial lastCompletedSerial) {
+void DynamicUploader::Deallocate(ExecutionSerial lastCompletedSerial, bool freeAll) {
// Reclaim memory within the ring buffers by ticking (or removing requests no longer
// in-flight).
size_t i = 0;
@@ -130,8 +130,9 @@
mRingBuffers[i]->mAllocator.Deallocate(lastCompletedSerial);
// Never erase the last buffer as to prevent re-creating smaller buffers
- // again. The last buffer is the largest.
- if (mRingBuffers[i]->mAllocator.Empty() && i < mRingBuffers.size() - 1) {
+ // again unless explicitly asked to do so. The last buffer is the largest.
+ const bool shouldFree = (i < mRingBuffers.size() - 1) || freeAll;
+ if (mRingBuffers[i]->mAllocator.Empty() && shouldFree) {
mRingBuffers.erase(mRingBuffers.begin() + i);
} else {
i++;
diff --git a/src/dawn/native/DynamicUploader.h b/src/dawn/native/DynamicUploader.h
index 88bedfa..c3adbe3 100644
--- a/src/dawn/native/DynamicUploader.h
+++ b/src/dawn/native/DynamicUploader.h
@@ -64,7 +64,7 @@
ResultOrError<UploadHandle> Allocate(uint64_t allocationSize,
ExecutionSerial serial,
uint64_t offsetAlignment);
- void Deallocate(ExecutionSerial lastCompletedSerial);
+ void Deallocate(ExecutionSerial lastCompletedSerial, bool freeAll = false);
bool ShouldFlush();
diff --git a/src/dawn/native/InternalPipelineStore.cpp b/src/dawn/native/InternalPipelineStore.cpp
index 52d8ee4..58d1d4d 100644
--- a/src/dawn/native/InternalPipelineStore.cpp
+++ b/src/dawn/native/InternalPipelineStore.cpp
@@ -45,4 +45,9 @@
InternalPipelineStore::~InternalPipelineStore() = default;
+void InternalPipelineStore::ResetScratchBuffers() {
+ scratchStorage.Reset();
+ scratchIndirectStorage.Reset();
+}
+
} // namespace dawn::native
diff --git a/src/dawn/native/InternalPipelineStore.h b/src/dawn/native/InternalPipelineStore.h
index 294fafa..527cc35 100644
--- a/src/dawn/native/InternalPipelineStore.h
+++ b/src/dawn/native/InternalPipelineStore.h
@@ -64,6 +64,8 @@
Ref<ShaderModuleBase> placeholderFragmentShader;
+ void ResetScratchBuffers();
+
// A scratch buffer suitable for use as a copy destination and storage binding.
ScratchBuffer scratchStorage;
diff --git a/src/dawn/native/Queue.h b/src/dawn/native/Queue.h
index 9f6c3ef..d0a53bf 100644
--- a/src/dawn/native/Queue.h
+++ b/src/dawn/native/Queue.h
@@ -116,6 +116,17 @@
void DestroyImpl() override;
+ virtual MaybeError SubmitImpl(uint32_t commandCount, CommandBufferBase* const* commands) = 0;
+ virtual MaybeError WriteBufferImpl(BufferBase* buffer,
+ uint64_t bufferOffset,
+ const void* data,
+ size_t size);
+ virtual MaybeError WriteTextureImpl(const ImageCopyTexture& destination,
+ const void* data,
+ size_t dataSize,
+ const TextureDataLayout& dataLayout,
+ const Extent3D& writeSize);
+
private:
MaybeError WriteTextureInternal(const ImageCopyTexture* destination,
const void* data,
@@ -131,17 +142,6 @@
const Extent3D* copySize,
const CopyTextureForBrowserOptions* options);
- virtual MaybeError SubmitImpl(uint32_t commandCount, CommandBufferBase* const* commands) = 0;
- virtual MaybeError WriteBufferImpl(BufferBase* buffer,
- uint64_t bufferOffset,
- const void* data,
- size_t size);
- virtual MaybeError WriteTextureImpl(const ImageCopyTexture& destination,
- const void* data,
- size_t dataSize,
- const TextureDataLayout& dataLayout,
- const Extent3D& writeSize);
-
MaybeError ValidateSubmit(uint32_t commandCount, CommandBufferBase* const* commands) const;
MaybeError ValidateOnSubmittedWorkDone(wgpu::QueueWorkDoneStatus* status) const;
MaybeError ValidateWriteTexture(const ImageCopyTexture* destination,
diff --git a/src/dawn/tests/unittests/native/MemoryInstrumentationTests.cpp b/src/dawn/tests/unittests/native/MemoryInstrumentationTests.cpp
index ceb9a5e..ca1dfee 100644
--- a/src/dawn/tests/unittests/native/MemoryInstrumentationTests.cpp
+++ b/src/dawn/tests/unittests/native/MemoryInstrumentationTests.cpp
@@ -25,7 +25,9 @@
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#include <chrono>
#include <string>
+#include <thread>
#include <utility>
#include "dawn/native/DawnNative.h"
@@ -207,5 +209,35 @@
kBufferAllocatedSize + kMipmappedTextureSize + kMultisampleTextureSize + kETC2TextureSize);
}
+TEST_F(MemoryInstrumentationTest, ReduceMemoryUsage) {
+ constexpr uint64_t kBufferSize = 32;
+ constexpr wgpu::BufferDescriptor kBufferDesc = {
+ .usage = wgpu::BufferUsage::Uniform | wgpu::BufferUsage::CopyDst,
+ .size = kBufferSize,
+ };
+ wgpu::Buffer uniformBuffer = device.CreateBuffer(&kBufferDesc);
+ EXPECT_TRUE(uniformBuffer);
+
+ std::array<uint8_t, kBufferSize> zeroes = {};
+ device.GetQueue().WriteBuffer(uniformBuffer, 0, zeroes.data(), zeroes.size());
+ device.GetQueue().Submit(0, nullptr);
+
+ uniformBuffer.Destroy();
+
+ wgpu::Future completionFuture = device.GetQueue().OnSubmittedWorkDone(
+ wgpu::CallbackMode::WaitAnyOnly, [](wgpu::QueueWorkDoneStatus status) {});
+
+ wgpu::WaitStatus waitStatus = wgpu::WaitStatus::TimedOut;
+ while (waitStatus != wgpu::WaitStatus::Success) {
+ std::this_thread::sleep_for(std::chrono::milliseconds(100));
+ waitStatus = wgpu::Instance(ToAPI(mDeviceMock->GetInstance())).WaitAny(completionFuture, 0);
+ }
+
+ // DynamicUploader buffers will still be alive.
+ EXPECT_GT(ComputeEstimatedMemoryUsage(device.Get()), uint64_t(0));
+ ReduceMemoryUsage(device.Get());
+ EXPECT_EQ(ComputeEstimatedMemoryUsage(device.Get()), uint64_t(0));
+}
+
} // namespace
} // namespace dawn::native
diff --git a/src/dawn/tests/unittests/native/mocks/QueueMock.cpp b/src/dawn/tests/unittests/native/mocks/QueueMock.cpp
index b66e6c6..a58050c 100644
--- a/src/dawn/tests/unittests/native/mocks/QueueMock.cpp
+++ b/src/dawn/tests/unittests/native/mocks/QueueMock.cpp
@@ -29,11 +29,27 @@
#include "dawn/tests/unittests/native/mocks/DeviceMock.h"
+using testing::WithArgs;
+
namespace dawn::native {
QueueMock::QueueMock(DeviceMock* device, const QueueDescriptor* descriptor)
: QueueBase(device, descriptor) {
ON_CALL(*this, DestroyImpl).WillByDefault([this] { this->QueueBase::DestroyImpl(); });
+ ON_CALL(*this, SubmitImpl)
+ .WillByDefault([this](uint32_t, CommandBufferBase* const*) -> MaybeError {
+ this->QueueBase::IncrementLastSubmittedCommandSerial();
+ return {};
+ });
+ ON_CALL(*this, CheckAndUpdateCompletedSerials)
+ .WillByDefault([this]() -> ResultOrError<ExecutionSerial> {
+ return this->QueueBase::GetLastSubmittedCommandSerial();
+ });
+ ON_CALL(*this, WriteBufferImpl)
+ .WillByDefault(WithArgs<0, 1, 2, 3>([this](BufferBase* buffer, uint64_t bufferOffset,
+ const void* data, size_t size) -> MaybeError {
+ return this->QueueBase::WriteBufferImpl(buffer, bufferOffset, data, size);
+ }));
}
QueueMock::~QueueMock() = default;