// Copyright 2019 The Dawn & Tint Authors
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// 1. Redistributions of source code must retain the above copyright notice, this
// list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// 3. Neither the name of the copyright holder nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "dawn/native/vulkan/ResourceMemoryAllocatorVk.h"
#include <algorithm>
#include <utility>
#include "dawn/common/Math.h"
#include "dawn/native/BuddyMemoryAllocator.h"
#include "dawn/native/Queue.h"
#include "dawn/native/ResourceHeapAllocator.h"
#include "dawn/native/vulkan/DeviceVk.h"
#include "dawn/native/vulkan/FencedDeleter.h"
#include "dawn/native/vulkan/ResourceHeapVk.h"
#include "dawn/native/vulkan/VulkanError.h"
#include "partition_alloc/pointers/raw_ptr.h"
namespace dawn::native::vulkan {
namespace {
VkDeviceSize GetMaxSuballocationSize(VkDeviceSize heapBlockSize) {
// Cap suballocations at half the heap block size so that each heap block of the buddy
// system can serve at least two resources of the maximum suballocated size.
// TODO(crbug.com/dawn/849): This is a hardcoded heuristic to choose when to suballocate, but
// it should ideally depend on the size of the memory heaps and other factors.
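// With the default 8MiB heap block size from GetHeapBlockSize, this threshold is 4MiB:
// resources of 4MiB or more get a dedicated device memory allocation instead.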
return heapBlockSize / 2;
}
bool IsMemoryKindMappable(MemoryKind memoryKind) {
return memoryKind & (MemoryKind::ReadMappable | MemoryKind::WriteMappable);
}
VkMemoryPropertyFlags GetRequiredMemoryPropertyFlags(MemoryKind memoryKind, bool mappable) {
VkMemoryPropertyFlags vkFlags = 0;
// Mappable resources must be host visible and host coherent.
if (mappable) {
vkFlags |= VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT;
vkFlags |= VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
}
// DEVICE_LOCAL_BIT must be set when MemoryKind::DeviceLocal is required.
if (memoryKind & MemoryKind::DeviceLocal) {
vkFlags |= VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
}
// HOST_CACHED_BIT must be set when MemoryKind::HostCached is required.
if (memoryKind & MemoryKind::HostCached) {
vkFlags |= VK_MEMORY_PROPERTY_HOST_CACHED_BIT;
}
return vkFlags;
}
} // anonymous namespace
bool SupportsBufferMapExtendedUsages(const VulkanDeviceInfo& deviceInfo) {
// On Vulkan, the memory type of mappable buffers with extended usages must have all of the
// memory property flags below.
constexpr VkMemoryPropertyFlags kMapExtendedUsageMemoryPropertyFlags =
VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
VK_MEMORY_PROPERTY_HOST_COHERENT_BIT | VK_MEMORY_PROPERTY_HOST_CACHED_BIT;
for (const auto& memoryType : deviceInfo.memoryTypes) {
if ((memoryType.propertyFlags & kMapExtendedUsageMemoryPropertyFlags) ==
kMapExtendedUsageMemoryPropertyFlags) {
return true;
}
}
return false;
}
// SingleTypeAllocator pairs a BuddyMemoryAllocator with its ResourceHeapAllocator client and
// can service suballocation requests, but only for a single Vulkan memory type.
class ResourceMemoryAllocator::SingleTypeAllocator : public ResourceHeapAllocator {
public:
SingleTypeAllocator(Device* device,
size_t memoryTypeIndex,
VkDeviceSize maxHeapSize,
VkDeviceSize heapBlockSize,
ResourceMemoryAllocator* memoryAllocator)
: mDevice(device),
mResourceMemoryAllocator(memoryAllocator),
mMemoryTypeIndex(memoryTypeIndex),
mMaxHeapSize(maxHeapSize),
mPooledMemoryAllocator(this),
mBuddySystem(
// Round down to a power of 2 that's <= mMaxHeapSize. This will always
// be a multiple of heapBlockSize because heapBlockSize is a power of 2.
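// For example, an 11GiB memory heap yields an 8GiB buddy system.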
uint64_t(1) << Log2(mMaxHeapSize),
// Take the min in the very unlikely case the memory heap is tiny.
std::min(uint64_t(1) << Log2(mMaxHeapSize), heapBlockSize),
&mPooledMemoryAllocator) {
DAWN_ASSERT(IsPowerOfTwo(heapBlockSize));
}
~SingleTypeAllocator() override = default;
// Frees any heaps that are unused and waiting to be recycled by the pool allocator.
void FreeRecycledMemory() { mPooledMemoryAllocator.FreeRecycledAllocations(); }
ResultOrError<ResourceMemoryAllocation> AllocateMemory(uint64_t size, uint64_t alignment) {
return mBuddySystem.Allocate(size, alignment);
}
void DeallocateMemory(const ResourceMemoryAllocation& allocation) {
mBuddySystem.Deallocate(allocation);
}
// Implementation of the ResourceHeapAllocator interface, making this a client of
// BuddyMemoryAllocator.
ResultOrError<std::unique_ptr<ResourceHeapBase>> AllocateResourceHeap(uint64_t size) override {
if (size > mMaxHeapSize) {
return DAWN_OUT_OF_MEMORY_ERROR("Allocation size too large");
}
VkMemoryAllocateInfo allocateInfo;
allocateInfo.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO;
allocateInfo.pNext = nullptr;
allocateInfo.allocationSize = size;
allocateInfo.memoryTypeIndex = mMemoryTypeIndex;
VkDeviceMemory allocatedMemory = VK_NULL_HANDLE;
// Check for OOM first because it is an error we want to surface to the application.
DAWN_TRY(
CheckVkOOMThenSuccess(mDevice->fn.AllocateMemory(mDevice->GetVkDevice(), &allocateInfo,
nullptr, &*allocatedMemory),
"vkAllocateMemory"));
DAWN_ASSERT(allocatedMemory != VK_NULL_HANDLE);
mResourceMemoryAllocator->RecordHeapAllocation(size);
return {std::make_unique<ResourceHeap>(allocatedMemory, mMemoryTypeIndex, size)};
}
void DeallocateResourceHeap(std::unique_ptr<ResourceHeapBase> allocation) override {
mResourceMemoryAllocator->DeallocateResourceHeap(ToBackend(allocation.get()));
}
private:
raw_ptr<Device> mDevice;
raw_ptr<ResourceMemoryAllocator> mResourceMemoryAllocator;
size_t mMemoryTypeIndex;
VkDeviceSize mMaxHeapSize;
PooledResourceMemoryAllocator mPooledMemoryAllocator;
BuddyMemoryAllocator mBuddySystem;
};
VkDeviceSize ResourceMemoryAllocator::GetHeapBlockSize(const DawnDeviceAllocatorControl* control) {
static constexpr VkDeviceSize kDefaultHeapBlockSize = 8ull * 1024ull * 1024ull; // 8MiB
VkDeviceSize heapBlockSize = kDefaultHeapBlockSize;
if (control && control->allocatorHeapBlockSize > 0) {
heapBlockSize = control->allocatorHeapBlockSize;
}
DAWN_ASSERT(IsPowerOfTwo(heapBlockSize));
return heapBlockSize;
}
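// As a minimal sketch (assuming the client chains a DawnDeviceAllocatorControl struct into
// the device descriptor as usual), a 4MiB block size could be requested with:
//   DawnDeviceAllocatorControl allocatorControl = {};
//   allocatorControl.allocatorHeapBlockSize = 4 * 1024 * 1024;  // must be a power of 2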
// Implementation of ResourceMemoryAllocator
ResourceMemoryAllocator::ResourceMemoryAllocator(Device* device, VkDeviceSize heapBlockSize)
: mDevice(device), mMaxSizeForSuballocation(GetMaxSuballocationSize(heapBlockSize)) {
const VulkanDeviceInfo& info = mDevice->GetDeviceInfo();
mAllocatorsPerType.reserve(info.memoryTypes.size());
for (size_t i = 0; i < info.memoryTypes.size(); i++) {
mAllocatorsPerType.emplace_back(std::make_unique<SingleTypeAllocator>(
mDevice, i, info.memoryHeaps[info.memoryTypes[i].heapIndex].size, heapBlockSize, this));
}
}
ResourceMemoryAllocator::~ResourceMemoryAllocator() = default;
ResultOrError<ResourceMemoryAllocation> ResourceMemoryAllocator::Allocate(
const VkMemoryRequirements& requirements,
MemoryKind kind,
bool forceDisableSubAllocation) {
// The Vulkan spec guarantees at least one memory type is valid.
int memoryType = FindBestTypeIndex(requirements, kind);
DAWN_ASSERT(memoryType >= 0);
VkDeviceSize size = requirements.size;
// Only sub-allocate non-mappable resources because, at the moment, the mapped pointer is
// part of the resource and not the heap, which doesn't match the Vulkan model.
// TODO(crbug.com/dawn/849): allow sub-allocating mappable resources, maybe.
if (!forceDisableSubAllocation && requirements.size < mMaxSizeForSuballocation &&
!IsMemoryKindMappable(kind) &&
!mDevice->IsToggleEnabled(Toggle::DisableResourceSuballocation)) {
// When sub-allocating, Vulkan requires that we respect bufferImageGranularity. Some
// hardware puts information on the memory's page table entry and allocating a linear
// resource in the same page as a non-linear (aka opaque) resource can cause issues.
// Probably because some texture compression flags are stored on the page table entry,
// and allocating a linear resource removes these flags.
//
// Anyway, just to be safe we require that all sub-allocated resources are allocated with at
// least this alignment. TODO(crbug.com/dawn/849): this is suboptimal because multiple
// linear (resp. opaque) resources can coexist in the same page. In particular Nvidia
// GPUs often use a granularity of 64k, which will lead to a lot of wasted space. Revisit
// with a more efficient algorithm later.
const VulkanDeviceInfo& info = mDevice->GetDeviceInfo();
uint64_t alignment =
std::max(requirements.alignment, info.properties.limits.bufferImageGranularity);
if ((info.memoryTypes[memoryType].propertyFlags &
(VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)) ==
VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) {
// Host accesses to non-coherent memory are bounded by nonCoherentAtomSize. We may map
// host visible "non-mappable" memory when taking the fast path during buffer uploads.
alignment = std::max(alignment, info.properties.limits.nonCoherentAtomSize);
}
ResourceMemoryAllocation subAllocation;
DAWN_TRY_ASSIGN(subAllocation, mAllocatorsPerType[memoryType]->AllocateMemory(
requirements.size, alignment));
if (subAllocation.GetInfo().mMethod != AllocationMethod::kInvalid) {
mTotalUsedMemory += requirements.size;
return subAllocation;
}
}
// If sub-allocation wasn't possible or failed, allocate memory dedicated to the resource.
std::unique_ptr<ResourceHeapBase> resourceHeap;
DAWN_TRY_ASSIGN(resourceHeap, mAllocatorsPerType[memoryType]->AllocateResourceHeap(size));
void* mappedPointer = nullptr;
if (IsMemoryKindMappable(kind)) {
DAWN_TRY_WITH_CLEANUP(
CheckVkSuccess(mDevice->fn.MapMemory(mDevice->GetVkDevice(),
ToBackend(resourceHeap.get())->GetMemory(), 0,
size, 0, &mappedPointer),
"vkMapMemory"),
{ mAllocatorsPerType[memoryType]->DeallocateResourceHeap(std::move(resourceHeap)); });
}
mTotalUsedMemory += size;
AllocationInfo info;
info.mMethod = AllocationMethod::kDirect;
info.mRequestedSize = size;
return ResourceMemoryAllocation(info, /*offset*/ 0, resourceHeap.release(),
static_cast<uint8_t*>(mappedPointer));
}
void ResourceMemoryAllocator::Deallocate(ResourceMemoryAllocation* allocation) {
switch (allocation->GetInfo().mMethod) {
// Some memory allocations can never be initialized, for example when wrapping
// swapchain VkImages with a Texture.
case AllocationMethod::kInvalid:
break;
// For direct allocations we can queue the memory for deletion immediately; the fenced
// deleter will make sure the resources are freed before the memory.
case AllocationMethod::kDirect: {
ResourceHeap* heap = ToBackend(allocation->GetResourceHeap());
// Track the direct allocation that will be deallocated against both the allocated and used
// memory sizes.
DAWN_ASSERT(mTotalUsedMemory >= allocation->GetInfo().mRequestedSize);
mUsedMemoryToDecrement[mDevice->GetFencedDeleter()->GetCurrentDeletionSerial()] +=
allocation->GetInfo().mRequestedSize;
allocation->Invalidate();
DeallocateResourceHeap(heap);
delete heap;
break;
}
// Suballocations aren't freed immediately, otherwise a resource allocated just afterwards
// could alias the old one and would require a barrier.
// TODO(crbug.com/dawn/851): Maybe we can produce the correct barriers to reduce the
// latency to reclaim memory.
case AllocationMethod::kSubAllocated: {
ExecutionSerial deletionSerial =
mDevice->GetFencedDeleter()->GetCurrentDeletionSerial();
mSubAllocationsToDelete.Enqueue(*allocation, deletionSerial);
// Track the suballocation that will be deallocated against the used memory size.
DAWN_ASSERT(mTotalUsedMemory >= allocation->GetInfo().mRequestedSize);
mUsedMemoryToDecrement[deletionSerial] += allocation->GetInfo().mRequestedSize;
break;
}
default:
DAWN_UNREACHABLE();
break;
}
// Invalidate the underlying resource heap in case the client accidentally
// calls Deallocate again using the same allocation.
allocation->Invalidate();
}
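// Returns the last serial at which the pending sub-allocation deletions become reclaimable,
// or kBeginningOfGPUTime when nothing is pending.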
ExecutionSerial ResourceMemoryAllocator::GetLastPendingDeletionSerial() {
ExecutionSerial lastSerial = kBeginningOfGPUTime;
auto GetLastSubmitted = [&lastSerial](auto& queue) {
if (!queue.Empty()) {
lastSerial = std::max(lastSerial, queue.LastSerial());
}
};
GetLastSubmitted(mSubAllocationsToDelete);
return lastSerial;
}
void ResourceMemoryAllocator::RecordHeapAllocation(VkDeviceSize size) {
mTotalAllocatedMemory += size;
}
void ResourceMemoryAllocator::DeallocateResourceHeap(ResourceHeap* heap) {
DAWN_ASSERT(mTotalAllocatedMemory >= heap->GetSize());
MutexProtected<FencedDeleter>& fencedDeleter = mDevice->GetFencedDeleter();
mAllocatedMemoryToDecrement[fencedDeleter->GetCurrentDeletionSerial()] += heap->GetSize();
fencedDeleter->DeleteWhenUnused(heap->GetMemory());
}
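// Reclaims the sub-allocations and decrements the memory counters for every deletion whose
// serial has completed.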
void ResourceMemoryAllocator::Tick(ExecutionSerial completedSerial) {
for (const ResourceMemoryAllocation& allocation :
mSubAllocationsToDelete.IterateUpTo(completedSerial)) {
DAWN_ASSERT(allocation.GetInfo().mMethod == AllocationMethod::kSubAllocated);
size_t memoryType = ToBackend(allocation.GetResourceHeap())->GetMemoryType();
mAllocatorsPerType[memoryType]->DeallocateMemory(allocation);
}
mSubAllocationsToDelete.ClearUpTo(completedSerial);
auto it = mUsedMemoryToDecrement.begin();
while (it != mUsedMemoryToDecrement.end() && it->first <= completedSerial) {
// Decrement the used memory tracked for allocations whose deletion serial has completed.
DAWN_ASSERT(mTotalUsedMemory >= it->second);
mTotalUsedMemory -= it->second;
it++;
}
// Erase the map entries up to the completed serial.
mUsedMemoryToDecrement.erase(mUsedMemoryToDecrement.begin(), it);
it = mAllocatedMemoryToDecrement.begin();
while (it != mAllocatedMemoryToDecrement.end() && it->first <= completedSerial) {
// Decrement the allocated memory tracked for heaps whose deletion serial has completed.
DAWN_ASSERT(mTotalAllocatedMemory >= it->second);
mTotalAllocatedMemory -= it->second;
it++;
}
// Erase the map entries up to the completed serial.
mAllocatedMemoryToDecrement.erase(mAllocatedMemoryToDecrement.begin(), it);
}
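// Picks a memory type compatible with the requirements and the memory kind. Ties between
// candidate types are broken in priority order: lazily-allocated (for
// MemoryKind::LazilyAllocated), then device-local (for non-mappable kinds), then host-cached
// (for read-mappable kinds), and finally the biggest heap.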
int ResourceMemoryAllocator::FindBestTypeIndex(VkMemoryRequirements requirements, MemoryKind kind) {
const VulkanDeviceInfo& info = mDevice->GetDeviceInfo();
bool mappable = IsMemoryKindMappable(kind);
VkMemoryPropertyFlags vkRequiredFlags = GetRequiredMemoryPropertyFlags(kind, mappable);
// Find a suitable memory type for this allocation
int bestType = -1;
for (size_t i = 0; i < info.memoryTypes.size(); ++i) {
// Resource must support this memory type
if ((requirements.memoryTypeBits & (1 << i)) == 0) {
continue;
}
// Memory type must have all the required memory properties.
if ((info.memoryTypes[i].propertyFlags & vkRequiredFlags) != vkRequiredFlags) {
continue;
}
// Found the first candidate memory type
if (bestType == -1) {
bestType = static_cast<int>(i);
continue;
}
// For non-mappable resources that can be lazily allocated, favor lazy
// allocation (note: this is a more important property than that of
// device local memory and hence is checked first).
bool currentLazilyAllocated =
(info.memoryTypes[i].propertyFlags & VK_MEMORY_PROPERTY_LAZILY_ALLOCATED_BIT) != 0u;
bool bestLazilyAllocated = (info.memoryTypes[bestType].propertyFlags &
VK_MEMORY_PROPERTY_LAZILY_ALLOCATED_BIT) != 0u;
if ((kind == MemoryKind::LazilyAllocated) &&
(currentLazilyAllocated != bestLazilyAllocated)) {
if (currentLazilyAllocated) {
bestType = static_cast<int>(i);
}
continue;
}
// For non-mappable, non-lazily-allocated resources, favor device local
// memory.
bool currentDeviceLocal =
(info.memoryTypes[i].propertyFlags & VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT) != 0u;
bool bestDeviceLocal =
(info.memoryTypes[bestType].propertyFlags & VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT) != 0u;
if (!mappable && (currentDeviceLocal != bestDeviceLocal)) {
if (currentDeviceLocal) {
bestType = static_cast<int>(i);
}
continue;
}
// Cached memory is optimal for read-only access from CPU as host memory accesses to
// uncached memory are slower than to cached memory.
bool currentHostCached =
(info.memoryTypes[i].propertyFlags & VK_MEMORY_PROPERTY_HOST_CACHED_BIT) != 0u;
bool bestHostCached =
(info.memoryTypes[bestType].propertyFlags & VK_MEMORY_PROPERTY_HOST_CACHED_BIT) != 0u;
if ((kind & MemoryKind::ReadMappable) && currentHostCached != bestHostCached) {
if (currentHostCached) {
bestType = static_cast<int>(i);
}
continue;
}
// All else being equal, favor the memory type in the biggest heap.
VkDeviceSize bestTypeHeapSize = info.memoryHeaps[info.memoryTypes[bestType].heapIndex].size;
VkDeviceSize candidateHeapSize = info.memoryHeaps[info.memoryTypes[i].heapIndex].size;
if (candidateHeapSize > bestTypeHeapSize) {
bestType = static_cast<int>(i);
continue;
}
}
return bestType;
}
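// Frees, across all memory types, any heaps that are unused and waiting to be recycled by
// the pool allocators.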
void ResourceMemoryAllocator::FreeRecycledMemory() {
for (auto& alloc : mAllocatorsPerType) {
alloc->FreeRecycledMemory();
}
}
uint64_t ResourceMemoryAllocator::GetTotalUsedMemory() const {
return mTotalUsedMemory;
}
uint64_t ResourceMemoryAllocator::GetTotalAllocatedMemory() const {
return mTotalAllocatedMemory;
}
} // namespace dawn::native::vulkan