blob: dd111afe2a7f471e4e3bb3c83a50f830aaf5daec [file] [log] [blame]
// Copyright 2020 The Dawn Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "dawn/native/d3d12/ShaderVisibleDescriptorAllocatorD3D12.h"
#include <algorithm>
#include <limits>
#include <utility>
#include "dawn/native/d3d/D3DError.h"
#include "dawn/native/d3d12/DeviceD3D12.h"
#include "dawn/native/d3d12/GPUDescriptorHeapAllocationD3D12.h"
#include "dawn/native/d3d12/ResidencyManagerD3D12.h"
namespace dawn::native::d3d12 {
// Fixed shader-visible heap sizes used for both the minimum and the maximum heap size when the
// small-heap testing mode is requested. The table is indexed by D3D12_DESCRIPTOR_HEAP_TYPE:
// entry 0 is CBV_SRV_UAV and entry 1 is SAMPLER.
// Keep these thresholds low (lower == faster) so that tests do not take too long to complete.
// The values were reduced from {1024, 512} to {32, 16} because
// D3D12DescriptorHeapTests.EncodeManyUBO relies on blending, and R16Float has a limited range and
// low precision for large integers.
static constexpr uint32_t kShaderVisibleSmallHeapSizes[] = {32, 16};
// Returns the initial (minimum) number of descriptors for a shader-visible heap of |heapType|.
//
// |heapType| must be D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV or D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER;
// any other type hits UNREACHABLE(). The type is validated by the switch *before* it is used to
// index kShaderVisibleSmallHeapSizes so that an unsupported type can never read past the end of
// that two-entry table.
//
// When |useSmallSize| is true the fixed testing size from kShaderVisibleSmallHeapSizes is
// returned instead of the regular minimum.
uint32_t GetD3D12ShaderVisibleHeapMinSize(D3D12_DESCRIPTOR_HEAP_TYPE heapType, bool useSmallSize) {
    // Minimum heap size must be large enough to satisfy the largest descriptor allocation
    // request and to amortize the cost of sub-allocation. But small enough to avoid wasting
    // memory should only a tiny fraction ever be used.
    // TODO(dawn:155): Figure out these values.
    switch (heapType) {
        case D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV:
            if (useSmallSize) {
                return kShaderVisibleSmallHeapSizes[heapType];
            }
            return 4096;
        case D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER:
            if (useSmallSize) {
                return kShaderVisibleSmallHeapSizes[heapType];
            }
            return 256;
        default:
            UNREACHABLE();
    }
}
// Returns the largest number of descriptors a shader-visible heap of |heapType| may grow to.
//
// |heapType| must be D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV or D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER;
// any other type hits UNREACHABLE(). The type is validated by the switch *before* it is used to
// index kShaderVisibleSmallHeapSizes so that an unsupported type can never read past the end of
// that two-entry table.
//
// When |useSmallSize| is true the fixed testing size from kShaderVisibleSmallHeapSizes is
// returned; otherwise the corresponding D3D12 maximum constant is returned.
uint32_t GetD3D12ShaderVisibleHeapMaxSize(D3D12_DESCRIPTOR_HEAP_TYPE heapType, bool useSmallSize) {
    switch (heapType) {
        case D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV:
            if (useSmallSize) {
                return kShaderVisibleSmallHeapSizes[heapType];
            }
            return D3D12_MAX_SHADER_VISIBLE_DESCRIPTOR_HEAP_SIZE_TIER_1;
        case D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER:
            if (useSmallSize) {
                return kShaderVisibleSmallHeapSizes[heapType];
            }
            return D3D12_MAX_SHADER_VISIBLE_SAMPLER_HEAP_SIZE;
        default:
            UNREACHABLE();
    }
}
// Returns the descriptor heap flags to create a heap of |heapType| with.
// Only the CBV_SRV_UAV and SAMPLER heap types are handled, and both map to
// D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE; any other type is UNREACHABLE().
D3D12_DESCRIPTOR_HEAP_FLAGS GetD3D12HeapFlags(D3D12_DESCRIPTOR_HEAP_TYPE heapType) {
    const bool isSupportedType = heapType == D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV ||
                                 heapType == D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER;
    if (!isSupportedType) {
        UNREACHABLE();
    }
    return D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE;
}
// static
// Builds an allocator for |heapType| on |device| and immediately gives it its first
// shader-visible heap. If that first heap cannot be allocated, the error is returned and no
// allocator is handed back.
ResultOrError<std::unique_ptr<ShaderVisibleDescriptorAllocator>>
ShaderVisibleDescriptorAllocator::Create(Device* device, D3D12_DESCRIPTOR_HEAP_TYPE heapType) {
    auto allocator = std::make_unique<ShaderVisibleDescriptorAllocator>(device, heapType);

    // The allocator is only usable once a heap has been switched in.
    DAWN_TRY(allocator->AllocateAndSwitchShaderVisibleHeap());

    return std::move(allocator);
}
// Records the heap type and owning device, queries the descriptor handle increment size for
// |heapType| from the D3D12 device, and seeds the descriptor count with the minimum heap size
// (or the small testing size when Toggle::UseD3D12SmallShaderVisibleHeapForTesting is enabled).
// No heap is created here; see Create() / AllocateAndSwitchShaderVisibleHeap().
//
// |heapType| must be D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV or D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER.
ShaderVisibleDescriptorAllocator::ShaderVisibleDescriptorAllocator(
    Device* device,
    D3D12_DESCRIPTOR_HEAP_TYPE heapType)
    : mHeapType(heapType),
      mDevice(device),
      mSizeIncrement(device->GetD3D12Device()->GetDescriptorHandleIncrementSize(heapType)),
      // Read the toggle through the |device| parameter rather than the mDevice member: members
      // are initialized in declaration order, so using the parameter keeps this initializer
      // correct regardless of how the members are ordered in the class definition.
      mDescriptorCount(GetD3D12ShaderVisibleHeapMinSize(
          heapType,
          device->IsToggleEnabled(Toggle::UseD3D12SmallShaderVisibleHeapForTesting))) {
    ASSERT(heapType == D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV ||
           heapType == D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER);
}
bool ShaderVisibleDescriptorAllocator::AllocateGPUDescriptors(
uint32_t descriptorCount,
ExecutionSerial pendingSerial,
D3D12_CPU_DESCRIPTOR_HANDLE* baseCPUDescriptor,
GPUDescriptorHeapAllocation* allocation) {
ASSERT(mHeap != nullptr);
const uint64_t startOffset = mAllocator.Allocate(descriptorCount, pendingSerial);
if (startOffset == RingBufferAllocator::kInvalidOffset) {
return false;
}
ID3D12DescriptorHeap* descriptorHeap = mHeap->GetD3D12DescriptorHeap();
const uint64_t heapOffset = mSizeIncrement * startOffset;
// Check for 32-bit overflow since CPU heap start handle uses size_t.
const size_t cpuHeapStartPtr = descriptorHeap->GetCPUDescriptorHandleForHeapStart().ptr;
ASSERT(heapOffset <= std::numeric_limits<size_t>::max() - cpuHeapStartPtr);
*baseCPUDescriptor = {cpuHeapStartPtr + static_cast<size_t>(heapOffset)};
const D3D12_GPU_DESCRIPTOR_HANDLE baseGPUDescriptor = {
descriptorHeap->GetGPUDescriptorHandleForHeapStart().ptr + heapOffset};
// Record both the device and heap serials to determine later if the allocations are
// still valid.
*allocation = GPUDescriptorHeapAllocation{baseGPUDescriptor, pendingSerial, mHeapSerial};
return true;
}
// Returns the ID3D12DescriptorHeap wrapped by the current heap (mHeap).
// mHeap is dereferenced unconditionally, so a heap must have been switched in before calling.
ID3D12DescriptorHeap* ShaderVisibleDescriptorAllocator::GetShaderVisibleHeap() const {
return mHeap->GetD3D12DescriptorHeap();
}
// Forwards |completedSerial| to the ring buffer allocator's Deallocate().
// NOTE(review): presumably this frees the descriptor ranges whose serial has completed on the
// GPU -- confirm against RingBufferAllocator::Deallocate.
void ShaderVisibleDescriptorAllocator::Tick(ExecutionSerial completedSerial) {
mAllocator.Deallocate(completedSerial);
}
// Creates a new shader-visible D3D12 descriptor heap of type mHeapType holding
// |descriptorCount| descriptors and wraps it in a ShaderVisibleDescriptorHeap.
//
// Before creating the heap, the residency manager is asked to make sure the estimated size can
// be allocated in the local memory segment. The new heap is registered with the residency
// manager as resident before it is returned.
//
// Returns an error if the residency manager cannot make room or if
// ID3D12Device::CreateDescriptorHeap fails.
ResultOrError<std::unique_ptr<ShaderVisibleDescriptorHeap>>
ShaderVisibleDescriptorAllocator::AllocateHeap(uint32_t descriptorCount) const {
    // The size in bytes of a descriptor heap is best calculated by the increment size
    // multiplied by the number of descriptors. In practice, this is only an estimate and
    // the actual size may vary depending on the driver.
    // Both operands are 32-bit, so widen one of them first: otherwise the product would be
    // computed (and could wrap) in 32 bits before being stored in the 64-bit result.
    const uint64_t heapSizeInBytes = static_cast<uint64_t>(mSizeIncrement) * descriptorCount;
    DAWN_TRY(
        mDevice->GetResidencyManager()->EnsureCanAllocate(heapSizeInBytes, MemorySegment::Local));

    D3D12_DESCRIPTOR_HEAP_DESC heapDescriptor = {};
    heapDescriptor.Type = mHeapType;
    heapDescriptor.NumDescriptors = descriptorCount;
    heapDescriptor.Flags = GetD3D12HeapFlags(mHeapType);
    heapDescriptor.NodeMask = 0;

    ComPtr<ID3D12DescriptorHeap> d3d12DescriptorHeap;
    DAWN_TRY(CheckOutOfMemoryHRESULT(mDevice->GetD3D12Device()->CreateDescriptorHeap(
                                         &heapDescriptor, IID_PPV_ARGS(&d3d12DescriptorHeap)),
                                     "ID3D12Device::CreateDescriptorHeap"));

    std::unique_ptr<ShaderVisibleDescriptorHeap> descriptorHeap =
        std::make_unique<ShaderVisibleDescriptorHeap>(std::move(d3d12DescriptorHeap),
                                                      heapSizeInBytes);

    // We must track the allocation in the LRU when it is created, otherwise the residency
    // manager will see the allocation as non-resident in the later call to LockAllocation.
    mDevice->GetResidencyManager()->TrackResidentAllocation(descriptorHeap.get());
    return std::move(descriptorHeap);
}
// Replaces the current shader-visible heap with another one whose descriptors are handed out
// in FIFO order by a fresh ring buffer allocator.
//
// Heaps are obtained with a two-phase strategy:
//  - Phase #1 (light users): while the descriptor count is below the maximum for this heap
//    type, each switch doubles the count (clamped to the maximum) and creates a new heap.
//  - Phase #2 (heavy users): once the maximum is reached, the outgoing heap is put in a pool
//    and the oldest pooled heap is reused if the GPU has finished with it; otherwise a new
//    maximum-sized heap is created.
//
// The heap serial is incremented on success so that allocations made from earlier heaps are
// detected as stale. Returns an error if a heap cannot be created or locked resident.
MaybeError ShaderVisibleDescriptorAllocator::AllocateAndSwitchShaderVisibleHeap() {
    std::unique_ptr<ShaderVisibleDescriptorHeap> nextHeap;

    if (mHeap != nullptr) {
        // The heap being switched out is released from its residency lock first.
        mDevice->GetResidencyManager()->UnlockAllocation(mHeap.get());

        const bool useSmallHeaps =
            mDevice->IsToggleEnabled(Toggle::UseD3D12SmallShaderVisibleHeapForTesting);
        const uint32_t descriptorCountLimit =
            GetD3D12ShaderVisibleHeapMaxSize(mHeapType, useSmallHeaps);

        if (mDescriptorCount >= descriptorCountLimit) {
            // Phase #2. Pool-allocate heaps.
            // Return the switched out heap to the pool and retrieve the oldest heap that is no
            // longer used by GPU. This maintains a heap buffer to avoid frequently re-creating
            // heaps for heavy users.
            // TODO(dawn:256): Consider periodically trimming to avoid OOM.
            mPool.push_back({mDevice->GetPendingCommandSerial(), std::move(mHeap)});

            auto& oldestPooled = mPool.front();
            if (oldestPooled.heapSerial <= mDevice->GetCompletedCommandSerial()) {
                nextHeap = std::move(oldestPooled.heap);
                mPool.pop_front();
            }
        } else {
            // Phase #1. Grow the heaps in powers-of-two.
            mDevice->ReferenceUntilUnused(mHeap->GetD3D12DescriptorHeap());
            mDescriptorCount = std::min(mDescriptorCount * 2, descriptorCountLimit);
        }
    }

    // No reusable heap was found (or this is the very first heap): create one.
    if (nullptr == nextHeap) {
        DAWN_TRY_ASSIGN(nextHeap, AllocateHeap(mDescriptorCount));
    }

    DAWN_TRY(mDevice->GetResidencyManager()->LockAllocation(nextHeap.get()));

    // Create a FIFO buffer from the recently created heap.
    mHeap = std::move(nextHeap);
    mAllocator = RingBufferAllocator(mDescriptorCount);

    // Invalidate all bindgroup allocations on previously bound heaps by incrementing the heap
    // serial. When a bindgroup attempts to re-populate, it will compare with its recorded
    // heap serial.
    mHeapSerial++;

    return {};
}
// Test-only accessor: returns the current heap serial, which is incremented every time
// AllocateAndSwitchShaderVisibleHeap() successfully switches heaps.
HeapVersionID ShaderVisibleDescriptorAllocator::GetShaderVisibleHeapSerialForTesting() const {
return mHeapSerial;
}
// Test-only accessor: returns the size reported by the ring buffer allocator, which is
// constructed with the descriptor count of the current heap.
uint64_t ShaderVisibleDescriptorAllocator::GetShaderVisibleHeapSizeForTesting() const {
return mAllocator.GetSize();
}
// Test-only accessor: returns how many switched-out heaps are currently held in the pool.
uint64_t ShaderVisibleDescriptorAllocator::GetShaderVisiblePoolSizeForTesting() const {
return mPool.size();
}
// Test-only accessor: reports whether the current heap is residency locked.
// mHeap is dereferenced unconditionally, so a heap must have been switched in before calling.
bool ShaderVisibleDescriptorAllocator::IsShaderVisibleHeapLockedResidentForTesting() const {
return mHeap->IsResidencyLocked();
}
// Test-only accessor: reports whether the heap most recently added to the pool is in the
// residency LRU cache. The pool must not be empty.
bool ShaderVisibleDescriptorAllocator::IsLastShaderVisibleHeapInLRUForTesting() const {
ASSERT(!mPool.empty());
return mPool.back().heap->IsInResidencyLRUCache();
}
// Returns true only if |allocation| was made for the device's pending command serial AND was
// made from the heap that is currently bound (its heap serial matches the current one).
bool ShaderVisibleDescriptorAllocator::IsAllocationStillValid(
    const GPUDescriptorHeapAllocation& allocation) const {
    // Descriptor allocations are only valid for the serial they were created for and are
    // re-allocated every submit, so anything allocated before the pending submit is stale.
    const bool madeForPendingSubmit =
        allocation.GetLastUsageSerial() == mDevice->GetPendingCommandSerial();
    if (!madeForPendingSubmit) {
        return false;
    }

    // The descriptor heap must also not have been switched since the allocation was made
    // (which happens when a larger descriptor heap was needed).
    return allocation.GetHeapSerial() == mHeapSerial;
}
// Wraps |d3d12DescriptorHeap| so it can be managed as a Pageable in the local memory segment.
// |size| is forwarded to Pageable; AllocateHeap() passes its estimated heap size in bytes.
// The Pageable base is initialized from the ComPtr before the ComPtr is moved into the member.
ShaderVisibleDescriptorHeap::ShaderVisibleDescriptorHeap(
ComPtr<ID3D12DescriptorHeap> d3d12DescriptorHeap,
uint64_t size)
: Pageable(d3d12DescriptorHeap, MemorySegment::Local, size),
mD3d12DescriptorHeap(std::move(d3d12DescriptorHeap)) {}
// Returns a non-owning pointer to the wrapped D3D12 descriptor heap.
ID3D12DescriptorHeap* ShaderVisibleDescriptorHeap::GetD3D12DescriptorHeap() const {
return mD3d12DescriptorHeap.Get();
}
} // namespace dawn::native::d3d12