// Copyright 2018 The Dawn & Tint Authors
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// 1. Redistributions of source code must retain the above copyright notice, this
// list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// 3. Neither the name of the copyright holder nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "dawn/native/metal/DeviceMTL.h"
#include "dawn/common/GPUInfo.h"
#include "dawn/common/Platform.h"
#include "dawn/native/Adapter.h"
#include "dawn/native/BackendConnection.h"
#include "dawn/native/ChainUtils.h"
#include "dawn/native/Commands.h"
#include "dawn/native/ErrorData.h"
#include "dawn/native/EventManager.h"
#include "dawn/native/metal/BackendMTL.h"
#include "dawn/native/metal/BindGroupLayoutMTL.h"
#include "dawn/native/metal/BindGroupMTL.h"
#include "dawn/native/metal/BufferMTL.h"
#include "dawn/native/metal/CommandBufferMTL.h"
#include "dawn/native/metal/ComputePipelineMTL.h"
#include "dawn/native/metal/PhysicalDeviceMTL.h"
#include "dawn/native/metal/PipelineLayoutMTL.h"
#include "dawn/native/metal/QuerySetMTL.h"
#include "dawn/native/metal/QueueMTL.h"
#include "dawn/native/metal/RenderPipelineMTL.h"
#include "dawn/native/metal/SamplerMTL.h"
#include "dawn/native/metal/ShaderModuleMTL.h"
#include "dawn/native/metal/SharedFenceMTL.h"
#include "dawn/native/metal/SharedTextureMemoryMTL.h"
#include "dawn/native/metal/SwapChainMTL.h"
#include "dawn/native/metal/TextureMTL.h"
#include "dawn/native/metal/UtilsMetal.h"
#include "dawn/platform/DawnPlatform.h"
#include "dawn/platform/tracing/TraceEvent.h"
#include <type_traits>

namespace dawn::native::metal {

struct KalmanInfo {
    float filterValue;  // The current filtered estimate
    float kalmanGain;   // The Kalman gain
    float R;            // The covariance of the observation noise
    float P;            // The a posteriori estimate covariance
};

namespace {

// The minimum interval between two Kalman filter rounds (NSEC_PER_SEC / 10, i.e. 100ms expressed
// in nanoseconds).
static constexpr uint64_t kFilterIntervalInMs = static_cast<uint64_t>(NSEC_PER_SEC / 10);

// A simplified Kalman filter for estimating the timestamp period from measured values.
float KalmanFilter(KalmanInfo* info, float measuredValue) {
    // Update the Kalman gain.
    info->kalmanGain = info->P / (info->P + info->R);

    // Correct the filter value.
    info->filterValue =
        info->kalmanGain * measuredValue + (1.0f - info->kalmanGain) * info->filterValue;

    // Update the estimate covariance.
    info->P = (1.0f - info->kalmanGain) * info->P;
    return info->filterValue;
}
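
// Worked example (illustration only, not executed): with the initial values set in
// Device::Initialize (P = 1.0, R = 0.0001), the first round computes a gain of
// 1.0 / 1.0001 ~= 0.9999, so the filter value jumps almost entirely to the first measurement.
// Each round then shrinks P via 1/P' = 1/P + 1/R, so after n measurements the gain decays
// roughly like 1/n and drops below the 0.01 convergence threshold used in
// UpdateTimestampPeriod() after on the order of 100 rounds.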

void API_AVAILABLE(macos(10.15), ios(14)) UpdateTimestampPeriod(id<MTLDevice> device,
                                                                KalmanInfo* info,
                                                                MTLTimestamp* cpuTimestampStart,
                                                                MTLTimestamp* gpuTimestampStart,
                                                                float* timestampPeriod) {
    // The filter value has converged to an optimal value once the Kalman gain drops below 0.01.
    // At that point the weight of a new measurement is too small to change the filter value, so
    // the sampling and calculation do not need to continue.
    if (info->kalmanGain < 0.01f) {
        return;
    }

    MTLTimestamp cpuTimestampEnd = 0, gpuTimestampEnd = 0;
    [device sampleTimestamps:&cpuTimestampEnd gpuTimestamp:&gpuTimestampEnd];

    // Update the timestamp start values when a timestamp reset happens.
    if (cpuTimestampEnd < *cpuTimestampStart || gpuTimestampEnd < *gpuTimestampStart) {
        *cpuTimestampStart = cpuTimestampEnd;
        *gpuTimestampStart = gpuTimestampEnd;
        return;
    }

    if (cpuTimestampEnd - *cpuTimestampStart >= kFilterIntervalInMs) {
        // The measured timestamp period.
        float measurement = (cpuTimestampEnd - *cpuTimestampStart) /
                            static_cast<float>(gpuTimestampEnd - *gpuTimestampStart);

        // Measurement update.
        *timestampPeriod = KalmanFilter(info, measurement);

        *cpuTimestampStart = cpuTimestampEnd;
        *gpuTimestampStart = gpuTimestampEnd;
    }
}
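
// Illustrative numbers (assumed, not measured): if 100,000,000ns of CPU time elapse while the
// GPU timestamp advances by 100,000,000 ticks, the measured period is 1.0ns per tick; if the GPU
// counter only advances by 1,200,000 ticks over the same interval (a ~12MHz counter), the
// measured period is ~83.3ns per tick. These correspond to the two initial guesses used in
// Device::Initialize() below.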
} // namespace

// static
ResultOrError<Ref<Device>> Device::Create(AdapterBase* adapter,
                                          NSPRef<id<MTLDevice>> mtlDevice,
                                          const UnpackedPtr<DeviceDescriptor>& descriptor,
                                          const TogglesState& deviceToggles,
                                          Ref<DeviceBase::DeviceLostEvent>&& lostEvent) {
    @autoreleasepool {
        Ref<Device> device = AcquireRef(new Device(adapter, std::move(mtlDevice), descriptor,
                                                   deviceToggles, std::move(lostEvent)));
        DAWN_TRY(device->Initialize(descriptor));
        return device;
    }
}

Device::Device(AdapterBase* adapter,
               NSPRef<id<MTLDevice>> mtlDevice,
               const UnpackedPtr<DeviceDescriptor>& descriptor,
               const TogglesState& deviceToggles,
               Ref<DeviceBase::DeviceLostEvent>&& lostEvent)
    : DeviceBase(adapter, descriptor, deviceToggles, std::move(lostEvent)),
      mMtlDevice(std::move(mtlDevice)) {
    // On macOS < 11.0 we can only check whether counter sampling is supported at all; if it is,
    // counters can only be sampled at command boundaries via the sampleCountersInBuffer API
    // (see the note after this constructor).
    if (@available(macOS 11.0, iOS 14.0, *)) {
        mCounterSamplingAtCommandBoundary =
            SupportCounterSamplingAtCommandBoundary(GetMTLDevice());
        mCounterSamplingAtStageBoundary = SupportCounterSamplingAtStageBoundary(GetMTLDevice());
    } else {
        mCounterSamplingAtCommandBoundary = true;
        mCounterSamplingAtStageBoundary = false;
    }

    mIsTimestampQueryEnabled =
        HasFeature(Feature::TimestampQuery) ||
        HasFeature(Feature::ChromiumExperimentalTimestampQueryInsidePasses);
}
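
// Note: SupportCounterSamplingAtCommandBoundary/SupportCounterSamplingAtStageBoundary are
// helpers from BackendMTL; presumably they wrap -[MTLDevice supportsCounterSampling:] with the
// MTLCounterSamplingPointAtDrawBoundary/AtBlitBoundary/AtDispatchBoundary points vs.
// MTLCounterSamplingPointAtStageBoundary (an assumption about their implementation, which lives
// outside this file). A hypothetical equivalent check would look like:
//
//     bool atStage = [device supportsCounterSampling:MTLCounterSamplingPointAtStageBoundary];
//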

Device::~Device() {
    Destroy();
}

MaybeError Device::Initialize(const UnpackedPtr<DeviceDescriptor>& descriptor) {
    Ref<Queue> queue;
    DAWN_TRY_ASSIGN(queue, Queue::Create(this, &descriptor->defaultQueue));

    if (mIsTimestampQueryEnabled && !IsToggleEnabled(Toggle::DisableTimestampQueryConversion)) {
        // Make a best guess of the timestamp period based on the device vendor (83.333ns per
        // tick corresponds to an assumed ~12MHz counter on Intel GPUs), then converge it to an
        // accurate value with the calculations in UpdateTimestampPeriod().
        mTimestampPeriod =
            gpu_info::IsIntel(GetPhysicalDevice()->GetVendorId()) ? 83.333f : 1.0f;

        // Initialize the Kalman filter parameters.
        mKalmanInfo = std::make_unique<KalmanInfo>();
        mKalmanInfo->filterValue = 0.0f;
        mKalmanInfo->kalmanGain = 0.5f;
        mKalmanInfo->R = 0.0001f;  // The smaller this value is, the smaller the error of the
                                   // measured value is and the more we trust the measurement.
        mKalmanInfo->P = 1.0f;

        if (@available(macOS 10.15, iOS 14.0, *)) {
            // Sample the CPU and GPU timestamps for the first time at device creation.
            [*mMtlDevice sampleTimestamps:&mCpuTimestamp gpuTimestamp:&mGpuTimestamp];
        }
    }

    return DeviceBase::Initialize(std::move(queue));
}

ResultOrError<Ref<BindGroupBase>> Device::CreateBindGroupImpl(
    const BindGroupDescriptor* descriptor) {
    return BindGroup::Create(this, descriptor);
}

ResultOrError<Ref<BindGroupLayoutInternalBase>> Device::CreateBindGroupLayoutImpl(
    const BindGroupLayoutDescriptor* descriptor) {
    return BindGroupLayout::Create(this, descriptor);
}

ResultOrError<Ref<BufferBase>> Device::CreateBufferImpl(
    const UnpackedPtr<BufferDescriptor>& descriptor) {
    return Buffer::Create(this, descriptor);
}

ResultOrError<Ref<CommandBufferBase>> Device::CreateCommandBuffer(
    CommandEncoder* encoder,
    const CommandBufferDescriptor* descriptor) {
    return CommandBuffer::Create(encoder, descriptor);
}

Ref<ComputePipelineBase> Device::CreateUninitializedComputePipelineImpl(
    const UnpackedPtr<ComputePipelineDescriptor>& descriptor) {
    return ComputePipeline::CreateUninitialized(this, descriptor);
}

ResultOrError<Ref<PipelineLayoutBase>> Device::CreatePipelineLayoutImpl(
    const UnpackedPtr<PipelineLayoutDescriptor>& descriptor) {
    return PipelineLayout::Create(this, descriptor);
}

ResultOrError<Ref<QuerySetBase>> Device::CreateQuerySetImpl(const QuerySetDescriptor* descriptor) {
    return QuerySet::Create(this, descriptor);
}

Ref<RenderPipelineBase> Device::CreateUninitializedRenderPipelineImpl(
    const UnpackedPtr<RenderPipelineDescriptor>& descriptor) {
    return RenderPipeline::CreateUninitialized(this, descriptor);
}

ResultOrError<Ref<SamplerBase>> Device::CreateSamplerImpl(const SamplerDescriptor* descriptor) {
    return Sampler::Create(this, descriptor);
}

ResultOrError<Ref<ShaderModuleBase>> Device::CreateShaderModuleImpl(
    const UnpackedPtr<ShaderModuleDescriptor>& descriptor,
    ShaderModuleParseResult* parseResult,
    OwnedCompilationMessages* compilationMessages) {
    return ShaderModule::Create(this, descriptor, parseResult, compilationMessages);
}

ResultOrError<Ref<SwapChainBase>> Device::CreateSwapChainImpl(Surface* surface,
                                                              SwapChainBase* previousSwapChain,
                                                              const SurfaceConfiguration* config) {
    return SwapChain::Create(this, surface, previousSwapChain, config);
}

ResultOrError<Ref<TextureBase>> Device::CreateTextureImpl(
    const UnpackedPtr<TextureDescriptor>& descriptor) {
    return Texture::Create(this, descriptor);
}

ResultOrError<Ref<TextureViewBase>> Device::CreateTextureViewImpl(
    TextureBase* texture,
    const UnpackedPtr<TextureViewDescriptor>& descriptor) {
    return TextureView::Create(texture, descriptor);
}

void Device::InitializeComputePipelineAsyncImpl(Ref<CreateComputePipelineAsyncEvent> event) {
    PhysicalDevice* physicalDevice = ToBackend(GetPhysicalDevice());
    if (physicalDevice->IsMetalValidationEnabled() &&
        gpu_info::IsAMD(physicalDevice->GetVendorId())) {
        event->InitializeSync();
        return;
    }
    event->InitializeAsync();
}

void Device::InitializeRenderPipelineAsyncImpl(Ref<CreateRenderPipelineAsyncEvent> event) {
    PhysicalDevice* physicalDevice = ToBackend(GetPhysicalDevice());
    if (physicalDevice->IsMetalValidationEnabled() &&
        gpu_info::IsAMD(physicalDevice->GetVendorId())) {
        event->InitializeSync();
        return;
    }
    event->InitializeAsync();
}

ResultOrError<wgpu::TextureUsage> Device::GetSupportedSurfaceUsageImpl(
    const Surface* surface) const {
    wgpu::TextureUsage usages = wgpu::TextureUsage::RenderAttachment |
                                wgpu::TextureUsage::TextureBinding | wgpu::TextureUsage::CopySrc |
                                wgpu::TextureUsage::CopyDst;
    return usages;
}

ResultOrError<Ref<SharedTextureMemoryBase>> Device::ImportSharedTextureMemoryImpl(
    const SharedTextureMemoryDescriptor* baseDescriptor) {
    UnpackedPtr<SharedTextureMemoryDescriptor> unpacked;
    DAWN_TRY_ASSIGN(unpacked, ValidateAndUnpack(baseDescriptor));

    wgpu::SType type;
    DAWN_TRY_ASSIGN(
        type, (unpacked.ValidateBranches<Branch<SharedTextureMemoryIOSurfaceDescriptor>>()));

    DAWN_ASSERT(type == wgpu::SType::SharedTextureMemoryIOSurfaceDescriptor);
    const auto* descriptor = unpacked.Get<SharedTextureMemoryIOSurfaceDescriptor>();
    DAWN_ASSERT(descriptor != nullptr);

    DAWN_INVALID_IF(!HasFeature(Feature::SharedTextureMemoryIOSurface), "%s is not enabled.",
                    wgpu::FeatureName::SharedTextureMemoryIOSurface);
    return SharedTextureMemory::Create(this, baseDescriptor->label, descriptor);
}

ResultOrError<Ref<SharedFenceBase>> Device::ImportSharedFenceImpl(
    const SharedFenceDescriptor* baseDescriptor) {
    UnpackedPtr<SharedFenceDescriptor> unpacked;
    DAWN_TRY_ASSIGN(unpacked, ValidateAndUnpack(baseDescriptor));

    wgpu::SType type;
    DAWN_TRY_ASSIGN(type,
                    (unpacked.ValidateBranches<Branch<SharedFenceMTLSharedEventDescriptor>>()));

    DAWN_ASSERT(type == wgpu::SType::SharedFenceMTLSharedEventDescriptor);
    const auto* descriptor = unpacked.Get<SharedFenceMTLSharedEventDescriptor>();
    DAWN_ASSERT(descriptor != nullptr);

    DAWN_INVALID_IF(!HasFeature(Feature::SharedFenceMTLSharedEvent), "%s is not enabled.",
                    wgpu::FeatureName::SharedFenceMTLSharedEvent);
    if (@available(macOS 10.14, iOS 12.0, *)) {
        return SharedFence::Create(this, baseDescriptor->label, descriptor);
    }
    DAWN_UNREACHABLE();
}

MaybeError Device::TickImpl() {
    DAWN_TRY(ToBackend(GetQueue())->SubmitPendingCommandBuffer());

    // Only run the timestamp period calculation when the timestamp feature is enabled and
    // timestamp conversion is not disabled.
    if (mIsTimestampQueryEnabled && !IsToggleEnabled(Toggle::DisableTimestampQueryConversion)) {
        if (@available(macOS 10.15, iOS 14.0, *)) {
            UpdateTimestampPeriod(GetMTLDevice(), mKalmanInfo.get(), &mCpuTimestamp,
                                  &mGpuTimestamp, &mTimestampPeriod);
        }
    }

    return {};
}

id<MTLDevice> Device::GetMTLDevice() const {
    return mMtlDevice.Get();
}

MaybeError Device::CopyFromStagingToBufferImpl(BufferBase* source,
                                               uint64_t sourceOffset,
                                               BufferBase* destination,
                                               uint64_t destinationOffset,
                                               uint64_t size) {
    // Metal validation layers forbid 0-sized copies; the caller is expected to have skipped this
    // call entirely when size is 0.
    DAWN_ASSERT(size != 0);

    ToBackend(destination)
        ->EnsureDataInitializedAsDestination(
            ToBackend(GetQueue())->GetPendingCommandContext(QueueBase::SubmitMode::Passive),
            destinationOffset, size);

    id<MTLBuffer> uploadBuffer = ToBackend(source)->GetMTLBuffer();

    Buffer* buffer = ToBackend(destination);
    buffer->TrackUsage();
    [ToBackend(GetQueue())->GetPendingCommandContext(QueueBase::SubmitMode::Passive)->EnsureBlit()
               copyFromBuffer:uploadBuffer
                 sourceOffset:sourceOffset
                     toBuffer:buffer->GetMTLBuffer()
            destinationOffset:destinationOffset
                         size:size];

    return {};
}

// In Metal we don't write from the CPU directly to the texture (which could be done with the
// replaceRegion API), because that API requires a non-private storage mode and Dawn sets the
// private storage mode by default for all textures except IOSurfaces on macOS. Instead, the
// staging buffer is copied into the texture with a blit command (see the hypothetical
// replaceRegion sketch after this function).
MaybeError Device::CopyFromStagingToTextureImpl(const BufferBase* source,
                                                const TextureDataLayout& dataLayout,
                                                const TextureCopy& dst,
                                                const Extent3D& copySizePixels) {
    Texture* texture = ToBackend(dst.texture.Get());
    texture->SynchronizeTextureBeforeUse(ToBackend(GetQueue())->GetPendingCommandContext());
    DAWN_TRY(EnsureDestinationTextureInitialized(
        ToBackend(GetQueue())->GetPendingCommandContext(QueueBase::SubmitMode::Passive), texture,
        dst, copySizePixels));

    RecordCopyBufferToTexture(
        ToBackend(GetQueue())->GetPendingCommandContext(QueueBase::SubmitMode::Passive),
        ToBackend(source)->GetMTLBuffer(), source->GetSize(), dataLayout.offset,
        dataLayout.bytesPerRow, dataLayout.rowsPerImage, texture, dst.mipLevel, dst.origin,
        dst.aspect, copySizePixels);
    return {};
}
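
// For reference only: a hypothetical CPU-upload path would need a non-private (shared or
// managed) texture and would look roughly like the sketch below. It is not used here for the
// storage-mode reason given above; the region and byte values are placeholders.
//
//     MTLRegion region = MTLRegionMake2D(0, 0, width, height);
//     [mtlTexture replaceRegion:region
//                   mipmapLevel:0
//                     withBytes:cpuData
//                   bytesPerRow:bytesPerRow];
//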

void Device::DestroyImpl() {
    DAWN_ASSERT(GetState() == State::Disconnected);

    // TODO(crbug.com/dawn/831): DestroyImpl is called from two places.
    // - It may be called if the device is explicitly destroyed with APIDestroy.
    //   This case is NOT thread-safe and needs proper synchronization with other
    //   simultaneous uses of the device.
    // - It may be called when the last ref to the device is dropped and the device
    //   is implicitly destroyed. This case is thread-safe because there are no
    //   other threads using the device since there are no other live refs.
    mMtlDevice = nullptr;

    mMockBlitMtlBuffer = nullptr;
}

uint32_t Device::GetOptimalBytesPerRowAlignment() const {
    return 1;
}

uint64_t Device::GetOptimalBufferToTextureCopyOffsetAlignment() const {
    return 1;
}

float Device::GetTimestampPeriodInNS() const {
    return mTimestampPeriod;
}
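
// Example usage (illustrative only): callers that resolve timestamp queries multiply a raw GPU
// tick delta by this period to get nanoseconds, e.g. a delta of 1,000 ticks with a period of
// 83.333 corresponds to roughly 83,333ns (~83.3us).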

bool Device::IsResolveTextureBlitWithDrawSupported() const {
    return true;
}

bool Device::UseCounterSamplingAtCommandBoundary() const {
    return mCounterSamplingAtCommandBoundary;
}

bool Device::UseCounterSamplingAtStageBoundary() const {
    return mCounterSamplingAtStageBoundary;
}

id<MTLBuffer> Device::GetMockBlitMtlBuffer() {
    if (mMockBlitMtlBuffer == nullptr) {
        mMockBlitMtlBuffer.Acquire(
            [GetMTLDevice() newBufferWithLength:1 options:MTLResourceStorageModePrivate]);
    }
    return mMockBlitMtlBuffer.Get();
}

} // namespace dawn::native::metal