// Copyright 2018 The Dawn Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "dawn_native/metal/DeviceMTL.h"

#include "common/GPUInfo.h"
#include "common/Platform.h"
#include "dawn_native/BackendConnection.h"
#include "dawn_native/BindGroupLayout.h"
#include "dawn_native/Commands.h"
#include "dawn_native/ErrorData.h"
#include "dawn_native/metal/BindGroupLayoutMTL.h"
#include "dawn_native/metal/BindGroupMTL.h"
#include "dawn_native/metal/BufferMTL.h"
#include "dawn_native/metal/CommandBufferMTL.h"
#include "dawn_native/metal/ComputePipelineMTL.h"
#include "dawn_native/metal/PipelineLayoutMTL.h"
#include "dawn_native/metal/QuerySetMTL.h"
#include "dawn_native/metal/QueueMTL.h"
#include "dawn_native/metal/RenderPipelineMTL.h"
#include "dawn_native/metal/SamplerMTL.h"
#include "dawn_native/metal/ShaderModuleMTL.h"
#include "dawn_native/metal/StagingBufferMTL.h"
#include "dawn_native/metal/SwapChainMTL.h"
#include "dawn_native/metal/TextureMTL.h"
#include "dawn_native/metal/UtilsMetal.h"
#include "dawn_platform/DawnPlatform.h"
#include "dawn_platform/tracing/TraceEvent.h"

#include <type_traits>

namespace dawn_native { namespace metal {

    // static
    ResultOrError<Device*> Device::Create(AdapterBase* adapter,
                                          NSPRef<id<MTLDevice>> mtlDevice,
                                          const DeviceDescriptor* descriptor) {
        Ref<Device> device = AcquireRef(new Device(adapter, std::move(mtlDevice), descriptor));
        DAWN_TRY(device->Initialize());
        return device.Detach();
    }

    Device::Device(AdapterBase* adapter,
                   NSPRef<id<MTLDevice>> mtlDevice,
                   const DeviceDescriptor* descriptor)
        : DeviceBase(adapter, descriptor), mMtlDevice(std::move(mtlDevice)), mCompletedSerial(0) {
    }

    Device::~Device() {
        ShutDownBase();
    }

    MaybeError Device::Initialize() {
        InitTogglesFromDriver();

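        // Vertex pulling is only used to implement robust access to vertex buffers, so force it
        // off when robustness is disabled.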
        if (!IsRobustnessEnabled()) {
            ForceSetToggle(Toggle::MetalEnableVertexPulling, false);
        }

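        // -newCommandQueue returns a +1 reference (Objective-C "new" convention), so Acquire()
        // adopts it without an extra retain.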
        mCommandQueue.Acquire([*mMtlDevice newCommandQueue]);

        return DeviceBase::Initialize(new Queue(this));
    }

    void Device::InitTogglesFromDriver() {
        {
            bool haveStoreAndMSAAResolve = false;
#if defined(DAWN_PLATFORM_MACOS)
            if (@available(macOS 10.12, *)) {
                haveStoreAndMSAAResolve =
                    [*mMtlDevice supportsFeatureSet:MTLFeatureSet_macOS_GPUFamily1_v2];
            }
#elif defined(DAWN_PLATFORM_IOS)
            haveStoreAndMSAAResolve =
                [*mMtlDevice supportsFeatureSet:MTLFeatureSet_iOS_GPUFamily3_v2];
#endif
            // On tvOS, we would need MTLFeatureSet_tvOS_GPUFamily2_v1.
            SetToggle(Toggle::EmulateStoreAndMSAAResolve, !haveStoreAndMSAAResolve);

            bool haveSamplerCompare = true;
#if defined(DAWN_PLATFORM_IOS)
            haveSamplerCompare = [*mMtlDevice supportsFeatureSet:MTLFeatureSet_iOS_GPUFamily3_v1];
#endif
            // TODO(crbug.com/dawn/342): Investigate emulation -- possibly expensive.
            SetToggle(Toggle::MetalDisableSamplerCompare, !haveSamplerCompare);

            bool haveBaseVertexBaseInstance = true;
#if defined(DAWN_PLATFORM_IOS)
            haveBaseVertexBaseInstance =
                [*mMtlDevice supportsFeatureSet:MTLFeatureSet_iOS_GPUFamily3_v1];
#endif
            // TODO(crbug.com/dawn/343): Investigate emulation.
            SetToggle(Toggle::DisableBaseVertex, !haveBaseVertexBaseInstance);
            SetToggle(Toggle::DisableBaseInstance, !haveBaseVertexBaseInstance);
        }

        // TODO(jiawei.shao@intel.com): tighten this workaround when the driver bug is fixed.
        SetToggle(Toggle::AlwaysResolveIntoZeroLevelAndLayer, true);

        // TODO(hao.x.li@intel.com): Use MTLStorageModeShared instead of MTLStorageModePrivate when
        // creating MTLCounterSampleBuffer in QuerySet on Intel platforms, otherwise creating the
        // buffer fails. Switch back to MTLStorageModePrivate once the driver bug is fixed.
        if (@available(macOS 10.15, iOS 14.0, *)) {
            bool useSharedMode = gpu_info::IsIntel(this->GetAdapter()->GetPCIInfo().vendorId);
            SetToggle(Toggle::MetalUseSharedModeForCounterSampleBuffer, useSharedMode);
        }
    }

    ResultOrError<Ref<BindGroupBase>> Device::CreateBindGroupImpl(
        const BindGroupDescriptor* descriptor) {
        return BindGroup::Create(this, descriptor);
    }
    ResultOrError<Ref<BindGroupLayoutBase>> Device::CreateBindGroupLayoutImpl(
        const BindGroupLayoutDescriptor* descriptor) {
        return BindGroupLayout::Create(this, descriptor);
    }
    ResultOrError<Ref<BufferBase>> Device::CreateBufferImpl(const BufferDescriptor* descriptor) {
        return Buffer::Create(this, descriptor);
    }
    ResultOrError<Ref<CommandBufferBase>> Device::CreateCommandBuffer(
        CommandEncoder* encoder,
        const CommandBufferDescriptor* descriptor) {
        return CommandBuffer::Create(encoder, descriptor);
    }
    ResultOrError<Ref<ComputePipelineBase>> Device::CreateComputePipelineImpl(
        const ComputePipelineDescriptor* descriptor) {
        return ComputePipeline::Create(this, descriptor);
    }
    ResultOrError<Ref<PipelineLayoutBase>> Device::CreatePipelineLayoutImpl(
        const PipelineLayoutDescriptor* descriptor) {
        return PipelineLayout::Create(this, descriptor);
    }
    ResultOrError<Ref<QuerySetBase>> Device::CreateQuerySetImpl(
        const QuerySetDescriptor* descriptor) {
        return QuerySet::Create(this, descriptor);
    }
    ResultOrError<Ref<RenderPipelineBase>> Device::CreateRenderPipelineImpl(
        const RenderPipelineDescriptor2* descriptor) {
        return RenderPipeline::Create(this, descriptor);
    }
    ResultOrError<Ref<SamplerBase>> Device::CreateSamplerImpl(const SamplerDescriptor* descriptor) {
        return Sampler::Create(this, descriptor);
    }
    ResultOrError<Ref<ShaderModuleBase>> Device::CreateShaderModuleImpl(
        const ShaderModuleDescriptor* descriptor,
        ShaderModuleParseResult* parseResult) {
        return ShaderModule::Create(this, descriptor, parseResult);
    }
    ResultOrError<Ref<SwapChainBase>> Device::CreateSwapChainImpl(
        const SwapChainDescriptor* descriptor) {
        return OldSwapChain::Create(this, descriptor);
    }
    ResultOrError<Ref<NewSwapChainBase>> Device::CreateSwapChainImpl(
        Surface* surface,
        NewSwapChainBase* previousSwapChain,
        const SwapChainDescriptor* descriptor) {
        return SwapChain::Create(this, surface, previousSwapChain, descriptor);
    }
    ResultOrError<Ref<TextureBase>> Device::CreateTextureImpl(const TextureDescriptor* descriptor) {
        return Texture::Create(this, descriptor);
    }
    ResultOrError<Ref<TextureViewBase>> Device::CreateTextureViewImpl(
        TextureBase* texture,
        const TextureViewDescriptor* descriptor) {
        return TextureView::Create(texture, descriptor);
    }

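    // Polled by the frontend to learn how far the GPU has progressed. mCompletedSerial is
    // advanced by the completed handlers installed in SubmitPendingCommandBuffer(), which run on
    // a separate thread.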
    ResultOrError<ExecutionSerial> Device::CheckAndUpdateCompletedSerials() {
        uint64_t frontendCompletedSerial{GetCompletedCommandSerial()};
        if (frontendCompletedSerial > mCompletedSerial) {
            // Sometimes the frontend increases the serials by itself, in which case the completed
            // serial in DeviceBase will surpass the completed serial we track in the Metal
            // backend. When that happens, update ours to match the frontend's value.
            mCompletedSerial = frontendCompletedSerial;
        }
        return ExecutionSerial(mCompletedSerial.load());
    }

    MaybeError Device::TickImpl() {
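        // Submit any commands recorded since the last submission so that GPU work keeps making
        // progress.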
        if (mCommandContext.GetCommands() != nullptr) {
            SubmitPendingCommandBuffer();
        }

        return {};
    }

    id<MTLDevice> Device::GetMTLDevice() {
        return mMtlDevice.Get();
    }

    id<MTLCommandQueue> Device::GetMTLQueue() {
        return mCommandQueue.Get();
    }

    CommandRecordingContext* Device::GetPendingCommandContext() {
        if (mCommandContext.GetCommands() == nullptr) {
            TRACE_EVENT0(GetPlatform(), General, "[MTLCommandQueue commandBuffer]");
            // The MTLCommandBuffer will be autoreleased by default.
            // The autorelease pool may drain before the command buffer is submitted. Retain so it
            // stays alive.
            mCommandContext = CommandRecordingContext([*mCommandQueue commandBuffer]);
        }
        return &mCommandContext;
    }

    void Device::SubmitPendingCommandBuffer() {
        if (mCommandContext.GetCommands() == nullptr) {
            return;
        }

        IncrementLastSubmittedCommandSerial();

        // Acquire the pending command buffer, which is retained. It must be released later.
        NSPRef<id<MTLCommandBuffer>> pendingCommands = mCommandContext.AcquireCommands();

        // Replace mLastSubmittedCommands with the mutex held so we avoid races between the
        // schedule handler and this code.
        {
            std::lock_guard<std::mutex> lock(mLastSubmittedCommandsMutex);
            mLastSubmittedCommands = pendingCommands;
        }

        // Make a local copy of the pointer to the commands because it's not clear how ObjC blocks
        // handle types with copy / move constructors being referenced in the block.
        id<MTLCommandBuffer> pendingCommandsPointer = pendingCommands.Get();
        [*pendingCommands addScheduledHandler:^(id<MTLCommandBuffer>) {
            // This is data-race free because we hold the mutex for mLastSubmittedCommands and
            // pendingCommandsPointer is a local value (and not the member itself).
            std::lock_guard<std::mutex> lock(mLastSubmittedCommandsMutex);
            if (this->mLastSubmittedCommands.Get() == pendingCommandsPointer) {
                this->mLastSubmittedCommands = nullptr;
            }
        }];

        // Update the completed serial once the completed handler is fired. Make a local copy of
        // the last submitted serial so the block captures it by value.
        ExecutionSerial pendingSerial = GetLastSubmittedCommandSerial();
        // This ObjC block runs on a different thread.
        [*pendingCommands addCompletedHandler:^(id<MTLCommandBuffer>) {
            TRACE_EVENT_ASYNC_END0(GetPlatform(), GPUWork, "DeviceMTL::SubmitPendingCommandBuffer",
                                   uint64_t(pendingSerial));
            ASSERT(uint64_t(pendingSerial) > mCompletedSerial.load());
            this->mCompletedSerial = uint64_t(pendingSerial);
        }];

        TRACE_EVENT_ASYNC_BEGIN0(GetPlatform(), GPUWork, "DeviceMTL::SubmitPendingCommandBuffer",
                                 uint64_t(pendingSerial));
        [*pendingCommands commit];
    }

    ResultOrError<std::unique_ptr<StagingBufferBase>> Device::CreateStagingBuffer(size_t size) {
        std::unique_ptr<StagingBufferBase> stagingBuffer =
            std::make_unique<StagingBuffer>(size, this);
        DAWN_TRY(stagingBuffer->Initialize());
        return std::move(stagingBuffer);
    }

    MaybeError Device::CopyFromStagingToBuffer(StagingBufferBase* source,
                                               uint64_t sourceOffset,
                                               BufferBase* destination,
                                               uint64_t destinationOffset,
                                               uint64_t size) {
        // The Metal validation layers forbid 0-sized copies; the caller is expected to skip them
        // before reaching this function, so assert that the size is non-zero.
        ASSERT(size != 0);

        ToBackend(destination)
            ->EnsureDataInitializedAsDestination(GetPendingCommandContext(), destinationOffset,
                                                 size);

        id<MTLBuffer> uploadBuffer = ToBackend(source)->GetBufferHandle();
        id<MTLBuffer> buffer = ToBackend(destination)->GetMTLBuffer();
        [GetPendingCommandContext()->EnsureBlit() copyFromBuffer:uploadBuffer
                                                    sourceOffset:sourceOffset
                                                        toBuffer:buffer
                                               destinationOffset:destinationOffset
                                                            size:size];
        return {};
    }

    // In Metal we cannot write from the CPU directly to the texture with replaceRegion, because
    // that function requires a non-private storage mode and Dawn sets the private storage mode by
    // default for all textures except IOSurfaces on macOS. So the data is copied from a staging
    // buffer with a blit instead.
    MaybeError Device::CopyFromStagingToTexture(const StagingBufferBase* source,
                                                const TextureDataLayout& dataLayout,
                                                TextureCopy* dst,
                                                const Extent3D& copySizePixels) {
        Texture* texture = ToBackend(dst->texture.Get());

        // This function assumes data is perfectly aligned. Otherwise, it might be necessary
        // to split the copy into several stages: see ComputeTextureBufferCopySplit.
        const TexelBlockInfo& blockInfo = texture->GetFormat().GetAspectInfo(dst->aspect).block;
        ASSERT(dataLayout.rowsPerImage == copySizePixels.height / blockInfo.height);
        ASSERT(dataLayout.bytesPerRow ==
               copySizePixels.width / blockInfo.width * blockInfo.byteSize);

        EnsureDestinationTextureInitialized(texture, *dst, copySizePixels);

        // The Metal validation layer requires that if the texture's pixel format is a compressed
        // format, the sourceSize must be a multiple of the pixel format's block size or be
        // clamped to the edge of the texture if the block extends outside the bounds of a
        // texture.
        const Extent3D clampedSize =
            texture->ClampToMipLevelVirtualSize(dst->mipLevel, dst->origin, copySizePixels);
        const uint32_t copyBaseLayer = dst->origin.z;
        const uint32_t copyLayerCount = copySizePixels.depthOrArrayLayers;
        const uint64_t bytesPerImage = dataLayout.rowsPerImage * dataLayout.bytesPerRow;

        MTLBlitOption blitOption = ComputeMTLBlitOption(texture->GetFormat(), dst->aspect);

        uint64_t bufferOffset = dataLayout.offset;
        for (uint32_t copyLayer = copyBaseLayer; copyLayer < copyBaseLayer + copyLayerCount;
             ++copyLayer) {
            [GetPendingCommandContext()->EnsureBlit()
                     copyFromBuffer:ToBackend(source)->GetBufferHandle()
                       sourceOffset:bufferOffset
                  sourceBytesPerRow:dataLayout.bytesPerRow
                sourceBytesPerImage:bytesPerImage
                         sourceSize:MTLSizeMake(clampedSize.width, clampedSize.height, 1)
                          toTexture:texture->GetMTLTexture()
                   destinationSlice:copyLayer
                   destinationLevel:dst->mipLevel
                  destinationOrigin:MTLOriginMake(dst->origin.x, dst->origin.y, 0)
                            options:blitOption];

            bufferOffset += bytesPerImage;
        }

        return {};
    }

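    // Wraps a single plane of an IOSurface in a Dawn texture, after validating both the texture
    // descriptor and that the IOSurface is compatible with it.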
    TextureBase* Device::CreateTextureWrappingIOSurface(const ExternalImageDescriptor* descriptor,
                                                        IOSurfaceRef ioSurface,
                                                        uint32_t plane) {
        const TextureDescriptor* textureDescriptor =
            reinterpret_cast<const TextureDescriptor*>(descriptor->cTextureDescriptor);

        if (ConsumedError(ValidateTextureDescriptor(this, textureDescriptor))) {
            return nullptr;
        }
        if (ConsumedError(
                ValidateIOSurfaceCanBeWrapped(this, textureDescriptor, ioSurface, plane))) {
            return nullptr;
        }

        return new Texture(this, descriptor, ioSurface, plane);
    }

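    // Flushes pending work and blocks until the driver has scheduled the most recently submitted
    // command buffer.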
    void Device::WaitForCommandsToBeScheduled() {
        SubmitPendingCommandBuffer();

        // Only lock the object while we take a reference to it, otherwise we could block further
        // progress if the driver calls the scheduled handler (which also acquires the lock) before
        // finishing the waitUntilScheduled.
        NSPRef<id<MTLCommandBuffer>> lastSubmittedCommands;
        {
            std::lock_guard<std::mutex> lock(mLastSubmittedCommandsMutex);
            lastSubmittedCommands = mLastSubmittedCommands;
        }
        [*lastSubmittedCommands waitUntilScheduled];
    }

    MaybeError Device::WaitForIdleForDestruction() {
        // Forget all pending commands.
        mCommandContext.AcquireCommands();
        DAWN_TRY(CheckPassedSerials());

        // Wait for all commands to be finished so we can free resources.
        while (GetCompletedCommandSerial() != GetLastSubmittedCommandSerial()) {
            usleep(100);
            DAWN_TRY(CheckPassedSerials());
        }

        return {};
    }

    void Device::ShutDownImpl() {
        ASSERT(GetState() == State::Disconnected);

        // Forget all pending commands.
        mCommandContext.AcquireCommands();

        mCommandQueue = nullptr;
        mMtlDevice = nullptr;
    }

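    // These report the buffer-to-texture copy alignments that Dawn considers optimal. The Metal
    // backend currently just reports the loosest possible values (1 byte).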
    uint32_t Device::GetOptimalBytesPerRowAlignment() const {
        return 1;
    }

    uint64_t Device::GetOptimalBufferToTextureCopyOffsetAlignment() const {
        return 1;
    }

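    // Assumes Metal timestamps are reported in nanoseconds, hence a period of 1.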
    float Device::GetTimestampPeriodInNS() const {
        return 1.0f;
    }

}}  // namespace dawn_native::metal