blob: e931af351d7efbe8b36705bc1e56e5df99bdeee8 [file] [log] [blame] [edit]
// Copyright 2019 The Dawn Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "dawn_native/metal/BackendMTL.h"
#include "common/CoreFoundationRef.h"
#include "common/GPUInfo.h"
#include "common/NSRef.h"
#include "common/Platform.h"
#include "common/SystemUtils.h"
#include "dawn_native/Instance.h"
#include "dawn_native/MetalBackend.h"
#include "dawn_native/metal/BufferMTL.h"
#include "dawn_native/metal/DeviceMTL.h"
#if defined(DAWN_PLATFORM_MACOS)
# import <IOKit/IOKitLib.h>
# include "common/IOKitRef.h"
#endif
#include <vector>
namespace dawn_native { namespace metal {
namespace {
struct PCIIDs {
uint32_t vendorId;
uint32_t deviceId;
};
struct Vendor {
const char* trademark;
uint32_t vendorId;
};
#if defined(DAWN_PLATFORM_MACOS)
const Vendor kVendors[] = {{"AMD", gpu_info::kVendorID_AMD},
{"Radeon", gpu_info::kVendorID_AMD},
{"Intel", gpu_info::kVendorID_Intel},
{"Geforce", gpu_info::kVendorID_Nvidia},
{"Quadro", gpu_info::kVendorID_Nvidia}};
// Find vendor ID from MTLDevice name.
MaybeError GetVendorIdFromVendors(id<MTLDevice> device, PCIIDs* ids) {
uint32_t vendorId = 0;
const char* deviceName = [device.name UTF8String];
for (const auto& it : kVendors) {
if (strstr(deviceName, it.trademark) != nullptr) {
vendorId = it.vendorId;
break;
}
}
if (vendorId == 0) {
return DAWN_INTERNAL_ERROR("Failed to find vendor id with the device");
}
// Set vendor id with 0
*ids = PCIIDs{vendorId, 0};
return {};
}
// Extracts an integer property from a registry entry.
uint32_t GetEntryProperty(io_registry_entry_t entry, CFStringRef name) {
uint32_t value = 0;
// Recursively search registry entry and its parents for property name
// The data should release with CFRelease
CFRef<CFDataRef> data =
AcquireCFRef(static_cast<CFDataRef>(IORegistryEntrySearchCFProperty(
entry, kIOServicePlane, name, kCFAllocatorDefault,
kIORegistryIterateRecursively | kIORegistryIterateParents)));
if (data == nullptr) {
return value;
}
// CFDataGetBytePtr() is guaranteed to return a read-only pointer
value = *reinterpret_cast<const uint32_t*>(CFDataGetBytePtr(data.Get()));
return value;
}
// Queries the IO Registry to find the PCI device and vendor IDs of the MTLDevice.
// The registry entry correponding to [device registryID] doesn't contain the exact PCI ids
// because it corresponds to a driver. However its parent entry corresponds to the device
// itself and has uint32_t "device-id" and "registry-id" keys. For example on a dual-GPU
// MacBook Pro 2017 the IORegistry explorer shows the following tree (simplified here):
//
// - PCI0@0
// | - AppleACPIPCI
// | | - IGPU@2 (type IOPCIDevice)
// | | | - IntelAccelerator (type IOGraphicsAccelerator2)
// | | - PEG0@1
// | | | - IOPP
// | | | | - GFX0@0 (type IOPCIDevice)
// | | | | | - AMDRadeonX4000_AMDBaffinGraphicsAccelerator (type IOGraphicsAccelerator2)
//
// [device registryID] is the ID for one of the IOGraphicsAccelerator2 and we can see that
// their parent always is an IOPCIDevice that has properties for the device and vendor IDs.
MaybeError API_AVAILABLE(macos(10.13))
GetDeviceIORegistryPCIInfo(id<MTLDevice> device, PCIIDs* ids) {
// Get a matching dictionary for the IOGraphicsAccelerator2
CFRef<CFMutableDictionaryRef> matchingDict =
AcquireCFRef(IORegistryEntryIDMatching([device registryID]));
if (matchingDict == nullptr) {
return DAWN_INTERNAL_ERROR("Failed to create the matching dict for the device");
}
// IOServiceGetMatchingService will consume the reference on the matching dictionary,
// so we don't need to release the dictionary.
IORef<io_registry_entry_t> acceleratorEntry = AcquireIORef(
IOServiceGetMatchingService(kIOMasterPortDefault, matchingDict.Detach()));
if (acceleratorEntry == IO_OBJECT_NULL) {
return DAWN_INTERNAL_ERROR(
"Failed to get the IO registry entry for the accelerator");
}
// Get the parent entry that will be the IOPCIDevice
IORef<io_registry_entry_t> deviceEntry;
if (IORegistryEntryGetParentEntry(acceleratorEntry.Get(), kIOServicePlane,
deviceEntry.InitializeInto()) != kIOReturnSuccess) {
return DAWN_INTERNAL_ERROR("Failed to get the IO registry entry for the device");
}
ASSERT(deviceEntry != IO_OBJECT_NULL);
uint32_t vendorId = GetEntryProperty(deviceEntry.Get(), CFSTR("vendor-id"));
uint32_t deviceId = GetEntryProperty(deviceEntry.Get(), CFSTR("device-id"));
*ids = PCIIDs{vendorId, deviceId};
return {};
}
MaybeError GetDevicePCIInfo(id<MTLDevice> device, PCIIDs* ids) {
// [device registryID] is introduced on macOS 10.13+, otherwise workaround to get vendor
// id by vendor name on old macOS
if (@available(macos 10.13, *)) {
return GetDeviceIORegistryPCIInfo(device, ids);
} else {
return GetVendorIdFromVendors(device, ids);
}
}
bool IsMetalSupported() {
// Metal was first introduced in macOS 10.11
// WebGPU is targeted at macOS 10.12+
// TODO(dawn:1181): Dawn native should allow non-conformant WebGPU on macOS 10.11
return IsMacOSVersionAtLeast(10, 12);
}
#elif defined(DAWN_PLATFORM_IOS)
MaybeError GetDevicePCIInfo(id<MTLDevice> device, PCIIDs* ids) {
DAWN_UNUSED(device);
*ids = PCIIDs{0, 0};
return {};
}
bool IsMetalSupported() {
return true;
}
#else
# error "Unsupported Apple platform."
#endif
bool IsCounterSamplingBoundarySupport(id<MTLDevice> device)
API_AVAILABLE(macos(11.0), ios(14.0)) {
bool isBlitBoundarySupported =
[device supportsCounterSampling:MTLCounterSamplingPointAtBlitBoundary];
bool isDispatchBoundarySupported =
[device supportsCounterSampling:MTLCounterSamplingPointAtDispatchBoundary];
bool isDrawBoundarySupported =
[device supportsCounterSampling:MTLCounterSamplingPointAtDrawBoundary];
return isBlitBoundarySupported && isDispatchBoundarySupported &&
isDrawBoundarySupported;
}
bool IsGPUCounterSupported(id<MTLDevice> device,
MTLCommonCounterSet counterSetName,
std::vector<MTLCommonCounter> counters)
API_AVAILABLE(macos(10.15), ios(14.0)) {
// MTLDevice’s counterSets property declares which counter sets it supports. Check
// whether it's available on the device before requesting a counter set.
id<MTLCounterSet> counterSet = nil;
if (device.counterSets != nil) {
for (id<MTLCounterSet> set in device.counterSets) {
if ([set.name caseInsensitiveCompare:counterSetName] == NSOrderedSame) {
counterSet = set;
break;
}
}
}
// The counter set is not supported.
if (counterSet == nil) {
return false;
}
// A GPU might support a counter set, but only support a subset of the counters in that
// set, check if the counter set supports all specific counters we need. Return false
// if there is a counter unsupported.
std::vector<NSString*> supportedCounters;
for (id<MTLCounter> counter in counterSet.counters) {
supportedCounters.push_back(counter.name);
}
for (const auto& counterName : counters) {
if (std::find(supportedCounters.begin(), supportedCounters.end(), counterName) ==
supportedCounters.end()) {
return false;
}
}
if (@available(macOS 11.0, iOS 14.0, *)) {
// Check whether it can read GPU counters at the specified command boundary. Apple
// family GPUs do not support sampling between different Metal commands, because
// they defer fragment processing until after the GPU processes all the primitives
// in the render pass.
if (!IsCounterSamplingBoundarySupport(device)) {
return false;
}
}
return true;
}
} // anonymous namespace
// The Metal backend's Adapter.
class Adapter : public AdapterBase {
public:
Adapter(InstanceBase* instance, id<MTLDevice> device)
: AdapterBase(instance, wgpu::BackendType::Metal), mDevice(device) {
mPCIInfo.name = std::string([[*mDevice name] UTF8String]);
PCIIDs ids;
if (!instance->ConsumedError(GetDevicePCIInfo(device, &ids))) {
mPCIInfo.vendorId = ids.vendorId;
mPCIInfo.deviceId = ids.deviceId;
}
#if defined(DAWN_PLATFORM_IOS)
mAdapterType = wgpu::AdapterType::IntegratedGPU;
const char* systemName = "iOS ";
#elif defined(DAWN_PLATFORM_MACOS)
if ([device isLowPower]) {
mAdapterType = wgpu::AdapterType::IntegratedGPU;
} else {
mAdapterType = wgpu::AdapterType::DiscreteGPU;
}
const char* systemName = "macOS ";
#else
# error "Unsupported Apple platform."
#endif
NSString* osVersion = [[NSProcessInfo processInfo] operatingSystemVersionString];
mDriverDescription =
"Metal driver on " + std::string(systemName) + [osVersion UTF8String];
}
// AdapterBase Implementation
bool SupportsExternalImages() const override {
// Via dawn_native::metal::WrapIOSurface
return true;
}
private:
ResultOrError<DeviceBase*> CreateDeviceImpl(
const DawnDeviceDescriptor* descriptor) override {
return Device::Create(this, mDevice, descriptor);
}
MaybeError InitializeImpl() override {
return {};
}
MaybeError InitializeSupportedFeaturesImpl() override {
#if defined(DAWN_PLATFORM_MACOS)
if ([*mDevice supportsFeatureSet:MTLFeatureSet_macOS_GPUFamily1_v1]) {
mSupportedFeatures.EnableFeature(Feature::TextureCompressionBC);
}
#endif
if (@available(macOS 10.15, iOS 14.0, *)) {
if (IsGPUCounterSupported(
*mDevice, MTLCommonCounterSetStatistic,
{MTLCommonCounterVertexInvocations, MTLCommonCounterClipperInvocations,
MTLCommonCounterClipperPrimitivesOut, MTLCommonCounterFragmentInvocations,
MTLCommonCounterComputeKernelInvocations})) {
mSupportedFeatures.EnableFeature(Feature::PipelineStatisticsQuery);
}
if (IsGPUCounterSupported(*mDevice, MTLCommonCounterSetTimestamp,
{MTLCommonCounterTimestamp})) {
bool enableTimestampQuery = true;
#if defined(DAWN_PLATFORM_MACOS)
// Disable timestamp query on macOS 10.15 on AMD GPU because WriteTimestamp
// fails to call without any copy commands on MTLBlitCommandEncoder. This issue
// has been fixed on macOS 11.0. See crbug.com/dawn/545
enableTimestampQuery &=
!(gpu_info::IsAMD(GetPCIInfo().vendorId) && IsMacOSVersionAtLeast(11));
#endif
if (enableTimestampQuery) {
mSupportedFeatures.EnableFeature(Feature::TimestampQuery);
}
}
}
if (@available(macOS 10.11, iOS 11.0, *)) {
mSupportedFeatures.EnableFeature(Feature::DepthClamping);
}
return {};
}
enum class MTLGPUFamily {
Apple1,
Apple2,
Apple3,
Apple4,
Apple5,
Apple6,
Apple7,
Mac1,
Mac2,
};
ResultOrError<MTLGPUFamily> GetMTLGPUFamily() const {
// https://developer.apple.com/documentation/metal/mtldevice/detecting_gpu_features_and_metal_software_versions?language=objc
if (@available(macOS 10.15, iOS 10.13, *)) {
if ([*mDevice supportsFamily:MTLGPUFamilyMac2]) {
return MTLGPUFamily::Mac2;
}
if ([*mDevice supportsFamily:MTLGPUFamilyMac1]) {
return MTLGPUFamily::Mac1;
}
if ([*mDevice supportsFamily:MTLGPUFamilyApple7]) {
return MTLGPUFamily::Apple7;
}
if ([*mDevice supportsFamily:MTLGPUFamilyApple6]) {
return MTLGPUFamily::Apple6;
}
if ([*mDevice supportsFamily:MTLGPUFamilyApple5]) {
return MTLGPUFamily::Apple5;
}
if ([*mDevice supportsFamily:MTLGPUFamilyApple4]) {
return MTLGPUFamily::Apple4;
}
if ([*mDevice supportsFamily:MTLGPUFamilyApple3]) {
return MTLGPUFamily::Apple3;
}
if ([*mDevice supportsFamily:MTLGPUFamilyApple2]) {
return MTLGPUFamily::Apple2;
}
if ([*mDevice supportsFamily:MTLGPUFamilyApple1]) {
return MTLGPUFamily::Apple1;
}
}
#if TARGET_OS_OSX
if (@available(macOS 10.14, *)) {
if ([*mDevice supportsFeatureSet:MTLFeatureSet_macOS_GPUFamily2_v1]) {
return MTLGPUFamily::Mac2;
}
}
if (@available(macOS 10.11, *)) {
if ([*mDevice supportsFeatureSet:MTLFeatureSet_macOS_GPUFamily1_v1]) {
return MTLGPUFamily::Mac1;
}
}
#elif TARGET_OS_IOS
if (@available(iOS 10.11, *)) {
if ([*mDevice supportsFeatureSet:MTLFeatureSet_iOS_GPUFamily4_v1]) {
return MTLGPUFamily::Apple4;
}
}
if (@available(iOS 9.0, *)) {
if ([*mDevice supportsFeatureSet:MTLFeatureSet_iOS_GPUFamily3_v1]) {
return MTLGPUFamily::Apple3;
}
}
if (@available(iOS 8.0, *)) {
if ([*mDevice supportsFeatureSet:MTLFeatureSet_iOS_GPUFamily2_v1]) {
return MTLGPUFamily::Apple2;
}
}
if (@available(iOS 8.0, *)) {
if ([*mDevice supportsFeatureSet:MTLFeatureSet_iOS_GPUFamily1_v1]) {
return MTLGPUFamily::Apple1;
}
}
#endif
return DAWN_INTERNAL_ERROR("Unsupported Metal device");
}
MaybeError InitializeSupportedLimitsImpl(CombinedLimits* limits) override {
struct MTLDeviceLimits {
uint32_t maxVertexAttribsPerDescriptor;
uint32_t maxBufferArgumentEntriesPerFunc;
uint32_t maxTextureArgumentEntriesPerFunc;
uint32_t maxSamplerStateArgumentEntriesPerFunc;
uint32_t maxThreadsPerThreadgroup;
uint32_t maxTotalThreadgroupMemory;
uint32_t maxFragmentInputComponents;
uint32_t max1DTextureSize;
uint32_t max2DTextureSize;
uint32_t max3DTextureSize;
uint32_t maxTextureArrayLayers;
uint32_t minBufferOffsetAlignment;
};
struct LimitsForFamily {
uint32_t MTLDeviceLimits::*limit;
ityp::array<MTLGPUFamily, uint32_t, 9> values;
};
// clang-format off
// https://developer.apple.com/metal/Metal-Feature-Set-Tables.pdf
// Apple Mac
// 1, 2, 3, 4, 5, 6, 7, 1, 2
constexpr LimitsForFamily kMTLLimits[12] = {
{&MTLDeviceLimits::maxVertexAttribsPerDescriptor, { 31u, 31u, 31u, 31u, 31u, 31u, 31u, 31u, 31u }},
{&MTLDeviceLimits::maxBufferArgumentEntriesPerFunc, { 31u, 31u, 31u, 31u, 31u, 31u, 31u, 31u, 31u }},
{&MTLDeviceLimits::maxTextureArgumentEntriesPerFunc, { 31u, 31u, 31u, 96u, 96u, 128u, 128u, 128u, 128u }},
{&MTLDeviceLimits::maxSamplerStateArgumentEntriesPerFunc, { 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u }},
{&MTLDeviceLimits::maxThreadsPerThreadgroup, { 512u, 512u, 512u, 1024u, 1024u, 1024u, 1024u, 1024u, 1024u }},
{&MTLDeviceLimits::maxTotalThreadgroupMemory, { 16352u, 16352u, 16384u, 32768u, 32768u, 32768u, 32768u, 32768u, 32768u }},
{&MTLDeviceLimits::maxFragmentInputComponents, { 60u, 60u, 60u, 124u, 124u, 124u, 124u, 124u, 124u }},
{&MTLDeviceLimits::max1DTextureSize, { 8192u, 8192u, 16384u, 16384u, 16384u, 16384u, 16384u, 16384u, 16384u }},
{&MTLDeviceLimits::max2DTextureSize, { 8192u, 8192u, 16384u, 16384u, 16384u, 16384u, 16384u, 16384u, 16384u }},
{&MTLDeviceLimits::max3DTextureSize, { 2048u, 2048u, 2048u, 2048u, 2048u, 2048u, 2048u, 2048u, 2048u }},
{&MTLDeviceLimits::maxTextureArrayLayers, { 2048u, 2048u, 2048u, 2048u, 2048u, 2048u, 2048u, 2048u, 2048u }},
{&MTLDeviceLimits::minBufferOffsetAlignment, { 4u, 4u, 4u, 4u, 4u, 4u, 4u, 256u, 256u }},
};
// clang-format on
MTLGPUFamily mtlGPUFamily;
DAWN_TRY_ASSIGN(mtlGPUFamily, GetMTLGPUFamily());
MTLDeviceLimits mtlLimits;
for (const auto& limitsForFamily : kMTLLimits) {
mtlLimits.*limitsForFamily.limit = limitsForFamily.values[mtlGPUFamily];
}
GetDefaultLimits(&limits->v1);
limits->v1.maxTextureDimension1D = mtlLimits.max1DTextureSize;
limits->v1.maxTextureDimension2D = mtlLimits.max2DTextureSize;
limits->v1.maxTextureDimension3D = mtlLimits.max3DTextureSize;
limits->v1.maxTextureArrayLayers = mtlLimits.maxTextureArrayLayers;
uint32_t maxBuffersPerStage = mtlLimits.maxBufferArgumentEntriesPerFunc;
maxBuffersPerStage -= 1; // One slot is reserved to store buffer lengths.
uint32_t baseMaxBuffersPerStage = limits->v1.maxStorageBuffersPerShaderStage +
limits->v1.maxUniformBuffersPerShaderStage +
limits->v1.maxVertexBuffers;
ASSERT(maxBuffersPerStage >= baseMaxBuffersPerStage);
{
uint32_t additional = maxBuffersPerStage - baseMaxBuffersPerStage;
limits->v1.maxStorageBuffersPerShaderStage += additional / 3;
limits->v1.maxUniformBuffersPerShaderStage += additional / 3;
limits->v1.maxVertexBuffers += (additional - 2 * (additional / 3));
}
uint32_t baseMaxTexturesPerStage = limits->v1.maxSampledTexturesPerShaderStage +
limits->v1.maxStorageTexturesPerShaderStage;
ASSERT(mtlLimits.maxTextureArgumentEntriesPerFunc >= baseMaxTexturesPerStage);
{
uint32_t additional =
mtlLimits.maxTextureArgumentEntriesPerFunc - baseMaxTexturesPerStage;
limits->v1.maxSampledTexturesPerShaderStage += additional / 2;
limits->v1.maxStorageTexturesPerShaderStage += (additional - additional / 2);
}
limits->v1.maxSamplersPerShaderStage = mtlLimits.maxSamplerStateArgumentEntriesPerFunc;
// Metal limits are per-function, so the layout limits are the same as the stage
// limits. Note: this should likely change if the implementation uses Metal argument
// buffers. Non-dynamic buffers will probably be bound argument buffers, but dynamic
// buffers may be set directly.
// Mac GPU families with tier 1 argument buffers support 64
// buffers, 128 textures, and 16 samplers. Mac GPU families
// with tier 2 argument buffers support 500000 buffers and
// textures, and 1024 unique samplers
limits->v1.maxDynamicUniformBuffersPerPipelineLayout =
limits->v1.maxUniformBuffersPerShaderStage;
limits->v1.maxDynamicStorageBuffersPerPipelineLayout =
limits->v1.maxStorageBuffersPerShaderStage;
// The WebGPU limit is the limit across all vertex buffers, combined.
limits->v1.maxVertexAttributes =
limits->v1.maxVertexBuffers * mtlLimits.maxVertexAttribsPerDescriptor;
limits->v1.maxInterStageShaderComponents = mtlLimits.maxFragmentInputComponents;
limits->v1.maxComputeWorkgroupStorageSize = mtlLimits.maxTotalThreadgroupMemory;
limits->v1.maxComputeInvocationsPerWorkgroup = mtlLimits.maxThreadsPerThreadgroup;
limits->v1.maxComputeWorkgroupSizeX = mtlLimits.maxThreadsPerThreadgroup;
limits->v1.maxComputeWorkgroupSizeY = mtlLimits.maxThreadsPerThreadgroup;
limits->v1.maxComputeWorkgroupSizeZ = mtlLimits.maxThreadsPerThreadgroup;
limits->v1.minUniformBufferOffsetAlignment = mtlLimits.minBufferOffsetAlignment;
limits->v1.minStorageBufferOffsetAlignment = mtlLimits.minBufferOffsetAlignment;
uint64_t maxBufferSize = Buffer::QueryMaxBufferLength(*mDevice);
// Metal has no documented limit on the size of a binding. Use the maximum
// buffer size.
limits->v1.maxUniformBufferBindingSize = maxBufferSize;
limits->v1.maxStorageBufferBindingSize = maxBufferSize;
// TODO(crbug.com/dawn/685):
// LIMITS NOT SET:
// - maxBindGroups
// - maxVertexBufferArrayStride
return {};
}
NSPRef<id<MTLDevice>> mDevice;
};
// Implementation of the Metal backend's BackendConnection
Backend::Backend(InstanceBase* instance)
: BackendConnection(instance, wgpu::BackendType::Metal) {
if (GetInstance()->IsBackendValidationEnabled()) {
setenv("METAL_DEVICE_WRAPPER_TYPE", "1", 1);
}
}
std::vector<std::unique_ptr<AdapterBase>> Backend::DiscoverDefaultAdapters() {
std::vector<std::unique_ptr<AdapterBase>> adapters;
BOOL supportedVersion = NO;
#if defined(DAWN_PLATFORM_MACOS)
if (@available(macOS 10.11, *)) {
supportedVersion = YES;
NSRef<NSArray<id<MTLDevice>>> devices = AcquireNSRef(MTLCopyAllDevices());
for (id<MTLDevice> device in devices.Get()) {
std::unique_ptr<Adapter> adapter = std::make_unique<Adapter>(GetInstance(), device);
if (!GetInstance()->ConsumedError(adapter->Initialize())) {
adapters.push_back(std::move(adapter));
}
}
}
#endif
#if defined(DAWN_PLATFORM_IOS)
if (@available(iOS 8.0, *)) {
supportedVersion = YES;
// iOS only has a single device so MTLCopyAllDevices doesn't exist there.
std::unique_ptr<Adapter> adapter =
std::make_unique<Adapter>(GetInstance(), MTLCreateSystemDefaultDevice());
if (!GetInstance()->ConsumedError(adapter->Initialize())) {
adapters.push_back(std::move(adapter));
}
}
#endif
if (!supportedVersion) {
UNREACHABLE();
}
return adapters;
}
BackendConnection* Connect(InstanceBase* instance) {
if (!IsMetalSupported()) {
return nullptr;
}
return new Backend(instance);
}
}} // namespace dawn_native::metal