// Copyright 2019 The Dawn Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "dawn/native/metal/UtilsMetal.h"
#include "dawn/common/Assert.h"
#include "dawn/native/CommandBuffer.h"
#include "dawn/native/Pipeline.h"
#include "dawn/native/ShaderModule.h"
namespace dawn::native::metal {
namespace {
// A helper struct to track state while doing workarounds for Metal render passes. It
// contains a temporary texture and information about the attachment it replaces.
// Helper methods encode copies between the two textures.
struct SavedMetalAttachment {
id<MTLTexture> texture = nil;
NSUInteger level;
NSUInteger slice;
NSPRef<id<MTLTexture>> temporary;
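// Encodes a blit of the temporary texture's content back into the saved attachment.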
void CopyFromTemporaryToAttachment(CommandRecordingContext* commandContext) {
[commandContext->EnsureBlit()
copyFromTexture:temporary.Get()
sourceSlice:0
sourceLevel:0
sourceOrigin:MTLOriginMake(0, 0, 0)
sourceSize:MTLSizeMake([temporary.Get() width], [temporary.Get() height], 1)
toTexture:texture
destinationSlice:slice
destinationLevel:level
destinationOrigin:MTLOriginMake(0, 0, 0)];
}
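// Encodes a blit of the saved attachment's content into the temporary texture, used when
// the render pass needs to load the attachment's existing content.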
void CopyFromAttachmentToTemporary(CommandRecordingContext* commandContext) {
[commandContext->EnsureBlit()
copyFromTexture:texture
sourceSlice:slice
sourceLevel:level
sourceOrigin:MTLOriginMake(0, 0, 0)
sourceSize:MTLSizeMake([temporary.Get() width], [temporary.Get() height], 1)
toTexture:temporary.Get()
destinationSlice:0
destinationLevel:0
destinationOrigin:MTLOriginMake(0, 0, 0)];
}
};
// Common code shared by both kinds of attachment swaps: it saves the original attachment
// and creates a matching temporary texture.
ResultOrError<SavedMetalAttachment> SaveAttachmentCreateTemporary(Device* device,
id<MTLTexture> attachmentTexture,
NSUInteger attachmentLevel,
NSUInteger attachmentSlice) {
// Save the attachment.
SavedMetalAttachment result;
result.texture = attachmentTexture;
result.level = attachmentLevel;
result.slice = attachmentSlice;
// Create the temporary texture.
NSRef<MTLTextureDescriptor> mtlDescRef = AcquireNSRef([MTLTextureDescriptor new]);
MTLTextureDescriptor* mtlDesc = mtlDescRef.Get();
mtlDesc.textureType = MTLTextureType2D;
mtlDesc.usage = MTLTextureUsageRenderTarget;
mtlDesc.pixelFormat = [result.texture pixelFormat];
mtlDesc.width = std::max([result.texture width] >> attachmentLevel, NSUInteger(1));
mtlDesc.height = std::max([result.texture height] >> attachmentLevel, NSUInteger(1));
mtlDesc.depth = 1;
mtlDesc.mipmapLevelCount = 1;
mtlDesc.arrayLength = 1;
mtlDesc.storageMode = MTLStorageModePrivate;
mtlDesc.sampleCount = [result.texture sampleCount];
result.temporary = AcquireNSPRef([device->GetMTLDevice() newTextureWithDescriptor:mtlDesc]);
if (result.temporary == nil) {
return DAWN_OUT_OF_MEMORY_ERROR("Allocation of temporary texture failed.");
}
return result;
}
// Patches the render pass attachment to replace it with a temporary texture. Returns a
// SavedMetalAttachment that can be used to easily copy between the original attachment and
// the temporary.
ResultOrError<SavedMetalAttachment> PatchAttachmentWithTemporary(
Device* device,
MTLRenderPassAttachmentDescriptor* attachment) {
SavedMetalAttachment result;
DAWN_TRY_ASSIGN(result, SaveAttachmentCreateTemporary(device, attachment.texture,
attachment.level, attachment.slice));
// Replace the attachment with the temporary.
attachment.texture = result.temporary.Get();
attachment.level = 0;
attachment.slice = 0;
return result;
}
// Same as PatchAttachmentWithTemporary but for the resolve attachment.
ResultOrError<SavedMetalAttachment> PatchResolveAttachmentWithTemporary(
Device* device,
MTLRenderPassAttachmentDescriptor* attachment) {
SavedMetalAttachment result;
DAWN_TRY_ASSIGN(
result, SaveAttachmentCreateTemporary(device, attachment.resolveTexture,
attachment.resolveLevel, attachment.resolveSlice));
// Replace the resolve attachment with the temporary.
attachment.resolveTexture = result.temporary.Get();
attachment.resolveLevel = 0;
attachment.resolveSlice = 0;
return result;
}
// Helper function for the EmulateStoreAndMSAAResolve toggle: encodes an extra render pass
// that performs only the MSAA resolves.
void ResolveInAnotherRenderPass(
CommandRecordingContext* commandContext,
const MTLRenderPassDescriptor* mtlRenderPass,
const std::array<id<MTLTexture>, kMaxColorAttachments>& resolveTextures) {
// Note that this creates an autoreleased descriptor, so we don't use AcquireNSRef.
NSRef<MTLRenderPassDescriptor> mtlRenderPassForResolveRef =
[MTLRenderPassDescriptor renderPassDescriptor];
MTLRenderPassDescriptor* mtlRenderPassForResolve = mtlRenderPassForResolveRef.Get();
for (uint32_t i = 0; i < kMaxColorAttachments; ++i) {
if (resolveTextures[i] == nullptr) {
continue;
}
mtlRenderPassForResolve.colorAttachments[i].texture =
mtlRenderPass.colorAttachments[i].texture;
mtlRenderPassForResolve.colorAttachments[i].loadAction = MTLLoadActionLoad;
mtlRenderPassForResolve.colorAttachments[i].storeAction = MTLStoreActionMultisampleResolve;
mtlRenderPassForResolve.colorAttachments[i].resolveTexture = resolveTextures[i];
mtlRenderPassForResolve.colorAttachments[i].resolveLevel =
mtlRenderPass.colorAttachments[i].resolveLevel;
mtlRenderPassForResolve.colorAttachments[i].resolveSlice =
mtlRenderPass.colorAttachments[i].resolveSlice;
}
commandContext->BeginRender(mtlRenderPassForResolve);
commandContext->EndRender();
}
} // anonymous namespace
MTLCompareFunction ToMetalCompareFunction(wgpu::CompareFunction compareFunction) {
switch (compareFunction) {
case wgpu::CompareFunction::Never:
return MTLCompareFunctionNever;
case wgpu::CompareFunction::Less:
return MTLCompareFunctionLess;
case wgpu::CompareFunction::LessEqual:
return MTLCompareFunctionLessEqual;
case wgpu::CompareFunction::Greater:
return MTLCompareFunctionGreater;
case wgpu::CompareFunction::GreaterEqual:
return MTLCompareFunctionGreaterEqual;
case wgpu::CompareFunction::NotEqual:
return MTLCompareFunctionNotEqual;
case wgpu::CompareFunction::Equal:
return MTLCompareFunctionEqual;
case wgpu::CompareFunction::Always:
return MTLCompareFunctionAlways;
case wgpu::CompareFunction::Undefined:
UNREACHABLE();
}
}
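// Splits a texture-buffer copy into at most three regions: all the images except the last,
// all the rows of the last image except the last, and the last row with a tight
// bytesPerRow. This avoids tripping the Metal validation layer's overestimation of the
// required buffer size (see the comment inside for details).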
TextureBufferCopySplit ComputeTextureBufferCopySplit(const Texture* texture,
uint32_t mipLevel,
Origin3D origin,
Extent3D copyExtent,
uint64_t bufferSize,
uint64_t bufferOffset,
uint32_t bytesPerRow,
uint32_t rowsPerImage,
Aspect aspect) {
TextureBufferCopySplit copy;
const Format textureFormat = texture->GetFormat();
const TexelBlockInfo& blockInfo = textureFormat.GetAspectInfo(aspect).block;
// When copying textures from/to an unpacked buffer, the Metal validation layer doesn't
// compute the correct range when checking if the buffer is big enough to contain the
// data for the whole copy. Instead of looking at the position of the last texel in the
// buffer, it computes the volume of the 3D box with bytesPerRow * (rowsPerImage /
// format.blockHeight) * copySize.depthOrArrayLayers. For example, consider the pixel
// buffer below where, in memory, each row of texture data (D) is followed by some
// padding data (P):
// |DDDDDDD|PP|
// |DDDDDDD|PP|
// |DDDDDDD|PP|
// |DDDDDDD|PP|
// |DDDDDDA|PP|
// The last pixel read will be A, but the driver assumes the whole last padding row is
// read as well, causing it to generate an error when the pixel buffer is just big enough.
// We work around this limitation by detecting when Metal would complain, and copying the
// last image and the last row separately using a tight sourceBytesPerRow or
// sourceBytesPerImage.
uint32_t bytesPerImage = bytesPerRow * rowsPerImage;
// The Metal validation layer requires that, if the texture's pixel format is a
// compressed format, the sourceSize must be a multiple of the pixel format's block size,
// or be clamped to the edge of the texture if the block extends outside the bounds of
// the texture.
const Extent3D clampedCopyExtent =
texture->ClampToMipLevelVirtualSize(mipLevel, origin, copyExtent);
// Check whether the buffer is big enough for the whole copy.
bool needWorkaround = bufferSize - bufferOffset < bytesPerImage * copyExtent.depthOrArrayLayers;
if (!needWorkaround) {
copy.count = 1;
copy.copies[0].bufferOffset = bufferOffset;
copy.copies[0].bytesPerRow = bytesPerRow;
copy.copies[0].bytesPerImage = bytesPerImage;
copy.copies[0].textureOrigin = origin;
copy.copies[0].copyExtent = {clampedCopyExtent.width, clampedCopyExtent.height,
copyExtent.depthOrArrayLayers};
return copy;
}
uint64_t currentOffset = bufferOffset;
// Copy all the images except the last one.
if (copyExtent.depthOrArrayLayers > 1) {
copy.copies[copy.count].bufferOffset = currentOffset;
copy.copies[copy.count].bytesPerRow = bytesPerRow;
copy.copies[copy.count].bytesPerImage = bytesPerImage;
copy.copies[copy.count].textureOrigin = origin;
copy.copies[copy.count].copyExtent = {clampedCopyExtent.width, clampedCopyExtent.height,
copyExtent.depthOrArrayLayers - 1};
++copy.count;
// Update the offset to point at the last image.
currentOffset += (copyExtent.depthOrArrayLayers - 1) * bytesPerImage;
}
// In the last image, copy all the rows except the last one.
uint32_t copyBlockRowCount = copyExtent.height / blockInfo.height;
if (copyBlockRowCount > 1) {
copy.copies[copy.count].bufferOffset = currentOffset;
copy.copies[copy.count].bytesPerRow = bytesPerRow;
copy.copies[copy.count].bytesPerImage = bytesPerRow * (copyBlockRowCount - 1);
copy.copies[copy.count].textureOrigin = {origin.x, origin.y,
origin.z + copyExtent.depthOrArrayLayers - 1};
ASSERT(copyExtent.height - blockInfo.height <
texture->GetMipLevelSingleSubresourceVirtualSize(mipLevel).height);
copy.copies[copy.count].copyExtent = {clampedCopyExtent.width,
copyExtent.height - blockInfo.height, 1};
++copy.count;
// Update the offset to point at the last row.
currentOffset += (copyBlockRowCount - 1) * bytesPerRow;
}
// Copy the last row with the exact number of bytes it contains, working around the
// issue in the same way as a copy to a 1D texture.
uint32_t lastRowDataSize = (copyExtent.width / blockInfo.width) * blockInfo.byteSize;
uint32_t lastRowCopyExtentHeight =
blockInfo.height + clampedCopyExtent.height - copyExtent.height;
ASSERT(lastRowCopyExtentHeight <= blockInfo.height);
copy.copies[copy.count].bufferOffset = currentOffset;
copy.copies[copy.count].bytesPerRow = lastRowDataSize;
copy.copies[copy.count].bytesPerImage = lastRowDataSize;
copy.copies[copy.count].textureOrigin = {origin.x,
origin.y + copyExtent.height - blockInfo.height,
origin.z + copyExtent.depthOrArrayLayers - 1};
copy.copies[copy.count].copyExtent = {clampedCopyExtent.width, lastRowCopyExtentHeight, 1};
++copy.count;
return copy;
}
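// If the copy will overwrite the destination subresources entirely, mark them as
// initialized without clearing; otherwise ensure they are initialized (lazily clearing
// them if needed) so the copy doesn't expose stale data around its edges.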
void EnsureDestinationTextureInitialized(CommandRecordingContext* commandContext,
Texture* texture,
const TextureCopy& dst,
const Extent3D& size) {
ASSERT(texture == dst.texture.Get());
SubresourceRange range = GetSubresourcesAffectedByCopy(dst, size);
if (IsCompleteSubresourceCopiedTo(dst.texture.Get(), size, dst.mipLevel)) {
texture->SetIsSubresourceContentInitialized(true, range);
} else {
texture->EnsureSubresourceContentInitialized(commandContext, range);
}
}
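// Computes the MTLBlitOption that selects a single aspect when blitting a combined
// depth-stencil format.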
MTLBlitOption ComputeMTLBlitOption(const Format& format, Aspect aspect) {
ASSERT(HasOneBit(aspect));
ASSERT(format.aspects & aspect);
if (IsSubset(Aspect::Depth | Aspect::Stencil, format.aspects)) {
// We only provide a blit option if the format has both depth and stencil.
// It is invalid to provide a blit option otherwise.
switch (aspect) {
case Aspect::Depth:
return MTLBlitOptionDepthFromDepthStencil;
case Aspect::Stencil:
return MTLBlitOptionStencilFromDepthStencil;
default:
UNREACHABLE();
}
}
return MTLBlitOptionNone;
}
MaybeError EncodeMetalRenderPass(Device* device,
CommandRecordingContext* commandContext,
MTLRenderPassDescriptor* mtlRenderPass,
uint32_t width,
uint32_t height,
EncodeInsideRenderPass encodeInside,
BeginRenderPassCmd* renderPassCmd) {
// This function handles multiple workarounds. Because some cases require multiple
// workarounds to happen at the same time, it handles them one by one and calls itself
// recursively to handle the next workaround if needed.
// Handle Toggle AlwaysResolveIntoZeroLevelAndLayer. We must handle this before applying
// the store + MSAA resolve workaround, otherwise this toggle would never be handled
// because that workaround removes the resolve texture.
if (device->IsToggleEnabled(Toggle::AlwaysResolveIntoZeroLevelAndLayer)) {
std::array<SavedMetalAttachment, kMaxColorAttachments> trueResolveAttachments = {};
bool workaroundUsed = false;
for (uint32_t i = 0; i < kMaxColorAttachments; ++i) {
if (mtlRenderPass.colorAttachments[i].resolveTexture == nullptr) {
continue;
}
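// Resolving into level 0, slice 0 is allowed even with the toggle; only other
// subresources need to be redirected to a temporary texture.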
if (mtlRenderPass.colorAttachments[i].resolveLevel == 0 &&
mtlRenderPass.colorAttachments[i].resolveSlice == 0) {
continue;
}
DAWN_TRY_ASSIGN(
trueResolveAttachments[i],
PatchResolveAttachmentWithTemporary(device, mtlRenderPass.colorAttachments[i]));
workaroundUsed = true;
}
// If temporary resolve textures were used, we need to copy the result of the MSAA
// resolve back to the true resolve targets.
if (workaroundUsed) {
DAWN_TRY(EncodeMetalRenderPass(device, commandContext, mtlRenderPass, width, height,
std::move(encodeInside), renderPassCmd));
for (uint32_t i = 0; i < kMaxColorAttachments; ++i) {
if (trueResolveAttachments[i].texture == nullptr) {
continue;
}
trueResolveAttachments[i].CopyFromTemporaryToAttachment(commandContext);
}
return {};
}
}
// Handle the workaround for r8unorm and rg8unorm mipmap rendering being broken on some
// devices: render to a temporary texture instead, then copy the result back to the
// attachment.
if (device->IsToggleEnabled(Toggle::MetalRenderR8RG8UnormSmallMipToTempTexture)) {
std::array<SavedMetalAttachment, kMaxColorAttachments> originalAttachments;
bool workaroundUsed = false;
for (uint32_t i = 0; i < kMaxColorAttachments; ++i) {
if (mtlRenderPass.colorAttachments[i].texture == nullptr) {
continue;
}
if ([mtlRenderPass.colorAttachments[i].texture pixelFormat] != MTLPixelFormatR8Unorm &&
[mtlRenderPass.colorAttachments[i].texture pixelFormat] != MTLPixelFormatRG8Unorm) {
continue;
}
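// Mip levels 0 and 1 render correctly; only smaller mip levels need the workaround.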
if (mtlRenderPass.colorAttachments[i].level < 2) {
continue;
}
DAWN_TRY_ASSIGN(originalAttachments[i], PatchAttachmentWithTemporary(
device, mtlRenderPass.colorAttachments[i]));
workaroundUsed = true;
if (mtlRenderPass.colorAttachments[i].loadAction == MTLLoadActionLoad) {
originalAttachments[i].CopyFromAttachmentToTemporary(commandContext);
}
}
if (workaroundUsed) {
DAWN_TRY(EncodeMetalRenderPass(device, commandContext, mtlRenderPass, width, height,
std::move(encodeInside), renderPassCmd));
for (uint32_t i = 0; i < kMaxColorAttachments; ++i) {
if (originalAttachments[i].texture == nullptr) {
continue;
}
originalAttachments[i].CopyFromTemporaryToAttachment(commandContext);
}
return {};
}
}
// Handle Store + MSAA resolve workaround (Toggle EmulateStoreAndMSAAResolve).
// Done after the workarounds that modify the non-resolve attachments so that
// ResolveInAnotherRenderPass uses the temporary attachments if needed instead of the
// original ones.
if (device->IsToggleEnabled(Toggle::EmulateStoreAndMSAAResolve)) {
bool hasStoreAndMSAAResolve = false;
// Remove any store + MSAA resolve actions and remember their resolve textures.
std::array<id<MTLTexture>, kMaxColorAttachments> resolveTextures = {};
for (uint32_t i = 0; i < kMaxColorAttachments; ++i) {
if (mtlRenderPass.colorAttachments[i].storeAction ==
kMTLStoreActionStoreAndMultisampleResolve) {
hasStoreAndMSAAResolve = true;
resolveTextures[i] = mtlRenderPass.colorAttachments[i].resolveTexture;
mtlRenderPass.colorAttachments[i].storeAction = MTLStoreActionStore;
mtlRenderPass.colorAttachments[i].resolveTexture = nullptr;
}
}
// If we found a store + MSAA resolve, we need to resolve in a separate render pass.
if (hasStoreAndMSAAResolve) {
DAWN_TRY(EncodeMetalRenderPass(device, commandContext, mtlRenderPass, width, height,
std::move(encodeInside), renderPassCmd));
ResolveInAnotherRenderPass(commandContext, mtlRenderPass, resolveTextures);
return {};
}
}
// No (more) workarounds needed! We can finally encode the actual render pass.
commandContext->EndBlit();
DAWN_TRY(encodeInside(commandContext->BeginRender(mtlRenderPass), renderPassCmd));
commandContext->EndRender();
return {};
}
MaybeError EncodeEmptyMetalRenderPass(Device* device,
CommandRecordingContext* commandContext,
MTLRenderPassDescriptor* mtlRenderPass,
Extent3D size) {
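// Encode nothing inside the pass: only the attachments' load/store actions take effect.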
return EncodeMetalRenderPass(
device, commandContext, mtlRenderPass, size.width, size.height,
[&](id<MTLRenderCommandEncoder>, BeginRenderPassCmd*) -> MaybeError { return {}; });
}
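// Checks whether the device supports sampling GPU counters (used to implement timestamp
// queries) at blit, dispatch, and draw command boundaries.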
DAWN_NOINLINE bool SupportCounterSamplingAtCommandBoundary(id<MTLDevice> device)
API_AVAILABLE(macos(11.0), ios(14.0)) {
bool isBlitBoundarySupported =
[device supportsCounterSampling:MTLCounterSamplingPointAtBlitBoundary];
bool isDispatchBoundarySupported =
[device supportsCounterSampling:MTLCounterSamplingPointAtDispatchBoundary];
bool isDrawBoundarySupported =
[device supportsCounterSampling:MTLCounterSamplingPointAtDrawBoundary];
return isBlitBoundarySupported && isDispatchBoundarySupported && isDrawBoundarySupported;
}
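// Checks whether the device supports sampling GPU counters at stage boundaries.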
DAWN_NOINLINE bool SupportCounterSamplingAtStageBoundary(id<MTLDevice> device)
API_AVAILABLE(macos(11.0), ios(14.0)) {
return [device supportsCounterSampling:MTLCounterSamplingPointAtStageBoundary];
}
} // namespace dawn::native::metal