// Copyright 2023 The Dawn & Tint Authors
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// 1. Redistributions of source code must retain the above copyright notice, this
// list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// 3. Neither the name of the copyright holder nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "src/tint/lang/msl/writer/raise/builtin_polyfill.h"
#include <atomic>
#include <cstdint>
#include <utility>
#include "src/tint/lang/core/fluent_types.h"
#include "src/tint/lang/core/ir/builder.h"
#include "src/tint/lang/core/ir/constant.h"
#include "src/tint/lang/core/ir/core_builtin_call.h"
#include "src/tint/lang/core/ir/function.h"
#include "src/tint/lang/core/ir/validator.h"
#include "src/tint/lang/core/type/depth_multisampled_texture.h"
#include "src/tint/lang/core/type/multisampled_texture.h"
#include "src/tint/lang/core/type/scalar.h"
#include "src/tint/lang/core/type/storage_texture.h"
#include "src/tint/lang/core/type/texture.h"
#include "src/tint/lang/core/type/texture_dimension.h"
#include "src/tint/lang/core/type/vector.h"
#include "src/tint/lang/msl/barrier_type.h"
#include "src/tint/lang/msl/builtin_fn.h"
#include "src/tint/lang/msl/ir/binary.h"
#include "src/tint/lang/msl/ir/builtin_call.h"
#include "src/tint/lang/msl/ir/component.h"
#include "src/tint/lang/msl/ir/member_builtin_call.h"
#include "src/tint/lang/msl/ir/memory_order.h"
#include "src/tint/lang/msl/type/bias.h"
#include "src/tint/lang/msl/type/gradient.h"
#include "src/tint/lang/msl/type/level.h"
#include "src/tint/utils/containers/hashmap.h"
#include "src/tint/utils/ice/ice.h"
namespace tint::msl::writer::raise {
namespace {
using namespace tint::core::fluent_types; // NOLINT
/// PIMPL state for the transform.
struct State {
/// The IR module.
core::ir::Module& ir;
/// The IR builder.
core::ir::Builder b{ir};
/// The type manager.
core::type::Manager& ty{ir.Types()};
/// A map from an atomic pointer type to an atomicCompareExchangeWeak polyfill.
Hashmap<const core::type::Type*, core::ir::Function*, 2> atomic_compare_exchange_polyfills{};
/// A map from an integer vector type to a dot polyfill.
Hashmap<const core::type::Vector*, core::ir::Function*, 4> integer_dot_polyfills{};
/// Process the module.
void Process() {
// Find the builtins that need replacing.
Vector<core::ir::CoreBuiltinCall*, 4> builtin_worklist;
for (auto* inst : ir.Instructions()) {
if (auto* builtin = inst->As<core::ir::CoreBuiltinCall>()) {
switch (builtin->Func()) {
case core::BuiltinFn::kAtomicAdd:
case core::BuiltinFn::kAtomicAnd:
case core::BuiltinFn::kAtomicCompareExchangeWeak:
case core::BuiltinFn::kAtomicExchange:
case core::BuiltinFn::kAtomicLoad:
case core::BuiltinFn::kAtomicMax:
case core::BuiltinFn::kAtomicMin:
case core::BuiltinFn::kAtomicOr:
case core::BuiltinFn::kAtomicStore:
case core::BuiltinFn::kAtomicSub:
case core::BuiltinFn::kAtomicXor:
case core::BuiltinFn::kDistance:
case core::BuiltinFn::kDot:
case core::BuiltinFn::kFrexp:
case core::BuiltinFn::kLength:
case core::BuiltinFn::kModf:
case core::BuiltinFn::kPack2X16Float:
case core::BuiltinFn::kQuadSwapDiagonal:
case core::BuiltinFn::kQuadSwapX:
case core::BuiltinFn::kQuadSwapY:
case core::BuiltinFn::kQuantizeToF16:
case core::BuiltinFn::kSign:
case core::BuiltinFn::kTextureDimensions:
case core::BuiltinFn::kTextureGather:
case core::BuiltinFn::kTextureGatherCompare:
case core::BuiltinFn::kTextureLoad:
case core::BuiltinFn::kTextureNumLayers:
case core::BuiltinFn::kTextureNumLevels:
case core::BuiltinFn::kTextureNumSamples:
case core::BuiltinFn::kTextureSample:
case core::BuiltinFn::kTextureSampleBias:
case core::BuiltinFn::kTextureSampleCompare:
case core::BuiltinFn::kTextureSampleCompareLevel:
case core::BuiltinFn::kTextureSampleGrad:
case core::BuiltinFn::kTextureSampleLevel:
case core::BuiltinFn::kTextureStore:
case core::BuiltinFn::kStorageBarrier:
case core::BuiltinFn::kWorkgroupBarrier:
case core::BuiltinFn::kTextureBarrier:
case core::BuiltinFn::kUnpack2X16Float:
builtin_worklist.Push(builtin);
break;
default:
break;
}
}
}
// Replace the builtins that we found.
for (auto* builtin : builtin_worklist) {
switch (builtin->Func()) {
// Atomics.
case core::BuiltinFn::kAtomicAdd:
AtomicCall(builtin, msl::BuiltinFn::kAtomicFetchAddExplicit);
break;
case core::BuiltinFn::kAtomicAnd:
AtomicCall(builtin, msl::BuiltinFn::kAtomicFetchAndExplicit);
break;
case core::BuiltinFn::kAtomicCompareExchangeWeak:
AtomicCompareExchangeWeak(builtin);
break;
case core::BuiltinFn::kAtomicExchange:
AtomicCall(builtin, msl::BuiltinFn::kAtomicExchangeExplicit);
break;
case core::BuiltinFn::kAtomicLoad:
AtomicCall(builtin, msl::BuiltinFn::kAtomicLoadExplicit);
break;
case core::BuiltinFn::kAtomicMax:
AtomicCall(builtin, msl::BuiltinFn::kAtomicFetchMaxExplicit);
break;
case core::BuiltinFn::kAtomicMin:
AtomicCall(builtin, msl::BuiltinFn::kAtomicFetchMinExplicit);
break;
case core::BuiltinFn::kAtomicOr:
AtomicCall(builtin, msl::BuiltinFn::kAtomicFetchOrExplicit);
break;
case core::BuiltinFn::kAtomicStore:
AtomicCall(builtin, msl::BuiltinFn::kAtomicStoreExplicit);
break;
case core::BuiltinFn::kAtomicSub:
AtomicCall(builtin, msl::BuiltinFn::kAtomicFetchSubExplicit);
break;
case core::BuiltinFn::kAtomicXor:
AtomicCall(builtin, msl::BuiltinFn::kAtomicFetchXorExplicit);
break;
// Arithmetic builtins.
case core::BuiltinFn::kDistance:
Distance(builtin);
break;
case core::BuiltinFn::kDot:
Dot(builtin);
break;
case core::BuiltinFn::kFrexp:
Frexp(builtin);
break;
case core::BuiltinFn::kLength:
Length(builtin);
break;
case core::BuiltinFn::kModf:
Modf(builtin);
break;
case core::BuiltinFn::kQuantizeToF16:
QuantizeToF16(builtin);
break;
case core::BuiltinFn::kSign:
Sign(builtin);
break;
// Texture builtins.
case core::BuiltinFn::kTextureDimensions:
TextureDimensions(builtin);
break;
case core::BuiltinFn::kTextureGather:
TextureGather(builtin);
break;
case core::BuiltinFn::kTextureGatherCompare:
TextureGatherCompare(builtin);
break;
case core::BuiltinFn::kTextureLoad:
TextureLoad(builtin);
break;
case core::BuiltinFn::kTextureNumLayers:
TextureNumLayers(builtin);
break;
case core::BuiltinFn::kTextureNumLevels:
TextureNumLevels(builtin);
break;
case core::BuiltinFn::kTextureNumSamples:
TextureNumSamples(builtin);
break;
case core::BuiltinFn::kTextureSample:
TextureSample(builtin);
break;
case core::BuiltinFn::kTextureSampleBias:
TextureSampleBias(builtin);
break;
case core::BuiltinFn::kTextureSampleCompare:
TextureSampleCompare(builtin);
break;
case core::BuiltinFn::kTextureSampleCompareLevel:
TextureSampleCompareLevel(builtin);
break;
case core::BuiltinFn::kTextureSampleGrad:
TextureSampleGrad(builtin);
break;
case core::BuiltinFn::kTextureSampleLevel:
TextureSampleLevel(builtin);
break;
case core::BuiltinFn::kTextureStore:
TextureStore(builtin);
break;
// Barriers.
case core::BuiltinFn::kStorageBarrier:
ThreadgroupBarrier(builtin, BarrierType::kDevice);
break;
case core::BuiltinFn::kWorkgroupBarrier:
ThreadgroupBarrier(builtin, BarrierType::kThreadGroup);
break;
case core::BuiltinFn::kTextureBarrier:
ThreadgroupBarrier(builtin, BarrierType::kTexture);
break;
// QuadSwap builtins.
case core::BuiltinFn::kQuadSwapDiagonal:
QuadSwap(builtin, 0b11);
break;
case core::BuiltinFn::kQuadSwapX:
QuadSwap(builtin, 0b01);
break;
case core::BuiltinFn::kQuadSwapY:
QuadSwap(builtin, 0b10);
break;
// Pack/unpack builtins.
case core::BuiltinFn::kPack2X16Float:
Pack2x16Float(builtin);
break;
case core::BuiltinFn::kUnpack2X16Float:
Unpack2x16Float(builtin);
break;
default:
break;
}
}
}
/// Replace an atomic builtin call with an equivalent MSL intrinsic.
/// @param builtin the builtin call instruction
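/// @param intrinsic the MSL intrinsic function
///
/// Illustrative lowering: a WGSL call like `atomicAdd(&a, 1)` becomes the MSL
/// expression `atomic_fetch_add_explicit(&a, 1, memory_order_relaxed)`, since
/// every atomic call is given an explicit relaxed memory order argument.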
void AtomicCall(core::ir::CoreBuiltinCall* builtin, msl::BuiltinFn intrinsic) {
auto args = Vector<core::ir::Value*, 4>{builtin->Args()};
args.Push(
ir.CreateValue<msl::ir::MemoryOrder>(b.ConstantValue(u32(std::memory_order_relaxed))));
auto* call = b.CallWithResult<msl::ir::BuiltinCall>(builtin->DetachResult(), intrinsic,
std::move(args));
call->InsertBefore(builtin);
builtin->Destroy();
}
/// Replace an atomicCompareExchangeWeak builtin call with an equivalent MSL polyfill.
/// @param builtin the builtin call instruction
void AtomicCompareExchangeWeak(core::ir::CoreBuiltinCall* builtin) {
// Get or generate a polyfill function.
auto* atomic_ptr = builtin->Args()[0]->Type();
auto* polyfill = atomic_compare_exchange_polyfills.GetOrAdd(atomic_ptr, [&] {
// The polyfill function performs the equivalent of the following:
// int old_value = cmp;
// bool exchanged = atomic_compare_exchange_weak_explicit(
// atomic_ptr, old_value, val,
// memory_order_relaxed, memory_order_relaxed);
// return __atomic_compare_exchange_result_i32(old_value, exchanged);
auto* ptr = b.FunctionParam("atomic_ptr", atomic_ptr);
auto* cmp = b.FunctionParam("cmp", builtin->Args()[1]->Type());
auto* val = b.FunctionParam("val", builtin->Args()[2]->Type());
auto* func = b.Function(builtin->Result(0)->Type());
func->SetParams({ptr, cmp, val});
b.Append(func->Block(), [&] {
auto* old_value = b.Var<function>("old_value", cmp)->Result(0);
auto* order = ir.CreateValue<msl::ir::MemoryOrder>(
b.ConstantValue(u32(std::memory_order_relaxed)));
auto* call = b.Call<msl::ir::BuiltinCall>(
ty.bool_(), BuiltinFn::kAtomicCompareExchangeWeakExplicit,
Vector{ptr, old_value, val, order, order});
auto* result =
b.Construct(builtin->Result(0)->Type(), Vector{
b.Load(old_value)->Result(0),
call->Result(0),
});
b.Return(func, result);
});
return func;
});
// Call the polyfill function.
auto args = Vector<core::ir::Value*, 4>{builtin->Args()};
auto* call = b.CallWithResult(builtin->DetachResult(), polyfill, std::move(args));
call->InsertBefore(builtin);
builtin->Destroy();
}
/// Polyfill a distance call if necessary.
/// @param builtin the builtin call instruction
void Distance(core::ir::CoreBuiltinCall* builtin) {
b.InsertBefore(builtin, [&] {
auto* arg0 = builtin->Args()[0];
auto* arg1 = builtin->Args()[1];
if (arg0->Type()->Is<core::type::Scalar>()) {
// Calls to `distance` with a scalar argument are replaced with `abs(a - b)`.
auto* sub = b.Subtract(builtin->Result(0)->Type(), arg0, arg1);
b.CallWithResult(builtin->DetachResult(), core::BuiltinFn::kAbs, sub);
} else {
b.CallWithResult<msl::ir::BuiltinCall>(builtin->DetachResult(),
msl::BuiltinFn::kDistance, arg0, arg1);
}
});
builtin->Destroy();
}
/// Polyfill a dot call if necessary.
/// @param builtin the builtin call instruction
void Dot(core::ir::CoreBuiltinCall* builtin) {
b.InsertBefore(builtin, [&] {
auto* arg0 = builtin->Args()[0];
auto* arg1 = builtin->Args()[1];
auto* vec = arg0->Type()->As<core::type::Vector>();
if (vec->Type()->IsIntegerScalar()) {
// Calls to `dot` with integer arguments are replaced with helper functions,
// as MSL's `dot` builtin only supports floating-point arguments.
auto* polyfill = integer_dot_polyfills.GetOrAdd(vec, [&] {
// Generate a helper function that performs the following:
// fn tint_dot(lhs: vec4i, rhs: vec4i) -> i32 {
// let mul = lhs * rhs;
// return mul[0] + mul[1] + mul[2] + mul[3];
// }
auto* el_ty = vec->Type();
auto* lhs = b.FunctionParam("lhs", vec);
auto* rhs = b.FunctionParam("rhs", vec);
auto* func = b.Function("tint_dot", el_ty);
func->SetParams({lhs, rhs});
b.Append(func->Block(), [&] {
auto* mul = b.Multiply(vec, lhs, rhs);
auto* sum = b.Access(el_ty, mul, u32(0))->Result(0);
for (uint32_t i = 1; i < vec->Width(); i++) {
sum = b.Add(el_ty, sum, b.Access(el_ty, mul, u32(i)))->Result(0);
}
b.Return(func, sum);
});
return func;
});
b.CallWithResult(builtin->DetachResult(), polyfill, arg0, arg1);
} else {
b.CallWithResult<msl::ir::BuiltinCall>(builtin->DetachResult(),
msl::BuiltinFn::kDot, arg0, arg1);
}
});
builtin->Destroy();
}
/// Polyfill a frexp call.
/// @param builtin the builtin call instruction
void Frexp(core::ir::CoreBuiltinCall* builtin) {
b.InsertBefore(builtin, [&] {
// MSL's frexp returns `fract` and writes `exp` through an output parameter.
// Polyfill it by declaring the result struct and then setting the values:
// __frexp_result result = {};
// result.fract = frexp(arg, result.exp);
//
// Note: We need to use a `load` instruction to pass `result.exp`, as the intrinsic
// definition expects a value type (as we do not have reference types in the IR). The
// printer will just fold away the load, which achieves the pass-by-reference semantics
// that we want.
//
auto* result_type = builtin->Result(0)->Type();
auto* float_type = result_type->Element(0);
auto* i32_type = result_type->Element(1);
auto* result = b.Var(ty.ptr(function, result_type));
auto* exp = b.Access(ty.ptr(function, i32_type), result, u32(1));
auto args = Vector<core::ir::Value*, 2>{builtin->Args()[0], b.Load(exp)->Result(0)};
auto* call =
b.Call<msl::ir::BuiltinCall>(float_type, msl::BuiltinFn::kFrexp, std::move(args));
b.Store(b.Access(ty.ptr(function, float_type), result, u32(0)), call);
builtin->Result(0)->ReplaceAllUsesWith(b.Load(result)->Result(0));
});
builtin->Destroy();
}
/// Polyfill a length call if necessary.
/// @param builtin the builtin call instruction
void Length(core::ir::CoreBuiltinCall* builtin) {
auto* arg = builtin->Args()[0];
if (arg->Type()->Is<core::type::Scalar>()) {
// Calls to `length` with a scalar argument are replaced with `abs`.
auto* call = b.CallWithResult(builtin->DetachResult(), core::BuiltinFn::kAbs, arg);
call->InsertBefore(builtin);
} else {
auto* call = b.CallWithResult<msl::ir::BuiltinCall>(builtin->DetachResult(),
msl::BuiltinFn::kLength, arg);
call->InsertBefore(builtin);
}
builtin->Destroy();
}
/// Polyfill a modf call.
/// @param builtin the builtin call instruction
void Modf(core::ir::CoreBuiltinCall* builtin) {
b.InsertBefore(builtin, [&] {
// MSL's modf returns `fract` and writes `whole` through an output parameter.
// Polyfill it by declaring the result struct and then setting the values:
// __modf_result result = {};
// result.fract = modf(arg, result.whole);
//
// Note: We need to use a `load` instruction to pass `result.whole`, as the intrinsic
// definition expects a value type (as we do not have reference types in the IR). The
// printer will just fold away the load, which achieves the pass-by-reference semantics
// that we want.
//
auto* result_type = builtin->Result(0)->Type();
auto* element_type = result_type->Element(0);
auto* result = b.Var(ty.ptr(function, result_type));
auto* whole = b.Access(ty.ptr(function, element_type), result, u32(1));
auto args = Vector<core::ir::Value*, 2>{builtin->Args()[0], b.Load(whole)->Result(0)};
auto* call =
b.Call<msl::ir::BuiltinCall>(element_type, msl::BuiltinFn::kModf, std::move(args));
b.Store(b.Access(ty.ptr(function, element_type), result, u32(0)), call);
builtin->Result(0)->ReplaceAllUsesWith(b.Load(result)->Result(0));
});
builtin->Destroy();
}
/// Polyfill a Pack2x16Float call.
/// @param builtin the builtin call instruction
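///
/// Illustrative lowering: `pack2x16float(v)` is emitted as the MSL expression
/// `as_type<uint>(half2(v))`; the conversion narrows each f32 lane to f16 and
/// the bitcast reinterprets the two f16 values as one packed 32-bit integer.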
void Pack2x16Float(core::ir::CoreBuiltinCall* builtin) {
// Replace the call with `as_type<uint>(half2(value))`.
b.InsertBefore(builtin, [&] {
auto* convert = b.Convert<vec2<f16>>(builtin->Args()[0]);
auto* bitcast = b.Bitcast(ty.u32(), convert);
bitcast->SetResults(Vector{builtin->DetachResult()});
});
builtin->Destroy();
}
/// Polyfill a quantizeToF16 call.
/// @param builtin the builtin call instruction
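///
/// Illustrative lowering: `quantizeToF16(x)` round-trips the value through
/// f16, emitting roughly `float(half(x))` (or the vector equivalent), which
/// drops any precision that does not fit in a 16-bit float.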
void QuantizeToF16(core::ir::CoreBuiltinCall* builtin) {
auto* arg = builtin->Args()[0];
// Convert the argument to f16 and then back again.
b.InsertBefore(builtin, [&] {
b.ConvertWithResult(builtin->DetachResult(),
b.Convert(ty.match_width(ty.f16(), arg->Type()), arg));
});
builtin->Destroy();
}
/// Polyfill a sign call if necessary.
/// @param builtin the builtin call instruction
void Sign(core::ir::CoreBuiltinCall* builtin) {
auto* arg = builtin->Args()[0];
auto* type = arg->Type();
b.InsertBefore(builtin, [&] {
// Calls to `sign` with an integer argument are replaced with select operations:
// result = select(select(-1, 1, arg > 0), 0, arg == 0);
if (type->IsIntegerScalarOrVector()) {
core::ir::Value* pos_one = b.MatchWidth(i32(1), type);
core::ir::Value* neg_one = b.MatchWidth(i32(-1), type);
const core::type::Type* bool_type = ty.match_width(ty.bool_(), type);
auto* zero = b.Zero(type);
auto* sign = b.Call(type, core::BuiltinFn::kSelect, neg_one, pos_one,
b.GreaterThan(bool_type, arg, zero));
b.CallWithResult(builtin->DetachResult(), core::BuiltinFn::kSelect, sign, zero,
b.Equal(bool_type, arg, zero));
} else {
b.CallWithResult<msl::ir::BuiltinCall>(builtin->DetachResult(),
msl::BuiltinFn::kSign, arg);
}
});
builtin->Destroy();
}
/// Replace a textureDimensions call with the equivalent MSL intrinsics.
/// @param builtin the builtin call instruction
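///
/// Illustrative lowering for a 2D texture: `textureDimensions(t, lod)` is
/// rebuilt as `uint2(t.get_width(lod), t.get_height(lod))`; 3D textures also
/// query `get_depth()`, while 1D and multisampled textures take no level
/// argument.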
void TextureDimensions(core::ir::CoreBuiltinCall* builtin) {
auto* tex = builtin->Args()[0];
auto* type = tex->Type()->As<core::type::Texture>();
bool needs_lod_arg = type->Dim() != core::type::TextureDimension::k1d &&
!type->Is<core::type::MultisampledTexture>() &&
!type->Is<core::type::DepthMultisampledTexture>();
b.InsertBefore(builtin, [&] {
// If we need a LOD argument, use the one provided or default to 0.
core::ir::Value* lod = nullptr;
if (needs_lod_arg) {
if (builtin->Args().Length() == 1) {
lod = b.Value(u32(0));
} else {
lod = builtin->Args()[1];
if (lod->Type()->IsSignedIntegerScalar()) {
lod = b.Convert<u32>(lod)->Result(0);
}
}
}
// Call MSL member functions to get the dimensions of the image.
Vector<core::ir::InstructionResult*, 4> values;
auto get_dim = [&](msl::BuiltinFn fn) {
auto* call = b.MemberCall<msl::ir::MemberBuiltinCall>(ty.u32(), fn, tex);
if (lod) {
call->AppendArg(lod);
}
values.Push(call->Result(0));
};
get_dim(msl::BuiltinFn::kGetWidth);
if (type->Dim() != core::type::TextureDimension::k1d) {
get_dim(msl::BuiltinFn::kGetHeight);
if (type->Dim() == core::type::TextureDimension::k3d) {
get_dim(msl::BuiltinFn::kGetDepth);
}
}
// Reconstruct the original result type from the individual dimensions.
b.ConstructWithResult(builtin->DetachResult(), std::move(values));
});
builtin->Destroy();
}
/// Replace a textureGather call with the equivalent MSL intrinsic.
/// @param builtin the builtin call instruction
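///
/// Illustrative lowering: `textureGather(1, t, s, coords)` becomes
/// `t.gather(s, coords, int2(0), component::y)`, with a zero offset injected
/// when none was provided and the component moved to the trailing position.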
void TextureGather(core::ir::CoreBuiltinCall* builtin) {
// If there is a component argument it will always be first, followed by the texture object.
// Otherwise, the texture object will be first.
core::ir::Value* tex = nullptr;
Vector<core::ir::Value*, 4> args;
auto* component = builtin->Args()[0]->As<core::ir::Constant>();
if (component) {
tex = builtin->Args()[1];
args = builtin->Args().Offset(2);
} else {
tex = builtin->Args()[0];
args = builtin->Args().Offset(1);
}
auto* tex_type = tex->Type()->As<core::type::Texture>();
// Add an offset argument if it was not provided.
const bool has_offset = args.Back()->Type()->IsSignedIntegerVector();
const bool needs_offset = tex_type->Dim() == core::type::TextureDimension::k2d ||
tex_type->Dim() == core::type::TextureDimension::k2dArray;
if (needs_offset && !has_offset) {
args.Push(b.Zero<vec2<i32>>());
}
// Add the component argument if needed, converting it to u32 if necessary.
if (component) {
if (component->Type()->Is<core::type::I32>()) {
component = b.Constant(component->Value()->ValueAs<u32>());
}
args.Push(ir.CreateValue<msl::ir::Component>(component->Value()));
}
// Call the `gather()` member function.
auto* call = b.MemberCallWithResult<msl::ir::MemberBuiltinCall>(
builtin->DetachResult(), msl::BuiltinFn::kGather, tex, std::move(args));
call->InsertBefore(builtin);
TextureSampleClampArrayIndexHelper(call);
builtin->Destroy();
}
/// Replace a textureGatherCompare call with the equivalent MSL intrinsic.
/// @param builtin the builtin call instruction
void TextureGatherCompare(core::ir::CoreBuiltinCall* builtin) {
// The MSL intrinsic is a member function, so we split the first argument off as the object.
auto args = Vector<core::ir::Value*, 4>(builtin->Args().Offset(1));
auto* call = b.MemberCallWithResult<msl::ir::MemberBuiltinCall>(
builtin->DetachResult(), msl::BuiltinFn::kGatherCompare, builtin->Args()[0],
std::move(args));
call->InsertBefore(builtin);
TextureSampleClampArrayIndexHelper(call);
builtin->Destroy();
}
/// Replace a textureLoad call with the equivalent MSL intrinsic.
/// @param builtin the builtin call instruction
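///
/// Illustrative lowering: `textureLoad(t, coords, level)` becomes
/// `t.read(uint2(coords), level)`, with signed coordinates converted to
/// unsigned to match the MSL `read()` signature.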
void TextureLoad(core::ir::CoreBuiltinCall* builtin) {
uint32_t next_arg = 0;
auto* tex = builtin->Args()[next_arg++];
auto* tex_type = tex->Type()->As<core::type::Texture>();
// Extract the arguments from the core builtin call.
auto* coords = builtin->Args()[next_arg++];
core::ir::Value* index = nullptr;
core::ir::Value* lod_or_sample = nullptr;
if (tex_type->Dim() == core::type::TextureDimension::k2dArray) {
index = builtin->Args()[next_arg++];
}
if (tex_type->Dim() != core::type::TextureDimension::k1d &&
!tex_type->Is<core::type::StorageTexture>()) {
lod_or_sample = builtin->Args()[next_arg++];
}
b.InsertBefore(builtin, [&] {
// Convert the coordinates to unsigned integers if necessary.
if (coords->Type()->IsSignedIntegerScalarOrVector()) {
coords = b.Convert(ty.match_width(ty.u32(), coords->Type()), coords)->Result(0);
}
// Call the `read()` member function.
Vector<core::ir::Value*, 4> args{coords};
if (index) {
args.Push(index);
}
if (lod_or_sample) {
args.Push(lod_or_sample);
}
b.MemberCallWithResult<msl::ir::MemberBuiltinCall>(
builtin->DetachResult(), msl::BuiltinFn::kRead, tex, std::move(args));
});
builtin->Destroy();
}
/// Replace a textureNum* call with the equivalent MSL intrinsic.
/// @param builtin the builtin call instruction
/// @param fn the MSL intrinsic function
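///
/// Illustrative lowering: `textureNumLevels(t)` becomes
/// `t.get_num_mip_levels()`, and likewise for `get_array_size()` and
/// `get_num_samples()`.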
void TextureNumHelper(core::ir::CoreBuiltinCall* builtin, msl::BuiltinFn fn) {
// The MSL intrinsic is a member function, so we split the first argument off as the object.
auto* tex = builtin->Args()[0];
auto* call =
b.MemberCallWithResult<msl::ir::MemberBuiltinCall>(builtin->DetachResult(), fn, tex);
call->InsertBefore(builtin);
builtin->Destroy();
}
/// Clamp a texture sample call's signed integer array index argument so it cannot be negative.
/// @param builtin the builtin call instruction
void TextureSampleClampArrayIndexHelper(msl::ir::MemberBuiltinCall* builtin) {
// If the texture is arrayed and the array index argument is a signed integer,
// clamp it to zero to avoid sampling with a negative index.
b.InsertBefore(builtin, [&] {
auto* tex = builtin->Object();
auto* tex_type = tex->Type()->As<core::type::Texture>();
if (IsTextureArray(tex_type->Dim())) {
const uint32_t kArrayIndex = 2;
auto* index_arg = builtin->Args()[kArrayIndex];
if (index_arg->Type()->IsSignedIntegerScalar()) {
builtin->SetArg(kArrayIndex, b.Call(ty.i32(), core::BuiltinFn::kMax, index_arg,
b.Zero<i32>())
->Result(0));
}
}
});
}
/// Replace a textureNumLayers call with the equivalent MSL intrinsic.
/// @param builtin the builtin call instruction
void TextureNumLayers(core::ir::CoreBuiltinCall* builtin) {
TextureNumHelper(builtin, msl::BuiltinFn::kGetArraySize);
}
/// Replace a textureNumLevels call with the equivalent MSL intrinsic.
/// @param builtin the builtin call instruction
void TextureNumLevels(core::ir::CoreBuiltinCall* builtin) {
TextureNumHelper(builtin, msl::BuiltinFn::kGetNumMipLevels);
}
/// Replace a textureNumSamples call with the equivalent MSL intrinsic.
/// @param builtin the builtin call instruction
void TextureNumSamples(core::ir::CoreBuiltinCall* builtin) {
TextureNumHelper(builtin, msl::BuiltinFn::kGetNumSamples);
}
/// Replace a textureSample call with the equivalent MSL intrinsic.
/// @param builtin the builtin call instruction
void TextureSample(core::ir::CoreBuiltinCall* builtin) {
// The MSL intrinsic is a member function, so we split the first argument off as the object.
auto args = Vector<core::ir::Value*, 4>(builtin->Args().Offset(1));
auto* call = b.MemberCallWithResult<msl::ir::MemberBuiltinCall>(
builtin->DetachResult(), msl::BuiltinFn::kSample, builtin->Args()[0], std::move(args));
call->InsertBefore(builtin);
TextureSampleClampArrayIndexHelper(call);
builtin->Destroy();
}
/// Replace a textureSampleBias call with the equivalent MSL intrinsic.
/// @param builtin the builtin call instruction
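///
/// Illustrative lowering: `textureSampleBias(t, s, coords, b)` becomes
/// `t.sample(s, coords, bias(b))`, wrapping the bias value in MSL's `bias`
/// lod option.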
void TextureSampleBias(core::ir::CoreBuiltinCall* builtin) {
// The MSL intrinsic is a member function, so we split the first argument off as the object.
auto* tex = builtin->Args()[0];
auto* tex_type = tex->Type()->As<core::type::Texture>();
auto args = Vector<core::ir::Value*, 4>(builtin->Args().Offset(1));
b.InsertBefore(builtin, [&] {
// Wrap the bias argument in a constructor for the MSL `bias` builtin type.
uint32_t bias_idx = 2;
if (tex_type->Dim() == core::type::TextureDimension::k2dArray ||
tex_type->Dim() == core::type::TextureDimension::kCubeArray) {
bias_idx = 3;
}
args[bias_idx] = b.Construct(ty.Get<msl::type::Bias>(), args[bias_idx])->Result(0);
});
// Call the `sample()` member function.
auto* call = b.MemberCallWithResult<msl::ir::MemberBuiltinCall>(
builtin->DetachResult(), msl::BuiltinFn::kSample, tex, std::move(args));
call->InsertBefore(builtin);
TextureSampleClampArrayIndexHelper(call);
builtin->Destroy();
}
/// Replace a textureSampleCompare call with the equivalent MSL intrinsic.
/// @param builtin the builtin call instruction
void TextureSampleCompare(core::ir::CoreBuiltinCall* builtin) {
// The MSL intrinsic is a member function, so we split the first argument off as the object.
auto args = Vector<core::ir::Value*, 4>(builtin->Args().Offset(1));
auto* call = b.MemberCallWithResult<msl::ir::MemberBuiltinCall>(
builtin->DetachResult(), msl::BuiltinFn::kSampleCompare, builtin->Args()[0],
std::move(args));
call->InsertBefore(builtin);
TextureSampleClampArrayIndexHelper(call);
builtin->Destroy();
}
/// Replace a textureSampleCompareLevel call with the equivalent MSL intrinsic.
/// @param builtin the builtin call instruction
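///
/// Illustrative lowering: `textureSampleCompareLevel(t, s, coords, depth_ref)`
/// becomes `t.sample_compare(s, coords, depth_ref, level(0))`, pinning the
/// sample to mip level zero via MSL's `level` lod option.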
void TextureSampleCompareLevel(core::ir::CoreBuiltinCall* builtin) {
// The MSL intrinsic is a member function, so we split the first argument off as the object.
auto* tex = builtin->Args()[0];
auto args = Vector<core::ir::Value*, 4>(builtin->Args().Offset(1));
// The overloads that don't use an offset all have the depth_ref as their final argument.
const bool has_offset = !args.Back()->Type()->Is<core::type::F32>();
b.InsertBefore(builtin, [&] {
// Insert a constant zero LOD argument.
// The LOD goes before the offset if there is one, otherwise at the end.
auto* lod = b.Construct(ty.Get<msl::type::Level>(), u32(0))->Result(0);
if (has_offset) {
args.Insert(args.Length() - 1, lod);
} else {
args.Push(lod);
}
});
// Call the `sample_compare()` member function.
auto* call = b.MemberCallWithResult<msl::ir::MemberBuiltinCall>(
builtin->DetachResult(), msl::BuiltinFn::kSampleCompare, tex, std::move(args));
call->InsertBefore(builtin);
TextureSampleClampArrayIndexHelper(call);
builtin->Destroy();
}
/// Replace a textureSampleGrad call with the equivalent MSL intrinsic.
/// @param builtin the builtin call instruction
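///
/// Illustrative lowering for a 2D texture: `textureSampleGrad(t, s, coords,
/// ddx, ddy)` becomes `t.sample(s, coords, gradient2d(ddx, ddy))`, folding the
/// two derivative vectors into a single `gradient2d` argument
/// (`gradient3d`/`gradientcube` for other dimensions).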
void TextureSampleGrad(core::ir::CoreBuiltinCall* builtin) {
// The MSL intrinsic is a member function, so we split the first argument off as the object.
auto* tex = builtin->Args()[0];
auto* tex_type = tex->Type()->As<core::type::Texture>();
auto args = Vector<core::ir::Value*, 4>(builtin->Args().Offset(1));
b.InsertBefore(builtin, [&] {
// Find the ddx and ddy arguments.
uint32_t grad_idx = 2;
if (tex_type->Dim() == core::type::TextureDimension::k2dArray ||
tex_type->Dim() == core::type::TextureDimension::kCubeArray) {
grad_idx = 3;
}
auto* ddx = args[grad_idx];
auto* ddy = args[grad_idx + 1];
// Wrap the ddx and ddy arguments in a constructor for the MSL `gradient` builtin type.
enum type::Gradient::Dim dim;
switch (tex_type->Dim()) {
case core::type::TextureDimension::k2d:
case core::type::TextureDimension::k2dArray:
dim = type::Gradient::Dim::k2d;
break;
case core::type::TextureDimension::k3d:
dim = type::Gradient::Dim::k3d;
break;
case core::type::TextureDimension::kCube:
case core::type::TextureDimension::kCubeArray:
dim = type::Gradient::Dim::kCube;
break;
case core::type::TextureDimension::k1d:
case core::type::TextureDimension::kNone:
TINT_UNREACHABLE();
}
args[grad_idx] = b.Construct(ty.Get<msl::type::Gradient>(dim), ddx, ddy)->Result(0);
// Shrink the argument list by one, since ddx and ddy collapse into the single
// gradient argument; move the offset argument back one place if present.
const bool has_offset = args.Back()->Type()->IsSignedIntegerVector();
if (has_offset) {
args[args.Length() - 2] = args.Back();
}
args.Resize(args.Length() - 1);
// Call the `sample()` member function.
auto* call = b.MemberCallWithResult<msl::ir::MemberBuiltinCall>(
builtin->DetachResult(), msl::BuiltinFn::kSample, tex, std::move(args));
TextureSampleClampArrayIndexHelper(call);
});
builtin->Destroy();
}
/// Replace a textureSampleLevel call with the equivalent MSL intrinsic.
/// @param builtin the builtin call instruction
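///
/// Illustrative lowering: `textureSampleLevel(t, s, coords, lod)` becomes
/// `t.sample(s, coords, level(lod))`, wrapping the LOD in MSL's `level` lod
/// option.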
void TextureSampleLevel(core::ir::CoreBuiltinCall* builtin) {
// The MSL intrinsic is a member function, so we split the first argument off as the object.
auto* tex = builtin->Args()[0];
auto* tex_type = tex->Type()->As<core::type::Texture>();
auto args = Vector<core::ir::Value*, 4>(builtin->Args().Offset(1));
b.InsertBefore(builtin, [&] {
// Wrap the LOD argument in a constructor for the MSL `level` builtin type.
uint32_t lod_idx = 2;
if (tex_type->Dim() == core::type::TextureDimension::k2dArray ||
tex_type->Dim() == core::type::TextureDimension::kCubeArray) {
lod_idx = 3;
}
args[lod_idx] = b.Construct(ty.Get<msl::type::Level>(), args[lod_idx])->Result(0);
// Call the `sample()` member function.
auto* call = b.MemberCallWithResult<msl::ir::MemberBuiltinCall>(
builtin->DetachResult(), msl::BuiltinFn::kSample, tex, std::move(args));
TextureSampleClampArrayIndexHelper(call);
});
builtin->Destroy();
}
/// Replace a textureStore call with the equivalent MSL intrinsic.
/// @param builtin the builtin call instruction
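///
/// Illustrative lowering: `textureStore(t, coords, value)` becomes
/// `t.write(value, uint2(coords))`, followed by `t.fence()` when the texture
/// has read-write access so later reads in the same thread see the write.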
void TextureStore(core::ir::CoreBuiltinCall* builtin) {
auto* tex = builtin->Args()[0];
auto* tex_type = tex->Type()->As<core::type::StorageTexture>();
// Extract the arguments from the core builtin call.
auto* coords = builtin->Args()[1];
core::ir::Value* value = nullptr;
core::ir::Value* index = nullptr;
if (tex_type->Dim() == core::type::TextureDimension::k2dArray) {
index = builtin->Args()[2];
value = builtin->Args()[3];
} else {
value = builtin->Args()[2];
}
b.InsertBefore(builtin, [&] {
// Convert the coordinates to unsigned integers if necessary.
if (coords->Type()->IsSignedIntegerScalarOrVector()) {
coords = b.Convert(ty.match_width(ty.u32(), coords->Type()), coords)->Result(0);
}
// Call the `write()` member function.
Vector<core::ir::Value*, 4> args;
args.Push(value);
args.Push(coords);
if (index) {
args.Push(index);
}
b.MemberCall<msl::ir::MemberBuiltinCall>(ty.void_(), msl::BuiltinFn::kWrite, tex,
std::move(args));
// If we are writing to a read-write texture, add a fence to ensure that the written
// values are visible to subsequent reads from the same thread.
if (tex_type->Access() == core::Access::kReadWrite) {
b.MemberCall<msl::ir::MemberBuiltinCall>(ty.void_(), msl::BuiltinFn::kFence, tex);
}
});
builtin->Destroy();
}
/// Replace a barrier builtin with the `threadgroupBarrier()` intrinsic.
/// @param builtin the builtin call instruction
/// @param type the barrier type
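///
/// Illustrative lowering: `workgroupBarrier()` becomes
/// `threadgroup_barrier(mem_flags::mem_threadgroup)`; storage and texture
/// barriers are presumably printed with the `mem_device` and `mem_texture`
/// flags respectively, matching the barrier type passed here.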
void ThreadgroupBarrier(core::ir::CoreBuiltinCall* builtin, BarrierType type) {
// Replace the builtin call with a call to the msl.threadgroup_barrier intrinsic.
auto args = Vector<core::ir::Value*, 1>{b.Constant(u32(type))};
auto* call = b.CallWithResult<msl::ir::BuiltinCall>(
builtin->DetachResult(), msl::BuiltinFn::kThreadgroupBarrier, std::move(args));
call->InsertBefore(builtin);
builtin->Destroy();
}
/// Replace a quadSwap* builtin with the `quad_shuffle_xor()` intrinsic.
/// @param builtin the builtin call instruction
/// @param mask the shuffle mask
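///
/// Illustrative lowering: `quadSwapX(v)` becomes `quad_shuffle_xor(v, 1)`;
/// the Y and diagonal variants use masks 2 and 3 respectively.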
void QuadSwap(core::ir::CoreBuiltinCall* builtin, uint32_t mask) {
// Replace the builtin call with a call to the msl.quad_shuffle_xor intrinsic.
auto args = Vector<core::ir::Value*, 2>{builtin->Args()[0], b.Constant(u32(mask))};
auto* call = b.CallWithResult<msl::ir::BuiltinCall>(
builtin->DetachResult(), msl::BuiltinFn::kQuadShuffleXor, std::move(args));
call->InsertBefore(builtin);
builtin->Destroy();
}
/// Polyfill an Unpack2x16Float call.
/// @param builtin the builtin call instruction
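///
/// Illustrative lowering: `unpack2x16float(u)` is emitted as
/// `float2(as_type<half2>(u))`, bitcasting the packed 32-bit integer to two
/// f16 values and widening them back to f32.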
void Unpack2x16Float(core::ir::CoreBuiltinCall* builtin) {
// Replace the call with `float2(as_type<half2>(value))`.
b.InsertBefore(builtin, [&] {
auto* bitcast = b.Bitcast<vec2<f16>>(builtin->Args()[0]);
b.ConvertWithResult(builtin->DetachResult(), bitcast);
});
builtin->Destroy();
}
};
} // namespace
Result<SuccessType> BuiltinPolyfill(core::ir::Module& ir) {
auto result = ValidateAndDumpIfNeeded(ir, "BuiltinPolyfill transform",
core::ir::Capabilities{
core::ir::Capability::kAllowPointersInStructures,
});
if (result != Success) {
return result.Failure();
}
State{ir}.Process();
return Success;
}
} // namespace tint::msl::writer::raise