blob: 19a0b2c9e39c8110f18be6f93878a3963732db9a [file] [log] [blame]
// Copyright 2023 The Dawn & Tint Authors
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// 1. Redistributions of source code must retain the above copyright notice, this
// list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// 3. Neither the name of the copyright holder nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "src/tint/lang/core/ir/transform/builtin_polyfill.h"
#include "src/tint/lang/core/binary_op.h"
#include "src/tint/lang/core/ir/builder.h"
#include "src/tint/lang/core/ir/module.h"
#include "src/tint/lang/core/ir/validator.h"
#include "src/tint/lang/core/type/sampled_texture.h"
#include "src/tint/lang/core/type/texture.h"
using namespace tint::core::fluent_types; // NOLINT
using namespace tint::core::number_suffixes; // NOLINT
namespace tint::core::ir::transform {
namespace {
/// Multiplicative constant used to polyfill the radians() builtin (pi / 180).
static constexpr double kDegToRad = 0.017453292519943295474;
/// Multiplicative constant used to polyfill the degrees() builtin (180 / pi).
static constexpr double kRadToDeg = 57.295779513082322865;
/// PIMPL state for the transform.
struct State {
/// The polyfill config.
const BuiltinPolyfillConfig& config;
/// The IR module.
Module& ir;
/// The IR builder.
Builder b{ir};
/// The type manager.
core::type::Manager& ty{ir.Types()};
/// The symbol table.
SymbolTable& sym{ir.symbols};
/// Process the module.
/// Process the module.
/// Runs in two phases: first gather every builtin call that needs polyfilling
/// into a worklist (so the instruction list is not mutated while being
/// iterated), then rewrite each call in turn.
void Process() {
    // Find the builtin call instructions that may need to be polyfilled.
    Vector<ir::CoreBuiltinCall*, 4> worklist;
    for (auto* inst : ir.Instructions()) {
        if (auto* builtin = inst->As<ir::CoreBuiltinCall>()) {
            switch (builtin->Func()) {
                case core::BuiltinFn::kClamp:
                    // Only integer clamp is polyfilled; float clamp is left alone.
                    if (config.clamp_int &&
                        builtin->Result()->Type()->IsIntegerScalarOrVector()) {
                        worklist.Push(builtin);
                    }
                    break;
                case core::BuiltinFn::kAbs:
                    // Unsigned abs is an identity, so only signed abs needs work.
                    if (config.abs_signed_int &&
                        builtin->Result()->Type()->IsSignedIntegerScalarOrVector()) {
                        worklist.Push(builtin);
                    }
                    break;
                case core::BuiltinFn::kCountLeadingZeros:
                    if (config.count_leading_zeros) {
                        worklist.Push(builtin);
                    }
                    break;
                case core::BuiltinFn::kCountTrailingZeros:
                    if (config.count_trailing_zeros) {
                        worklist.Push(builtin);
                    }
                    break;
                case core::BuiltinFn::kDegrees:
                    if (config.degrees) {
                        worklist.Push(builtin);
                    }
                    break;
                case core::BuiltinFn::kSmoothstep:
                    // Always polyfilled (no config flag).
                    worklist.Push(builtin);
                    break;
                case core::BuiltinFn::kExtractBits:
                    if (config.extract_bits != BuiltinPolyfillLevel::kNone) {
                        worklist.Push(builtin);
                    }
                    break;
                case core::BuiltinFn::kFirstLeadingBit:
                    if (config.first_leading_bit) {
                        worklist.Push(builtin);
                    }
                    break;
                case core::BuiltinFn::kFirstTrailingBit:
                    if (config.first_trailing_bit) {
                        worklist.Push(builtin);
                    }
                    break;
                case core::BuiltinFn::kFwidthFine:
                    if (config.fwidth_fine) {
                        worklist.Push(builtin);
                    }
                    break;
                case core::BuiltinFn::kInsertBits:
                    if (config.insert_bits != BuiltinPolyfillLevel::kNone) {
                        worklist.Push(builtin);
                    }
                    break;
                case core::BuiltinFn::kRadians:
                    if (config.radians) {
                        worklist.Push(builtin);
                    }
                    break;
                case core::BuiltinFn::kReflect:
                    if (config.reflect_vec2_f32) {
                        // Polyfill for vec2<f32>. See crbug.com/tint/1798
                        auto* vec_ty = builtin->Result()->Type()->As<core::type::Vector>();
                        if (vec_ty->Width() == 2 && vec_ty->Type()->Is<core::type::F32>()) {
                            worklist.Push(builtin);
                        }
                    }
                    break;
                case core::BuiltinFn::kSaturate:
                    if (config.saturate) {
                        worklist.Push(builtin);
                    }
                    break;
                case core::BuiltinFn::kTextureSampleBias:
                    // Always rewritten to clamp the bias argument (no config flag).
                    worklist.Push(builtin);
                    break;
                case core::BuiltinFn::kTextureSampleBaseClampToEdge:
                    // Only the 2D f32 sampled-texture form is polyfilled.
                    if (config.texture_sample_base_clamp_to_edge_2d_f32) {
                        auto* tex =
                            builtin->Args()[0]->Type()->As<core::type::SampledTexture>();
                        if (tex && tex->Dim() == core::type::TextureDimension::k2d &&
                            tex->Type()->Is<core::type::F32>()) {
                            worklist.Push(builtin);
                        }
                    }
                    break;
                case core::BuiltinFn::kDot4U8Packed:
                case core::BuiltinFn::kDot4I8Packed: {
                    if (config.dot_4x8_packed) {
                        worklist.Push(builtin);
                    }
                    break;
                }
                case core::BuiltinFn::kPack4XI8:
                case core::BuiltinFn::kPack4XU8:
                case core::BuiltinFn::kPack4XI8Clamp:
                case core::BuiltinFn::kUnpack4XI8:
                case core::BuiltinFn::kUnpack4XU8: {
                    if (config.pack_unpack_4x8) {
                        worklist.Push(builtin);
                    }
                    break;
                }
                case core::BuiltinFn::kPack4XU8Clamp: {
                    // Separately gated from the other 4x8 pack/unpack builtins.
                    if (config.pack_4xu8_clamp) {
                        worklist.Push(builtin);
                    }
                    break;
                }
                case core::BuiltinFn::kPack4X8Snorm:
                case core::BuiltinFn::kPack4X8Unorm:
                case core::BuiltinFn::kUnpack4X8Snorm:
                case core::BuiltinFn::kUnpack4X8Unorm:
                    if (config.pack_unpack_4x8_norm) {
                        worklist.Push(builtin);
                    }
                    break;
                default:
                    break;
            }
        }
    }
    // Polyfill the builtin call instructions that we found.
    // Each helper replaces (or rewrites in place) the call instruction.
    for (auto* builtin : worklist) {
        switch (builtin->Func()) {
            case core::BuiltinFn::kClamp:
                ClampInt(builtin);
                break;
            case core::BuiltinFn::kAbs:
                AbsSignedInt(builtin);
                break;
            case core::BuiltinFn::kCountLeadingZeros:
                CountLeadingZeros(builtin);
                break;
            case core::BuiltinFn::kCountTrailingZeros:
                CountTrailingZeros(builtin);
                break;
            case core::BuiltinFn::kDegrees:
                Degrees(builtin);
                break;
            case core::BuiltinFn::kSmoothstep:
                SmoothStep(builtin);
                break;
            case core::BuiltinFn::kExtractBits:
                ExtractBits(builtin);
                break;
            case core::BuiltinFn::kFirstLeadingBit:
                FirstLeadingBit(builtin);
                break;
            case core::BuiltinFn::kFirstTrailingBit:
                FirstTrailingBit(builtin);
                break;
            case core::BuiltinFn::kFwidthFine:
                FwidthFine(builtin);
                break;
            case core::BuiltinFn::kInsertBits:
                InsertBits(builtin);
                break;
            case core::BuiltinFn::kRadians:
                Radians(builtin);
                break;
            case core::BuiltinFn::kReflect:
                Reflect(builtin);
                break;
            case core::BuiltinFn::kSaturate:
                Saturate(builtin);
                break;
            case core::BuiltinFn::kTextureSampleBaseClampToEdge:
                TextureSampleBaseClampToEdge_2d_f32(builtin);
                break;
            case core::BuiltinFn::kTextureSampleBias:
                TextureSampleBiasClamp(builtin);
                break;
            case core::BuiltinFn::kDot4I8Packed:
                Dot4I8Packed(builtin);
                break;
            case core::BuiltinFn::kDot4U8Packed:
                Dot4U8Packed(builtin);
                break;
            case core::BuiltinFn::kPack4XI8:
                Pack4xI8(builtin);
                break;
            case core::BuiltinFn::kPack4XU8:
                Pack4xU8(builtin);
                break;
            case core::BuiltinFn::kPack4XI8Clamp:
                Pack4xI8Clamp(builtin);
                break;
            case core::BuiltinFn::kPack4XU8Clamp:
                Pack4xU8Clamp(builtin);
                break;
            case core::BuiltinFn::kUnpack4XI8:
                Unpack4xI8(builtin);
                break;
            case core::BuiltinFn::kUnpack4XU8:
                Unpack4xU8(builtin);
                break;
            case core::BuiltinFn::kPack4X8Snorm:
                Pack4x8Snorm(builtin);
                break;
            case core::BuiltinFn::kPack4X8Unorm:
                Pack4x8Unorm(builtin);
                break;
            case core::BuiltinFn::kUnpack4X8Snorm:
                Unpack4x8Snorm(builtin);
                break;
            case core::BuiltinFn::kUnpack4X8Unorm:
                Unpack4x8Unorm(builtin);
                break;
            default:
                break;
        }
    }
}
/// Polyfill a `pack4x8snorm` builtin call
/// Polyfill a `pack4x8snorm` builtin call
/// @param call the builtin call instruction
void Pack4x8Snorm(ir::CoreBuiltinCall* call) {
    auto* arg = call->Args()[0];
    b.InsertBefore(call, [&] {
        auto* vec4f = ty.vec4<f32>();
        auto* vec4u = ty.vec4<u32>();
        auto* neg_one = b.Splat(vec4f, -1_f);
        auto* one = b.Splat(vec4f, 1_f);
        // Clamp to [-1, 1], scale to [-127, 127], and round via floor(x + 0.5).
        core::ir::Value* v =
            b.Call(vec4f, core::BuiltinFn::kClamp, Vector{arg, neg_one, one})->Result();
        v = b.Multiply(vec4f, b.Splat(vec4f, 127_f), v)->Result();
        v = b.Add(vec4f, b.Splat(vec4f, 0.5_f), v)->Result();
        v = b.Call(vec4f, core::BuiltinFn::kFloor, Vector{v})->Result();
        // Convert to signed integers, then bitcast to u32 so the two's-complement
        // bit patterns can be masked down to a byte and packed.
        v = b.Convert(ty.vec4<i32>(), v)->Result();
        v = b.Bitcast(vec4u, v)->Result();
        // Keep the low 8 bits of component i and move them to bits [8i, 8i+7].
        v = b.And(vec4u, v, b.Splat(vec4u, 0xff_u))->Result();
        v = b.ShiftLeft(vec4u, v, b.Construct(vec4u, 0_u, 8_u, 16_u, 24_u))->Result();
        // OR the four disjoint byte lanes together into a single u32.
        auto* x = b.Access(ty.u32(), v, 0_u);
        auto* y = b.Access(ty.u32(), v, 1_u);
        auto* z = b.Access(ty.u32(), v, 2_u);
        auto* w = b.Access(ty.u32(), v, 3_u);
        v = b.Or(ty.u32(), x, b.Or(ty.u32(), y, b.Or(ty.u32(), z, w)))->Result();
        call->Result()->ReplaceAllUsesWith(v);
    });
    call->Destroy();
}
/// Polyfill a `pack4x8unorm` builtin call
/// Polyfill a `pack4x8unorm` builtin call
/// @param call the builtin call instruction
void Pack4x8Unorm(ir::CoreBuiltinCall* call) {
    auto* input = call->Args()[0];
    b.InsertBefore(call, [&] {
        auto* vec4f = ty.vec4<f32>();
        auto* vec4u = ty.vec4<u32>();
        // Clamp each component to [0, 1].
        auto* lo = b.Zero(vec4f);
        auto* hi = b.Splat(vec4f, 1_f);
        auto* clamped = b.Call(vec4f, core::BuiltinFn::kClamp, Vector{input, lo, hi})->Result();
        // Scale to [0, 255] and round via floor(x + 0.5).
        auto* scaled = b.Multiply(vec4f, b.Splat(vec4f, 255_f), clamped)->Result();
        auto* biased = b.Add(vec4f, b.Splat(vec4f, 0.5_f), scaled)->Result();
        auto* floored = b.Call(vec4f, core::BuiltinFn::kFloor, Vector{biased})->Result();
        // Convert to u32, keep the low byte of component i, and move it to
        // bits [8i, 8i+7].
        auto* ints = b.Convert(vec4u, floored)->Result();
        auto* bytes = b.And(vec4u, ints, b.Splat(vec4u, 0xff_u))->Result();
        auto* lanes =
            b.ShiftLeft(vec4u, bytes, b.Construct(vec4u, 0_u, 8_u, 16_u, 24_u))->Result();
        // OR the four disjoint byte lanes together into a single u32.
        auto* lane0 = b.Access(ty.u32(), lanes, 0_u);
        auto* lane1 = b.Access(ty.u32(), lanes, 1_u);
        auto* lane2 = b.Access(ty.u32(), lanes, 2_u);
        auto* lane3 = b.Access(ty.u32(), lanes, 3_u);
        auto* packed =
            b.Or(ty.u32(), lane0, b.Or(ty.u32(), lane1, b.Or(ty.u32(), lane2, lane3)))->Result();
        call->Result()->ReplaceAllUsesWith(packed);
    });
    call->Destroy();
}
/// Polyfill a `unpack4x8snorm` builtin call
/// Polyfill a `unpack4x8snorm` builtin call
/// @param call the builtin call instruction
void Unpack4x8Snorm(ir::CoreBuiltinCall* call) {
    auto* arg = call->Args()[0];
    b.InsertBefore(call, [&] {
        auto* vec4f = ty.vec4<f32>();
        auto* vec4u = ty.vec4<u32>();
        auto* vec4i = ty.vec4<i32>();
        // Broadcast the packed word to all four lanes.
        auto* v = b.Construct(vec4u, arg)->Result();
        // Shift left to put the 8th bit of each number into the sign bit location, we then
        // convert to an i32 and shift back, so the sign bit will be set as needed. The bits
        // outside the bottom 8 are then masked off.
        v = b.ShiftLeft(vec4u, v, b.Construct(vec4u, 24_u, 16_u, 8_u, 0_u))->Result();
        v = b.Bitcast(vec4i, v)->Result();
        // Arithmetic shift-right on i32 sign-extends each byte to 32 bits.
        v = b.ShiftRight(vec4i, v, b.Splat(vec4u, 24_u))->Result();
        // Normalize to [-1, 1]. The max() clamps -128/127, which is slightly
        // below -1, up to exactly -1.
        v = b.Convert(vec4f, v)->Result();
        v = b.Divide(vec4f, v, b.Splat(vec4f, 127_f))->Result();
        v = b.Call(vec4f, core::BuiltinFn::kMax, v, b.Splat(vec4f, -1_f))->Result();
        call->Result()->ReplaceAllUsesWith(v);
    });
    call->Destroy();
}
/// Polyfill a `unpack4x8unorm` builtin call
/// Polyfill a `unpack4x8unorm` builtin call
/// @param call the builtin call instruction
void Unpack4x8Unorm(ir::CoreBuiltinCall* call) {
    auto* packed = call->Args()[0];
    b.InsertBefore(call, [&] {
        auto* vec4f = ty.vec4<f32>();
        auto* vec4u = ty.vec4<u32>();
        // Broadcast the packed word, then move byte i down to bit 0 and mask it.
        auto* broadcast = b.Construct(vec4u, packed)->Result();
        auto* shifted =
            b.ShiftRight(vec4u, broadcast, b.Construct(vec4u, 0_u, 8_u, 16_u, 24_u))->Result();
        auto* bytes = b.And(vec4u, shifted, b.Splat(vec4u, 0xff_u))->Result();
        // Normalize each byte from [0, 255] to [0.0, 1.0].
        auto* floats = b.Convert(vec4f, bytes)->Result();
        auto* normalized = b.Divide(vec4f, floats, b.Splat(vec4f, 255_f))->Result();
        call->Result()->ReplaceAllUsesWith(normalized);
    });
    call->Destroy();
}
/// Polyfill a `clamp()` builtin call for integers.
/// @param call the builtin call instruction
/// Polyfill a `clamp()` builtin call for integers.
/// Rewrites `clamp(v, lo, hi)` as `min(max(v, lo), hi)`.
/// @param call the builtin call instruction
void ClampInt(ir::CoreBuiltinCall* call) {
    auto* result_ty = call->Result()->Type();
    auto* value = call->Args()[0];
    auto* lo = call->Args()[1];
    auto* hi = call->Args()[2];
    b.InsertBefore(call, [&] {
        // Apply the lower bound first, then the upper bound, with the min()
        // reusing the original call's result value.
        auto* lower_bounded = b.Call(result_ty, core::BuiltinFn::kMax, value, lo);
        b.CallWithResult(call->DetachResult(), core::BuiltinFn::kMin, lower_bounded, hi);
    });
    call->Destroy();
}
/// Polyfill a `abs()` builtin call for signed integers.
/// @param call the builtin call instruction
/// Polyfill a `abs()` builtin call for signed integers.
/// Rewrites `abs(v)` as `max(v, -v)`.
/// @param call the builtin call instruction
void AbsSignedInt(ir::CoreBuiltinCall* call) {
    auto* result_ty = call->Result()->Type();
    auto* value = call->Args()[0];
    b.InsertBefore(call, [&] {
        auto* negated = b.Negation(result_ty, value);
        b.CallWithResult(call->DetachResult(), core::BuiltinFn::kMax, value, negated);
    });
    call->Destroy();
}
/// Polyfill a `countLeadingZeros()` builtin call.
/// @param call the builtin call instruction
/// Polyfill a `countLeadingZeros()` builtin call.
/// Uses a 5-step binary search over the bit positions, shifting the value left
/// each time the top half of the remaining range is empty.
/// @param call the builtin call instruction
void CountLeadingZeros(ir::CoreBuiltinCall* call) {
    auto* input = call->Args()[0];
    auto* result_ty = input->Type();
    auto* uint_ty = ty.MatchWidth(ty.u32(), result_ty);
    auto* bool_ty = ty.MatchWidth(ty.bool_(), result_ty);
    // Make an u32 constant with the same component count as result_ty.
    auto V = [&](uint32_t u) { return b.MatchWidth(u32(u), result_ty); };
    b.InsertBefore(call, [&] {
        // %x = %input;
        // if (%x is signed) {
        //   %x = bitcast<u32>(%x)
        // }
        // %b16 = select(0, 16, %x <= 0x0000ffff);
        // %x <<= %b16;
        // %b8 = select(0, 8, %x <= 0x00ffffff);
        // %x <<= %b8;
        // %b4 = select(0, 4, %x <= 0x0fffffff);
        // %x <<= %b4;
        // %b2 = select(0, 2, %x <= 0x3fffffff);
        // %x <<= %b2;
        // %b1 = select(0, 1, %x <= 0x7fffffff);
        // %b0 = select(0, 1, %x == 0);
        // %result = (%b16 | %b8 | %b4 | %b2 | %b1) + %b0;
        auto* x = input;
        if (result_ty->IsSignedIntegerScalarOrVector()) {
            x = b.Bitcast(uint_ty, x)->Result();
        }
        auto* b16 = b.Call(uint_ty, core::BuiltinFn::kSelect, V(0), V(16),
                           b.LessThanEqual(bool_ty, x, V(0x0000ffff)));
        x = b.ShiftLeft(uint_ty, x, b16)->Result();
        auto* b8 = b.Call(uint_ty, core::BuiltinFn::kSelect, V(0), V(8),
                          b.LessThanEqual(bool_ty, x, V(0x00ffffff)));
        x = b.ShiftLeft(uint_ty, x, b8)->Result();
        auto* b4 = b.Call(uint_ty, core::BuiltinFn::kSelect, V(0), V(4),
                          b.LessThanEqual(bool_ty, x, V(0x0fffffff)));
        x = b.ShiftLeft(uint_ty, x, b4)->Result();
        auto* b2 = b.Call(uint_ty, core::BuiltinFn::kSelect, V(0), V(2),
                          b.LessThanEqual(bool_ty, x, V(0x3fffffff)));
        x = b.ShiftLeft(uint_ty, x, b2)->Result();
        auto* b1 = b.Call(uint_ty, core::BuiltinFn::kSelect, V(0), V(1),
                          b.LessThanEqual(bool_ty, x, V(0x7fffffff)));
        auto* b0 =
            b.Call(uint_ty, core::BuiltinFn::kSelect, V(0), V(1), b.Equal(bool_ty, x, V(0)));
        // Note: %b0 is not OR'd into the chain. It is redundant there (whenever
        // %b0 is 1, %b1 is also 1); it is only added on afterwards so that a
        // zero input yields 32. This matches CountTrailingZeros() below.
        Instruction* result = b.Add(
            uint_ty,
            b.Or(uint_ty, b16, b.Or(uint_ty, b8, b.Or(uint_ty, b4, b.Or(uint_ty, b2, b1)))),
            b0);
        // Signed inputs produce a signed result; bitcast back.
        if (result_ty->IsSignedIntegerScalarOrVector()) {
            result = b.Bitcast(result_ty, result);
        }
        result->SetResult(call->DetachResult());
    });
    call->Destroy();
}
/// Polyfill a `countTrailingZeros()` builtin call.
/// @param call the builtin call instruction
/// Polyfill a `countTrailingZeros()` builtin call.
/// Uses a 5-step binary search over the bit positions, shifting the value
/// right each time the bottom half of the remaining range is empty.
/// @param call the builtin call instruction
void CountTrailingZeros(ir::CoreBuiltinCall* call) {
    auto* input = call->Args()[0];
    auto* result_ty = input->Type();
    auto* uint_ty = ty.MatchWidth(ty.u32(), result_ty);
    auto* bool_ty = ty.MatchWidth(ty.bool_(), result_ty);
    // Make an u32 constant with the same component count as result_ty.
    auto V = [&](uint32_t u) { return b.MatchWidth(u32(u), result_ty); };
    b.InsertBefore(call, [&] {
        // %x = %input;
        // if (%x is signed) {
        //   %x = bitcast<u32>(%x)
        // }
        // %b16 = select(0, 16, (%x & 0x0000ffff) == 0);
        // %x >>= %b16;
        // %b8 = select(0, 8, (%x & 0x000000ff) == 0);
        // %x >>= %b8;
        // %b4 = select(0, 4, (%x & 0x0000000f) == 0);
        // %x >>= %b4;
        // %b2 = select(0, 2, (%x & 0x00000003) == 0);
        // %x >>= %b2;
        // %b1 = select(0, 1, (%x & 0x00000001) == 0);
        // %b0 = select(0, 1, %x == 0);  // adds 1 more so a zero input gives 32
        // %result = (%b16 | %b8 | %b4 | %b2 | %b1) + %b0;
        auto* x = input;
        if (result_ty->IsSignedIntegerScalarOrVector()) {
            x = b.Bitcast(uint_ty, x)->Result();
        }
        auto* b16 = b.Call(uint_ty, core::BuiltinFn::kSelect, V(0), V(16),
                           b.Equal(bool_ty, b.And(uint_ty, x, V(0x0000ffff)), V(0)));
        x = b.ShiftRight(uint_ty, x, b16)->Result();
        auto* b8 = b.Call(uint_ty, core::BuiltinFn::kSelect, V(0), V(8),
                          b.Equal(bool_ty, b.And(uint_ty, x, V(0x000000ff)), V(0)));
        x = b.ShiftRight(uint_ty, x, b8)->Result();
        auto* b4 = b.Call(uint_ty, core::BuiltinFn::kSelect, V(0), V(4),
                          b.Equal(bool_ty, b.And(uint_ty, x, V(0x0000000f)), V(0)));
        x = b.ShiftRight(uint_ty, x, b4)->Result();
        auto* b2 = b.Call(uint_ty, core::BuiltinFn::kSelect, V(0), V(2),
                          b.Equal(bool_ty, b.And(uint_ty, x, V(0x00000003)), V(0)));
        x = b.ShiftRight(uint_ty, x, b2)->Result();
        auto* b1 = b.Call(uint_ty, core::BuiltinFn::kSelect, V(0), V(1),
                          b.Equal(bool_ty, b.And(uint_ty, x, V(0x00000001)), V(0)));
        auto* b0 =
            b.Call(uint_ty, core::BuiltinFn::kSelect, V(0), V(1), b.Equal(bool_ty, x, V(0)));
        Instruction* result = b.Add(
            uint_ty,
            b.Or(uint_ty, b16, b.Or(uint_ty, b8, b.Or(uint_ty, b4, b.Or(uint_ty, b2, b1)))),
            b0);
        // Signed inputs produce a signed result; bitcast back.
        if (result_ty->IsSignedIntegerScalarOrVector()) {
            result = b.Bitcast(result_ty, result);
        }
        result->SetResult(call->DetachResult());
    });
    call->Destroy();
}
/// Polyfill an `degrees()` builtin call.
/// @param call the builtin call instruction
/// Polyfill an `degrees()` builtin call.
/// Rewrites `degrees(x)` as `x * (180 / pi)` at the argument's precision.
/// @param call the builtin call instruction
void Degrees(ir::CoreBuiltinCall* call) {
    auto* arg = call->Args()[0];
    auto* type = arg->Type()->DeepestElement();
    // NOTE(review): `value` remains null if the element type is neither f16
    // nor f32 — relies on the type rules only admitting float types here.
    ir::Value* value = nullptr;
    if (type->Is<core::type::F16>()) {
        value = b.Constant(f16(kRadToDeg));
    } else if (type->Is<core::type::F32>()) {
        value = b.Constant(f32(kRadToDeg));
    }
    b.InsertBefore(call, [&] {
        auto* mul = b.Multiply(arg->Type(), arg, value);
        mul->SetResult(call->DetachResult());
    });
    call->Destroy();
}
/// Polyfill an `smoothStep()` builtin call.
/// @param call the builtin call instruction
/// Polyfill an `smoothStep()` builtin call.
/// Rewrites `smoothstep(edge0, edge1, x)` as the Hermite interpolation
/// `t * t * (3 - 2 * t)` where `t = clamp((x - edge0) / (edge1 - edge0), 0, 1)`.
/// @param call the builtin call instruction
void SmoothStep(ir::CoreBuiltinCall* call) {
    auto* edge0_arg = call->Args()[0];
    auto* edge1_arg = call->Args()[1];
    auto* x_arg = call->Args()[2];
    auto* type = x_arg->Type();
    // Build the 0/1/2/3 constants at the argument's element precision.
    // NOTE(review): the constants remain null if the element type is neither
    // f32 nor f16 — relies on the type rules only admitting float types here.
    ir::Constant* zero = nullptr;
    ir::Constant* one = nullptr;
    ir::Constant* two = nullptr;
    ir::Constant* three = nullptr;
    if (type->DeepestElement()->Is<core::type::F32>()) {
        zero = b.MatchWidth(0_f, type);
        one = b.MatchWidth(1_f, type);
        two = b.MatchWidth(2_f, type);
        three = b.MatchWidth(3_f, type);
    } else if (type->DeepestElement()->Is<core::type::F16>()) {
        zero = b.MatchWidth(0_h, type);
        one = b.MatchWidth(1_h, type);
        two = b.MatchWidth(2_h, type);
        three = b.MatchWidth(3_h, type);
    }
    b.InsertBefore(call, [&] {
        // t = clamp((x - edge0) / (edge1 - edge0), 0, 1)
        auto* dividend = b.Subtract(type, x_arg, edge0_arg);
        auto* divisor = b.Subtract(type, edge1_arg, edge0_arg);
        auto* quotient = b.Divide(type, dividend, divisor);
        auto* t_clamped = b.Call(type, core::BuiltinFn::kClamp, quotient, zero, one);
        // Smoothstep is a well defined function.
        // result = t * t * (3.0 - 2.0 * t);
        auto* smooth_result =
            b.Multiply(type, t_clamped,
                       b.Multiply(type, t_clamped,
                                  b.Subtract(type, three, b.Multiply(type, two, t_clamped))));
        smooth_result->SetResult(call->DetachResult());
    });
    call->Destroy();
}
/// Polyfill an `extractBits()` builtin call.
/// @param call the builtin call instruction
/// Polyfill an `extractBits()` builtin call.
/// Depending on the configured level, either clamps the offset/count operands
/// in place (keeping the builtin), or fully replaces the call with shifts.
/// @param call the builtin call instruction
void ExtractBits(ir::CoreBuiltinCall* call) {
    auto* offset = call->Args()[1];
    auto* count = call->Args()[2];
    switch (config.extract_bits) {
        case BuiltinPolyfillLevel::kClampOrRangeCheck: {
            b.InsertBefore(call, [&] {
                // Replace:
                //    extractBits(e, offset, count)
                // With:
                //    let o = min(offset, 32);
                //    let c = min(count, 32 - o);
                //    extractBits(e, o, c);
                auto* o = b.Call(ty.u32(), core::BuiltinFn::kMin, offset, 32_u);
                auto* c = b.Call(ty.u32(), core::BuiltinFn::kMin, count,
                                 b.Subtract(ty.u32(), 32_u, o));
                // Only the offset/count operands are rewritten; the call stays.
                call->SetOperand(ir::CoreBuiltinCall::kArgsOperandOffset + 1, o->Result());
                call->SetOperand(ir::CoreBuiltinCall::kArgsOperandOffset + 2, c->Result());
            });
        } break;
        case BuiltinPolyfillLevel::kFull: {
            // Replace:
            //    result = extractBits(e, offset, count)
            // With:
            //    let s = min(offset, 32u);
            //    let t = min(32u, (s + count));
            //    let shl = (32u - t);
            //    let shr = (shl + s);
            //    let shl_result = select(i32(), (e << shl), (shl < 32u));
            //    result = select(((shl_result >> 31u) >> 1u), (shl_result >> shr), (shr < 32u));
            // The selects guard against shifting by 32, which is undefined;
            // the `(>> 31) >> 1` form produces the correct sign-fill for a
            // full-width shift of a signed value.
            auto* e = call->Args()[0];
            auto* result_ty = e->Type();
            auto* uint_ty = ty.MatchWidth(ty.u32(), result_ty);
            auto V = [&](uint32_t u) { return b.MatchWidth(u32(u), result_ty); };
            b.InsertBefore(call, [&] {
                auto* s = b.Call<u32>(core::BuiltinFn::kMin, offset, 32_u);
                auto* t = b.Call<u32>(core::BuiltinFn::kMin, 32_u, b.Add(ty.u32(), s, count));
                auto* shl = b.Subtract<u32>(32_u, t);
                auto* shr = b.Add<u32>(shl, s);
                auto* f1 = b.Zero(result_ty);
                auto* t1 = b.ShiftLeft(result_ty, e, b.Construct(uint_ty, shl));
                auto* shl_result = b.Call(result_ty, core::BuiltinFn::kSelect, f1, t1,
                                          b.LessThan<bool>(shl, 32_u));
                auto* f2 =
                    b.ShiftRight(result_ty, b.ShiftRight(result_ty, shl_result, V(31)), V(1));
                auto* t2 = b.ShiftRight(result_ty, shl_result, b.Construct(uint_ty, shr));
                b.CallWithResult(call->DetachResult(), core::BuiltinFn::kSelect, f2, t2,
                                 b.LessThan<bool>(shr, 32_u));
            });
            call->Destroy();
        } break;
        default:
            TINT_UNIMPLEMENTED() << "extractBits polyfill level";
    }
}
/// Polyfill a `firstLeadingBit()` builtin call.
/// @param call the builtin call instruction
/// Polyfill a `firstLeadingBit()` builtin call.
/// Binary-searches for the highest interesting bit: the highest set bit for
/// unsigned values, or (for negative signed values) the highest clear bit,
/// found by searching the complemented value.
/// @param call the builtin call instruction
void FirstLeadingBit(ir::CoreBuiltinCall* call) {
    auto* input = call->Args()[0];
    auto* result_ty = input->Type();
    auto* uint_ty = ty.MatchWidth(ty.u32(), result_ty);
    auto* bool_ty = ty.MatchWidth(ty.bool_(), result_ty);
    // Make an u32 constant with the same component count as result_ty.
    auto V = [&](uint32_t u) { return b.MatchWidth(u32(u), result_ty); };
    b.InsertBefore(call, [&] {
        // %x = %input;
        // if (%x is signed) {
        //   %x = select(~u32(%x), u32(%x), u32(%x) < 0x80000000);
        // }
        // %b16 = select(16, 0, (%x & 0xffff0000) == 0);
        // %x >>= %b16;
        // %b8 = select(8, 0, (%x & 0x0000ff00) == 0);
        // %x >>= %b8;
        // %b4 = select(4, 0, (%x & 0x000000f0) == 0);
        // %x >>= %b4;
        // %b2 = select(2, 0, (%x & 0x0000000c) == 0);
        // %x >>= %b2;
        // %b1 = select(1, 0, (%x & 0x00000002) == 0);
        // %result = %b16 | %b8 | %b4 | %b2 | %b1;
        // %result = select(%result, 0xffffffff, %x == 0);
        auto* x = input;
        if (result_ty->IsSignedIntegerScalarOrVector()) {
            // Negative values search for the highest 0 bit, so complement them.
            x = b.Bitcast(uint_ty, x)->Result();
            auto* inverted = b.Complement(uint_ty, x);
            x = b.Call(uint_ty, core::BuiltinFn::kSelect, inverted, x,
                       b.LessThan(bool_ty, x, V(0x80000000)))
                    ->Result();
        }
        auto* b16 = b.Call(uint_ty, core::BuiltinFn::kSelect, V(16), V(0),
                           b.Equal(bool_ty, b.And(uint_ty, x, V(0xffff0000)), V(0)));
        x = b.ShiftRight(uint_ty, x, b16)->Result();
        auto* b8 = b.Call(uint_ty, core::BuiltinFn::kSelect, V(8), V(0),
                          b.Equal(bool_ty, b.And(uint_ty, x, V(0x0000ff00)), V(0)));
        x = b.ShiftRight(uint_ty, x, b8)->Result();
        auto* b4 = b.Call(uint_ty, core::BuiltinFn::kSelect, V(4), V(0),
                          b.Equal(bool_ty, b.And(uint_ty, x, V(0x000000f0)), V(0)));
        x = b.ShiftRight(uint_ty, x, b4)->Result();
        auto* b2 = b.Call(uint_ty, core::BuiltinFn::kSelect, V(2), V(0),
                          b.Equal(bool_ty, b.And(uint_ty, x, V(0x0000000c)), V(0)));
        x = b.ShiftRight(uint_ty, x, b2)->Result();
        auto* b1 = b.Call(uint_ty, core::BuiltinFn::kSelect, V(1), V(0),
                          b.Equal(bool_ty, b.And(uint_ty, x, V(0x00000002)), V(0)));
        // Sum of the shifts gives the bit index; 0xffffffff flags "no such bit".
        Instruction* result =
            b.Or(uint_ty, b16, b.Or(uint_ty, b8, b.Or(uint_ty, b4, b.Or(uint_ty, b2, b1))));
        result = b.Call(uint_ty, core::BuiltinFn::kSelect, result, V(0xffffffff),
                        b.Equal(bool_ty, x, V(0)));
        if (result_ty->IsSignedIntegerScalarOrVector()) {
            result = b.Bitcast(result_ty, result);
        }
        result->SetResult(call->DetachResult());
    });
    call->Destroy();
}
/// Polyfill a `firstTrailingBit()` builtin call.
/// @param call the builtin call instruction
/// Polyfill a `firstTrailingBit()` builtin call.
/// Binary-searches for the lowest set bit, shifting right whenever the
/// current bottom slice is all zero.
/// @param call the builtin call instruction
void FirstTrailingBit(ir::CoreBuiltinCall* call) {
    auto* input = call->Args()[0];
    auto* result_ty = input->Type();
    auto* uint_ty = ty.MatchWidth(ty.u32(), result_ty);
    auto* bool_ty = ty.MatchWidth(ty.bool_(), result_ty);
    // Make an u32 constant with the same component count as result_ty.
    auto V = [&](uint32_t u) { return b.MatchWidth(u32(u), result_ty); };
    b.InsertBefore(call, [&] {
        // %x = %input;
        // if (%x is signed) {
        //   %x = bitcast<u32>(%x)
        // }
        // %b16 = select(0, 16, (%x & 0x0000ffff) == 0);
        // %x >>= %b16;
        // %b8 = select(0, 8, (%x & 0x000000ff) == 0);
        // %x >>= %b8;
        // %b4 = select(0, 4, (%x & 0x0000000f) == 0);
        // %x >>= %b4;
        // %b2 = select(0, 2, (%x & 0x00000003) == 0);
        // %x >>= %b2;
        // %b1 = select(0, 1, (%x & 0x00000001) == 0);
        // %result = %b16 | %b8 | %b4 | %b2 | %b1;
        // %result = select(%result, 0xffffffff, %x == 0);
        auto* x = input;
        if (result_ty->IsSignedIntegerScalarOrVector()) {
            x = b.Bitcast(uint_ty, x)->Result();
        }
        auto* b16 = b.Call(uint_ty, core::BuiltinFn::kSelect, V(0), V(16),
                           b.Equal(bool_ty, b.And(uint_ty, x, V(0x0000ffff)), V(0)));
        x = b.ShiftRight(uint_ty, x, b16)->Result();
        auto* b8 = b.Call(uint_ty, core::BuiltinFn::kSelect, V(0), V(8),
                          b.Equal(bool_ty, b.And(uint_ty, x, V(0x000000ff)), V(0)));
        x = b.ShiftRight(uint_ty, x, b8)->Result();
        auto* b4 = b.Call(uint_ty, core::BuiltinFn::kSelect, V(0), V(4),
                          b.Equal(bool_ty, b.And(uint_ty, x, V(0x0000000f)), V(0)));
        x = b.ShiftRight(uint_ty, x, b4)->Result();
        auto* b2 = b.Call(uint_ty, core::BuiltinFn::kSelect, V(0), V(2),
                          b.Equal(bool_ty, b.And(uint_ty, x, V(0x00000003)), V(0)));
        x = b.ShiftRight(uint_ty, x, b2)->Result();
        auto* b1 = b.Call(uint_ty, core::BuiltinFn::kSelect, V(0), V(1),
                          b.Equal(bool_ty, b.And(uint_ty, x, V(0x00000001)), V(0)));
        // Sum of the shifts gives the bit index; 0xffffffff flags "no such bit".
        Instruction* result =
            b.Or(uint_ty, b16, b.Or(uint_ty, b8, b.Or(uint_ty, b4, b.Or(uint_ty, b2, b1))));
        result = b.Call(uint_ty, core::BuiltinFn::kSelect, result, V(0xffffffff),
                        b.Equal(bool_ty, x, V(0)));
        if (result_ty->IsSignedIntegerScalarOrVector()) {
            result = b.Bitcast(result_ty, result);
        }
        result->SetResult(call->DetachResult());
    });
    call->Destroy();
}
/// Polyfill a `fwidthFine()` builtin call.
/// @param call the builtin call instruction
/// Polyfill a `fwidthFine()` builtin call.
/// Rewrites `fwidthFine(v)` as `abs(dpdxFine(v)) + abs(dpdyFine(v))`.
/// @param call the builtin call instruction
void FwidthFine(ir::CoreBuiltinCall* call) {
    auto* arg = call->Args()[0];
    auto* arg_ty = arg->Type();
    b.InsertBefore(call, [&] {
        auto* ddx = b.Call(arg_ty, core::BuiltinFn::kDpdxFine, arg);
        auto* ddy = b.Call(arg_ty, core::BuiltinFn::kDpdyFine, arg);
        auto* abs_ddx = b.Call(arg_ty, core::BuiltinFn::kAbs, ddx);
        auto* abs_ddy = b.Call(arg_ty, core::BuiltinFn::kAbs, ddy);
        auto* sum = b.Add(arg_ty, abs_ddx, abs_ddy);
        call->Result()->ReplaceAllUsesWith(sum->Result());
    });
    call->Destroy();
}
/// Polyfill an `insertBits()` builtin call.
/// @param call the builtin call instruction
/// Polyfill an `insertBits()` builtin call.
/// Depending on the configured level, either clamps the offset/count operands
/// in place (keeping the builtin), or fully replaces the call with mask/shift
/// arithmetic.
/// @param call the builtin call instruction
void InsertBits(ir::CoreBuiltinCall* call) {
    auto* offset = call->Args()[2];
    auto* count = call->Args()[3];
    switch (config.insert_bits) {
        case BuiltinPolyfillLevel::kClampOrRangeCheck: {
            b.InsertBefore(call, [&] {
                // Replace:
                //    insertBits(e, newbits, offset, count)
                // With:
                //    let o = min(offset, 32);
                //    let c = min(count, 32 - o);
                //    insertBits(e, newbits, o, c);
                auto* o = b.Call(ty.u32(), core::BuiltinFn::kMin, offset, 32_u);
                auto* c = b.Call(ty.u32(), core::BuiltinFn::kMin, count,
                                 b.Subtract(ty.u32(), 32_u, o));
                // Only the offset/count operands are rewritten; the call stays.
                call->SetOperand(ir::CoreBuiltinCall::kArgsOperandOffset + 2, o->Result());
                call->SetOperand(ir::CoreBuiltinCall::kArgsOperandOffset + 3, c->Result());
            });
        } break;
        case BuiltinPolyfillLevel::kFull: {
            // Replace:
            //    result = insertBits(e, newbits, offset, count)
            // With:
            //    let oc = (offset + count);
            //    let mask = ((select(0u, (1u << offset), (offset < 32u)) - 1u)
            //                ^ (select(0u, (1u << oc), (oc < 32u)) - 1u));
            //    result = ((select(i32(), (newbits << offset), (offset < 32u)) & i32(mask))
            //              | (e & i32(~(mask))));
            // The selects guard against shifting by 32, which is undefined.
            // `mask` has 1s exactly in bit positions [offset, oc).
            auto* e = call->Args()[0];
            auto* newbits = call->Args()[1];
            auto* result_ty = e->Type();
            auto* uint_ty = ty.MatchWidth(ty.u32(), result_ty);
            b.InsertBefore(call, [&] {
                auto* oc = b.Add<u32>(offset, count);
                auto* t1 = b.ShiftLeft<u32>(1_u, offset);
                auto* s1 = b.Call<u32>(core::BuiltinFn::kSelect, b.Zero<u32>(), t1,
                                       b.LessThan<bool>(offset, 32_u));
                auto* t2 = b.ShiftLeft<u32>(1_u, oc);
                auto* s2 = b.Call<u32>(core::BuiltinFn::kSelect, b.Zero<u32>(), t2,
                                       b.LessThan<bool>(oc, 32_u));
                auto* mask_lhs = b.Subtract<u32>(s1, 1_u);
                auto* mask_rhs = b.Subtract<u32>(s2, 1_u);
                auto* mask = b.Xor<u32>(mask_lhs, mask_rhs);
                auto* f3 = b.Zero(result_ty);
                auto* t3 = b.ShiftLeft(result_ty, newbits, b.Construct(uint_ty, offset));
                auto* s3 = b.Call(result_ty, core::BuiltinFn::kSelect, f3, t3,
                                  b.LessThan<bool>(offset, 32_u));
                // Merge the shifted newbits (inside the mask) with the
                // original bits of `e` (outside the mask).
                auto* result_lhs = b.And(result_ty, s3, b.Construct(result_ty, mask));
                auto* result_rhs =
                    b.And(result_ty, e, b.Construct(result_ty, b.Complement<u32>(mask)));
                auto* result = b.Or(result_ty, result_lhs, result_rhs);
                result->SetResult(call->DetachResult());
            });
            call->Destroy();
        } break;
        default:
            TINT_UNIMPLEMENTED() << "insertBits polyfill level";
    }
}
/// Polyfill an `radians()` builtin call.
/// @param call the builtin call instruction
/// Polyfill an `radians()` builtin call.
/// Rewrites `radians(x)` as `x * (pi / 180)` at the argument's precision.
/// @param call the builtin call instruction
void Radians(ir::CoreBuiltinCall* call) {
    auto* arg = call->Args()[0];
    auto* type = arg->Type()->DeepestElement();
    // NOTE(review): `value` remains null if the element type is neither f16
    // nor f32 — relies on the type rules only admitting float types here.
    ir::Value* value = nullptr;
    if (type->Is<core::type::F16>()) {
        value = b.Constant(f16(kDegToRad));
    } else if (type->Is<core::type::F32>()) {
        value = b.Constant(f32(kDegToRad));
    }
    b.InsertBefore(call, [&] {
        auto* mul = b.Multiply(arg->Type(), arg, value);
        mul->SetResult(call->DetachResult());
    });
    call->Destroy();
}
/// Polyfill a `reflect()` builtin call.
/// @param call the builtin call instruction
/// Polyfill a `reflect()` builtin call.
/// @param call the builtin call instruction
void Reflect(ir::CoreBuiltinCall* call) {
    auto* e1 = call->Args()[0];
    auto* e2 = call->Args()[1];
    auto* vec_ty = e1->Type()->As<core::type::Vector>();
    // Only polyfills vec2<f32> (crbug.com/tint/1798)
    TINT_ASSERT(vec_ty && vec_ty->Width() == 2 && vec_ty->Type()->Is<core::type::F32>());
    b.InsertBefore(call, [&] {
        // The generated HLSL must effectively be emitted as:
        //    e1 + (-2 * dot(e1,e2) * e2)
        // Rather than the mathematically equivalent:
        //    e1 - 2 * dot(e1,e2) * e2
        //
        // When FXC compiles HLSL reflect, or the second case above,
        // it emits a `dp4` instruction for `2 * dot(e1,e2)`, which is
        // miscompiled by certain Intel drivers. The workaround (first
        // case above) results in FXC emitting a `dp2` for the dot,
        // followed by a `mul 2`, which works around the bug.
        auto* dot = b.Call(ty.f32(), core::BuiltinFn::kDot, e1, e2);
        auto* factor = b.Multiply(ty.f32(), -2.0_f, dot);
        // Broadcast the scalar factor to a vec2 before the component-wise
        // multiply and final add.
        auto* vfactor = b.Construct(vec_ty, factor);
        auto* mul = b.Multiply(vec_ty, vfactor, e2);
        b.AddWithResult(call->DetachResult(), e1, mul);
    });
    call->Destroy();
}
/// Polyfill a `saturate()` builtin call.
/// @param call the builtin call instruction
/// Polyfill a `saturate()` builtin call.
/// Rewrites `saturate(x)` as `clamp(x, 0.0, 1.0)` at the result's precision.
/// @param call the builtin call instruction
void Saturate(ir::CoreBuiltinCall* call) {
    auto* result_ty = call->Result()->Type();
    auto* el_ty = result_ty->DeepestElement();
    // Build the 0 and 1 bounds at the matching element type and width.
    ir::Constant* lo = nullptr;
    ir::Constant* hi = nullptr;
    if (el_ty->Is<core::type::F32>()) {
        lo = b.MatchWidth(0_f, result_ty);
        hi = b.MatchWidth(1_f, result_ty);
    } else if (el_ty->Is<core::type::F16>()) {
        lo = b.MatchWidth(0_h, result_ty);
        hi = b.MatchWidth(1_h, result_ty);
    }
    // The clamp reuses the original call's result value, then replaces it.
    auto* clamp = b.CallWithResult(call->DetachResult(), core::BuiltinFn::kClamp,
                                   Vector{call->Args()[0], lo, hi});
    clamp->InsertBefore(call);
    call->Destroy();
}
/// Polyfill a `textureSampleBaseClampToEdge()` builtin call for 2D F32 textures.
/// @param call the builtin call instruction
/// Polyfill a `textureSampleBaseClampToEdge()` builtin call for 2D F32 textures.
/// @param call the builtin call instruction
void TextureSampleBaseClampToEdge_2d_f32(ir::CoreBuiltinCall* call) {
    // Replace `textureSampleBaseClampToEdge(%texture, %sampler, %coords)` with:
    //   %dims       = vec2f(textureDimensions(%texture));
    //   %half_texel = vec2f(0.5) / dims;
    //   %clamped    = clamp(%coords, %half_texel, 1.0 - %half_texel);
    //   %result     = textureSampleLevel(%texture, %sampler, %clamped, 0);
    // Clamping to half a texel inside each edge keeps the bilinear filter
    // footprint entirely within the texture.
    auto* texture = call->Args()[0];
    auto* sampler = call->Args()[1];
    auto* coords = call->Args()[2];
    b.InsertBefore(call, [&] {
        auto* dims = b.Call<vec2<u32>>(core::BuiltinFn::kTextureDimensions, texture);
        auto* fdims = b.Convert<vec2<f32>>(dims);
        auto* half_texel = b.Divide<vec2<f32>>(b.Splat<vec2<f32>>(0.5_f), fdims);
        auto* one_minus_half_texel = b.Subtract<vec2<f32>>(b.Splat<vec2<f32>>(1_f), half_texel);
        auto* clamped = b.Call<vec2<f32>>(core::BuiltinFn::kClamp, coords, half_texel,
                                          one_minus_half_texel);
        b.CallWithResult(call->DetachResult(), core::BuiltinFn::kTextureSampleLevel, texture,
                         sampler, clamped, 0_f);
    });
    call->Destroy();
}
/// Polyfill clamping for the (f32) bias parameter of TextureSampleBias
/// @param call the builtin call instruction
/// Polyfill clamping for the (f32) bias parameter of TextureSampleBias
/// Rewrites the bias operand in place as `clamp(bias, -16.0, 15.99)`; the
/// call itself is kept (and therefore not destroyed).
/// @param call the builtin call instruction
void TextureSampleBiasClamp(ir::CoreBuiltinCall* call) {
    b.InsertBefore(call, [&] {
        auto* texture_type = call->Args()[0]->Type()->As<core::type::Texture>();
        // For arrayed textures an extra array-index argument precedes the bias.
        bool is_array_texture = type::IsTextureArray(texture_type->Dim());
        const uint32_t kBiasParameterIndex = is_array_texture ? 4 : 3;
        auto* bias_parameter = call->Args()[kBiasParameterIndex];
        // TODO(crbug.com/371033198): Consider applying clamp here if 'bias_parameter' is a
        // constant. This might not be the most prudent idea for two reasons: 1. the platform
        // compilers will perform this optimization 2. it will bifurcate the testing paths.
        call->SetArg(kBiasParameterIndex, b.Call(ty.f32(), core::BuiltinFn::kClamp,
                                                 bias_parameter, -16.00_f, 15.99_f)
                                              ->Result());
    });
}
/// Polyfill a `dot4I8Packed()` builtin call
/// Expands the packed signed dot product into explicit unpacks and a `dot`:
///   %a = unpack4xI8(%x);
///   %b = unpack4xI8(%y);
///   %result = dot(%a, %b);
/// @param call the builtin call instruction
void Dot4I8Packed(ir::CoreBuiltinCall* call) {
    // Unpack both packed operands to vec4i ahead of the call.
    auto* lhs_unpacked = Unpack4xI8OnValue(call, call->Args()[0]);
    auto* rhs_unpacked = Unpack4xI8OnValue(call, call->Args()[1]);
    b.InsertBefore(call, [&] {
        // The plain `dot` builtin takes over the original call's result.
        b.CallWithResult(call->DetachResult(), core::BuiltinFn::kDot, lhs_unpacked,
                         rhs_unpacked);
    });
    call->Destroy();
}
/// Polyfill a `dot4U8Packed()` builtin call
/// Expands the packed unsigned dot product into explicit unpacks and a `dot`:
///   %a = unpack4xU8(%x);
///   %b = unpack4xU8(%y);
///   %result = dot(%a, %b);
/// @param call the builtin call instruction
void Dot4U8Packed(ir::CoreBuiltinCall* call) {
    // Unpack both packed operands to vec4u ahead of the call.
    auto* lhs_unpacked = Unpack4xU8OnValue(call, call->Args()[0]);
    auto* rhs_unpacked = Unpack4xU8OnValue(call, call->Args()[1]);
    b.InsertBefore(call, [&] {
        // The plain `dot` builtin takes over the original call's result.
        b.CallWithResult(call->DetachResult(), core::BuiltinFn::kDot, lhs_unpacked,
                         rhs_unpacked);
    });
    call->Destroy();
}
/// Polyfill a `pack4xI8()` builtin call
/// @param call the builtin call instruction
void Pack4xI8(ir::CoreBuiltinCall* call) {
    // Replace `pack4xI8(%x)` with:
    //   %n      = vec4u(0, 8, 16, 24);
    //   %x_u32  = bitcast<vec4u>(%x);
    //   %x_u8   = (%x_u32 & vec4u(0xff)) << %n;
    //   %result = dot(%x_u8, vec4u(1));
    auto* value = call->Args()[0];
    b.InsertBefore(call, [&] {
        auto* uvec4 = ty.vec4<u32>();
        // Per-lane shift amounts placing each masked byte into its slot.
        auto* shifts = b.Construct(uvec4, b.Constant(u32(0)), b.Constant(u32(8)),
                                   b.Constant(u32(16)), b.Constant(u32(24)));
        // Reinterpret the signed lanes as unsigned bits.
        auto* bits = b.Bitcast(uvec4, value);
        auto* byte_mask = b.Construct(uvec4, b.Constant(u32(0xff)));
        auto* positioned = b.ShiftLeft(uvec4, b.And(uvec4, bits, byte_mask), shifts);
        // dot(v, vec4u(1)) sums the four positioned bytes into one u32.
        b.CallWithResult(call->DetachResult(), core::BuiltinFn::kDot, positioned,
                         b.Construct(uvec4, b.Constant(u32(1))));
    });
    call->Destroy();
}
/// Polyfill a `pack4xU8()` builtin call
/// @param call the builtin call instruction
void Pack4xU8(ir::CoreBuiltinCall* call) {
    // Replace `pack4xU8(%x)` with:
    //   %n      = vec4u(0, 8, 16, 24);
    //   %x_u8   = (%x & vec4u(0xff)) << %n;
    //   %result = dot(%x_u8, vec4u(1));
    auto* value = call->Args()[0];
    b.InsertBefore(call, [&] {
        auto* uvec4 = ty.vec4<u32>();
        // Per-lane shift amounts placing each masked byte into its slot.
        auto* shifts = b.Construct(uvec4, b.Constant(u32(0)), b.Constant(u32(8)),
                                   b.Constant(u32(16)), b.Constant(u32(24)));
        auto* positioned =
            b.ShiftLeft(uvec4, b.And(uvec4, value, b.Construct(uvec4, b.Constant(u32(0xff)))),
                        shifts);
        // dot(v, vec4u(1)) sums the four positioned bytes into one u32.
        b.CallWithResult(call->DetachResult(), core::BuiltinFn::kDot, positioned,
                         b.Construct(uvec4, b.Constant(u32(1))));
    });
    call->Destroy();
}
/// Polyfill a `pack4xI8Clamp()` builtin call
/// @param call the builtin call instruction
void Pack4xI8Clamp(ir::CoreBuiltinCall* call) {
    // Replace `pack4xI8Clamp(%x)` with:
    //   %n       = vec4u(0, 8, 16, 24);
    //   %lo      = vec4i(-128);
    //   %hi      = vec4i(127);
    //   %x_clamp = clamp(%x, %lo, %hi);
    //   %x_u32   = bitcast<vec4u>(%x_clamp);
    //   %x_u8    = (%x_u32 & vec4u(0xff)) << %n;
    //   %result  = dot(%x_u8, vec4u(1));
    auto* value = call->Args()[0];
    b.InsertBefore(call, [&] {
        auto* ivec4 = ty.vec4<i32>();
        auto* uvec4 = ty.vec4<u32>();
        // Per-lane shift amounts placing each masked byte into its slot.
        auto* shifts = b.Construct(uvec4, b.Constant(u32(0)), b.Constant(u32(8)),
                                   b.Constant(u32(16)), b.Constant(u32(24)));
        // Clamp each lane to the representable i8 range before packing.
        auto* lo = b.Construct(ivec4, b.Constant(i32(-128)));
        auto* hi = b.Construct(ivec4, b.Constant(i32(127)));
        auto* clamped = b.Call(ivec4, core::BuiltinFn::kClamp, value, lo, hi);
        auto* bits = b.Bitcast(uvec4, clamped);
        auto* positioned = b.ShiftLeft(
            uvec4, b.And(uvec4, bits, b.Construct(uvec4, b.Constant(u32(0xff)))), shifts);
        // dot(v, vec4u(1)) sums the four positioned bytes into one u32.
        b.CallWithResult(call->DetachResult(), core::BuiltinFn::kDot, positioned,
                         b.Construct(uvec4, b.Constant(u32(1))));
    });
    call->Destroy();
}
/// Polyfill a `pack4xU8Clamp()` builtin call
/// @param call the builtin call instruction
void Pack4xU8Clamp(ir::CoreBuiltinCall* call) {
    // Replace `pack4xU8Clamp(%x)` with:
    //   %n       = vec4u(0, 8, 16, 24);
    //   %lo      = vec4u(0);
    //   %hi      = vec4u(255);
    //   %x_clamp = clamp(%x, %lo, %hi);
    //   %x_u8    = %x_clamp << %n;
    //   %result  = dot(%x_u8, vec4u(1));
    auto* value = call->Args()[0];
    b.InsertBefore(call, [&] {
        auto* uvec4 = ty.vec4<u32>();
        // Per-lane shift amounts placing each clamped byte into its slot.
        auto* shifts = b.Construct(uvec4, b.Constant(u32(0)), b.Constant(u32(8)),
                                   b.Constant(u32(16)), b.Constant(u32(24)));
        // Clamp each lane to the representable u8 range; no masking is needed
        // afterwards since clamped values already fit in 8 bits.
        auto* lo = b.Construct(uvec4, b.Constant(u32(0)));
        auto* hi = b.Construct(uvec4, b.Constant(u32(255)));
        auto* clamped = b.Call(uvec4, core::BuiltinFn::kClamp, value, lo, hi);
        auto* positioned = b.ShiftLeft(uvec4, clamped, shifts);
        // dot(v, vec4u(1)) sums the four positioned bytes into one u32.
        b.CallWithResult(call->DetachResult(), core::BuiltinFn::kDot, positioned,
                         b.Construct(uvec4, b.Constant(u32(1))));
    });
    call->Destroy();
}
/// Emit code for `unpack4xI8` on u32 value `x`, before the given call.
/// @param call the instruction that should follow the emitted code
/// @param x the u32 value to be unpacked
/// @returns the instruction whose result is the unpacked vec4i
ir::Instruction* Unpack4xI8OnValue(ir::CoreBuiltinCall* call, ir::Value* x) {
    // Replace `unpack4xI8(%x)` with:
    //   %n       = vec4u(24, 16, 8, 0);
    //   %x_splat = vec4u(%x); // splat the scalar to a vector
    //   %x_vec4i = bitcast<vec4i>(%x_splat << n);
    //   %result  = %x_vec4i >> vec4u(24);
    // Each lane's left-shift moves its byte into the top 8 bits; the right-shift
    // on the (now signed) vec4i then brings it back down, sign-extending it.
    ir::Instruction* result = nullptr;
    b.InsertBefore(call, [&] {
        auto* vec4i = ty.vec4<i32>();
        auto* vec4u = ty.vec4<u32>();
        auto* n = b.Construct(vec4u, b.Constant(u32(24)), b.Constant(u32(16)),
                              b.Constant(u32(8)), b.Constant(u32(0)));
        auto* x_splat = b.Construct(vec4u, x);
        auto* x_vec4i = b.Bitcast(vec4i, b.ShiftLeft(vec4u, x_splat, n));
        result = b.ShiftRight(vec4i, x_vec4i, b.Construct(vec4u, b.Constant(u32(24))));
    });
    return result;
}
/// Polyfill a `unpack4xI8()` builtin call
/// @param call the builtin call instruction
void Unpack4xI8(ir::CoreBuiltinCall* call) {
    // Emit the expanded unpack sequence, then hand the original call's result
    // over to its final instruction and drop the call.
    auto* unpacked = Unpack4xI8OnValue(call, call->Args()[0]);
    unpacked->SetResult(call->DetachResult());
    call->Destroy();
}
/// Emit code for `unpack4xU8` on u32 value `x`, before the given call.
/// @param call the instruction that should follow the emitted code
/// @param x the u32 value to be unpacked
Instruction* Unpack4xU8OnValue(ir::CoreBuiltinCall* call, ir::Value* x) {
// Replace `unpack4xU8(%x)` with:
// %n = vec4u(0, 8, 16, 24);
// %x_splat = vec4u(%x); // splat the scalar to a vector
// %x_vec4u = %x_splat >> n;
// %result = %x_vec4u & vec4u(0xff);
ir::Instruction* result = nullptr;
b.InsertBefore(call, [&] {
auto* vec4u = ty.vec4<u32>();
auto* n = b.Construct(vec4u, b.Constant(u32(0)), b.Constant(u32(8)),
b.Constant(u32(16)), b.Constant(u32(24)));
auto* x_splat = b.Construct(vec4u, x);
auto* x_vec4u = b.ShiftRight(vec4u, x_splat, n);
result = b.And(vec4u, x_vec4u, b.Construct(vec4u, b.Constant(u32(0xff))));
});
return result;
}
/// Polyfill a `unpack4xU8()` builtin call
/// @param call the builtin call instruction
void Unpack4xU8(ir::CoreBuiltinCall* call) {
    // Emit the expanded unpack sequence, then hand the original call's result
    // over to its final instruction and drop the call.
    auto* unpacked = Unpack4xU8OnValue(call, call->Args()[0]);
    unpacked->SetResult(call->DetachResult());
    call->Destroy();
}
};
} // namespace
Result<SuccessType> BuiltinPolyfill(Module& ir, const BuiltinPolyfillConfig& config) {
    // Validate (and optionally dump) the module first; bail out on failure.
    if (auto validation =
            ValidateAndDumpIfNeeded(ir, "core.BuiltinPolyfill", kBuiltinPolyfillCapabilities);
        validation != Success) {
        return validation;
    }
    // Run the transform over the whole module.
    State{config, ir}.Process();
    return Success;
}
} // namespace tint::core::ir::transform