// Copyright 2024 The Dawn & Tint Authors
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// 1. Redistributions of source code must retain the above copyright notice, this
//    list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright notice,
//    this list of conditions and the following disclaimer in the documentation
//    and/or other materials provided with the distribution.
//
// 3. Neither the name of the copyright holder nor the names of its
//    contributors may be used to endorse or promote products derived from
//    this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "src/tint/lang/glsl/writer/raise/builtin_polyfill.h"
#include <string>
#include <tuple>
#include <utility>
#include "src/tint/lang/core/fluent_types.h" // IWYU pragma: export
#include "src/tint/lang/core/ir/builder.h"
#include "src/tint/lang/core/ir/module.h"
#include "src/tint/lang/core/ir/validator.h"
#include "src/tint/lang/core/type/depth_multisampled_texture.h"
#include "src/tint/lang/core/type/depth_texture.h"
#include "src/tint/lang/core/type/multisampled_texture.h"
#include "src/tint/lang/core/type/sampled_texture.h"
#include "src/tint/lang/core/type/storage_texture.h"
#include "src/tint/lang/glsl/builtin_fn.h"
#include "src/tint/lang/glsl/ir/builtin_call.h"
#include "src/tint/lang/glsl/ir/member_builtin_call.h"
#include "src/tint/lang/glsl/ir/ternary.h"
namespace tint::glsl::writer::raise {
namespace {
using namespace tint::core::fluent_types; // NOLINT
using namespace tint::core::number_suffixes; // NOLINT
/// PIMPL state for the transform.
struct State {
    /// The IR module.
    core::ir::Module& ir;

    /// The IR builder.
    core::ir::Builder b{ir};

    /// The type manager.
    core::type::Manager& ty{ir.Types()};

    /// Map from vector type to the generated polyfill function for an integer `dot`.
    Hashmap<const core::type::Type*, core::ir::Function*, 4> dot_funcs_{};

    /// Map from type to the generated polyfill function for `quantizeToF16`.
    Hashmap<const core::type::Type*, core::ir::Function*, 4> quantize_to_f16_funcs_{};

    /// Process the module.
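    /// Builtin calls are gathered into a worklist first and replaced afterwards, so that the
    /// instruction list is not modified while it is being iterated.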
    void Process() {
        Vector<core::ir::CoreBuiltinCall*, 4> call_worklist;
        for (auto* inst : ir.Instructions()) {
            if (auto* call = inst->As<core::ir::CoreBuiltinCall>()) {
                switch (call->Func()) {
                    case core::BuiltinFn::kAbs:
                    case core::BuiltinFn::kAll:
                    case core::BuiltinFn::kAny:
                    case core::BuiltinFn::kArrayLength:
                    case core::BuiltinFn::kAtomicCompareExchangeWeak:
                    case core::BuiltinFn::kAtomicSub:
                    case core::BuiltinFn::kAtomicLoad:
                    case core::BuiltinFn::kCountOneBits:
                    case core::BuiltinFn::kDot:
                    case core::BuiltinFn::kExtractBits:
                    case core::BuiltinFn::kFma:
                    case core::BuiltinFn::kFrexp:
                    case core::BuiltinFn::kInsertBits:
                    case core::BuiltinFn::kModf:
                    case core::BuiltinFn::kQuantizeToF16:
                    case core::BuiltinFn::kSelect:
                    case core::BuiltinFn::kStorageBarrier:
                    case core::BuiltinFn::kTextureBarrier:
                    case core::BuiltinFn::kWorkgroupBarrier:
                        call_worklist.Push(call);
                        break;
                    default:
                        break;
                }
                continue;
            }
        }

        // Replace the builtin calls that we found.
        for (auto* call : call_worklist) {
            switch (call->Func()) {
                case core::BuiltinFn::kAbs:
                    Abs(call);
                    break;
                case core::BuiltinFn::kAll:
                    All(call);
                    break;
                case core::BuiltinFn::kAny:
                    Any(call);
                    break;
                case core::BuiltinFn::kArrayLength:
                    ArrayLength(call);
                    break;
                case core::BuiltinFn::kAtomicCompareExchangeWeak:
                    AtomicCompareExchangeWeak(call);
                    break;
                case core::BuiltinFn::kAtomicSub:
                    AtomicSub(call);
                    break;
                case core::BuiltinFn::kAtomicLoad:
                    AtomicLoad(call);
                    break;
                case core::BuiltinFn::kCountOneBits:
                    CountOneBits(call);
                    break;
                case core::BuiltinFn::kDot:
                    Dot(call);
                    break;
                case core::BuiltinFn::kExtractBits:
                    ExtractBits(call);
                    break;
                case core::BuiltinFn::kFma:
                    FMA(call);
                    break;
                case core::BuiltinFn::kFrexp:
                    Frexp(call);
                    break;
                case core::BuiltinFn::kInsertBits:
                    InsertBits(call);
                    break;
                case core::BuiltinFn::kModf:
                    Modf(call);
                    break;
                case core::BuiltinFn::kQuantizeToF16:
                    QuantizeToF16(call);
                    break;
                case core::BuiltinFn::kSelect:
                    Select(call);
                    break;
                case core::BuiltinFn::kStorageBarrier:
                case core::BuiltinFn::kTextureBarrier:
                case core::BuiltinFn::kWorkgroupBarrier:
                    Barrier(call);
                    break;
                default:
                    TINT_UNREACHABLE();
            }
        }
    }
    void Abs(core::ir::BuiltinCall* call) {
        auto args = call->Args();
        if (args[0]->Type()->DeepestElement()->IsUnsignedIntegerScalarOrVector()) {
            // GLSL does not support `abs` on unsigned arguments, so replace the call with the arg.
            call->Result(0)->ReplaceAllUsesWith(args[0]);
        } else {
            b.InsertBefore(call, [&] {
                b.CallWithResult<glsl::ir::BuiltinCall>(call->DetachResult(), glsl::BuiltinFn::kAbs,
                                                        args[0]);
            });
        }
        call->Destroy();
    }

    void Any(core::ir::BuiltinCall* call) {
        auto args = call->Args();
        if (args[0]->Type()->Is<core::type::Scalar>()) {
            // GLSL has no scalar `any`, so replace the call with the arg.
            call->Result(0)->ReplaceAllUsesWith(args[0]);
        } else {
            b.InsertBefore(call, [&] {
                b.CallWithResult<glsl::ir::BuiltinCall>(call->DetachResult(), glsl::BuiltinFn::kAny,
                                                        args[0]);
            });
        }
        call->Destroy();
    }

    void All(core::ir::BuiltinCall* call) {
        auto args = call->Args();
        if (args[0]->Type()->Is<core::type::Scalar>()) {
            // GLSL has no scalar `all`, so replace the call with the arg.
            call->Result(0)->ReplaceAllUsesWith(args[0]);
        } else {
            b.InsertBefore(call, [&] {
                b.CallWithResult<glsl::ir::BuiltinCall>(call->DetachResult(), glsl::BuiltinFn::kAll,
                                                        args[0]);
            });
        }
        call->Destroy();
    }
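    // GLSL exposes the runtime size of a storage buffer array via the `.length()` member
    // function, which returns a signed integer, so the result is converted to the u32 that WGSL's
    // `arrayLength` produces. Roughly: `arrayLength(&sb.arr)` becomes `uint(sb.arr.length())`
    // (names here are illustrative).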
    void ArrayLength(core::ir::Call* call) {
        b.InsertBefore(call, [&] {
            auto* len = b.MemberCall<glsl::ir::MemberBuiltinCall>(ty.i32(), BuiltinFn::kLength,
                                                                  call->Args()[0]);
            b.ConvertWithResult(call->DetachResult(), len->Result(0));
        });
        call->Destroy();
    }
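    // GLSL's `dot()` is only defined for floating point vectors, so integer `dot` is lowered to a
    // generated helper that sums the component-wise products, e.g. for a 3-component vector:
    //   int tint_int_dot(ivec3 x, ivec3 y) { return x[0]*y[0] + x[1]*y[1] + x[2]*y[2]; }
    // The helper is cached per vector type so it is only emitted once.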
    core::ir::Function* CreateDotPolyfill(const core::type::Vector* type) {
        auto* ret_ty = type->DeepestElement();
        return dot_funcs_.GetOrAdd(type, [&]() -> core::ir::Function* {
            auto* f = b.Function("tint_int_dot", ret_ty);
            auto* x = b.FunctionParam("x", type);
            auto* y = b.FunctionParam("y", type);
            f->SetParams({x, y});

            b.Append(f->Block(), [&] {
                core::ir::Value* ret = nullptr;
                for (uint32_t i = 0; i < type->Width(); ++i) {
                    auto* lhs = b.Swizzle(ret_ty, x, {i});
                    auto* rhs = b.Swizzle(ret_ty, y, {i});
                    auto* v = b.Multiply(ret_ty, lhs, rhs);
                    if (ret != nullptr) {
                        ret = b.Add(ret_ty, ret, v)->Result(0);
                    } else {
                        ret = v->Result(0);
                    }
                }
                b.Return(f, ret);
            });
            return f;
        });
    }
    // GLSL does not have a builtin for `dot` with integer vector types. Generate the helper
    // function if it hasn't been created already.
    void Dot(core::ir::BuiltinCall* call) {
        auto args = call->Args();
        auto* vec_ty = call->Args()[0]->Type()->As<core::type::Vector>();
        TINT_ASSERT(vec_ty);

        b.InsertBefore(call, [&] {
            if (!vec_ty->DeepestElement()->IsIntegerScalar()) {
                b.CallWithResult<glsl::ir::BuiltinCall>(call->DetachResult(), glsl::BuiltinFn::kDot,
                                                        args[0], args[1]);
            } else {
                auto* func = CreateDotPolyfill(vec_ty);
                b.CallWithResult(call->DetachResult(), func, args[0], args[1]);
            }
        });
        call->Destroy();
    }
    void Frexp(core::ir::BuiltinCall* call) {
        b.InsertBefore(call, [&] {
            // GLSL's `frexp` returns the fractional part and writes the exponent to an output
            // parameter. Polyfill it by declaring the result struct and then setting the values:
            //   __frexp_result result = {};
            //   result.fract = frexp(arg, result.exp);
            auto* result_type = call->Result(0)->Type();
            auto* float_type = result_type->Element(0);
            auto* i32_type = result_type->Element(1);

            auto* result = b.Var(ty.ptr(function, result_type));
            auto* exp = b.Access(ty.ptr(function, i32_type), result, u32(1));

            auto args = Vector<core::ir::Value*, 2>{call->Args()[0], exp->Result(0)};
            auto* res =
                b.Call<glsl::ir::BuiltinCall>(float_type, glsl::BuiltinFn::kFrexp, std::move(args));
            b.Store(b.Access(ty.ptr(function, float_type), result, u32(0)), res);

            b.LoadWithResult(call->DetachResult(), result);
        });
        call->Destroy();
    }
    void Modf(core::ir::BuiltinCall* call) {
        b.InsertBefore(call, [&] {
            // GLSL's `modf` returns the fractional part and writes the whole part to an output
            // parameter. Polyfill it by declaring the result struct and then setting the values:
            //   __modf_result result = {};
            //   result.fract = modf(arg, result.whole);
            auto* result_type = call->Result(0)->Type();
            auto* element_type = result_type->Element(0);

            auto* result = b.Var(ty.ptr(function, result_type));
            auto* whole = b.Access(ty.ptr(function, element_type), result, u32(1));

            auto args = Vector<core::ir::Value*, 2>{call->Args()[0], whole->Result(0)};
            auto* res = b.Call<glsl::ir::BuiltinCall>(element_type, glsl::BuiltinFn::kModf,
                                                      std::move(args));
            b.Store(b.Access(ty.ptr(function, element_type), result, u32(0)), res);

            b.LoadWithResult(call->DetachResult(), result);
        });
        call->Destroy();
    }
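    // `extractBits` maps to GLSL's `bitfieldExtract`, which takes signed `offset` and `bits`
    // operands, so the WGSL `u32` operands are converted to `i32` first.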
    void ExtractBits(core::ir::Call* call) {
        b.InsertBefore(call, [&] {
            auto args = call->Args();
            auto* offset = b.Convert(ty.i32(), args[1]);
            auto* bits = b.Convert(ty.i32(), args[2]);

            b.CallWithResult<glsl::ir::BuiltinCall>(
                call->DetachResult(), glsl::BuiltinFn::kBitfieldExtract, args[0], offset, bits);
        });
        call->Destroy();
    }
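    // `insertBits` maps to GLSL's `bitfieldInsert` in the same way, with the `offset` and `bits`
    // operands converted to `i32`.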
    void InsertBits(core::ir::Call* call) {
        b.InsertBefore(call, [&] {
            auto args = call->Args();
            auto* offset = b.Convert(ty.i32(), args[2]);
            auto* bits = b.Convert(ty.i32(), args[3]);

            b.CallWithResult<glsl::ir::BuiltinCall>(call->DetachResult(),
                                                    glsl::BuiltinFn::kBitfieldInsert, args[0],
                                                    args[1], offset, bits);
        });
        call->Destroy();
    }
    // There is no `fma` builtin in GLSL ES 3.10, so we emulate it. `fma` does exist in desktop GLSL
    // from 4.00 onwards, but we use the emulated version to be consistent. We could use the real
    // one on desktop if we decide to in the future.
    void FMA(core::ir::Call* call) {
        auto args = call->Args();
        b.InsertBefore(call, [&] {
            auto* res_ty = call->Result(0)->Type();
            auto* mul = b.Multiply(res_ty, args[0], args[1]);
            b.AddWithResult(call->DetachResult(), mul, args[2]);
        });
        call->Destroy();
    }
    // GLSL's `bitCount` always returns a signed integer result, so we need to convert it to the
    // expected result type. Emit a `bitCount` call to make it clear this isn't `countOneBits`.
    void CountOneBits(core::ir::Call* call) {
        auto* result_ty = call->Result(0)->Type();
        b.InsertBefore(call, [&] {
            auto* c = b.Call<glsl::ir::BuiltinCall>(ty.MatchWidth(ty.i32(), result_ty),
                                                    glsl::BuiltinFn::kBitCount, call->Args()[0]);
            b.ConvertWithResult(call->DetachResult(), c);
        });
        call->Destroy();
    }
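    // GLSL's `atomicCompSwap` only returns the original value, so the WGSL result struct is
    // rebuilt by comparing that value against the comparator to compute the `exchanged` flag.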
    void AtomicCompareExchangeWeak(core::ir::BuiltinCall* call) {
        auto args = call->Args();
        auto* type = args[1]->Type();

        auto* dest = args[0];
        auto* compare_value = args[1];
        auto* value = args[2];

        auto* result_type = call->Result(0)->Type();

        b.InsertBefore(call, [&] {
            auto* bitcast_cmp_value = b.Bitcast(type, compare_value);
            auto* bitcast_value = b.Bitcast(type, value);

            auto* swap = b.Call<glsl::ir::BuiltinCall>(
                type, glsl::BuiltinFn::kAtomicCompSwap,
                Vector<core::ir::Value*, 3>{dest, bitcast_cmp_value->Result(0),
                                            bitcast_value->Result(0)});

            auto* exchanged = b.Equal(ty.bool_(), swap, compare_value);

            auto* result = b.Construct(result_type, swap, exchanged)->Result(0);
            call->Result(0)->ReplaceAllUsesWith(result);
        });
        call->Destroy();
    }
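    // `atomicSub` is lowered to an `atomicAdd` of the negated operand for `i32` values. For `u32`
    // the negation cannot be expressed in the IR, so a GLSL-specific `atomicSub` builtin is
    // emitted and expanded by the printer instead.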
    void AtomicSub(core::ir::BuiltinCall* call) {
        b.InsertBefore(call, [&] {
            auto args = call->Args();
            if (args[1]->Type()->Is<core::type::I32>()) {
                b.CallWithResult(call->DetachResult(), core::BuiltinFn::kAtomicAdd, args[0],
                                 b.Negation(args[1]->Type(), args[1]));
            } else {
                // Negating a u32 isn't possible in the IR, so pass a fake GLSL function and
                // handle it in the printer.
                b.CallWithResult<glsl::ir::BuiltinCall>(
                    call->DetachResult(), glsl::BuiltinFn::kAtomicSub,
                    Vector<core::ir::Value*, 2>{args[0], args[1]});
            }
        });
        call->Destroy();
    }
    void AtomicLoad(core::ir::CoreBuiltinCall* call) {
        // GLSL does not have an atomicLoad, so we emulate it with an atomicOr using 0 as the OR
        // value.
        b.InsertBefore(call, [&] {
            auto args = call->Args();
            b.CallWithResult(
                call->DetachResult(), core::BuiltinFn::kAtomicOr, args[0],
                b.Zero(args[0]->Type()->UnwrapPtr()->As<core::type::Atomic>()->Type()));
        });
        call->Destroy();
    }
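    // `workgroupBarrier` maps directly to GLSL's `barrier()`. `storageBarrier` and
    // `textureBarrier` additionally require a `memoryBarrierBuffer()` / `memoryBarrierImage()`
    // call before the `barrier()`.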
    void Barrier(core::ir::CoreBuiltinCall* call) {
        b.InsertBefore(call, [&] {
            switch (call->Func()) {
                case core::BuiltinFn::kStorageBarrier:
                    b.Call<glsl::ir::BuiltinCall>(ty.void_(),
                                                  glsl::BuiltinFn::kMemoryBarrierBuffer);
                    break;
                case core::BuiltinFn::kTextureBarrier:
                    b.Call<glsl::ir::BuiltinCall>(ty.void_(), glsl::BuiltinFn::kMemoryBarrierImage);
                    break;
                default:
                    break;
            }
            b.Call<glsl::ir::BuiltinCall>(ty.void_(), glsl::BuiltinFn::kBarrier);
        });
        call->Destroy();
    }
    void Select(core::ir::CoreBuiltinCall* call) {
        auto args = call->Args();

        // Implemented as `mix` in GLSL. The one caveat is that `mix` requires the component
        // counts to match, so if we have a `vec2` result and a single `bool` condition, we need
        // to splat the `bool` into a vector.
        auto bool_ty = args[2]->Type();
        auto val_ty = args[0]->Type();

        b.InsertBefore(call, [&] {
            core::ir::Value* cond = args[2];
            if (val_ty->Is<core::type::Vector>() && !bool_ty->Is<core::type::Vector>()) {
                cond = b.Construct(ty.MatchWidth(ty.bool_(), val_ty), cond)->Result(0);
            }

            b.CallWithResult<glsl::ir::BuiltinCall>(call->DetachResult(), glsl::BuiltinFn::kMix,
                                                    args[0], args[1], cond);
        });
        call->Destroy();
    }
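    // `quantizeToF16` is emulated by round-tripping pairs of components through
    // `pack2x16float`/`unpack2x16float`, which drops any precision that does not fit in an f16.
    // For a `vec2` argument the generated helper is roughly:
    //   fn tint_quantize_to_f16(val : vec2<f32>) -> vec2<f32> {
    //       return unpack2x16float(pack2x16float(val));
    //   }
    // Scalars are widened to a `vec2` and the first component extracted afterwards; a `vec3` is
    // handled as an `xy` pair plus a duplicated `zz` pair. The helper is cached per argument type
    // so it is only emitted once.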
    core::ir::Function* CreateQuantizeToF16Polyfill(const core::type::Type* type) {
        return quantize_to_f16_funcs_.GetOrAdd(type, [&]() -> core::ir::Function* {
            auto* f = b.Function("tint_quantize_to_f16", type);
            auto* val = b.FunctionParam("val", type);
            f->SetParams({val});

            b.Append(f->Block(), [&] {
                core::ir::Value* ret = nullptr;
                auto* inner_ty = type->DeepestElement();
                auto* v2 = ty.vec2(inner_ty);

                auto pack_unpack = [&](core::ir::Value* item) {
                    auto* r = b.Call(ty.u32(), core::BuiltinFn::kPack2X16Float, item)->Result(0);
                    return b.Call(v2, core::BuiltinFn::kUnpack2X16Float, r)->Result(0);
                };

                if (auto* vec = type->As<core::type::Vector>()) {
                    switch (vec->Width()) {
                        case 2: {
                            ret = pack_unpack(val);
                            break;
                        }
                        case 3: {
                            core::ir::Value* lhs = b.Swizzle(v2, val, {0, 1})->Result(0);
                            lhs = pack_unpack(lhs);

                            core::ir::Value* rhs = b.Swizzle(v2, val, {2, 2})->Result(0);
                            rhs = pack_unpack(rhs);
                            rhs = b.Swizzle(inner_ty, rhs, {0})->Result(0);

                            ret = b.Construct(type, lhs, rhs)->Result(0);
                            break;
                        }
                        default: {
                            core::ir::Value* lhs = b.Swizzle(v2, val, {0, 1})->Result(0);
                            lhs = pack_unpack(lhs);

                            core::ir::Value* rhs = b.Swizzle(v2, val, {2, 3})->Result(0);
                            rhs = pack_unpack(rhs);

                            ret = b.Construct(type, lhs, rhs)->Result(0);
                            break;
                        }
                    }
                } else {
                    ret = b.Construct(v2, val)->Result(0);
                    ret = pack_unpack(ret);
                    ret = b.Swizzle(type, ret, {0})->Result(0);
                }
                b.Return(f, ret);
            });
            return f;
        });
    }
    // Emulate by converting to f16 precision and back again, using the helper generated above.
    void QuantizeToF16(core::ir::BuiltinCall* call) {
        auto args = call->Args();
        b.InsertBefore(call, [&] {
            auto* func = CreateQuantizeToF16Polyfill(args[0]->Type());
            b.CallWithResult(call->DetachResult(), func, args[0]);
        });
        call->Destroy();
    }
};

}  // namespace

Result<SuccessType> BuiltinPolyfill(core::ir::Module& ir) {
    auto result = ValidateAndDumpIfNeeded(ir, "BuiltinPolyfill transform");
    if (result != Success) {
        return result.Failure();
    }

    State{ir}.Process();

    return Success;
}

}  // namespace tint::glsl::writer::raise