| // Copyright 2024 The Dawn & Tint Authors |
| // |
| // Redistribution and use in source and binary forms, with or without |
| // modification, are permitted provided that the following conditions are met: |
| // |
| // 1. Redistributions of source code must retain the above copyright notice, this |
| // list of conditions and the following disclaimer. |
| // |
| // 2. Redistributions in binary form must reproduce the above copyright notice, |
| // this list of conditions and the following disclaimer in the documentation |
| // and/or other materials provided with the distribution. |
| // |
| // 3. Neither the name of the copyright holder nor the names of its |
| // contributors may be used to endorse or promote products derived from |
| // this software without specific prior written permission. |
| // |
| // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
| // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
| // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE |
| // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE |
| // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
| // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR |
| // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER |
| // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, |
| // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
| // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| |
| #include "src/tint/lang/glsl/writer/raise/builtin_polyfill.h" |
| |
| #include <string> |
| #include <tuple> |
| |
| #include "src/tint/lang/core/fluent_types.h" // IWYU pragma: export |
| #include "src/tint/lang/core/ir/builder.h" |
| #include "src/tint/lang/core/ir/module.h" |
| #include "src/tint/lang/core/ir/validator.h" |
| #include "src/tint/lang/core/type/depth_multisampled_texture.h" |
| #include "src/tint/lang/core/type/multisampled_texture.h" |
| #include "src/tint/lang/core/type/storage_texture.h" |
| #include "src/tint/lang/glsl/builtin_fn.h" |
| #include "src/tint/lang/glsl/ir/builtin_call.h" |
| #include "src/tint/lang/glsl/ir/ternary.h" |
| |
| namespace tint::glsl::writer::raise { |
| namespace { |
| |
| using namespace tint::core::fluent_types; // NOLINT |
| using namespace tint::core::number_suffixes; // NOLINT |
| |
| /// PIMPL state for the transform. |
| struct State { |
| /// The IR module. |
| core::ir::Module& ir; |
| |
| /// The IR builder. |
| core::ir::Builder b{ir}; |
| |
| /// The type manager. |
| core::type::Manager& ty{ir.Types()}; |
| |
| // Polyfill functions for bitcast expression, BitcastType indicates the source type and the |
| // destination type. |
| using BitcastType = |
| tint::UnorderedKeyWrapper<std::tuple<const core::type::Type*, const core::type::Type*>>; |
| Hashmap<BitcastType, core::ir::Function*, 4> bitcast_funcs_{}; |
| |
| // The bitcast worklist is a member because some polyfills add bitcast calls. When they do, they |
| // can add the bitcast to the worklist to be fixed up as needed. |
| Vector<core::ir::Bitcast*, 4> bitcast_worklist{}; |
| |
| /// Process the module. |
| void Process() { |
| Vector<core::ir::CoreBuiltinCall*, 4> call_worklist; |
| for (auto* inst : ir.Instructions()) { |
| if (auto* bitcast = inst->As<core::ir::Bitcast>()) { |
| bitcast_worklist.Push(bitcast); |
| continue; |
| } |
| |
| if (auto* call = inst->As<core::ir::CoreBuiltinCall>()) { |
| switch (call->Func()) { |
| case core::BuiltinFn::kAtomicCompareExchangeWeak: |
| case core::BuiltinFn::kAtomicSub: |
| case core::BuiltinFn::kAtomicLoad: |
| case core::BuiltinFn::kCountOneBits: |
| case core::BuiltinFn::kSelect: |
| case core::BuiltinFn::kStorageBarrier: |
| case core::BuiltinFn::kTextureBarrier: |
| case core::BuiltinFn::kTextureDimensions: |
| case core::BuiltinFn::kWorkgroupBarrier: |
| call_worklist.Push(call); |
| break; |
| default: |
| break; |
| } |
| continue; |
| } |
| } |
| |
| // Replace the builtin calls that we found |
| for (auto* call : call_worklist) { |
| switch (call->Func()) { |
| case core::BuiltinFn::kAtomicCompareExchangeWeak: |
| AtomicCompareExchangeWeak(call); |
| break; |
| case core::BuiltinFn::kAtomicSub: |
| AtomicSub(call); |
| break; |
| case core::BuiltinFn::kAtomicLoad: |
| AtomicLoad(call); |
| break; |
| case core::BuiltinFn::kCountOneBits: |
| CountOneBits(call); |
| break; |
| case core::BuiltinFn::kSelect: |
| Select(call); |
| break; |
| case core::BuiltinFn::kStorageBarrier: |
| case core::BuiltinFn::kTextureBarrier: |
| case core::BuiltinFn::kWorkgroupBarrier: |
| Barrier(call); |
| break; |
| case core::BuiltinFn::kTextureDimensions: |
| TextureDimensions(call); |
| break; |
| default: |
| TINT_UNREACHABLE(); |
| } |
| } |
| |
| // Replace the bitcasts that we found. These are done after the other builtins as some of |
| // them also create bitcasts which will need to be updated. |
| for (auto* bitcast : bitcast_worklist) { |
| auto* src_type = bitcast->Val()->Type(); |
| auto* dst_type = bitcast->Result(0)->Type(); |
| auto* dst_deepest = dst_type->DeepestElement(); |
| |
| if (src_type == dst_type) { |
| ReplaceBitcastWithValue(bitcast); |
| } else if (src_type->DeepestElement()->Is<core::type::F16>()) { |
| ReplaceBitcastWithFromF16Polyfill(bitcast); |
| } else if (dst_deepest->Is<core::type::F16>()) { |
| ReplaceBitcastWithToF16Polyfill(bitcast); |
| } else if (src_type->DeepestElement()->Is<core::type::F32>()) { |
| ReplaceBitcastFromF32(bitcast); |
| } else if (dst_type->DeepestElement()->Is<core::type::F32>()) { |
| ReplaceBitcastToF32(bitcast); |
| } else { |
| ReplaceBitcast(bitcast); |
| } |
| } |
| } |
| |
| // GLSL `bitCount` always returns an `i32` so we need to convert it. Convert to a `bitCount` |
| // call to make it clear this isn't `countOneBits`. |
| void CountOneBits(core::ir::Call* call) { |
| auto* result_ty = call->Result(0)->Type(); |
| |
| b.InsertBefore(call, [&] { |
| auto* c = b.Call<glsl::ir::BuiltinCall>(ty.MatchWidth(ty.i32(), result_ty), |
| glsl::BuiltinFn::kBitCount, call->Args()[0]); |
| b.ConvertWithResult(call->DetachResult(), c); |
| }); |
| call->Destroy(); |
| } |
| |
| void ReplaceBitcastWithValue(core::ir::Bitcast* bitcast) { |
| bitcast->Result(0)->ReplaceAllUsesWith(bitcast->Val()); |
| bitcast->Destroy(); |
| } |
| |
| core::ir::Value* CreateBitcastFromF32(const core::type::Type* type, |
| const core::type::Type* result_type, |
| core::ir::Value* val) { |
| BuiltinFn fn = BuiltinFn::kNone; |
| tint::Switch( |
| type, // |
| [&](const core::type::I32*) { fn = BuiltinFn::kFloatBitsToInt; }, // |
| [&](const core::type::U32*) { fn = BuiltinFn::kFloatBitsToUint; }, // |
| TINT_ICE_ON_NO_MATCH); |
| |
| return b.Call<glsl::ir::BuiltinCall>(result_type, fn, val)->Result(0); |
| } |
| |
| void ReplaceBitcastFromF32(core::ir::Bitcast* bitcast) { |
| auto* dst_type = bitcast->Result(0)->Type(); |
| auto* dst_deepest = dst_type->DeepestElement(); |
| |
| b.InsertBefore(bitcast, [&] { |
| auto* bc = |
| CreateBitcastFromF32(dst_deepest, bitcast->Result(0)->Type(), bitcast->Val()); |
| bitcast->Result(0)->ReplaceAllUsesWith(bc); |
| }); |
| bitcast->Destroy(); |
| } |
| |
| core::ir::Value* CreateBitcastToF32(const core::type::Type* src_type, |
| const core::type::Type* dst_type, |
| core::ir::Value* val) { |
| BuiltinFn fn = BuiltinFn::kNone; |
| tint::Switch( |
| src_type, // |
| [&](const core::type::I32*) { fn = BuiltinFn::kIntBitsToFloat; }, // |
| [&](const core::type::U32*) { fn = BuiltinFn::kUintBitsToFloat; }, // |
| TINT_ICE_ON_NO_MATCH); |
| |
| return b.Call<glsl::ir::BuiltinCall>(dst_type, fn, val)->Result(0); |
| } |
| |
| void ReplaceBitcastToF32(core::ir::Bitcast* bitcast) { |
| auto* src_type = bitcast->Val()->Type(); |
| auto* src_deepest = src_type->DeepestElement(); |
| |
| b.InsertBefore(bitcast, [&] { |
| auto* bc = CreateBitcastToF32(src_deepest, bitcast->Result(0)->Type(), bitcast->Val()); |
| bitcast->Result(0)->ReplaceAllUsesWith(bc); |
| }); |
| bitcast->Destroy(); |
| } |
| |
| void ReplaceBitcast(core::ir::Bitcast* bitcast) { |
| b.InsertBefore(bitcast, |
| [&] { b.ConvertWithResult(bitcast->DetachResult(), bitcast->Val()); }); |
| bitcast->Destroy(); |
| } |
| |
| core::ir::Function* CreateBitcastFromF16(const core::type::Type* src_type, |
| const core::type::Type* dst_type) { |
| return bitcast_funcs_.GetOrAdd( |
| BitcastType{{src_type, dst_type}}, [&]() -> core::ir::Function* { |
| TINT_ASSERT(src_type->Is<core::type::Vector>()); |
| |
| // Generate a helper function that performs the following (in GLSL): |
| // |
| // ivec2 tint_bitcast_from_f16(f16vec4 src) { |
| // uvec2 r = uvec2(packFloat2x16(src.xy), packFloat2x16(src.zw)); |
| // return ivec2(r); |
| // } |
| |
| auto fn_name = b.ir.symbols.New("tint_bitcast_from_f16").Name(); |
| |
| auto* f = b.Function(fn_name, dst_type); |
| auto* src = b.FunctionParam("src", src_type); |
| f->SetParams({src}); |
| |
| b.Append(f->Block(), [&] { |
| auto* src_vec = src_type->As<core::type::Vector>(); |
| |
| core::ir::Value* packed = nullptr; |
| if (src_vec->Width() == 2) { |
| packed = b.Call<glsl::ir::BuiltinCall>(ty.u32(), |
| glsl::BuiltinFn::kPackFloat2X16, src) |
| ->Result(0); |
| } else if (src_vec->Width() == 4) { |
| auto* left = |
| b.Call<glsl::ir::BuiltinCall>(ty.u32(), glsl::BuiltinFn::kPackFloat2X16, |
| b.Swizzle(ty.vec2<f16>(), src, {0, 1})); |
| auto* right = |
| b.Call<glsl::ir::BuiltinCall>(ty.u32(), glsl::BuiltinFn::kPackFloat2X16, |
| b.Swizzle(ty.vec2<f16>(), src, {2, 3})); |
| packed = b.Construct(ty.vec2<u32>(), left, right)->Result(0); |
| } else { |
| TINT_UNREACHABLE(); |
| } |
| |
| if (dst_type->DeepestElement()->Is<core::type::F32>()) { |
| packed = |
| CreateBitcastToF32(packed->Type()->DeepestElement(), dst_type, packed); |
| } else { |
| packed = b.Convert(dst_type, packed)->Result(0); |
| } |
| |
| b.Return(f, packed); |
| }); |
| return f; |
| }); |
| } |
| |
| void ReplaceBitcastWithFromF16Polyfill(core::ir::Bitcast* bitcast) { |
| auto* src_type = bitcast->Val()->Type(); |
| auto* dst_type = bitcast->Result(0)->Type(); |
| |
| auto* f = CreateBitcastFromF16(src_type, dst_type); |
| b.InsertBefore(bitcast, |
| [&] { b.CallWithResult(bitcast->DetachResult(), f, bitcast->Args()[0]); }); |
| bitcast->Destroy(); |
| } |
| |
| core::ir::Function* CreateBitcastToF16(const core::type::Type* src_type, |
| const core::type::Type* dst_type) { |
| return bitcast_funcs_.GetOrAdd( |
| BitcastType{{src_type, dst_type}}, [&]() -> core::ir::Function* { |
| TINT_ASSERT(dst_type->Is<core::type::Vector>()); |
| |
| // Generate a helper function that performs the following (in GLSL): |
| // |
| // f16vec4 tint_bitcast_to_f16(ivec2 src) { |
| // uvec2 r = uvec2(src); |
| // f16vec2 v_xy = unpackFloat2x16(r.x); |
| // f16vec2 v_zw = unpackFloat2x16(r.y); |
| // return f16vec4(v_xy.x, v_xy.y, v_zw.x, v_zw.y); |
| // } |
| |
| auto fn_name = b.ir.symbols.New("tint_bitcast_to_f16").Name(); |
| |
| auto* f = b.Function(fn_name, dst_type); |
| auto* src = b.FunctionParam("src", src_type); |
| f->SetParams({src}); |
| b.Append(f->Block(), [&] { |
| core::ir::Value* conv = nullptr; |
| |
| if (src->Type()->DeepestElement()->Is<core::type::F32>()) { |
| conv = |
| CreateBitcastFromF32(ty.u32(), ty.MatchWidth(ty.u32(), src_type), src); |
| } else { |
| conv = b.Convert(ty.MatchWidth(ty.u32(), src->Type()), src)->Result(0); |
| } |
| |
| core::ir::Value* val = nullptr; |
| if (src->Type()->Is<core::type::Vector>()) { |
| auto* left = b.Call<glsl::ir::BuiltinCall>( |
| ty.vec2<f16>(), glsl::BuiltinFn::kUnpackFloat2X16, |
| b.Swizzle(ty.u32(), conv, {0})); |
| auto* right = b.Call<glsl::ir::BuiltinCall>( |
| ty.vec2<f16>(), glsl::BuiltinFn::kUnpackFloat2X16, |
| b.Swizzle(ty.u32(), conv, {1})); |
| |
| val = b.Construct(dst_type, left, right)->Result(0); |
| } else { |
| val = b.Call<glsl::ir::BuiltinCall>(ty.vec2<f16>(), |
| glsl::BuiltinFn::kUnpackFloat2X16, conv) |
| ->Result(0); |
| } |
| b.Return(f, val); |
| }); |
| return f; |
| }); |
| } |
| |
| void ReplaceBitcastWithToF16Polyfill(core::ir::Bitcast* bitcast) { |
| auto* src_type = bitcast->Val()->Type(); |
| auto* dst_type = bitcast->Result(0)->Type(); |
| |
| auto* f = CreateBitcastToF16(src_type, dst_type); |
| b.InsertBefore(bitcast, |
| [&] { b.CallWithResult(bitcast->DetachResult(), f, bitcast->Args()[0]); }); |
| bitcast->Destroy(); |
| } |
| |
| // `textureDimensions` returns an unsigned scalar / vector in WGSL. `textureSize` and |
| // `imageSize` return a signed scalar / vector in GLSL. So, we need to cast the result to |
| // the needed WGSL type. |
| void TextureDimensions(core::ir::BuiltinCall* call) { |
| auto args = call->Args(); |
| auto* tex = args[0]->Type()->As<core::type::Texture>(); |
| |
| b.InsertBefore(call, [&] { |
| auto func = glsl::BuiltinFn::kTextureSize; |
| if (tex->Is<core::type::StorageTexture>()) { |
| func = glsl::BuiltinFn::kImageSize; |
| } |
| |
| Vector<core::ir::Value*, 2> new_args; |
| new_args.Push(args[0]); |
| |
| if (!(tex->Is<core::type::StorageTexture>() || |
| tex->Is<core::type::MultisampledTexture>() || |
| tex->Is<core::type::DepthMultisampledTexture>())) { |
| // Add a LOD to any texture other then storage, and multi-sampled textures which |
| // does not already have an LOD. |
| if (args.Length() == 1) { |
| new_args.Push(b.Constant(0_i)); |
| } else { |
| // Make sure the LOD is a i32 |
| auto* bc = b.Bitcast(ty.i32(), args[1]); |
| bitcast_worklist.Push(bc); |
| new_args.Push(bc->Result(0)); |
| } |
| } |
| |
| auto ret_type = call->Result(0)->Type(); |
| |
| // In GLSL the array dimensions return a 3rd parameter. |
| if (tex->Dim() == core::type::TextureDimension::k2dArray || |
| tex->Dim() == core::type::TextureDimension::kCubeArray) { |
| ret_type = ty.vec(ty.i32(), 3); |
| } else { |
| ret_type = ty.MatchWidth(ty.i32(), call->Result(0)->Type()); |
| } |
| |
| core::ir::Value* result = |
| b.Call<glsl::ir::BuiltinCall>(ret_type, func, new_args)->Result(0); |
| |
| // `textureSize` on array samplers returns the array size in the final component, WGSL |
| // requires a 2 component response, so drop the array size |
| if (tex->Dim() == core::type::TextureDimension::k2dArray || |
| tex->Dim() == core::type::TextureDimension::kCubeArray) { |
| ret_type = ty.MatchWidth(ty.i32(), call->Result(0)->Type()); |
| result = b.Swizzle(ret_type, result, {0, 1})->Result(0); |
| } |
| |
| auto* ret_bc = b.Bitcast(call->Result(0)->Type(), result); |
| bitcast_worklist.Push(ret_bc); |
| call->Result(0)->ReplaceAllUsesWith(ret_bc->Result(0)); |
| }); |
| call->Destroy(); |
| } |
| |
| void AtomicCompareExchangeWeak(core::ir::BuiltinCall* call) { |
| auto args = call->Args(); |
| auto* type = args[1]->Type(); |
| |
| auto* dest = args[0]; |
| auto* compare_value = args[1]; |
| auto* value = args[2]; |
| |
| auto* result_type = call->Result(0)->Type(); |
| |
| b.InsertBefore(call, [&] { |
| auto* bitcast_cmp_value = b.Bitcast(type, compare_value); |
| auto* bitcast_value = b.Bitcast(type, value); |
| |
| bitcast_worklist.Push(bitcast_cmp_value); |
| bitcast_worklist.Push(bitcast_value); |
| |
| auto* swap = b.Call<glsl::ir::BuiltinCall>( |
| type, glsl::BuiltinFn::kAtomicCompSwap, |
| Vector<core::ir::Value*, 3>{dest, bitcast_cmp_value->Result(0), |
| bitcast_value->Result(0)}); |
| |
| auto* exchanged = b.Equal(ty.bool_(), swap, compare_value); |
| |
| auto* result = b.Construct(result_type, swap, exchanged)->Result(0); |
| call->Result(0)->ReplaceAllUsesWith(result); |
| }); |
| call->Destroy(); |
| } |
| |
| void AtomicSub(core::ir::BuiltinCall* call) { |
| b.InsertBefore(call, [&] { |
| auto args = call->Args(); |
| |
| if (args[1]->Type()->Is<core::type::I32>()) { |
| b.CallWithResult(call->DetachResult(), core::BuiltinFn::kAtomicAdd, args[0], |
| b.Negation(args[1]->Type(), args[1])); |
| } else { |
| // Negating a u32 isn't possible in the IR, so pass a fake GLSL function and |
| // handle in the printer. |
| b.CallWithResult<glsl::ir::BuiltinCall>( |
| call->DetachResult(), glsl::BuiltinFn::kAtomicSub, |
| Vector<core::ir::Value*, 2>{args[0], args[1]}); |
| } |
| }); |
| call->Destroy(); |
| } |
| |
| void AtomicLoad(core::ir::CoreBuiltinCall* call) { |
| // GLSL does not have an atomicLoad, so we emulate it with atomicOr using 0 as the OR |
| // value |
| b.InsertBefore(call, [&] { |
| auto args = call->Args(); |
| b.CallWithResult( |
| call->DetachResult(), core::BuiltinFn::kAtomicOr, args[0], |
| b.Zero(args[0]->Type()->UnwrapPtr()->As<core::type::Atomic>()->Type())); |
| }); |
| call->Destroy(); |
| } |
| |
| void Barrier(core::ir::CoreBuiltinCall* call) { |
| b.InsertBefore(call, [&] { |
| b.Call<glsl::ir::BuiltinCall>(ty.void_(), glsl::BuiltinFn::kBarrier); |
| |
| switch (call->Func()) { |
| case core::BuiltinFn::kStorageBarrier: |
| b.Call<glsl::ir::BuiltinCall>(ty.void_(), |
| glsl::BuiltinFn::kMemoryBarrierBuffer); |
| break; |
| case core::BuiltinFn::kTextureBarrier: |
| b.Call<glsl::ir::BuiltinCall>(ty.void_(), glsl::BuiltinFn::kMemoryBarrierImage); |
| break; |
| default: |
| break; |
| } |
| }); |
| |
| call->Destroy(); |
| } |
| |
| void Select(core::ir::CoreBuiltinCall* call) { |
| Vector<core::ir::Value*, 4> args = call->Args(); |
| |
| // GLSL does not support ternary expressions with a bool vector conditional, |
| // so polyfill by manually creating a vector with each of the |
| // individual scalar ternaries. |
| if (auto* vec = call->Result(0)->Type()->As<core::type::Vector>()) { |
| Vector<core::ir::Value*, 4> construct_args; |
| |
| b.InsertBefore(call, [&] { |
| auto* elm_ty = vec->Type(); |
| for (uint32_t i = 0; i < vec->Width(); i++) { |
| auto* false_ = b.Swizzle(elm_ty, args[0], {i})->Result(0); |
| auto* true_ = b.Swizzle(elm_ty, args[1], {i})->Result(0); |
| auto* cond = b.Swizzle(elm_ty, args[2], {i})->Result(0); |
| |
| auto* ternary = b.ir.CreateInstruction<glsl::ir::Ternary>( |
| b.InstructionResult(elm_ty), |
| Vector<core::ir::Value*, 3>{false_, true_, cond}); |
| ternary->InsertBefore(call); |
| |
| construct_args.Push(ternary->Result(0)); |
| } |
| |
| b.ConstructWithResult(call->DetachResult(), construct_args); |
| }); |
| |
| } else { |
| auto* ternary = b.ir.CreateInstruction<glsl::ir::Ternary>(call->DetachResult(), args); |
| ternary->InsertBefore(call); |
| } |
| call->Destroy(); |
| } |
| }; |
| |
| } // namespace |
| |
| Result<SuccessType> BuiltinPolyfill(core::ir::Module& ir) { |
| auto result = ValidateAndDumpIfNeeded(ir, "BuiltinPolyfill transform"); |
| if (result != Success) { |
| return result.Failure(); |
| } |
| |
| State{ir}.Process(); |
| |
| return Success; |
| } |
| |
| } // namespace tint::glsl::writer::raise |