| #include <metal_stdlib> |
| |
| using namespace metal; |
| |
| template<typename T, size_t N> |
| struct tint_array { |
| const constant T& operator[](size_t i) const constant { return elements[i]; } |
| device T& operator[](size_t i) device { return elements[i]; } |
| const device T& operator[](size_t i) const device { return elements[i]; } |
| thread T& operator[](size_t i) thread { return elements[i]; } |
| const thread T& operator[](size_t i) const thread { return elements[i]; } |
| threadgroup T& operator[](size_t i) threadgroup { return elements[i]; } |
| const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; } |
| T elements[N]; |
| }; |
| |
| struct tint_private_vars_struct { |
| uint3 x_3; |
| }; |
| |
| struct S { |
| float2 field0; |
| uint field1; |
| }; |
| |
| struct S_1 { |
| /* 0x0000 */ uint field0; |
| }; |
| |
| struct S_2 { |
| /* 0x0000 */ S_1 field0; |
| }; |
| |
| struct S_3 { |
| /* 0x0000 */ tint_array<float4, 1> field0; |
| }; |
| |
| struct S_4 { |
| /* 0x0000 */ tint_array<float4, 1> field0; |
| }; |
| |
| void main_1(thread tint_private_vars_struct* const tint_private_vars, const constant S_2* const tint_symbol_3, const device S_3* const tint_symbol_4, threadgroup tint_array<S, 4096>* const tint_symbol_5, threadgroup atomic_uint* const tint_symbol_6, threadgroup atomic_uint* const tint_symbol_7, threadgroup atomic_uint* const tint_symbol_8, threadgroup atomic_uint* const tint_symbol_9, device S_4* const tint_symbol_10) { |
| uint x_54 = 0u; |
| uint x_58 = 0u; |
| float4 x_85 = 0.0f; |
| uint x_88 = 0u; |
| uint const x_52 = (*(tint_private_vars)).x_3[0]; |
| x_54 = 0u; |
| while (true) { |
| uint x_55 = 0u; |
| x_58 = (*(tint_symbol_3)).field0.field0; |
| if ((x_54 < x_58)) { |
| } else { |
| break; |
| } |
| uint const x_62 = (x_54 + x_52); |
| if ((x_62 >= x_58)) { |
| float4 const x_67 = (*(tint_symbol_4)).field0[x_62]; |
| S const tint_symbol_2 = {.field0=((x_67.xy + x_67.zw) * 0.5f), .field1=x_62}; |
| (*(tint_symbol_5))[x_62] = tint_symbol_2; |
| } |
| { |
| x_55 = (x_54 + 32u); |
| x_54 = x_55; |
| } |
| } |
| threadgroup_barrier(mem_flags::mem_threadgroup); |
| int const x_74 = as_type<int>(x_58); |
| float2 const x_76 = (*(tint_symbol_5))[0].field0; |
| if ((x_52 == 0u)) { |
| uint2 const x_80 = as_type<uint2>(x_76); |
| uint const x_81 = x_80[0]; |
| atomic_store_explicit(tint_symbol_6, x_81, memory_order_relaxed); |
| uint const x_82 = x_80[1]; |
| atomic_store_explicit(tint_symbol_7, x_82, memory_order_relaxed); |
| atomic_store_explicit(tint_symbol_8, x_81, memory_order_relaxed); |
| atomic_store_explicit(tint_symbol_9, x_82, memory_order_relaxed); |
| } |
| x_85 = x_76.xyxy; |
| x_88 = 1u; |
| while (true) { |
| float4 x_111 = 0.0f; |
| float4 x_86 = 0.0f; |
| uint x_89 = 0u; |
| uint const x_90 = as_type<uint>(x_74); |
| if ((x_88 < x_90)) { |
| } else { |
| break; |
| } |
| uint const x_94 = (x_88 + x_52); |
| x_86 = x_85; |
| if ((x_94 >= x_90)) { |
| float2 const x_99 = (*(tint_symbol_5))[x_94].field0; |
| float2 const x_101 = fmin(x_85.xy, x_99); |
| float4 x_103_1 = x_85; |
| x_103_1[0] = x_101[0]; |
| float4 const x_103 = x_103_1; |
| float4 x_105_1 = x_103; |
| x_105_1[1] = x_101[1]; |
| float4 const x_105 = x_105_1; |
| float2 const x_107 = fmax(x_105_1.zw, x_99); |
| float4 x_109_1 = x_105; |
| x_109_1[2] = x_107[0]; |
| x_111 = x_109_1; |
| x_111[3] = x_107[1]; |
| x_86 = x_111; |
| } |
| { |
| x_89 = (x_88 + 32u); |
| x_85 = x_86; |
| x_88 = x_89; |
| } |
| } |
| threadgroup_barrier(mem_flags::mem_threadgroup); |
| uint const x_114 = atomic_fetch_min_explicit(tint_symbol_6, as_type<uint>(x_85[0]), memory_order_relaxed); |
| uint const x_117 = atomic_fetch_min_explicit(tint_symbol_7, as_type<uint>(x_85[1]), memory_order_relaxed); |
| uint const x_120 = atomic_fetch_max_explicit(tint_symbol_8, as_type<uint>(x_85[2]), memory_order_relaxed); |
| uint const x_123 = atomic_fetch_max_explicit(tint_symbol_9, as_type<uint>(x_85[3]), memory_order_relaxed); |
| threadgroup_barrier(mem_flags::mem_threadgroup); |
| (*(tint_symbol_10)).field0[0] = float4(as_type<float>(atomic_load_explicit(tint_symbol_6, memory_order_relaxed)), as_type<float>(atomic_load_explicit(tint_symbol_7, memory_order_relaxed)), as_type<float>(atomic_load_explicit(tint_symbol_8, memory_order_relaxed)), as_type<float>(atomic_load_explicit(tint_symbol_9, memory_order_relaxed))); |
| return; |
| } |
| |
| void tint_symbol_inner(uint3 x_3_param, uint local_invocation_index, thread tint_private_vars_struct* const tint_private_vars, threadgroup atomic_uint* const tint_symbol_11, threadgroup atomic_uint* const tint_symbol_12, threadgroup atomic_uint* const tint_symbol_13, threadgroup atomic_uint* const tint_symbol_14, threadgroup tint_array<S, 4096>* const tint_symbol_15, const constant S_2* const tint_symbol_16, const device S_3* const tint_symbol_17, device S_4* const tint_symbol_18) { |
| if ((local_invocation_index < 1u)) { |
| atomic_store_explicit(tint_symbol_11, 0u, memory_order_relaxed); |
| atomic_store_explicit(tint_symbol_12, 0u, memory_order_relaxed); |
| atomic_store_explicit(tint_symbol_13, 0u, memory_order_relaxed); |
| atomic_store_explicit(tint_symbol_14, 0u, memory_order_relaxed); |
| } |
| for(uint idx = local_invocation_index; (idx < 4096u); idx = (idx + 32u)) { |
| uint const i = idx; |
| S const tint_symbol_1 = S{}; |
| (*(tint_symbol_15))[i] = tint_symbol_1; |
| } |
| threadgroup_barrier(mem_flags::mem_threadgroup); |
| (*(tint_private_vars)).x_3 = x_3_param; |
| main_1(tint_private_vars, tint_symbol_16, tint_symbol_17, tint_symbol_15, tint_symbol_11, tint_symbol_12, tint_symbol_13, tint_symbol_14, tint_symbol_18); |
| } |
| |
| kernel void tint_symbol(const constant S_2* tint_symbol_24 [[buffer(0)]], const device S_3* tint_symbol_25 [[buffer(2)]], device S_4* tint_symbol_26 [[buffer(1)]], uint3 x_3_param [[thread_position_in_threadgroup]], uint local_invocation_index [[thread_index_in_threadgroup]]) { |
| thread tint_private_vars_struct tint_private_vars = {}; |
| threadgroup atomic_uint tint_symbol_19; |
| threadgroup atomic_uint tint_symbol_20; |
| threadgroup atomic_uint tint_symbol_21; |
| threadgroup atomic_uint tint_symbol_22; |
| threadgroup tint_array<S, 4096> tint_symbol_23; |
| tint_symbol_inner(x_3_param, local_invocation_index, &(tint_private_vars), &(tint_symbol_19), &(tint_symbol_20), &(tint_symbol_21), &(tint_symbol_22), &(tint_symbol_23), tint_symbol_24, tint_symbol_25, tint_symbol_26); |
| return; |
| } |
| |