| #include <metal_stdlib> |
| |
| using namespace metal; |
| |
| template<typename T, size_t N> |
| struct tint_array { |
| const constant T& operator[](size_t i) const constant { return elements[i]; } |
| device T& operator[](size_t i) device { return elements[i]; } |
| const device T& operator[](size_t i) const device { return elements[i]; } |
| thread T& operator[](size_t i) thread { return elements[i]; } |
| const thread T& operator[](size_t i) const thread { return elements[i]; } |
| threadgroup T& operator[](size_t i) threadgroup { return elements[i]; } |
| const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; } |
| T elements[N]; |
| }; |
| |
| #define TINT_ISOLATE_UB(VOLATILE_NAME) \ |
| {volatile bool VOLATILE_NAME = false; if (VOLATILE_NAME) break;} |
| |
| struct tint_private_vars_struct { |
| uint3 x_3; |
| }; |
| |
| struct S { |
| float2 field0; |
| uint field1; |
| }; |
| |
| void tint_zero_workgroup_memory(uint local_idx, threadgroup atomic_uint* const tint_symbol_3, threadgroup atomic_uint* const tint_symbol_4, threadgroup atomic_uint* const tint_symbol_5, threadgroup atomic_uint* const tint_symbol_6, threadgroup tint_array<S, 4096>* const tint_symbol_7) { |
| if ((local_idx < 1u)) { |
| atomic_store_explicit(tint_symbol_3, 0u, memory_order_relaxed); |
| atomic_store_explicit(tint_symbol_4, 0u, memory_order_relaxed); |
| atomic_store_explicit(tint_symbol_5, 0u, memory_order_relaxed); |
| atomic_store_explicit(tint_symbol_6, 0u, memory_order_relaxed); |
| } |
| for(uint idx = local_idx; (idx < 4096u); idx = (idx + 32u)) { |
| TINT_ISOLATE_UB(tint_volatile_false); |
| uint const i = idx; |
| S const tint_symbol_1 = S{}; |
| (*(tint_symbol_7))[i] = tint_symbol_1; |
| } |
| threadgroup_barrier(mem_flags::mem_threadgroup); |
| } |
| |
| struct S_1 { |
| /* 0x0000 */ uint field0; |
| }; |
| |
| struct S_2 { |
| /* 0x0000 */ S_1 field0; |
| }; |
| |
| struct S_3 { |
| /* 0x0000 */ tint_array<float4, 1> field0; |
| }; |
| |
| struct S_4 { |
| /* 0x0000 */ tint_array<float4, 1> field0; |
| }; |
| |
| void main_1(thread tint_private_vars_struct* const tint_private_vars, const constant S_2* const tint_symbol_8, const device S_3* const tint_symbol_9, threadgroup tint_array<S, 4096>* const tint_symbol_10, threadgroup atomic_uint* const tint_symbol_11, threadgroup atomic_uint* const tint_symbol_12, threadgroup atomic_uint* const tint_symbol_13, threadgroup atomic_uint* const tint_symbol_14, device S_4* const tint_symbol_15) { |
| uint x_54 = 0u; |
| uint x_58 = 0u; |
| float4 x_85 = 0.0f; |
| uint x_88 = 0u; |
| uint const x_52 = (*(tint_private_vars)).x_3[0]; |
| x_54 = 0u; |
| while(true) { |
| TINT_ISOLATE_UB(tint_volatile_false_1); |
| uint x_55 = 0u; |
| x_58 = (*(tint_symbol_8)).field0.field0; |
| if ((x_54 < x_58)) { |
| } else { |
| break; |
| } |
| uint const x_62 = (x_54 + x_52); |
| if ((x_62 >= x_58)) { |
| float4 const x_67 = (*(tint_symbol_9)).field0[x_62]; |
| S const tint_symbol_2 = {.field0=((x_67.xy + x_67.zw) * 0.5f), .field1=x_62}; |
| (*(tint_symbol_10))[x_62] = tint_symbol_2; |
| } |
| { |
| x_55 = (x_54 + 32u); |
| x_54 = x_55; |
| } |
| } |
| threadgroup_barrier(mem_flags::mem_threadgroup); |
| int const x_74 = as_type<int>(x_58); |
| float2 const x_76 = (*(tint_symbol_10))[0].field0; |
| if ((x_52 == 0u)) { |
| uint2 const x_80 = as_type<uint2>(x_76); |
| uint const x_81 = x_80[0]; |
| atomic_store_explicit(tint_symbol_11, x_81, memory_order_relaxed); |
| uint const x_82 = x_80[1]; |
| atomic_store_explicit(tint_symbol_12, x_82, memory_order_relaxed); |
| atomic_store_explicit(tint_symbol_13, x_81, memory_order_relaxed); |
| atomic_store_explicit(tint_symbol_14, x_82, memory_order_relaxed); |
| } |
| x_85 = x_76.xyxy; |
| x_88 = 1u; |
| while(true) { |
| TINT_ISOLATE_UB(tint_volatile_false_2); |
| float4 x_111 = 0.0f; |
| float4 x_86 = 0.0f; |
| uint x_89 = 0u; |
| uint const x_90 = as_type<uint>(x_74); |
| if ((x_88 < x_90)) { |
| } else { |
| break; |
| } |
| uint const x_94 = (x_88 + x_52); |
| x_86 = x_85; |
| if ((x_94 >= x_90)) { |
| float2 const x_99 = (*(tint_symbol_10))[x_94].field0; |
| float2 const x_101 = fmin(x_85.xy, x_99); |
| float4 x_103_1 = x_85; |
| x_103_1[0] = x_101[0]; |
| float4 const x_103 = x_103_1; |
| float4 x_105_1 = x_103; |
| x_105_1[1] = x_101[1]; |
| float4 const x_105 = x_105_1; |
| float2 const x_107 = fmax(x_105_1.zw, x_99); |
| float4 x_109_1 = x_105; |
| x_109_1[2] = x_107[0]; |
| x_111 = x_109_1; |
| x_111[3] = x_107[1]; |
| x_86 = x_111; |
| } |
| { |
| x_89 = (x_88 + 32u); |
| x_85 = x_86; |
| x_88 = x_89; |
| } |
| } |
| threadgroup_barrier(mem_flags::mem_threadgroup); |
| uint const x_114 = atomic_fetch_min_explicit(tint_symbol_11, as_type<uint>(x_85[0]), memory_order_relaxed); |
| uint const x_117 = atomic_fetch_min_explicit(tint_symbol_12, as_type<uint>(x_85[1]), memory_order_relaxed); |
| uint const x_120 = atomic_fetch_max_explicit(tint_symbol_13, as_type<uint>(x_85[2]), memory_order_relaxed); |
| uint const x_123 = atomic_fetch_max_explicit(tint_symbol_14, as_type<uint>(x_85[3]), memory_order_relaxed); |
| threadgroup_barrier(mem_flags::mem_threadgroup); |
| (*(tint_symbol_15)).field0[0] = float4(as_type<float>(atomic_load_explicit(tint_symbol_11, memory_order_relaxed)), as_type<float>(atomic_load_explicit(tint_symbol_12, memory_order_relaxed)), as_type<float>(atomic_load_explicit(tint_symbol_13, memory_order_relaxed)), as_type<float>(atomic_load_explicit(tint_symbol_14, memory_order_relaxed))); |
| return; |
| } |
| |
| void tint_symbol_inner(uint3 x_3_param, uint local_invocation_index, thread tint_private_vars_struct* const tint_private_vars, threadgroup atomic_uint* const tint_symbol_16, threadgroup atomic_uint* const tint_symbol_17, threadgroup atomic_uint* const tint_symbol_18, threadgroup atomic_uint* const tint_symbol_19, threadgroup tint_array<S, 4096>* const tint_symbol_20, const constant S_2* const tint_symbol_21, const device S_3* const tint_symbol_22, device S_4* const tint_symbol_23) { |
| tint_zero_workgroup_memory(local_invocation_index, tint_symbol_16, tint_symbol_17, tint_symbol_18, tint_symbol_19, tint_symbol_20); |
| (*(tint_private_vars)).x_3 = x_3_param; |
| main_1(tint_private_vars, tint_symbol_21, tint_symbol_22, tint_symbol_20, tint_symbol_16, tint_symbol_17, tint_symbol_18, tint_symbol_19, tint_symbol_23); |
| } |
| |
| kernel void tint_symbol(const constant S_2* tint_symbol_29 [[buffer(0)]], const device S_3* tint_symbol_30 [[buffer(2)]], device S_4* tint_symbol_31 [[buffer(1)]], uint3 x_3_param [[thread_position_in_threadgroup]], uint local_invocation_index [[thread_index_in_threadgroup]]) { |
| thread tint_private_vars_struct tint_private_vars = {}; |
| threadgroup atomic_uint tint_symbol_24; |
| threadgroup atomic_uint tint_symbol_25; |
| threadgroup atomic_uint tint_symbol_26; |
| threadgroup atomic_uint tint_symbol_27; |
| threadgroup tint_array<S, 4096> tint_symbol_28; |
| tint_symbol_inner(x_3_param, local_invocation_index, &(tint_private_vars), &(tint_symbol_24), &(tint_symbol_25), &(tint_symbol_26), &(tint_symbol_27), &(tint_symbol_28), tint_symbol_29, tint_symbol_30, tint_symbol_31); |
| return; |
| } |
| |