| #include <metal_stdlib> |
| |
| using namespace metal; |
| |
// matrix * packed_vec: widens the packed right-hand operand to a regular
// vec<T, N> and then applies the matrix * vec product.
template<typename T, int N, int M>
inline vec<T, M> operator*(matrix<T, N, M> lhs, packed_vec<T, N> rhs) {
  vec<T, N> const unpacked = vec<T, N>(rhs);
  return lhs * unpacked;
}
| |
// packed_vec * matrix: widens the packed left-hand operand to a regular
// vec<T, M> and then applies the vec * matrix product.
template<typename T, int N, int M>
inline vec<T, N> operator*(packed_vec<T, M> lhs, matrix<T, N, M> rhs) {
  vec<T, M> const unpacked = vec<T, M>(lhs);
  return unpacked * rhs;
}
| |
// Camera parameters, accessed in this file through a `constant` pointer.
// The hex value in front of each member is its byte offset in the buffer;
// member order and types must not change.
struct Camera {
  /* 0x0000 */ float4x4 projection;
  /* 0x0040 */ float4x4 inverseProjection;
  /* 0x0080 */ float4x4 view;             // applied to light positions in computeMain_inner
  /* 0x00c0 */ packed_float3 position;
  /* 0x00cc */ float time;
  /* 0x00d0 */ float2 outputSize;         // divided by tileCount.xy in getTile
  /* 0x00d8 */ float zNear;               // used with zFar by linearDepth and getTile
  /* 0x00dc */ float zFar;
};
// Axis-aligned bounding box of one cluster, stored as min/max corners.
// computeMain_inner tests view-transformed light positions against it.
// The tint_pad members are explicit 4-byte fillers that place maxAABB at
// offset 0x10 and bring the struct to 0x20 bytes.
struct ClusterBounds {
  /* 0x0000 */ packed_float3 minAABB;
  /* 0x000c */ int8_t tint_pad[4];
  /* 0x0010 */ packed_float3 maxAABB;
  /* 0x001c */ int8_t tint_pad_1[4];
};
// Fixed-size array of cluster bounds wrapped in a struct.
// 27648 == 32 * 18 * 48, the product of the tileCount components.
struct tint_array_wrapper {
  /* 0x0000 */ ClusterBounds arr[27648];
};
// Buffer layout holding the bounds of every cluster; computeMain_inner reads
// it through a `const device` pointer and indexes it by flattened tile index.
struct Clusters {
  /* 0x0000 */ tint_array_wrapper bounds;
};
// Per-cluster record written by computeMain_inner: `offset` is the first slot
// in ClusterLightGroup::indices holding this cluster's light indices and
// `count` is how many consecutive slots were written.
struct ClusterLights {
  /* 0x0000 */ uint offset;
  /* 0x0004 */ uint count;
};
// Fixed-size array of per-cluster light records wrapped in a struct.
// 27648 == 32 * 18 * 48, the product of the tileCount components.
struct tint_array_wrapper_1 {
  /* 0x0000 */ ClusterLights arr[27648];
};
// Fixed-size pool of light indices shared by all clusters.
// 1769472 == 27648 * 64, i.e. 64 slots per cluster on average.
struct tint_array_wrapper_2 {
  /* 0x0000 */ uint arr[1769472];
};
// Output buffer of the light-assignment kernel.
//   offset  - atomic counter that computeMain_inner advances to reserve a
//             range of `indices`.
//   lights  - one ClusterLights record per cluster.
//   indices - shared pool of light indices referenced by those records.
struct ClusterLightGroup {
  /* 0x0000 */ atomic_uint offset;
  /* 0x0004 */ tint_array_wrapper_1 lights;
  /* 0x36004 */ tint_array_wrapper_2 indices;
};
// One light as stored in the GlobalLights buffer.
// computeMain_inner treats `range <= 0` as "affects every cluster" and
// otherwise compares range squared against the squared distance to the cluster.
struct Light {
  /* 0x0000 */ packed_float3 position;
  /* 0x000c */ float range;
  /* 0x0010 */ packed_float3 color;
  /* 0x001c */ float intensity;
};
// Scene lighting buffer: ambient and directional terms followed by the light
// list. `lights` is declared with a single element, but computeMain_inner
// indexes it with every i < lightCount, so the bound buffer must hold
// lightCount entries.
struct GlobalLights {
  /* 0x0000 */ packed_float3 ambient;
  /* 0x000c */ int8_t tint_pad_2[4];      // explicit filler so dirColor lands at 0x10
  /* 0x0010 */ packed_float3 dirColor;
  /* 0x001c */ float dirIntensity;
  /* 0x0020 */ packed_float3 dirDirection;
  /* 0x002c */ uint lightCount;
  /* 0x0030 */ Light lights[1];
};
// 256-entry array of uints wrapped in a struct; computeMain_inner uses one as
// its local list of light indices for the current cluster.
struct tint_array_wrapper_3 {
  uint arr[256];
};
| |
// Number of clusters along x, y and z (32 * 18 * 48 = 27648 in total).
constant uint3 tileCount = uint3(32u, 18u, 48u);
// Maps `depthSample` to zFar * zNear / fma(depthSample, zNear - zFar, zFar),
// using the zNear/zFar stored in the camera. A sample of 0 yields zNear and a
// sample of 1 yields zFar.
float linearDepth(float depthSample, const constant Camera* const tint_symbol) {
  float const zNear = tint_symbol->zNear;
  float const zFar = tint_symbol->zFar;
  float const denominator = fma(depthSample, zNear - zFar, zFar);
  return (zFar * zNear) / denominator;
}
| |
// Returns the (x, y, z) cluster coordinates for `fragCoord`.
//   x, y: fragCoord.xy divided by the size of one tile (outputSize / tileCount.xy).
//   z:    log2 of the linear depth of fragCoord.z, scaled and biased so the
//         zNear..zFar range spans tileCount.z slices; negative values clamp to 0.
// NOTE(review): none of the components is clamped to tileCount; a depth sample
// of 1.0 maps to roughly slice tileCount.z. Confirm callers tolerate this.
uint3 getTile(float4 fragCoord, const constant Camera* const tint_symbol_1) {
  float const zNear = tint_symbol_1->zNear;
  float const zFar = tint_symbol_1->zFar;
  float const sliceCount = float(tileCount[2]);

  float const sliceScale = sliceCount / log2(zFar / zNear);
  float const sliceBias = -((sliceCount * log2(zNear)) / log2(zFar / zNear));

  float const depth = linearDepth(fragCoord[2], tint_symbol_1);
  float const slice = (log2(depth) * sliceScale) + sliceBias;
  uint const zTile = uint(fmax(slice, 0.0f));

  float const tileWidth = tint_symbol_1->outputSize[0] / float(tileCount[0]);
  float const tileHeight = tint_symbol_1->outputSize[1] / float(tileCount[1]);
  uint const xTile = uint(fragCoord[0] / tileWidth);
  uint const yTile = uint(fragCoord[1] / tileHeight);

  return uint3(xTile, yTile, zTile);
}
| |
// Flattens the tile coordinates of `fragCoord` into a single index:
//   x + y * tileCount.x + z * tileCount.x * tileCount.y
uint getClusterIndex(float4 fragCoord, const constant Camera* const tint_symbol_2) {
  uint3 const tile = getTile(fragCoord, tint_symbol_2);
  uint const rowOffset = tile[1] * tileCount[0];
  uint const sliceOffset = (tile[2] * tileCount[0]) * tileCount[1];
  return (tile[0] + rowOffset) + sliceOffset;
}
| |
// Squared distance from `point` to the box [minAABB, maxAABB].
// Each axis contributes the square of how far the coordinate lies outside the
// box on that axis; a point inside the box yields 0.
float sqDistPointAABB(float3 point, float3 minAABB, float3 maxAABB) {
  float sqDist = 0.0f;
  for (int axis = 0; axis < 3; ++axis) {
    float const coord = point[axis];
    float const lo = minAABB[axis];
    float const hi = maxAABB[axis];
    if (coord < lo) {
      sqDist = sqDist + ((lo - coord) * (lo - coord));
    }
    if (coord > hi) {
      sqDist = sqDist + ((coord - hi) * (coord - hi));
    }
  }
  return sqDist;
}
| |
// Builds the light list for one cluster.
//
//   global_id     - (x, y, z) coordinates of the cluster handled by this thread.
//   tint_symbol_3 - scene lights (read-only).
//   tint_symbol_4 - camera; only its view matrix is used here.
//   tint_symbol_5 - per-cluster bounds (read-only).
//   tint_symbol_6 - output: shared index pool plus per-cluster offset/count.
//
// Steps:
//   1. Collect up to 256 indices of lights whose range reaches the cluster's
//      AABB. A light with range <= 0 is always included.
//   2. Reserve that many slots in the shared index pool by atomically
//      advancing the pool offset.
//   3. If the reserved range lies entirely inside the pool, copy the indices
//      and record offset/count for this cluster; otherwise write nothing.
void computeMain_inner(uint3 global_id, const device GlobalLights* const tint_symbol_3, const constant Camera* const tint_symbol_4, const device Clusters* const tint_symbol_5, device ClusterLightGroup* const tint_symbol_6) {
  // Capacities fixed by tint_array_wrapper_3::arr and tint_array_wrapper_2::arr.
  uint const maxLightsPerCluster = 256u;
  uint const indexPoolCapacity = 1769472u;

  uint const tileIndex = (global_id[0] + (global_id[1] * tileCount[0])) + ((global_id[2] * tileCount[0]) * tileCount[1]);

  uint clusterLightCount = 0u;
  tint_array_wrapper_3 clusterLightIndices = {};
  for (uint i = 0u; i < tint_symbol_3->lightCount; i = i + 1u) {
    float const range = tint_symbol_3->lights[i].range;
    bool lightInCluster = (range <= 0.0f);
    if (!lightInCluster) {
      // Transform the light by the view matrix before testing it against the cluster bounds.
      float4 const lightViewPos = tint_symbol_4->view * float4(tint_symbol_3->lights[i].position, 1.0f);
      float const sqDist = sqDistPointAABB(lightViewPos.xyz, tint_symbol_5->bounds.arr[tileIndex].minAABB, tint_symbol_5->bounds.arr[tileIndex].maxAABB);
      lightInCluster = (sqDist <= (range * range));
    }
    if (lightInCluster) {
      clusterLightIndices.arr[clusterLightCount] = i;
      clusterLightCount = clusterLightCount + 1u;
    }
    // Stop once the local list is full; remaining lights are dropped.
    if (clusterLightCount == maxLightsPerCluster) {
      break;
    }
  }

  // Reserve a contiguous range [offset, offset + clusterLightCount) in the pool.
  uint const offset = atomic_fetch_add_explicit(&(tint_symbol_6->offset), clusterLightCount, memory_order_relaxed);

  // Reject the reservation unless the whole range fits. Checking only
  // `offset >= capacity` would let a range that starts in bounds but ends past
  // the pool write out of bounds. The subtraction cannot wrap because
  // offset < indexPoolCapacity is checked first.
  if (offset >= indexPoolCapacity || clusterLightCount > (indexPoolCapacity - offset)) {
    return;
  }

  for (uint i = 0u; i < clusterLightCount; i = i + 1u) {
    tint_symbol_6->indices.arr[offset + i] = clusterLightIndices.arr[i];
  }
  tint_symbol_6->lights.arr[tileIndex].offset = offset;
  tint_symbol_6->lights.arr[tileIndex].count = clusterLightCount;
}
| |
// Compute entry point: forwards the bound buffers and the thread's grid
// position to computeMain_inner.
//   buffer(0) - Camera (constant)
//   buffer(1) - ClusterLightGroup (device, written)
//   buffer(2) - GlobalLights (device, read-only)
//   buffer(3) - Clusters (device, read-only)
kernel void computeMain(
    const device GlobalLights* tint_symbol_7 [[buffer(2)]],
    const constant Camera* tint_symbol_8 [[buffer(0)]],
    const device Clusters* tint_symbol_9 [[buffer(3)]],
    device ClusterLightGroup* tint_symbol_10 [[buffer(1)]],
    uint3 global_id [[thread_position_in_grid]]) {
  computeMain_inner(global_id, tint_symbol_7, tint_symbol_8, tint_symbol_9, tint_symbol_10);
}
| |