| #include <metal_stdlib> |
| |
| using namespace metal; |
| |
| template<typename T, int N, int M> |
| inline auto operator*(matrix<T, N, M> lhs, packed_vec<T, N> rhs) { |
| return lhs * vec<T, N>(rhs); |
| } |
| |
| template<typename T, int N, int M> |
| inline auto operator*(packed_vec<T, M> lhs, matrix<T, N, M> rhs) { |
| return vec<T, M>(lhs) * rhs; |
| } |
| |
| struct Uniforms { |
| /* 0x0000 */ uint numTriangles; |
| /* 0x0004 */ uint gridSize; |
| /* 0x0008 */ uint pad1; |
| /* 0x000c */ uint pad2; |
| /* 0x0010 */ packed_float3 bbMin; |
| /* 0x001c */ int8_t tint_pad[4]; |
| /* 0x0020 */ packed_float3 bbMax; |
| /* 0x002c */ int8_t tint_pad_1[4]; |
| }; |
| struct Dbg { |
| /* 0x0000 */ atomic_uint offsetCounter; |
| /* 0x0004 */ uint pad0; |
| /* 0x0008 */ uint pad1; |
| /* 0x000c */ uint pad2; |
| /* 0x0010 */ uint value0; |
| /* 0x0014 */ uint value1; |
| /* 0x0018 */ uint value2; |
| /* 0x001c */ uint value3; |
| /* 0x0020 */ float value_f32_0; |
| /* 0x0024 */ float value_f32_1; |
| /* 0x0028 */ float value_f32_2; |
| /* 0x002c */ float value_f32_3; |
| }; |
| struct F32s { |
| /* 0x0000 */ float values[1]; |
| }; |
| struct U32s { |
| /* 0x0000 */ uint values[1]; |
| }; |
| struct I32s { |
| int values[1]; |
| }; |
| struct AU32s { |
| /* 0x0000 */ atomic_uint values[1]; |
| }; |
| struct AI32s { |
| /* 0x0000 */ atomic_int values[1]; |
| }; |
| |
| float3 toVoxelPos(constant Uniforms& uniforms, float3 position) { |
| float3 bbMin = float3(uniforms.bbMin.x, uniforms.bbMin.y, uniforms.bbMin.z); |
| float3 bbMax = float3(uniforms.bbMax.x, uniforms.bbMax.y, uniforms.bbMax.z); |
| float3 bbSize = (bbMax - bbMin); |
| float cubeSize = fmax(fmax(bbSize.x, bbSize.y), bbSize.z); |
| float gridSize = float(uniforms.gridSize); |
| float gx = ((gridSize * (position.x - uniforms.bbMin.x)) / cubeSize); |
| float gy = ((gridSize * (position.y - uniforms.bbMin.y)) / cubeSize); |
| float gz = ((gridSize * (position.z - uniforms.bbMin.z)) / cubeSize); |
| return float3(gx, gy, gz); |
| } |
| |
| uint toIndex1D(uint gridSize, float3 voxelPos) { |
| uint3 icoord = uint3(voxelPos); |
| return ((icoord.x + (gridSize * icoord.y)) + ((gridSize * gridSize) * icoord.z)); |
| } |
| |
| uint3 toIndex3D(uint gridSize, uint index) { |
| uint z_1 = (index / (gridSize * gridSize)); |
| uint y_1 = ((index - ((gridSize * gridSize) * z_1)) / gridSize); |
| uint x_1 = (index % gridSize); |
| return uint3(x_1, y_1, z_1); |
| } |
| |
| float3 loadPosition(device F32s& positions, uint vertexIndex) { |
| float3 position = float3(positions.values[((3u * vertexIndex) + 0u)], positions.values[((3u * vertexIndex) + 1u)], positions.values[((3u * vertexIndex) + 2u)]); |
| return position; |
| } |
| |
| void doIgnore(constant Uniforms& uniforms, device Dbg& dbg, device AU32s& counters, device U32s& indices, device F32s& positions, device AI32s& LUT) { |
| uint g42 = uniforms.numTriangles; |
| uint kj6 = dbg.value1; |
| uint b53 = atomic_load_explicit(&(counters.values[0]), memory_order_relaxed); |
| uint rwg = indices.values[0]; |
| float rb5 = positions.values[0]; |
| int g55 = atomic_load_explicit(&(LUT.values[0]), memory_order_relaxed); |
| } |
| |
| void main_count_inner(constant Uniforms& uniforms, device Dbg& dbg, device AU32s& counters, device U32s& indices, device F32s& positions, device AI32s& LUT, uint3 GlobalInvocationID) { |
| uint triangleIndex = GlobalInvocationID.x; |
| if ((triangleIndex >= uniforms.numTriangles)) { |
| return; |
| } |
| doIgnore(uniforms, dbg, counters, indices, positions, LUT); |
| uint i0 = indices.values[((3u * triangleIndex) + 0u)]; |
| uint i1 = indices.values[((3u * triangleIndex) + 1u)]; |
| uint i2 = indices.values[((3u * triangleIndex) + 2u)]; |
| float3 p0 = loadPosition(positions, i0); |
| float3 p1 = loadPosition(positions, i1); |
| float3 p2 = loadPosition(positions, i2); |
| float3 center = (((p0 + p1) + p2) / 3.0f); |
| float3 voxelPos = toVoxelPos(uniforms, center); |
| uint voxelIndex = toIndex1D(uniforms.gridSize, voxelPos); |
| uint acefg = atomic_fetch_add_explicit(&(counters.values[voxelIndex]), 1u, memory_order_relaxed); |
| if ((triangleIndex == 0u)) { |
| dbg.value0 = uniforms.gridSize; |
| dbg.value_f32_0 = center.x; |
| dbg.value_f32_1 = center.y; |
| dbg.value_f32_2 = center.z; |
| } |
| } |
| |
| kernel void main_count(uint3 GlobalInvocationID [[thread_position_in_grid]], constant Uniforms& uniforms [[buffer(0)]], device Dbg& dbg [[buffer(1)]], device AU32s& counters [[buffer(2)]], device U32s& indices [[buffer(3)]], device F32s& positions [[buffer(4)]], device AI32s& LUT [[buffer(5)]]) { |
| main_count_inner(uniforms, dbg, counters, indices, positions, LUT, GlobalInvocationID); |
| return; |
| } |
| |
| void main_create_lut_inner(constant Uniforms& uniforms, device Dbg& dbg, device AU32s& counters, device U32s& indices, device F32s& positions, device AI32s& LUT, uint3 GlobalInvocationID) { |
| uint voxelIndex = GlobalInvocationID.x; |
| doIgnore(uniforms, dbg, counters, indices, positions, LUT); |
| uint maxVoxels = ((uniforms.gridSize * uniforms.gridSize) * uniforms.gridSize); |
| if ((voxelIndex >= maxVoxels)) { |
| return; |
| } |
| uint numTriangles = atomic_load_explicit(&(counters.values[voxelIndex]), memory_order_relaxed); |
| int offset = -1; |
| if ((numTriangles > 0u)) { |
| offset = int(atomic_fetch_add_explicit(&(dbg.offsetCounter), numTriangles, memory_order_relaxed)); |
| } |
| atomic_store_explicit(&(LUT.values[voxelIndex]), offset, memory_order_relaxed); |
| } |
| |
| kernel void main_create_lut(uint3 GlobalInvocationID [[thread_position_in_grid]], constant Uniforms& uniforms [[buffer(0)]], device Dbg& dbg [[buffer(1)]], device AU32s& counters [[buffer(2)]], device U32s& indices [[buffer(3)]], device F32s& positions [[buffer(4)]], device AI32s& LUT [[buffer(5)]]) { |
| main_create_lut_inner(uniforms, dbg, counters, indices, positions, LUT, GlobalInvocationID); |
| return; |
| } |
| |
| void main_sort_triangles_inner(constant Uniforms& uniforms, device Dbg& dbg, device AU32s& counters, device U32s& indices, device F32s& positions, device AI32s& LUT, uint3 GlobalInvocationID) { |
| uint triangleIndex = GlobalInvocationID.x; |
| doIgnore(uniforms, dbg, counters, indices, positions, LUT); |
| if ((triangleIndex >= uniforms.numTriangles)) { |
| return; |
| } |
| uint i0 = indices.values[((3u * triangleIndex) + 0u)]; |
| uint i1 = indices.values[((3u * triangleIndex) + 1u)]; |
| uint i2 = indices.values[((3u * triangleIndex) + 2u)]; |
| float3 p0 = loadPosition(positions, i0); |
| float3 p1 = loadPosition(positions, i1); |
| float3 p2 = loadPosition(positions, i2); |
| float3 center = (((p0 + p1) + p2) / 3.0f); |
| float3 voxelPos = toVoxelPos(uniforms, center); |
| uint voxelIndex = toIndex1D(uniforms.gridSize, voxelPos); |
| int triangleOffset = atomic_fetch_add_explicit(&(LUT.values[voxelIndex]), 1, memory_order_relaxed); |
| } |
| |
| kernel void main_sort_triangles(uint3 GlobalInvocationID [[thread_position_in_grid]], constant Uniforms& uniforms [[buffer(0)]], device Dbg& dbg [[buffer(1)]], device AU32s& counters [[buffer(2)]], device U32s& indices [[buffer(3)]], device F32s& positions [[buffer(4)]], device AI32s& LUT [[buffer(5)]]) { |
| main_sort_triangles_inner(uniforms, dbg, counters, indices, positions, LUT, GlobalInvocationID); |
| return; |
| } |
| |