// test/bug/tint/1113.wgsl.expected.msl - tint - Git at Google

 #include <metal_stdlib>

 using namespace metal;

 // matrix * packed_vec: converts the packed right-hand operand to a regular
 // vec<T, N> and then applies the matrix * vec operator.
 template<typename T, int N, int M>
 inline auto operator*(matrix<T, N, M> lhs, packed_vec<T, N> rhs) {
   vec<T, N> const unpacked = vec<T, N>(rhs);
   return lhs * unpacked;
 }

 // packed_vec * matrix: converts the packed left-hand operand to a regular
 // vec<T, M> and then applies the vec * matrix operator.
 template<typename T, int N, int M>
 inline auto operator*(packed_vec<T, M> lhs, matrix<T, N, M> rhs) {
   vec<T, M> const unpacked = vec<T, M>(lhs);
   return unpacked * rhs;
 }

 // Parameters read by the compute kernels in this file; the kernels receive it
 // as `constant Uniforms&` at [[buffer(0)]]. The leading /* 0x.... */ comments
 // give each member's byte offset.
 struct Uniforms {
   // Triangle indices >= this value make main_count / main_sort_triangles return early.
   /* 0x0000 */ uint numTriangles;
   // Number of voxels along one axis of the grid (used as gridSize^3 and as index strides).
   /* 0x0004 */ uint gridSize;
   // pad1 / pad2 are not read anywhere in this file.
   /* 0x0008 */ uint pad1;
   /* 0x000c */ uint pad2;
   // Bounding-box minimum corner, read by toVoxelPos. Packed: occupies 12 bytes.
   /* 0x0010 */ packed_float3 bbMin;
   // 4 filler bytes so that bbMax starts at offset 0x20.
   /* 0x001c */ int8_t tint_pad[4];
   // Bounding-box maximum corner, read by toVoxelPos. Packed: occupies 12 bytes.
   /* 0x0020 */ packed_float3 bbMax;
   // 4 filler bytes bringing the struct to 0x30 bytes.
   /* 0x002c */ int8_t tint_pad_1[4];
 };
 // Debug / scratch buffer; the kernels receive it as `device Dbg&` at
 // [[buffer(1)]]. The leading /* 0x.... */ comments give each member's byte offset.
 struct Dbg {
   // Running total advanced atomically by main_create_lut_inner; the value
   // before each addition becomes that voxel's LUT entry.
   /* 0x0000 */ atomic_uint offsetCounter;
   // pad0..pad2 are not accessed anywhere in this file.
   /* 0x0004 */ uint pad0;
   /* 0x0008 */ uint pad1;
   /* 0x000c */ uint pad2;
   // value0 is written (with gridSize) by invocation 0 of main_count_inner;
   // value1 is only read (and discarded) by doIgnore; value2/value3 are untouched here.
   /* 0x0010 */ uint value0;
   /* 0x0014 */ uint value1;
   /* 0x0018 */ uint value2;
   /* 0x001c */ uint value3;
   // value_f32_0..2 receive center.x/y/z from invocation 0 of main_count_inner;
   // value_f32_3 is untouched in this file.
   /* 0x0020 */ float value_f32_0;
   /* 0x0024 */ float value_f32_1;
   /* 0x0028 */ float value_f32_2;
   /* 0x002c */ float value_f32_3;
 };
 // Wrapper around a float array; used for the `positions` buffer.
 // NOTE(review): declared with length 1 but indexed with computed indices
 // (see loadPosition) — presumably stands in for a runtime-sized array; confirm.
 struct F32s {
   /* 0x0000 */ float values[1];
 };
 // Wrapper around a uint array; used for the `indices` buffer.
 // NOTE(review): declared with length 1 but indexed with computed indices —
 // presumably stands in for a runtime-sized array; confirm.
 struct U32s {
   /* 0x0000 */ uint values[1];
 };
 // Wrapper around an int array. Not referenced by any function in this file,
 // and (unlike its sibling structs) its member carries no offset comment.
 struct I32s {
   int values[1];
 };
 // Wrapper around an array of atomic uints; used for the `counters` buffer,
 // whose elements are only accessed through atomic_*_explicit calls.
 // NOTE(review): declared with length 1 but indexed by voxel index —
 // presumably stands in for a runtime-sized array; confirm.
 struct AU32s {
   /* 0x0000 */ atomic_uint values[1];
 };
 // Wrapper around an array of atomic ints; used for the `LUT` buffer,
 // whose elements are only accessed through atomic_*_explicit calls.
 // NOTE(review): declared with length 1 but indexed by voxel index —
 // presumably stands in for a runtime-sized array; confirm.
 struct AI32s {
   /* 0x0000 */ atomic_int values[1];
 };

 // Maps `position` into grid coordinates: each component is offset by bbMin,
 // scaled by gridSize, and divided by the longest edge of the bounding box.
 // The same divisor is used for all three axes.
 float3 toVoxelPos(constant Uniforms& uniforms, float3 position) {
   // Unpack the packed_float3 bounds into regular float3 values.
   float3 const lo = float3(uniforms.bbMin.x, uniforms.bbMin.y, uniforms.bbMin.z);
   float3 const hi = float3(uniforms.bbMax.x, uniforms.bbMax.y, uniforms.bbMax.z);
   float3 const extent = (hi - lo);
   float const longestEdge = fmax(fmax(extent.x, extent.y), extent.z);
   float const cells = float(uniforms.gridSize);
   return float3(((cells * (position.x - lo.x)) / longestEdge),
                 ((cells * (position.y - lo.y)) / longestEdge),
                 ((cells * (position.z - lo.z)) / longestEdge));
 }

 // Flattens a voxel position into a linear index: x + gridSize * y + gridSize^2 * z,
 // after converting the float coordinates to uint3.
 uint toIndex1D(uint gridSize, float3 voxelPos) {
   uint3 const cell = uint3(voxelPos);
   uint const rowPart = (gridSize * cell.y);
   uint const slicePart = ((gridSize * gridSize) * cell.z);
   return ((cell.x + rowPart) + slicePart);
 }

 // Splits a linear index back into (x, y, z) grid coordinates for a cube with
 // `gridSize` cells per axis.
 uint3 toIndex3D(uint gridSize, uint index) {
   uint const sliceSize = (gridSize * gridSize);
   uint const slice = (index / sliceSize);
   // What is left of the index once whole z-slices are removed.
   uint const withinSlice = (index - (sliceSize * slice));
   return uint3((index % gridSize), (withinSlice / gridSize), slice);
 }

 // Reads three consecutive floats starting at element 3 * vertexIndex of
 // `positions` and returns them as a float3.
 float3 loadPosition(device F32s& positions, uint vertexIndex) {
   float const px = positions.values[((3u * vertexIndex) + 0u)];
   float const py = positions.values[((3u * vertexIndex) + 1u)];
   float const pz = positions.values[((3u * vertexIndex) + 2u)];
   return float3(px, py, pz);
 }

 // Reads one value through each of the six bindings and discards it.
 // NOTE(review): presumably exists so that every binding counts as used by
 // each entry point — confirm against the originating WGSL.
 void doIgnore(constant Uniforms& uniforms, device Dbg& dbg, device AU32s& counters, device U32s& indices, device F32s& positions, device AI32s& LUT) {
   uint const ignoredNumTriangles = uniforms.numTriangles;
   uint const ignoredDbgValue = dbg.value1;
   uint const ignoredCounter = atomic_load_explicit(&(counters.values[0]), memory_order_relaxed);
   uint const ignoredIndex = indices.values[0];
   float const ignoredPosition = positions.values[0];
   int const ignoredLutEntry = atomic_load_explicit(&(LUT.values[0]), memory_order_relaxed);
 }

 // Body of the main_count kernel. For the triangle selected by
 // GlobalInvocationID.x: loads its three vertices, computes their centroid,
 // maps it to a voxel, and atomically increments that voxel's counter.
 // Invocation 0 additionally records gridSize and the centroid in `dbg`.
 void main_count_inner(constant Uniforms& uniforms, device Dbg& dbg, device AU32s& counters, device U32s& indices, device F32s& positions, device AI32s& LUT, uint3 GlobalInvocationID) {
   uint const tri = GlobalInvocationID.x;
   // Out-of-range invocations leave before touching any other buffer.
   if ((tri >= uniforms.numTriangles)) {
     return;
   }
   doIgnore(uniforms, dbg, counters, indices, positions, LUT);

   // Vertex indices of the triangle's three corners.
   uint const vertexA = indices.values[((3u * tri) + 0u)];
   uint const vertexB = indices.values[((3u * tri) + 1u)];
   uint const vertexC = indices.values[((3u * tri) + 2u)];

   float3 const cornerA = loadPosition(positions, vertexA);
   float3 const cornerB = loadPosition(positions, vertexB);
   float3 const cornerC = loadPosition(positions, vertexC);
   float3 const center = (((cornerA + cornerB) + cornerC) / 3.0f);

   float3 const gridPos = toVoxelPos(uniforms, center);
   uint const voxel = toIndex1D(uniforms.gridSize, gridPos);
   // The previous counter value is fetched but not used.
   uint const previousCount = atomic_fetch_add_explicit(&(counters.values[voxel]), 1u, memory_order_relaxed);

   if ((tri != 0u)) {
     return;
   }
   dbg.value0 = uniforms.gridSize;
   dbg.value_f32_0 = center.x;
   dbg.value_f32_1 = center.y;
   dbg.value_f32_2 = center.z;
 }

 // Compute entry point: forwards the thread position and the six buffer
 // bindings (buffers 0-5) to main_count_inner.
 kernel void main_count(
     uint3 GlobalInvocationID [[thread_position_in_grid]],
     constant Uniforms& uniforms [[buffer(0)]],
     device Dbg& dbg [[buffer(1)]],
     device AU32s& counters [[buffer(2)]],
     device U32s& indices [[buffer(3)]],
     device F32s& positions [[buffer(4)]],
     device AI32s& LUT [[buffer(5)]]) {
   main_count_inner(uniforms, dbg, counters, indices, positions, LUT, GlobalInvocationID);
 }

 // Body of the main_create_lut kernel. For the voxel selected by
 // GlobalInvocationID.x: if its counter is non-zero, atomically adds the count
 // to dbg.offsetCounter and stores the pre-addition value in LUT; otherwise
 // stores -1.
 void main_create_lut_inner(constant Uniforms& uniforms, device Dbg& dbg, device AU32s& counters, device U32s& indices, device F32s& positions, device AI32s& LUT, uint3 GlobalInvocationID) {
   uint const voxel = GlobalInvocationID.x;
   doIgnore(uniforms, dbg, counters, indices, positions, LUT);

   uint const voxelCount = ((uniforms.gridSize * uniforms.gridSize) * uniforms.gridSize);
   if ((voxel < voxelCount)) {
     uint const trianglesInVoxel = atomic_load_explicit(&(counters.values[voxel]), memory_order_relaxed);
     // The fetch-add only executes on the non-empty branch of the conditional.
     int const lutEntry = (trianglesInVoxel > 0u)
         ? int(atomic_fetch_add_explicit(&(dbg.offsetCounter), trianglesInVoxel, memory_order_relaxed))
         : -1;
     atomic_store_explicit(&(LUT.values[voxel]), lutEntry, memory_order_relaxed);
   }
 }

 // Compute entry point: forwards the thread position and the six buffer
 // bindings (buffers 0-5) to main_create_lut_inner.
 kernel void main_create_lut(
     uint3 GlobalInvocationID [[thread_position_in_grid]],
     constant Uniforms& uniforms [[buffer(0)]],
     device Dbg& dbg [[buffer(1)]],
     device AU32s& counters [[buffer(2)]],
     device U32s& indices [[buffer(3)]],
     device F32s& positions [[buffer(4)]],
     device AI32s& LUT [[buffer(5)]]) {
   main_create_lut_inner(uniforms, dbg, counters, indices, positions, LUT, GlobalInvocationID);
 }

 // Body of the main_sort_triangles kernel. For the triangle selected by
 // GlobalInvocationID.x: computes the voxel of its centroid and atomically
 // increments that voxel's LUT entry. The value returned by the increment is
 // not used further.
 void main_sort_triangles_inner(constant Uniforms& uniforms, device Dbg& dbg, device AU32s& counters, device U32s& indices, device F32s& positions, device AI32s& LUT, uint3 GlobalInvocationID) {
   uint const tri = GlobalInvocationID.x;
   doIgnore(uniforms, dbg, counters, indices, positions, LUT);
   if ((tri >= uniforms.numTriangles)) {
     return;
   }

   // Vertex indices of the triangle's three corners.
   uint const vertexA = indices.values[((3u * tri) + 0u)];
   uint const vertexB = indices.values[((3u * tri) + 1u)];
   uint const vertexC = indices.values[((3u * tri) + 2u)];

   float3 const cornerA = loadPosition(positions, vertexA);
   float3 const cornerB = loadPosition(positions, vertexB);
   float3 const cornerC = loadPosition(positions, vertexC);
   float3 const center = (((cornerA + cornerB) + cornerC) / 3.0f);

   float3 const gridPos = toVoxelPos(uniforms, center);
   uint const voxel = toIndex1D(uniforms.gridSize, gridPos);
   int const slot = atomic_fetch_add_explicit(&(LUT.values[voxel]), 1, memory_order_relaxed);
 }

 // Compute entry point: forwards the thread position and the six buffer
 // bindings (buffers 0-5) to main_sort_triangles_inner.
 kernel void main_sort_triangles(
     uint3 GlobalInvocationID [[thread_position_in_grid]],
     constant Uniforms& uniforms [[buffer(0)]],
     device Dbg& dbg [[buffer(1)]],
     device AU32s& counters [[buffer(2)]],
     device U32s& indices [[buffer(3)]],
     device F32s& positions [[buffer(4)]],
     device AI32s& LUT [[buffer(5)]]) {
   main_sort_triangles_inner(uniforms, dbg, counters, indices, positions, LUT, GlobalInvocationID);
 }
	#include <metal_stdlib>

	using namespace metal;

	// matrix * packed_vec: converts the packed right-hand operand to a regular
	// vec<T, N> and then applies the matrix * vec operator.
	template<typename T, int N, int M>
	inline auto operator*(matrix<T, N, M> lhs, packed_vec<T, N> rhs) {
	  vec<T, N> const unpacked = vec<T, N>(rhs);
	  return lhs * unpacked;
	}

	// packed_vec * matrix: converts the packed left-hand operand to a regular
	// vec<T, M> and then applies the vec * matrix operator.
	template<typename T, int N, int M>
	inline auto operator*(packed_vec<T, M> lhs, matrix<T, N, M> rhs) {
	  vec<T, M> const unpacked = vec<T, M>(lhs);
	  return unpacked * rhs;
	}

	// Parameters read by the compute kernels in this file; the kernels receive it
	// as `constant Uniforms&` at [[buffer(0)]]. The leading /* 0x.... */ comments
	// give each member's byte offset.
	struct Uniforms {
	// Triangle indices >= this value make main_count / main_sort_triangles return early.
	/* 0x0000 */ uint numTriangles;
	// Number of voxels along one axis of the grid (used as gridSize^3 and as index strides).
	/* 0x0004 */ uint gridSize;
	// pad1 / pad2 are not read anywhere in this file.
	/* 0x0008 */ uint pad1;
	/* 0x000c */ uint pad2;
	// Bounding-box minimum corner, read by toVoxelPos. Packed: occupies 12 bytes.
	/* 0x0010 */ packed_float3 bbMin;
	// 4 filler bytes so that bbMax starts at offset 0x20.
	/* 0x001c */ int8_t tint_pad[4];
	// Bounding-box maximum corner, read by toVoxelPos. Packed: occupies 12 bytes.
	/* 0x0020 */ packed_float3 bbMax;
	// 4 filler bytes bringing the struct to 0x30 bytes.
	/* 0x002c */ int8_t tint_pad_1[4];
	};
	// Debug / scratch buffer; the kernels receive it as `device Dbg&` at
	// [[buffer(1)]]. The leading /* 0x.... */ comments give each member's byte offset.
	struct Dbg {
	// Running total advanced atomically by main_create_lut_inner; the value
	// before each addition becomes that voxel's LUT entry.
	/* 0x0000 */ atomic_uint offsetCounter;
	// pad0..pad2 are not accessed anywhere in this file.
	/* 0x0004 */ uint pad0;
	/* 0x0008 */ uint pad1;
	/* 0x000c */ uint pad2;
	// value0 is written (with gridSize) by invocation 0 of main_count_inner;
	// value1 is only read (and discarded) by doIgnore; value2/value3 are untouched here.
	/* 0x0010 */ uint value0;
	/* 0x0014 */ uint value1;
	/* 0x0018 */ uint value2;
	/* 0x001c */ uint value3;
	// value_f32_0..2 receive center.x/y/z from invocation 0 of main_count_inner;
	// value_f32_3 is untouched in this file.
	/* 0x0020 */ float value_f32_0;
	/* 0x0024 */ float value_f32_1;
	/* 0x0028 */ float value_f32_2;
	/* 0x002c */ float value_f32_3;
	};
	// Wrapper around a float array; used for the `positions` buffer.
	// NOTE(review): declared with length 1 but indexed with computed indices
	// (see loadPosition) — presumably stands in for a runtime-sized array; confirm.
	struct F32s {
	/* 0x0000 */ float values[1];
	};
	// Wrapper around a uint array; used for the `indices` buffer.
	// NOTE(review): declared with length 1 but indexed with computed indices —
	// presumably stands in for a runtime-sized array; confirm.
	struct U32s {
	/* 0x0000 */ uint values[1];
	};
	// Wrapper around an int array. Not referenced by any function in this file,
	// and (unlike its sibling structs) its member carries no offset comment.
	struct I32s {
	int values[1];
	};
	// Wrapper around an array of atomic uints; used for the `counters` buffer,
	// whose elements are only accessed through atomic_*_explicit calls.
	// NOTE(review): declared with length 1 but indexed by voxel index —
	// presumably stands in for a runtime-sized array; confirm.
	struct AU32s {
	/* 0x0000 */ atomic_uint values[1];
	};
	// Wrapper around an array of atomic ints; used for the `LUT` buffer,
	// whose elements are only accessed through atomic_*_explicit calls.
	// NOTE(review): declared with length 1 but indexed by voxel index —
	// presumably stands in for a runtime-sized array; confirm.
	struct AI32s {
	/* 0x0000 */ atomic_int values[1];
	};

	// Maps `position` into grid coordinates: each component is offset by bbMin,
	// scaled by gridSize, and divided by the longest edge of the bounding box.
	// The same divisor is used for all three axes.
	float3 toVoxelPos(constant Uniforms& uniforms, float3 position) {
	  // Unpack the packed_float3 bounds into regular float3 values.
	  float3 const lo = float3(uniforms.bbMin.x, uniforms.bbMin.y, uniforms.bbMin.z);
	  float3 const hi = float3(uniforms.bbMax.x, uniforms.bbMax.y, uniforms.bbMax.z);
	  float3 const extent = (hi - lo);
	  float const longestEdge = fmax(fmax(extent.x, extent.y), extent.z);
	  float const cells = float(uniforms.gridSize);
	  return float3(((cells * (position.x - lo.x)) / longestEdge),
	                ((cells * (position.y - lo.y)) / longestEdge),
	                ((cells * (position.z - lo.z)) / longestEdge));
	}

	// Flattens a voxel position into a linear index: x + gridSize * y + gridSize^2 * z,
	// after converting the float coordinates to uint3.
	uint toIndex1D(uint gridSize, float3 voxelPos) {
	  uint3 const cell = uint3(voxelPos);
	  uint const rowPart = (gridSize * cell.y);
	  uint const slicePart = ((gridSize * gridSize) * cell.z);
	  return ((cell.x + rowPart) + slicePart);
	}

	// Splits a linear index back into (x, y, z) grid coordinates for a cube with
	// `gridSize` cells per axis.
	uint3 toIndex3D(uint gridSize, uint index) {
	  uint const sliceSize = (gridSize * gridSize);
	  uint const slice = (index / sliceSize);
	  // What is left of the index once whole z-slices are removed.
	  uint const withinSlice = (index - (sliceSize * slice));
	  return uint3((index % gridSize), (withinSlice / gridSize), slice);
	}

	// Reads three consecutive floats starting at element 3 * vertexIndex of
	// `positions` and returns them as a float3.
	float3 loadPosition(device F32s& positions, uint vertexIndex) {
	  float const px = positions.values[((3u * vertexIndex) + 0u)];
	  float const py = positions.values[((3u * vertexIndex) + 1u)];
	  float const pz = positions.values[((3u * vertexIndex) + 2u)];
	  return float3(px, py, pz);
	}

	// Reads one value through each of the six bindings and discards it.
	// NOTE(review): presumably exists so that every binding counts as used by
	// each entry point — confirm against the originating WGSL.
	void doIgnore(constant Uniforms& uniforms, device Dbg& dbg, device AU32s& counters, device U32s& indices, device F32s& positions, device AI32s& LUT) {
	  uint const ignoredNumTriangles = uniforms.numTriangles;
	  uint const ignoredDbgValue = dbg.value1;
	  uint const ignoredCounter = atomic_load_explicit(&(counters.values[0]), memory_order_relaxed);
	  uint const ignoredIndex = indices.values[0];
	  float const ignoredPosition = positions.values[0];
	  int const ignoredLutEntry = atomic_load_explicit(&(LUT.values[0]), memory_order_relaxed);
	}

	// Body of the main_count kernel. For the triangle selected by
	// GlobalInvocationID.x: loads its three vertices, computes their centroid,
	// maps it to a voxel, and atomically increments that voxel's counter.
	// Invocation 0 additionally records gridSize and the centroid in `dbg`.
	void main_count_inner(constant Uniforms& uniforms, device Dbg& dbg, device AU32s& counters, device U32s& indices, device F32s& positions, device AI32s& LUT, uint3 GlobalInvocationID) {
	  uint const tri = GlobalInvocationID.x;
	  // Out-of-range invocations leave before touching any other buffer.
	  if ((tri >= uniforms.numTriangles)) {
	    return;
	  }
	  doIgnore(uniforms, dbg, counters, indices, positions, LUT);

	  // Vertex indices of the triangle's three corners.
	  uint const vertexA = indices.values[((3u * tri) + 0u)];
	  uint const vertexB = indices.values[((3u * tri) + 1u)];
	  uint const vertexC = indices.values[((3u * tri) + 2u)];

	  float3 const cornerA = loadPosition(positions, vertexA);
	  float3 const cornerB = loadPosition(positions, vertexB);
	  float3 const cornerC = loadPosition(positions, vertexC);
	  float3 const center = (((cornerA + cornerB) + cornerC) / 3.0f);

	  float3 const gridPos = toVoxelPos(uniforms, center);
	  uint const voxel = toIndex1D(uniforms.gridSize, gridPos);
	  // The previous counter value is fetched but not used.
	  uint const previousCount = atomic_fetch_add_explicit(&(counters.values[voxel]), 1u, memory_order_relaxed);

	  if ((tri != 0u)) {
	    return;
	  }
	  dbg.value0 = uniforms.gridSize;
	  dbg.value_f32_0 = center.x;
	  dbg.value_f32_1 = center.y;
	  dbg.value_f32_2 = center.z;
	}

	// Compute entry point: forwards the thread position and the six buffer
	// bindings (buffers 0-5) to main_count_inner.
	kernel void main_count(
	    uint3 GlobalInvocationID [[thread_position_in_grid]],
	    constant Uniforms& uniforms [[buffer(0)]],
	    device Dbg& dbg [[buffer(1)]],
	    device AU32s& counters [[buffer(2)]],
	    device U32s& indices [[buffer(3)]],
	    device F32s& positions [[buffer(4)]],
	    device AI32s& LUT [[buffer(5)]]) {
	  main_count_inner(uniforms, dbg, counters, indices, positions, LUT, GlobalInvocationID);
	}

	// Body of the main_create_lut kernel. For the voxel selected by
	// GlobalInvocationID.x: if its counter is non-zero, atomically adds the count
	// to dbg.offsetCounter and stores the pre-addition value in LUT; otherwise
	// stores -1.
	void main_create_lut_inner(constant Uniforms& uniforms, device Dbg& dbg, device AU32s& counters, device U32s& indices, device F32s& positions, device AI32s& LUT, uint3 GlobalInvocationID) {
	  uint const voxel = GlobalInvocationID.x;
	  doIgnore(uniforms, dbg, counters, indices, positions, LUT);

	  uint const voxelCount = ((uniforms.gridSize * uniforms.gridSize) * uniforms.gridSize);
	  if ((voxel < voxelCount)) {
	    uint const trianglesInVoxel = atomic_load_explicit(&(counters.values[voxel]), memory_order_relaxed);
	    // The fetch-add only executes on the non-empty branch of the conditional.
	    int const lutEntry = (trianglesInVoxel > 0u)
	        ? int(atomic_fetch_add_explicit(&(dbg.offsetCounter), trianglesInVoxel, memory_order_relaxed))
	        : -1;
	    atomic_store_explicit(&(LUT.values[voxel]), lutEntry, memory_order_relaxed);
	  }
	}

	// Compute entry point: forwards the thread position and the six buffer
	// bindings (buffers 0-5) to main_create_lut_inner.
	kernel void main_create_lut(
	    uint3 GlobalInvocationID [[thread_position_in_grid]],
	    constant Uniforms& uniforms [[buffer(0)]],
	    device Dbg& dbg [[buffer(1)]],
	    device AU32s& counters [[buffer(2)]],
	    device U32s& indices [[buffer(3)]],
	    device F32s& positions [[buffer(4)]],
	    device AI32s& LUT [[buffer(5)]]) {
	  main_create_lut_inner(uniforms, dbg, counters, indices, positions, LUT, GlobalInvocationID);
	}

	// Body of the main_sort_triangles kernel. For the triangle selected by
	// GlobalInvocationID.x: computes the voxel of its centroid and atomically
	// increments that voxel's LUT entry. The value returned by the increment is
	// not used further.
	void main_sort_triangles_inner(constant Uniforms& uniforms, device Dbg& dbg, device AU32s& counters, device U32s& indices, device F32s& positions, device AI32s& LUT, uint3 GlobalInvocationID) {
	  uint const tri = GlobalInvocationID.x;
	  doIgnore(uniforms, dbg, counters, indices, positions, LUT);
	  if ((tri >= uniforms.numTriangles)) {
	    return;
	  }

	  // Vertex indices of the triangle's three corners.
	  uint const vertexA = indices.values[((3u * tri) + 0u)];
	  uint const vertexB = indices.values[((3u * tri) + 1u)];
	  uint const vertexC = indices.values[((3u * tri) + 2u)];

	  float3 const cornerA = loadPosition(positions, vertexA);
	  float3 const cornerB = loadPosition(positions, vertexB);
	  float3 const cornerC = loadPosition(positions, vertexC);
	  float3 const center = (((cornerA + cornerB) + cornerC) / 3.0f);

	  float3 const gridPos = toVoxelPos(uniforms, center);
	  uint const voxel = toIndex1D(uniforms.gridSize, gridPos);
	  int const slot = atomic_fetch_add_explicit(&(LUT.values[voxel]), 1, memory_order_relaxed);
	}

	// Compute entry point: forwards the thread position and the six buffer
	// bindings (buffers 0-5) to main_sort_triangles_inner.
	kernel void main_sort_triangles(
	    uint3 GlobalInvocationID [[thread_position_in_grid]],
	    constant Uniforms& uniforms [[buffer(0)]],
	    device Dbg& dbg [[buffer(1)]],
	    device AU32s& counters [[buffer(2)]],
	    device U32s& indices [[buffer(3)]],
	    device F32s& positions [[buffer(4)]],
	    device AI32s& LUT [[buffer(5)]]) {
	  main_sort_triangles_inner(uniforms, dbg, counters, indices, positions, LUT, GlobalInvocationID);
	}