test/bug/tint/942.wgsl.expected.msl - tint - Git at Google

 #include <metal_stdlib>

 using namespace metal;
 // Blur parameters, bound as a constant buffer at [[buffer(0)]] by the kernel
 // entry point. The /* 0x.... */ markers give each field's byte offset.
 struct Params {
   // Number of taps averaged per output texel; (filterDim - 1) / 2 is used as
   // the offset that centres the filter window.
   /* 0x0000 */ uint filterDim;
   // Horizontal texel stride between consecutive threadgroups: multiplied by
   // WorkGroupID.x when the base texel index is computed.
   /* 0x0004 */ uint blockDim;
 };
 // Axis-swap switch, bound as a constant buffer at [[buffer(1)]] by the kernel
 // entry point.
 struct Flip {
   // When non-zero, the x and y components of both the load index and the
   // write index are swapped (index = index.yx).
   /* 0x0000 */ uint value;
 };
 // Struct wrapper around a fixed array of 256 float3 values; used as one row of
 // tint_array_wrapper.
 struct tint_array_wrapper_1 {
   float3 arr[256];
 };
 // Struct wrapper around 4 rows of 256 float3 values (1024 elements in total).
 // The kernel entry point declares one instance of this in threadgroup memory.
 struct tint_array_wrapper {
   tint_array_wrapper_1 arr[4];
 };

 // One-dimensional averaging filter over a 4 x 256 tile held in threadgroup
 // memory.
 //
 // Phases (separated by threadgroup barriers):
 //   1. Zero-fill the tile.
 //   2. Each invocation samples a 4 x 4 patch of the source texture into the tile.
 //   3. Each invocation averages params.filterDim consecutive tile entries per
 //      output texel and writes the result to the destination texture.
 //
 // params                 - filter width (filterDim) and per-group stride (blockDim).
 // flip                   - when flip.value != 0, x/y of texel indices are swapped.
 // WorkGroupID            - threadgroup position in the grid.
 // LocalInvocationID      - thread position inside the threadgroup.
 // local_invocation_index - linear thread index inside the threadgroup.
 // tint_symbol_1          - the threadgroup tile.
 // tint_symbol_2 / _3     - source texture and the sampler used to read it.
 // tint_symbol_4          - destination texture.
 //
 // NOTE(review): for an even params.filterDim the last tap index
 // (center + filterDim - 1 - halfWidth) can reach 256, one past the end of a
 // tile row - confirm callers only supply odd widths.
 void tint_symbol_inner(
     constant Params& params,
     constant Flip& flip,
     uint3 WorkGroupID,
     uint3 LocalInvocationID,
     uint local_invocation_index,
     threadgroup tint_array_wrapper* const tint_symbol_1,
     texture2d<float, access::sample> tint_symbol_2,
     sampler tint_symbol_3,
     texture2d<float, access::write> tint_symbol_4) {
   // Phase 1: clear all 1024 tile entries, striding by 64 per invocation
   // (presumably the threadgroup size - confirm against the dispatch).
   for (uint slot = local_invocation_index; slot < 1024u; slot += 64u) {
     tint_symbol_1->arr[slot / 256u].arr[slot % 256u] = float3();
   }
   threadgroup_barrier(mem_flags::mem_threadgroup);

   uint const halfWidth = (params.filterDim - 1u) / 2u;
   int2 const texSize = int2(tint_symbol_2.get_width(0), tint_symbol_2.get_height(0));

   // Top-left texel handled by this invocation, shifted left by halfWidth.
   // The subtraction is carried out on the unsigned bit patterns and then
   // reinterpreted as signed.
   uint2 const groupOrigin = WorkGroupID.xy * uint2(params.blockDim, 4u);
   uint2 const localOrigin = LocalInvocationID.xy * uint2(4u, 1u);
   int2 const origin = as_type<int2>(as_type<uint2>(int2(groupOrigin + localOrigin)) - as_type<uint2>(int2(int(halfWidth), 0)));

   // Phase 2: fill this invocation's 4 x 4 patch of the tile from the source.
   for (uint row = 0u; row < 4u; ++row) {
     for (uint col = 0u; col < 4u; ++col) {
       int2 const step = int2(int(col), int(row));
       int2 const straight = as_type<int2>(as_type<uint2>(origin) + as_type<uint2>(step));
       int2 const texel = (flip.value != 0u) ? straight.yx : straight;
       float2 const uv = (float2(texel) + float2(0.25f, 0.25f)) / float2(texSize);
       tint_symbol_1->arr[row].arr[(4u * LocalInvocationID.x) + col] = tint_symbol_2.sample(tint_symbol_3, uv, level(0.0f)).rgb;
     }
   }
   threadgroup_barrier(mem_flags::mem_threadgroup);

   // Phase 3: average filterDim neighbouring tile entries and store the result.
   for (uint row = 0u; row < 4u; ++row) {
     for (uint col = 0u; col < 4u; ++col) {
       int2 const step = int2(int(col), int(row));
       int2 const straight = as_type<int2>(as_type<uint2>(origin) + as_type<uint2>(step));
       int2 const target = (flip.value != 0u) ? straight.yx : straight;

       uint const center = (4u * LocalInvocationID.x) + col;
       bool const insideWindow = (center >= halfWidth) && (center < (256u - halfWidth));
       // Skip columns too close to the tile edge and texels outside the texture.
       if (!insideWindow || !all(target < texSize)) {
         continue;
       }

       float const weight = 1.0f / float(params.filterDim);
       float3 sum = float3(0.0f, 0.0f, 0.0f);
       for (uint tap = 0u; tap < params.filterDim; ++tap) {
         uint const src = (center + tap) - halfWidth;
         sum += weight * tint_symbol_1->arr[row].arr[src];
       }
       tint_symbol_4.write(float4(sum, 1.0f), uint2(target));
     }
   }
 }

 // Compute entry point. Declares the 4 x 256 float3 tile in threadgroup memory
 // and forwards it, together with every bound resource and built-in input, to
 // tint_symbol_inner.
 //
 // Bindings: source texture at texture(0), sampler at sampler(0), destination
 // texture at texture(1), Params at buffer(0), Flip at buffer(1).
 kernel void tint_symbol(
     texture2d<float, access::sample> tint_symbol_6 [[texture(0)]],
     sampler tint_symbol_7 [[sampler(0)]],
     texture2d<float, access::write> tint_symbol_8 [[texture(1)]],
     uint3 WorkGroupID [[threadgroup_position_in_grid]],
     uint3 LocalInvocationID [[thread_position_in_threadgroup]],
     uint local_invocation_index [[thread_index_in_threadgroup]],
     constant Params& params [[buffer(0)]],
     constant Flip& flip [[buffer(1)]]) {
   threadgroup tint_array_wrapper tile;
   tint_symbol_inner(params, flip, WorkGroupID, LocalInvocationID, local_invocation_index, &tile, tint_symbol_6, tint_symbol_7, tint_symbol_8);
 }
	#include <metal_stdlib>

	using namespace metal;
	// Blur parameters, bound as a constant buffer at [[buffer(0)]] by the kernel
	// entry point. The /* 0x.... */ markers give each field's byte offset.
	struct Params {
	// Number of taps averaged per output texel; (filterDim - 1) / 2 is used as
	// the offset that centres the filter window.
	/* 0x0000 */ uint filterDim;
	// Horizontal texel stride between consecutive threadgroups: multiplied by
	// WorkGroupID.x when the base texel index is computed.
	/* 0x0004 */ uint blockDim;
	};
	// Axis-swap switch, bound as a constant buffer at [[buffer(1)]] by the kernel
	// entry point.
	struct Flip {
	// When non-zero, the x and y components of both the load index and the
	// write index are swapped (index = index.yx).
	/* 0x0000 */ uint value;
	};
	// Struct wrapper around a fixed array of 256 float3 values; used as one row of
	// tint_array_wrapper.
	struct tint_array_wrapper_1 {
	float3 arr[256];
	};
	// Struct wrapper around 4 rows of 256 float3 values (1024 elements in total).
	// The kernel entry point declares one instance of this in threadgroup memory.
	struct tint_array_wrapper {
	tint_array_wrapper_1 arr[4];
	};

	// One-dimensional averaging filter over a 4 x 256 tile held in threadgroup
	// memory.
	//
	// Phases (separated by threadgroup barriers):
	//   1. Zero-fill the tile.
	//   2. Each invocation samples a 4 x 4 patch of the source texture into the tile.
	//   3. Each invocation averages params.filterDim consecutive tile entries per
	//      output texel and writes the result to the destination texture.
	//
	// params                 - filter width (filterDim) and per-group stride (blockDim).
	// flip                   - when flip.value != 0, x/y of texel indices are swapped.
	// WorkGroupID            - threadgroup position in the grid.
	// LocalInvocationID      - thread position inside the threadgroup.
	// local_invocation_index - linear thread index inside the threadgroup.
	// tint_symbol_1          - the threadgroup tile.
	// tint_symbol_2 / _3     - source texture and the sampler used to read it.
	// tint_symbol_4          - destination texture.
	//
	// NOTE(review): for an even params.filterDim the last tap index
	// (center + filterDim - 1 - filterOffset) can reach 256, one past the end of
	// a tile row - confirm callers only supply odd widths.
	void tint_symbol_inner(constant Params& params, constant Flip& flip, uint3 WorkGroupID, uint3 LocalInvocationID, uint local_invocation_index, threadgroup tint_array_wrapper* const tint_symbol_1, texture2d<float, access::sample> tint_symbol_2, sampler tint_symbol_3, texture2d<float, access::write> tint_symbol_4) {
	  // Phase 1: clear all 1024 tile entries, striding by 64 per invocation
	  // (presumably the threadgroup size - confirm against the dispatch).
	  for(uint idx = local_invocation_index; (idx < 1024u); idx = (idx + 64u)) {
	    uint const i_1 = (idx / 256u);
	    uint const i_2 = (idx % 256u);
	    (*(tint_symbol_1)).arr[i_1].arr[i_2] = float3();
	  }
	  threadgroup_barrier(mem_flags::mem_threadgroup);
	  uint const filterOffset = ((params.filterDim - 1u) / 2u);
	  int2 const dims = int2(tint_symbol_2.get_width(0), tint_symbol_2.get_height(0));
	  // Top-left texel handled by this invocation, shifted left by filterOffset.
	  // The subtraction is carried out on the unsigned bit patterns and then
	  // reinterpreted as signed.
	  int2 const baseIndex = as_type<int2>((as_type<uint2>(int2(((WorkGroupID.xy * uint2(params.blockDim, 4u)) + (LocalInvocationID.xy * uint2(4u, 1u))))) - as_type<uint2>(int2(int(filterOffset), 0))));
	  // Phase 2: fill this invocation's 4 x 4 patch of the tile from the source.
	  for(uint r = 0u; (r < 4u); r = (r + 1u)) {
	    for(uint c = 0u; (c < 4u); c = (c + 1u)) {
	      int2 loadIndex = as_type<int2>((as_type<uint2>(baseIndex) + as_type<uint2>(int2(int(c), int(r)))));
	      if ((flip.value != 0u)) {
	        loadIndex = loadIndex.yx;
	      }
	      // Dereference the tile pointer and scale the thread's x position by 4
	      // (both '*' operators are required here).
	      (*(tint_symbol_1)).arr[r].arr[((4u * LocalInvocationID.x) + c)] = tint_symbol_2.sample(tint_symbol_3, ((float2(loadIndex) + float2(0.25f, 0.25f)) / float2(dims)), level(0.0f)).rgb;
	    }
	  }
	  threadgroup_barrier(mem_flags::mem_threadgroup);
	  // Phase 3: average filterDim neighbouring tile entries and store the result.
	  for(uint r = 0u; (r < 4u); r = (r + 1u)) {
	    for(uint c = 0u; (c < 4u); c = (c + 1u)) {
	      int2 writeIndex = as_type<int2>((as_type<uint2>(baseIndex) + as_type<uint2>(int2(int(c), int(r)))));
	      if ((flip.value != 0u)) {
	        writeIndex = writeIndex.yx;
	      }
	      uint const center = ((4u * LocalInvocationID.x) + c);
	      // Only columns at least filterOffset away from either tile edge, and
	      // texels inside the texture, produce output.
	      if ((((center >= filterOffset) && (center < (256u - filterOffset))) && all((writeIndex < dims)))) {
	        float3 acc = float3(0.0f, 0.0f, 0.0f);
	        for(uint f = 0u; (f < params.filterDim); f = (f + 1u)) {
	          uint i = ((center + f) - filterOffset);
	          acc = (acc + ((1.0f / float(params.filterDim)) * (*(tint_symbol_1)).arr[r].arr[i]));
	        }
	        tint_symbol_4.write(float4(acc, 1.0f), uint2(writeIndex));
	      }
	    }
	  }
	}

	// Compute entry point. Declares the 4 x 256 float3 tile in threadgroup memory
	// and forwards it, together with every bound resource and built-in input, to
	// tint_symbol_inner.
	//
	// Bindings: source texture at texture(0), sampler at sampler(0), destination
	// texture at texture(1), Params at buffer(0), Flip at buffer(1).
	kernel void tint_symbol(
	    texture2d<float, access::sample> tint_symbol_6 [[texture(0)]],
	    sampler tint_symbol_7 [[sampler(0)]],
	    texture2d<float, access::write> tint_symbol_8 [[texture(1)]],
	    uint3 WorkGroupID [[threadgroup_position_in_grid]],
	    uint3 LocalInvocationID [[thread_position_in_threadgroup]],
	    uint local_invocation_index [[thread_index_in_threadgroup]],
	    constant Params& params [[buffer(0)]],
	    constant Flip& flip [[buffer(1)]]) {
	  threadgroup tint_array_wrapper tile;
	  tint_symbol_inner(params, flip, WorkGroupID, LocalInvocationID, local_invocation_index, &tile, tint_symbol_6, tint_symbol_7, tint_symbol_8);
	}