blob: 7c842fe7d2e84ad525a823c336f4efa738e6905d [file] [log] [blame]
#version 310 es
#extension GL_AMD_gpu_shader_half_float : require
// Struct-of-columns stand-in for an f16mat2x4: each field holds one
// 4-component half-float column. Used as the element type of the std140
// uniform block and turned back into a native matrix by conv_mat2x4_f16().
struct mat2x4_f16 {
f16vec4 col0;
f16vec4 col1;
};
// Uniform block at binding 0 using std140 layout. It exposes four matrices
// in their decomposed (one struct field per column) form as u.inner[0..3].
layout(binding = 0, std140) uniform u_block_std140_ubo {
mat2x4_f16 inner[4];
} u;
// Shared array of four native half-float matrices; f() zero-fills it and
// then overwrites it with values read from the uniform block `u`.
shared f16mat2x4 w[4];
// Reassembles a native f16mat2x4 from the struct-of-columns form:
// col0 becomes the first column and col1 the second.
f16mat2x4 conv_mat2x4_f16(mat2x4_f16 val) {
  f16mat2x4 native = f16mat2x4(val.col0, val.col1);
  return native;
}
// Converts a four-element array of struct-of-columns matrices into an array
// of native f16mat2x4 values, element by element and in the same order.
f16mat2x4[4] conv_arr4_mat2x4_f16(mat2x4_f16 val[4]) {
  // Every slot is produced by conv_mat2x4_f16(), so the array can be built
  // directly with the array constructor.
  return f16mat2x4[4](
      conv_mat2x4_f16(val[0]),
      conv_mat2x4_f16(val[1]),
      conv_mat2x4_f16(val[2]),
      conv_mat2x4_f16(val[3]));
}
// Body of the compute entry point.
//
// First zero-fills the shared array `w`, starting at this invocation's index,
// and synchronizes with barrier(). It then performs four writes into `w`
// from the uniform block `u`. The writes overlap, so their order matters:
//   1. the whole array, converted from u.inner;
//   2. element 1, converted from u.inner[2];
//   3. column 0 of element 1, from the .ywxz swizzle of u.inner[0].col1;
//   4. the x component of that column, from component 0 of u.inner[0].col1.
void f(uint local_invocation_index) {
  // Zero-initialize the shared matrices before any real data is stored.
  for (uint slot = local_invocation_index; slot < 4u; slot++) {
    w[slot] = f16mat2x4(f16vec4(0.0hf), f16vec4(0.0hf));
  }
  barrier();

  // Whole-array store.
  w = conv_arr4_mat2x4_f16(u.inner);

  // Single-matrix store.
  w[1] = conv_mat2x4_f16(u.inner[2u]);

  // Column store, using a swizzled copy of the source column.
  w[1][0] = u.inner[0u].col1.ywxz;

  // Scalar store into the column written just above.
  w[1][0].x = u.inner[0u].col1[0u];
}
// Compute entry point: a 1x1x1 workgroup that forwards the built-in local
// invocation index to f().
layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;

void main() {
  f(gl_LocalInvocationIndex);
}