// Read-only raw byte buffer bound to register t0. Every load in this file
// reads from it using explicit byte offsets.
ByteAddressBuffer sb : register(t0);

// Input parameter block for the `main` entry point.
struct tint_symbol_1 {
  // Value of the SV_GroupIndex system semantic; `main` forwards it to main_inner.
  uint idx : SV_GroupIndex;
};

// Reads a float2x2 from `sb` at byte address `offset`.
// The two float2 vectors are 8 bytes apart; the raw uints are reinterpreted
// as floats with asfloat.
float2x2 sb_load_16(uint offset) {
  const float2 v0 = asfloat(sb.Load2(offset));
  const float2 v1 = asfloat(sb.Load2(offset + 8u));
  return float2x2(v0, v1);
}

// Reads a float2x3 from `sb` at byte address `offset`.
// The two float3 vectors are read 16 bytes apart and reinterpreted with asfloat.
float2x3 sb_load_17(uint offset) {
  const float3 v0 = asfloat(sb.Load3(offset));
  const float3 v1 = asfloat(sb.Load3(offset + 16u));
  return float2x3(v0, v1);
}

// Reads a float2x4 from `sb` at byte address `offset`.
// The two float4 vectors are read 16 bytes apart and reinterpreted with asfloat.
float2x4 sb_load_18(uint offset) {
  const float4 v0 = asfloat(sb.Load4(offset));
  const float4 v1 = asfloat(sb.Load4(offset + 16u));
  return float2x4(v0, v1);
}

// Reads a float3x2 from `sb` at byte address `offset`.
// The three float2 vectors are read 8 bytes apart and reinterpreted with asfloat.
float3x2 sb_load_19(uint offset) {
  const float2 v0 = asfloat(sb.Load2(offset));
  const float2 v1 = asfloat(sb.Load2(offset + 8u));
  const float2 v2 = asfloat(sb.Load2(offset + 16u));
  return float3x2(v0, v1, v2);
}

// Reads a float3x3 from `sb` at byte address `offset`.
// The three float3 vectors are read 16 bytes apart and reinterpreted with asfloat.
float3x3 sb_load_20(uint offset) {
  const float3 v0 = asfloat(sb.Load3(offset));
  const float3 v1 = asfloat(sb.Load3(offset + 16u));
  const float3 v2 = asfloat(sb.Load3(offset + 32u));
  return float3x3(v0, v1, v2);
}

// Reads a float3x4 from `sb` at byte address `offset`.
// The three float4 vectors are read 16 bytes apart and reinterpreted with asfloat.
float3x4 sb_load_21(uint offset) {
  const float4 v0 = asfloat(sb.Load4(offset));
  const float4 v1 = asfloat(sb.Load4(offset + 16u));
  const float4 v2 = asfloat(sb.Load4(offset + 32u));
  return float3x4(v0, v1, v2);
}

// Reads a float4x2 from `sb` at byte address `offset`.
// The four float2 vectors are read 8 bytes apart and reinterpreted with asfloat.
float4x2 sb_load_22(uint offset) {
  const float2 v0 = asfloat(sb.Load2(offset));
  const float2 v1 = asfloat(sb.Load2(offset + 8u));
  const float2 v2 = asfloat(sb.Load2(offset + 16u));
  const float2 v3 = asfloat(sb.Load2(offset + 24u));
  return float4x2(v0, v1, v2, v3);
}

// Reads a float4x3 from `sb` at byte address `offset`.
// The four float3 vectors are read 16 bytes apart and reinterpreted with asfloat.
float4x3 sb_load_23(uint offset) {
  const float3 v0 = asfloat(sb.Load3(offset));
  const float3 v1 = asfloat(sb.Load3(offset + 16u));
  const float3 v2 = asfloat(sb.Load3(offset + 32u));
  const float3 v3 = asfloat(sb.Load3(offset + 48u));
  return float4x3(v0, v1, v2, v3);
}

// Reads a float4x4 from `sb` at byte address `offset`.
// The four float4 vectors are read 16 bytes apart and reinterpreted with asfloat.
float4x4 sb_load_24(uint offset) {
  const float4 v0 = asfloat(sb.Load4(offset));
  const float4 v1 = asfloat(sb.Load4(offset + 16u));
  const float4 v2 = asfloat(sb.Load4(offset + 32u));
  const float4 v3 = asfloat(sb.Load4(offset + 48u));
  return float4x4(v0, v1, v2, v3);
}

// Reads a 2x2 float16_t matrix from `sb` at byte address `offset`.
// The two 2-component half vectors are read 4 bytes apart via the templated Load.
matrix<float16_t, 2, 2> sb_load_25(uint offset) {
  const vector<float16_t, 2> v0 = sb.Load<vector<float16_t, 2> >(offset);
  const vector<float16_t, 2> v1 = sb.Load<vector<float16_t, 2> >(offset + 4u);
  return matrix<float16_t, 2, 2>(v0, v1);
}

// Reads a 2x3 float16_t matrix from `sb` at byte address `offset`.
// The two 3-component half vectors are read 8 bytes apart via the templated Load.
matrix<float16_t, 2, 3> sb_load_26(uint offset) {
  const vector<float16_t, 3> v0 = sb.Load<vector<float16_t, 3> >(offset);
  const vector<float16_t, 3> v1 = sb.Load<vector<float16_t, 3> >(offset + 8u);
  return matrix<float16_t, 2, 3>(v0, v1);
}

// Reads a 2x4 float16_t matrix from `sb` at byte address `offset`.
// The two 4-component half vectors are read 8 bytes apart via the templated Load.
matrix<float16_t, 2, 4> sb_load_27(uint offset) {
  const vector<float16_t, 4> v0 = sb.Load<vector<float16_t, 4> >(offset);
  const vector<float16_t, 4> v1 = sb.Load<vector<float16_t, 4> >(offset + 8u);
  return matrix<float16_t, 2, 4>(v0, v1);
}

// Reads a 3x2 float16_t matrix from `sb` at byte address `offset`.
// The three 2-component half vectors are read 4 bytes apart via the templated Load.
matrix<float16_t, 3, 2> sb_load_28(uint offset) {
  const vector<float16_t, 2> v0 = sb.Load<vector<float16_t, 2> >(offset);
  const vector<float16_t, 2> v1 = sb.Load<vector<float16_t, 2> >(offset + 4u);
  const vector<float16_t, 2> v2 = sb.Load<vector<float16_t, 2> >(offset + 8u);
  return matrix<float16_t, 3, 2>(v0, v1, v2);
}

// Reads a 3x3 float16_t matrix from `sb` at byte address `offset`.
// The three 3-component half vectors are read 8 bytes apart via the templated Load.
matrix<float16_t, 3, 3> sb_load_29(uint offset) {
  const vector<float16_t, 3> v0 = sb.Load<vector<float16_t, 3> >(offset);
  const vector<float16_t, 3> v1 = sb.Load<vector<float16_t, 3> >(offset + 8u);
  const vector<float16_t, 3> v2 = sb.Load<vector<float16_t, 3> >(offset + 16u);
  return matrix<float16_t, 3, 3>(v0, v1, v2);
}

// Reads a 3x4 float16_t matrix from `sb` at byte address `offset`.
// The three 4-component half vectors are read 8 bytes apart via the templated Load.
matrix<float16_t, 3, 4> sb_load_30(uint offset) {
  const vector<float16_t, 4> v0 = sb.Load<vector<float16_t, 4> >(offset);
  const vector<float16_t, 4> v1 = sb.Load<vector<float16_t, 4> >(offset + 8u);
  const vector<float16_t, 4> v2 = sb.Load<vector<float16_t, 4> >(offset + 16u);
  return matrix<float16_t, 3, 4>(v0, v1, v2);
}

// Reads a 4x2 float16_t matrix from `sb` at byte address `offset`.
// The four 2-component half vectors are read 4 bytes apart via the templated Load.
matrix<float16_t, 4, 2> sb_load_31(uint offset) {
  const vector<float16_t, 2> v0 = sb.Load<vector<float16_t, 2> >(offset);
  const vector<float16_t, 2> v1 = sb.Load<vector<float16_t, 2> >(offset + 4u);
  const vector<float16_t, 2> v2 = sb.Load<vector<float16_t, 2> >(offset + 8u);
  const vector<float16_t, 2> v3 = sb.Load<vector<float16_t, 2> >(offset + 12u);
  return matrix<float16_t, 4, 2>(v0, v1, v2, v3);
}

// Reads a 4x3 float16_t matrix from `sb` at byte address `offset`.
// The four 3-component half vectors are read 8 bytes apart via the templated Load.
matrix<float16_t, 4, 3> sb_load_32(uint offset) {
  const vector<float16_t, 3> v0 = sb.Load<vector<float16_t, 3> >(offset);
  const vector<float16_t, 3> v1 = sb.Load<vector<float16_t, 3> >(offset + 8u);
  const vector<float16_t, 3> v2 = sb.Load<vector<float16_t, 3> >(offset + 16u);
  const vector<float16_t, 3> v3 = sb.Load<vector<float16_t, 3> >(offset + 24u);
  return matrix<float16_t, 4, 3>(v0, v1, v2, v3);
}

// Reads a 4x4 float16_t matrix from `sb` at byte address `offset`.
// The four 4-component half vectors are read 8 bytes apart via the templated Load.
matrix<float16_t, 4, 4> sb_load_33(uint offset) {
  const vector<float16_t, 4> v0 = sb.Load<vector<float16_t, 4> >(offset);
  const vector<float16_t, 4> v1 = sb.Load<vector<float16_t, 4> >(offset + 8u);
  const vector<float16_t, 4> v2 = sb.Load<vector<float16_t, 4> >(offset + 16u);
  const vector<float16_t, 4> v3 = sb.Load<vector<float16_t, 4> >(offset + 24u);
  return matrix<float16_t, 4, 4>(v0, v1, v2, v3);
}

// Array type returned by sb_load_34.
typedef float3 sb_load_34_ret[2];

// Reads a two-element float3 array from `sb`, starting at byte address
// `offset` with a 16-byte stride between elements.
sb_load_34_ret sb_load_34(uint offset) {
  // Start from an all-zero array, then overwrite both elements.
  float3 result[2] = (float3[2])0;
  for (uint elem = 0u; elem < 2u; ++elem) {
    const uint addr = offset + (elem * 16u);
    result[elem] = asfloat(sb.Load3(addr));
  }
  return result;
}

// Array type returned by sb_load_35.
typedef matrix<float16_t, 4, 2> sb_load_35_ret[2];

// Reads a two-element array of 4x2 float16_t matrices from `sb`, starting at
// byte address `offset` with a 16-byte stride between elements. Each element
// is read by sb_load_31.
sb_load_35_ret sb_load_35(uint offset) {
  // Start from an all-zero array, then overwrite both elements.
  matrix<float16_t, 4, 2> result[2] = (matrix<float16_t, 4, 2>[2])0;
  for (uint elem = 0u; elem < 2u; ++elem) {
    const uint addr = offset + (elem * 16u);
    result[elem] = sb_load_31(addr);
  }
  return result;
}

// Loads every member of the record selected by `idx` from `sb`. Records are
// addressed at an 800-byte stride, and each member is read from a fixed byte
// offset within the record. The loaded values are not used any further in
// this function.
void main_inner(uint idx) {
  // Byte address of the first member of record `idx`.
  const uint base = 800u * idx;

  // Scalars.
  const float scalar_f32 = asfloat(sb.Load(base));
  const int scalar_i32 = asint(sb.Load(base + 4u));
  const uint scalar_u32 = sb.Load(base + 8u);
  const float16_t scalar_f16 = sb.Load<float16_t>(base + 12u);

  // 2-component vectors.
  const float2 vec2_f32 = asfloat(sb.Load2(base + 16u));
  const int2 vec2_i32 = asint(sb.Load2(base + 24u));
  const uint2 vec2_u32 = sb.Load2(base + 32u);
  const vector<float16_t, 2> vec2_f16 = sb.Load<vector<float16_t, 2> >(base + 40u);

  // 3-component vectors.
  const float3 vec3_f32 = asfloat(sb.Load3(base + 48u));
  const int3 vec3_i32 = asint(sb.Load3(base + 64u));
  const uint3 vec3_u32 = sb.Load3(base + 80u);
  const vector<float16_t, 3> vec3_f16 = sb.Load<vector<float16_t, 3> >(base + 96u);

  // 4-component vectors.
  const float4 vec4_f32 = asfloat(sb.Load4(base + 112u));
  const int4 vec4_i32 = asint(sb.Load4(base + 128u));
  const uint4 vec4_u32 = sb.Load4(base + 144u);
  const vector<float16_t, 4> vec4_f16 = sb.Load<vector<float16_t, 4> >(base + 160u);

  // 32-bit float matrices.
  const float2x2 mat2x2_f32 = sb_load_16(base + 168u);
  const float2x3 mat2x3_f32 = sb_load_17(base + 192u);
  const float2x4 mat2x4_f32 = sb_load_18(base + 224u);
  const float3x2 mat3x2_f32 = sb_load_19(base + 256u);
  const float3x3 mat3x3_f32 = sb_load_20(base + 288u);
  const float3x4 mat3x4_f32 = sb_load_21(base + 336u);
  const float4x2 mat4x2_f32 = sb_load_22(base + 384u);
  const float4x3 mat4x3_f32 = sb_load_23(base + 416u);
  const float4x4 mat4x4_f32 = sb_load_24(base + 480u);

  // 16-bit float matrices.
  const matrix<float16_t, 2, 2> mat2x2_f16 = sb_load_25(base + 544u);
  const matrix<float16_t, 2, 3> mat2x3_f16 = sb_load_26(base + 552u);
  const matrix<float16_t, 2, 4> mat2x4_f16 = sb_load_27(base + 568u);
  const matrix<float16_t, 3, 2> mat3x2_f16 = sb_load_28(base + 584u);
  const matrix<float16_t, 3, 3> mat3x3_f16 = sb_load_29(base + 600u);
  const matrix<float16_t, 3, 4> mat3x4_f16 = sb_load_30(base + 624u);
  const matrix<float16_t, 4, 2> mat4x2_f16 = sb_load_31(base + 648u);
  const matrix<float16_t, 4, 3> mat4x3_f16 = sb_load_32(base + 664u);
  const matrix<float16_t, 4, 4> mat4x4_f16 = sb_load_33(base + 696u);

  // Fixed-size arrays.
  const float3 arr2_vec3_f32[2] = sb_load_34(base + 736u);
  const matrix<float16_t, 4, 2> arr2_mat4x2_f16[2] = sb_load_35(base + 768u);
}

// Compute entry point, declared with a 1x1x1 thread group. It unpacks the
// SV_GroupIndex value from the input struct and hands it to main_inner.
[numthreads(1, 1, 1)]
void main(tint_symbol_1 tint_symbol) {
  const uint group_index = tint_symbol.idx;
  main_inner(group_index);
}
