Support unorm8x4-bgra in OpenGL / GLSL

This vertex format is not natively supported on OpenGL so a Tint option
is added to optionally swizzle vertex inputs from RGBA to BGRA. The
transform is done as part of ShaderIO by extracting the vertex input as
a vec4f before swizzling it and converting it to the original vertex
input type.

Bug: 376924407
Change-Id: I681b9c0b29f3fdaa39076a7d64ffa439178f6ebd
Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/213436
Reviewed-by: James Price <jrprice@google.com>
Commit-Queue: Corentin Wallez <cwallez@chromium.org>
Reviewed-by: Geoff Lang <geofflang@chromium.org>
diff --git a/src/dawn/native/opengl/ComputePipelineGL.cpp b/src/dawn/native/opengl/ComputePipelineGL.cpp
index f193bea..4eef500 100644
--- a/src/dawn/native/opengl/ComputePipelineGL.cpp
+++ b/src/dawn/native/opengl/ComputePipelineGL.cpp
@@ -48,7 +48,7 @@
 MaybeError ComputePipeline::InitializeImpl() {
     DAWN_TRY(InitializeBase(ToBackend(GetDevice())->GetGL(), ToBackend(GetLayout()), GetAllStages(),
                             /* usesVertexIndex */ false, /* usesInstanceIndex */ false,
-                            /* usesFragDepth */ false));
+                            /* usesFragDepth */ false, /* bgraSwizzleAttributes */ {}));
     return {};
 }
 
diff --git a/src/dawn/native/opengl/PipelineGL.cpp b/src/dawn/native/opengl/PipelineGL.cpp
index 0dcba7a..22a4159 100644
--- a/src/dawn/native/opengl/PipelineGL.cpp
+++ b/src/dawn/native/opengl/PipelineGL.cpp
@@ -55,7 +55,8 @@
                                       const PerStage<ProgrammableStage>& stages,
                                       bool usesVertexIndex,
                                       bool usesInstanceIndex,
-                                      bool usesFragDepth) {
+                                      bool usesFragDepth,
+                                      VertexAttributeMask bgraSwizzleAttributes) {
     mProgram = gl.CreateProgram();
 
     // Compute the set of active stages.
@@ -70,16 +71,15 @@
     PerStage<CombinedSamplerInfo> combinedSamplers;
     bool needsPlaceholderSampler = false;
     std::vector<GLuint> glShaders;
-    // TODO(376924407): Add information for a transform that swizzles vertex inputs for the
-    // unorm8x4-bgra vertex format.
     for (SingleShaderStage stage : IterateStages(activeStages)) {
         const ShaderModule* module = ToBackend(stages[stage].module.Get());
         GLuint shader;
         DAWN_TRY_ASSIGN(
-            shader, module->CompileShader(
-                        gl, stages[stage], stage, usesVertexIndex, usesInstanceIndex, usesFragDepth,
-                        &combinedSamplers[stage], layout, &needsPlaceholderSampler,
-                        &mNeedsTextureBuiltinUniformBuffer, &mBindingPointEmulatedBuiltins));
+            shader,
+            module->CompileShader(
+                gl, stages[stage], stage, usesVertexIndex, usesInstanceIndex, usesFragDepth,
+                bgraSwizzleAttributes, &combinedSamplers[stage], layout, &needsPlaceholderSampler,
+                &mNeedsTextureBuiltinUniformBuffer, &mBindingPointEmulatedBuiltins));
         // XXX transform to flip some attributes from RGBA to BGRA
         gl.AttachShader(mProgram, shader);
         glShaders.push_back(shader);
diff --git a/src/dawn/native/opengl/PipelineGL.h b/src/dawn/native/opengl/PipelineGL.h
index 3b5ac9e..5d16e4b 100644
--- a/src/dawn/native/opengl/PipelineGL.h
+++ b/src/dawn/native/opengl/PipelineGL.h
@@ -31,6 +31,7 @@
 #include <utility>
 #include <vector>
 
+#include "dawn/native/IntegerTypes.h"
 #include "dawn/native/Pipeline.h"
 
 #include "include/tint/tint.h"
@@ -76,7 +77,8 @@
                               const PerStage<ProgrammableStage>& stages,
                               bool usesVertexIndex,
                               bool usesInstanceIndex,
-                              bool usesFragDepth);
+                              bool usesFragDepth,
+                              VertexAttributeMask bgraSwizzleAttributes);
     void DeleteProgram(const OpenGLFunctions& gl);
 
   private:
diff --git a/src/dawn/native/opengl/RenderPipelineGL.cpp b/src/dawn/native/opengl/RenderPipelineGL.cpp
index f2ae926..3500ce6 100644
--- a/src/dawn/native/opengl/RenderPipelineGL.cpp
+++ b/src/dawn/native/opengl/RenderPipelineGL.cpp
@@ -27,6 +27,7 @@
 
 #include "dawn/native/opengl/RenderPipelineGL.h"
 
+#include "dawn/common/BitSetIterator.h"
 #include "dawn/native/opengl/DeviceGL.h"
 #include "dawn/native/opengl/Forward.h"
 #include "dawn/native/opengl/PersistentPipelineStateGL.h"
@@ -216,8 +217,14 @@
       mGlPrimitiveTopology(GLPrimitiveTopology(GetPrimitiveTopology())) {}
 
 MaybeError RenderPipeline::InitializeImpl() {
+    VertexAttributeMask bgraSwizzleAttributes = {};
+    for (VertexAttributeLocation i : IterateBitSet(GetAttributeLocationsUsed())) {
+        bgraSwizzleAttributes.set(i, GetAttribute(i).format == wgpu::VertexFormat::Unorm8x4BGRA);
+    }
+
     DAWN_TRY(InitializeBase(ToBackend(GetDevice())->GetGL(), ToBackend(GetLayout()), GetAllStages(),
-                            UsesVertexIndex(), UsesInstanceIndex(), UsesFragDepth()));
+                            UsesVertexIndex(), UsesInstanceIndex(), UsesFragDepth(),
+                            bgraSwizzleAttributes));
     CreateVAOForVertexState();
     return {};
 }
diff --git a/src/dawn/native/opengl/ShaderModuleGL.cpp b/src/dawn/native/opengl/ShaderModuleGL.cpp
index 7c5aa36..98310b5 100644
--- a/src/dawn/native/opengl/ShaderModuleGL.cpp
+++ b/src/dawn/native/opengl/ShaderModuleGL.cpp
@@ -119,7 +119,7 @@
 // Any texture-only references will have "usePlaceholderSampler" set to true, and only the texture
 // binding point will be used in naming them. In addition, Dawn will bind a non-filtering sampler
 // for them (see PipelineGL).
-CombinedSamplerInfo generateCombinedSamplerInfo(tint::inspector::Inspector& inspector,
+CombinedSamplerInfo GenerateCombinedSamplerInfo(tint::inspector::Inspector& inspector,
                                                 const std::string& entryPoint,
                                                 tint::glsl::writer::Bindings& bindings,
                                                 BindingMap externalTextureExpansionMap,
@@ -199,7 +199,7 @@
     return combinedSamplerInfo;
 }
 
-bool generateTextureBuiltinFromUniformData(tint::inspector::Inspector& inspector,
+bool GenerateTextureBuiltinFromUniformData(tint::inspector::Inspector& inspector,
                                            const std::string& entryPoint,
                                            const PipelineLayout* layout,
                                            BindingPointToFunctionAndOffset* bindingPointToData,
@@ -301,7 +301,7 @@
     return {};
 }
 
-std::pair<tint::glsl::writer::Bindings, BindingMap> generateBindingInfo(
+std::pair<tint::glsl::writer::Bindings, BindingMap> GenerateBindingInfo(
     SingleShaderStage stage,
     const PipelineLayout* layout,
     const BindingInfoArray& moduleBindingInfo,
@@ -399,6 +399,7 @@
     bool usesVertexIndex,
     bool usesInstanceIndex,
     bool usesFragDepth,
+    VertexAttributeMask bgraSwizzleAttributes,
     CombinedSamplerInfo* combinedSamplers,
     const PipelineLayout* layout,
     bool* needsPlaceholderSampler,
@@ -423,7 +424,7 @@
     const BindingInfoArray& moduleBindingInfo = entryPointMetaData.bindings;
 
     auto [bindings, externalTextureExpansionMap] =
-        generateBindingInfo(stage, layout, moduleBindingInfo, req);
+        GenerateBindingInfo(stage, layout, moduleBindingInfo, req);
 
     // When textures are accessed without a sampler (e.g., textureLoad()),
     // GetSamplerTextureUses() will return this sentinel value.
@@ -439,10 +440,10 @@
         tint::glsl::writer::binding::Uniform{layout->GetInternalUniformBinding()});
 
     CombinedSamplerInfo combinedSamplerInfo =
-        generateCombinedSamplerInfo(inspector, programmableStage.entryPoint, bindings,
+        GenerateCombinedSamplerInfo(inspector, programmableStage.entryPoint, bindings,
                                     externalTextureExpansionMap, needsPlaceholderSampler);
 
-    bool needsInternalUBO = generateTextureBuiltinFromUniformData(
+    bool needsInternalUBO = GenerateTextureBuiltinFromUniformData(
         inspector, programmableStage.entryPoint, layout, bindingPointToData, bindings);
 
     std::optional<tint::ast::transform::SubstituteOverride::Config> substituteOverrideConfig;
@@ -477,6 +478,12 @@
                                                4 * PipelineLayout::PushConstantLocation::MaxDepth};
     }
 
+    if (stage == SingleShaderStage::Vertex) {
+        for (VertexAttributeLocation i : IterateBitSet(bgraSwizzleAttributes)) {
+            req.tintOptions.bgra_swizzle_locations.insert(static_cast<uint8_t>(i));
+        }
+    }
+
     req.disableSymbolRenaming = GetDevice()->IsToggleEnabled(Toggle::DisableSymbolRenaming);
 
     req.interstageVariables = {};
diff --git a/src/dawn/native/opengl/ShaderModuleGL.h b/src/dawn/native/opengl/ShaderModuleGL.h
index 133eb44..b7735f4 100644
--- a/src/dawn/native/opengl/ShaderModuleGL.h
+++ b/src/dawn/native/opengl/ShaderModuleGL.h
@@ -31,6 +31,7 @@
 #include <string>
 #include <vector>
 
+#include "dawn/native/IntegerTypes.h"
 #include "dawn/native/Serializable.h"
 #include "dawn/native/ShaderModule.h"
 #include "dawn/native/opengl/BindingPoint.h"
@@ -93,6 +94,7 @@
                                         bool usesVertexIndex,
                                         bool usesInstanceIndex,
                                         bool usesFragDepth,
+                                        VertexAttributeMask bgraSwizzleAttributes,
                                         CombinedSamplerInfo* combinedSamplers,
                                         const PipelineLayout* layout,
                                         bool* needsPlaceholderSampler,
diff --git a/src/dawn/tests/end2end/VertexFormatTests.cpp b/src/dawn/tests/end2end/VertexFormatTests.cpp
index b6624ae..dd4efac 100644
--- a/src/dawn/tests/end2end/VertexFormatTests.cpp
+++ b/src/dawn/tests/end2end/VertexFormatTests.cpp
@@ -706,10 +706,6 @@
 }
 
 TEST_P(VertexFormatTest, Unorm8x4BGRA) {
-    // TODO(376924407): OpenGL needs a shader transform to support the BGRA format as it doesn't
-    // have any swizzled vertex format.
-    DAWN_SUPPRESS_TEST_IF(IsOpenGL() || IsOpenGLES());
-
     std::vector<uint8_t> vertexData = {std::numeric_limits<uint8_t>::max(),
                                        std::numeric_limits<uint8_t>::min(),
                                        0,
diff --git a/src/tint/lang/glsl/writer/common/options.h b/src/tint/lang/glsl/writer/common/options.h
index aabe86c..0fdab12 100644
--- a/src/tint/lang/glsl/writer/common/options.h
+++ b/src/tint/lang/glsl/writer/common/options.h
@@ -31,6 +31,7 @@
 #include <optional>
 #include <string>
 #include <unordered_map>
+#include <unordered_set>
 #include <vector>
 
 #include "src/tint/api/common/binding_point.h"
@@ -267,6 +268,9 @@
     /// Offsets of the minDepth and maxDepth push constants.
     std::optional<RangeOffsets> depth_range_offsets;
 
+    /// Vertex inputs to perform BGRA swizzle on.
+    std::unordered_set<uint32_t> bgra_swizzle_locations;
+
     /// The bindings
     Bindings bindings{};
 
@@ -279,6 +283,7 @@
                  first_vertex_offset,
                  first_instance_offset,
                  depth_range_offsets,
+                 bgra_swizzle_locations,
                  bindings);
 };
 
diff --git a/src/tint/lang/glsl/writer/raise/raise.cc b/src/tint/lang/glsl/writer/raise/raise.cc
index 5badc90..9b57a2c 100644
--- a/src/tint/lang/glsl/writer/raise/raise.cc
+++ b/src/tint/lang/glsl/writer/raise/raise.cc
@@ -191,7 +191,8 @@
     RUN_TRANSFORM(core::ir::transform::AddEmptyEntryPoint, module);
 
     RUN_TRANSFORM(raise::ShaderIO, module,
-                  raise::ShaderIOConfig{push_constant_layout.Get(), options.depth_range_offsets});
+                  raise::ShaderIOConfig{push_constant_layout.Get(), options.depth_range_offsets,
+                                        options.bgra_swizzle_locations});
 
     // Must come after ShaderIO as it operates on module-scope `in` variables.
     RUN_TRANSFORM(
diff --git a/src/tint/lang/glsl/writer/raise/shader_io.cc b/src/tint/lang/glsl/writer/raise/shader_io.cc
index 551357a..5e75180 100644
--- a/src/tint/lang/glsl/writer/raise/shader_io.cc
+++ b/src/tint/lang/glsl/writer/raise/shader_io.cc
@@ -33,6 +33,7 @@
 #include "src/tint/lang/core/ir/module.h"
 #include "src/tint/lang/core/ir/transform/shader_io.h"
 #include "src/tint/lang/core/ir/validator.h"
+#include "src/tint/utils/containers/vector.h"
 
 using namespace tint::core::fluent_types;     // NOLINT
 using namespace tint::core::number_suffixes;  // NOLINT
@@ -50,6 +51,9 @@
     /// The output variables.
     Vector<core::ir::Var*, 4> output_vars;
 
+    /// The original type of vertex input variables that we are bgra-swizzling. Keyed by location.
+    Hashmap<uint32_t, const core::type::Type*, 1> bgra_swizzle_original_types;
+
     /// The configuration options.
     const ShaderIOConfig& config;
 
@@ -143,15 +147,25 @@
                 }
             } else {
                 name << ir.NameOf(func).Name();
+                auto* type = io.type;
 
                 if (io.attributes.location) {
                     name << "_loc" << io.attributes.location.value();
                     if (io.attributes.blend_src.has_value()) {
                         name << "_idx" << io.attributes.blend_src.value();
                     }
+
+                    // When doing the BGRA swizzle, always emit vec4f. The component type must be
+                    // f32 for unorm8x4-bgra, but the number of components can be anything. Even if
+                    // the input variable is an f32 we need to get the full vec4f when swizzling as
+                    // the components are reordered.
+                    if (config.bgra_swizzle_locations.count(*io.attributes.location) != 0) {
+                        bgra_swizzle_original_types.Add(*io.attributes.location, io.type);
+                        type = ty.vec4<f32>();
+                    }
                 }
                 name << name_suffix;
-                ptr = ty.ptr(addrspace, io.type, access);
+                ptr = ty.ptr(addrspace, type, access);
             }
 
             // Create an IO variable and add it to the root block.
@@ -201,6 +215,18 @@
             }
         }
 
+        // Replace the value with the BGRA-swizzled version of it when required.
+        if (inputs[idx].attributes.location &&
+            config.bgra_swizzle_locations.count(*inputs[idx].attributes.location) != 0) {
+            auto* original_type =
+                *bgra_swizzle_original_types.Get(*inputs[idx].attributes.location);
+
+            Vector<uint32_t, 4> swizzles = {2, 1, 0, 3};
+            swizzles.Resize(original_type->Elements(nullptr, 1).count);
+
+            value = builder.Swizzle(original_type, value, swizzles)->Result(0);
+        }
+
         return value;
     }
 
diff --git a/src/tint/lang/glsl/writer/raise/shader_io.h b/src/tint/lang/glsl/writer/raise/shader_io.h
index d857e05..0390211 100644
--- a/src/tint/lang/glsl/writer/raise/shader_io.h
+++ b/src/tint/lang/glsl/writer/raise/shader_io.h
@@ -29,6 +29,7 @@
 #define SRC_TINT_LANG_GLSL_WRITER_RAISE_SHADER_IO_H_
 
 #include <string>
+#include <unordered_set>
 
 #include "src/tint/lang/core/ir/transform/prepare_push_constants.h"
 #include "src/tint/lang/glsl/writer/common/options.h"
@@ -49,6 +50,9 @@
 
     /// offsets for clamping frag depth
     std::optional<Options::RangeOffsets> depth_range_offsets{};
+
+    /// locations of vertex input variables to apply BGRA swizzle to.
+    std::unordered_set<uint32_t> bgra_swizzle_locations{};
 };
 
 /// ShaderIO is a transform that moves each entry point function's parameters and return value to
diff --git a/src/tint/lang/glsl/writer/raise/shader_io_test.cc b/src/tint/lang/glsl/writer/raise/shader_io_test.cc
index 438cb4a..75f0eee 100644
--- a/src/tint/lang/glsl/writer/raise/shader_io_test.cc
+++ b/src/tint/lang/glsl/writer/raise/shader_io_test.cc
@@ -1516,5 +1516,166 @@
     EXPECT_EQ(expect, str());
 }
 
+TEST_F(GlslWriter_ShaderIOTest, BGRASwizzleSingleValue) {
+    auto* ep = b.Function("vert", ty.vec4<f32>());
+    ep->SetReturnBuiltin(core::BuiltinValue::kPosition);
+    ep->SetReturnInvariant(true);
+    ep->SetStage(core::ir::Function::PipelineStage::kVertex);
+
+    auto* val = b.FunctionParam("val", ty.vec4<f32>());
+    val->SetLocation(0);
+    ep->SetParams({val});
+
+    b.Append(ep->Block(), [&] {  //
+        b.Return(ep, b.Construct(ty.vec4<f32>(), 0.5_f));
+    });
+
+    auto* src = R"(
+%vert = @vertex func(%val:vec4<f32> [@location(0)]):vec4<f32> [@invariant, @position] {
+  $B1: {
+    %3:vec4<f32> = construct 0.5f
+    ret %3
+  }
+}
+)";
+    EXPECT_EQ(src, str());
+
+    auto* expect = R"(
+$B1: {  # root
+  %vert_loc0_Input:ptr<__in, vec4<f32>, read> = var @location(0)
+  %gl_Position:ptr<__out, vec4<f32>, write> = var @invariant @builtin(position)
+  %gl_PointSize:ptr<__out, f32, write> = var @builtin(__point_size)
+}
+
+%vert_inner = func(%val:vec4<f32>):vec4<f32> {
+  $B2: {
+    %6:vec4<f32> = construct 0.5f
+    ret %6
+  }
+}
+%vert = @vertex func():void {
+  $B3: {
+    %8:vec4<f32> = load %vert_loc0_Input
+    %9:vec4<f32> = swizzle %8, zyxw
+    %10:vec4<f32> = call %vert_inner, %9
+    store %gl_Position, %10
+    %11:f32 = swizzle %gl_Position, y
+    %12:f32 = negation %11
+    store_vector_element %gl_Position, 1u, %12
+    %13:f32 = swizzle %gl_Position, z
+    %14:f32 = swizzle %gl_Position, w
+    %15:f32 = mul 2.0f, %13
+    %16:f32 = sub %15, %14
+    store_vector_element %gl_Position, 2u, %16
+    store %gl_PointSize, 1.0f
+    ret
+  }
+}
+)";
+
+    core::ir::transform::PushConstantLayout push_constants;
+    ShaderIOConfig config{push_constants};
+    config.bgra_swizzle_locations.insert(0u);
+    Run(ShaderIO, config);
+
+    EXPECT_EQ(expect, str());
+}
+
+TEST_F(GlslWriter_ShaderIOTest, BGRASwizzleMultipleValueMixedTypes) {
+    auto* ep = b.Function("vert", ty.vec4<f32>());
+    ep->SetReturnBuiltin(core::BuiltinValue::kPosition);
+    ep->SetReturnInvariant(true);
+    ep->SetStage(core::ir::Function::PipelineStage::kVertex);
+
+    std::unordered_set<uint32_t> swizzled_locations;
+
+    // Checks swizzling happens before conversion to the original type.
+    auto* val1 = b.FunctionParam("val1", ty.f32());
+    val1->SetLocation(5);
+    swizzled_locations.insert(5);
+
+    auto* val2 = b.FunctionParam("val2", ty.vec2<f32>());
+    val2->SetLocation(0);
+    swizzled_locations.insert(0);
+
+    auto* val3 = b.FunctionParam("val3", ty.vec3<f32>());
+    val3->SetLocation(3);
+    swizzled_locations.insert(3);
+
+    auto* val4 = b.FunctionParam("val4", ty.vec4<f32>());
+    val4->SetLocation(7);
+    swizzled_locations.insert(7);
+
+    // Checks that the sentinel doesn't get swizzled.
+    auto* sentinel = b.FunctionParam("sentinel", ty.vec4<f32>());
+    sentinel->SetLocation(4);
+
+    ep->SetParams({val1, val2, sentinel, val3, val4});
+    b.Append(ep->Block(), [&] {  //
+        b.Return(ep, b.Construct(ty.vec4<f32>(), 0.5_f));
+    });
+
+    auto* src = R"(
+%vert = @vertex func(%val1:f32 [@location(5)], %val2:vec2<f32> [@location(0)], %sentinel:vec4<f32> [@location(4)], %val3:vec3<f32> [@location(3)], %val4:vec4<f32> [@location(7)]):vec4<f32> [@invariant, @position] {
+  $B1: {
+    %7:vec4<f32> = construct 0.5f
+    ret %7
+  }
+}
+)";
+    EXPECT_EQ(src, str());
+
+    auto* expect = R"(
+$B1: {  # root
+  %vert_loc5_Input:ptr<__in, vec4<f32>, read> = var @location(5)
+  %vert_loc0_Input:ptr<__in, vec4<f32>, read> = var @location(0)
+  %vert_loc4_Input:ptr<__in, vec4<f32>, read> = var @location(4)
+  %vert_loc3_Input:ptr<__in, vec4<f32>, read> = var @location(3)
+  %vert_loc7_Input:ptr<__in, vec4<f32>, read> = var @location(7)
+  %gl_Position:ptr<__out, vec4<f32>, write> = var @invariant @builtin(position)
+  %gl_PointSize:ptr<__out, f32, write> = var @builtin(__point_size)
+}
+
+%vert_inner = func(%val1:f32, %val2:vec2<f32>, %sentinel:vec4<f32>, %val3:vec3<f32>, %val4:vec4<f32>):vec4<f32> {
+  $B2: {
+    %14:vec4<f32> = construct 0.5f
+    ret %14
+  }
+}
+%vert = @vertex func():void {
+  $B3: {
+    %16:vec4<f32> = load %vert_loc5_Input
+    %17:f32 = swizzle %16, z
+    %18:vec4<f32> = load %vert_loc0_Input
+    %19:vec2<f32> = swizzle %18, zy
+    %20:vec4<f32> = load %vert_loc4_Input
+    %21:vec4<f32> = load %vert_loc3_Input
+    %22:vec3<f32> = swizzle %21, zyx
+    %23:vec4<f32> = load %vert_loc7_Input
+    %24:vec4<f32> = swizzle %23, zyxw
+    %25:vec4<f32> = call %vert_inner, %17, %19, %20, %22, %24
+    store %gl_Position, %25
+    %26:f32 = swizzle %gl_Position, y
+    %27:f32 = negation %26
+    store_vector_element %gl_Position, 1u, %27
+    %28:f32 = swizzle %gl_Position, z
+    %29:f32 = swizzle %gl_Position, w
+    %30:f32 = mul 2.0f, %28
+    %31:f32 = sub %30, %29
+    store_vector_element %gl_Position, 2u, %31
+    store %gl_PointSize, 1.0f
+    ret
+  }
+}
+)";
+
+    core::ir::transform::PushConstantLayout push_constants;
+    ShaderIOConfig config{push_constants};
+    config.bgra_swizzle_locations = swizzled_locations;
+    Run(ShaderIO, config);
+
+    EXPECT_EQ(expect, str());
+}
+
 }  // namespace
 }  // namespace tint::glsl::writer::raise