Compat GLES: blit BGRA8Unorm texture to buffer using compute

Support BGRA8Unorm texture readback using texture to buffer
compute blit emulation when GL_EXT_read_format_bgra is
not available.

Bug: dawn:1393
Change-Id: Iaad7057f73057829fb161a7580679702ba9f70bf
Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/139506
Reviewed-by: Austin Eng <enga@chromium.org>
Reviewed-by: Stephen White <senorblanco@chromium.org>
Commit-Queue: Shrek Shao <shrekshao@google.com>
Auto-Submit: Shrek Shao <shrekshao@google.com>
Kokoro: Kokoro <noreply+kokoro@google.com>
diff --git a/src/dawn/native/BlitTextureToBuffer.cpp b/src/dawn/native/BlitTextureToBuffer.cpp
index 2189e69..454deea 100644
--- a/src/dawn/native/BlitTextureToBuffer.cpp
+++ b/src/dawn/native/BlitTextureToBuffer.cpp
@@ -60,7 +60,7 @@
 template <std::string_view const&... Strs>
 static constexpr auto ConcatStringViews = ConcatStringViewsImpl<Strs...>::value;
 
-constexpr std::string_view kSnormTexture1D = R"(
+constexpr std::string_view kFloatTexture1D = R"(
 fn textureLoadGeneral(tex: texture_1d<f32>, coords: vec3u, level: u32) -> vec4<f32> {
     return textureLoad(tex, coords.x, level);
 }
@@ -68,7 +68,7 @@
 @group(0) @binding(1) var<storage, read_write> dst_buf : array<u32>;
 )";
 
-constexpr std::string_view kSnormTexture2D = R"(
+constexpr std::string_view kFloatTexture2D = R"(
 fn textureLoadGeneral(tex: texture_2d_array<f32>, coords: vec3u, level: u32) -> vec4<f32> {
     return textureLoad(tex, coords.xy, coords.z, level);
 }
@@ -76,7 +76,7 @@
 @group(0) @binding(1) var<storage, read_write> dst_buf : array<u32>;
 )";
 
-constexpr std::string_view kSnormTexture3D = R"(
+constexpr std::string_view kFloatTexture3D = R"(
 fn textureLoadGeneral(tex: texture_3d<f32>, coords: vec3u, level: u32) -> vec4<f32> {
     return textureLoad(tex, coords, level);
 }
@@ -273,31 +273,49 @@
     let result: u32 = pack4x8snorm(v);
 )";
 
+constexpr std::string_view kPackBGRA8UnormToU32 = R"(
+    // Storing and swizzling bgra8unorm texel values
+    // later called by pack4x8unorm to convert to u32.
+    var v: vec4<f32>;
+
+    let texel0 = textureLoadGeneral(src_tex, coord0, 0);
+    v = texel0.bgra;
+
+    let result: u32 = pack4x8unorm(v);
+)";
+
 constexpr std::string_view kLoadDepth32Float = R"(
     dst_buf[dstOffset] = textureLoad(src_tex, coord0.xy, coord0.z, 0);
 }
 )";
 
 constexpr std::string_view kBlitR8Snorm1D =
-    ConcatStringViews<kSnormTexture1D, kCommon, kPackR8SnormToU32, kCommonEnd>;
+    ConcatStringViews<kFloatTexture1D, kCommon, kPackR8SnormToU32, kCommonEnd>;
 constexpr std::string_view kBlitRG8Snorm1D =
-    ConcatStringViews<kSnormTexture1D, kCommon, kPackRG8SnormToU32, kCommonEnd>;
+    ConcatStringViews<kFloatTexture1D, kCommon, kPackRG8SnormToU32, kCommonEnd>;
 constexpr std::string_view kBlitRGBA8Snorm1D =
-    ConcatStringViews<kSnormTexture1D, kCommon, kPackRGBA8SnormToU32, kCommonEnd>;
+    ConcatStringViews<kFloatTexture1D, kCommon, kPackRGBA8SnormToU32, kCommonEnd>;
 
 constexpr std::string_view kBlitR8Snorm2D =
-    ConcatStringViews<kSnormTexture2D, kCommon, kPackR8SnormToU32, kCommonEnd>;
+    ConcatStringViews<kFloatTexture2D, kCommon, kPackR8SnormToU32, kCommonEnd>;
 constexpr std::string_view kBlitRG8Snorm2D =
-    ConcatStringViews<kSnormTexture2D, kCommon, kPackRG8SnormToU32, kCommonEnd>;
+    ConcatStringViews<kFloatTexture2D, kCommon, kPackRG8SnormToU32, kCommonEnd>;
 constexpr std::string_view kBlitRGBA8Snorm2D =
-    ConcatStringViews<kSnormTexture2D, kCommon, kPackRGBA8SnormToU32, kCommonEnd>;
+    ConcatStringViews<kFloatTexture2D, kCommon, kPackRGBA8SnormToU32, kCommonEnd>;
 
 constexpr std::string_view kBlitR8Snorm3D =
-    ConcatStringViews<kSnormTexture3D, kCommon, kPackR8SnormToU32, kCommonEnd>;
+    ConcatStringViews<kFloatTexture3D, kCommon, kPackR8SnormToU32, kCommonEnd>;
 constexpr std::string_view kBlitRG8Snorm3D =
-    ConcatStringViews<kSnormTexture3D, kCommon, kPackRG8SnormToU32, kCommonEnd>;
+    ConcatStringViews<kFloatTexture3D, kCommon, kPackRG8SnormToU32, kCommonEnd>;
 constexpr std::string_view kBlitRGBA8Snorm3D =
-    ConcatStringViews<kSnormTexture3D, kCommon, kPackRGBA8SnormToU32, kCommonEnd>;
+    ConcatStringViews<kFloatTexture3D, kCommon, kPackRGBA8SnormToU32, kCommonEnd>;
+
+constexpr std::string_view kBlitBGRA8Unorm1D =
+    ConcatStringViews<kFloatTexture1D, kCommon, kPackBGRA8UnormToU32, kCommonEnd>;
+constexpr std::string_view kBlitBGRA8Unorm2D =
+    ConcatStringViews<kFloatTexture2D, kCommon, kPackBGRA8UnormToU32, kCommonEnd>;
+constexpr std::string_view kBlitBGRA8Unorm3D =
+    ConcatStringViews<kFloatTexture3D, kCommon, kPackBGRA8UnormToU32, kCommonEnd>;
 
 constexpr std::string_view kBlitStencil8 =
     ConcatStringViews<kStencilTexture, kCommon, kPackStencil8ToU32, kCommonEnd>;
@@ -367,6 +385,20 @@
             }
             textureSampleType = wgpu::TextureSampleType::Float;
             break;
+        case wgpu::TextureFormat::BGRA8Unorm:
+            switch (dimension) {
+                case wgpu::TextureDimension::e1D:
+                    wgslDesc.code = kBlitBGRA8Unorm1D.data();
+                    break;
+                case wgpu::TextureDimension::e2D:
+                    wgslDesc.code = kBlitBGRA8Unorm2D.data();
+                    break;
+                case wgpu::TextureDimension::e3D:
+                    wgslDesc.code = kBlitBGRA8Unorm3D.data();
+                    break;
+            }
+            textureSampleType = wgpu::TextureSampleType::Float;
+            break;
         case wgpu::TextureFormat::Depth16Unorm:
             DAWN_ASSERT(dimension == wgpu::TextureDimension::e2D);
             wgslDesc.code = kBlitDepth16Unorm.data();
diff --git a/src/dawn/native/Buffer.cpp b/src/dawn/native/Buffer.cpp
index c170968..63be90f 100644
--- a/src/dawn/native/Buffer.cpp
+++ b/src/dawn/native/Buffer.cpp
@@ -176,7 +176,8 @@
         if (device->IsToggleEnabled(Toggle::UseBlitForDepth16UnormTextureToBufferCopy) ||
             device->IsToggleEnabled(Toggle::UseBlitForDepth32FloatTextureToBufferCopy) ||
             device->IsToggleEnabled(Toggle::UseBlitForStencilTextureToBufferCopy) ||
-            device->IsToggleEnabled(Toggle::UseBlitForSnormTextureToBufferCopy)) {
+            device->IsToggleEnabled(Toggle::UseBlitForSnormTextureToBufferCopy) ||
+            device->IsToggleEnabled(Toggle::UseBlitForBGRA8UnormTextureToBufferCopy)) {
             mUsage |= kInternalStorageBuffer;
         }
     }
diff --git a/src/dawn/native/CommandEncoder.cpp b/src/dawn/native/CommandEncoder.cpp
index 6ccbd5d..dfa8934c 100644
--- a/src/dawn/native/CommandEncoder.cpp
+++ b/src/dawn/native/CommandEncoder.cpp
@@ -758,6 +758,34 @@
     Ref<TextureViewBase> copyDst;
 };
 
+bool ShouldUseTextureToBufferBlit(const DeviceBase* device,
+                                  const Format& format,
+                                  const Aspect& aspect) {
+    // Snorm
+    if (format.IsSnorm() && device->IsToggleEnabled(Toggle::UseBlitForSnormTextureToBufferCopy)) {
+        return true;
+    }
+    // BGRA8Unorm
+    if (format.format == wgpu::TextureFormat::BGRA8Unorm &&
+        device->IsToggleEnabled(Toggle::UseBlitForBGRA8UnormTextureToBufferCopy)) {
+        return true;
+    }
+    // Depth
+    if (aspect == Aspect::Depth &&
+        ((format.format == wgpu::TextureFormat::Depth16Unorm &&
+          device->IsToggleEnabled(Toggle::UseBlitForDepth16UnormTextureToBufferCopy)) ||
+         (format.format == wgpu::TextureFormat::Depth32Float &&
+          device->IsToggleEnabled(Toggle::UseBlitForDepth32FloatTextureToBufferCopy)))) {
+        return true;
+    }
+    // Stencil
+    if (aspect == Aspect::Stencil &&
+        device->IsToggleEnabled(Toggle::UseBlitForStencilTextureToBufferCopy)) {
+        return true;
+    }
+    return false;
+}
+
 }  // namespace
 
 Color ClampClearColorValueToLegalRange(const Color& originalColor, const Format& format) {
@@ -1493,8 +1521,8 @@
             auto format = source->texture->GetFormat();
             auto aspect = ConvertAspect(format, source->aspect);
 
-            if (format.IsSnorm() &&
-                GetDevice()->IsToggleEnabled(Toggle::UseBlitForSnormTextureToBufferCopy)) {
+            // Workaround to use compute pass to emulate texture to buffer copy
+            if (ShouldUseTextureToBufferBlit(GetDevice(), format, aspect)) {
                 // This function might create new resources. Need to lock the Device.
                 // TODO(crbug.com/dawn/1618): In future, all temp resources should be created at
                 // Command Submit time, so the locking would be removed from here at that point.
@@ -1512,67 +1540,12 @@
                 dst.rowsPerImage = destination->layout.rowsPerImage;
                 dst.offset = destination->layout.offset;
                 DAWN_TRY_CONTEXT(BlitTextureToBuffer(GetDevice(), this, src, dst, *copySize),
-                                 "copying snorm texture %s to %s using blit workaround.",
+                                 "copying texture %s to %s using blit workaround.",
                                  src.texture.Get(), destination->buffer);
 
                 return {};
             }
 
-            if (aspect == Aspect::Depth) {
-                if ((format.format == wgpu::TextureFormat::Depth16Unorm &&
-                     GetDevice()->IsToggleEnabled(
-                         Toggle::UseBlitForDepth16UnormTextureToBufferCopy)) ||
-                    (format.format == wgpu::TextureFormat::Depth32Float &&
-                     GetDevice()->IsToggleEnabled(
-                         Toggle::UseBlitForDepth32FloatTextureToBufferCopy))) {
-                    // This function might create new resources. Need to lock the Device.
-                    // TODO(crbug.com/dawn/1618): In future, all temp resources should be created at
-                    // Command Submit time, so the locking would be removed from here at that point.
-                    auto deviceLock(GetDevice()->GetScopedLock());
-
-                    TextureCopy src;
-                    src.texture = source->texture;
-                    src.origin = source->origin;
-                    src.mipLevel = source->mipLevel;
-                    src.aspect = aspect;
-
-                    BufferCopy dst;
-                    dst.buffer = destination->buffer;
-                    dst.bytesPerRow = destination->layout.bytesPerRow;
-                    dst.rowsPerImage = destination->layout.rowsPerImage;
-                    dst.offset = destination->layout.offset;
-                    DAWN_TRY_CONTEXT(BlitTextureToBuffer(GetDevice(), this, src, dst, *copySize),
-                                     "copying depth aspect from %s to %s using blit workaround.",
-                                     src.texture.Get(), destination->buffer);
-
-                    return {};
-                }
-            } else if (aspect == Aspect::Stencil) {
-                if (GetDevice()->IsToggleEnabled(Toggle::UseBlitForStencilTextureToBufferCopy)) {
-                    // This function might create new resources. Need to lock the Device.
-                    // TODO(crbug.com/dawn/1618): In future, all temp resources should be created at
-                    // Command Submit time, so the locking would be removed from here at that point.
-                    auto deviceLock(GetDevice()->GetScopedLock());
-
-                    TextureCopy src;
-                    src.texture = source->texture;
-                    src.origin = source->origin;
-                    src.mipLevel = source->mipLevel;
-                    src.aspect = aspect;
-
-                    BufferCopy dst;
-                    dst.buffer = destination->buffer;
-                    dst.bytesPerRow = destination->layout.bytesPerRow;
-                    dst.rowsPerImage = destination->layout.rowsPerImage;
-                    dst.offset = destination->layout.offset;
-                    DAWN_TRY_CONTEXT(BlitTextureToBuffer(GetDevice(), this, src, dst, *copySize),
-                                     "copying stencil aspect from %s to %s using blit workaround.",
-                                     src.texture.Get(), destination->buffer);
-
-                    return {};
-                }
-            }
-
             CopyTextureToBufferCmd* t2b =
                 allocator->Allocate<CopyTextureToBufferCmd>(Command::CopyTextureToBuffer);
             t2b->source.texture = source->texture;
diff --git a/src/dawn/native/Texture.cpp b/src/dawn/native/Texture.cpp
index f62d240..c1a3e87 100644
--- a/src/dawn/native/Texture.cpp
+++ b/src/dawn/native/Texture.cpp
@@ -628,6 +628,12 @@
             AddInternalUsage(wgpu::TextureUsage::TextureBinding);
         }
     }
+    if (mFormat.format == wgpu::TextureFormat::BGRA8Unorm &&
+        device->IsToggleEnabled(Toggle::UseBlitForBGRA8UnormTextureToBufferCopy)) {
+        if (mInternalUsage & wgpu::TextureUsage::CopySrc) {
+            AddInternalUsage(wgpu::TextureUsage::TextureBinding);
+        }
+    }
 }
 
 TextureBase::~TextureBase() = default;
diff --git a/src/dawn/native/Toggles.cpp b/src/dawn/native/Toggles.cpp
index 1610e7d..e5ceb56 100644
--- a/src/dawn/native/Toggles.cpp
+++ b/src/dawn/native/Toggles.cpp
@@ -132,10 +132,6 @@
      {"disable_indexed_draw_buffers",
       "Disables the use of indexed draw buffer state which is unsupported on some platforms.",
       "https://crbug.com/dawn/582", ToggleStage::Device}},
-    {Toggle::DisableSnormRead,
-     {"disable_snorm_read",
-      "Disables reading from Snorm textures which is unsupported on some platforms.",
-      "https://crbug.com/dawn/667", ToggleStage::Device}},
     {Toggle::DisableDepthRead,
      {"disable_depth_read",
       "Disables reading from depth textures which is unsupported on some platforms.",
@@ -148,10 +144,6 @@
      {"disable_depth_stencil_read",
       "Disables reading from depth/stencil textures which is unsupported on some platforms.",
       "https://crbug.com/dawn/667", ToggleStage::Device}},
-    {Toggle::DisableBGRARead,
-     {"disable_bgra_read",
-      "Disables reading from BGRA textures which is unsupported on some platforms.",
-      "https://crbug.com/dawn/1393", ToggleStage::Device}},
     {Toggle::DisableSampleVariables,
      {"disable_sample_variables",
       "Disables gl_SampleMask and related functionality which is unsupported on some platforms.",
@@ -406,6 +398,11 @@
       "Use a blit instead of a copy command to copy snorm texture to a buffer."
       "Workaround for OpenGLES.",
       "https://crbug.com/dawn/1781", ToggleStage::Device}},
+    {Toggle::UseBlitForBGRA8UnormTextureToBufferCopy,
+     {"use_blit_for_snorm_texture_to_buffer_copy",
+      "Use a blit instead of a copy command to copy bgra8unorm texture to a buffer."
+      "Workaround for OpenGLES.",
+      "https://crbug.com/dawn/1393", ToggleStage::Device}},
     {Toggle::D3D12ReplaceAddWithMinusWhenDstFactorIsZeroAndSrcFactorIsDstAlpha,
      {"d3d12_replace_add_with_minus_when_dst_factor_is_zero_and_src_factor_is_dst_alpha",
       "Replace the blending operation 'Add' with 'Minus' when dstBlendFactor is 'Zero' and "
diff --git a/src/dawn/native/Toggles.h b/src/dawn/native/Toggles.h
index 64b4be2..271b5c9 100644
--- a/src/dawn/native/Toggles.h
+++ b/src/dawn/native/Toggles.h
@@ -46,11 +46,9 @@
     DisableBaseVertex,
     DisableBaseInstance,
     DisableIndexedDrawBuffers,
-    DisableSnormRead,
     DisableDepthRead,
     DisableStencilRead,
     DisableDepthStencilRead,
-    DisableBGRARead,
     DisableSampleVariables,
     UseD3D12SmallShaderVisibleHeapForTesting,
     UseDXC,
@@ -96,6 +94,7 @@
     UseBlitForDepth32FloatTextureToBufferCopy,
     UseBlitForStencilTextureToBufferCopy,
     UseBlitForSnormTextureToBufferCopy,
+    UseBlitForBGRA8UnormTextureToBufferCopy,
     D3D12ReplaceAddWithMinusWhenDstFactorIsZeroAndSrcFactorIsDstAlpha,
     D3D12PolyfillReflectVec2F32,
     VulkanClearGen12TextureWithCCSAmbiguateOnCreation,
diff --git a/src/dawn/native/opengl/CommandBufferGL.cpp b/src/dawn/native/opengl/CommandBufferGL.cpp
index 06154f4..63e619c 100644
--- a/src/dawn/native/opengl/CommandBufferGL.cpp
+++ b/src/dawn/native/opengl/CommandBufferGL.cpp
@@ -578,11 +578,7 @@
                 const GLFormat& format = texture->GetGLFormat();
                 GLenum target = texture->GetGLTarget();
 
-                // For SNORM texture formats, Dawn translates CopyTextureToBuffer command to a
-                // compute shader pass.
-                if (formatInfo.isCompressed ||
-                    (formatInfo.IsSnorm() &&
-                     GetDevice()->IsToggleEnabled(Toggle::DisableSnormRead))) {
+                if (formatInfo.isCompressed) {
                     UNREACHABLE();
                 }
 
diff --git a/src/dawn/native/opengl/PhysicalDeviceGL.cpp b/src/dawn/native/opengl/PhysicalDeviceGL.cpp
index d79d41a..df82a35 100644
--- a/src/dawn/native/opengl/PhysicalDeviceGL.cpp
+++ b/src/dawn/native/opengl/PhysicalDeviceGL.cpp
@@ -279,11 +279,9 @@
     deviceToggles->Default(Toggle::DisableBaseVertex, !supportsBaseVertex);
     deviceToggles->Default(Toggle::DisableBaseInstance, !supportsBaseInstance);
     deviceToggles->Default(Toggle::DisableIndexedDrawBuffers, !supportsIndexedDrawBuffers);
-    deviceToggles->Default(Toggle::DisableSnormRead, !supportsSnormRead);
     deviceToggles->Default(Toggle::DisableDepthRead, !supportsDepthRead);
     deviceToggles->Default(Toggle::DisableStencilRead, !supportsStencilRead);
     deviceToggles->Default(Toggle::DisableDepthStencilRead, !supportsDepthStencilRead);
-    deviceToggles->Default(Toggle::DisableBGRARead, !supportsBGRARead);
     deviceToggles->Default(Toggle::DisableSampleVariables, !supportsSampleVariables);
     deviceToggles->Default(Toggle::FlushBeforeClientWaitSync, gl.GetVersion().IsES());
     // For OpenGL ES, we must use a placeholder fragment shader for vertex-only render pipeline.
@@ -303,6 +301,9 @@
     // For OpenGL ES, use compute shader blit to emulate snorm texture to buffer copies.
     deviceToggles->Default(Toggle::UseBlitForSnormTextureToBufferCopy,
                            gl.GetVersion().IsES() || !supportsSnormRead);
+
+    // For OpenGL ES, use compute shader blit to emulate bgra8unorm texture to buffer copies.
+    deviceToggles->Default(Toggle::UseBlitForBGRA8UnormTextureToBufferCopy, !supportsBGRARead);
 }
 
 ResultOrError<Ref<DeviceBase>> PhysicalDevice::CreateDeviceImpl(AdapterBase* adapter,
diff --git a/src/dawn/tests/end2end/TextureFormatTests.cpp b/src/dawn/tests/end2end/TextureFormatTests.cpp
index 5c87de4..421ea3e 100644
--- a/src/dawn/tests/end2end/TextureFormatTests.cpp
+++ b/src/dawn/tests/end2end/TextureFormatTests.cpp
@@ -557,8 +557,6 @@
 
 // Test the BGRA8Unorm format
 TEST_P(TextureFormatTest, BGRA8Unorm) {
-    DAWN_TEST_UNSUPPORTED_IF(HasToggleEnabled("disable_bgra_read"));
-
     // Intel's implementation of BGRA on ES is broken: it claims to support
     // GL_EXT_texture_format_BGRA8888, but won't accept GL_BGRA or GL_BGRA8_EXT as internalFormat.
     DAWN_SUPPRESS_TEST_IF(IsIntel() && IsOpenGLES() && IsLinux());