Compat: Blit T2B for float16/32 textures when not color renderable

The GL backend uses a framebuffer to implement texture-to-buffer (T2B) copies for these texture formats. This does not work if the formats are not color renderable, so use the compute shader blit to emulate the copy instead.

Bug: 381214487
Change-Id: I613206b318db62b61d23553680d6231b48c901d7
Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/221836
Reviewed-by: Stephen White <senorblanco@chromium.org>
Reviewed-by: Kai Ninomiya <kainino@chromium.org>
Commit-Queue: Shrek Shao <shrekshao@google.com>

commit: fcf22c35a51fdae89f8a936bc47fb19f4b74ee17 [log] [tgz]
author: Shrek Shao <shrekshao@google.com> Fri Jan 17 13:13:09 2025 -0800
committer: Dawn LUCI CQ <dawn-scoped@luci-project-accounts.iam.gserviceaccount.com> Fri Jan 17 13:13:09 2025 -0800
tree: 3b25741625325bf7f967cb495abd50b3850e3164
parent: 91608fc52ab113da9d6f87bf6ff7216a64355e91 [diff]
diff --git a/src/dawn/native/BlitTextureToBuffer.cpp b/src/dawn/native/BlitTextureToBuffer.cpp
index cdc7dde..5e64a72 100644
--- a/src/dawn/native/BlitTextureToBuffer.cpp
+++ b/src/dawn/native/BlitTextureToBuffer.cpp

@@ -655,12 +655,30 @@
 }
 )";
 
-// Directly loading R32Float values into dst_buf
+// Directly loading float32 values into dst_buf
 // No bit manipulation and packing is needed.
 constexpr std::string_view kLoadR32Float = R"(
     dst_buf[dstOffset] = textureLoadGeneral(src_tex, coord0, params.mipLevel).r;
 }
 )";
+constexpr std::string_view kLoadRG32Float = R"(
+    let v = textureLoadGeneral(src_tex, coord0, params.mipLevel);
+    // dstOffset is based on 8 bytes so we need to multiply by 2 to get uint32 offset.
+    let uintOffset = dstOffset << 1;
+    dst_buf[uintOffset] = v.r;
+    dst_buf[uintOffset + 1u] = v.g;
+}
+)";
+constexpr std::string_view kLoadRGBA32Float = R"(
+    let v = textureLoadGeneral(src_tex, coord0, params.mipLevel);
+    // dstOffset is based on 16 bytes so we need to multiply by 4.
+    let uintOffset = dstOffset << 2;
+    dst_buf[uintOffset] = v.r;
+    dst_buf[uintOffset + 1u] = v.g;
+    dst_buf[uintOffset + 2u] = v.b;
+    dst_buf[uintOffset + 3u] = v.a;
+}
+)";
 
 ResultOrError<Ref<ComputePipelineBase>> GetOrCreateTextureToBufferPipeline(
     DeviceBase* device,
@@ -819,6 +837,22 @@
             shader += kLoadR32Float;
             textureSampleType = wgpu::TextureSampleType::UnfilterableFloat;
             break;
+        case wgpu::TextureFormat::RG32Float:
+            AppendFloatTextureHead();
+            shader += kDstBufferF32;
+            shader += kCommonHead;
+            shader += kCommonStart;
+            shader += kLoadRG32Float;
+            textureSampleType = wgpu::TextureSampleType::UnfilterableFloat;
+            break;
+        case wgpu::TextureFormat::RGBA32Float:
+            AppendFloatTextureHead();
+            shader += kDstBufferF32;
+            shader += kCommonHead;
+            shader += kCommonStart;
+            shader += kLoadRGBA32Float;
+            textureSampleType = wgpu::TextureSampleType::UnfilterableFloat;
+            break;
         case wgpu::TextureFormat::Stencil8:
         case wgpu::TextureFormat::Depth24PlusStencil8:
             // Depth24PlusStencil8 can only copy with stencil aspect and is gated by validation.
@@ -908,13 +942,13 @@
     const uint32_t bytesPerTexel = format.GetAspectInfo(src.aspect).block.byteSize;
     // Size of one unit for a thread to write to. For format < 4 bytes, we always write 4 bytes at a
     // time.
-    const uint32_t ouputUnitSize = std::max(bytesPerTexel, 4u);
+    const uint32_t outputUnitSize = std::max(bytesPerTexel, 4u);
     const uint32_t adjustedWorkGroupSizeY =
         (viewDimension == wgpu::TextureViewDimension::e1D) ? 1 : kWorkgroupSizeY;
     const std::array<ConstantEntry, 3> constants = {{
         {nullptr, "workgroupSizeX", kWorkgroupSizeX},
         {nullptr, "workgroupSizeY", static_cast<double>(adjustedWorkGroupSizeY)},
-        {nullptr, "gOutputUnitSize", static_cast<double>(ouputUnitSize)},
+        {nullptr, "gOutputUnitSize", static_cast<double>(outputUnitSize)},
     }};
     computePipelineDescriptor.compute.constantCount = constants.size();
     computePipelineDescriptor.compute.constants = constants.data();
@@ -944,6 +978,8 @@
         case wgpu::TextureFormat::RG16Float:
         case wgpu::TextureFormat::RGBA16Float:
         case wgpu::TextureFormat::R32Float:
+        case wgpu::TextureFormat::RG32Float:
+        case wgpu::TextureFormat::RGBA32Float:
         case wgpu::TextureFormat::Depth16Unorm:
         case wgpu::TextureFormat::Depth32Float:
         case wgpu::TextureFormat::Stencil8:
@@ -1040,6 +1076,7 @@
                 break;
             case 4:
             case 8:
+            case 16:
                 workgroupCountX = Align(copyExtent.width, kWorkgroupSizeX) / kWorkgroupSizeX;
                 break;
             default:

diff --git a/src/dawn/native/Buffer.cpp b/src/dawn/native/Buffer.cpp
index 0b70008..d8dccb7 100644
--- a/src/dawn/native/Buffer.cpp
+++ b/src/dawn/native/Buffer.cpp

@@ -129,6 +129,8 @@
             device->IsToggleEnabled(Toggle::UseBlitForSnormTextureToBufferCopy) ||
             device->IsToggleEnabled(Toggle::UseBlitForBGRA8UnormTextureToBufferCopy) ||
             device->IsToggleEnabled(Toggle::UseBlitForRGB9E5UfloatTextureCopy) ||
+            device->IsToggleEnabled(Toggle::UseBlitForFloat16TextureCopy) ||
+            device->IsToggleEnabled(Toggle::UseBlitForFloat32TextureCopy) ||
             device->IsToggleEnabled(Toggle::UseBlitForT2B);
         if (useComputeForT2B) {
             if (!(usage & (kMappableBufferUsages | wgpu::BufferUsage::Uniform)) ||

diff --git a/src/dawn/native/CommandEncoder.cpp b/src/dawn/native/CommandEncoder.cpp
index 0bda7a6..cc5cac3 100644
--- a/src/dawn/native/CommandEncoder.cpp
+++ b/src/dawn/native/CommandEncoder.cpp

@@ -971,6 +971,20 @@
         device->IsToggleEnabled(Toggle::UseBlitForRGB9E5UfloatTextureCopy)) {
         return true;
     }
+    // float16
+    if ((format.format == wgpu::TextureFormat::R16Float ||
+         format.format == wgpu::TextureFormat::RG16Float ||
+         format.format == wgpu::TextureFormat::RGBA16Float) &&
+        device->IsToggleEnabled(Toggle::UseBlitForFloat16TextureCopy)) {
+        return true;
+    }
+    // float32
+    if ((format.format == wgpu::TextureFormat::R32Float ||
+         format.format == wgpu::TextureFormat::RG32Float ||
+         format.format == wgpu::TextureFormat::RGBA32Float) &&
+        device->IsToggleEnabled(Toggle::UseBlitForFloat32TextureCopy)) {
+        return true;
+    }
     // Depth
     if (aspect == Aspect::Depth &&
         ((format.format == wgpu::TextureFormat::Depth16Unorm &&

diff --git a/src/dawn/native/Texture.cpp b/src/dawn/native/Texture.cpp
index 3c8cff1..bd96d05 100644
--- a/src/dawn/native/Texture.cpp
+++ b/src/dawn/native/Texture.cpp

@@ -544,6 +544,21 @@
         device->IsToggleEnabled(Toggle::UseBlitForRGB9E5UfloatTextureCopy)) {
         return true;
     }
+    // float16
+    if ((format.format == wgpu::TextureFormat::R16Float ||
+         format.format == wgpu::TextureFormat::RG16Float ||
+         format.format == wgpu::TextureFormat::RGBA16Float) &&
+        device->IsToggleEnabled(Toggle::UseBlitForFloat16TextureCopy)) {
+        return true;
+    }
+    // float32
+    if ((format.format == wgpu::TextureFormat::R32Float ||
+         format.format == wgpu::TextureFormat::RG32Float ||
+         format.format == wgpu::TextureFormat::RGBA32Float) &&
+        device->IsToggleEnabled(Toggle::UseBlitForFloat32TextureCopy)) {
+        return true;
+    }
+
     // Depth
     if (format.HasDepth() &&
         (device->IsToggleEnabled(Toggle::UseBlitForDepthTextureToTextureCopyToNonzeroSubresource) ||

diff --git a/src/dawn/native/Toggles.cpp b/src/dawn/native/Toggles.cpp
index 2a8ca23..d6208f2 100644
--- a/src/dawn/native/Toggles.cpp
+++ b/src/dawn/native/Toggles.cpp

@@ -430,6 +430,16 @@
       "Use a blit instead of a copy command to copy rgb9e5ufloat texture to a texture or a buffer."
       "Workaround for OpenGLES.",
       "https://crbug.com/dawn/2079", ToggleStage::Device}},
+    {Toggle::UseBlitForFloat16TextureCopy,
+     {"use_blit_for_float_16_texture_copy",
+      "Use a blit instead of a copy command to copy float16 texture to a texture or a buffer."
+      "Workaround for OpenGLES.",
+      "https://crbug.com/381214487", ToggleStage::Device}},
+    {Toggle::UseBlitForFloat32TextureCopy,
+     {"use_blit_for_float_32_texture_copy",
+      "Use a blit instead of a copy command to copy float32 texture to a texture or a buffer."
+      "Workaround for OpenGLES.",
+      "https://crbug.com/381214487", ToggleStage::Device}},
     {Toggle::UseBlitForT2B,
      {"use_blit_for_t2b",
       "Use a compute based blit instead of a copy command to copy texture with supported format to "

diff --git a/src/dawn/native/Toggles.h b/src/dawn/native/Toggles.h
index 5a80e6a..93ed488 100644
--- a/src/dawn/native/Toggles.h
+++ b/src/dawn/native/Toggles.h

@@ -110,6 +110,8 @@
     UseBlitForSnormTextureToBufferCopy,
     UseBlitForBGRA8UnormTextureToBufferCopy,
     UseBlitForRGB9E5UfloatTextureCopy,
+    UseBlitForFloat16TextureCopy,
+    UseBlitForFloat32TextureCopy,
     UseBlitForT2B,
     UseT2B2TForSRGBTextureCopy,
     D3D12ReplaceAddWithMinusWhenDstFactorIsZeroAndSrcFactorIsDstAlpha,

diff --git a/src/dawn/native/opengl/PhysicalDeviceGL.cpp b/src/dawn/native/opengl/PhysicalDeviceGL.cpp
index dd944fe..67e7f7d 100644
--- a/src/dawn/native/opengl/PhysicalDeviceGL.cpp
+++ b/src/dawn/native/opengl/PhysicalDeviceGL.cpp

@@ -396,6 +396,11 @@
     bool supportsStencilWriteTexture =
         gl.GetVersion().IsDesktop() || gl.IsGLExtensionSupported("GL_OES_texture_stencil8");
 
+    bool isFloat32Renderable = gl.GetVersion().IsDesktop() || gl.IsAtLeastGLES(3, 2) ||
+                               gl.IsGLExtensionSupported("GL_EXT_color_buffer_float");
+    bool isFloat16Renderable =
+        isFloat32Renderable || gl.IsGLExtensionSupported("GL_EXT_color_buffer_half_float");
+
     // TODO(crbug.com/dawn/343): Investigate emulation.
     deviceToggles->Default(Toggle::DisableIndexedDrawBuffers, !supportsIndexedDrawBuffers);
     deviceToggles->Default(Toggle::DisableSampleVariables, !supportsSampleVariables);
@@ -424,6 +429,12 @@
     // For OpenGL ES, use compute shader blit to emulate rgb9e5ufloat texture to buffer copies.
     deviceToggles->Default(Toggle::UseBlitForRGB9E5UfloatTextureCopy, gl.GetVersion().IsES());
 
+    // Use compute shader blit to emulate float16 texture to buffer copies if not color renderable.
+    deviceToggles->Default(Toggle::UseBlitForFloat16TextureCopy, !isFloat16Renderable);
+
+    // Use compute shader blit to emulate float32 texture to buffer copies if not color renderable.
+    deviceToggles->Default(Toggle::UseBlitForFloat32TextureCopy, !isFloat32Renderable);
+
     // Use a blit to emulate stencil-only buffer-to-texture copies.
     deviceToggles->Default(Toggle::UseBlitForBufferToStencilTextureCopy, true);
 

diff --git a/src/dawn/tests/end2end/CopyTests.cpp b/src/dawn/tests/end2end/CopyTests.cpp
index 968efae..1122f0a 100644
--- a/src/dawn/tests/end2end/CopyTests.cpp
+++ b/src/dawn/tests/end2end/CopyTests.cpp

@@ -288,13 +288,6 @@
             // GL_EXT_texture_format_BGRA8888 or GL_APPLE_texture_format_BGRA8888 is required for
             // compat mode.
             DAWN_TEST_UNSUPPORTED_IF(format == wgpu::TextureFormat::BGRA8Unorm);
-            // TODO(crbug.com/381214487): float16-renderable and float32-renderable features
-            DAWN_SUPPRESS_TEST_IF(format == wgpu::TextureFormat::R16Float ||
-                                  format == wgpu::TextureFormat::RG16Float ||
-                                  format == wgpu::TextureFormat::RGBA16Float ||
-                                  format == wgpu::TextureFormat::R32Float ||
-                                  format == wgpu::TextureFormat::RG32Float ||
-                                  format == wgpu::TextureFormat::RGBA32Float);
             // TODO(crbug.com/388318201): GL_R11F_G11F_B10F: Framebuffer incomplete.
             DAWN_SUPPRESS_TEST_IF(format == wgpu::TextureFormat::RG11B10Ufloat);
         }
commit	fcf22c35a51fdae89f8a936bc47fb19f4b74ee17	[log] [tgz]
author	Shrek Shao <shrekshao@google.com>	Fri Jan 17 13:13:09 2025 -0800
committer	Dawn LUCI CQ <dawn-scoped@luci-project-accounts.iam.gserviceaccount.com>	Fri Jan 17 13:13:09 2025 -0800
tree	3b25741625325bf7f967cb495abd50b3850e3164
parent	91608fc52ab113da9d6f87bf6ff7216a64355e91 [diff]