Compat: Blit T2B for float16/32 textures when not color renderable
GL uses a framebuffer to implement the copy for these texture formats.
This does not work if the formats are not color renderable.
In that case, use the compute shader blit to emulate the copy.
Bug: 381214487
Change-Id: I613206b318db62b61d23553680d6231b48c901d7
Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/221836
Reviewed-by: Stephen White <senorblanco@chromium.org>
Reviewed-by: Kai Ninomiya <kainino@chromium.org>
Commit-Queue: Shrek Shao <shrekshao@google.com>
diff --git a/src/dawn/native/BlitTextureToBuffer.cpp b/src/dawn/native/BlitTextureToBuffer.cpp
index cdc7dde..5e64a72 100644
--- a/src/dawn/native/BlitTextureToBuffer.cpp
+++ b/src/dawn/native/BlitTextureToBuffer.cpp
@@ -655,12 +655,30 @@
}
)";
-// Directly loading R32Float values into dst_buf
+// Directly load float32 texel components into dst_buf (R32Float, RG32Float, RGBA32Float).
// No bit manipulation and packing is needed.
constexpr std::string_view kLoadR32Float = R"(
dst_buf[dstOffset] = textureLoadGeneral(src_tex, coord0, params.mipLevel).r;
}
)";
+constexpr std::string_view kLoadRG32Float = R"(
+ let v = textureLoadGeneral(src_tex, coord0, params.mipLevel);
+ // dstOffset is based on 8 bytes so we need to multiply by 2 to get uint32 offset.
+ let uintOffset = dstOffset << 1;
+ dst_buf[uintOffset] = v.r;
+ dst_buf[uintOffset + 1u] = v.g;
+}
+)";
+constexpr std::string_view kLoadRGBA32Float = R"(
+ let v = textureLoadGeneral(src_tex, coord0, params.mipLevel);
+ // dstOffset is based on 16 bytes so we need to multiply by 4.
+ let uintOffset = dstOffset << 2;
+ dst_buf[uintOffset] = v.r;
+ dst_buf[uintOffset + 1u] = v.g;
+ dst_buf[uintOffset + 2u] = v.b;
+ dst_buf[uintOffset + 3u] = v.a;
+}
+)";
ResultOrError<Ref<ComputePipelineBase>> GetOrCreateTextureToBufferPipeline(
DeviceBase* device,
@@ -819,6 +837,22 @@
shader += kLoadR32Float;
textureSampleType = wgpu::TextureSampleType::UnfilterableFloat;
break;
+ case wgpu::TextureFormat::RG32Float:
+ AppendFloatTextureHead();
+ shader += kDstBufferF32;
+ shader += kCommonHead;
+ shader += kCommonStart;
+ shader += kLoadRG32Float;
+ textureSampleType = wgpu::TextureSampleType::UnfilterableFloat;
+ break;
+ case wgpu::TextureFormat::RGBA32Float:
+ AppendFloatTextureHead();
+ shader += kDstBufferF32;
+ shader += kCommonHead;
+ shader += kCommonStart;
+ shader += kLoadRGBA32Float;
+ textureSampleType = wgpu::TextureSampleType::UnfilterableFloat;
+ break;
case wgpu::TextureFormat::Stencil8:
case wgpu::TextureFormat::Depth24PlusStencil8:
// Depth24PlusStencil8 can only copy with stencil aspect and is gated by validation.
@@ -908,13 +942,13 @@
const uint32_t bytesPerTexel = format.GetAspectInfo(src.aspect).block.byteSize;
// Size of one unit for a thread to write to. For format < 4 bytes, we always write 4 bytes at a
// time.
- const uint32_t ouputUnitSize = std::max(bytesPerTexel, 4u);
+ const uint32_t outputUnitSize = std::max(bytesPerTexel, 4u);
const uint32_t adjustedWorkGroupSizeY =
(viewDimension == wgpu::TextureViewDimension::e1D) ? 1 : kWorkgroupSizeY;
const std::array<ConstantEntry, 3> constants = {{
{nullptr, "workgroupSizeX", kWorkgroupSizeX},
{nullptr, "workgroupSizeY", static_cast<double>(adjustedWorkGroupSizeY)},
- {nullptr, "gOutputUnitSize", static_cast<double>(ouputUnitSize)},
+ {nullptr, "gOutputUnitSize", static_cast<double>(outputUnitSize)},
}};
computePipelineDescriptor.compute.constantCount = constants.size();
computePipelineDescriptor.compute.constants = constants.data();
@@ -944,6 +978,8 @@
case wgpu::TextureFormat::RG16Float:
case wgpu::TextureFormat::RGBA16Float:
case wgpu::TextureFormat::R32Float:
+ case wgpu::TextureFormat::RG32Float:
+ case wgpu::TextureFormat::RGBA32Float:
case wgpu::TextureFormat::Depth16Unorm:
case wgpu::TextureFormat::Depth32Float:
case wgpu::TextureFormat::Stencil8:
@@ -1040,6 +1076,7 @@
break;
case 4:
case 8:
+ case 16:
workgroupCountX = Align(copyExtent.width, kWorkgroupSizeX) / kWorkgroupSizeX;
break;
default:
diff --git a/src/dawn/native/Buffer.cpp b/src/dawn/native/Buffer.cpp
index 0b70008..d8dccb7 100644
--- a/src/dawn/native/Buffer.cpp
+++ b/src/dawn/native/Buffer.cpp
@@ -129,6 +129,8 @@
device->IsToggleEnabled(Toggle::UseBlitForSnormTextureToBufferCopy) ||
device->IsToggleEnabled(Toggle::UseBlitForBGRA8UnormTextureToBufferCopy) ||
device->IsToggleEnabled(Toggle::UseBlitForRGB9E5UfloatTextureCopy) ||
+ device->IsToggleEnabled(Toggle::UseBlitForFloat16TextureCopy) ||
+ device->IsToggleEnabled(Toggle::UseBlitForFloat32TextureCopy) ||
device->IsToggleEnabled(Toggle::UseBlitForT2B);
if (useComputeForT2B) {
if (!(usage & (kMappableBufferUsages | wgpu::BufferUsage::Uniform)) ||
diff --git a/src/dawn/native/CommandEncoder.cpp b/src/dawn/native/CommandEncoder.cpp
index 0bda7a6..cc5cac3 100644
--- a/src/dawn/native/CommandEncoder.cpp
+++ b/src/dawn/native/CommandEncoder.cpp
@@ -971,6 +971,20 @@
device->IsToggleEnabled(Toggle::UseBlitForRGB9E5UfloatTextureCopy)) {
return true;
}
+ // float16
+ if ((format.format == wgpu::TextureFormat::R16Float ||
+ format.format == wgpu::TextureFormat::RG16Float ||
+ format.format == wgpu::TextureFormat::RGBA16Float) &&
+ device->IsToggleEnabled(Toggle::UseBlitForFloat16TextureCopy)) {
+ return true;
+ }
+ // float32
+ if ((format.format == wgpu::TextureFormat::R32Float ||
+ format.format == wgpu::TextureFormat::RG32Float ||
+ format.format == wgpu::TextureFormat::RGBA32Float) &&
+ device->IsToggleEnabled(Toggle::UseBlitForFloat32TextureCopy)) {
+ return true;
+ }
// Depth
if (aspect == Aspect::Depth &&
((format.format == wgpu::TextureFormat::Depth16Unorm &&
diff --git a/src/dawn/native/Texture.cpp b/src/dawn/native/Texture.cpp
index 3c8cff1..bd96d05 100644
--- a/src/dawn/native/Texture.cpp
+++ b/src/dawn/native/Texture.cpp
@@ -544,6 +544,21 @@
device->IsToggleEnabled(Toggle::UseBlitForRGB9E5UfloatTextureCopy)) {
return true;
}
+ // float16
+ if ((format.format == wgpu::TextureFormat::R16Float ||
+ format.format == wgpu::TextureFormat::RG16Float ||
+ format.format == wgpu::TextureFormat::RGBA16Float) &&
+ device->IsToggleEnabled(Toggle::UseBlitForFloat16TextureCopy)) {
+ return true;
+ }
+ // float32
+ if ((format.format == wgpu::TextureFormat::R32Float ||
+ format.format == wgpu::TextureFormat::RG32Float ||
+ format.format == wgpu::TextureFormat::RGBA32Float) &&
+ device->IsToggleEnabled(Toggle::UseBlitForFloat32TextureCopy)) {
+ return true;
+ }
+
// Depth
if (format.HasDepth() &&
(device->IsToggleEnabled(Toggle::UseBlitForDepthTextureToTextureCopyToNonzeroSubresource) ||
diff --git a/src/dawn/native/Toggles.cpp b/src/dawn/native/Toggles.cpp
index 2a8ca23..d6208f2 100644
--- a/src/dawn/native/Toggles.cpp
+++ b/src/dawn/native/Toggles.cpp
@@ -430,6 +430,16 @@
"Use a blit instead of a copy command to copy rgb9e5ufloat texture to a texture or a buffer."
"Workaround for OpenGLES.",
"https://crbug.com/dawn/2079", ToggleStage::Device}},
+ {Toggle::UseBlitForFloat16TextureCopy,
+ {"use_blit_for_float_16_texture_copy",
+ "Use a blit instead of a copy command to copy float16 texture to a texture or a buffer. "
+ "Workaround for OpenGLES.",  // Adjacent literals concatenate; keep the space above.
+ "https://crbug.com/381214487", ToggleStage::Device}},
+ {Toggle::UseBlitForFloat32TextureCopy,
+ {"use_blit_for_float_32_texture_copy",
+ "Use a blit instead of a copy command to copy float32 texture to a texture or a buffer. "
+ "Workaround for OpenGLES.",  // Adjacent literals concatenate; keep the space above.
+ "https://crbug.com/381214487", ToggleStage::Device}},
{Toggle::UseBlitForT2B,
{"use_blit_for_t2b",
"Use a compute based blit instead of a copy command to copy texture with supported format to "
diff --git a/src/dawn/native/Toggles.h b/src/dawn/native/Toggles.h
index 5a80e6a..93ed488 100644
--- a/src/dawn/native/Toggles.h
+++ b/src/dawn/native/Toggles.h
@@ -110,6 +110,8 @@
UseBlitForSnormTextureToBufferCopy,
UseBlitForBGRA8UnormTextureToBufferCopy,
UseBlitForRGB9E5UfloatTextureCopy,
+ UseBlitForFloat16TextureCopy,
+ UseBlitForFloat32TextureCopy,
UseBlitForT2B,
UseT2B2TForSRGBTextureCopy,
D3D12ReplaceAddWithMinusWhenDstFactorIsZeroAndSrcFactorIsDstAlpha,
diff --git a/src/dawn/native/opengl/PhysicalDeviceGL.cpp b/src/dawn/native/opengl/PhysicalDeviceGL.cpp
index dd944fe..67e7f7d 100644
--- a/src/dawn/native/opengl/PhysicalDeviceGL.cpp
+++ b/src/dawn/native/opengl/PhysicalDeviceGL.cpp
@@ -396,6 +396,11 @@
bool supportsStencilWriteTexture =
gl.GetVersion().IsDesktop() || gl.IsGLExtensionSupported("GL_OES_texture_stencil8");
+ bool isFloat32Renderable = gl.GetVersion().IsDesktop() || gl.IsAtLeastGLES(3, 2) ||
+ gl.IsGLExtensionSupported("GL_EXT_color_buffer_float");
+ bool isFloat16Renderable =
+ isFloat32Renderable || gl.IsGLExtensionSupported("GL_EXT_color_buffer_half_float");
+
// TODO(crbug.com/dawn/343): Investigate emulation.
deviceToggles->Default(Toggle::DisableIndexedDrawBuffers, !supportsIndexedDrawBuffers);
deviceToggles->Default(Toggle::DisableSampleVariables, !supportsSampleVariables);
@@ -424,6 +429,12 @@
// For OpenGL ES, use compute shader blit to emulate rgb9e5ufloat texture to buffer copies.
deviceToggles->Default(Toggle::UseBlitForRGB9E5UfloatTextureCopy, gl.GetVersion().IsES());
+ // Use compute shader blit to emulate float16 texture to buffer copies if not color renderable.
+ deviceToggles->Default(Toggle::UseBlitForFloat16TextureCopy, !isFloat16Renderable);
+
+ // Use compute shader blit to emulate float32 texture to buffer copies if not color renderable.
+ deviceToggles->Default(Toggle::UseBlitForFloat32TextureCopy, !isFloat32Renderable);
+
// Use a blit to emulate stencil-only buffer-to-texture copies.
deviceToggles->Default(Toggle::UseBlitForBufferToStencilTextureCopy, true);
diff --git a/src/dawn/tests/end2end/CopyTests.cpp b/src/dawn/tests/end2end/CopyTests.cpp
index 968efae..1122f0a 100644
--- a/src/dawn/tests/end2end/CopyTests.cpp
+++ b/src/dawn/tests/end2end/CopyTests.cpp
@@ -288,13 +288,6 @@
// GL_EXT_texture_format_BGRA8888 or GL_APPLE_texture_format_BGRA8888 is required for
// compat mode.
DAWN_TEST_UNSUPPORTED_IF(format == wgpu::TextureFormat::BGRA8Unorm);
- // TODO(crbug.com/381214487): float16-renderable and float32-renderable features
- DAWN_SUPPRESS_TEST_IF(format == wgpu::TextureFormat::R16Float ||
- format == wgpu::TextureFormat::RG16Float ||
- format == wgpu::TextureFormat::RGBA16Float ||
- format == wgpu::TextureFormat::R32Float ||
- format == wgpu::TextureFormat::RG32Float ||
- format == wgpu::TextureFormat::RGBA32Float);
// TODO(crbug.com/388318201): GL_R11F_G11F_B10F: Framebuffer incomplete.
DAWN_SUPPRESS_TEST_IF(format == wgpu::TextureFormat::RG11B10Ufloat);
}