| // Copyright 2025 The Dawn & Tint Authors |
| // |
| // Redistribution and use in source and binary forms, with or without |
| // modification, are permitted provided that the following conditions are met: |
| // |
| // 1. Redistributions of source code must retain the above copyright notice, this |
| // list of conditions and the following disclaimer. |
| // |
| // 2. Redistributions in binary form must reproduce the above copyright notice, |
| // this list of conditions and the following disclaimer in the documentation |
| // and/or other materials provided with the distribution. |
| // |
| // 3. Neither the name of the copyright holder nor the names of its |
| // contributors may be used to endorse or promote products derived from |
| // this software without specific prior written permission. |
| // |
| // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
| // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
| // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE |
| // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE |
| // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
| // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR |
| // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER |
| // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, |
| // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
| // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| |
| #include "dawn/native/BlitBufferToTexture.h" |
| |
| #include <sstream> |
| #include <string> |
| #include <string_view> |
| #include <utility> |
| |
| #include "dawn/common/Assert.h" |
| #include "dawn/native/BindGroup.h" |
| #include "dawn/native/CommandBuffer.h" |
| #include "dawn/native/CommandEncoder.h" |
| #include "dawn/native/Device.h" |
| #include "dawn/native/InternalPipelineStore.h" |
| #include "dawn/native/Queue.h" |
| #include "dawn/native/RenderPassEncoder.h" |
| #include "dawn/native/RenderPipeline.h" |
| #include "dawn/native/utils/WGPUHelpers.h" |
| |
| namespace dawn::native { |
| |
| namespace { |
| |
| constexpr std::string_view kShaderCommonSrc = R"( |
| @vertex fn vert_fullscreen_quad( |
| @builtin(vertex_index) vertex_index : u32 |
| ) -> @builtin(position) vec4f { |
| const pos = array( |
| vec2f(-1.0, -1.0), |
| vec2f( 3.0, -1.0), |
| vec2f(-1.0, 3.0)); |
| return vec4f(pos[vertex_index], 0.0, 1.0); |
| } |
| |
| struct Params { |
| srcOffset : u32, |
| bytesPerRow : u32, |
| dstOrigin : vec2u |
| }; |
| |
| @group(0) @binding(0) var<storage, read> src_buf : array<u32>; |
| @group(0) @binding(1) var<uniform> params : Params; |
| |
| fn loadU8AsU32(byteOffset: u32) -> u32 { |
| let uintOffset = byteOffset >> 2; |
| let uintModOffset = byteOffset & 3; |
| let bitShift = uintModOffset * 8; |
| return (src_buf[uintOffset] >> bitShift) & 0xff; |
| } |
| |
| fn loadU16AsU32(byteOffset: u32) -> u32 { |
| let firstHalf = loadU8AsU32(byteOffset); |
| let secondHalf = loadU8AsU32(byteOffset + 1); |
| return firstHalf | (secondHalf << 8); |
| } |
| |
| // byteOffset is expected to be aligned to 4. |
| fn loadU32(byteOffset: u32) -> u32 { |
| let uintOffset = byteOffset >> 2; |
| return src_buf[uintOffset]; |
| } |
| |
| // byteOffset is expected to be aligned to 4. |
| fn loadTwoU32s(byteOffset: u32) -> vec2u { |
| return vec2u(loadU32(byteOffset), loadU32(byteOffset + 4)); |
| } |
| |
| @fragment fn blit_buffer_to_texture( |
| @builtin(position) screen_position : vec4f |
| ) -> @location(0) vec4f { |
| let iposition = vec2u(screen_position.xy) - params.dstOrigin; |
| |
| let srcOffset = params.srcOffset + iposition.x * kPixelSize + iposition.y * params.bytesPerRow; |
| |
| return unpackData(srcOffset); |
| } |
| )"; |
| |
| constexpr std::string_view kUnpackR8Unorm = R"( |
| fn unpackData(byteOffset: u32) -> vec4f { |
| return unpack4x8unorm(loadU8AsU32(byteOffset)); |
| } |
| )"; |
| |
| constexpr std::string_view kUnpackRG8Unorm = R"( |
| fn unpackData(byteOffset: u32) -> vec4f { |
| return unpack4x8unorm(loadU16AsU32(byteOffset)); |
| } |
| )"; |
| |
| constexpr std::string_view kUnpackRGBA8Unorm = R"( |
| fn unpackData(byteOffset: u32) -> vec4f { |
| return unpack4x8unorm(loadU32(byteOffset)); |
| } |
| )"; |
| |
| constexpr std::string_view kUnpackBGRA8Unorm = R"( |
| fn unpackData(byteOffset: u32) -> vec4f { |
| return unpack4x8unorm(loadU32(byteOffset)).bgra; |
| } |
| )"; |
| |
| constexpr std::string_view kUnpackRGB10A2Unorm = R"( |
| fn unpackData(byteOffset: u32) -> vec4f { |
| let data = loadU32(byteOffset); |
| let r = f32((data & 0x3ff)) / 1023.0; |
| let g = f32(((data >> 10) & 0x3ff)) / 1023.0; |
| let b = f32(((data >> 20) & 0x3ff)) / 1023.0; |
| let a = f32(((data >> 30) & 0x3)) / 3.0; |
| return vec4f(r, g, b, a); |
| } |
| )"; |
| |
| constexpr std::string_view kUnpackR16Float = R"( |
| fn unpackData(byteOffset: u32) -> vec4f { |
| return vec4f(unpack2x16float(loadU16AsU32(byteOffset)), 0.0, 1.0); |
| } |
| )"; |
| |
| constexpr std::string_view kUnpackR16Unorm = R"( |
| fn unpackData(byteOffset: u32) -> vec4f { |
| return vec4f(f32(loadU16AsU32(byteOffset)) / f32(0xffff), 0.0, 0.0, 1.0); |
| } |
| )"; |
| |
| constexpr std::string_view kUnpackRG16Float = R"( |
| fn unpackData(byteOffset: u32) -> vec4f { |
| return vec4f(unpack2x16float(loadU32(byteOffset)), 0.0, 1.0); |
| } |
| )"; |
| |
| constexpr std::string_view kUnpackRG16Unorm = R"( |
| fn unpackData(byteOffset: u32) -> vec4f { |
| let word = loadU32(byteOffset); |
| let x = f32(word & 0xffff); |
| let y = f32(word >> 16); |
| return vec4f(vec2f(x, y) / f32(0xffff), 0.0, 1.0); |
| } |
| )"; |
| |
| constexpr std::string_view kUnpackRGBA16Float = R"( |
| fn unpackData(byteOffset: u32) -> vec4f { |
| let data = loadTwoU32s(byteOffset); |
| return vec4f(unpack2x16float(data.x), unpack2x16float(data.y)); |
| } |
| )"; |
| |
| constexpr std::string_view kUnpackRGBA16Unorm = R"( |
| fn unpackData(byteOffset: u32) -> vec4f { |
| let words = loadTwoU32s(byteOffset); |
| let x = f32(words[0] & 0xffff); |
| let y = f32(words[0] >> 16); |
| let z = f32(words[1] & 0xffff); |
| let w = f32(words[1] >> 16); |
| return vec4f(x, y, z, w) / f32(0xffff); |
| } |
| )"; |
| |
| constexpr std::string_view kUnpackR32Float = R"( |
| fn unpackData(byteOffset: u32) -> vec4f { |
| return vec4f(bitcast<f32>(loadU32(byteOffset)), 0.0, 0.0, 1.0); |
| } |
| )"; |
| |
| constexpr std::string_view kUnpackRG32Float = R"( |
| fn unpackData(byteOffset: u32) -> vec4f { |
| let color = bitcast<vec2f>(loadTwoU32s(byteOffset)); |
| return vec4f(color, 0.0, 1.0); |
| } |
| )"; |
| |
| constexpr std::string_view kUnpackRGBA32Float = R"( |
| fn unpackData(byteOffset: u32) -> vec4f { |
| return vec4f(bitcast<vec2f>(loadTwoU32s(byteOffset)), |
| bitcast<vec2f>(loadTwoU32s(byteOffset + 8))); |
| } |
| )"; |
| |
| std::string GenerateShaderSource(wgpu::TextureFormat format) { |
| int pixelSize = 0; |
| std::ostringstream ss; |
| switch (format) { |
| case wgpu::TextureFormat::R8Unorm: |
| pixelSize = 1; |
| ss << kUnpackR8Unorm; |
| break; |
| case wgpu::TextureFormat::RG8Unorm: |
| pixelSize = 2; |
| ss << kUnpackRG8Unorm; |
| break; |
| case wgpu::TextureFormat::RGBA8Unorm: |
| pixelSize = 4; |
| ss << kUnpackRGBA8Unorm; |
| break; |
| case wgpu::TextureFormat::BGRA8Unorm: |
| pixelSize = 4; |
| ss << kUnpackBGRA8Unorm; |
| break; |
| case wgpu::TextureFormat::RGB10A2Unorm: |
| pixelSize = 4; |
| ss << kUnpackRGB10A2Unorm; |
| break; |
| case wgpu::TextureFormat::R16Float: |
| pixelSize = 2; |
| ss << kUnpackR16Float; |
| break; |
| case wgpu::TextureFormat::R16Unorm: |
| pixelSize = 2; |
| ss << kUnpackR16Unorm; |
| break; |
| case wgpu::TextureFormat::RG16Float: |
| pixelSize = 4; |
| ss << kUnpackRG16Float; |
| break; |
| case wgpu::TextureFormat::RG16Unorm: |
| pixelSize = 4; |
| ss << kUnpackRG16Unorm; |
| break; |
| case wgpu::TextureFormat::RGBA16Float: |
| pixelSize = 8; |
| ss << kUnpackRGBA16Float; |
| break; |
| case wgpu::TextureFormat::RGBA16Unorm: |
| pixelSize = 8; |
| ss << kUnpackRGBA16Unorm; |
| break; |
| case wgpu::TextureFormat::R32Float: |
| pixelSize = 4; |
| ss << kUnpackR32Float; |
| break; |
| case wgpu::TextureFormat::RG32Float: |
| pixelSize = 8; |
| ss << kUnpackRG32Float; |
| break; |
| case wgpu::TextureFormat::RGBA32Float: |
| pixelSize = 16; |
| ss << kUnpackRGBA32Float; |
| break; |
| default: |
| DAWN_UNREACHABLE(); |
| } |
| |
| ss << "const kPixelSize = " << pixelSize << ";\n"; |
| ss << kShaderCommonSrc; |
| |
| return ss.str(); |
| } |
| |
| ResultOrError<Ref<RenderPipelineBase>> GetOrCreatePipeline(DeviceBase* device, |
| wgpu::TextureFormat format) { |
| InternalPipelineStore* store = device->GetInternalPipelineStore(); |
| { |
| auto it = store->blitBufferToTexturePipelines.find(format); |
| if (it != store->blitBufferToTexturePipelines.end()) { |
| return it->second; |
| } |
| } |
| |
| // vertex shader's source. |
| ShaderSourceWGSL wgslDesc = {}; |
| ShaderModuleDescriptor shaderModuleDesc = {}; |
| shaderModuleDesc.nextInChain = &wgslDesc; |
| |
| // shader's source will depend on format key. |
| std::string shaderCode = GenerateShaderSource(format); |
| wgslDesc.code = shaderCode.c_str(); |
| Ref<ShaderModuleBase> shaderModule; |
| DAWN_TRY_ASSIGN(shaderModule, device->CreateShaderModule(&shaderModuleDesc)); |
| |
| FragmentState fragmentState = {}; |
| fragmentState.module = shaderModule.Get(); |
| fragmentState.entryPoint = "blit_buffer_to_texture"; |
| |
| // Color target states. |
| ColorTargetState colorTarget = {}; |
| colorTarget.format = format; |
| colorTarget.writeMask = wgpu::ColorWriteMask::All; |
| |
| fragmentState.targetCount = 1; |
| fragmentState.targets = &colorTarget; |
| |
| RenderPipelineDescriptor renderPipelineDesc = {}; |
| renderPipelineDesc.label = "blit_buffer_to_texture"; |
| renderPipelineDesc.vertex.module = shaderModule.Get(); |
| renderPipelineDesc.vertex.entryPoint = "vert_fullscreen_quad"; |
| renderPipelineDesc.fragment = &fragmentState; |
| |
| // Bind group layout. |
| Ref<BindGroupLayoutBase> bindGroupLayout; |
| DAWN_TRY_ASSIGN(bindGroupLayout, |
| utils::MakeBindGroupLayout( |
| device, |
| { |
| {0, wgpu::ShaderStage::Fragment, kInternalReadOnlyStorageBufferBinding}, |
| {1, wgpu::ShaderStage::Fragment, wgpu::BufferBindingType::Uniform}, |
| }, |
| /* allowInternalBinding */ true)); |
| |
| Ref<PipelineLayoutBase> pipelineLayout; |
| DAWN_TRY_ASSIGN(pipelineLayout, utils::MakeBasicPipelineLayout(device, bindGroupLayout)); |
| renderPipelineDesc.layout = pipelineLayout.Get(); |
| |
| Ref<RenderPipelineBase> pipeline; |
| DAWN_TRY_ASSIGN(pipeline, device->CreateRenderPipeline(&renderPipelineDesc)); |
| |
| store->blitBufferToTexturePipelines.emplace(format, pipeline); |
| return pipeline; |
| } |
| |
| } // anonymous namespace |
| |
| bool IsFormatSupportedByBufferToTextureBlit(wgpu::TextureFormat format) { |
| // TODO(348653642): Eventually we should support all non-compressed formats. For now, just list |
| // a subset of them that we support. |
| switch (format) { |
| case wgpu::TextureFormat::R8Unorm: |
| case wgpu::TextureFormat::RG8Unorm: |
| case wgpu::TextureFormat::RGBA8Unorm: |
| case wgpu::TextureFormat::BGRA8Unorm: |
| case wgpu::TextureFormat::RGB10A2Unorm: |
| case wgpu::TextureFormat::R16Float: |
| case wgpu::TextureFormat::R16Unorm: |
| case wgpu::TextureFormat::RG16Float: |
| case wgpu::TextureFormat::RG16Unorm: |
| case wgpu::TextureFormat::RGBA16Float: |
| case wgpu::TextureFormat::RGBA16Unorm: |
| case wgpu::TextureFormat::R32Float: |
| case wgpu::TextureFormat::RG32Float: |
| case wgpu::TextureFormat::RGBA32Float: |
| return true; |
| default: |
| return false; |
| } |
| } |
| |
| bool IsBufferToTextureBlitSupported(BufferBase* buffer, |
| const TextureCopy& dst, |
| const Extent3D& copyExtent) { |
| if (!(buffer->GetInternalUsage() & |
| (kReadOnlyStorageBuffer | kInternalStorageBuffer | wgpu::BufferUsage::Storage))) { |
| return false; |
| } |
| |
| if (!IsFormatSupportedByBufferToTextureBlit(dst.texture->GetFormat().format)) { |
| return false; |
| } |
| |
| if (dst.texture->GetDimension() == wgpu::TextureDimension::e1D) { |
| // 1D texture cannot be rendered to so skip it. |
| return false; |
| } |
| |
| if (dst.aspect != Aspect::Color) { |
| // Don't support multiplanar copies yet. |
| return false; |
| } |
| |
| // Must have non-zero copy size. |
| return copyExtent.width * copyExtent.height * copyExtent.depthOrArrayLayers > 0; |
| } |
| |
| MaybeError BlitBufferToTexture(DeviceBase* device, |
| CommandEncoder* commandEncoder, |
| BufferBase* buffer, |
| const TexelCopyBufferLayout& src, |
| const TextureCopy& dst, |
| const Extent3D& copyExtent) { |
| DAWN_ASSERT(device->IsLockedByCurrentThreadIfNeeded()); |
| |
| // This function assumes bytesPerRow is multiples of 4. Normally it's required that |
| // bytesPerRow is aligned to 256. However some backends might enable |
| // DawnTexelCopyBufferRowAlignment feature to relax the alignment. Currently only D3D11 backend |
| // enables this feature, and the relaxed alignment there is 4. |
| DAWN_ASSERT((src.bytesPerRow % 4) == 0); |
| |
| DAWN_ASSERT(buffer->GetInternalUsage() & |
| (kReadOnlyStorageBuffer | kInternalStorageBuffer | wgpu::BufferUsage::Storage)); |
| |
| DAWN_ASSERT(copyExtent.width > 0 && copyExtent.height > 0 && copyExtent.depthOrArrayLayers > 0); |
| |
| // Allow internal usages since we need to use the destination |
| // as a render attachment. |
| auto scope = commandEncoder->MakeInternalUsageScope(); |
| |
| Ref<RenderPipelineBase> pipeline; |
| DAWN_TRY_ASSIGN(pipeline, GetOrCreatePipeline(device, dst.texture->GetFormat().format)); |
| |
| Ref<BindGroupLayoutBase> bgl; |
| DAWN_TRY_ASSIGN(bgl, pipeline->GetBindGroupLayout(0)); |
| |
| const auto ssboAlignment = device->GetLimits().v1.minStorageBufferOffsetAlignment; |
| DAWN_ASSERT(IsPowerOfTwo(ssboAlignment)); |
| |
| wgpu::TextureViewDimension viewDimension; |
| uint32_t baseDepth = 0; |
| uint32_t baseArray = 0; |
| uint32_t depthStep = 0; |
| uint32_t arrayStep = 0; |
| switch (dst.texture->GetDimension()) { |
| case wgpu::TextureDimension::e1D: |
| DAWN_UNREACHABLE(); |
| break; |
| case wgpu::TextureDimension::e3D: |
| viewDimension = wgpu::TextureViewDimension::e3D; |
| baseDepth = dst.origin.z; |
| depthStep = 1; |
| break; |
| default: |
| viewDimension = wgpu::TextureViewDimension::e2D; |
| baseArray = dst.origin.z; |
| arrayStep = 1; |
| break; |
| } |
| |
| for (uint32_t z = 0; z < copyExtent.depthOrArrayLayers; ++z) { |
| Ref<TextureViewBase> dstView; |
| { |
| TextureViewDescriptor viewDesc = {}; |
| viewDesc.dimension = viewDimension; |
| viewDesc.baseArrayLayer = baseArray + arrayStep * z; |
| viewDesc.arrayLayerCount = 1; |
| viewDesc.baseMipLevel = dst.mipLevel; |
| viewDesc.mipLevelCount = 1; |
| DAWN_TRY_ASSIGN(dstView, dst.texture->CreateView(&viewDesc)); |
| } |
| |
| const uint64_t srcOffset = src.offset + z * src.rowsPerImage * src.bytesPerRow; |
| const uint64_t srcBufferBindingOffset = AlignDown(srcOffset, ssboAlignment); |
| const uint32_t shaderReadOffset = static_cast<uint32_t>(srcOffset & (ssboAlignment - 1)); |
| Ref<BufferBase> paramsBuffer; |
| { |
| DAWN_TRY_ASSIGN(paramsBuffer, |
| device->GetOrCreateTemporaryUniformBuffer(sizeof(uint32_t) * 4)); |
| |
| uint32_t params[4]; |
| params[0] = shaderReadOffset; |
| params[1] = src.bytesPerRow; |
| params[2] = dst.origin.x; |
| params[3] = dst.origin.y; |
| commandEncoder->APIWriteBuffer(paramsBuffer.Get(), 0, |
| reinterpret_cast<const uint8_t*>(¶ms[0]), |
| sizeof(params)); |
| } |
| |
| Ref<BindGroupBase> bindGroup; |
| DAWN_TRY_ASSIGN(bindGroup, utils::MakeBindGroup(device, bgl, |
| { |
| {0, buffer, srcBufferBindingOffset}, |
| {1, paramsBuffer}, |
| }, |
| UsageValidationMode::Internal)); |
| |
| RenderPassColorAttachment colorAttachment; |
| colorAttachment.view = dstView.Get(); |
| if (depthStep) { |
| colorAttachment.depthSlice = baseDepth + depthStep * z; |
| } |
| colorAttachment.loadOp = wgpu::LoadOp::Load; |
| colorAttachment.storeOp = wgpu::StoreOp::Store; |
| |
| RenderPassDescriptor rpDesc = {}; |
| rpDesc.colorAttachmentCount = 1; |
| rpDesc.colorAttachments = &colorAttachment; |
| |
| Ref<RenderPassEncoder> pass = commandEncoder->BeginRenderPass(&rpDesc); |
| // Bind the resources. |
| pass->APISetBindGroup(0, bindGroup.Get()); |
| pass->APISetViewport(dst.origin.x, dst.origin.y, copyExtent.width, copyExtent.height, 0.f, |
| 1.f); |
| |
| // Draw to perform the blit. |
| pass->APISetPipeline(pipeline.Get()); |
| pass->APIDraw(3, 1, 0, 0); |
| |
| pass->End(); |
| } |
| return {}; |
| } |
| |
| } // namespace dawn::native |