blob: f5a926c425168363832b96befd28b6b1d0796459 [file] [log] [blame]
// Copyright 2025 The Dawn & Tint Authors
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// 1. Redistributions of source code must retain the above copyright notice, this
// list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// 3. Neither the name of the copyright holder nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "dawn/native/BlitBufferToTexture.h"
#include <sstream>
#include <string>
#include <string_view>
#include <utility>
#include "dawn/common/Assert.h"
#include "dawn/native/BindGroup.h"
#include "dawn/native/CommandBuffer.h"
#include "dawn/native/CommandEncoder.h"
#include "dawn/native/Device.h"
#include "dawn/native/InternalPipelineStore.h"
#include "dawn/native/Queue.h"
#include "dawn/native/RenderPassEncoder.h"
#include "dawn/native/RenderPipeline.h"
#include "dawn/native/utils/WGPUHelpers.h"
namespace dawn::native {
namespace {
constexpr std::string_view kShaderCommonSrc = R"(
@vertex fn vert_fullscreen_quad(
@builtin(vertex_index) vertex_index : u32
) -> @builtin(position) vec4f {
const pos = array(
vec2f(-1.0, -1.0),
vec2f( 3.0, -1.0),
vec2f(-1.0, 3.0));
return vec4f(pos[vertex_index], 0.0, 1.0);
}
struct Params {
srcOffset : u32,
bytesPerRow : u32,
dstOrigin : vec2u
};
@group(0) @binding(0) var<storage, read> src_buf : array<u32>;
@group(0) @binding(1) var<uniform> params : Params;
fn loadU8AsU32(byteOffset: u32) -> u32 {
let uintOffset = byteOffset >> 2;
let uintModOffset = byteOffset & 3;
let bitShift = uintModOffset * 8;
return (src_buf[uintOffset] >> bitShift) & 0xff;
}
fn loadU16AsU32(byteOffset: u32) -> u32 {
let firstHalf = loadU8AsU32(byteOffset);
let secondHalf = loadU8AsU32(byteOffset + 1);
return firstHalf | (secondHalf << 8);
}
// byteOffset is expected to be aligned to 4.
fn loadU32(byteOffset: u32) -> u32 {
let uintOffset = byteOffset >> 2;
return src_buf[uintOffset];
}
// byteOffset is expected to be aligned to 4.
fn loadTwoU32s(byteOffset: u32) -> vec2u {
return vec2u(loadU32(byteOffset), loadU32(byteOffset + 4));
}
@fragment fn blit_buffer_to_texture(
@builtin(position) screen_position : vec4f
) -> @location(0) vec4f {
let iposition = vec2u(screen_position.xy) - params.dstOrigin;
let srcOffset = params.srcOffset + iposition.x * kPixelSize + iposition.y * params.bytesPerRow;
return unpackData(srcOffset);
}
)";
constexpr std::string_view kUnpackR8Unorm = R"(
fn unpackData(byteOffset: u32) -> vec4f {
return unpack4x8unorm(loadU8AsU32(byteOffset));
}
)";
constexpr std::string_view kUnpackRG8Unorm = R"(
fn unpackData(byteOffset: u32) -> vec4f {
return unpack4x8unorm(loadU16AsU32(byteOffset));
}
)";
constexpr std::string_view kUnpackRGBA8Unorm = R"(
fn unpackData(byteOffset: u32) -> vec4f {
return unpack4x8unorm(loadU32(byteOffset));
}
)";
constexpr std::string_view kUnpackBGRA8Unorm = R"(
fn unpackData(byteOffset: u32) -> vec4f {
return unpack4x8unorm(loadU32(byteOffset)).bgra;
}
)";
constexpr std::string_view kUnpackRGB10A2Unorm = R"(
fn unpackData(byteOffset: u32) -> vec4f {
let data = loadU32(byteOffset);
let r = f32((data & 0x3ff)) / 1023.0;
let g = f32(((data >> 10) & 0x3ff)) / 1023.0;
let b = f32(((data >> 20) & 0x3ff)) / 1023.0;
let a = f32(((data >> 30) & 0x3)) / 3.0;
return vec4f(r, g, b, a);
}
)";
constexpr std::string_view kUnpackR16Float = R"(
fn unpackData(byteOffset: u32) -> vec4f {
return vec4f(unpack2x16float(loadU16AsU32(byteOffset)), 0.0, 1.0);
}
)";
constexpr std::string_view kUnpackR16Unorm = R"(
fn unpackData(byteOffset: u32) -> vec4f {
return vec4f(f32(loadU16AsU32(byteOffset)) / f32(0xffff), 0.0, 0.0, 1.0);
}
)";
constexpr std::string_view kUnpackRG16Float = R"(
fn unpackData(byteOffset: u32) -> vec4f {
return vec4f(unpack2x16float(loadU32(byteOffset)), 0.0, 1.0);
}
)";
constexpr std::string_view kUnpackRG16Unorm = R"(
fn unpackData(byteOffset: u32) -> vec4f {
let word = loadU32(byteOffset);
let x = f32(word & 0xffff);
let y = f32(word >> 16);
return vec4f(vec2f(x, y) / f32(0xffff), 0.0, 1.0);
}
)";
constexpr std::string_view kUnpackRGBA16Float = R"(
fn unpackData(byteOffset: u32) -> vec4f {
let data = loadTwoU32s(byteOffset);
return vec4f(unpack2x16float(data.x), unpack2x16float(data.y));
}
)";
constexpr std::string_view kUnpackRGBA16Unorm = R"(
fn unpackData(byteOffset: u32) -> vec4f {
let words = loadTwoU32s(byteOffset);
let x = f32(words[0] & 0xffff);
let y = f32(words[0] >> 16);
let z = f32(words[1] & 0xffff);
let w = f32(words[1] >> 16);
return vec4f(x, y, z, w) / f32(0xffff);
}
)";
constexpr std::string_view kUnpackR32Float = R"(
fn unpackData(byteOffset: u32) -> vec4f {
return vec4f(bitcast<f32>(loadU32(byteOffset)), 0.0, 0.0, 1.0);
}
)";
constexpr std::string_view kUnpackRG32Float = R"(
fn unpackData(byteOffset: u32) -> vec4f {
let color = bitcast<vec2f>(loadTwoU32s(byteOffset));
return vec4f(color, 0.0, 1.0);
}
)";
constexpr std::string_view kUnpackRGBA32Float = R"(
fn unpackData(byteOffset: u32) -> vec4f {
return vec4f(bitcast<vec2f>(loadTwoU32s(byteOffset)),
bitcast<vec2f>(loadTwoU32s(byteOffset + 8)));
}
)";
std::string GenerateShaderSource(wgpu::TextureFormat format) {
int pixelSize = 0;
std::ostringstream ss;
switch (format) {
case wgpu::TextureFormat::R8Unorm:
pixelSize = 1;
ss << kUnpackR8Unorm;
break;
case wgpu::TextureFormat::RG8Unorm:
pixelSize = 2;
ss << kUnpackRG8Unorm;
break;
case wgpu::TextureFormat::RGBA8Unorm:
pixelSize = 4;
ss << kUnpackRGBA8Unorm;
break;
case wgpu::TextureFormat::BGRA8Unorm:
pixelSize = 4;
ss << kUnpackBGRA8Unorm;
break;
case wgpu::TextureFormat::RGB10A2Unorm:
pixelSize = 4;
ss << kUnpackRGB10A2Unorm;
break;
case wgpu::TextureFormat::R16Float:
pixelSize = 2;
ss << kUnpackR16Float;
break;
case wgpu::TextureFormat::R16Unorm:
pixelSize = 2;
ss << kUnpackR16Unorm;
break;
case wgpu::TextureFormat::RG16Float:
pixelSize = 4;
ss << kUnpackRG16Float;
break;
case wgpu::TextureFormat::RG16Unorm:
pixelSize = 4;
ss << kUnpackRG16Unorm;
break;
case wgpu::TextureFormat::RGBA16Float:
pixelSize = 8;
ss << kUnpackRGBA16Float;
break;
case wgpu::TextureFormat::RGBA16Unorm:
pixelSize = 8;
ss << kUnpackRGBA16Unorm;
break;
case wgpu::TextureFormat::R32Float:
pixelSize = 4;
ss << kUnpackR32Float;
break;
case wgpu::TextureFormat::RG32Float:
pixelSize = 8;
ss << kUnpackRG32Float;
break;
case wgpu::TextureFormat::RGBA32Float:
pixelSize = 16;
ss << kUnpackRGBA32Float;
break;
default:
DAWN_UNREACHABLE();
}
ss << "const kPixelSize = " << pixelSize << ";\n";
ss << kShaderCommonSrc;
return ss.str();
}
ResultOrError<Ref<RenderPipelineBase>> GetOrCreatePipeline(DeviceBase* device,
wgpu::TextureFormat format) {
InternalPipelineStore* store = device->GetInternalPipelineStore();
{
auto it = store->blitBufferToTexturePipelines.find(format);
if (it != store->blitBufferToTexturePipelines.end()) {
return it->second;
}
}
// vertex shader's source.
ShaderSourceWGSL wgslDesc = {};
ShaderModuleDescriptor shaderModuleDesc = {};
shaderModuleDesc.nextInChain = &wgslDesc;
// shader's source will depend on format key.
std::string shaderCode = GenerateShaderSource(format);
wgslDesc.code = shaderCode.c_str();
Ref<ShaderModuleBase> shaderModule;
DAWN_TRY_ASSIGN(shaderModule, device->CreateShaderModule(&shaderModuleDesc));
FragmentState fragmentState = {};
fragmentState.module = shaderModule.Get();
fragmentState.entryPoint = "blit_buffer_to_texture";
// Color target states.
ColorTargetState colorTarget = {};
colorTarget.format = format;
colorTarget.writeMask = wgpu::ColorWriteMask::All;
fragmentState.targetCount = 1;
fragmentState.targets = &colorTarget;
RenderPipelineDescriptor renderPipelineDesc = {};
renderPipelineDesc.label = "blit_buffer_to_texture";
renderPipelineDesc.vertex.module = shaderModule.Get();
renderPipelineDesc.vertex.entryPoint = "vert_fullscreen_quad";
renderPipelineDesc.fragment = &fragmentState;
// Bind group layout.
Ref<BindGroupLayoutBase> bindGroupLayout;
DAWN_TRY_ASSIGN(bindGroupLayout,
utils::MakeBindGroupLayout(
device,
{
{0, wgpu::ShaderStage::Fragment, kInternalReadOnlyStorageBufferBinding},
{1, wgpu::ShaderStage::Fragment, wgpu::BufferBindingType::Uniform},
},
/* allowInternalBinding */ true));
Ref<PipelineLayoutBase> pipelineLayout;
DAWN_TRY_ASSIGN(pipelineLayout, utils::MakeBasicPipelineLayout(device, bindGroupLayout));
renderPipelineDesc.layout = pipelineLayout.Get();
Ref<RenderPipelineBase> pipeline;
DAWN_TRY_ASSIGN(pipeline, device->CreateRenderPipeline(&renderPipelineDesc));
store->blitBufferToTexturePipelines.emplace(format, pipeline);
return pipeline;
}
} // anonymous namespace
bool IsFormatSupportedByBufferToTextureBlit(wgpu::TextureFormat format) {
// TODO(348653642): Eventually we should support all non-compressed formats. For now, just list
// a subset of them that we support.
switch (format) {
case wgpu::TextureFormat::R8Unorm:
case wgpu::TextureFormat::RG8Unorm:
case wgpu::TextureFormat::RGBA8Unorm:
case wgpu::TextureFormat::BGRA8Unorm:
case wgpu::TextureFormat::RGB10A2Unorm:
case wgpu::TextureFormat::R16Float:
case wgpu::TextureFormat::R16Unorm:
case wgpu::TextureFormat::RG16Float:
case wgpu::TextureFormat::RG16Unorm:
case wgpu::TextureFormat::RGBA16Float:
case wgpu::TextureFormat::RGBA16Unorm:
case wgpu::TextureFormat::R32Float:
case wgpu::TextureFormat::RG32Float:
case wgpu::TextureFormat::RGBA32Float:
return true;
default:
return false;
}
}
bool IsBufferToTextureBlitSupported(BufferBase* buffer,
const TextureCopy& dst,
const Extent3D& copyExtent) {
if (!(buffer->GetInternalUsage() &
(kReadOnlyStorageBuffer | kInternalStorageBuffer | wgpu::BufferUsage::Storage))) {
return false;
}
if (!IsFormatSupportedByBufferToTextureBlit(dst.texture->GetFormat().format)) {
return false;
}
if (dst.texture->GetDimension() == wgpu::TextureDimension::e1D) {
// 1D texture cannot be rendered to so skip it.
return false;
}
if (dst.aspect != Aspect::Color) {
// Don't support multiplanar copies yet.
return false;
}
// Must have non-zero copy size.
return copyExtent.width * copyExtent.height * copyExtent.depthOrArrayLayers > 0;
}
MaybeError BlitBufferToTexture(DeviceBase* device,
CommandEncoder* commandEncoder,
BufferBase* buffer,
const TexelCopyBufferLayout& src,
const TextureCopy& dst,
const Extent3D& copyExtent) {
DAWN_ASSERT(device->IsLockedByCurrentThreadIfNeeded());
// This function assumes bytesPerRow is multiples of 4. Normally it's required that
// bytesPerRow is aligned to 256. However some backends might enable
// DawnTexelCopyBufferRowAlignment feature to relax the alignment. Currently only D3D11 backend
// enables this feature, and the relaxed alignment there is 4.
DAWN_ASSERT((src.bytesPerRow % 4) == 0);
DAWN_ASSERT(buffer->GetInternalUsage() &
(kReadOnlyStorageBuffer | kInternalStorageBuffer | wgpu::BufferUsage::Storage));
DAWN_ASSERT(copyExtent.width > 0 && copyExtent.height > 0 && copyExtent.depthOrArrayLayers > 0);
// Allow internal usages since we need to use the destination
// as a render attachment.
auto scope = commandEncoder->MakeInternalUsageScope();
Ref<RenderPipelineBase> pipeline;
DAWN_TRY_ASSIGN(pipeline, GetOrCreatePipeline(device, dst.texture->GetFormat().format));
Ref<BindGroupLayoutBase> bgl;
DAWN_TRY_ASSIGN(bgl, pipeline->GetBindGroupLayout(0));
const auto ssboAlignment = device->GetLimits().v1.minStorageBufferOffsetAlignment;
DAWN_ASSERT(IsPowerOfTwo(ssboAlignment));
wgpu::TextureViewDimension viewDimension;
uint32_t baseDepth = 0;
uint32_t baseArray = 0;
uint32_t depthStep = 0;
uint32_t arrayStep = 0;
switch (dst.texture->GetDimension()) {
case wgpu::TextureDimension::e1D:
DAWN_UNREACHABLE();
break;
case wgpu::TextureDimension::e3D:
viewDimension = wgpu::TextureViewDimension::e3D;
baseDepth = dst.origin.z;
depthStep = 1;
break;
default:
viewDimension = wgpu::TextureViewDimension::e2D;
baseArray = dst.origin.z;
arrayStep = 1;
break;
}
for (uint32_t z = 0; z < copyExtent.depthOrArrayLayers; ++z) {
Ref<TextureViewBase> dstView;
{
TextureViewDescriptor viewDesc = {};
viewDesc.dimension = viewDimension;
viewDesc.baseArrayLayer = baseArray + arrayStep * z;
viewDesc.arrayLayerCount = 1;
viewDesc.baseMipLevel = dst.mipLevel;
viewDesc.mipLevelCount = 1;
DAWN_TRY_ASSIGN(dstView, dst.texture->CreateView(&viewDesc));
}
const uint64_t srcOffset = src.offset + z * src.rowsPerImage * src.bytesPerRow;
const uint64_t srcBufferBindingOffset = AlignDown(srcOffset, ssboAlignment);
const uint32_t shaderReadOffset = static_cast<uint32_t>(srcOffset & (ssboAlignment - 1));
Ref<BufferBase> paramsBuffer;
{
DAWN_TRY_ASSIGN(paramsBuffer,
device->GetOrCreateTemporaryUniformBuffer(sizeof(uint32_t) * 4));
uint32_t params[4];
params[0] = shaderReadOffset;
params[1] = src.bytesPerRow;
params[2] = dst.origin.x;
params[3] = dst.origin.y;
commandEncoder->APIWriteBuffer(paramsBuffer.Get(), 0,
reinterpret_cast<const uint8_t*>(&params[0]),
sizeof(params));
}
Ref<BindGroupBase> bindGroup;
DAWN_TRY_ASSIGN(bindGroup, utils::MakeBindGroup(device, bgl,
{
{0, buffer, srcBufferBindingOffset},
{1, paramsBuffer},
},
UsageValidationMode::Internal));
RenderPassColorAttachment colorAttachment;
colorAttachment.view = dstView.Get();
if (depthStep) {
colorAttachment.depthSlice = baseDepth + depthStep * z;
}
colorAttachment.loadOp = wgpu::LoadOp::Load;
colorAttachment.storeOp = wgpu::StoreOp::Store;
RenderPassDescriptor rpDesc = {};
rpDesc.colorAttachmentCount = 1;
rpDesc.colorAttachments = &colorAttachment;
Ref<RenderPassEncoder> pass = commandEncoder->BeginRenderPass(&rpDesc);
// Bind the resources.
pass->APISetBindGroup(0, bindGroup.Get());
pass->APISetViewport(dst.origin.x, dst.origin.y, copyExtent.width, copyExtent.height, 0.f,
1.f);
// Draw to perform the blit.
pass->APISetPipeline(pipeline.Get());
pass->APIDraw(3, 1, 0, 0);
pass->End();
}
return {};
}
} // namespace dawn::native