// blob: b6473d8d01c9e0bdd0f9de54d9d06d9930d77b5d [file] [log] [blame]
// Copyright 2017 The Dawn Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "dawn_native/d3d12/TextureCopySplitter.h"
#include "common/Assert.h"
#include "dawn_native/Format.h"
#include "dawn_native/d3d12/d3d12_platform.h"
namespace dawn_native { namespace d3d12 {
namespace {
// Translates a byte offset inside a buffer that is laid out with |bytesPerRow| bytes per
// row into a texel-space origin: x comes from the bytes left over within the row, y from
// the number of whole rows that precede |offset|, and z is always 0.
Origin3D ComputeTexelOffsets(const TexelBlockInfo& blockInfo,
                             uint32_t offset,
                             uint32_t bytesPerRow) {
    ASSERT(bytesPerRow != 0);

    // Number of complete rows skipped by |offset|, and the bytes remaining in the row
    // that |offset| lands in.
    const uint32_t wholeRows = offset / bytesPerRow;
    const uint32_t bytesIntoRow = offset % bytesPerRow;

    // Convert block counts to texels by scaling with the block dimensions.
    const uint32_t texelX = bytesIntoRow / blockInfo.byteSize * blockInfo.width;
    const uint32_t texelY = wholeRows * blockInfo.height;
    return {texelX, texelY, 0};
}
} // namespace
// Computes the one or two copy regions needed to copy |copySize| texels between a buffer
// (starting at byte |offset|, with |bytesPerRow| bytes per row) and a texture subresource at
// |origin|.
//
// Every region is expressed relative to an "aligned offset": |offset| rounded down to a
// multiple of D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT. The distance between that aligned
// address and the real data is re-expressed as a texel offset (bufferOffset) inside a buffer
// footprint (bufferSize) that is large enough to contain the copied texels.
//
// Three outcomes are possible:
//   1. |offset| is already aligned: a single region with a zero bufferOffset.
//   2. |offset| is unaligned but each copied row still fits inside one row pitch once
//      shifted: a single region whose footprint is widened/heightened by the texel offset.
//   3. The shifted rows straddle the row pitch: two regions, the first covering the texels up
//      to the end of the row pitch and the second covering the remaining texels.
TextureCopySubresource Compute2DTextureCopySubresource(Origin3D origin,
                                                       Extent3D copySize,
                                                       const TexelBlockInfo& blockInfo,
                                                       uint64_t offset,
                                                       uint32_t bytesPerRow) {
    ASSERT(bytesPerRow % blockInfo.byteSize == 0);

    // Clears the low bits of a byte offset, i.e. rounds it down to the placement alignment.
    const uint64_t placementMask =
        ~static_cast<uint64_t>(D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT - 1);

    TextureCopySubresource result;
    auto& firstCopy = result.copies[0];

    // The copies must be 512-aligned, so find the aligned address at or before the data.
    const uint64_t placementBase = offset & placementMask;

    // Case 1: the data already starts on an aligned address, no translation is required.
    if (placementBase == offset) {
        result.count = 1;
        firstCopy.alignedOffset = placementBase;
        firstCopy.textureOffset = origin;
        firstCopy.copySize = copySize;
        firstCopy.bufferOffset = {0, 0, 0};
        firstCopy.bufferSize = copySize;
        return result;
    }

    ASSERT(placementBase < offset);
    ASSERT(offset - placementBase < D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT);

    // Re-express the gap between the aligned address and the data as a texel offset: y counts
    // the whole row pitches in the gap and x the texels left over within the row.
    const Origin3D startInBuffer = ComputeTexelOffsets(
        blockInfo, static_cast<uint32_t>(offset - placementBase), bytesPerRow);
    ASSERT(startInBuffer.y <= blockInfo.height);
    ASSERT(startInBuffer.z == 0);

    // Bytes occupied by one copied row, and bytes skipped in front of it within the row pitch.
    const uint32_t rowBytesCopied = copySize.width / blockInfo.width * blockInfo.byteSize;
    const uint32_t rowBytesSkipped = startInBuffer.x / blockInfo.width * blockInfo.byteSize;

    // Case 2: the shifted rows still fit inside the row pitch. Keep a single copy and grow
    // the buffer footprint by the texel offset so the copied area starts at that offset.
    const bool rowFitsInPitch = rowBytesCopied + rowBytesSkipped <= bytesPerRow;
    if (rowFitsInPitch) {
        result.count = 1;
        firstCopy.alignedOffset = placementBase;
        firstCopy.textureOffset = origin;
        firstCopy.copySize = copySize;
        firstCopy.bufferOffset = startInBuffer;
        firstCopy.bufferSize.width = copySize.width + startInBuffer.x;
        firstCopy.bufferSize.height = copySize.height + startInBuffer.y;
        firstCopy.bufferSize.depthOrArrayLayers = copySize.depthOrArrayLayers;
        return result;
    }

    // Case 3: the shifted rows straddle the row pitch, so the copy is split in two:
    //   - copy 0 takes the texels from the texel offset up to the end of the row pitch;
    //   - copy 1 takes the remaining texels of each row, using its own aligned offset
    //     computed from where copy 0's texels end.
    result.count = 2;
    auto& secondCopy = result.copies[1];

    ASSERT(bytesPerRow > rowBytesSkipped);
    const uint32_t pitchInTexels = bytesPerRow / blockInfo.byteSize * blockInfo.width;
    const uint32_t firstWidth = pitchInTexels - startInBuffer.x;

    firstCopy.alignedOffset = placementBase;
    firstCopy.textureOffset = origin;
    firstCopy.copySize.width = firstWidth;
    firstCopy.copySize.height = copySize.height;
    firstCopy.copySize.depthOrArrayLayers = copySize.depthOrArrayLayers;
    firstCopy.bufferOffset = startInBuffer;
    firstCopy.bufferSize.width = pitchInTexels;
    firstCopy.bufferSize.height = copySize.height + startInBuffer.y;
    firstCopy.bufferSize.depthOrArrayLayers = copySize.depthOrArrayLayers;

    // Copy 1 starts right after the bytes consumed by copy 0 in the first row; repeat the
    // align-down and texel-offset computation from that position.
    const uint64_t secondOffset = offset + firstWidth / blockInfo.width * blockInfo.byteSize;
    const uint64_t secondPlacementBase = secondOffset & placementMask;
    const Origin3D secondStartInBuffer = ComputeTexelOffsets(
        blockInfo, static_cast<uint32_t>(secondOffset - secondPlacementBase), bytesPerRow);
    ASSERT(secondStartInBuffer.y <= blockInfo.height);
    ASSERT(secondStartInBuffer.z == 0);

    ASSERT(copySize.width > firstWidth);
    const uint32_t secondWidth = copySize.width - firstWidth;

    secondCopy.alignedOffset = secondPlacementBase;
    // In the texture, copy 1 lands immediately to the right of copy 0.
    secondCopy.textureOffset.x = origin.x + firstWidth;
    secondCopy.textureOffset.y = origin.y;
    secondCopy.textureOffset.z = origin.z;
    secondCopy.copySize.width = secondWidth;
    secondCopy.copySize.height = copySize.height;
    secondCopy.copySize.depthOrArrayLayers = copySize.depthOrArrayLayers;
    secondCopy.bufferOffset = secondStartInBuffer;
    secondCopy.bufferSize.width = secondWidth + secondStartInBuffer.x;
    secondCopy.bufferSize.height = copySize.height + secondStartInBuffer.y;
    secondCopy.bufferSize.depthOrArrayLayers = copySize.depthOrArrayLayers;
    return result;
}
// Computes the copy splits for a copy between a buffer and one or more layers of a 2D
// (array) texture.
//
// At most two per-layer splits are produced: copySubresources[0] for the first copied layer
// and, when more than one layer is copied, copySubresources[1] for the second one. Both are
// computed with the texture origin's z forced to 0 and a depthOrArrayLayers of 1; the caller
// is expected to reuse them for the remaining layers by adding each layer's buffer offset.
//
// |offset| and |bytesPerRow| are in bytes; |rowsPerImage| is multiplied with |bytesPerRow| to
// obtain the byte stride between two consecutive layers in the buffer.
TextureCopySplits Compute2DTextureCopySplits(Origin3D origin,
                                             Extent3D copySize,
                                             const TexelBlockInfo& blockInfo,
                                             uint64_t offset,
                                             uint32_t bytesPerRow,
                                             uint32_t rowsPerImage) {
    TextureCopySplits copies;

    // Widen before multiplying: both operands are 32-bit, so without the cast the product
    // would be computed modulo 2^32 and only then converted to 64 bits.
    const uint64_t bytesPerLayer = static_cast<uint64_t>(bytesPerRow) * rowsPerImage;

    // The function Compute2DTextureCopySubresource() decides how to split the copy based on:
    // - the alignment of the buffer offset with D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT (512)
    // - the alignment of the buffer offset with D3D12_TEXTURE_DATA_PITCH_ALIGNMENT (256)
    // Each layer of a 2D array might need to be split, but because of the WebGPU
    // constraint that "bytesPerRow" must be a multiple of 256, all odd (resp. all even) layers
    // will be at an offset multiple of 512 of each other, which means they will all result in
    // the same 2D split. Thus we can just compute the copy splits for the first and second
    // layers, and reuse them for the remaining layers by adding the related offset of each
    // layer. Moreover, if the size in bytes of one layer is a multiple of 512 (for example when
    // "rowsPerImage" is even), both the first and second copy layers can share the same copy
    // split, so in this situation we just need to compute the copy split once and reuse it for
    // all the layers.
    Extent3D copyOneLayerSize = copySize;
    Origin3D copyFirstLayerOrigin = origin;
    copyOneLayerSize.depthOrArrayLayers = 1;
    copyFirstLayerOrigin.z = 0;

    copies.copySubresources[0] = Compute2DTextureCopySubresource(
        copyFirstLayerOrigin, copyOneLayerSize, blockInfo, offset, bytesPerRow);

    // When the copy only refers to one texture 2D array layer,
    // copies.copySubresources[1] will never be used so we can safely early return here.
    if (copySize.depthOrArrayLayers == 1) {
        return copies;
    }

    if (bytesPerLayer % D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT == 0) {
        // The second layer starts a multiple of 512 bytes after the first one, so it has the
        // same split; only the aligned offsets need to be moved forward by one layer.
        // Both entries are adjusted regardless of "count"; when the split holds a single
        // copy, copies[1] is presumably ignored by consumers that honor "count".
        copies.copySubresources[1] = copies.copySubresources[0];
        copies.copySubresources[1].copies[0].alignedOffset += bytesPerLayer;
        copies.copySubresources[1].copies[1].alignedOffset += bytesPerLayer;
    } else {
        // The second layer has a different alignment than the first one: compute its split
        // from scratch at the buffer offset of the second layer.
        const uint64_t bufferOffsetNextLayer = offset + bytesPerLayer;
        copies.copySubresources[1] =
            Compute2DTextureCopySubresource(copyFirstLayerOrigin, copyOneLayerSize, blockInfo,
                                            bufferOffsetNextLayer, bytesPerRow);
    }
    return copies;
}
// Computes the copy region(s) for a copy between a buffer and a 3D texture.
//
// The regions are first computed by Compute2DTextureCopySubresource() with the full
// |copySize| (including its depth), then each region's buffer footprint height is raised to
// |rowsPerImage| rows (converted to texels) so that consecutive depth slices are spaced by
// one image in the buffer. |rowsPerImage| is counted in block rows: it is multiplied by
// blockInfo.height to obtain texel rows.
TextureCopySubresource Compute3DTextureCopySplits(Origin3D origin,
                                                  Extent3D copySize,
                                                  const TexelBlockInfo& blockInfo,
                                                  uint64_t offset,
                                                  uint32_t bytesPerRow,
                                                  uint32_t rowsPerImage) {
    // To compute the copy region(s) for 3D textures, we call Compute2DTextureCopySubresource
    // and get copy region(s) for the first slice of the copy, then extend to all depth slices
    // and become a 3D copy. However, this doesn't work as easily as that due to some corner
    // cases.
    //
    // For example, if bufferSize.height is greater than rowsPerImage in the generated copy
    // region and we simply extend the 2D copy region to all copied depth slices, copied data
    // will be incorrectly offset for each depth slice except the first one. This situation
    // can be introduced by 2 special cases:
    //   - If there is an empty row at the beginning of the copy region due to alignment.
    //   - If copySize.height is 1 and one row of data straddles two rows.
    //
    // For these special cases, we need to recompute the copy regions for 3D textures like
    // 1) split and modify the incorrect copy region to a few more copy regions, or 2) abandon
    // the old copy region and regenerate the copy regions in a different approach.

    // Call Compute2DTextureCopySubresource and get copy regions. This function has already
    // forwarded "copySize.depthOrArrayLayers" to all depth slices.
    TextureCopySubresource copySubresource =
        Compute2DTextureCopySubresource(origin, copySize, blockInfo, offset, bytesPerRow);
    ASSERT(copySubresource.count <= 2);

    // If copySize.depthOrArrayLayers is 1, we can return copySubresource because we don't
    // need to extend the copy region(s) to other depth slice(s).
    if (copySize.depthOrArrayLayers == 1) {
        return copySubresource;
    }

    bool needRecompute = false;
    const uint32_t rowsPerImageInTexels = rowsPerImage * blockInfo.height;
    for (uint32_t i = 0; i < copySubresource.count; ++i) {
        Extent3D& bufferSize = copySubresource.copies[i].bufferSize;

        // There can be one empty row at most in a copy region. bufferSize.height is in
        // texels, so the bound must use the texel count of one image rather than the raw
        // rowsPerImage (which is in block rows), otherwise the check is too strict for
        // formats whose block height is greater than 1.
        ASSERT(bufferSize.height <= rowsPerImageInTexels + blockInfo.height);

        if (bufferSize.height > rowsPerImageInTexels) {
            needRecompute = true;
        } else if (bufferSize.height < rowsPerImageInTexels) {
            // If we are copying multiple depth slices, we should skip rowsPerImageInTexels rows
            // at least for each slice even though we only copy partial rows in each slice
            // sometimes.
            bufferSize.height = rowsPerImageInTexels;
        }
    }

    if (!needRecompute) {
        return copySubresource;
    }

    // TODO(yunchao.he@intel.com): recompute copy regions for special cases for 3D textures,
    // and return the revised copy regions.
    // NOTE(review): until then the regions are returned unrevised; the assert below records
    // that this path is only expected when bytesPerRow equals the pitch alignment — confirm
    // the rationale with the author.
    ASSERT(bytesPerRow == D3D12_TEXTURE_DATA_PITCH_ALIGNMENT);
    return copySubresource;
}
}} // namespace dawn_native::d3d12