D3D12: Allow relaxed B2T copy pitch and offset alignment on 2D textures

This patch implements buffer-texture copies with relaxed row pitch and
offset alignment (the row pitch need not be a multiple of 256, nor the
offset a multiple of 512) on 2D textures when
`UnrestrictedBufferTextureCopyPitchSupported` is true on the current
D3D12 device, so that in some cases we no longer need to split one
buffer-texture copy into multiple ones.

Bug: chromium:381000081
Test: dawn_end2end_tests
Change-Id: Ib4fbc819701ca82aeaaf791fe1b6de42b11b1d18
Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/211949
Reviewed-by: Loko Kung <lokokung@google.com>
Commit-Queue: Jiawei Shao <jiawei.shao@intel.com>
diff --git a/src/dawn/native/Toggles.cpp b/src/dawn/native/Toggles.cpp
index eeef729..d072704 100644
--- a/src/dawn/native/Toggles.cpp
+++ b/src/dawn/native/Toggles.cpp
@@ -589,6 +589,11 @@
       "minWaveLaneCount and 8. Some D3D12 drivers is possible to run fragment shader with wave "
       "count 8 while reporting minWaveLaneCount 16.",
       "https://crbug.com/381969450", ToggleStage::Adapter}},
+    {Toggle::D3D12RelaxBufferTextureCopyPitchAndOffsetAlignment,
+     {"d3d12_relax_buffer_texture_copy_pitch_and_offset_alignment",
+      "Don't require the alignments of D3D12_TEXTURE_DATA_PITCH_ALIGNMENT (256) for row pitch "
+      "and D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT (512) for offset in buffer-texture copies.",
+      "https://crbug.com/381000081", ToggleStage::Device}},
     {Toggle::NoWorkaroundSampleMaskBecomesZeroForAllButLastColorTarget,
      {"no_workaround_sample_mask_becomes_zero_for_all_but_last_color_target",
       "MacOS 12.0+ Intel has a bug where the sample mask is only applied for the last color "
diff --git a/src/dawn/native/Toggles.h b/src/dawn/native/Toggles.h
index 2d327de..f096d35 100644
--- a/src/dawn/native/Toggles.h
+++ b/src/dawn/native/Toggles.h
@@ -140,6 +140,7 @@
     VulkanMonolithicPipelineCache,
     MetalSerializeTimestampGenerationAndResolution,
     D3D12RelaxMinSubgroupSizeTo8,
+    D3D12RelaxBufferTextureCopyPitchAndOffsetAlignment,
 
     // Unresolved issues.
     NoWorkaroundSampleMaskBecomesZeroForAllButLastColorTarget,
diff --git a/src/dawn/native/d3d12/D3D12Info.cpp b/src/dawn/native/d3d12/D3D12Info.cpp
index 98910de..e8e7d2b 100644
--- a/src/dawn/native/d3d12/D3D12Info.cpp
+++ b/src/dawn/native/d3d12/D3D12Info.cpp
@@ -113,6 +113,8 @@
             D3D12_FEATURE_D3D12_OPTIONS13, &featureOptions13, sizeof(featureOptions13)))) {
         info.supportsTextureCopyBetweenDimensions =
             featureOptions13.TextureCopyBetweenDimensionsSupported;
+        info.supportsUnrestrictedBufferTextureCopyPitch =
+            featureOptions13.UnrestrictedBufferTextureCopyPitchSupported;
     }
 
     info.supportsRootSignatureVersion1_1 = false;
diff --git a/src/dawn/native/d3d12/D3D12Info.h b/src/dawn/native/d3d12/D3D12Info.h
index b9d8e5f..3d6385b 100644
--- a/src/dawn/native/d3d12/D3D12Info.h
+++ b/src/dawn/native/d3d12/D3D12Info.h
@@ -54,6 +54,7 @@
     bool use64KBAlignedMSAATexture;
     bool supportsHeapFlagCreateNotZeroed;
     bool supportsTextureCopyBetweenDimensions;
+    bool supportsUnrestrictedBufferTextureCopyPitch;
     // Whether the device support wave intrinsics
     bool supportsWaveOps;
     uint32_t waveLaneCountMin;
diff --git a/src/dawn/native/d3d12/PhysicalDeviceD3D12.cpp b/src/dawn/native/d3d12/PhysicalDeviceD3D12.cpp
index 45a2679..c46df96 100644
--- a/src/dawn/native/d3d12/PhysicalDeviceD3D12.cpp
+++ b/src/dawn/native/d3d12/PhysicalDeviceD3D12.cpp
@@ -648,6 +648,14 @@
     deviceToggles->Default(Toggle::D3D12CreateNotZeroedHeap,
                            GetDeviceInfo().supportsHeapFlagCreateNotZeroed);
 
+    // Enable relaxed row pitch and offset in buffer-texture copies by default when the device
+    // supports it; otherwise force the toggle off so that it can never be enabled.
+    if (!GetDeviceInfo().supportsUnrestrictedBufferTextureCopyPitch) {
+        deviceToggles->ForceSet(Toggle::D3D12RelaxBufferTextureCopyPitchAndOffsetAlignment, false);
+    }
+    deviceToggles->Default(Toggle::D3D12RelaxBufferTextureCopyPitchAndOffsetAlignment,
+                           GetDeviceInfo().supportsUnrestrictedBufferTextureCopyPitch);
+
     // Native support of packed 4x8 integer dot product required shader model 6.4 or higher, and
     // DXC 1.4 or higher.
     if (!(GetAppliedShaderModelUnderToggles(*deviceToggles) >= 64) ||
diff --git a/src/dawn/native/d3d12/TextureCopySplitter.cpp b/src/dawn/native/d3d12/TextureCopySplitter.cpp
index 67501cd..9c80fc4 100644
--- a/src/dawn/native/d3d12/TextureCopySplitter.cpp
+++ b/src/dawn/native/d3d12/TextureCopySplitter.cpp
@@ -682,4 +682,40 @@
 
     return copySubresource;
 }
+
+TextureCopySubresource Compute2DTextureCopySubresourceWithRelaxedRowPitchAndOffset(
+    BufferTextureCopyDirection direction,
+    Origin3D origin,
+    Extent3D copySize,
+    const TexelBlockInfo& blockInfo,
+    uint64_t offset,
+    uint32_t bytesPerRow) {
+    TextureCopySubresource copy;
+    auto* copyInfo = copy.AddCopy();
+    // Exactly one copy region is added here. `blockInfo` is not referenced by this body.
+    // The data in the buffer (bufferLocation) can be visualized like this:
+    // * the data being copied is drawn as '+'.
+    //
+    //                bufferOffset(0, 0, 0)
+    //                        ^
+    //                        |
+    // |<-------Offset------->|<-----------RowPitch----------->|----------|
+    // |----------------------|++++++++++++++++++++++~~~~~~~~~~|    |     |
+    //                        |++++++++++++++++++++++~~~~~~~~~~|CopyHeight|
+    //                        |++++++++++++++++++++++|         |    |     |
+    //                        |<-----CopyWidth------>|         |----------|
+    // ('~' presumably marks the rest of each row pitch that is not copied - TODO confirm.)
+    Origin3D textureOffset = {origin.x, origin.y, 0};
+    Origin3D bufferOffset = {0, 0, 0};
+    Extent3D copySizeOneLayer = {copySize.width, copySize.height, 1};
+    ComputeSourceRegionForCopyInfo(copyInfo, direction, bufferOffset, textureOffset,
+                                   copySizeOneLayer);
+    // The buffer footprint uses the same width and height as the one-layer copy region.
+    Extent3D bufferSize = {copySize.width, copySize.height, 1};
+    // `offset` and `bytesPerRow` are forwarded exactly as they were passed in.
+    FillFootprintAndOffsetOfBufferLocation(&copyInfo->bufferLocation, offset, bufferSize,
+                                           bytesPerRow);
+
+    return copy;
+}
 }  // namespace dawn::native::d3d12
diff --git a/src/dawn/native/d3d12/TextureCopySplitter.h b/src/dawn/native/d3d12/TextureCopySplitter.h
index 4d0f5b8..5c6939d 100644
--- a/src/dawn/native/d3d12/TextureCopySplitter.h
+++ b/src/dawn/native/d3d12/TextureCopySplitter.h
@@ -117,6 +117,17 @@
                                                   uint64_t offset,
                                                   uint32_t bytesPerRow,
                                                   uint32_t rowsPerImage);
+
+// Computes the `TextureCopySubresource` (a single copy region) for one subresource of a 2D
+// texture, forwarding `offset` and `bytesPerRow` as given (relaxed row pitch and offset).
+TextureCopySubresource Compute2DTextureCopySubresourceWithRelaxedRowPitchAndOffset(
+    BufferTextureCopyDirection direction,
+    Origin3D origin,
+    Extent3D copySize,
+    const TexelBlockInfo& blockInfo,
+    uint64_t offset,
+    uint32_t bytesPerRow);
+
 }  // namespace dawn::native::d3d12
 
 #endif  // SRC_DAWN_NATIVE_D3D12_TEXTURECOPYSPLITTER_H_
diff --git a/src/dawn/native/d3d12/UtilsD3D12.cpp b/src/dawn/native/d3d12/UtilsD3D12.cpp
index f2b55c5..50602da 100644
--- a/src/dawn/native/d3d12/UtilsD3D12.cpp
+++ b/src/dawn/native/d3d12/UtilsD3D12.cpp
@@ -234,6 +234,31 @@
     }
 }
 
+void Record2DBufferTextureCopyWithRelaxedOffsetAndPitch(BufferTextureCopyDirection direction,
+                                                        ID3D12GraphicsCommandList* commandList,
+                                                        ID3D12Resource* bufferResource,
+                                                        const uint64_t offset,
+                                                        const uint32_t bytesPerRow,
+                                                        const uint32_t rowsPerImage,
+                                                        const TextureCopy& textureCopy,
+                                                        const TexelBlockInfo& blockInfo,
+                                                        const Extent3D& copySize) {
+    TextureCopySubresource copySubresource =  // Computed once, reused for every layer.
+        Compute2DTextureCopySubresourceWithRelaxedRowPitchAndOffset(
+            direction, textureCopy.origin, copySize, blockInfo, offset, bytesPerRow);
+    // Widen before multiplying so the per-layer stride cannot wrap in 32-bit arithmetic.
+    uint64_t bytesPerLayer = static_cast<uint64_t>(bytesPerRow) * rowsPerImage;
+    uint64_t bufferOffsetForNextLayer = 0;  // Advanced by bytesPerLayer after each layer.
+    for (uint32_t copyLayer = 0; copyLayer < copySize.depthOrArrayLayers; ++copyLayer) {
+        uint32_t copyTextureLayer = copyLayer + textureCopy.origin.z;
+        RecordBufferTextureCopyFromSplits(direction, commandList, copySubresource, bufferResource,
+                                          bufferOffsetForNextLayer, bytesPerRow,
+                                          textureCopy.texture.Get(), textureCopy.mipLevel,
+                                          copyTextureLayer, textureCopy.aspect);
+        bufferOffsetForNextLayer += bytesPerLayer;
+    }
+}
+
 void RecordBufferTextureCopyWithBufferHandle(BufferTextureCopyDirection direction,
                                              ID3D12GraphicsCommandList* commandList,
                                              ID3D12Resource* bufferResource,
@@ -247,6 +272,9 @@
     TextureBase* texture = textureCopy.texture.Get();
     const TexelBlockInfo& blockInfo = texture->GetFormat().GetAspectInfo(textureCopy.aspect).block;
 
+    bool useRelaxedRowPitchAndOffset = texture->GetDevice()->IsToggleEnabled(
+        Toggle::D3D12RelaxBufferTextureCopyPitchAndOffsetAlignment);
+
     switch (texture->GetDimension()) {
         case wgpu::TextureDimension::Undefined:
             DAWN_UNREACHABLE();
@@ -255,9 +283,14 @@
             // 1D textures copy splits are a subset of the single-layer 2D texture copy splits,
             // at least while 1D textures can only have a single array layer.
             DAWN_ASSERT(texture->GetArrayLayers() == 1);
-
-            TextureCopySubresource copyRegions = Compute2DTextureCopySubresource(
-                direction, textureCopy.origin, copySize, blockInfo, offset, bytesPerRow);
+            TextureCopySubresource copyRegions;
+            if (useRelaxedRowPitchAndOffset) {
+                copyRegions = Compute2DTextureCopySubresourceWithRelaxedRowPitchAndOffset(
+                    direction, textureCopy.origin, copySize, blockInfo, offset, bytesPerRow);
+            } else {
+                copyRegions = Compute2DTextureCopySubresource(
+                    direction, textureCopy.origin, copySize, blockInfo, offset, bytesPerRow);
+            }
             RecordBufferTextureCopyFromSplits(direction, commandList, copyRegions, bufferResource,
                                               0, bytesPerRow, texture, textureCopy.mipLevel, 0,
                                               textureCopy.aspect);
@@ -267,9 +300,15 @@
         // Record the CopyTextureRegion commands for 2D textures, with special handling of array
         // layers since each require their own set of copies.
         case wgpu::TextureDimension::e2D:
-            Record2DBufferTextureCopyWithSplit(direction, commandList, bufferResource, offset,
-                                               bytesPerRow, rowsPerImage, textureCopy, blockInfo,
-                                               copySize);
+            if (useRelaxedRowPitchAndOffset) {
+                Record2DBufferTextureCopyWithRelaxedOffsetAndPitch(
+                    direction, commandList, bufferResource, offset, bytesPerRow, rowsPerImage,
+                    textureCopy, blockInfo, copySize);
+            } else {
+                Record2DBufferTextureCopyWithSplit(direction, commandList, bufferResource, offset,
+                                                   bytesPerRow, rowsPerImage, textureCopy,
+                                                   blockInfo, copySize);
+            }
             break;
 
         case wgpu::TextureDimension::e3D: {