Impl 3DTexture copy splitter for empty first row on D3D12 - 2

If the copy region generated from 2D texture copy splitter has an
empty first row due to alignment adjustment and we simply extend
it to all copied depth slices, the copy region will be incorrect
for 3D texture copy.

If there is an empty first row and the copy height is odd, we can
split the copy region into two:
  - copy0: copy the first depth slice
  - copy1: copy the rest depth slices because there is no empty
    row after alignment adjustment

This method resolves the remaining problem when copy height is 1.
It also is an optimization for other odd copy height cases, say
copy height is 3, 5, 7, etc.

This change implements the special situation when the copy region
generated from 2D texture copy splitter has an empty first row and
its copy height is odd.

Bug: dawn:547
Change-Id: Idf4f4a9b87f783c5328463e0f4182429a7e809f1
Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/53885
Reviewed-by: Austin Eng <enga@chromium.org>
Commit-Queue: Yunchao He <yunchao.he@intel.com>
diff --git a/src/dawn_native/d3d12/TextureCopySplitter.cpp b/src/dawn_native/d3d12/TextureCopySplitter.cpp
index 600b721..7297f50 100644
--- a/src/dawn_native/d3d12/TextureCopySplitter.cpp
+++ b/src/dawn_native/d3d12/TextureCopySplitter.cpp
@@ -273,13 +273,14 @@
         return copies;
     }
 
-    void Recompute3DTextureCopyRegionsForBlockWithEmptyFirstRow(Origin3D origin,
-                                                                Extent3D copySize,
-                                                                const TexelBlockInfo& blockInfo,
-                                                                uint32_t bytesPerRow,
-                                                                uint32_t rowsPerImage,
-                                                                TextureCopySubresource& copy,
-                                                                uint32_t i) {
+    void Recompute3DTextureCopyRegionWithEmptyFirstRowAndEvenCopyHeight(
+        Origin3D origin,
+        Extent3D copySize,
+        const TexelBlockInfo& blockInfo,
+        uint32_t bytesPerRow,
+        uint32_t rowsPerImage,
+        TextureCopySubresource& copy,
+        uint32_t i) {
         // Let's assign data and show why copy region generated by ComputeTextureCopySubresource
         // is incorrect if there is an empty row at the beginning of the copy block.
         // Assuming that bytesPerRow is 256 and we are doing a B2T copy, and copy size is {width: 2,
@@ -415,6 +416,42 @@
         copy2->bufferSize.depthOrArrayLayers = 1;
     }
 
+    void Recompute3DTextureCopyRegionWithEmptyFirstRowAndOddCopyHeight(Extent3D copySize,
+                                                                       uint32_t bytesPerRow,
+                                                                       TextureCopySubresource& copy,
+                                                                       uint32_t i) {
+        // Read the comments of Recompute3DTextureCopyRegionWithEmptyFirstRowAndEvenCopyHeight() for
+        // the reason why it is incorrect if we simply extend the copy region to all depth slices
+        // when there is an empty first row at the copy region.
+        //
+        // If the copy height is odd, we can use two copies to make it correct:
+        //   - copy 0: only copy the first depth slice. Keep other arguments the same.
+        //   - copy 1: copy all rest depth slices because it will start without an empty row if
+        //     copy height is odd. Odd height + one (empty row) is even. An even row number times
+        //     bytesPerRow (256) will be aligned to D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT (512)
+
+        // Copy 0: copy the first depth slice (image 0)
+        TextureCopySubresource::CopyInfo& copy0 = copy.copies[i];
+        copy0.copySize.depthOrArrayLayers = 1;
+        copy0.bufferSize.depthOrArrayLayers = 1;
+
+        // Copy 1: copy the rest depth slices in one shot
+        TextureCopySubresource::CopyInfo* copy1 = copy.AddCopy();
+        *copy1 = copy0;
+        ASSERT(copySize.height % 2 == 1);
+        copy1->alignedOffset += (copySize.height + 1) * bytesPerRow;
+        ASSERT(copy1->alignedOffset % D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT == 0);
+        // textureOffset.z should add one because the first slice has already been copied in copy0.
+        copy1->textureOffset.z++;
+        // bufferOffset.y should be 0 because we skipped the first depth slice and there is no empty
+        // row in this copy region.
+        copy1->bufferOffset.y = 0;
+        copy1->copySize.height = copySize.height;
+        copy1->copySize.depthOrArrayLayers = copySize.depthOrArrayLayers - 1;
+        copy1->bufferSize.height = copySize.height;
+        copy1->bufferSize.depthOrArrayLayers = copySize.depthOrArrayLayers - 1;
+    }
+
     TextureCopySubresource Compute3DTextureCopySplits(Origin3D origin,
                                                       Extent3D copySize,
                                                       const TexelBlockInfo& blockInfo,
@@ -428,14 +465,10 @@
         //
         // For example, if bufferSize.height is greater than rowsPerImage in the generated copy
         // region and we simply extend the 2D copy region to all copied depth slices, copied data
-        // will be incorrectly offset for each depth slice except the first one. This situation
-        // can be introduced by 2 special cases:
-        //   - If there is an empty row at the beginning of the copy region due to alignment.
-        //   - If copySize.height is 1 and one row of data straddle two rows.
+        // will be incorrectly offset for each depth slice except the first one.
         //
-        // For these special cases, we need to recompute the copy regions for 3D textures like
-        // 1) split and modify the incorrect copy region to a few more copy regions, or 2) abandon
-        // the old copy region and regenerate the copy regions in different approach.
+        // For these special cases, we need to recompute the copy regions for 3D textures via
+        // split the incorrect copy region to a couple more copy regions.
 
         // Call Compute2DTextureCopySubresource and get copy regions. This function has already
         // forwarded "copySize.depthOrArrayLayers" to all depth slices.
@@ -456,17 +489,48 @@
         uint32_t originalCopyCount = copySubresource.count;
         for (uint32_t i = 0; i < originalCopyCount; ++i) {
             // There can be one empty row at most in a copy region.
-            ASSERT(copySubresource.copies[i].bufferSize.height <= rowsPerImage + blockInfo.height);
+            ASSERT(copySubresource.copies[i].bufferSize.height <=
+                   rowsPerImageInTexels + blockInfo.height);
             Extent3D& bufferSize = copySubresource.copies[i].bufferSize;
-            if (bufferSize.height > rowsPerImageInTexels) {
-                ASSERT(bytesPerRow == D3D12_TEXTURE_DATA_PITCH_ALIGNMENT);
-                Recompute3DTextureCopyRegionsForBlockWithEmptyFirstRow(
-                    origin, copySize, blockInfo, bytesPerRow, rowsPerImage, copySubresource, i);
-                // TODO(crbug.com/dawn/547): recompute copy regions when copySize.height is 1.
-            } else {
+
+            if (bufferSize.height == rowsPerImageInTexels) {
+                // If the copy region's bufferSize.height equals to rowsPerImageInTexels, we can use
+                // this copy region without any modification.
+                continue;
+            }
+
+            if (bufferSize.height < rowsPerImageInTexels) {
                 // If we are copying multiple depth slices, we should skip rowsPerImageInTexels rows
                 // for each slice even though we only copy partial rows in each slice sometimes.
                 bufferSize.height = rowsPerImageInTexels;
+            } else {
+                // bufferSize.height > rowsPerImageInTexels. There is an empty row in this copy
+                // region due to alignment adjustment.
+
+                // bytesPerRow is definitely 256, and it is definitely a full copy on height.
+                // Otherwise, bufferSize.height wount be greater than rowsPerImageInTexels and
+                // there won't be an empty row at the beginning of this copy region.
+                ASSERT(bytesPerRow == D3D12_TEXTURE_DATA_PITCH_ALIGNMENT);
+                ASSERT(copySize.height == rowsPerImageInTexels);
+
+                if (copySize.height % 2 == 0) {
+                    // If copySize.height is even and there is an empty row at the beginning of the
+                    // first slice of the copy region, the offset of all depth slices will never be
+                    // aligned to D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT (512) and there is always
+                    // an empty row at each depth slice. We need a totally different approach to
+                    // split the copy region.
+                    Recompute3DTextureCopyRegionWithEmptyFirstRowAndEvenCopyHeight(
+                        origin, copySize, blockInfo, bytesPerRow, rowsPerImage, copySubresource, i);
+                } else {
+                    // If copySize.height is odd and there is an empty row at the beginning of the
+                    // first slice of the copy region, we can split the copy region into two copies:
+                    // copy0 to copy the first slice, copy1 to copy the rest slices because the
+                    // offset of slice 1 is aligned to D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT (512)
+                    // without an empty row. This is an easier case relative to cases with even copy
+                    // height.
+                    Recompute3DTextureCopyRegionWithEmptyFirstRowAndOddCopyHeight(
+                        copySize, bytesPerRow, copySubresource, i);
+                }
             }
         }
 
diff --git a/src/tests/end2end/CopyTests.cpp b/src/tests/end2end/CopyTests.cpp
index 0a5076d..6218d1d 100644
--- a/src/tests/end2end/CopyTests.cpp
+++ b/src/tests/end2end/CopyTests.cpp
@@ -1075,8 +1075,6 @@
 }
 
 TEST_P(CopyTests_T2B, Texture3DSplitRowDataWithoutEmptyFirstRow) {
-    DAWN_TEST_UNSUPPORTED_IF(IsD3D12());  // TODO(crbug.com/dawn/547): Implement on D3D12.
-
     constexpr uint32_t kWidth = 259;
     constexpr uint32_t kHeight = 127;
     constexpr uint32_t kDepth = 3;
@@ -1114,8 +1112,6 @@
 }
 
 TEST_P(CopyTests_T2B, Texture3DCopyHeightIsOneCopyWidthIsTiny) {
-    DAWN_TEST_UNSUPPORTED_IF(IsD3D12());  // TODO(crbug.com/dawn/547): Implement on D3D12.
-
     constexpr uint32_t kWidth = 2;
     constexpr uint32_t kHeight = 1;
     constexpr uint32_t kDepth = 3;
@@ -1137,8 +1133,6 @@
 }
 
 TEST_P(CopyTests_T2B, Texture3DCopyHeightIsOneCopyWidthIsSmall) {
-    DAWN_TEST_UNSUPPORTED_IF(IsD3D12());  // TODO(crbug.com/dawn/547): Implement on D3D12.
-
     constexpr uint32_t kWidth = 39;
     constexpr uint32_t kHeight = 1;
     constexpr uint32_t kDepth = 3;
@@ -1692,8 +1686,6 @@
 }
 
 TEST_P(CopyTests_B2T, Texture3DSplitRowDataWithoutEmptyFirstRow) {
-    DAWN_TEST_UNSUPPORTED_IF(IsD3D12());  // TODO(crbug.com/dawn/547): Implement on D3D12.
-
     constexpr uint32_t kWidth = 259;
     constexpr uint32_t kHeight = 127;
     constexpr uint32_t kDepth = 3;
@@ -1731,8 +1723,6 @@
 }
 
 TEST_P(CopyTests_B2T, Texture3DCopyHeightIsOneCopyWidthIsTiny) {
-    DAWN_TEST_UNSUPPORTED_IF(IsD3D12());  // TODO(crbug.com/dawn/547): Implement on D3D12.
-
     constexpr uint32_t kWidth = 2;
     constexpr uint32_t kHeight = 1;
     constexpr uint32_t kDepth = 3;
@@ -1754,8 +1744,6 @@
 }
 
 TEST_P(CopyTests_B2T, Texture3DCopyHeightIsOneCopyWidthIsSmall) {
-    DAWN_TEST_UNSUPPORTED_IF(IsD3D12());  // TODO(crbug.com/dawn/547): Implement on D3D12.
-
     constexpr uint32_t kWidth = 39;
     constexpr uint32_t kHeight = 1;
     constexpr uint32_t kDepth = 3;