Align offset to 512 when writing into depth stencil textures on some platforms

On the D3D12 platforms that don't support programmable sample positions,
the source box specifying a portion of the depth texture must be all
zeros, or an error and a device loss will occur. This patch adds a
workaround for this issue by aligning the offset of the internal staging
buffer to 512 when calling Queue.WriteTexture() with depth stencil
textures.

Bug: dawn:727
Test: dawn_end2end_tests
Change-Id: I6bc5843d62d0aec3964ee5b544a06c0b2657031a
Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/98601
Reviewed-by: Austin Eng <enga@chromium.org>
Commit-Queue: Jiawei Shao <jiawei.shao@intel.com>
diff --git a/src/dawn/native/Device.cpp b/src/dawn/native/Device.cpp
index 5026cc2..e3145a9 100644
--- a/src/dawn/native/Device.cpp
+++ b/src/dawn/native/Device.cpp
@@ -1928,4 +1928,10 @@
     return false;
 }
 
+uint64_t DeviceBase::GetBufferCopyOffsetAlignmentForDepthStencil() const {
+    // For depth-stencil textures, the buffer offset must be a multiple of 4, as required by the
+    // WebGPU and Vulkan specifications.
+    return 4u;
+}
+
 }  // namespace dawn::native
diff --git a/src/dawn/native/Device.h b/src/dawn/native/Device.h
index a74be9c..0a04645 100644
--- a/src/dawn/native/Device.h
+++ b/src/dawn/native/Device.h
@@ -362,6 +362,7 @@
     // BackendMetadata that we can query from the device.
     virtual uint32_t GetOptimalBytesPerRowAlignment() const = 0;
     virtual uint64_t GetOptimalBufferToTextureCopyOffsetAlignment() const = 0;
+    virtual uint64_t GetBufferCopyOffsetAlignmentForDepthStencil() const;
 
     virtual float GetTimestampPeriodInNS() const = 0;
 
diff --git a/src/dawn/native/Queue.cpp b/src/dawn/native/Queue.cpp
index c17ed8f..9881d65 100644
--- a/src/dawn/native/Queue.cpp
+++ b/src/dawn/native/Queue.cpp
@@ -97,11 +97,11 @@
     // since both of them are powers of two, we only need to align to the max value.
     uint64_t offsetAlignment = std::max(optimalOffsetAlignment, uint64_t(blockInfo.byteSize));
 
-    // For depth-stencil texture, buffer offset must be a multiple of 4, which is required
-    // by WebGPU and Vulkan SPEC.
+    // Buffer offset alignments must follow additional restrictions when we copy with depth stencil
+    // formats.
     if (hasDepthOrStencil) {
-        constexpr uint64_t kOffsetAlignmentForDepthStencil = 4;
-        offsetAlignment = std::max(offsetAlignment, kOffsetAlignmentForDepthStencil);
+        offsetAlignment =
+            std::max(offsetAlignment, device->GetBufferCopyOffsetAlignmentForDepthStencil());
     }
 
     UploadHandle uploadHandle;
diff --git a/src/dawn/native/d3d12/DeviceD3D12.cpp b/src/dawn/native/d3d12/DeviceD3D12.cpp
index 2634379..c0c734c 100644
--- a/src/dawn/native/d3d12/DeviceD3D12.cpp
+++ b/src/dawn/native/d3d12/DeviceD3D12.cpp
@@ -899,4 +899,17 @@
     return ToBackend(renderPipelineBase)->UsesVertexOrInstanceIndex();
 }
 
+uint64_t Device::GetBufferCopyOffsetAlignmentForDepthStencil() const {
+    // On the D3D12 platforms where programmable MSAA is not supported, the source box specifying a
+    // portion of the depth texture must be all zeros, or an error and a device loss will occur.
+    // On these platforms the buffer copy offset must therefore be a multiple of 512
+    // (D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT) when the texture is created with
+    // D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL. See https://crbug.com/dawn/727 for more details.
+    if (IsToggleEnabled(
+            Toggle::D3D12UseTempBufferInDepthStencilTextureAndBufferCopyWithNonZeroBufferOffset)) {
+        return D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT;
+    }
+    return DeviceBase::GetBufferCopyOffsetAlignmentForDepthStencil();
+}
+
 }  // namespace dawn::native::d3d12
diff --git a/src/dawn/native/d3d12/DeviceD3D12.h b/src/dawn/native/d3d12/DeviceD3D12.h
index 399fd64..0373cf9 100644
--- a/src/dawn/native/d3d12/DeviceD3D12.h
+++ b/src/dawn/native/d3d12/DeviceD3D12.h
@@ -162,6 +162,8 @@
 
     bool IsFeatureEnabled(Feature feature) const override;
 
+    uint64_t GetBufferCopyOffsetAlignmentForDepthStencil() const override;
+
     // Dawn APIs
     void SetLabelImpl() override;
 
diff --git a/src/dawn/tests/end2end/QueueTests.cpp b/src/dawn/tests/end2end/QueueTests.cpp
index a8f34f4..e3664eb 100644
--- a/src/dawn/tests/end2end/QueueTests.cpp
+++ b/src/dawn/tests/end2end/QueueTests.cpp
@@ -704,8 +704,76 @@
     EXPECT_BUFFER_U8_RANGE_EQ(expectedData.data(), outputBuffer, 0, 8);
 }
 
+// Tests that calling queue.writeTexture() on a depth texture after calling queue.writeTexture()
+// on another texture always works. On some D3D12 backends the buffer offset of buffer-to-texture
+// copies must be a multiple of 512 when the destination texture is a depth stencil texture.
+TEST_P(QueueWriteTextureTests, WriteDepthAspectAfterOtherQueueWriteTextureCalls) {
+    // Copies to a single aspect are unsupported on OpenGL.
+    DAWN_SUPPRESS_TEST_IF(IsOpenGL() || IsOpenGLES());
+
+    wgpu::TextureDescriptor textureDescriptor;
+    textureDescriptor.format = wgpu::TextureFormat::Depth16Unorm;
+    textureDescriptor.usage = wgpu::TextureUsage::CopySrc | wgpu::TextureUsage::CopyDst;
+    textureDescriptor.size = {1, 1, 1};
+    wgpu::Texture depthTexture1 = device.CreateTexture(&textureDescriptor);
+    wgpu::Texture depthTexture2 = device.CreateTexture(&textureDescriptor);
+
+    constexpr uint16_t kExpectedData1 = (204 << 8) | 205;
+    wgpu::ImageCopyTexture imageCopyTexture1 = utils::CreateImageCopyTexture(depthTexture1);
+    wgpu::TextureDataLayout textureDataLayout =
+        utils::CreateTextureDataLayout(0, sizeof(kExpectedData1));
+    queue.WriteTexture(&imageCopyTexture1, &kExpectedData1, sizeof(kExpectedData1),
+                       &textureDataLayout, &textureDescriptor.size);
+
+    constexpr uint16_t kExpectedData2 = (206 << 8) | 207;
+    wgpu::ImageCopyTexture imageCopyTexture2 = utils::CreateImageCopyTexture(depthTexture2);
+    queue.WriteTexture(&imageCopyTexture2, &kExpectedData2, sizeof(kExpectedData2),
+                       &textureDataLayout, &textureDescriptor.size);
+
+    EXPECT_TEXTURE_EQ(&kExpectedData1, depthTexture1, {0, 0}, {1, 1}, 0,
+                      wgpu::TextureAspect::DepthOnly);
+    EXPECT_TEXTURE_EQ(&kExpectedData2, depthTexture2, {0, 0}, {1, 1}, 0,
+                      wgpu::TextureAspect::DepthOnly);
+}
+
+// Tests that calling queue.writeTexture() on the stencil aspect after calling queue.writeTexture()
+// on another texture always works. On some D3D12 backends the buffer offset of buffer-to-texture
+// copies must be a multiple of 512 when the destination texture is a depth stencil texture.
+TEST_P(QueueWriteTextureTests, WriteStencilAspectAfterOtherQueueWriteTextureCalls) {
+    // Copies to a single aspect are unsupported on OpenGL.
+    DAWN_SUPPRESS_TEST_IF(IsOpenGL() || IsOpenGLES());
+
+    wgpu::TextureDescriptor textureDescriptor;
+    textureDescriptor.format = wgpu::TextureFormat::Depth24PlusStencil8;
+    textureDescriptor.usage = wgpu::TextureUsage::CopySrc | wgpu::TextureUsage::CopyDst;
+    textureDescriptor.size = {1, 1, 1};
+    wgpu::Texture depthStencilTexture1 = device.CreateTexture(&textureDescriptor);
+    wgpu::Texture depthStencilTexture2 = device.CreateTexture(&textureDescriptor);
+
+    constexpr uint8_t kExpectedData1 = 204u;
+    wgpu::ImageCopyTexture imageCopyTexture1 = utils::CreateImageCopyTexture(
+        depthStencilTexture1, 0, {0, 0, 0}, wgpu::TextureAspect::StencilOnly);
+    wgpu::TextureDataLayout textureDataLayout =
+        utils::CreateTextureDataLayout(0, sizeof(kExpectedData1));
+    queue.WriteTexture(&imageCopyTexture1, &kExpectedData1, sizeof(kExpectedData1),
+                       &textureDataLayout, &textureDescriptor.size);
+
+    constexpr uint8_t kExpectedData2 = 205;
+    wgpu::ImageCopyTexture imageCopyTexture2 = utils::CreateImageCopyTexture(
+        depthStencilTexture2, 0, {0, 0, 0}, wgpu::TextureAspect::StencilOnly);
+    queue.WriteTexture(&imageCopyTexture2, &kExpectedData2, sizeof(kExpectedData2),
+                       &textureDataLayout, &textureDescriptor.size);
+
+    EXPECT_TEXTURE_EQ(&kExpectedData1, depthStencilTexture1, {0, 0}, {1, 1}, 0,
+                      wgpu::TextureAspect::StencilOnly);
+    EXPECT_TEXTURE_EQ(&kExpectedData2, depthStencilTexture2, {0, 0}, {1, 1}, 0,
+                      wgpu::TextureAspect::StencilOnly);
+}
+
 DAWN_INSTANTIATE_TEST(QueueWriteTextureTests,
                       D3D12Backend(),
+                      D3D12Backend({"d3d12_use_temp_buffer_in_depth_stencil_texture_and_buffer_"
+                                    "copy_with_non_zero_buffer_offset"}),
                       MetalBackend(),
                       OpenGLBackend(),
                       OpenGLESBackend(),