Align offset to 512 when writing into depth stencil textures on some platforms
On the D3D12 platforms that don't support programmable sample positions,
the source box specifying a portion of the depth texture must all be 0,
or an error and a device lost will occur. This patch adds a workaround
for this issue by aligning the offset of the internal staging buffer to 512
when calling Queue.WriteTexture() with depth stencil textures.
Bug: dawn:727
Test: dawn_end2end_tests
Change-Id: I6bc5843d62d0aec3964ee5b544a06c0b2657031a
Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/98601
Reviewed-by: Austin Eng <enga@chromium.org>
Commit-Queue: Jiawei Shao <jiawei.shao@intel.com>
diff --git a/src/dawn/native/Device.cpp b/src/dawn/native/Device.cpp
index 5026cc2..e3145a9 100644
--- a/src/dawn/native/Device.cpp
+++ b/src/dawn/native/Device.cpp
@@ -1928,4 +1928,10 @@
return false;
}
+uint64_t DeviceBase::GetBufferCopyOffsetAlignmentForDepthStencil() const {
+ // For depth-stencil texture, buffer offset must be a multiple of 4, which is required
+ // by WebGPU and Vulkan SPEC.
+ return 4u;
+}
+
} // namespace dawn::native
diff --git a/src/dawn/native/Device.h b/src/dawn/native/Device.h
index a74be9c..0a04645 100644
--- a/src/dawn/native/Device.h
+++ b/src/dawn/native/Device.h
@@ -362,6 +362,7 @@
// BackendMetadata that we can query from the device.
virtual uint32_t GetOptimalBytesPerRowAlignment() const = 0;
virtual uint64_t GetOptimalBufferToTextureCopyOffsetAlignment() const = 0;
+ virtual uint64_t GetBufferCopyOffsetAlignmentForDepthStencil() const;
virtual float GetTimestampPeriodInNS() const = 0;
diff --git a/src/dawn/native/Queue.cpp b/src/dawn/native/Queue.cpp
index c17ed8f..9881d65 100644
--- a/src/dawn/native/Queue.cpp
+++ b/src/dawn/native/Queue.cpp
@@ -97,11 +97,11 @@
// since both of them are powers of two, we only need to align to the max value.
uint64_t offsetAlignment = std::max(optimalOffsetAlignment, uint64_t(blockInfo.byteSize));
- // For depth-stencil texture, buffer offset must be a multiple of 4, which is required
- // by WebGPU and Vulkan SPEC.
+ // Buffer offset alignments must follow additional restrictions when we copy with depth stencil
+ // formats.
if (hasDepthOrStencil) {
- constexpr uint64_t kOffsetAlignmentForDepthStencil = 4;
- offsetAlignment = std::max(offsetAlignment, kOffsetAlignmentForDepthStencil);
+ offsetAlignment =
+ std::max(offsetAlignment, device->GetBufferCopyOffsetAlignmentForDepthStencil());
}
UploadHandle uploadHandle;
diff --git a/src/dawn/native/d3d12/DeviceD3D12.cpp b/src/dawn/native/d3d12/DeviceD3D12.cpp
index 2634379..c0c734c 100644
--- a/src/dawn/native/d3d12/DeviceD3D12.cpp
+++ b/src/dawn/native/d3d12/DeviceD3D12.cpp
@@ -899,4 +899,17 @@
return ToBackend(renderPipelineBase)->UsesVertexOrInstanceIndex();
}
+uint64_t Device::GetBufferCopyOffsetAlignmentForDepthStencil() const {
+ // On the D3D12 platforms where programmable MSAA is not supported, the source box specifying a
+ // portion of the depth texture must all be 0, or an error and a device lost will occur, so on
+ // these platforms the buffer copy offset must be a multiple of 512 when the texture is created
+ // with D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL. See https://crbug.com/dawn/727 for more
+ // details.
+ if (IsToggleEnabled(
+ Toggle::D3D12UseTempBufferInDepthStencilTextureAndBufferCopyWithNonZeroBufferOffset)) {
+ return D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT;
+ }
+ return DeviceBase::GetBufferCopyOffsetAlignmentForDepthStencil();
+}
+
} // namespace dawn::native::d3d12
diff --git a/src/dawn/native/d3d12/DeviceD3D12.h b/src/dawn/native/d3d12/DeviceD3D12.h
index 399fd64..0373cf9 100644
--- a/src/dawn/native/d3d12/DeviceD3D12.h
+++ b/src/dawn/native/d3d12/DeviceD3D12.h
@@ -162,6 +162,8 @@
bool IsFeatureEnabled(Feature feature) const override;
+ uint64_t GetBufferCopyOffsetAlignmentForDepthStencil() const override;
+
// Dawn APIs
void SetLabelImpl() override;
diff --git a/src/dawn/tests/end2end/QueueTests.cpp b/src/dawn/tests/end2end/QueueTests.cpp
index a8f34f4..e3664eb 100644
--- a/src/dawn/tests/end2end/QueueTests.cpp
+++ b/src/dawn/tests/end2end/QueueTests.cpp
@@ -704,8 +704,76 @@
EXPECT_BUFFER_U8_RANGE_EQ(expectedData.data(), outputBuffer, 0, 8);
}
+// Tests calling queue.writeTexture() to a depth texture after calling queue.writeTexture() on
+// another texture always works. On some D3D12 backends the buffer offset of buffer-to-texture
+// copies must be a multiple of 512 when the destination texture is a depth stencil texture.
+TEST_P(QueueWriteTextureTests, WriteDepthAspectAfterOtherQueueWriteTextureCalls) {
+ // Copies to a single aspect are unsupported on OpenGL.
+ DAWN_SUPPRESS_TEST_IF(IsOpenGL() || IsOpenGLES());
+
+ wgpu::TextureDescriptor textureDescriptor;
+ textureDescriptor.format = wgpu::TextureFormat::Depth16Unorm;
+ textureDescriptor.usage = wgpu::TextureUsage::CopySrc | wgpu::TextureUsage::CopyDst;
+ textureDescriptor.size = {1, 1, 1};
+ wgpu::Texture depthTexture1 = device.CreateTexture(&textureDescriptor);
+ wgpu::Texture depthTexture2 = device.CreateTexture(&textureDescriptor);
+
+ constexpr uint16_t kExpectedData1 = (204 << 8) | 205;
+ wgpu::ImageCopyTexture imageCopyTexture1 = utils::CreateImageCopyTexture(depthTexture1);
+ wgpu::TextureDataLayout textureDataLayout =
+ utils::CreateTextureDataLayout(0, sizeof(kExpectedData1));
+ queue.WriteTexture(&imageCopyTexture1, &kExpectedData1, sizeof(kExpectedData1),
+ &textureDataLayout, &textureDescriptor.size);
+
+ constexpr uint16_t kExpectedData2 = (206 << 8) | 207;
+ wgpu::ImageCopyTexture imageCopyTexture2 = utils::CreateImageCopyTexture(depthTexture2);
+ queue.WriteTexture(&imageCopyTexture2, &kExpectedData2, sizeof(kExpectedData2),
+ &textureDataLayout, &textureDescriptor.size);
+
+ EXPECT_TEXTURE_EQ(&kExpectedData1, depthTexture1, {0, 0}, {1, 1}, 0,
+ wgpu::TextureAspect::DepthOnly);
+ EXPECT_TEXTURE_EQ(&kExpectedData2, depthTexture2, {0, 0}, {1, 1}, 0,
+ wgpu::TextureAspect::DepthOnly);
+}
+
+// Tests calling queue.writeTexture() to the stencil aspect after calling queue.writeTexture() on
+// another texture always works. On some D3D12 backends the buffer offset of buffer-to-texture
+// copies must be a multiple of 512 when the destination texture is a depth stencil texture.
+TEST_P(QueueWriteTextureTests, WriteStencilAspectAfterOtherQueueWriteTextureCalls) {
+ // Copies to a single aspect are unsupported on OpenGL.
+ DAWN_SUPPRESS_TEST_IF(IsOpenGL() || IsOpenGLES());
+
+ wgpu::TextureDescriptor textureDescriptor;
+ textureDescriptor.format = wgpu::TextureFormat::Depth24PlusStencil8;
+ textureDescriptor.usage = wgpu::TextureUsage::CopySrc | wgpu::TextureUsage::CopyDst;
+ textureDescriptor.size = {1, 1, 1};
+ wgpu::Texture depthStencilTexture1 = device.CreateTexture(&textureDescriptor);
+ wgpu::Texture depthStencilTexture2 = device.CreateTexture(&textureDescriptor);
+
+ constexpr uint8_t kExpectedData1 = 204u;
+ wgpu::ImageCopyTexture imageCopyTexture1 = utils::CreateImageCopyTexture(
+ depthStencilTexture1, 0, {0, 0, 0}, wgpu::TextureAspect::StencilOnly);
+ wgpu::TextureDataLayout textureDataLayout =
+ utils::CreateTextureDataLayout(0, sizeof(kExpectedData1));
+ queue.WriteTexture(&imageCopyTexture1, &kExpectedData1, sizeof(kExpectedData1),
+ &textureDataLayout, &textureDescriptor.size);
+
+ constexpr uint8_t kExpectedData2 = 205;
+ wgpu::ImageCopyTexture imageCopyTexture2 = utils::CreateImageCopyTexture(
+ depthStencilTexture2, 0, {0, 0, 0}, wgpu::TextureAspect::StencilOnly);
+ queue.WriteTexture(&imageCopyTexture2, &kExpectedData2, sizeof(kExpectedData2),
+ &textureDataLayout, &textureDescriptor.size);
+
+ EXPECT_TEXTURE_EQ(&kExpectedData1, depthStencilTexture1, {0, 0}, {1, 1}, 0,
+ wgpu::TextureAspect::StencilOnly);
+ EXPECT_TEXTURE_EQ(&kExpectedData2, depthStencilTexture2, {0, 0}, {1, 1}, 0,
+ wgpu::TextureAspect::StencilOnly);
+}
+
DAWN_INSTANTIATE_TEST(QueueWriteTextureTests,
D3D12Backend(),
+ D3D12Backend({"d3d12_use_temp_buffer_in_depth_stencil_texture_and_buffer_"
+ "copy_with_non_zero_buffer_offset"}),
MetalBackend(),
OpenGLBackend(),
OpenGLESBackend(),