d3d11: Support depth-stencil texture write.

When the other aspect already holds initialized data, this first copies
the dest texture to a staging texture. It then writes the new aspect
data to the staging texture and copies it back to the dest texture.

Bug: dawn:1924
Bug: dawn:1848
Bug: dawn:1705

Change-Id: Ie7a8f4d9098c613db4c96b0a1dfb221285bb7815
Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/141860
Reviewed-by: Corentin Wallez <cwallez@chromium.org>
Commit-Queue: Jie A Chen <jie.a.chen@intel.com>
Kokoro: Kokoro <noreply+kokoro@google.com>
diff --git a/src/dawn/native/d3d11/PhysicalDeviceD3D11.cpp b/src/dawn/native/d3d11/PhysicalDeviceD3D11.cpp
index d30825e..aa490bf 100644
--- a/src/dawn/native/d3d11/PhysicalDeviceD3D11.cpp
+++ b/src/dawn/native/d3d11/PhysicalDeviceD3D11.cpp
@@ -233,7 +233,6 @@
 void PhysicalDevice::SetupBackendDeviceToggles(TogglesState* deviceToggles) const {
     // D3D11 can only clear RTV with float values.
     deviceToggles->Default(Toggle::ApplyClearBigIntegerColorValueWithDraw, true);
-    // TODO(dawn:1848): Support depth-stencil texture write.
     deviceToggles->Default(Toggle::UseBlitForBufferToStencilTextureCopy, true);
 }
 
diff --git a/src/dawn/native/d3d11/TextureD3D11.cpp b/src/dawn/native/d3d11/TextureD3D11.cpp
index b210de6..72c3eff 100644
--- a/src/dawn/native/d3d11/TextureD3D11.cpp
+++ b/src/dawn/native/d3d11/TextureD3D11.cpp
@@ -89,6 +89,39 @@
     }
 }
 
+// Byte layout of the depth or stencil component inside one texel of a depth-stencil format.
+struct DepthStencilAspectLayout {
+    // Size in bytes of one whole texel of the depth/stencil DXGI format.
+    uint32_t texelSize = 0u;
+    // Byte offset of the depth/stencil component from the start of the texel.
+    uint32_t componentOffset = 0u;
+    // Size in bytes of the depth/stencil component on its own.
+    uint32_t componentSize = 0u;
+};
+
+DepthStencilAspectLayout DepthStencilAspectLayout(DXGI_FORMAT format, Aspect aspect) {
+    ASSERT(aspect == Aspect::Depth || aspect == Aspect::Stencil);
+    uint32_t texelSize = 0u;
+    uint32_t componentOffset = 0u;
+    uint32_t componentSize = 0u;
+
+    switch (format) {
+        case DXGI_FORMAT_D24_UNORM_S8_UINT:
+            componentOffset = aspect == Aspect::Stencil ? 3u : 0u;
+            componentSize = aspect == Aspect::Stencil ? 1u : 3u;
+            texelSize = 4u;
+            break;
+        case DXGI_FORMAT_D32_FLOAT_S8X24_UINT:
+            componentOffset = aspect == Aspect::Stencil ? 4u : 0u;
+            componentSize = aspect == Aspect::Stencil ? 1u : 4u;
+            texelSize = 8u;
+            break;
+        default:
+            UNREACHABLE();
+    }
+    return {texelSize, componentOffset, componentSize};
+}
+
 }  // namespace
 
 MaybeError ValidateTextureCanBeWrapped(ID3D11Resource* d3d11Resource,
@@ -227,6 +260,10 @@
         GetFormat().HasDepthOrStencil() && (GetUsage() & wgpu::TextureUsage::TextureBinding);
     // We need to use the typeless format if view format reinterpretation is required.
     needsTypelessFormat |= GetViewFormats().any();
+    // We need to use the typeless format if it's a staging texture for writing to depth-stencil
+    // textures.
+    needsTypelessFormat |=
+        d3d::IsDepthStencil(d3d::DXGITextureFormat(GetFormat().format)) && mKind == Kind::Staging;
     desc.Format = needsTypelessFormat ? d3d::DXGITypelessTextureFormat(GetFormat().format)
                                       : d3d::DXGITextureFormat(GetFormat().format);
     desc.Usage = mKind == Kind::Staging ? D3D11_USAGE_STAGING : D3D11_USAGE_DEFAULT;
@@ -662,10 +699,12 @@
                                   uint32_t bytesPerRow,
                                   uint32_t rowsPerImage) {
     ASSERT(size.width != 0 && size.height != 0 && size.depthOrArrayLayers != 0);
+    ASSERT(subresources.levelCount == 1);
 
     if (d3d::IsDepthStencil(d3d::DXGITextureFormat(GetFormat().format))) {
-        // TODO(dawn:1848): support depth-stencil texture write
-        return DAWN_UNIMPLEMENTED_ERROR("Write combined depth/stencil textures");
+        DAWN_TRY(WriteDepthStencilInternal(commandContext, subresources, origin, size, data,
+                                           bytesPerRow, rowsPerImage));
+        return {};
     }
 
     D3D11_BOX dstBox;
@@ -699,6 +738,97 @@
     return {};
 }
 
+MaybeError Texture::WriteDepthStencilInternal(CommandRecordingContext* commandContext,
+                                              const SubresourceRange& subresources,
+                                              const Origin3D& origin,
+                                              const Extent3D& size,
+                                              const uint8_t* data,
+                                              uint32_t bytesPerRow,
+                                              uint32_t rowsPerImage) {
+    TextureDescriptor desc = {};
+    desc.label = "WriteStencilTextureStaging";
+    desc.dimension = GetDimension();
+    desc.size = size;
+    desc.format = GetFormat().format;
+    desc.mipLevelCount = 1;
+    desc.sampleCount = GetSampleCount();
+
+    Ref<Texture> stagingTexture;
+    DAWN_TRY_ASSIGN(stagingTexture, CreateInternal(ToBackend(GetDevice()), &desc, Kind::Staging));
+
+    // Depth-stencil subresources can only be written to completely and not partially.
+    ASSERT(IsCompleteSubresourceCopiedTo(this, size, subresources.baseMipLevel));
+
+    SubresourceRange otherRange = subresources;
+    Aspect otherAspects = GetFormat().aspects & ~subresources.aspects;
+    ASSERT(HasZeroOrOneBits(otherAspects));
+    otherRange.aspects = otherAspects;
+    // We need to copy the texture over if the other aspect is present and initialized so that it is
+    // preserved during the write.
+    bool shouldCopyExistingDataFirst =
+        HasOneBit(otherAspects) && IsSubresourceContentInitialized(otherRange);
+
+    if (shouldCopyExistingDataFirst) {
+        // Copy the dest texture to a staging texture.
+        CopyTextureToTextureCmd copyCmd;
+        copyCmd.source.texture = this;
+        copyCmd.source.origin = origin;
+        copyCmd.source.mipLevel = subresources.baseMipLevel;
+        copyCmd.source.aspect = Aspect::CombinedDepthStencil;
+        copyCmd.destination.texture = stagingTexture.Get();
+        copyCmd.destination.origin = {0, 0, 0};
+        copyCmd.destination.mipLevel = 0;
+        copyCmd.destination.aspect = Aspect::CombinedDepthStencil;
+        copyCmd.copySize = size;
+        DAWN_TRY(Texture::CopyInternal(commandContext, &copyCmd));
+    }
+
+    const auto aspectLayout =
+        DepthStencilAspectLayout(d3d::DXGITextureFormat(GetFormat().format), subresources.aspects);
+
+    // Map with write access (assumes staging textures have D3D11_CPU_ACCESS_WRITE; confirm).
+    ID3D11DeviceContext1* d3d11DeviceContext1 = commandContext->GetD3D11DeviceContext1();
+    D3D11_MAPPED_SUBRESOURCE mappedResource;
+    const uint8_t* pSrcData = data;
+    for (uint32_t layer = 0; layer < size.depthOrArrayLayers; ++layer) {
+        DAWN_TRY(CheckHRESULT(d3d11DeviceContext1->Map(stagingTexture->GetD3D11Resource(), layer,
+                                                       D3D11_MAP_READ_WRITE, 0, &mappedResource),
+                              "D3D11 map staging texture"));
+        uint8_t* pDstData = static_cast<uint8_t*>(mappedResource.pData);
+        for (uint32_t y = 0; y < size.height; ++y) {
+            const uint8_t* pSrcRow = pSrcData;
+            uint8_t* pDstRow = pDstData;
+            pDstRow += aspectLayout.componentOffset;
+            for (uint32_t x = 0; x < size.width; ++x) {
+                std::memcpy(pDstRow, pSrcRow, aspectLayout.componentSize);
+                pDstRow += aspectLayout.texelSize;
+                pSrcRow += aspectLayout.componentSize;
+            }
+            pDstData += mappedResource.RowPitch;
+            pSrcData += bytesPerRow;
+        }
+        d3d11DeviceContext1->Unmap(stagingTexture->GetD3D11Resource(), layer);
+        ASSERT(size.height <= rowsPerImage);
+        // Skip the padding rows.
+        pSrcData += (rowsPerImage - size.height) * bytesPerRow;
+    }
+
+    // Copy to the dest texture from the staging texture.
+    CopyTextureToTextureCmd copyCmd;
+    copyCmd.source.texture = stagingTexture.Get();
+    copyCmd.source.origin = {0, 0, 0};
+    copyCmd.source.mipLevel = 0;
+    copyCmd.source.aspect = Aspect::CombinedDepthStencil;
+    copyCmd.destination.texture = this;
+    copyCmd.destination.origin = origin;
+    copyCmd.destination.mipLevel = subresources.baseMipLevel;
+    copyCmd.destination.aspect = Aspect::CombinedDepthStencil;
+    copyCmd.copySize = size;
+    DAWN_TRY(Texture::CopyInternal(commandContext, &copyCmd));
+
+    return {};
+}
+
 MaybeError Texture::ReadStaging(CommandRecordingContext* commandContext,
                                 const SubresourceRange& subresources,
                                 const Origin3D& origin,
@@ -738,31 +868,18 @@
             } else if (hasStencil) {
                 // We need to read texel by texel for depth-stencil formats.
                 std::vector<uint8_t> depthOrStencilData(size.width * blockInfo.byteSize);
-                // Element size of a depth/stencil DXGI format in bytes.
-                uint32_t depthOrStencilStride = 0u;
-                // Depth/Stencil component offset inside the element in bytes.
-                uint32_t depthOrStencilOffset = 0u;
-                switch (d3d::DXGITextureFormat(GetFormat().format)) {
-                    case DXGI_FORMAT_D24_UNORM_S8_UINT:
-                        depthOrStencilOffset = subresources.aspects == Aspect::Stencil ? 3u : 0u;
-                        depthOrStencilStride = 4u;
-                        break;
-                    case DXGI_FORMAT_D32_FLOAT_S8X24_UINT:
-                        depthOrStencilOffset = subresources.aspects == Aspect::Stencil ? 4u : 0u;
-                        depthOrStencilStride = 8u;
-                        break;
-                    default:
-                        UNREACHABLE();
-                }
+                const auto aspectLayout = DepthStencilAspectLayout(
+                    d3d::DXGITextureFormat(GetFormat().format), subresources.aspects);
+                ASSERT(blockInfo.byteSize == aspectLayout.componentSize);
                 for (uint32_t y = 0; y < rowsPerImage; ++y) {
                     // Filter the depth/stencil data out.
                     uint8_t* src = pSrcData;
                     uint8_t* dst = depthOrStencilData.data();
-                    src += depthOrStencilOffset;
+                    src += aspectLayout.componentOffset;
                     for (uint32_t x = 0; x < size.width; ++x) {
-                        std::memcpy(dst, src, blockInfo.byteSize);
-                        src += depthOrStencilStride;
-                        dst += blockInfo.byteSize;
+                        std::memcpy(dst, src, aspectLayout.componentSize);
+                        src += aspectLayout.texelSize;
+                        dst += aspectLayout.componentSize;
                     }
                     DAWN_TRY(callback(depthOrStencilData.data(), dstOffset, bytesPerRow));
                     dstOffset += dstBytesPerRow;
@@ -882,6 +999,20 @@
                      ->EnsureSubresourceContentInitialized(commandContext, dstSubresources));
     }
 
+    DAWN_TRY(CopyInternal(commandContext, copy));
+
+    return {};
+}
+
+// static
+MaybeError Texture::CopyInternal(CommandRecordingContext* commandContext,
+                                 CopyTextureToTextureCmd* copy) {
+    auto& src = copy->source;
+    auto& dst = copy->destination;
+
+    SubresourceRange srcSubresources = GetSubresourcesAffectedByCopy(src, copy->copySize);
+    SubresourceRange dstSubresources = GetSubresourcesAffectedByCopy(dst, copy->copySize);
+
     D3D11_BOX srcBox;
     srcBox.left = src.origin.x;
     srcBox.right = src.origin.x + copy->copySize.width;
diff --git a/src/dawn/native/d3d11/TextureD3D11.h b/src/dawn/native/d3d11/TextureD3D11.h
index 659748b..7eb2788 100644
--- a/src/dawn/native/d3d11/TextureD3D11.h
+++ b/src/dawn/native/d3d11/TextureD3D11.h
@@ -135,6 +135,8 @@
                            uint32_t bytesPerRow,
                            uint32_t rowsPerImage,
                            ReadCallback callback);
+
+    // Write the texture without the content initialization bookkeeping.
     MaybeError WriteInternal(CommandRecordingContext* commandContext,
                              const SubresourceRange& subresources,
                              const Origin3D& origin,
@@ -143,6 +145,19 @@
                              uint32_t bytesPerRow,
                              uint32_t rowsPerImage);
 
+    // Write the depth-stencil texture without the content initialization bookkeeping.
+    MaybeError WriteDepthStencilInternal(CommandRecordingContext* commandContext,
+                                         const SubresourceRange& subresources,
+                                         const Origin3D& origin,
+                                         const Extent3D& size,
+                                         const uint8_t* data,
+                                         uint32_t bytesPerRow,
+                                         uint32_t rowsPerImage);
+
+    // Copy the textures without the content initialization bookkeeping.
+    static MaybeError CopyInternal(CommandRecordingContext* commandContext,
+                                   CopyTextureToTextureCmd* copy);
+
     const Kind mKind = Kind::Normal;
     ComPtr<ID3D11Resource> mD3d11Resource;
 };
diff --git a/src/dawn/tests/DawnTest.cpp b/src/dawn/tests/DawnTest.cpp
index 695642d..07eff47 100644
--- a/src/dawn/tests/DawnTest.cpp
+++ b/src/dawn/tests/DawnTest.cpp
@@ -856,6 +856,11 @@
                                      mParam.adapterProperties.deviceID);
 }
 
+bool DawnTestBase::IsIntelGen9() const {
+    return gpu_info::IsIntelGen9(mParam.adapterProperties.vendorID,
+                                 mParam.adapterProperties.deviceID);
+}
+
 bool DawnTestBase::IsIntelGen12() const {
     return gpu_info::IsIntelGen12LP(mParam.adapterProperties.vendorID,
                                     mParam.adapterProperties.deviceID) ||
diff --git a/src/dawn/tests/DawnTest.h b/src/dawn/tests/DawnTest.h
index 7ce48db..8bdbe0a 100644
--- a/src/dawn/tests/DawnTest.h
+++ b/src/dawn/tests/DawnTest.h
@@ -249,6 +249,7 @@
     bool IsANGLESwiftShader() const;
     bool IsWARP() const;
 
+    bool IsIntelGen9() const;
     bool IsIntelGen12() const;
 
     bool IsWindows() const;
diff --git a/src/dawn/tests/end2end/CopyTests.cpp b/src/dawn/tests/end2end/CopyTests.cpp
index d376935..9e8bac3 100644
--- a/src/dawn/tests/end2end/CopyTests.cpp
+++ b/src/dawn/tests/end2end/CopyTests.cpp
@@ -2748,11 +2748,6 @@
     DAWN_TEST_UNSUPPORTED_IF(GetParam().mTextureFormat == wgpu::TextureFormat::Stencil8 &&
                              (IsOpenGL() || IsOpenGLES()));
 
-    // TODO(dawn:1848): support depth-stencil texture write on D3D11.
-    DAWN_TEST_UNSUPPORTED_IF(
-        GetParam().mTextureFormat == wgpu::TextureFormat::Stencil8 &&
-        GetParam().mInitializationMethod == InitializationMethod::WriteTexture && IsD3D11());
-
     wgpu::TextureFormat format = GetParam().mTextureFormat;
 
     const uint32_t texelBlockSize = utils::GetTexelBlockSizeInBytes(format);
diff --git a/src/dawn/tests/end2end/DepthStencilCopyTests.cpp b/src/dawn/tests/end2end/DepthStencilCopyTests.cpp
index b4532bc..17db845 100644
--- a/src/dawn/tests/end2end/DepthStencilCopyTests.cpp
+++ b/src/dawn/tests/end2end/DepthStencilCopyTests.cpp
@@ -1278,9 +1278,6 @@
     DAWN_TEST_UNSUPPORTED_IF(IsOpenGL());
     DAWN_TEST_UNSUPPORTED_IF(IsOpenGLES());
 
-    // TODO(dawn:1848): support depth-stencil texture write on D3D11.
-    DAWN_SUPPRESS_TEST_IF(IsD3D11());
-
     // Create a stencil texture
     constexpr uint32_t kWidth = 4;
     constexpr uint32_t kHeight = 4;
diff --git a/src/dawn/tests/end2end/MultithreadTests.cpp b/src/dawn/tests/end2end/MultithreadTests.cpp
index 3ebb857..9881ca2 100644
--- a/src/dawn/tests/end2end/MultithreadTests.cpp
+++ b/src/dawn/tests/end2end/MultithreadTests.cpp
@@ -879,8 +879,8 @@
     // stencil.
     DAWN_TEST_UNSUPPORTED_IF(HasToggleEnabled("disable_depth_stencil_read"));
 
-    // TODO(dawn:1848): support depth-stencil texture write on D3D11.
-    DAWN_SUPPRESS_TEST_IF(IsD3D11());
+    // TODO(dawn:1924): Intel Gen9 specific.
+    DAWN_SUPPRESS_TEST_IF(IsD3D11() && IsIntelGen9());
 
     enum class Step {
         Begin,
diff --git a/src/dawn/tests/end2end/QueueTests.cpp b/src/dawn/tests/end2end/QueueTests.cpp
index 52c3e1f..0cbd7c7 100644
--- a/src/dawn/tests/end2end/QueueTests.cpp
+++ b/src/dawn/tests/end2end/QueueTests.cpp
@@ -644,9 +644,6 @@
     // Copies to a single aspect are unsupported on OpenGL.
     DAWN_SUPPRESS_TEST_IF(IsOpenGL() || IsOpenGLES());
 
-    // TODO(dawn:1848): Support depth-stencil texture write on D3D11.
-    DAWN_SUPPRESS_TEST_IF(IsD3D11());
-
     wgpu::TextureDescriptor textureDescriptor;
     textureDescriptor.format = wgpu::TextureFormat::Depth24PlusStencil8;
     textureDescriptor.usage = wgpu::TextureUsage::CopySrc | wgpu::TextureUsage::CopyDst;
@@ -751,9 +748,6 @@
     // Copies to a single aspect are unsupported on OpenGL.
     DAWN_SUPPRESS_TEST_IF(IsOpenGL() || IsOpenGLES());
 
-    // TODO(dawn:1848): Support depth-stencil texture write on D3D11.
-    DAWN_SUPPRESS_TEST_IF(IsD3D11());
-
     wgpu::TextureDescriptor textureDescriptor;
     textureDescriptor.format = wgpu::TextureFormat::Depth24PlusStencil8;
     textureDescriptor.usage = wgpu::TextureUsage::CopySrc | wgpu::TextureUsage::CopyDst;
diff --git a/src/dawn/tests/end2end/TextureZeroInitTests.cpp b/src/dawn/tests/end2end/TextureZeroInitTests.cpp
index 18d2567..7d79db7 100644
--- a/src/dawn/tests/end2end/TextureZeroInitTests.cpp
+++ b/src/dawn/tests/end2end/TextureZeroInitTests.cpp
@@ -794,8 +794,6 @@
 TEST_P(TextureZeroInitTest, StencilCopyThenDiscardAndReadBySampling) {
     // Copies to a single aspect are unsupported on OpenGL.
     DAWN_SUPPRESS_TEST_IF(IsOpenGL() || IsOpenGLES());
-    // TODO(dawn:1848): support depth-stencil texture write on D3D11.
-    DAWN_SUPPRESS_TEST_IF(IsD3D11());
 
     for (wgpu::TextureFormat format :
          {wgpu::TextureFormat::Stencil8, wgpu::TextureFormat::Depth24PlusStencil8}) {
@@ -827,8 +825,6 @@
 TEST_P(TextureZeroInitTest, StencilCopyThenDiscardAndReadByCopy) {
     // Copies to a single aspect are unsupported on OpenGL.
     DAWN_SUPPRESS_TEST_IF(IsOpenGL() || IsOpenGLES());
-    // TODO(dawn:1848): support depth-stencil texture write on D3D11.
-    DAWN_SUPPRESS_TEST_IF(IsD3D11());
 
     for (wgpu::TextureFormat format :
          {wgpu::TextureFormat::Stencil8, wgpu::TextureFormat::Depth24PlusStencil8}) {
@@ -862,8 +858,6 @@
 TEST_P(TextureZeroInitTest, StencilCopyThenDiscardAndCopyToTextureThenReadByCopy) {
     // Copies to a single aspect are unsupported on OpenGL.
     DAWN_SUPPRESS_TEST_IF(IsOpenGL() || IsOpenGLES());
-    // TODO(dawn:1848): support depth-stencil texture write on D3D11.
-    DAWN_SUPPRESS_TEST_IF(IsD3D11());
 
     for (wgpu::TextureFormat format :
          {wgpu::TextureFormat::Stencil8, wgpu::TextureFormat::Depth24PlusStencil8}) {