Update depth-stencil copy validation

Depth/stencil copies of partial subresources are disallowed in
WebGPU because this is a D3D12 restriction. This restriction
also needs to be enforced on B2T, T2B and WriteTexture.

This CL also fixes the whole-subresource size calculation to use
the mip level. Previously, the 0th level size was always used.

This CL updates the validation to be correct and adds tests.
The DepthStencilCopy tests are factored into smaller helpers to
reduce code duplication.

Bug: dawn:439
Change-Id: I45d4836f6be1707c5171bddef875e535e935f7f4
Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/26660
Reviewed-by: Austin Eng <enga@chromium.org>
Reviewed-by: Corentin Wallez <cwallez@chromium.org>
Commit-Queue: Austin Eng <enga@chromium.org>
diff --git a/src/dawn_native/CommandEncoder.cpp b/src/dawn_native/CommandEncoder.cpp
index db4968c..88717c7 100644
--- a/src/dawn_native/CommandEncoder.cpp
+++ b/src/dawn_native/CommandEncoder.cpp
@@ -56,26 +56,11 @@
             return {};
         }
 
-        MaybeError ValidateTextureSampleCountInCopyCommands(const TextureBase* texture) {
+        MaybeError ValidateTextureSampleCountInBufferCopyCommands(const TextureBase* texture) {
             if (texture->GetSampleCount() > 1) {
-                return DAWN_VALIDATION_ERROR("The sample count of textures must be 1");
-            }
-
-            return {};
-        }
-
-        MaybeError ValidateEntireSubresourceCopied(const TextureCopyView& src,
-                                                   const TextureCopyView& dst,
-                                                   const Extent3D& copySize) {
-            Extent3D srcSize = src.texture->GetSize();
-
-            ASSERT(src.texture->GetDimension() == wgpu::TextureDimension::e2D &&
-                   dst.texture->GetDimension() == wgpu::TextureDimension::e2D);
-            if (dst.origin.x != 0 || dst.origin.y != 0 || srcSize.width != copySize.width ||
-                srcSize.height != copySize.height) {
                 return DAWN_VALIDATION_ERROR(
-                    "The entire subresource must be copied when using a depth/stencil texture or "
-                    "when samples are greater than 1.");
+                    "The sample count of textures must be 1 when copying between buffers and "
+                    "textures");
             }
 
             return {};
@@ -90,10 +75,6 @@
             if (srcSamples != dstSamples) {
                 return DAWN_VALIDATION_ERROR(
                     "Source and destination textures must have matching sample counts.");
-            } else if (srcSamples > 1) {
-                // D3D12 requires entire subresource to be copied when using CopyTextureRegion when
-                // samples > 1.
-                DAWN_TRY(ValidateEntireSubresourceCopied(src, dst, copySize));
             }
 
             if (src.texture->GetFormat().format != dst.texture->GetFormat().format) {
@@ -107,12 +88,6 @@
                     "Texture aspect must be \"all\" for texture to texture copies");
             }
 
-            if (src.texture->GetFormat().HasDepthOrStencil()) {
-                // D3D12 requires entire subresource to be copied when using CopyTextureRegion is
-                // used with depth/stencil.
-                DAWN_TRY(ValidateEntireSubresourceCopied(src, dst, copySize));
-            }
-
             if (src.texture == dst.texture && src.mipLevel == dst.mipLevel) {
                 ASSERT(src.texture->GetDimension() == wgpu::TextureDimension::e2D &&
                        dst.texture->GetDimension() == wgpu::TextureDimension::e2D);
@@ -643,9 +618,9 @@
                 DAWN_TRY(ValidateBufferCopyView(GetDevice(), *source));
                 DAWN_TRY(ValidateCanUseAs(source->buffer, wgpu::BufferUsage::CopySrc));
 
-                DAWN_TRY(ValidateTextureCopyView(GetDevice(), *destination));
+                DAWN_TRY(ValidateTextureCopyView(GetDevice(), *destination, *copySize));
                 DAWN_TRY(ValidateCanUseAs(destination->texture, wgpu::TextureUsage::CopyDst));
-                DAWN_TRY(ValidateTextureSampleCountInCopyCommands(destination->texture));
+                DAWN_TRY(ValidateTextureSampleCountInBufferCopyCommands(destination->texture));
 
                 // We validate texture copy range before validating linear texture data,
                 // because in the latter we divide copyExtent.width by blockWidth and
@@ -699,9 +674,9 @@
                                              const Extent3D* copySize) {
         mEncodingContext.TryEncode(this, [&](CommandAllocator* allocator) -> MaybeError {
             if (GetDevice()->IsValidationEnabled()) {
-                DAWN_TRY(ValidateTextureCopyView(GetDevice(), *source));
+                DAWN_TRY(ValidateTextureCopyView(GetDevice(), *source, *copySize));
                 DAWN_TRY(ValidateCanUseAs(source->texture, wgpu::TextureUsage::CopySrc));
-                DAWN_TRY(ValidateTextureSampleCountInCopyCommands(source->texture));
+                DAWN_TRY(ValidateTextureSampleCountInBufferCopyCommands(source->texture));
 
                 DAWN_TRY(ValidateBufferCopyView(GetDevice(), *destination));
                 DAWN_TRY(ValidateCanUseAs(destination->buffer, wgpu::BufferUsage::CopyDst));
@@ -764,8 +739,8 @@
                 DAWN_TRY(ValidateTextureCopyRange(*source, *copySize));
                 DAWN_TRY(ValidateTextureCopyRange(*destination, *copySize));
 
-                DAWN_TRY(ValidateTextureCopyView(GetDevice(), *source));
-                DAWN_TRY(ValidateTextureCopyView(GetDevice(), *destination));
+                DAWN_TRY(ValidateTextureCopyView(GetDevice(), *source, *copySize));
+                DAWN_TRY(ValidateTextureCopyView(GetDevice(), *destination, *copySize));
 
                 DAWN_TRY(ValidateCanUseAs(source->texture, wgpu::TextureUsage::CopySrc));
                 DAWN_TRY(ValidateCanUseAs(destination->texture, wgpu::TextureUsage::CopyDst));
diff --git a/src/dawn_native/CommandValidation.cpp b/src/dawn_native/CommandValidation.cpp
index 1184dc3..b412ab6 100644
--- a/src/dawn_native/CommandValidation.cpp
+++ b/src/dawn_native/CommandValidation.cpp
@@ -482,18 +482,20 @@
     }
 
     MaybeError ValidateTextureCopyView(DeviceBase const* device,
-                                       const TextureCopyView& textureCopy) {
-        DAWN_TRY(device->ValidateObject(textureCopy.texture));
-        if (textureCopy.mipLevel >= textureCopy.texture->GetNumMipLevels()) {
+                                       const TextureCopyView& textureCopy,
+                                       const Extent3D& copySize) {
+        const TextureBase* texture = textureCopy.texture;
+        DAWN_TRY(device->ValidateObject(texture));
+        if (textureCopy.mipLevel >= texture->GetNumMipLevels()) {
             return DAWN_VALIDATION_ERROR("mipLevel out of range");
         }
 
-        if (textureCopy.origin.x % textureCopy.texture->GetFormat().blockWidth != 0) {
+        if (textureCopy.origin.x % texture->GetFormat().blockWidth != 0) {
             return DAWN_VALIDATION_ERROR(
                 "Offset.x must be a multiple of compressed texture format block width");
         }
 
-        if (textureCopy.origin.y % textureCopy.texture->GetFormat().blockHeight != 0) {
+        if (textureCopy.origin.y % texture->GetFormat().blockHeight != 0) {
             return DAWN_VALIDATION_ERROR(
                 "Offset.y must be a multiple of compressed texture format block height");
         }
@@ -502,13 +504,13 @@
             case wgpu::TextureAspect::All:
                 break;
             case wgpu::TextureAspect::DepthOnly:
-                if ((textureCopy.texture->GetFormat().aspects & Aspect::Depth) == 0) {
+                if ((texture->GetFormat().aspects & Aspect::Depth) == 0) {
                     return DAWN_VALIDATION_ERROR(
                         "Texture does not have depth aspect for texture copy");
                 }
                 break;
             case wgpu::TextureAspect::StencilOnly:
-                if ((textureCopy.texture->GetFormat().aspects & Aspect::Stencil) == 0) {
+                if ((texture->GetFormat().aspects & Aspect::Stencil) == 0) {
                     return DAWN_VALIDATION_ERROR(
                         "Texture does not have stencil aspect for texture copy");
                 }
@@ -518,6 +520,18 @@
                 break;
         }
 
+        if (texture->GetSampleCount() > 1 || texture->GetFormat().HasDepthOrStencil()) {
+            Extent3D subresourceSize = texture->GetMipLevelPhysicalSize(textureCopy.mipLevel);
+            ASSERT(texture->GetDimension() == wgpu::TextureDimension::e2D);
+            if (textureCopy.origin.x != 0 || textureCopy.origin.y != 0 ||
+                subresourceSize.width != copySize.width ||
+                subresourceSize.height != copySize.height) {
+                return DAWN_VALIDATION_ERROR(
+                    "The entire subresource must be copied when using a depth/stencil texture, or "
+                    "when sample count is greater than 1.");
+            }
+        }
+
         return {};
     }
 
diff --git a/src/dawn_native/CommandValidation.h b/src/dawn_native/CommandValidation.h
index 0aaa398..eed2e6f 100644
--- a/src/dawn_native/CommandValidation.h
+++ b/src/dawn_native/CommandValidation.h
@@ -57,7 +57,8 @@
     MaybeError ValidateBufferCopyView(DeviceBase const* device,
                                       const BufferCopyView& bufferCopyView);
     MaybeError ValidateTextureCopyView(DeviceBase const* device,
-                                       const TextureCopyView& textureCopyView);
+                                       const TextureCopyView& textureCopyView,
+                                       const Extent3D& copySize);
 
     MaybeError ValidateRowsPerImage(const Format& format,
                                     uint32_t rowsPerImage,
diff --git a/src/dawn_native/Queue.cpp b/src/dawn_native/Queue.cpp
index d187281..d658a2a 100644
--- a/src/dawn_native/Queue.cpp
+++ b/src/dawn_native/Queue.cpp
@@ -364,7 +364,7 @@
         DAWN_TRY(GetDevice()->ValidateObject(this));
         DAWN_TRY(GetDevice()->ValidateObject(destination->texture));
 
-        DAWN_TRY(ValidateTextureCopyView(GetDevice(), *destination));
+        DAWN_TRY(ValidateTextureCopyView(GetDevice(), *destination, *writeSize));
 
         if (dataLayout->offset > dataSize) {
             return DAWN_VALIDATION_ERROR("Queue::WriteTexture out of range");
diff --git a/src/dawn_native/d3d12/CommandBufferD3D12.cpp b/src/dawn_native/d3d12/CommandBufferD3D12.cpp
index 8e8cdd7..01d1433 100644
--- a/src/dawn_native/d3d12/CommandBufferD3D12.cpp
+++ b/src/dawn_native/d3d12/CommandBufferD3D12.cpp
@@ -72,28 +72,32 @@
             }
         }
 
-        bool CanUseCopyResource(const Texture* src, const Texture* dst, const Extent3D& copySize) {
+        bool CanUseCopyResource(const TextureCopy& src,
+                                const TextureCopy& dst,
+                                const Extent3D& copySize) {
             // Checked by validation
-            ASSERT(src->GetSampleCount() == dst->GetSampleCount());
-            ASSERT(src->GetFormat().format == dst->GetFormat().format);
+            ASSERT(src.texture->GetSampleCount() == dst.texture->GetSampleCount());
+            ASSERT(src.texture->GetFormat().format == dst.texture->GetFormat().format);
+            ASSERT(src.aspect == dst.aspect);
 
-            const Extent3D& srcSize = src->GetSize();
-            const Extent3D& dstSize = dst->GetSize();
+            const Extent3D& srcSize = src.texture->GetSize();
+            const Extent3D& dstSize = dst.texture->GetSize();
 
             // https://docs.microsoft.com/en-us/windows/win32/api/d3d12/nf-d3d12-id3d12graphicscommandlist-copyresource
             // In order to use D3D12's copy resource, the textures must be the same dimensions, and
             // the copy must be of the entire resource.
             // TODO(dawn:129): Support 1D textures.
-            return src->GetDimension() == dst->GetDimension() &&  //
-                   dst->GetNumMipLevels() == 1 &&                 //
-                   src->GetNumMipLevels() == 1 &&      // A copy command is of a single mip, so if a
-                                                       // resource has more than one, we definitely
-                                                       // cannot use CopyResource.
-                   copySize.width == dstSize.width &&  //
-                   copySize.width == srcSize.width &&  //
-                   copySize.height == dstSize.height &&  //
-                   copySize.height == srcSize.height &&  //
-                   copySize.depth == dstSize.depth &&    //
+            return src.aspect == src.texture->GetFormat().aspects &&
+                   src.texture->GetDimension() == dst.texture->GetDimension() &&  //
+                   dst.texture->GetNumMipLevels() == 1 &&                         //
+                   src.texture->GetNumMipLevels() == 1 &&  // A copy command is of a single mip, so
+                                                           // if a resource has more than one, we
+                                                           // definitely cannot use CopyResource.
+                   copySize.width == dstSize.width &&      //
+                   copySize.width == srcSize.width &&      //
+                   copySize.height == dstSize.height &&    //
+                   copySize.height == srcSize.height &&    //
+                   copySize.depth == dstSize.depth &&      //
                    copySize.depth == srcSize.depth;
         }
 
@@ -789,7 +793,7 @@
                                                             wgpu::TextureUsage::CopyDst, dstRange);
 
                     ASSERT(srcRange.aspects == dstRange.aspects);
-                    if (CanUseCopyResource(source, destination, copy->copySize)) {
+                    if (CanUseCopyResource(copy->source, copy->destination, copy->copySize)) {
                         commandList->CopyResource(destination->GetD3D12Resource(),
                                                   source->GetD3D12Resource());
                     } else {
diff --git a/src/dawn_native/vulkan/CommandBufferVk.cpp b/src/dawn_native/vulkan/CommandBufferVk.cpp
index 1cf5028..d8bc79b 100644
--- a/src/dawn_native/vulkan/CommandBufferVk.cpp
+++ b/src/dawn_native/vulkan/CommandBufferVk.cpp
@@ -18,6 +18,7 @@
 #include "dawn_native/CommandEncoder.h"
 #include "dawn_native/CommandValidation.h"
 #include "dawn_native/Commands.h"
+#include "dawn_native/EnumMaskIterator.h"
 #include "dawn_native/RenderBundle.h"
 #include "dawn_native/vulkan/BindGroupVk.h"
 #include "dawn_native/vulkan/BufferVk.h"
@@ -59,7 +60,8 @@
 
         VkImageCopy ComputeImageCopyRegion(const TextureCopy& srcCopy,
                                            const TextureCopy& dstCopy,
-                                           const Extent3D& copySize) {
+                                           const Extent3D& copySize,
+                                           Aspect aspect) {
             const Texture* srcTexture = ToBackend(srcCopy.texture.Get());
             const Texture* dstTexture = ToBackend(dstCopy.texture.Get());
 
@@ -68,7 +70,7 @@
             // TODO(jiawei.shao@intel.com): support 1D and 3D textures
             ASSERT(srcTexture->GetDimension() == wgpu::TextureDimension::e2D &&
                    dstTexture->GetDimension() == wgpu::TextureDimension::e2D);
-            region.srcSubresource.aspectMask = VulkanAspectMask(srcCopy.aspect);
+            region.srcSubresource.aspectMask = VulkanAspectMask(aspect);
             region.srcSubresource.mipLevel = srcCopy.mipLevel;
             region.srcSubresource.baseArrayLayer = srcCopy.origin.z;
             region.srcSubresource.layerCount = copySize.depth;
@@ -77,7 +79,7 @@
             region.srcOffset.y = srcCopy.origin.y;
             region.srcOffset.z = 0;
 
-            region.dstSubresource.aspectMask = VulkanAspectMask(dstCopy.aspect);
+            region.dstSubresource.aspectMask = VulkanAspectMask(aspect);
             region.dstSubresource.mipLevel = dstCopy.mipLevel;
             region.dstSubresource.baseArrayLayer = dstCopy.origin.z;
             region.dstSubresource.layerCount = copySize.depth;
@@ -616,13 +618,18 @@
                     if (!copyUsingTemporaryBuffer) {
                         VkImage srcImage = ToBackend(src.texture)->GetHandle();
                         VkImage dstImage = ToBackend(dst.texture)->GetHandle();
-                        VkImageCopy region = ComputeImageCopyRegion(src, dst, copy->copySize);
 
-                        // Dawn guarantees dstImage be in the TRANSFER_DST_OPTIMAL layout after the
-                        // copy command.
-                        device->fn.CmdCopyImage(commands, srcImage, VK_IMAGE_LAYOUT_GENERAL,
-                                                dstImage, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1,
-                                                &region);
+                        for (Aspect aspect : IterateEnumMask(src.texture->GetFormat().aspects)) {
+                            ASSERT(dst.texture->GetFormat().aspects & aspect);
+                            VkImageCopy region =
+                                ComputeImageCopyRegion(src, dst, copy->copySize, aspect);
+
+                            // Dawn guarantees dstImage be in the TRANSFER_DST_OPTIMAL layout after
+                            // the copy command.
+                            device->fn.CmdCopyImage(commands, srcImage, VK_IMAGE_LAYOUT_GENERAL,
+                                                    dstImage, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
+                                                    1, &region);
+                        }
                     } else {
                         RecordCopyImageWithTemporaryBuffer(recordingContext, src, dst,
                                                            copy->copySize);
diff --git a/src/tests/end2end/DepthStencilCopyTests.cpp b/src/tests/end2end/DepthStencilCopyTests.cpp
index 6d2ed23..bd8ceb7 100644
--- a/src/tests/end2end/DepthStencilCopyTests.cpp
+++ b/src/tests/end2end/DepthStencilCopyTests.cpp
@@ -18,6 +18,7 @@
 #include "common/Constants.h"
 #include "common/Math.h"
 #include "utils/ComboRenderPipelineDescriptor.h"
+#include "utils/TestUtils.h"
 #include "utils/TextureFormatUtils.h"
 #include "utils/WGPUHelpers.h"
 
@@ -34,118 +35,517 @@
                                     vec2(-1.f,  0.f), vec2(0.f, -1.f), vec2( 0.f,  0.f));
         gl_Position = vec4(pos[gl_VertexIndex], 0.f, 1.f);
     })");
-
-        mFragmentModule = utils::CreateShaderModule(device, utils::SingleShaderStage::Fragment, R"(
-    #version 450
-    void main() {
-        gl_FragDepth = 0.3;
-    })");
     }
 
-    static constexpr float kWrittenDepthValue = 0.3;
+    wgpu::Texture CreateDepthStencilTexture(uint32_t width,
+                                            uint32_t height,
+                                            wgpu::TextureUsage usage,
+                                            uint32_t mipLevelCount = 1) {
+        wgpu::TextureDescriptor texDescriptor = {};
+        texDescriptor.size = {width, height, 1};
+        texDescriptor.format = wgpu::TextureFormat::Depth24PlusStencil8;
+        texDescriptor.usage = usage;
+        texDescriptor.mipLevelCount = mipLevelCount;
+        return device.CreateTexture(&texDescriptor);
+    }
+
+    wgpu::Texture CreateDepthTexture(uint32_t width,
+                                     uint32_t height,
+                                     wgpu::TextureUsage usage,
+                                     uint32_t mipLevelCount = 1) {
+        wgpu::TextureDescriptor texDescriptor = {};
+        texDescriptor.size = {width, height, 1};
+        texDescriptor.format = wgpu::TextureFormat::Depth32Float;
+        texDescriptor.usage = usage;
+        texDescriptor.mipLevelCount = mipLevelCount;
+        return device.CreateTexture(&texDescriptor);
+    }
+
+    void PopulatePipelineDescriptorWriteDepth(utils::ComboRenderPipelineDescriptor* desc,
+                                              wgpu::TextureFormat format,
+                                              float regionDepth) {
+        desc->vertexStage.module = mVertexModule;
+
+        std::string fsSource = R"(
+    #version 450
+    void main() {
+        gl_FragDepth = )" + std::to_string(regionDepth) +
+                               ";\n}";
+
+        desc->cFragmentStage.module =
+            utils::CreateShaderModule(device, utils::SingleShaderStage::Fragment, fsSource.c_str());
+        desc->cDepthStencilState.format = format;
+        desc->cDepthStencilState.depthWriteEnabled = true;
+        desc->depthStencilState = &desc->cDepthStencilState;
+        desc->colorStateCount = 0;
+    }
+
+    // Initialize the depth/stencil values for the texture using a render pass.
+    // The texture will be cleared to the "clear" value, and then bottom left corner will
+    // be written with the "region" value.
+    void InitializeDepthTextureRegion(wgpu::Texture texture,
+                                      float clearDepth,
+                                      float regionDepth,
+                                      uint32_t mipLevel = 0) {
+        wgpu::TextureViewDescriptor viewDesc = {};
+        viewDesc.baseMipLevel = mipLevel;
+        viewDesc.mipLevelCount = 1;
+
+        utils::ComboRenderPassDescriptor renderPassDesc({}, texture.CreateView(&viewDesc));
+        renderPassDesc.cDepthStencilAttachmentInfo.clearDepth = clearDepth;
+
+        utils::ComboRenderPipelineDescriptor renderPipelineDesc(device);
+        PopulatePipelineDescriptorWriteDepth(&renderPipelineDesc, wgpu::TextureFormat::Depth32Float,
+                                             regionDepth);
+
+        wgpu::RenderPipeline pipeline = device.CreateRenderPipeline(&renderPipelineDesc);
+        wgpu::CommandEncoder commandEncoder = device.CreateCommandEncoder();
+        wgpu::RenderPassEncoder pass = commandEncoder.BeginRenderPass(&renderPassDesc);
+        pass.SetPipeline(pipeline);
+        pass.Draw(6);
+        pass.EndPass();
+
+        wgpu::CommandBuffer commands = commandEncoder.Finish();
+        queue.Submit(1, &commands);
+    }
+
+    // Initialize the depth/stencil values for the texture using a render pass.
+    // The texture will be cleared to the "clear" values, and then bottom left corner will
+    // be written with the "region" values.
+    void InitializeDepthStencilTextureRegion(wgpu::Texture texture,
+                                             float clearDepth,
+                                             float regionDepth,
+                                             uint8_t clearStencil,
+                                             uint8_t regionStencil,
+                                             uint32_t mipLevel = 0) {
+        wgpu::TextureViewDescriptor viewDesc = {};
+        viewDesc.baseMipLevel = mipLevel;
+        viewDesc.mipLevelCount = 1;
+
+        utils::ComboRenderPassDescriptor renderPassDesc({}, texture.CreateView(&viewDesc));
+        renderPassDesc.cDepthStencilAttachmentInfo.clearDepth = clearDepth;
+        renderPassDesc.cDepthStencilAttachmentInfo.clearStencil = clearStencil;
+
+        utils::ComboRenderPipelineDescriptor renderPipelineDesc(device);
+        PopulatePipelineDescriptorWriteDepth(&renderPipelineDesc,
+                                             wgpu::TextureFormat::Depth24PlusStencil8, regionDepth);
+
+        renderPipelineDesc.cDepthStencilState.stencilFront.passOp = wgpu::StencilOperation::Replace;
+
+        wgpu::RenderPipeline pipeline = device.CreateRenderPipeline(&renderPipelineDesc);
+
+        // Draw the quad (two triangles)
+        wgpu::CommandEncoder commandEncoder = device.CreateCommandEncoder();
+        wgpu::RenderPassEncoder pass = commandEncoder.BeginRenderPass(&renderPassDesc);
+        pass.SetPipeline(pipeline);
+        pass.SetStencilReference(regionStencil);
+        pass.Draw(6);
+        pass.EndPass();
+
+        wgpu::CommandBuffer commands = commandEncoder.Finish();
+        queue.Submit(1, &commands);
+    }
+
+    wgpu::Texture CreateInitializeDepthStencilTextureAndCopyT2T(float clearDepth,
+                                                                float regionDepth,
+                                                                uint8_t clearStencil,
+                                                                uint8_t regionStencil,
+                                                                uint32_t width,
+                                                                uint32_t height,
+                                                                wgpu::TextureUsage usage,
+                                                                uint32_t mipLevel = 0) {
+        wgpu::Texture src = CreateDepthStencilTexture(
+            width, height, wgpu::TextureUsage::OutputAttachment | wgpu::TextureUsage::CopySrc,
+            mipLevel + 1);
+
+        wgpu::Texture dst = CreateDepthStencilTexture(
+            width, height, usage | wgpu::TextureUsage::CopyDst, mipLevel + 1);
+
+        InitializeDepthStencilTextureRegion(src, clearDepth, regionDepth, clearStencil,
+                                            regionStencil, mipLevel);
+
+        // Perform a T2T copy of all aspects
+        {
+            wgpu::CommandEncoder commandEncoder = device.CreateCommandEncoder();
+            wgpu::TextureCopyView srcView = utils::CreateTextureCopyView(src, mipLevel, {0, 0, 0});
+            wgpu::TextureCopyView dstView = utils::CreateTextureCopyView(dst, mipLevel, {0, 0, 0});
+            wgpu::Extent3D copySize = {width >> mipLevel, height >> mipLevel, 1};
+            commandEncoder.CopyTextureToTexture(&srcView, &dstView, &copySize);
+
+            wgpu::CommandBuffer commands = commandEncoder.Finish();
+            queue.Submit(1, &commands);
+        }
+
+        return dst;
+    }
+
+    // Check depth by uploading expected data to a sampled texture, writing it out as a depth
+    // attachment, and then using the "equals" depth test to check the contents are the same.
+    void ExpectDepthData(wgpu::Texture depthTexture,
+                         wgpu::TextureFormat depthFormat,
+                         uint32_t width,
+                         uint32_t height,
+                         uint32_t mipLevel,
+                         std::vector<float> expected) {
+        wgpu::CommandEncoder commandEncoder = device.CreateCommandEncoder();
+
+        // Make the color attachment that we'll use to read back.
+        wgpu::TextureDescriptor colorTexDesc = {};
+        colorTexDesc.size = {width, height, 1};
+        colorTexDesc.format = wgpu::TextureFormat::R32Uint;
+        colorTexDesc.usage = wgpu::TextureUsage::OutputAttachment | wgpu::TextureUsage::CopySrc;
+        wgpu::Texture colorTexture = device.CreateTexture(&colorTexDesc);
+
+        // Make a sampleable texture to store the depth data. We'll sample this in the
+        // shader to output depth.
+        wgpu::TextureDescriptor depthDataDesc = {};
+        depthDataDesc.size = {width, height, 1};
+        depthDataDesc.format = wgpu::TextureFormat::R32Float;
+        depthDataDesc.usage = wgpu::TextureUsage::Sampled | wgpu::TextureUsage::CopyDst;
+        wgpu::Texture depthDataTexture = device.CreateTexture(&depthDataDesc);
+
+        // Upload the depth data.
+        uint32_t bytesPerRow = utils::GetMinimumBytesPerRow(wgpu::TextureFormat::R32Float, width);
+        wgpu::BufferDescriptor uploadBufferDesc = {};
+        uploadBufferDesc.size = utils::RequiredBytesInCopy(bytesPerRow, height, depthDataDesc.size,
+                                                           wgpu::TextureFormat::R32Float);
+        uploadBufferDesc.usage = wgpu::BufferUsage::CopySrc;
+        uploadBufferDesc.mappedAtCreation = true;
+
+        // TODO(enga): Use WriteTexture when implemented on OpenGL.
+        wgpu::Buffer uploadBuffer = device.CreateBuffer(&uploadBufferDesc);
+        uint8_t* dst = static_cast<uint8_t*>(uploadBuffer.GetMappedRange());
+        float* src = expected.data();
+        for (uint32_t y = 0; y < height; ++y) {
+            memcpy(dst, src, width * sizeof(float));
+            dst += bytesPerRow;
+            src += width;
+        }
+        uploadBuffer.Unmap();
+
+        wgpu::BufferCopyView bufferCopy =
+            utils::CreateBufferCopyView(uploadBuffer, 0, bytesPerRow, height);
+        wgpu::TextureCopyView textureCopy =
+            utils::CreateTextureCopyView(depthDataTexture, 0, {0, 0, 0}, wgpu::TextureAspect::All);
+        commandEncoder.CopyBufferToTexture(&bufferCopy, &textureCopy, &depthDataDesc.size);
+
+        // Pipeline for a full screen quad.
+        utils::ComboRenderPipelineDescriptor pipelineDescriptor(device);
+
+        pipelineDescriptor.vertexStage.module =
+            utils::CreateShaderModule(device, utils::SingleShaderStage::Vertex, R"(
+        #version 450
+        void main() {
+            const vec2 pos[3] = vec2[3](vec2(-1.f, -1.f), vec2(3.f, -1.f), vec2(-1.f, 3.f));
+                        gl_Position = vec4(pos[gl_VertexIndex], 0.f, 1.f);
+            gl_Position = vec4(pos[gl_VertexIndex], 0.f, 1.f);
+        })");
+
+        // Sample the input texture and write out depth. |result| will only be set to 1 if we
+        // pass the depth test.
+        pipelineDescriptor.cFragmentStage.module =
+            utils::CreateShaderModule(device, utils::SingleShaderStage::Fragment, R"(
+        #version 450
+        layout(set = 0, binding = 0) uniform sampler sampler0;
+        layout(set = 0, binding = 1) uniform texture2D texture0;
+
+        layout(location = 0) out uint result;
+        void main() {
+            result = 1u;
+            gl_FragDepth = texelFetch(sampler2D(texture0, sampler0), ivec2(gl_FragCoord), 0)[0];
+        })");
+
+        // Pass the depth test only if the depth is equal.
+        pipelineDescriptor.primitiveTopology = wgpu::PrimitiveTopology::TriangleList;
+        pipelineDescriptor.depthStencilState = &pipelineDescriptor.cDepthStencilState;
+        pipelineDescriptor.cDepthStencilState.format = depthFormat;
+        pipelineDescriptor.cDepthStencilState.depthCompare = wgpu::CompareFunction::Equal;
+        pipelineDescriptor.cColorStates[0].format = colorTexDesc.format;
+
+        // TODO(jiawei.shao@intel.com): The Intel Mesa Vulkan driver can't set gl_FragDepth unless
+        // depthWriteEnabled == true. This either needs to be fixed in the driver or restricted by
+        // the WebGPU API.
+        pipelineDescriptor.cDepthStencilState.depthWriteEnabled = true;
+
+        wgpu::TextureViewDescriptor viewDesc = {};
+        viewDesc.baseMipLevel = mipLevel;
+        viewDesc.mipLevelCount = 1;
+
+        utils::ComboRenderPassDescriptor passDescriptor({colorTexture.CreateView()},
+                                                        depthTexture.CreateView(&viewDesc));
+        passDescriptor.cDepthStencilAttachmentInfo.depthLoadOp = wgpu::LoadOp::Load;
+        passDescriptor.cDepthStencilAttachmentInfo.stencilLoadOp = wgpu::LoadOp::Load;
+
+        wgpu::RenderPipeline pipeline = device.CreateRenderPipeline(&pipelineDescriptor);
+
+        // Bind a sampler and the depth data texture.
+        wgpu::SamplerDescriptor samplerDesc = {};
+        wgpu::BindGroup bindGroup = utils::MakeBindGroup(
+            device, pipeline.GetBindGroupLayout(0),
+            {{0, device.CreateSampler(&samplerDesc)}, {1, depthDataTexture.CreateView()}});
+
+        wgpu::RenderPassEncoder pass = commandEncoder.BeginRenderPass(&passDescriptor);
+        pass.SetPipeline(pipeline);
+        pass.SetBindGroup(0, bindGroup);
+        pass.Draw(3);
+        pass.EndPass();
+
+        wgpu::CommandBuffer commands = commandEncoder.Finish();
+        queue.Submit(1, &commands);
+
+        std::vector<uint32_t> colorData(width * height, 1u);
+        EXPECT_TEXTURE_EQ(colorData.data(), colorTexture, 0, 0, width, height, 0, 0);
+    }
 
     wgpu::ShaderModule mVertexModule;
-    wgpu::ShaderModule mFragmentModule;
 };
 
 // Test copying the depth-only aspect into a buffer.
 TEST_P(DepthStencilCopyTests, FromDepthAspect) {
-    // Create a depth texture
     constexpr uint32_t kWidth = 4;
     constexpr uint32_t kHeight = 4;
-    wgpu::TextureDescriptor texDescriptor = {};
-    texDescriptor.size = {kWidth, kHeight, 1};
-    texDescriptor.format = wgpu::TextureFormat::Depth32Float;
-    texDescriptor.usage = wgpu::TextureUsage::OutputAttachment | wgpu::TextureUsage::CopySrc;
-    wgpu::Texture depthTexture = device.CreateTexture(&texDescriptor);
 
-    // Create a render pass which clears depth to 0
-    utils::ComboRenderPassDescriptor renderPassDesc({}, depthTexture.CreateView());
-    renderPassDesc.cDepthStencilAttachmentInfo.clearDepth = 0.f;
+    wgpu::Texture depthTexture = CreateDepthTexture(
+        kWidth, kHeight, wgpu::TextureUsage::OutputAttachment | wgpu::TextureUsage::CopySrc);
 
-    // Create a render pipeline to render a bottom-left quad with depth 0.3.
-    utils::ComboRenderPipelineDescriptor renderPipelineDesc(device);
-    renderPipelineDesc.vertexStage.module = mVertexModule;
-    renderPipelineDesc.cFragmentStage.module = mFragmentModule;
-    renderPipelineDesc.cDepthStencilState.format = texDescriptor.format;
-    renderPipelineDesc.cDepthStencilState.depthWriteEnabled = true;
-    renderPipelineDesc.depthStencilState = &renderPipelineDesc.cDepthStencilState;
-    renderPipelineDesc.colorStateCount = 0;
+    InitializeDepthTextureRegion(depthTexture, 0.f, 0.3f);
 
-    wgpu::RenderPipeline pipeline = device.CreateRenderPipeline(&renderPipelineDesc);
-
-    // Draw the quad (two triangles)
-    wgpu::CommandEncoder commandEncoder = device.CreateCommandEncoder();
-    wgpu::RenderPassEncoder pass = commandEncoder.BeginRenderPass(&renderPassDesc);
-    pass.SetPipeline(pipeline);
-    pass.Draw(6);
-    pass.EndPass();
-
-    wgpu::CommandBuffer commands = commandEncoder.Finish();
-    queue.Submit(1, &commands);
-
-    // Only the bottom left quad has depth values
-    std::vector<float> expected = {
+    // This expectation is the test as it performs the CopyTextureToBuffer.
+    std::vector<float> expectedData = {
         0.0, 0.0, 0.0, 0.0,  //
         0.0, 0.0, 0.0, 0.0,  //
         0.3, 0.3, 0.0, 0.0,  //
         0.3, 0.3, 0.0, 0.0,  //
     };
-
-    // This expectation is the test as it performs the CopyTextureToBuffer.
-    EXPECT_TEXTURE_EQ(expected.data(), depthTexture, 0, 0, kWidth, kHeight, 0, 0,
+    EXPECT_TEXTURE_EQ(expectedData.data(), depthTexture, 0, 0, kWidth, kHeight, 0, 0,
                       wgpu::TextureAspect::DepthOnly);
 }
 
 // Test copying the stencil-only aspect into a buffer.
 TEST_P(DepthStencilCopyTests, FromStencilAspect) {
-    // Create a stencil texture
     constexpr uint32_t kWidth = 4;
     constexpr uint32_t kHeight = 4;
-    wgpu::TextureDescriptor texDescriptor = {};
-    texDescriptor.size = {kWidth, kHeight, 1};
-    texDescriptor.format = wgpu::TextureFormat::Depth24PlusStencil8;
-    texDescriptor.usage = wgpu::TextureUsage::OutputAttachment | wgpu::TextureUsage::CopySrc;
-    wgpu::Texture depthStencilTexture = device.CreateTexture(&texDescriptor);
 
-    // Create a render pass which clears the stencil to 0 on load.
-    utils::ComboRenderPassDescriptor renderPassDesc({}, depthStencilTexture.CreateView());
-    renderPassDesc.cDepthStencilAttachmentInfo.clearStencil = 0;
+    wgpu::Texture depthStencilTexture = CreateDepthStencilTexture(
+        kWidth, kHeight, wgpu::TextureUsage::OutputAttachment | wgpu::TextureUsage::CopySrc);
 
-    // Create a render pipline which increments the stencil value for passing fragments.
-    // A quad is drawn in the bottom left.
-    utils::ComboRenderPipelineDescriptor renderPipelineDesc(device);
-    renderPipelineDesc.vertexStage.module = mVertexModule;
-    renderPipelineDesc.cFragmentStage.module = mFragmentModule;
-    renderPipelineDesc.cDepthStencilState.format = texDescriptor.format;
-    renderPipelineDesc.cDepthStencilState.depthWriteEnabled = true;
-    renderPipelineDesc.cDepthStencilState.stencilFront.passOp =
-        wgpu::StencilOperation::IncrementClamp;
-    renderPipelineDesc.depthStencilState = &renderPipelineDesc.cDepthStencilState;
-    renderPipelineDesc.colorStateCount = 0;
+    InitializeDepthStencilTextureRegion(depthStencilTexture, 0.f, 0.3f, 0u, 1u);
 
-    wgpu::RenderPipeline pipeline = device.CreateRenderPipeline(&renderPipelineDesc);
-
-    // Draw the quad (two triangles)
-    wgpu::CommandEncoder commandEncoder = device.CreateCommandEncoder();
-    wgpu::RenderPassEncoder pass = commandEncoder.BeginRenderPass(&renderPassDesc);
-    pass.SetPipeline(pipeline);
-    pass.Draw(6);
-    pass.EndPass();
-
-    wgpu::CommandBuffer commands = commandEncoder.Finish();
-    queue.Submit(1, &commands);
-
-    // Only the bottom left quad has stencil values
-    std::vector<uint8_t> expected = {
+    // This expectation is the test as it performs the CopyTextureToBuffer.
+    std::vector<uint8_t> expectedData = {
         0u, 0u, 0u, 0u,  //
         0u, 0u, 0u, 0u,  //
         1u, 1u, 0u, 0u,  //
         1u, 1u, 0u, 0u,  //
     };
+    EXPECT_TEXTURE_EQ(expectedData.data(), depthStencilTexture, 0, 0, kWidth, kHeight, 0, 0,
+                      wgpu::TextureAspect::StencilOnly);
+}
+
+// Test copying the non-zero mip, stencil-only aspect into a buffer.
+TEST_P(DepthStencilCopyTests, FromNonZeroMipStencilAspect) {
+    // TODO(enga): Figure out why this fails on MacOS Intel Iris.
+    // It passes on AMD Radeon Pro and Intel HD Graphics 630.
+    DAWN_SKIP_TEST_IF(IsMetal() && IsIntel());
+
+    wgpu::Texture depthStencilTexture = CreateDepthStencilTexture(
+        9, 9, wgpu::TextureUsage::OutputAttachment | wgpu::TextureUsage::CopySrc, 2);
+
+    InitializeDepthStencilTextureRegion(depthStencilTexture, 0.f, 0.3f, 0u, 1u, 1u);
 
     // This expectation is the test as it performs the CopyTextureToBuffer.
-    EXPECT_TEXTURE_EQ(expected.data(), depthStencilTexture, 0, 0, kWidth, kHeight, 0, 0,
+    std::vector<uint8_t> expectedData = {
+        0u, 0u, 0u, 0u,  //
+        0u, 0u, 0u, 0u,  //
+        1u, 1u, 0u, 0u,  //
+        1u, 1u, 0u, 0u,  //
+    };
+    EXPECT_TEXTURE_EQ(expectedData.data(), depthStencilTexture, 0, 0, 4, 4, 1, 0,
+                      wgpu::TextureAspect::StencilOnly);
+}
+
+// Test copying the non-zero mip, depth-only aspect into a buffer.
+TEST_P(DepthStencilCopyTests, FromNonZeroMipDepthAspect) {
+    wgpu::Texture depthTexture = CreateDepthTexture(
+        9, 9, wgpu::TextureUsage::OutputAttachment | wgpu::TextureUsage::CopySrc, 2);
+
+    InitializeDepthTextureRegion(depthTexture, 0.f, 0.4f, 1);
+
+    // This expectation is the test as it performs the CopyTextureToBuffer.
+    std::vector<float> expectedData = {
+        0.0, 0.0, 0.0, 0.0,  //
+        0.0, 0.0, 0.0, 0.0,  //
+        0.4, 0.4, 0.0, 0.0,  //
+        0.4, 0.4, 0.0, 0.0,  //
+    };
+    EXPECT_TEXTURE_EQ(expectedData.data(), depthTexture, 0, 0, 4, 4, 1, 0,
+                      wgpu::TextureAspect::DepthOnly);
+}
+
+// Test copying both aspects in a T2T copy, then copying only stencil.
+TEST_P(DepthStencilCopyTests, T2TBothAspectsThenCopyStencil) {
+    // TODO(enga): Figure out why this fails on MacOS Intel Iris.
+    // It passes on AMD Radeon Pro and Intel HD Graphics 630.
+    // Maybe has to do with the OutputAttachment usage. Notably, a later test
+    // T2TBothAspectsThenCopyNonRenderableStencil does not use OutputAttachment and works correctly.
+    DAWN_SKIP_TEST_IF(IsMetal() && IsIntel());
+
+    constexpr uint32_t kWidth = 4;
+    constexpr uint32_t kHeight = 4;
+
+    wgpu::Texture texture = CreateInitializeDepthStencilTextureAndCopyT2T(
+        0.1f, 0.3f, 1u, 3u, kWidth, kHeight,
+        wgpu::TextureUsage::CopySrc | wgpu::TextureUsage::OutputAttachment);
+
+    // Check the stencil
+    std::vector<uint8_t> expectedData = {
+        1u, 1u, 1u, 1u,  //
+        1u, 1u, 1u, 1u,  //
+        3u, 3u, 1u, 1u,  //
+        3u, 3u, 1u, 1u,  //
+    };
+    EXPECT_TEXTURE_EQ(expectedData.data(), texture, 0, 0, kWidth, kHeight, 0, 0,
+                      wgpu::TextureAspect::StencilOnly);
+}
+
+// Test that part of a non-renderable stencil aspect can be copied. Notably,
+// this test has different behavior on some platforms than T2TBothAspectsThenCopyStencil.
+TEST_P(DepthStencilCopyTests, T2TBothAspectsThenCopyNonRenderableStencil) {
+    constexpr uint32_t kWidth = 4;
+    constexpr uint32_t kHeight = 4;
+
+    wgpu::Texture texture = CreateInitializeDepthStencilTextureAndCopyT2T(
+        0.1f, 0.3f, 1u, 3u, kWidth, kHeight, wgpu::TextureUsage::CopySrc);
+
+    // Check the stencil
+    std::vector<uint8_t> expectedData = {
+        1u, 1u, 1u, 1u,  //
+        1u, 1u, 1u, 1u,  //
+        3u, 3u, 1u, 1u,  //
+        3u, 3u, 1u, 1u,  //
+    };
+    EXPECT_TEXTURE_EQ(expectedData.data(), texture, 0, 0, kWidth, kHeight, 0, 0,
+                      wgpu::TextureAspect::StencilOnly);
+}
+
+// Test that part of a non-renderable, non-zero mip stencil aspect can be copied. Notably,
+// this test has different behavior on some platforms than T2TBothAspectsThenCopyStencil.
+TEST_P(DepthStencilCopyTests, T2TBothAspectsThenCopyNonRenderableNonZeroMipStencil) {
+    // TODO(enga): Figure out why this fails on MacOS Intel Iris.
+    // It passes on AMD Radeon Pro and Intel HD Graphics 630.
+    // Maybe has to do with the non-zero mip. Notably, a previous test
+    // T2TBothAspectsThenCopyNonRenderableStencil works correctly.
+    DAWN_SKIP_TEST_IF(IsMetal() && IsIntel());
+
+    wgpu::Texture texture = CreateInitializeDepthStencilTextureAndCopyT2T(
+        0.1f, 0.3f, 1u, 3u, 9, 9, wgpu::TextureUsage::CopySrc, 1);
+
+    // Check the stencil
+    std::vector<uint8_t> expectedData = {
+        1u, 1u, 1u, 1u,  //
+        1u, 1u, 1u, 1u,  //
+        3u, 3u, 1u, 1u,  //
+        3u, 3u, 1u, 1u,  //
+    };
+    EXPECT_TEXTURE_EQ(expectedData.data(), texture, 0, 0, 4, 4, 1, 0,
+                      wgpu::TextureAspect::StencilOnly);
+}
+
+// Test copying both aspects in a T2T copy, then copying only depth.
+TEST_P(DepthStencilCopyTests, T2TBothAspectsThenCopyDepth) {
+    constexpr uint32_t kWidth = 4;
+    constexpr uint32_t kHeight = 4;
+
+    wgpu::Texture texture = CreateInitializeDepthStencilTextureAndCopyT2T(
+        0.1f, 0.3f, 1u, 3u, kWidth, kHeight, wgpu::TextureUsage::OutputAttachment);
+
+    // Check the depth
+    ExpectDepthData(texture, wgpu::TextureFormat::Depth24PlusStencil8, kWidth, kHeight, 0,
+                    {
+                        0.1, 0.1, 0.1, 0.1,  //
+                        0.1, 0.1, 0.1, 0.1,  //
+                        0.3, 0.3, 0.1, 0.1,  //
+                        0.3, 0.3, 0.1, 0.1,  //
+                    });
+}
+
+// Test copying both aspects in a T2T copy, then copying only depth at a nonzero mip.
+TEST_P(DepthStencilCopyTests, T2TBothAspectsThenCopyNonZeroMipDepth) {
+    wgpu::Texture texture = CreateInitializeDepthStencilTextureAndCopyT2T(
+        0.1f, 0.3f, 1u, 3u, 8, 8, wgpu::TextureUsage::OutputAttachment, 1);
+
+    // Check the depth
+    ExpectDepthData(texture, wgpu::TextureFormat::Depth24PlusStencil8, 4, 4, 1,
+                    {
+                        0.1, 0.1, 0.1, 0.1,  //
+                        0.1, 0.1, 0.1, 0.1,  //
+                        0.3, 0.3, 0.1, 0.1,  //
+                        0.3, 0.3, 0.1, 0.1,  //
+                    });
+}
+
+// Test copying both aspects in a T2T copy, then copying stencil, then copying depth.
+TEST_P(DepthStencilCopyTests, T2TBothAspectsThenCopyStencilThenDepth) {
+    constexpr uint32_t kWidth = 4;
+    constexpr uint32_t kHeight = 4;
+
+    wgpu::Texture texture = CreateInitializeDepthStencilTextureAndCopyT2T(
+        0.1f, 0.3f, 1u, 3u, kWidth, kHeight,
+        wgpu::TextureUsage::CopySrc | wgpu::TextureUsage::OutputAttachment);
+
+    // Check the stencil
+    std::vector<uint8_t> expectedData = {
+        1u, 1u, 1u, 1u,  //
+        1u, 1u, 1u, 1u,  //
+        3u, 3u, 1u, 1u,  //
+        3u, 3u, 1u, 1u,  //
+    };
+    EXPECT_TEXTURE_EQ(expectedData.data(), texture, 0, 0, kWidth, kHeight, 0, 0,
+                      wgpu::TextureAspect::StencilOnly);
+
+    // Check the depth
+    ExpectDepthData(texture, wgpu::TextureFormat::Depth24PlusStencil8, kWidth, kHeight, 0,
+                    {
+                        0.1, 0.1, 0.1, 0.1,  //
+                        0.1, 0.1, 0.1, 0.1,  //
+                        0.3, 0.3, 0.1, 0.1,  //
+                        0.3, 0.3, 0.1, 0.1,  //
+                    });
+}
+
+// Test copying both aspects in a T2T copy, then copying depth, then copying stencil.
+TEST_P(DepthStencilCopyTests, T2TBothAspectsThenCopyDepthThenStencil) {
+    // TODO(enga): Figure out why this fails on MacOS Intel Iris.
+    // It passes on AMD Radeon Pro and Intel HD Graphics 630.
+    // It seems like the depth readback copy mutates the stencil because the previous
+    // test T2TBothAspectsThenCopyStencil passes.
+    // T2TBothAspectsThenCopyStencilThenDepth, which checks stencil first, also passes.
+    DAWN_SKIP_TEST_IF(IsMetal() && IsIntel());
+
+    constexpr uint32_t kWidth = 4;
+    constexpr uint32_t kHeight = 4;
+
+    wgpu::Texture texture = CreateInitializeDepthStencilTextureAndCopyT2T(
+        0.1f, 0.3f, 1u, 3u, kWidth, kHeight,
+        wgpu::TextureUsage::CopySrc | wgpu::TextureUsage::OutputAttachment);
+
+    // Check the depth
+    ExpectDepthData(texture, wgpu::TextureFormat::Depth24PlusStencil8, kWidth, kHeight, 0,
+                    {
+                        0.1, 0.1, 0.1, 0.1,  //
+                        0.1, 0.1, 0.1, 0.1,  //
+                        0.3, 0.3, 0.1, 0.1,  //
+                        0.3, 0.3, 0.1, 0.1,  //
+                    });
+
+    // Check the stencil
+    std::vector<uint8_t> expectedData = {
+        1u, 1u, 1u, 1u,  //
+        1u, 1u, 1u, 1u,  //
+        3u, 3u, 1u, 1u,  //
+        3u, 3u, 1u, 1u,  //
+    };
+    EXPECT_TEXTURE_EQ(expectedData.data(), texture, 0, 0, kWidth, kHeight, 0, 0,
                       wgpu::TextureAspect::StencilOnly);
 }
 
@@ -154,32 +554,36 @@
     // Copies to a single aspect are unsupported on OpenGL.
     DAWN_SKIP_TEST_IF(IsOpenGL());
 
-    // TODO(enga): Figure out why this fails on Vulkan Intel
-    // Results are shifted by 1 byte on Windows, and crash/hang on Linux.
-    DAWN_SKIP_TEST_IF(IsVulkan() && IsIntel());
-
     // TODO(enga): Figure out why this fails on MacOS Intel Iris.
     // It passes on AMD Radeon Pro and Intel HD Graphics 630.
     DAWN_SKIP_TEST_IF(IsMetal() && IsIntel());
 
+    // TODO(enga): Figure out why this fails on Windows Intel Vulkan.
+    // Reading back the depth does not work.
+    DAWN_SKIP_TEST_IF(IsWindows() && IsIntel() && IsVulkan());
+
     // Create a stencil texture
     constexpr uint32_t kWidth = 4;
     constexpr uint32_t kHeight = 4;
-    wgpu::TextureDescriptor texDescriptor = {};
-    texDescriptor.size = {kWidth, kHeight, 1};
-    texDescriptor.format = wgpu::TextureFormat::Depth24PlusStencil8;
-    texDescriptor.usage = wgpu::TextureUsage::OutputAttachment | wgpu::TextureUsage::CopySrc |
-                          wgpu::TextureUsage::CopyDst;
-    wgpu::Texture depthStencilTexture = device.CreateTexture(&texDescriptor);
 
-    // Bytes per row for the stencil data we will upload.
-    // TODO(enga): Use WriteTexture when implemented everywhere.
-    uint32_t bytesPerRow = Align(kWidth * sizeof(uint8_t), kTextureBytesPerRowAlignment);
+    wgpu::Texture depthStencilTexture =
+        CreateDepthStencilTexture(kWidth, kHeight,
+                                  wgpu::TextureUsage::OutputAttachment |
+                                      wgpu::TextureUsage::CopySrc | wgpu::TextureUsage::CopyDst);
 
-    wgpu::BufferDescriptor bufferDesc = {};
-    bufferDesc.usage = wgpu::BufferUsage::CopySrc;
-    bufferDesc.size = kHeight * bytesPerRow;
-    bufferDesc.mappedAtCreation = true;
+    {
+        wgpu::CommandEncoder commandEncoder = device.CreateCommandEncoder();
+
+        // Clear depth to 0.7, so we can check that the stencil copy doesn't mutate the depth.
+        utils::ComboRenderPassDescriptor passDescriptor({}, depthStencilTexture.CreateView());
+        passDescriptor.cDepthStencilAttachmentInfo.clearDepth = 0.7;
+
+        wgpu::RenderPassEncoder pass = commandEncoder.BeginRenderPass(&passDescriptor);
+        pass.EndPass();
+
+        wgpu::CommandBuffer commands = commandEncoder.Finish();
+        queue.Submit(1, &commands);
+    }
 
     std::vector<uint8_t> stencilData = {
         1u,  2u,  3u,  4u,   //
@@ -197,42 +601,30 @@
         12u, 13u, 15u, 16u,  //
     };
 
-    // Copy the stencil data into the buffer.
-    wgpu::Buffer buffer = device.CreateBuffer(&bufferDesc);
-    uint8_t* mappedData = static_cast<uint8_t*>(buffer.GetMappedRange());
-    for (uint32_t r = 0; r < kHeight; ++r) {
-        memcpy(mappedData + r * bytesPerRow, &stencilData[r * kWidth], kWidth);
-    }
-    buffer.Unmap();
+    // Upload the stencil data.
+    wgpu::TextureDataLayout stencilDataLayout = {};
+    stencilDataLayout.bytesPerRow = kWidth * sizeof(uint8_t);
 
-    {
-        wgpu::CommandEncoder commandEncoder = device.CreateCommandEncoder();
+    wgpu::TextureCopyView stencilDataCopyView = utils::CreateTextureCopyView(
+        depthStencilTexture, 0, {0, 0, 0}, wgpu::TextureAspect::StencilOnly);
 
-        // Clear depth to 0.7, so we can check that the stencil copy doesn't mutate the depth.
-        utils::ComboRenderPassDescriptor passDescriptor({}, depthStencilTexture.CreateView());
-        passDescriptor.cDepthStencilAttachmentInfo.clearDepth = 0.7;
+    wgpu::Extent3D writeSize = {kWidth, kHeight, 1};
+    queue.WriteTexture(&stencilDataCopyView, stencilData.data(),
+                       stencilData.size() * sizeof(uint8_t), &stencilDataLayout, &writeSize);
 
-        wgpu::RenderPassEncoder pass = commandEncoder.BeginRenderPass(&passDescriptor);
-        pass.EndPass();
-
-        // Copy from the buffer into the stencil aspect of the texture.
-        wgpu::BufferCopyView bufferCopy = utils::CreateBufferCopyView(buffer, 0, bytesPerRow, 0);
-        wgpu::TextureCopyView textureCopy = utils::CreateTextureCopyView(
-            depthStencilTexture, 0, {0, 0, 0}, wgpu::TextureAspect::StencilOnly);
-
-        commandEncoder.CopyBufferToTexture(&bufferCopy, &textureCopy, &texDescriptor.size);
-
-        wgpu::CommandBuffer commands = commandEncoder.Finish();
-        queue.Submit(1, &commands);
-    }
+    // Decrement the stencil value in a render pass to ensure the data is visible to the pipeline.
     {
         wgpu::CommandEncoder commandEncoder = device.CreateCommandEncoder();
         // Create a render pipline which decrements the stencil value for passing fragments.
         // A quad is drawn in the bottom left.
         utils::ComboRenderPipelineDescriptor renderPipelineDesc(device);
         renderPipelineDesc.vertexStage.module = mVertexModule;
-        renderPipelineDesc.cFragmentStage.module = mFragmentModule;
-        renderPipelineDesc.cDepthStencilState.format = texDescriptor.format;
+        renderPipelineDesc.cFragmentStage.module =
+            utils::CreateShaderModule(device, utils::SingleShaderStage::Fragment, R"(
+    #version 450
+    void main() {
+    })");
+        renderPipelineDesc.cDepthStencilState.format = wgpu::TextureFormat::Depth24PlusStencil8;
         renderPipelineDesc.cDepthStencilState.stencilFront.passOp =
             wgpu::StencilOperation::DecrementClamp;
         renderPipelineDesc.depthStencilState = &renderPipelineDesc.cDepthStencilState;
@@ -256,69 +648,18 @@
         queue.Submit(1, &commands);
     }
 
-    // Copy back the stencil data and check it is the same.
+    // Copy back the stencil data and check it is correct.
     EXPECT_TEXTURE_EQ(expectedStencilData.data(), depthStencilTexture, 0, 0, kWidth, kHeight, 0, 0,
                       wgpu::TextureAspect::StencilOnly);
 
-    // Check that the depth buffer isn't changed.
-    // We do this by running executing a draw call that only passes the depth test if
-    // the depth is equal to the current depth buffer.
-    {
-        wgpu::CommandEncoder commandEncoder = device.CreateCommandEncoder();
-
-        // Make the color attachment that we'll use to read back.
-        wgpu::TextureDescriptor colorTexDesc = {};
-        colorTexDesc.size = {kWidth, kHeight, 1};
-        colorTexDesc.format = wgpu::TextureFormat::R32Uint;
-        colorTexDesc.usage = wgpu::TextureUsage::OutputAttachment | wgpu::TextureUsage::CopySrc;
-        wgpu::Texture colorTexture = device.CreateTexture(&colorTexDesc);
-
-        // Pipeline for a full screen quad.
-        utils::ComboRenderPipelineDescriptor pipelineDescriptor(device);
-
-        pipelineDescriptor.vertexStage.module =
-            utils::CreateShaderModule(device, utils::SingleShaderStage::Vertex, R"(
-    #version 450
-    void main() {
-        const vec2 pos[3] = vec2[3](vec2(-1.f, -1.f), vec2(3.f, -1.f), vec2(-1.f, 3.f));
-                    gl_Position = vec4(pos[gl_VertexIndex], 0.f, 1.f);
-        gl_Position = vec4(pos[gl_VertexIndex], 0.f, 1.f);
-    })");
-
-        // Write out 0.7 for depth. This is the same canary value we wrote previously.
-        pipelineDescriptor.cFragmentStage.module =
-            utils::CreateShaderModule(device, utils::SingleShaderStage::Fragment, R"(
-    #version 450
-
-    layout(location = 0) out uint result;
-    void main() {
-        result = 1u;
-        gl_FragDepth = 0.7;
-    })");
-
-        // Pass the depth test only if the depth is equal.
-        pipelineDescriptor.primitiveTopology = wgpu::PrimitiveTopology::TriangleList;
-        pipelineDescriptor.depthStencilState = &pipelineDescriptor.cDepthStencilState;
-        pipelineDescriptor.cDepthStencilState.format = texDescriptor.format;
-        pipelineDescriptor.cDepthStencilState.depthCompare = wgpu::CompareFunction::Equal;
-        pipelineDescriptor.cColorStates[0].format = colorTexDesc.format;
-
-        utils::ComboRenderPassDescriptor passDescriptor({colorTexture.CreateView()},
-                                                        depthStencilTexture.CreateView());
-        passDescriptor.cDepthStencilAttachmentInfo.depthLoadOp = wgpu::LoadOp::Load;
-
-        wgpu::RenderPipeline pipeline = device.CreateRenderPipeline(&pipelineDescriptor);
-        wgpu::RenderPassEncoder pass = commandEncoder.BeginRenderPass(&passDescriptor);
-        pass.SetPipeline(pipeline);
-        pass.Draw(3);
-        pass.EndPass();
-
-        wgpu::CommandBuffer commands = commandEncoder.Finish();
-        queue.Submit(1, &commands);
-
-        std::vector<uint32_t> colorData(16, 1u);
-        EXPECT_TEXTURE_EQ(colorData.data(), colorTexture, 0, 0, kWidth, kHeight, 0, 0);
-    }
+    ExpectDepthData(depthStencilTexture, wgpu::TextureFormat::Depth24PlusStencil8, kWidth, kHeight,
+                    0,
+                    {
+                        0.7, 0.7, 0.7, 0.7,  //
+                        0.7, 0.7, 0.7, 0.7,  //
+                        0.7, 0.7, 0.7, 0.7,  //
+                        0.7, 0.7, 0.7, 0.7,  //
+                    });
 }
 
 DAWN_INSTANTIATE_TEST(DepthStencilCopyTests,
diff --git a/src/tests/end2end/NonzeroTextureCreationTests.cpp b/src/tests/end2end/NonzeroTextureCreationTests.cpp
index 97759af..c5b62b3 100644
--- a/src/tests/end2end/NonzeroTextureCreationTests.cpp
+++ b/src/tests/end2end/NonzeroTextureCreationTests.cpp
@@ -49,9 +49,6 @@
     // Copies from depth textures not supported on the OpenGL backend right now.
     DAWN_SKIP_TEST_IF(IsOpenGL());
 
-    // Closing the pending command list crashes flakily on D3D12 NVIDIA only.
-    DAWN_SKIP_TEST_IF(IsD3D12() && IsNvidia());
-
     wgpu::TextureDescriptor descriptor;
     descriptor.dimension = wgpu::TextureDimension::e2D;
     descriptor.size.width = kSize;
@@ -66,7 +63,8 @@
     // format.
     // TODO(crbug.com/dawn/145): Test other formats via sampling.
     wgpu::Texture texture = device.CreateTexture(&descriptor);
-    EXPECT_PIXEL_FLOAT_EQ(1.f, texture, 0, 0);
+    std::vector<float> expected(kSize * kSize, 1.f);
+    EXPECT_TEXTURE_EQ(expected.data(), texture, 0, 0, kSize, kSize, 0, 0);
 }
 
 // Test that non-zero mip level clears 0xFF because toggle is enabled.
diff --git a/src/tests/unittests/validation/CopyCommandsValidationTests.cpp b/src/tests/unittests/validation/CopyCommandsValidationTests.cpp
index c6beb1f..f858e4c 100644
--- a/src/tests/unittests/validation/CopyCommandsValidationTests.cpp
+++ b/src/tests/unittests/validation/CopyCommandsValidationTests.cpp
@@ -792,6 +792,66 @@
         TestB2TCopy(utils::Expectation::Failure, source, 0, 256, 0, destination, 0, {0, 0, 0},
                     {16, 16, 1}, wgpu::TextureAspect::StencilOnly);
     }
+
+    // A copy fails when using a depth/stencil texture, and the entire subresource isn't copied
+    {
+        uint64_t bufferSize = BufferSizeForTextureCopy(16, 16, 1, wgpu::TextureFormat::R8Uint);
+        wgpu::Buffer source = CreateBuffer(bufferSize, wgpu::BufferUsage::CopySrc);
+
+        wgpu::Texture destination =
+            Create2DTexture(16, 16, 1, 1, wgpu::TextureFormat::Depth24PlusStencil8,
+                            wgpu::TextureUsage::CopyDst | wgpu::TextureUsage::OutputAttachment);
+
+        TestB2TCopy(utils::Expectation::Failure, source, 0, 256, 0, destination, 0, {0, 0, 0},
+                    {15, 15, 1}, wgpu::TextureAspect::StencilOnly);
+
+        TestB2TCopy(utils::Expectation::Failure, source, 0, 256, 0, destination, 0, {0, 0, 0},
+                    {1, 1, 1}, wgpu::TextureAspect::StencilOnly);
+    }
+
+    // Non-zero mip: A copy fails when using a depth/stencil texture, and the entire subresource
+    // isn't copied
+    {
+        uint64_t bufferSize = BufferSizeForTextureCopy(8, 8, 1, wgpu::TextureFormat::R8Uint);
+        wgpu::Buffer source = CreateBuffer(bufferSize, wgpu::BufferUsage::CopySrc);
+
+        wgpu::Texture destination =
+            Create2DTexture(16, 16, 2, 1, wgpu::TextureFormat::Depth24PlusStencil8,
+                            wgpu::TextureUsage::CopyDst | wgpu::TextureUsage::OutputAttachment);
+
+        // Whole mip is success
+        TestB2TCopy(utils::Expectation::Success, source, 0, 256, 0, destination, 1, {0, 0, 0},
+                    {8, 8, 1}, wgpu::TextureAspect::StencilOnly);
+
+        // Partial mip fails
+        TestB2TCopy(utils::Expectation::Failure, source, 0, 256, 0, destination, 1, {0, 0, 0},
+                    {7, 7, 1}, wgpu::TextureAspect::StencilOnly);
+
+        TestB2TCopy(utils::Expectation::Failure, source, 0, 256, 0, destination, 1, {0, 0, 0},
+                    {1, 1, 1}, wgpu::TextureAspect::StencilOnly);
+    }
+
+    // Non-zero mip, non-pow-2: A copy fails when using a depth/stencil texture, and the entire
+    // subresource isn't copied
+    {
+        uint64_t bufferSize = BufferSizeForTextureCopy(8, 8, 1, wgpu::TextureFormat::R8Uint);
+        wgpu::Buffer source = CreateBuffer(bufferSize, wgpu::BufferUsage::CopySrc);
+
+        wgpu::Texture destination =
+            Create2DTexture(17, 17, 2, 1, wgpu::TextureFormat::Depth24PlusStencil8,
+                            wgpu::TextureUsage::CopyDst | wgpu::TextureUsage::OutputAttachment);
+
+        // Whole mip is success
+        TestB2TCopy(utils::Expectation::Success, source, 0, 256, 0, destination, 1, {0, 0, 0},
+                    {8, 8, 1}, wgpu::TextureAspect::StencilOnly);
+
+        // Partial mip fails
+        TestB2TCopy(utils::Expectation::Failure, source, 0, 256, 0, destination, 1, {0, 0, 0},
+                    {7, 7, 1}, wgpu::TextureAspect::StencilOnly);
+
+        TestB2TCopy(utils::Expectation::Failure, source, 0, 256, 0, destination, 1, {0, 0, 0},
+                    {1, 1, 1}, wgpu::TextureAspect::StencilOnly);
+    }
 }
 
 // Test that CopyB2T throws an error when requiredBytesInCopy overflows uint64_t
@@ -1240,6 +1300,55 @@
         TestT2BCopy(utils::Expectation::Failure, source, 0, {0, 0, 0}, destination, 0, 256, 0,
                     {16, 16, 1}, wgpu::TextureAspect::StencilOnly);
     }
+
+    // A copy fails when using a depth/stencil texture, and the entire subresource isn't
+    // copied
+    {
+        wgpu::Texture source = Create2DTexture(
+            16, 16, 1, 1, wgpu::TextureFormat::Depth24PlusStencil8, wgpu::TextureUsage::CopySrc);
+
+        TestT2BCopy(utils::Expectation::Failure, source, 0, {0, 0, 0}, destination, 0, 256, 0,
+                    {15, 15, 1}, wgpu::TextureAspect::StencilOnly);
+
+        TestT2BCopy(utils::Expectation::Failure, source, 0, {0, 0, 0}, destination, 0, 256, 0,
+                    {1, 1, 1}, wgpu::TextureAspect::StencilOnly);
+    }
+
+    // Non-zero mip: A copy fails when using a depth/stencil texture, and the entire
+    // subresource isn't copied
+    {
+        wgpu::Texture source = Create2DTexture(
+            16, 16, 2, 1, wgpu::TextureFormat::Depth24PlusStencil8, wgpu::TextureUsage::CopySrc);
+
+        // Whole mip is success
+        TestT2BCopy(utils::Expectation::Success, source, 1, {0, 0, 0}, destination, 0, 256, 0,
+                    {8, 8, 1}, wgpu::TextureAspect::StencilOnly);
+
+        // Partial mip fails
+        TestT2BCopy(utils::Expectation::Failure, source, 1, {0, 0, 0}, destination, 0, 256, 0,
+                    {7, 7, 1}, wgpu::TextureAspect::StencilOnly);
+
+        TestT2BCopy(utils::Expectation::Failure, source, 1, {0, 0, 0}, destination, 0, 256, 0,
+                    {1, 1, 1}, wgpu::TextureAspect::StencilOnly);
+    }
+
+    // Non-zero mip, non-pow-2: A copy fails when using a depth/stencil texture, and the
+    // entire subresource isn't copied
+    {
+        wgpu::Texture source = Create2DTexture(
+            17, 17, 2, 1, wgpu::TextureFormat::Depth24PlusStencil8, wgpu::TextureUsage::CopySrc);
+
+        // Whole mip is success
+        TestT2BCopy(utils::Expectation::Success, source, 1, {0, 0, 0}, destination, 0, 256, 0,
+                    {8, 8, 1}, wgpu::TextureAspect::StencilOnly);
+
+        // Partial mip fails
+        TestT2BCopy(utils::Expectation::Failure, source, 1, {0, 0, 0}, destination, 0, 256, 0,
+                    {7, 7, 1}, wgpu::TextureAspect::StencilOnly);
+
+        TestT2BCopy(utils::Expectation::Failure, source, 1, {0, 0, 0}, destination, 0, 256, 0,
+                    {1, 1, 1}, wgpu::TextureAspect::StencilOnly);
+    }
 }
 
 // Test that CopyT2B throws an error when requiredBytesInCopy overflows uint64_t
@@ -1400,6 +1509,7 @@
 TEST_F(CopyCommandTest_T2T, 2DTextureDepthStencil) {
     wgpu::Texture source = Create2DTexture(16, 16, 1, 1, wgpu::TextureFormat::Depth24PlusStencil8,
                                            wgpu::TextureUsage::CopySrc);
+
     wgpu::Texture destination = Create2DTexture(
         16, 16, 1, 1, wgpu::TextureFormat::Depth24PlusStencil8, wgpu::TextureUsage::CopyDst);
 
diff --git a/src/tests/unittests/validation/QueueWriteTextureValidationTests.cpp b/src/tests/unittests/validation/QueueWriteTextureValidationTests.cpp
index cba3174..dc3c425 100644
--- a/src/tests/unittests/validation/QueueWriteTextureValidationTests.cpp
+++ b/src/tests/unittests/validation/QueueWriteTextureValidationTests.cpp
@@ -499,6 +499,11 @@
             ASSERT_DEVICE_ERROR(TestWriteTexture(dataSize - 1, 0, bytesPerRow, 0, destination, 0,
                                                  {0, 0, 0}, {4, 4, 1},
                                                  wgpu::TextureAspect::StencilOnly));
+
+            // It is invalid to write only part of the subresource
+            ASSERT_DEVICE_ERROR(TestWriteTexture(dataSize, 0, bytesPerRow, 0, destination, 0,
+                                                 {0, 0, 0}, {3, 3, 1},
+                                                 wgpu::TextureAspect::StencilOnly));
         }
 
         // It is invalid to write into the stencil aspect of depth24plus (no stencil)