Support copying multiple array layers in one B2T and T2B copy command

This patch adds support for copying multiple texture array layers in a
single buffer-to-texture or texture-to-buffer copy command.

BUG=dawn:453
TEST=dawn_end2end_tests

Change-Id: If009dbb29f2b0ef0667715eed0d66053b1491fd4
Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/23248
Reviewed-by: Corentin Wallez <cwallez@chromium.org>
Commit-Queue: Jiawei Shao <jiawei.shao@intel.com>
diff --git a/src/dawn_native/CommandBuffer.cpp b/src/dawn_native/CommandBuffer.cpp
index 3feebb4..401451d 100644
--- a/src/dawn_native/CommandBuffer.cpp
+++ b/src/dawn_native/CommandBuffer.cpp
@@ -44,8 +44,8 @@
                                        const uint32_t mipLevel) {
         Extent3D extent = texture->GetMipLevelPhysicalSize(mipLevel);
 
-        if (extent.depth == copySize.depth && extent.width == copySize.width &&
-            extent.height == copySize.height) {
+        ASSERT(texture->GetDimension() == wgpu::TextureDimension::e2D);
+        if (extent.width == copySize.width && extent.height == copySize.height) {
             return true;
         }
         return false;
diff --git a/src/dawn_native/d3d12/CommandBufferD3D12.cpp b/src/dawn_native/d3d12/CommandBufferD3D12.cpp
index b4d171a..a0df3b6 100644
--- a/src/dawn_native/d3d12/CommandBufferD3D12.cpp
+++ b/src/dawn_native/d3d12/CommandBufferD3D12.cpp
@@ -577,41 +577,59 @@
                     Texture* texture = ToBackend(copy->destination.texture.Get());
 
                     ASSERT(texture->GetDimension() == wgpu::TextureDimension::e2D);
-                    ASSERT(copy->copySize.depth == 1);
-                    SubresourceRange subresource = SubresourceRange::SingleSubresource(
-                        copy->destination.mipLevel, copy->destination.arrayLayer);
+                    // TODO(jiawei.shao@intel.com): use copy->destination.origin.z instead of
+                    // copy->destination.arrayLayer once the move from
+                    // GPUTextureCopyView.arrayLayer to GPUTextureCopyView.origin.z is done.
+                    SubresourceRange subresources = {copy->destination.mipLevel, 1,
+                                                     copy->destination.arrayLayer,
+                                                     copy->copySize.depth};
                     if (IsCompleteSubresourceCopiedTo(texture, copy->copySize,
                                                       copy->destination.mipLevel)) {
-                        texture->SetIsSubresourceContentInitialized(true, subresource);
+                        texture->SetIsSubresourceContentInitialized(true, subresources);
                     } else {
-                        texture->EnsureSubresourceContentInitialized(commandContext, subresource);
+                        texture->EnsureSubresourceContentInitialized(commandContext, subresources);
                     }
 
                     buffer->TrackUsageAndTransitionNow(commandContext, wgpu::BufferUsage::CopySrc);
                     texture->TrackUsageAndTransitionNow(commandContext, wgpu::TextureUsage::CopyDst,
-                                                        subresource);
+                                                        subresources);
 
-                    auto copySplit = ComputeTextureCopySplit(
-                        copy->destination.origin, copy->copySize, texture->GetFormat(),
-                        copy->source.offset, copy->source.bytesPerRow, copy->source.rowsPerImage);
+                    const uint64_t bytesPerSlice =
+                        copy->source.bytesPerRow * copy->source.rowsPerImage;
 
-                    D3D12_TEXTURE_COPY_LOCATION textureLocation =
-                        ComputeTextureCopyLocationForTexture(texture, copy->destination.mipLevel,
-                                                             copy->destination.arrayLayer);
+                    const dawn_native::Extent3D copyOneLayerSize = {copy->copySize.width,
+                                                                    copy->copySize.height, 1};
+                    uint64_t bufferOffsetForNextSlice = 0;
+                    for (uint32_t copySlice = copy->destination.arrayLayer;
+                         copySlice < copy->destination.arrayLayer + copy->copySize.depth;
+                         ++copySlice) {
+                        // TODO(jiawei.shao@intel.com): compute copySplit once for all texture array
+                        // layers when possible.
+                        auto copySplit = ComputeTextureCopySplit(
+                            copy->destination.origin, copyOneLayerSize, texture->GetFormat(),
+                            bufferOffsetForNextSlice + copy->source.offset,
+                            copy->source.bytesPerRow, copy->source.rowsPerImage);
 
-                    for (uint32_t i = 0; i < copySplit.count; ++i) {
-                        TextureCopySplit::CopyInfo& info = copySplit.copies[i];
+                        D3D12_TEXTURE_COPY_LOCATION textureLocation =
+                            ComputeTextureCopyLocationForTexture(
+                                texture, copy->destination.mipLevel, copySlice);
 
-                        D3D12_TEXTURE_COPY_LOCATION bufferLocation =
-                            ComputeBufferLocationForCopyTextureRegion(
-                                texture, buffer->GetD3D12Resource().Get(), info.bufferSize,
-                                copySplit.offset, copy->source.bytesPerRow);
-                        D3D12_BOX sourceRegion =
-                            ComputeD3D12BoxFromOffsetAndSize(info.bufferOffset, info.copySize);
+                        for (uint32_t i = 0; i < copySplit.count; ++i) {
+                            const TextureCopySplit::CopyInfo& info = copySplit.copies[i];
 
-                        commandList->CopyTextureRegion(&textureLocation, info.textureOffset.x,
-                                                       info.textureOffset.y, info.textureOffset.z,
-                                                       &bufferLocation, &sourceRegion);
+                            D3D12_TEXTURE_COPY_LOCATION bufferLocation =
+                                ComputeBufferLocationForCopyTextureRegion(
+                                    texture, buffer->GetD3D12Resource().Get(), info.bufferSize,
+                                    copySplit.offset, copy->source.bytesPerRow);
+                            D3D12_BOX sourceRegion =
+                                ComputeD3D12BoxFromOffsetAndSize(info.bufferOffset, info.copySize);
+
+                            commandList->CopyTextureRegion(
+                                &textureLocation, info.textureOffset.x, info.textureOffset.y,
+                                info.textureOffset.z, &bufferLocation, &sourceRegion);
+                        }
+
+                        bufferOffsetForNextSlice += bytesPerSlice;
                     }
                     break;
                 }
@@ -622,38 +640,53 @@
                     Buffer* buffer = ToBackend(copy->destination.buffer.Get());
 
                     ASSERT(texture->GetDimension() == wgpu::TextureDimension::e2D);
-                    ASSERT(copy->copySize.depth == 1);
-                    SubresourceRange subresource = SubresourceRange::SingleSubresource(
-                        copy->source.mipLevel, copy->source.arrayLayer);
-                    texture->EnsureSubresourceContentInitialized(commandContext, subresource);
+                    // TODO(jiawei.shao@intel.com): use copy->source.origin.z instead of
+                    // copy->source.arrayLayer once the move from GPUTextureCopyView.arrayLayer
+                    // to GPUTextureCopyView.origin.z is done.
+                    SubresourceRange subresources = {copy->source.mipLevel, 1,
+                                                     copy->source.arrayLayer, copy->copySize.depth};
+                    texture->EnsureSubresourceContentInitialized(commandContext, subresources);
 
                     texture->TrackUsageAndTransitionNow(commandContext, wgpu::TextureUsage::CopySrc,
-                                                        subresource);
+                                                        subresources);
                     buffer->TrackUsageAndTransitionNow(commandContext, wgpu::BufferUsage::CopyDst);
 
-                    TextureCopySplit copySplit = ComputeTextureCopySplit(
-                        copy->source.origin, copy->copySize, texture->GetFormat(),
-                        copy->destination.offset, copy->destination.bytesPerRow,
-                        copy->destination.rowsPerImage);
+                    const uint64_t bytesPerSlice =
+                        copy->destination.bytesPerRow * copy->destination.rowsPerImage;
 
-                    D3D12_TEXTURE_COPY_LOCATION textureLocation =
-                        ComputeTextureCopyLocationForTexture(texture, copy->source.mipLevel,
-                                                             copy->source.arrayLayer);
+                    const dawn_native::Extent3D copyOneLayerSize = {copy->copySize.width,
+                                                                    copy->copySize.height, 1};
+                    uint64_t bufferOffsetForNextSlice = 0;
+                    for (uint32_t copySlice = copy->source.arrayLayer;
+                         copySlice < copy->source.arrayLayer + copy->copySize.depth; ++copySlice) {
+                        // TODO(jiawei.shao@intel.com): compute copySplit once for all texture array
+                        // layers when possible.
+                        TextureCopySplit copySplit = ComputeTextureCopySplit(
+                            copy->source.origin, copyOneLayerSize, texture->GetFormat(),
+                            bufferOffsetForNextSlice + copy->destination.offset,
+                            copy->destination.bytesPerRow, copy->destination.rowsPerImage);
 
-                    for (uint32_t i = 0; i < copySplit.count; ++i) {
-                        TextureCopySplit::CopyInfo& info = copySplit.copies[i];
+                        D3D12_TEXTURE_COPY_LOCATION textureLocation =
+                            ComputeTextureCopyLocationForTexture(texture, copy->source.mipLevel,
+                                                                 copySlice);
 
-                        D3D12_TEXTURE_COPY_LOCATION bufferLocation =
-                            ComputeBufferLocationForCopyTextureRegion(
-                                texture, buffer->GetD3D12Resource().Get(), info.bufferSize,
-                                copySplit.offset, copy->destination.bytesPerRow);
+                        for (uint32_t i = 0; i < copySplit.count; ++i) {
+                            const TextureCopySplit::CopyInfo& info = copySplit.copies[i];
 
-                        D3D12_BOX sourceRegion =
-                            ComputeD3D12BoxFromOffsetAndSize(info.textureOffset, info.copySize);
+                            D3D12_TEXTURE_COPY_LOCATION bufferLocation =
+                                ComputeBufferLocationForCopyTextureRegion(
+                                    texture, buffer->GetD3D12Resource().Get(), info.bufferSize,
+                                    copySplit.offset, copy->destination.bytesPerRow);
 
-                        commandList->CopyTextureRegion(&bufferLocation, info.bufferOffset.x,
-                                                       info.bufferOffset.y, info.bufferOffset.z,
-                                                       &textureLocation, &sourceRegion);
+                            D3D12_BOX sourceRegion =
+                                ComputeD3D12BoxFromOffsetAndSize(info.textureOffset, info.copySize);
+
+                            commandList->CopyTextureRegion(&bufferLocation, info.bufferOffset.x,
+                                                           info.bufferOffset.y, info.bufferOffset.z,
+                                                           &textureLocation, &sourceRegion);
+                        }
+
+                        bufferOffsetForNextSlice += bytesPerSlice;
                     }
                     break;
                 }
diff --git a/src/dawn_native/metal/CommandBufferMTL.mm b/src/dawn_native/metal/CommandBufferMTL.mm
index 2bbcdd4..d6257e8 100644
--- a/src/dawn_native/metal/CommandBufferMTL.mm
+++ b/src/dawn_native/metal/CommandBufferMTL.mm
@@ -748,22 +748,40 @@
                     EnsureDestinationTextureInitialized(texture, copy->copySize, copy->destination);
 
                     Extent3D virtualSizeAtLevel = texture->GetMipLevelVirtualSize(dst.mipLevel);
-                    TextureBufferCopySplit splittedCopies = ComputeTextureBufferCopySplit(
-                        dst.origin, copySize, texture->GetFormat(), virtualSizeAtLevel,
-                        buffer->GetSize(), src.offset, src.bytesPerRow, src.rowsPerImage);
 
-                    for (uint32_t i = 0; i < splittedCopies.count; ++i) {
-                        const TextureBufferCopySplit::CopyInfo& copyInfo = splittedCopies.copies[i];
-                        [commandContext->EnsureBlit() copyFromBuffer:buffer->GetMTLBuffer()
-                                                        sourceOffset:copyInfo.bufferOffset
-                                                   sourceBytesPerRow:copyInfo.bytesPerRow
-                                                 sourceBytesPerImage:copyInfo.bytesPerImage
-                                                          sourceSize:copyInfo.copyExtent
-                                                           toTexture:texture->GetMTLTexture()
-                                                    destinationSlice:dst.arrayLayer
-                                                    destinationLevel:dst.mipLevel
-                                                   destinationOrigin:copyInfo.textureOrigin];
+                    uint64_t bufferOffsetForNextSlice = 0;
+                    const uint64_t bytesPerSlice = src.bytesPerRow * src.rowsPerImage;
+
+                    const dawn_native::Extent3D copyOneLayerSize = {copySize.width, copySize.height,
+                                                                    1};
+                    // TODO(jiawei.shao@intel.com): use dst.origin.z instead of dst.arrayLayer once
+                    // GPUTextureCopyView.arrayLayer to GPUTextureCopyView.origin.z is done.
+                    for (uint32_t copySlice = dst.arrayLayer;
+                         copySlice < dst.arrayLayer + copySize.depth; ++copySlice) {
+                        // TODO(jiawei.shao@intel.com): compute splitCopies once for all texture
+                        // array layers when possible.
+                        TextureBufferCopySplit splitCopies = ComputeTextureBufferCopySplit(
+                            dst.origin, copyOneLayerSize, texture->GetFormat(), virtualSizeAtLevel,
+                            buffer->GetSize(), bufferOffsetForNextSlice + src.offset,
+                            src.bytesPerRow, src.rowsPerImage);
+
+                        for (uint32_t i = 0; i < splitCopies.count; ++i) {
+                            const TextureBufferCopySplit::CopyInfo& copyInfo =
+                                splitCopies.copies[i];
+                            [commandContext->EnsureBlit() copyFromBuffer:buffer->GetMTLBuffer()
+                                                            sourceOffset:copyInfo.bufferOffset
+                                                       sourceBytesPerRow:copyInfo.bytesPerRow
+                                                     sourceBytesPerImage:copyInfo.bytesPerImage
+                                                              sourceSize:copyInfo.copyExtent
+                                                               toTexture:texture->GetMTLTexture()
+                                                        destinationSlice:copySlice
+                                                        destinationLevel:dst.mipLevel
+                                                       destinationOrigin:copyInfo.textureOrigin];
+                        }
+
+                        bufferOffsetForNextSlice += bytesPerSlice;
                     }
+
                     break;
                 }
 
@@ -778,21 +796,37 @@
                     EnsureSourceTextureInitialized(texture, copy->copySize, copy->source);
 
                     Extent3D virtualSizeAtLevel = texture->GetMipLevelVirtualSize(src.mipLevel);
-                    TextureBufferCopySplit splittedCopies = ComputeTextureBufferCopySplit(
-                        src.origin, copySize, texture->GetFormat(), virtualSizeAtLevel,
-                        buffer->GetSize(), dst.offset, dst.bytesPerRow, dst.rowsPerImage);
 
-                    for (uint32_t i = 0; i < splittedCopies.count; ++i) {
-                        const TextureBufferCopySplit::CopyInfo& copyInfo = splittedCopies.copies[i];
-                        [commandContext->EnsureBlit() copyFromTexture:texture->GetMTLTexture()
-                                                          sourceSlice:src.arrayLayer
-                                                          sourceLevel:src.mipLevel
-                                                         sourceOrigin:copyInfo.textureOrigin
-                                                           sourceSize:copyInfo.copyExtent
-                                                             toBuffer:buffer->GetMTLBuffer()
-                                                    destinationOffset:copyInfo.bufferOffset
-                                               destinationBytesPerRow:copyInfo.bytesPerRow
-                                             destinationBytesPerImage:copyInfo.bytesPerImage];
+                    uint64_t bufferOffsetForNextSlice = 0;
+                    const uint64_t bytesPerSlice = dst.bytesPerRow * dst.rowsPerImage;
+
+                    const dawn_native::Extent3D copyOneLayerSize = {copySize.width, copySize.height,
+                                                                    1};
+                    // TODO(jiawei.shao@intel.com): use src.origin.z instead of src.arrayLayer once
+                    // GPUTextureCopyView.arrayLayer to GPUTextureCopyView.origin.z is done.
+                    for (uint32_t copySlice = src.arrayLayer;
+                         copySlice < src.arrayLayer + copySize.depth; ++copySlice) {
+                        // TODO(jiawei.shao@intel.com): compute splitCopies once for all texture
+                        // array layers when possible.
+                        TextureBufferCopySplit splitCopies = ComputeTextureBufferCopySplit(
+                            src.origin, copyOneLayerSize, texture->GetFormat(), virtualSizeAtLevel,
+                            buffer->GetSize(), bufferOffsetForNextSlice + dst.offset,
+                            dst.bytesPerRow, dst.rowsPerImage);
+
+                        for (uint32_t i = 0; i < splitCopies.count; ++i) {
+                            const TextureBufferCopySplit::CopyInfo& copyInfo =
+                                splitCopies.copies[i];
+                            [commandContext->EnsureBlit() copyFromTexture:texture->GetMTLTexture()
+                                                              sourceSlice:copySlice
+                                                              sourceLevel:src.mipLevel
+                                                             sourceOrigin:copyInfo.textureOrigin
+                                                               sourceSize:copyInfo.copyExtent
+                                                                 toBuffer:buffer->GetMTLBuffer()
+                                                        destinationOffset:copyInfo.bufferOffset
+                                                   destinationBytesPerRow:copyInfo.bytesPerRow
+                                                 destinationBytesPerImage:copyInfo.bytesPerImage];
+                        }
+                        bufferOffsetForNextSlice += bytesPerSlice;
                     }
                     break;
                 }
diff --git a/src/dawn_native/opengl/CommandBufferGL.cpp b/src/dawn_native/opengl/CommandBufferGL.cpp
index fec3705..28c606a 100644
--- a/src/dawn_native/opengl/CommandBufferGL.cpp
+++ b/src/dawn_native/opengl/CommandBufferGL.cpp
@@ -515,13 +515,15 @@
                     const GLFormat& format = texture->GetGLFormat();
 
                     ASSERT(texture->GetDimension() == wgpu::TextureDimension::e2D);
-                    ASSERT(copy->copySize.depth == 1);
-                    SubresourceRange subresource =
-                        SubresourceRange::SingleSubresource(dst.mipLevel, dst.arrayLayer);
+                    // TODO(jiawei.shao@intel.com): use copy->destination.origin.z instead of
+                    // copy->destination.arrayLayer once the move from
+                    // GPUTextureCopyView.arrayLayer to GPUTextureCopyView.origin.z is done.
+                    SubresourceRange subresources = {dst.mipLevel, 1, dst.arrayLayer,
+                                                     copy->copySize.depth};
                     if (IsCompleteSubresourceCopiedTo(texture, copySize, dst.mipLevel)) {
-                        texture->SetIsSubresourceContentInitialized(true, subresource);
+                        texture->SetIsSubresourceContentInitialized(true, subresources);
                     } else {
-                        texture->EnsureSubresourceContentInitialized(subresource);
+                        texture->EnsureSubresourceContentInitialized(subresources);
                     }
 
                     gl.BindBuffer(GL_PIXEL_UNPACK_BUFFER, buffer->GetHandle());
@@ -549,8 +551,8 @@
                         if (texture->GetArrayLayers() > 1) {
                             gl.CompressedTexSubImage3D(
                                 target, dst.mipLevel, dst.origin.x, dst.origin.y, dst.arrayLayer,
-                                copyExtent.width, copyExtent.height, 1, format.internalFormat,
-                                copyDataSize,
+                                copyExtent.width, copyExtent.height, copyExtent.depth,
+                                format.internalFormat, copyDataSize,
                                 reinterpret_cast<void*>(static_cast<uintptr_t>(src.offset)));
                         } else {
                             gl.CompressedTexSubImage2D(
@@ -564,7 +566,8 @@
                                 if (texture->GetArrayLayers() > 1) {
                                     gl.TexSubImage3D(target, dst.mipLevel, dst.origin.x,
                                                      dst.origin.y, dst.arrayLayer, copySize.width,
-                                                     copySize.height, 1, format.format, format.type,
+                                                     copySize.height, copySize.depth, format.format,
+                                                     format.type,
                                                      reinterpret_cast<void*>(
                                                          static_cast<uintptr_t>(src.offset)));
                                 } else {
@@ -606,9 +609,9 @@
                     }
 
                     ASSERT(texture->GetDimension() == wgpu::TextureDimension::e2D);
-                    ASSERT(copy->copySize.depth == 1);
-                    texture->EnsureSubresourceContentInitialized(
-                        SubresourceRange::SingleSubresource(src.mipLevel, src.arrayLayer));
+                    SubresourceRange subresources = {src.mipLevel, 1, src.arrayLayer,
+                                                     copy->copySize.depth};
+                    texture->EnsureSubresourceContentInitialized(subresources);
                     // The only way to move data from a texture to a buffer in GL is via
                     // glReadPixels with a pack buffer. Create a temporary FBO for the copy.
                     gl.BindTexture(target, texture->GetHandle());
@@ -636,29 +639,42 @@
                             break;
                     }
 
+                    gl.BindBuffer(GL_PIXEL_PACK_BUFFER, buffer->GetHandle());
+                    gl.PixelStorei(GL_PACK_ROW_LENGTH, dst.bytesPerRow / format.blockByteSize);
+                    gl.PixelStorei(GL_PACK_IMAGE_HEIGHT, dst.rowsPerImage);
+
+                    uint8_t* offset =
+                        reinterpret_cast<uint8_t*>(static_cast<uintptr_t>(dst.offset));
                     switch (texture->GetDimension()) {
-                        case wgpu::TextureDimension::e2D:
-                            if (texture->GetArrayLayers() > 1) {
-                                gl.FramebufferTextureLayer(GL_READ_FRAMEBUFFER, glAttachment,
-                                                           texture->GetHandle(), src.mipLevel,
-                                                           src.arrayLayer);
-                            } else {
+                        case wgpu::TextureDimension::e2D: {
+                            if (texture->GetArrayLayers() == 1) {
                                 gl.FramebufferTexture2D(GL_READ_FRAMEBUFFER, glAttachment, target,
                                                         texture->GetHandle(), src.mipLevel);
+                                gl.ReadPixels(src.origin.x, src.origin.y, copySize.width,
+                                              copySize.height, glFormat.format, glFormat.type,
+                                              offset);
+                                break;
                             }
+
+                            const uint64_t bytesPerImage = dst.bytesPerRow * dst.rowsPerImage;
+                            for (uint32_t layer = 0; layer < copySize.depth; ++layer) {
+                                gl.FramebufferTextureLayer(GL_READ_FRAMEBUFFER, glAttachment,
+                                                           texture->GetHandle(), src.mipLevel,
+                                                           src.arrayLayer + layer);
+                                gl.ReadPixels(src.origin.x, src.origin.y, copySize.width,
+                                              copySize.height, glFormat.format, glFormat.type,
+                                              offset);
+
+                                offset += bytesPerImage;
+                            }
+
                             break;
+                        }
 
                         default:
                             UNREACHABLE();
                     }
 
-                    gl.BindBuffer(GL_PIXEL_PACK_BUFFER, buffer->GetHandle());
-                    gl.PixelStorei(GL_PACK_ROW_LENGTH, dst.bytesPerRow / format.blockByteSize);
-                    gl.PixelStorei(GL_PACK_IMAGE_HEIGHT, dst.rowsPerImage);
-                    ASSERT(copySize.depth == 1 && src.origin.z == 0);
-                    void* offset = reinterpret_cast<void*>(static_cast<uintptr_t>(dst.offset));
-                    gl.ReadPixels(src.origin.x, src.origin.y, copySize.width, copySize.height,
-                                  glFormat.format, glFormat.type, offset);
                     gl.PixelStorei(GL_PACK_ROW_LENGTH, 0);
                     gl.PixelStorei(GL_PACK_IMAGE_HEIGHT, 0);
 
diff --git a/src/dawn_native/vulkan/CommandBufferVk.cpp b/src/dawn_native/vulkan/CommandBufferVk.cpp
index e32725a..575f66f 100644
--- a/src/dawn_native/vulkan/CommandBufferVk.cpp
+++ b/src/dawn_native/vulkan/CommandBufferVk.cpp
@@ -448,9 +448,8 @@
                     VkImageSubresourceLayers subresource = region.imageSubresource;
 
                     ASSERT(dst.texture->GetDimension() == wgpu::TextureDimension::e2D);
-                    ASSERT(copy->copySize.depth == 1);
-                    SubresourceRange range = SubresourceRange::SingleSubresource(
-                        subresource.mipLevel, subresource.baseArrayLayer);
+                    SubresourceRange range = {subresource.mipLevel, 1, subresource.baseArrayLayer,
+                                              subresource.layerCount};
                     if (IsCompleteSubresourceCopiedTo(dst.texture.Get(), copy->copySize,
                                                       subresource.mipLevel)) {
                         // Since texture has been overwritten, it has been "initialized"
@@ -484,9 +483,9 @@
                     VkImageSubresourceLayers subresource = region.imageSubresource;
 
                     ASSERT(src.texture->GetDimension() == wgpu::TextureDimension::e2D);
-                    ASSERT(copy->copySize.depth == 1);
-                    SubresourceRange range = SubresourceRange::SingleSubresource(
-                        subresource.mipLevel, subresource.baseArrayLayer);
+                    const SubresourceRange range = {subresource.mipLevel, 1,
+                                                    subresource.baseArrayLayer,
+                                                    subresource.layerCount};
                     ToBackend(src.texture)
                         ->EnsureSubresourceContentInitialized(recordingContext, range);
 
diff --git a/src/dawn_native/vulkan/UtilsVulkan.cpp b/src/dawn_native/vulkan/UtilsVulkan.cpp
index 91b45ca..4ce513f 100644
--- a/src/dawn_native/vulkan/UtilsVulkan.cpp
+++ b/src/dawn_native/vulkan/UtilsVulkan.cpp
@@ -82,7 +82,6 @@
         region.imageSubresource.aspectMask = texture->GetVkAspectMask();
         region.imageSubresource.mipLevel = textureCopy.mipLevel;
         region.imageSubresource.baseArrayLayer = textureCopy.arrayLayer;
-        region.imageSubresource.layerCount = 1;
 
         region.imageOffset.x = textureCopy.origin.x;
         region.imageOffset.y = textureCopy.origin.y;
@@ -91,7 +90,10 @@
         Extent3D imageExtent = ComputeTextureCopyExtent(textureCopy, copySize);
         region.imageExtent.width = imageExtent.width;
         region.imageExtent.height = imageExtent.height;
-        region.imageExtent.depth = copySize.depth;
+
+        ASSERT(texture->GetDimension() == wgpu::TextureDimension::e2D);
+        region.imageSubresource.layerCount = copySize.depth;
+        region.imageExtent.depth = 1;
 
         return region;
     }
diff --git a/src/tests/end2end/CopyTests.cpp b/src/tests/end2end/CopyTests.cpp
index 4b98414..d447f3e 100644
--- a/src/tests/end2end/CopyTests.cpp
+++ b/src/tests/end2end/CopyTests.cpp
@@ -34,6 +34,7 @@
             uint64_t size;
             uint64_t offset;
             uint32_t bytesPerRow;
+            uint32_t rowsPerImage;
         };
 
         static std::vector<RGBA8> GetExpectedTextureData(
@@ -56,12 +57,14 @@
         }
 
         static BufferSpec MinimumBufferSpec(uint32_t width,
-                                            uint32_t height,
-                                            uint32_t arrayLayer = 1) {
+                                            uint32_t rowsPerImage,
+                                            uint32_t arrayLayer = 1,
+                                            bool testZeroRowsPerImage = true) {
             const uint32_t bytesPerRow = utils::GetMinimumBytesPerRow(kTextureFormat, width);
             const uint32_t totalBufferSize = utils::GetBytesInBufferTextureCopy(
-                kTextureFormat, width, bytesPerRow, height, arrayLayer);
-            return {totalBufferSize, 0, bytesPerRow};
+                kTextureFormat, width, bytesPerRow, rowsPerImage, arrayLayer);
+            uint32_t appliedRowsPerImage = testZeroRowsPerImage ? 0 : rowsPerImage;
+            return {totalBufferSize, 0, bytesPerRow, appliedRowsPerImage};
         }
 
         static void PackTextureData(const RGBA8* srcData, uint32_t width, uint32_t height, uint32_t srcTexelsPerRow, RGBA8* dstData, uint32_t dstTexelsPerRow) {
@@ -90,30 +93,24 @@
           descriptor.usage = wgpu::TextureUsage::CopyDst | wgpu::TextureUsage::CopySrc;
           wgpu::Texture texture = device.CreateTexture(&descriptor);
 
-          const uint32_t rowsPerImage = textureSpec.textureSize.height >> textureSpec.level;
           const utils::BufferTextureCopyLayout copyLayout =
               utils::GetBufferTextureCopyLayoutForTexture2DAtLevel(
-                  kTextureFormat, textureSpec.textureSize, textureSpec.level, rowsPerImage);
+                  kTextureFormat, textureSpec.textureSize, textureSpec.level,
+                  bufferSpec.rowsPerImage);
 
           wgpu::CommandEncoder encoder = device.CreateCommandEncoder();
 
           // Initialize the source texture
           std::vector<RGBA8> textureArrayData = GetExpectedTextureData(copyLayout);
-
-          // TODO(jiawei.shao@intel.com): copy into multiple texture array layers in one
-          // buffer-to-texture copy command.
-          wgpu::Buffer uploadBuffer = utils::CreateBufferFromData(
-              device, textureArrayData.data(), copyLayout.byteLength, wgpu::BufferUsage::CopySrc);
-          uint64_t uploadBufferOffset = 0;
-          for (uint32_t slice = 0; slice < textureSpec.textureSize.depth; ++slice) {
+          {
+              wgpu::Buffer uploadBuffer =
+                  utils::CreateBufferFromData(device, textureArrayData.data(),
+                                              copyLayout.byteLength, wgpu::BufferUsage::CopySrc);
               wgpu::BufferCopyView bufferCopyView = utils::CreateBufferCopyView(
-                  uploadBuffer, uploadBufferOffset, copyLayout.bytesPerRow, 0);
+                  uploadBuffer, 0, copyLayout.bytesPerRow, bufferSpec.rowsPerImage);
               wgpu::TextureCopyView textureCopyView =
-                  utils::CreateTextureCopyView(texture, textureSpec.level, {0, 0, slice});
-              wgpu::Extent3D copyOneLayerSize = {copyLayout.mipSize.width,
-                                                 copyLayout.mipSize.height, 1};
-              encoder.CopyBufferToTexture(&bufferCopyView, &textureCopyView, &copyOneLayerSize);
-              uploadBufferOffset += copyLayout.bytesPerImage;
+                  utils::CreateTextureCopyView(texture, textureSpec.level, {0, 0, 0});
+              encoder.CopyBufferToTexture(&bufferCopyView, &textureCopyView, &copyLayout.mipSize);
           }
 
           // Create a buffer of `size` and populate it with empty data (0,0,0,0) Note:
@@ -128,31 +125,21 @@
               utils::CreateBufferFromData(device, emptyData.data(), bufferSpec.size,
                                           wgpu::BufferUsage::CopySrc | wgpu::BufferUsage::CopyDst);
 
-          const uint32_t maxArrayLayer = textureSpec.copyOrigin.z + copySize.depth;
-
-          // TODO(jiawei.shao@intel.com): copy from multiple texture array layers in one
-          // texture-to-buffer copy command.
-          uint64_t bufferOffset = bufferSpec.offset;
-          for (uint32_t slice = textureSpec.copyOrigin.z; slice < maxArrayLayer; ++slice) {
-              // Copy the region [(`x`, `y`, slice), (`x + testCopySize.width, `y +
-              // testCopySize.height`, 1)] from the `level` mip into the buffer at `offset +
-              // bufferSpec.size * slice` and `bytesPerRow`
-              wgpu::TextureCopyView textureCopyView = utils::CreateTextureCopyView(
-                  texture, textureSpec.level,
-                  {textureSpec.copyOrigin.x, textureSpec.copyOrigin.y, slice});
-              wgpu::BufferCopyView bufferCopyView =
-                  utils::CreateBufferCopyView(buffer, bufferOffset, bufferSpec.bytesPerRow, 0);
-              wgpu::Extent3D copyOneLayerSize = {copySize.width, copySize.height, 1};
-              encoder.CopyTextureToBuffer(&textureCopyView, &bufferCopyView, &copyOneLayerSize);
-              bufferOffset += copyLayout.bytesPerImage;
+          {
+              wgpu::TextureCopyView textureCopyView =
+                  utils::CreateTextureCopyView(texture, textureSpec.level, textureSpec.copyOrigin);
+              wgpu::BufferCopyView bufferCopyView = utils::CreateBufferCopyView(
+                  buffer, bufferSpec.offset, bufferSpec.bytesPerRow, bufferSpec.rowsPerImage);
+              encoder.CopyTextureToBuffer(&textureCopyView, &bufferCopyView, &copySize);
           }
 
           wgpu::CommandBuffer commands = encoder.Finish();
           queue.Submit(1, &commands);
 
-          bufferOffset = bufferSpec.offset;
+          uint64_t bufferOffset = bufferSpec.offset;
           const uint32_t texelCountInCopyRegion =
               bufferSpec.bytesPerRow / bytesPerTexel * (copySize.height - 1) + copySize.width;
+          const uint32_t maxArrayLayer = textureSpec.copyOrigin.z + copySize.depth;
           std::vector<RGBA8> expected(texelCountInCopyRegion);
           for (uint32_t slice = textureSpec.copyOrigin.z; slice < maxArrayLayer; ++slice) {
               // Pack the data used to create the upload buffer in the specified copy region to have
@@ -218,34 +205,23 @@
 
         wgpu::CommandEncoder encoder = device.CreateCommandEncoder();
 
-        const uint32_t rowsPerImage = textureSpec.textureSize.height >> textureSpec.level;
         const utils::BufferTextureCopyLayout copyLayout =
             utils::GetBufferTextureCopyLayoutForTexture2DAtLevel(
-                kTextureFormat, textureSpec.textureSize, textureSpec.level, rowsPerImage);
+                kTextureFormat, textureSpec.textureSize, textureSpec.level,
+                bufferSpec.rowsPerImage);
 
         const uint32_t maxArrayLayer = textureSpec.copyOrigin.z + copySize.depth;
 
-        // TODO(jiawei.shao@intel.com): support copying into multiple texture array layers in one
-        // copy command.
-        uint64_t bufferOffset = bufferSpec.offset;
-        for (uint32_t slice = textureSpec.copyOrigin.z; slice < maxArrayLayer; ++slice) {
-            // Copy to the region [(`x`, `y`, `slice`), (`x + testCopySize.width, `y +
-            // testCopySize.height`, 1] at the `level` mip
-            // from the buffer at the specified `offset` and `bytesPerRow`
-            wgpu::BufferCopyView bufferCopyView =
-                utils::CreateBufferCopyView(buffer, bufferOffset, bufferSpec.bytesPerRow, 0);
-            wgpu::TextureCopyView textureCopyView = utils::CreateTextureCopyView(
-                texture, textureSpec.level,
-                {textureSpec.copyOrigin.x, textureSpec.copyOrigin.y, slice});
-            wgpu::Extent3D copyOneLayerSize = {copySize.width, copySize.height, 1};
-            encoder.CopyBufferToTexture(&bufferCopyView, &textureCopyView, &copyOneLayerSize);
-            bufferOffset += copyLayout.bytesPerImage;
-        }
+        wgpu::BufferCopyView bufferCopyView = utils::CreateBufferCopyView(
+            buffer, bufferSpec.offset, bufferSpec.bytesPerRow, bufferSpec.rowsPerImage);
+        wgpu::TextureCopyView textureCopyView =
+            utils::CreateTextureCopyView(texture, textureSpec.level, textureSpec.copyOrigin);
+        encoder.CopyBufferToTexture(&bufferCopyView, &textureCopyView, &copySize);
 
         wgpu::CommandBuffer commands = encoder.Finish();
         queue.Submit(1, &commands);
 
-        bufferOffset = bufferSpec.offset;
+        uint64_t bufferOffset = bufferSpec.offset;
         const uint32_t texelCountLastLayer =
             copyLayout.texelBlocksPerRow * (copyLayout.mipSize.height - 1) +
             copyLayout.mipSize.width;
@@ -305,30 +281,21 @@
 
         // Create an upload buffer and use it to populate the current slice of the texture in
         // `level` mip level
-        const uint32_t rowsPerImage = srcSpec.textureSize.height >> srcSpec.level;
         const utils::BufferTextureCopyLayout copyLayout =
             utils::GetBufferTextureCopyLayoutForTexture2DAtLevel(
                 kTextureFormat,
                 {srcSpec.textureSize.width, srcSpec.textureSize.height, copySize.depth},
-                srcSpec.level, rowsPerImage);
+                srcSpec.level, 0);
 
         const std::vector<RGBA8> textureArrayCopyData = GetExpectedTextureData(copyLayout);
 
-        // TODO(jiawei.shao@intel.com): support copying into multiple contiguous array layers in one
-        // copyBufferToTexture() call.
         wgpu::Buffer uploadBuffer = utils::CreateBufferFromData(
             device, textureArrayCopyData.data(), copyLayout.byteLength, wgpu::BufferUsage::CopySrc);
-        uint64_t uploadBufferOffset = 0;
-        for (uint32_t slice = 0; slice < copySize.depth; ++slice) {
-            wgpu::BufferCopyView bufferCopyView = utils::CreateBufferCopyView(
-                uploadBuffer, uploadBufferOffset, copyLayout.bytesPerRow, 0);
-            wgpu::TextureCopyView textureCopyView = utils::CreateTextureCopyView(
-                srcTexture, srcSpec.level, {0, 0, srcSpec.copyOrigin.z + slice});
-            wgpu::Extent3D copyOneLayerSize = {copyLayout.mipSize.width, copyLayout.mipSize.height,
-                                               1};
-            encoder.CopyBufferToTexture(&bufferCopyView, &textureCopyView, &copyOneLayerSize);
-            uploadBufferOffset += copyLayout.bytesPerImage;
-        }
+        wgpu::BufferCopyView bufferCopyView =
+            utils::CreateBufferCopyView(uploadBuffer, 0, copyLayout.bytesPerRow, 0);
+        wgpu::TextureCopyView textureCopyView =
+            utils::CreateTextureCopyView(srcTexture, srcSpec.level, {0, 0, srcSpec.copyOrigin.z});
+        encoder.CopyBufferToTexture(&bufferCopyView, &textureCopyView, &copyLayout.mipSize);
 
         // Perform the texture to texture copy
         wgpu::TextureCopyView srcTextureCopyView =
@@ -700,8 +667,12 @@
     }
 }
 
-// Test that copying regions of each texture 2D array layer works
+// Test that copying whole texture 2D array layers in one texture-to-buffer-copy works.
 TEST_P(CopyTests_T2B, Texture2DArrayRegion) {
+    // TODO(jiawei.shao@intel.com): investigate why copies with multiple texture array layers fail
+    // with swiftshader.
+    DAWN_SKIP_TEST_IF(IsSwiftshader());
+
     constexpr uint32_t kWidth = 256;
     constexpr uint32_t kHeight = 128;
     constexpr uint32_t kLayers = 6u;
@@ -714,8 +685,12 @@
     DoTest(textureSpec, MinimumBufferSpec(kWidth, kHeight, kLayers), {kWidth, kHeight, kLayers});
 }
 
-// Test that copying a sub-region of each texture 2D array layer works
+// Test that copying a range of texture 2D array layers in one texture-to-buffer-copy works.
 TEST_P(CopyTests_T2B, Texture2DArraySubRegion) {
+    // TODO(jiawei.shao@intel.com): investigate why copies with multiple texture array layers fail
+    // with swiftshader.
+    DAWN_SKIP_TEST_IF(IsSwiftshader());
+
     constexpr uint32_t kWidth = 256;
     constexpr uint32_t kHeight = 128;
     constexpr uint32_t kLayers = 6u;
@@ -733,6 +708,10 @@
 
 // Test that copying texture 2D array mips with 256-byte aligned sizes works
 TEST_P(CopyTests_T2B, Texture2DArrayMip) {
+    // TODO(jiawei.shao@intel.com): investigate why copies with multiple texture array layers fail
+    // with swiftshader.
+    DAWN_SKIP_TEST_IF(IsSwiftshader());
+
     constexpr uint32_t kWidth = 256;
     constexpr uint32_t kHeight = 128;
     constexpr uint32_t kLayers = 6u;
@@ -750,6 +729,31 @@
     }
 }
 
+// Test that copying from a range of texture 2D array layers in one texture-to-buffer-copy when
+// RowsPerImage is not equal to the height of the texture works.
+TEST_P(CopyTests_T2B, Texture2DArrayRegionNonzeroRowsPerImage) {
+    // TODO(jiawei.shao@intel.com): investigate why copies with multiple texture array layers fail
+    // with swiftshader.
+    DAWN_SKIP_TEST_IF(IsSwiftshader());
+
+    constexpr uint32_t kWidth = 256;
+    constexpr uint32_t kHeight = 128;
+    constexpr uint32_t kLayers = 6u;
+    constexpr uint32_t kBaseLayer = 2u;
+    constexpr uint32_t kCopyLayers = 3u;
+
+    constexpr uint32_t kRowsPerImage = kHeight * 2;
+
+    TextureSpec textureSpec;
+    textureSpec.copyOrigin = {0, 0, kBaseLayer};
+    textureSpec.textureSize = {kWidth, kHeight, kLayers};
+    textureSpec.level = 0;
+
+    BufferSpec bufferSpec = MinimumBufferSpec(kWidth, kRowsPerImage, kCopyLayers, false);
+    bufferSpec.rowsPerImage = kRowsPerImage;
+    DoTest(textureSpec, bufferSpec, {kWidth, kHeight, kCopyLayers});
+}
+
 DAWN_INSTANTIATE_TEST(CopyTests_T2B, D3D12Backend(), MetalBackend(), OpenGLBackend(), VulkanBackend());
 
 // Test that copying an entire texture with 256-byte aligned dimensions works
@@ -1038,8 +1042,12 @@
     }
 }
 
-// Test that copying into regions of each texture 2D array layer works
+// Test that copying into whole texture 2D array layers in one buffer-to-texture-copy works.
 TEST_P(CopyTests_B2T, Texture2DArrayRegion) {
+    // TODO(jiawei.shao@intel.com): investigate why copies with multiple texture array layers fail
+    // with swiftshader.
+    DAWN_SKIP_TEST_IF(IsSwiftshader());
+
     constexpr uint32_t kWidth = 256;
     constexpr uint32_t kHeight = 128;
     constexpr uint32_t kLayers = 6u;
@@ -1052,8 +1060,12 @@
     DoTest(textureSpec, MinimumBufferSpec(kWidth, kHeight, kLayers), {kWidth, kHeight, kLayers});
 }
 
-// Test that copying into a sub-region of each texture 2D array layer works
+// Test that copying into a range of texture 2D array layers in one buffer-to-texture-copy works.
 TEST_P(CopyTests_B2T, Texture2DArraySubRegion) {
+    // TODO(jiawei.shao@intel.com): investigate why copies with multiple texture array layers fail
+    // with swiftshader.
+    DAWN_SKIP_TEST_IF(IsSwiftshader());
+
     constexpr uint32_t kWidth = 256;
     constexpr uint32_t kHeight = 128;
     constexpr uint32_t kLayers = 6u;
@@ -1069,6 +1081,31 @@
            {kWidth, kHeight, kCopyLayers});
 }
 
+// Test that copying into a range of texture 2D array layers in one buffer-to-texture-copy when
+// RowsPerImage is not equal to the height of the texture works.
+TEST_P(CopyTests_B2T, Texture2DArrayRegionNonzeroRowsPerImage) {
+    // TODO(jiawei.shao@intel.com): investigate why copies with multiple texture array layers fail
+    // with swiftshader.
+    DAWN_SKIP_TEST_IF(IsSwiftshader());
+
+    constexpr uint32_t kWidth = 256;
+    constexpr uint32_t kHeight = 128;
+    constexpr uint32_t kLayers = 6u;
+    constexpr uint32_t kBaseLayer = 2u;
+    constexpr uint32_t kCopyLayers = 3u;
+
+    constexpr uint32_t kRowsPerImage = kHeight * 2;
+
+    TextureSpec textureSpec;
+    textureSpec.copyOrigin = {0, 0, kBaseLayer};
+    textureSpec.textureSize = {kWidth, kHeight, kLayers};
+    textureSpec.level = 0;
+
+    BufferSpec bufferSpec = MinimumBufferSpec(kWidth, kRowsPerImage, kCopyLayers, false);
+    bufferSpec.rowsPerImage = kRowsPerImage;
+    DoTest(textureSpec, bufferSpec, {kWidth, kHeight, kCopyLayers});
+}
+
 DAWN_INSTANTIATE_TEST(CopyTests_B2T, D3D12Backend(), MetalBackend(), OpenGLBackend(), VulkanBackend());
 
 TEST_P(CopyTests_T2T, Texture) {
diff --git a/src/tests/end2end/StorageTextureTests.cpp b/src/tests/end2end/StorageTextureTests.cpp
index d384d3f..d02973c 100644
--- a/src/tests/end2end/StorageTextureTests.cpp
+++ b/src/tests/end2end/StorageTextureTests.cpp
@@ -460,23 +460,14 @@
             CreateTexture(format, wgpu::TextureUsage::Storage | wgpu::TextureUsage::CopyDst, kWidth,
                           kHeight, arrayLayerCount);
 
-        const wgpu::Extent3D copyExtent = {kWidth, kHeight, 1};
-
         wgpu::CommandEncoder encoder = device.CreateCommandEncoder();
 
-        // TODO(jiawei.shao@intel.com): copy multiple array layers in one CopyBufferToTexture() when
-        // it is supported.
-        for (uint32_t layer = 0; layer < arrayLayerCount; ++layer) {
-            wgpu::BufferCopyView bufferCopyView = utils::CreateBufferCopyView(
-                uploadBuffer, kTextureBytesPerRowAlignment * kHeight * layer,
-                kTextureBytesPerRowAlignment, 0);
-
-            wgpu::TextureCopyView textureCopyView;
-            textureCopyView.texture = outputTexture;
-            textureCopyView.origin.z = layer;
-
-            encoder.CopyBufferToTexture(&bufferCopyView, &textureCopyView, &copyExtent);
-        }
+        const wgpu::Extent3D copyExtent = {kWidth, kHeight, arrayLayerCount};
+        wgpu::BufferCopyView bufferCopyView =
+            utils::CreateBufferCopyView(uploadBuffer, 0, kTextureBytesPerRowAlignment, 0);
+        wgpu::TextureCopyView textureCopyView;
+        textureCopyView.texture = outputTexture;
+        encoder.CopyBufferToTexture(&bufferCopyView, &textureCopyView, &copyExtent);
 
         wgpu::CommandBuffer commandBuffer = encoder.Finish();
         queue.Submit(1, &commandBuffer);
@@ -643,23 +634,14 @@
             static_cast<uint32_t>(expectedData.size() / texelSize / (kWidth * kHeight));
         wgpu::Buffer resultBuffer = CreateEmptyBufferForTextureCopy(texelSize, arrayLayerCount);
 
-        const wgpu::Extent3D copyExtent = {kWidth, kHeight, 1};
-
         wgpu::CommandEncoder encoder = device.CreateCommandEncoder();
 
-        // TODO(jiawei.shao@intel.com): copy multiple array layers in one CopyTextureToBuffer() when
-        // it is supported.
-        for (uint32_t layer = 0; layer < arrayLayerCount; ++layer) {
-            wgpu::TextureCopyView textureCopyView;
-            textureCopyView.texture = writeonlyStorageTexture;
-            textureCopyView.origin.z = layer;
-
-            const uint64_t bufferOffset = kTextureBytesPerRowAlignment * kHeight * layer;
-            wgpu::BufferCopyView bufferCopyView = utils::CreateBufferCopyView(
-                resultBuffer, bufferOffset, kTextureBytesPerRowAlignment, 0);
-
-            encoder.CopyTextureToBuffer(&textureCopyView, &bufferCopyView, &copyExtent);
-        }
+        const wgpu::Extent3D copyExtent = {kWidth, kHeight, arrayLayerCount};
+        wgpu::TextureCopyView textureCopyView =
+            utils::CreateTextureCopyView(writeonlyStorageTexture, 0, {0, 0, 0});
+        wgpu::BufferCopyView bufferCopyView =
+            utils::CreateBufferCopyView(resultBuffer, 0, kTextureBytesPerRowAlignment, 0);
+        encoder.CopyTextureToBuffer(&textureCopyView, &bufferCopyView, &copyExtent);
         wgpu::CommandBuffer commandBuffer = encoder.Finish();
         queue.Submit(1, &commandBuffer);
 
@@ -955,6 +937,10 @@
     // bug in spvc parser is fixed.
     DAWN_SKIP_TEST_IF(IsSpvcParserBeingUsed());
 
+    // TODO(jiawei.shao@intel.com): investigate why copies with multiple texture array layers fail
+    // with swiftshader.
+    DAWN_SKIP_TEST_IF(IsSwiftshader());
+
     constexpr uint32_t kArrayLayerCount = 3u;
 
     constexpr wgpu::TextureFormat kTextureFormat = wgpu::TextureFormat::R32Uint;
@@ -993,6 +979,10 @@
     // bug in spvc parser is fixed.
     DAWN_SKIP_TEST_IF(IsD3D12() && IsSpvcParserBeingUsed());
 
+    // TODO(jiawei.shao@intel.com): investigate why copies with multiple texture array layers fail
+    // with swiftshader.
+    DAWN_SKIP_TEST_IF(IsSwiftshader());
+
     constexpr uint32_t kArrayLayerCount = 3u;
 
     constexpr wgpu::TextureFormat kTextureFormat = wgpu::TextureFormat::R32Uint;
diff --git a/src/tests/end2end/TextureZeroInitTests.cpp b/src/tests/end2end/TextureZeroInitTests.cpp
index d145161..3c0d9e1 100644
--- a/src/tests/end2end/TextureZeroInitTests.cpp
+++ b/src/tests/end2end/TextureZeroInitTests.cpp
@@ -130,6 +130,49 @@
     EXPECT_EQ(true, dawn_native::IsTextureSubresourceInitialized(texture.Get(), 0, 1, 0, 1));
 }
 
+// This tests that the code path of CopyTextureToBuffer with multiple texture array layers clears
+// correctly to Zero after first usage
+TEST_P(TextureZeroInitTest, CopyMultipleTextureArrayLayersToBufferSource) {
+    // TODO(jiawei.shao@intel.com): investigate why copies with multiple texture array layers fail
+    // with swiftshader.
+    DAWN_SKIP_TEST_IF(IsSwiftshader());
+
+    constexpr uint32_t kArrayLayers = 6u;
+
+    const wgpu::TextureDescriptor descriptor = CreateTextureDescriptor(
+        1, kArrayLayers, wgpu::TextureUsage::OutputAttachment | wgpu::TextureUsage::CopySrc,
+        kColorFormat);
+    wgpu::Texture texture = device.CreateTexture(&descriptor);
+
+    const uint32_t bytesPerRow = utils::GetMinimumBytesPerRow(kColorFormat, kSize);
+    wgpu::BufferDescriptor bufferDescriptor;
+    bufferDescriptor.usage = wgpu::BufferUsage::CopySrc | wgpu::BufferUsage::CopyDst;
+    bufferDescriptor.size =
+        utils::GetBytesInBufferTextureCopy(kColorFormat, kSize, bytesPerRow, kSize, kArrayLayers);
+    wgpu::Buffer buffer = device.CreateBuffer(&bufferDescriptor);
+
+    const wgpu::BufferCopyView bufferCopyView =
+        utils::CreateBufferCopyView(buffer, 0, bytesPerRow, 0);
+    const wgpu::TextureCopyView textureCopyView =
+        utils::CreateTextureCopyView(texture, 0, {0, 0, 0});
+    const wgpu::Extent3D copySize = {kSize, kSize, kArrayLayers};
+
+    wgpu::CommandEncoder encoder = device.CreateCommandEncoder();
+    encoder.CopyTextureToBuffer(&textureCopyView, &bufferCopyView, &copySize);
+    wgpu::CommandBuffer commandBuffer = encoder.Finish();
+
+    // Expect texture to be lazy initialized.
+    EXPECT_LAZY_CLEAR(1u, queue.Submit(1, &commandBuffer));
+
+    // Expect texture subresource initialized to be true
+    EXPECT_TRUE(dawn_native::IsTextureSubresourceInitialized(texture.Get(), 0, 1, 0, kArrayLayers));
+
+    const std::vector<RGBA8> kExpectedAllZero(kSize * kSize, {0, 0, 0, 0});
+    for (uint32_t layer = 0; layer < kArrayLayers; ++layer) {
+        EXPECT_TEXTURE_RGBA8_EQ(kExpectedAllZero.data(), texture, 0, 0, kSize, kSize, 0, layer);
+    }
+}
+
 // Test that non-zero mip level clears subresource to Zero after first use
 // This goes through the BeginRenderPass's code path
 TEST_P(TextureZeroInitTest, RenderingMipMapClearsToZero) {
@@ -282,6 +325,47 @@
     EXPECT_EQ(true, dawn_native::IsTextureSubresourceInitialized(texture.Get(), 0, 1, 0, 1));
 }
 
+// This tests CopyBufferToTexture fully overwrites a range of subresources, so lazy initialization
+// is needed for neither the subresources involved in the copy nor the other subresources.
+TEST_P(TextureZeroInitTest, CopyBufferToTextureMultipleArrayLayers) {
+    // TODO(jiawei.shao@intel.com): investigate why copies with multiple texture array layers fail
+    // with swiftshader.
+    DAWN_SKIP_TEST_IF(IsSwiftshader());
+
+    wgpu::TextureDescriptor descriptor = CreateTextureDescriptor(
+        1, 6, wgpu::TextureUsage::CopyDst | wgpu::TextureUsage::CopySrc, kColorFormat);
+    wgpu::Texture texture = device.CreateTexture(&descriptor);
+
+    constexpr uint32_t kBaseArrayLayer = 2u;
+    constexpr uint32_t kCopyLayerCount = 3u;
+    std::vector<uint8_t> data(kFormatBlockByteSize * kSize * kSize * kCopyLayerCount, 100);
+    wgpu::Buffer stagingBuffer = utils::CreateBufferFromData(
+        device, data.data(), static_cast<uint32_t>(data.size()), wgpu::BufferUsage::CopySrc);
+
+    const wgpu::BufferCopyView bufferCopyView =
+        utils::CreateBufferCopyView(stagingBuffer, 0, kSize * kFormatBlockByteSize, 0);
+    const wgpu::TextureCopyView textureCopyView =
+        utils::CreateTextureCopyView(texture, 0, {0, 0, kBaseArrayLayer});
+    const wgpu::Extent3D copySize = {kSize, kSize, kCopyLayerCount};
+
+    wgpu::CommandEncoder encoder = device.CreateCommandEncoder();
+    encoder.CopyBufferToTexture(&bufferCopyView, &textureCopyView, &copySize);
+    wgpu::CommandBuffer commands = encoder.Finish();
+
+    // The copy overwrites the whole subresources so we don't need to do lazy initialization on
+    // them.
+    EXPECT_LAZY_CLEAR(0u, queue.Submit(1, &commands));
+
+    // Expect texture subresource initialized to be true
+    EXPECT_TRUE(dawn_native::IsTextureSubresourceInitialized(texture.Get(), 0, 1, kBaseArrayLayer,
+                                                             kCopyLayerCount));
+
+    const std::vector<RGBA8> expected100(kSize * kSize, {100, 100, 100, 100});
+    for (uint32_t layer = kBaseArrayLayer; layer < kBaseArrayLayer + kCopyLayerCount; ++layer) {
+        EXPECT_TEXTURE_RGBA8_EQ(expected100.data(), texture, 0, 0, kSize, kSize, 0, layer);
+    }
+}
+
 // This tests CopyTextureToTexture fully overwrites copy so lazy init is not needed.
 TEST_P(TextureZeroInitTest, CopyTextureToTexture) {
     wgpu::TextureDescriptor srcDescriptor = CreateTextureDescriptor(
diff --git a/src/utils/WGPUHelpers.cpp b/src/utils/WGPUHelpers.cpp
index b44b866..686b223 100644
--- a/src/utils/WGPUHelpers.cpp
+++ b/src/utils/WGPUHelpers.cpp
@@ -384,6 +384,7 @@
                                          uint32_t bytesPerRow,
                                          uint32_t rowsPerImage,
                                          uint32_t copyArrayLayerCount) {
+        ASSERT(rowsPerImage > 0);
         const uint32_t bytesPerTexel = utils::GetTexelBlockSizeInBytes(format);
         const uint32_t bytesAtLastImage = bytesPerRow * (rowsPerImage - 1) + bytesPerTexel * width;
         return bytesPerRow * rowsPerImage * (copyArrayLayerCount - 1) + bytesAtLastImage;
@@ -401,11 +402,13 @@
                           textureSizeAtLevel0.height >> mipmapLevel, textureSizeAtLevel0.depth};
 
         layout.bytesPerRow = GetMinimumBytesPerRow(format, layout.mipSize.width);
-        layout.bytesPerImage = layout.bytesPerRow * rowsPerImage;
+
+        uint32_t appliedRowsPerImage = rowsPerImage > 0 ? rowsPerImage : layout.mipSize.height;
+        layout.bytesPerImage = layout.bytesPerRow * appliedRowsPerImage;
 
         layout.byteLength =
             GetBytesInBufferTextureCopy(format, layout.mipSize.width, layout.bytesPerRow,
-                                        layout.mipSize.height, textureSizeAtLevel0.depth);
+                                        appliedRowsPerImage, textureSizeAtLevel0.depth);
 
         const uint32_t bytesPerTexel = utils::GetTexelBlockSizeInBytes(format);
         layout.texelBlocksPerRow = layout.bytesPerRow / bytesPerTexel;