Implementing Queue::WriteTexture in Metal

Added an implementation of writeTexture in Metal. It uses a
staging buffer instead of writing directly from the CPU to
the texture, because Dawn uses the private storage mode for
most Metal textures.

Bug: dawn:483
Change-Id: I6b85ee8bbe343881337bdb203a122dc1f1523177
Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/24581
Commit-Queue: Tomek Ponitka <tommek@google.com>
Reviewed-by: Austin Eng <enga@chromium.org>
diff --git a/src/dawn_native/CommandValidation.cpp b/src/dawn_native/CommandValidation.cpp
index 8444655..db8714c 100644
--- a/src/dawn_native/CommandValidation.cpp
+++ b/src/dawn_native/CommandValidation.cpp
@@ -120,36 +120,6 @@
             return {};
         }
 
-        void ComputeRequiredBytesInCopy(const Format& textureFormat,
-                                        const Extent3D& copySize,
-                                        uint32_t bytesPerRow,
-                                        uint32_t rowsPerImage,
-                                        uint32_t* result) {
-            // Default value for rowsPerImage
-            if (rowsPerImage == 0) {
-                rowsPerImage = copySize.height;
-            }
-            ASSERT(rowsPerImage >= copySize.height);
-            if (copySize.width == 0 || copySize.height == 0 || copySize.depth == 0) {
-                *result = 0;
-                return;
-            }
-
-            uint32_t blockByteSize = textureFormat.blockByteSize;
-            uint32_t blockWidth = textureFormat.blockWidth;
-            uint32_t blockHeight = textureFormat.blockHeight;
-
-            // TODO(cwallez@chromium.org): check for overflows
-            uint32_t slicePitch = bytesPerRow * rowsPerImage / blockWidth;
-
-            ASSERT(copySize.height >= 1);
-            uint32_t sliceSize = bytesPerRow * (copySize.height / blockHeight - 1) +
-                                 (copySize.width / blockWidth) * blockByteSize;
-
-            ASSERT(copySize.depth >= 1);
-            *result = (slicePitch * (copySize.depth - 1)) + sliceSize;
-        }
-
     }  // namespace
 
     MaybeError ValidateCanPopDebugGroup(uint64_t debugGroupStackSize) {
@@ -400,6 +370,30 @@
                static_cast<uint64_t>(maxStart);
     }
 
+    uint32_t ComputeRequiredBytesInCopy(const Format& textureFormat,
+                                        const Extent3D& copySize,
+                                        uint32_t bytesPerRow,
+                                        uint32_t rowsPerImage) {
+        // Byte span of the copy in linear data. A rowsPerImage of 0 defaults to copySize.height.
+        if (rowsPerImage == 0) {
+            rowsPerImage = copySize.height;
+        }
+        ASSERT(rowsPerImage >= copySize.height);
+        if (copySize.width == 0 || copySize.height == 0 || copySize.depth == 0) {
+            return 0;  // An empty copy needs no bytes.
+        }
+        // NOTE(review): the uint64_t total below is implicitly narrowed to uint32_t on return.
+        ASSERT(copySize.height >= 1);
+        ASSERT(copySize.depth >= 1);
+        // All images but the last take bytesPerImage; the last ends at the final block it copies.
+        uint64_t texelBlockRowsPerImage = rowsPerImage / textureFormat.blockHeight;
+        uint64_t bytesPerImage = bytesPerRow * texelBlockRowsPerImage;
+        uint64_t bytesInLastSlice =
+            bytesPerRow * (copySize.height / textureFormat.blockHeight - 1) +
+            (copySize.width / textureFormat.blockWidth * textureFormat.blockByteSize);
+        return bytesPerImage * (copySize.depth - 1) + bytesInLastSlice;
+    }
+
     MaybeError ValidateCopySizeFitsInBuffer(const Ref<BufferBase>& buffer,
                                             uint64_t offset,
                                             uint64_t size) {
@@ -423,9 +417,8 @@
 
         // TODO(tommek@google.com): to match the spec this should only be checked when
         // copyExtent.depth > 1.
-        uint32_t requiredBytesInCopy = 0;
-        ComputeRequiredBytesInCopy(format, copyExtent, layout.bytesPerRow, layout.rowsPerImage,
-                                   &requiredBytesInCopy);
+        uint32_t requiredBytesInCopy =
+            ComputeRequiredBytesInCopy(format, copyExtent, layout.bytesPerRow, layout.rowsPerImage);
 
         bool fitsInData =
             layout.offset <= byteSize && (requiredBytesInCopy <= (byteSize - layout.offset));
diff --git a/src/dawn_native/CommandValidation.h b/src/dawn_native/CommandValidation.h
index cee2b13..72d876d 100644
--- a/src/dawn_native/CommandValidation.h
+++ b/src/dawn_native/CommandValidation.h
@@ -40,6 +40,11 @@
 
     MaybeError ValidateTimestampQuery(QuerySetBase* querySet, uint32_t queryIndex);
 
+    uint32_t ComputeRequiredBytesInCopy(const Format& textureFormat,
+                                        const Extent3D& copySize,
+                                        uint32_t bytesPerRow,
+                                        uint32_t rowsPerImage);
+
     MaybeError ValidateLinearTextureData(const TextureDataLayout& layout,
                                          uint64_t byteSize,
                                          const Format& format,
diff --git a/src/dawn_native/Queue.cpp b/src/dawn_native/Queue.cpp
index e70a378..9ac92f3 100644
--- a/src/dawn_native/Queue.cpp
+++ b/src/dawn_native/Queue.cpp
@@ -147,6 +147,11 @@
                                                const TextureDataLayout* dataLayout,
                                                const Extent3D* writeSize) {
         DAWN_TRY(ValidateWriteTexture(destination, dataSize, dataLayout, writeSize));
+
+        if (writeSize->width == 0 || writeSize->height == 0 || writeSize->depth == 0) {
+            return {};
+        }
+
         return WriteTextureImpl(destination, data, dataSize, dataLayout, writeSize);
     }
 
diff --git a/src/dawn_native/Texture.cpp b/src/dawn_native/Texture.cpp
index 948c06d..9cf2d68 100644
--- a/src/dawn_native/Texture.cpp
+++ b/src/dawn_native/Texture.cpp
@@ -541,6 +541,19 @@
         return extent;
     }
 
+    Extent3D TextureBase::ClampToMipLevelVirtualSize(uint32_t level,
+                                                     const Origin3D& origin,
+                                                     const Extent3D& extent) const {
+        const Extent3D virtualSizeAtLevel = GetMipLevelVirtualSize(level);
+        uint32_t clampedCopyExtentWidth = (origin.x + extent.width > virtualSizeAtLevel.width)
+                                              ? (virtualSizeAtLevel.width - origin.x)  // clipped
+                                              : extent.width;  // already fits
+        uint32_t clampedCopyExtentHeight = (origin.y + extent.height > virtualSizeAtLevel.height)
+                                               ? (virtualSizeAtLevel.height - origin.y)  // clipped
+                                               : extent.height;  // already fits
+        return {clampedCopyExtentWidth, clampedCopyExtentHeight, extent.depth};  // depth unchanged
+    }
+
     TextureViewBase* TextureBase::CreateView(const TextureViewDescriptor* descriptor) {
         return GetDevice()->CreateTextureView(this, descriptor);
     }
diff --git a/src/dawn_native/Texture.h b/src/dawn_native/Texture.h
index ce1bc90..7c476ba 100644
--- a/src/dawn_native/Texture.h
+++ b/src/dawn_native/Texture.h
@@ -91,6 +91,9 @@
         // required to be a multiple of the block size and used in texture sampling.
         Extent3D GetMipLevelPhysicalSize(uint32_t level) const;
         Extent3D GetMipLevelVirtualSize(uint32_t level) const;
+        Extent3D ClampToMipLevelVirtualSize(uint32_t level,
+                                            const Origin3D& origin,
+                                            const Extent3D& extent) const;
 
         // Dawn API
         TextureViewBase* CreateView(const TextureViewDescriptor* descriptor);
diff --git a/src/dawn_native/metal/CommandBufferMTL.mm b/src/dawn_native/metal/CommandBufferMTL.mm
index 0f923f4..97c0ad1 100644
--- a/src/dawn_native/metal/CommandBufferMTL.mm
+++ b/src/dawn_native/metal/CommandBufferMTL.mm
@@ -26,6 +26,7 @@
 #include "dawn_native/metal/RenderPipelineMTL.h"
 #include "dawn_native/metal/SamplerMTL.h"
 #include "dawn_native/metal/TextureMTL.h"
+#include "dawn_native/metal/UtilsMetal.h"
 
 namespace dawn_native { namespace metal {
 
@@ -309,149 +310,6 @@
             }
         };
 
-        struct TextureBufferCopySplit {
-            static constexpr uint32_t kMaxTextureBufferCopyRegions = 3;
-
-            struct CopyInfo {
-                NSUInteger bufferOffset;
-                NSUInteger bytesPerRow;
-                NSUInteger bytesPerImage;
-                Origin3D textureOrigin;
-                Extent3D copyExtent;
-            };
-
-            uint32_t count = 0;
-            std::array<CopyInfo, kMaxTextureBufferCopyRegions> copies;
-        };
-
-        TextureBufferCopySplit ComputeTextureBufferCopySplit(wgpu::TextureDimension dimension,
-                                                             Origin3D origin,
-                                                             Extent3D copyExtent,
-                                                             Format textureFormat,
-                                                             Extent3D virtualSizeAtLevel,
-                                                             uint64_t bufferSize,
-                                                             uint64_t bufferOffset,
-                                                             uint32_t bytesPerRow,
-                                                             uint32_t rowsPerImage) {
-            TextureBufferCopySplit copy;
-
-            // When copying textures from/to an unpacked buffer, the Metal validation layer doesn't
-            // compute the correct range when checking if the buffer is big enough to contain the
-            // data for the whole copy. Instead of looking at the position of the last texel in the
-            // buffer, it computes the volume of the 3D box with bytesPerRow * (rowsPerImage /
-            // format.blockHeight) * copySize.depth. For example considering the pixel buffer below
-            // where in memory, each row data (D) of the texture is followed by some padding data
-            // (P):
-            //     |DDDDDDD|PP|
-            //     |DDDDDDD|PP|
-            //     |DDDDDDD|PP|
-            //     |DDDDDDD|PP|
-            //     |DDDDDDA|PP|
-            // The last pixel read will be A, but the driver will think it is the whole last padding
-            // row, causing it to generate an error when the pixel buffer is just big enough.
-
-            // We work around this limitation by detecting when Metal would complain and copy the
-            // last image and row separately using tight sourceBytesPerRow or sourceBytesPerImage.
-            uint32_t dataRowsPerImage = rowsPerImage / textureFormat.blockHeight;
-            uint32_t bytesPerImage = bytesPerRow * dataRowsPerImage;
-
-            // Metal validation layer requires that if the texture's pixel format is a compressed
-            // format, the sourceSize must be a multiple of the pixel format's block size or be
-            // clamped to the edge of the texture if the block extends outside the bounds of a
-            // texture.
-            uint32_t clampedCopyExtentWidth =
-                (origin.x + copyExtent.width > virtualSizeAtLevel.width)
-                    ? (virtualSizeAtLevel.width - origin.x)
-                    : copyExtent.width;
-            uint32_t clampedCopyExtentHeight =
-                (origin.y + copyExtent.height > virtualSizeAtLevel.height)
-                    ? (virtualSizeAtLevel.height - origin.y)
-                    : copyExtent.height;
-
-            ASSERT(dimension == wgpu::TextureDimension::e2D);
-
-            // Check whether buffer size is big enough.
-            bool needWorkaround = bufferSize - bufferOffset < bytesPerImage * copyExtent.depth;
-            if (!needWorkaround) {
-                copy.count = 1;
-                copy.copies[0].bufferOffset = bufferOffset;
-                copy.copies[0].bytesPerRow = bytesPerRow;
-                copy.copies[0].bytesPerImage = bytesPerImage;
-                copy.copies[0].textureOrigin = origin;
-                copy.copies[0].copyExtent = {clampedCopyExtentWidth, clampedCopyExtentHeight,
-                                             copyExtent.depth};
-                return copy;
-            }
-
-            uint64_t currentOffset = bufferOffset;
-
-            // Doing all the copy except the last image.
-            if (copyExtent.depth > 1) {
-                copy.copies[copy.count].bufferOffset = currentOffset;
-                copy.copies[copy.count].bytesPerRow = bytesPerRow;
-                copy.copies[copy.count].bytesPerImage = bytesPerImage;
-                copy.copies[copy.count].textureOrigin = origin;
-                copy.copies[copy.count].copyExtent = {
-                    clampedCopyExtentWidth, clampedCopyExtentHeight, copyExtent.depth - 1};
-
-                ++copy.count;
-
-                // Update offset to copy to the last image.
-                currentOffset += (copyExtent.depth - 1) * bytesPerImage;
-            }
-
-            // Doing all the copy in last image except the last row.
-            uint32_t copyBlockRowCount = copyExtent.height / textureFormat.blockHeight;
-            if (copyBlockRowCount > 1) {
-                copy.copies[copy.count].bufferOffset = currentOffset;
-                copy.copies[copy.count].bytesPerRow = bytesPerRow;
-                copy.copies[copy.count].bytesPerImage = bytesPerRow * (copyBlockRowCount - 1);
-                copy.copies[copy.count].textureOrigin = {origin.x, origin.y,
-                                                         origin.z + copyExtent.depth - 1};
-
-                ASSERT(copyExtent.height - textureFormat.blockHeight < virtualSizeAtLevel.height);
-                copy.copies[copy.count].copyExtent = {
-                    clampedCopyExtentWidth, copyExtent.height - textureFormat.blockHeight, 1};
-
-                ++copy.count;
-
-                // Update offset to copy to the last row.
-                currentOffset += (copyBlockRowCount - 1) * bytesPerRow;
-            }
-
-            // Doing the last row copy with the exact number of bytes in last row.
-            // Workaround this issue in a way just like the copy to a 1D texture.
-            uint32_t lastRowDataSize =
-                (copyExtent.width / textureFormat.blockWidth) * textureFormat.blockByteSize;
-            uint32_t lastRowCopyExtentHeight =
-                textureFormat.blockHeight + clampedCopyExtentHeight - copyExtent.height;
-            ASSERT(lastRowCopyExtentHeight <= textureFormat.blockHeight);
-
-            copy.copies[copy.count].bufferOffset = currentOffset;
-            copy.copies[copy.count].bytesPerRow = lastRowDataSize;
-            copy.copies[copy.count].bytesPerImage = lastRowDataSize;
-            copy.copies[copy.count].textureOrigin = {
-                origin.x, origin.y + copyExtent.height - textureFormat.blockHeight,
-                origin.z + copyExtent.depth - 1};
-            copy.copies[copy.count].copyExtent = {clampedCopyExtentWidth, lastRowCopyExtentHeight,
-                                                  1};
-            ++copy.count;
-
-            return copy;
-        }
-
-        void EnsureDestinationTextureInitialized(Texture* texture,
-                                                 const TextureCopy& dst,
-                                                 const Extent3D& size) {
-            ASSERT(texture == dst.texture.Get());
-            SubresourceRange range = GetSubresourcesAffectedByCopy(dst, size);
-            if (IsCompleteSubresourceCopiedTo(dst.texture.Get(), size, dst.mipLevel)) {
-                texture->SetIsSubresourceContentInitialized(true, range);
-            } else {
-                texture->EnsureSubresourceContentInitialized(range);
-            }
-        }
-
         // Keeps track of the dirty bind groups so they can be lazily applied when we know the
         // pipeline state.
         // Bind groups may be inherited because bind groups are packed in the buffer /
@@ -745,13 +603,9 @@
 
                     EnsureDestinationTextureInitialized(texture, copy->destination, copy->copySize);
 
-                    const Extent3D virtualSizeAtLevel =
-                        texture->GetMipLevelVirtualSize(dst.mipLevel);
-
                     TextureBufferCopySplit splitCopies = ComputeTextureBufferCopySplit(
-                        texture->GetDimension(), dst.origin, copySize, texture->GetFormat(),
-                        virtualSizeAtLevel, buffer->GetSize(), src.offset, src.bytesPerRow,
-                        src.rowsPerImage);
+                        texture, dst.mipLevel, dst.origin, copySize, buffer->GetSize(), src.offset,
+                        src.bytesPerRow, src.rowsPerImage);
 
                     for (uint32_t i = 0; i < splitCopies.count; ++i) {
                         const TextureBufferCopySplit::CopyInfo& copyInfo = splitCopies.copies[i];
@@ -793,11 +647,9 @@
                     texture->EnsureSubresourceContentInitialized(
                         GetSubresourcesAffectedByCopy(src, copySize));
 
-                    Extent3D virtualSizeAtLevel = texture->GetMipLevelVirtualSize(src.mipLevel);
                     TextureBufferCopySplit splitCopies = ComputeTextureBufferCopySplit(
-                        texture->GetDimension(), src.origin, copySize, texture->GetFormat(),
-                        virtualSizeAtLevel, buffer->GetSize(), dst.offset, dst.bytesPerRow,
-                        dst.rowsPerImage);
+                        texture, src.mipLevel, src.origin, copySize, buffer->GetSize(), dst.offset,
+                        dst.bytesPerRow, dst.rowsPerImage);
 
                     for (uint32_t i = 0; i < splitCopies.count; ++i) {
                         const TextureBufferCopySplit::CopyInfo& copyInfo = splitCopies.copies[i];
diff --git a/src/dawn_native/metal/DeviceMTL.h b/src/dawn_native/metal/DeviceMTL.h
index 6baa728..87a697b 100644
--- a/src/dawn_native/metal/DeviceMTL.h
+++ b/src/dawn_native/metal/DeviceMTL.h
@@ -18,6 +18,7 @@
 #include "dawn_native/dawn_platform.h"
 
 #include "common/Serial.h"
+#include "dawn_native/Commands.h"
 #include "dawn_native/Device.h"
 #include "dawn_native/metal/CommandRecordingContext.h"
 #include "dawn_native/metal/Forward.h"
@@ -63,6 +64,10 @@
                                            BufferBase* destination,
                                            uint64_t destinationOffset,
                                            uint64_t size) override;
+        MaybeError CopyFromStagingToTexture(StagingBufferBase* source,
+                                            const TextureDataLayout& dataLayout,
+                                            TextureCopy* dst,
+                                            const Extent3D copySize);
 
       private:
         Device(AdapterBase* adapter, id<MTLDevice> mtlDevice, const DeviceDescriptor* descriptor);
diff --git a/src/dawn_native/metal/DeviceMTL.mm b/src/dawn_native/metal/DeviceMTL.mm
index 31ae40b..d5dbbc5 100644
--- a/src/dawn_native/metal/DeviceMTL.mm
+++ b/src/dawn_native/metal/DeviceMTL.mm
@@ -16,6 +16,7 @@
 
 #include "dawn_native/BackendConnection.h"
 #include "dawn_native/BindGroupLayout.h"
+#include "dawn_native/Commands.h"
 #include "dawn_native/ErrorData.h"
 #include "dawn_native/metal/BindGroupLayoutMTL.h"
 #include "dawn_native/metal/BindGroupMTL.h"
@@ -30,6 +31,7 @@
 #include "dawn_native/metal/StagingBufferMTL.h"
 #include "dawn_native/metal/SwapChainMTL.h"
 #include "dawn_native/metal/TextureMTL.h"
+#include "dawn_native/metal/UtilsMetal.h"
 #include "dawn_platform/DawnPlatform.h"
 #include "dawn_platform/tracing/TraceEvent.h"
 
@@ -266,6 +268,54 @@
         return {};
     }
 
+    MaybeError Device::CopyFromStagingToTexture(StagingBufferBase* source,
+                                                const TextureDataLayout& dataLayout,
+                                                TextureCopy* dst,
+                                                const Extent3D copySize) {
+        Texture* texture = ToBackend(dst->texture.Get());
+
+        // The staging data must be tightly packed (see the ASSERTs below). Otherwise, it might be
+        // necessary to split the copy into several stages: see ComputeTextureBufferCopySplit.
+        uint32_t blockSize = dst->texture->GetFormat().blockByteSize;
+        uint32_t blockWidth = dst->texture->GetFormat().blockWidth;
+        uint32_t blockHeight = dst->texture->GetFormat().blockHeight;
+        ASSERT(dataLayout.rowsPerImage == (copySize.height));
+        ASSERT(dataLayout.bytesPerRow == (copySize.width) / blockWidth * blockSize);
+
+        // TODO(tommek@google.com): Add tests for this in TextureZeroInitTests.
+        EnsureDestinationTextureInitialized(texture, *dst, copySize);
+
+        // Metal validation layer requires that if the texture's pixel format is a compressed
+        // format, the sourceSize must be a multiple of the pixel format's block size or be
+        // clamped to the edge of the texture if the block extends outside the bounds of a
+        // texture.
+        const Extent3D clampedSize =
+            texture->ClampToMipLevelVirtualSize(dst->mipLevel, dst->origin, copySize);
+        const uint32_t copyBaseLayer = dst->origin.z;
+        const uint32_t copyLayerCount = copySize.depth;
+        const uint64_t bytesPerImage =
+            dataLayout.rowsPerImage * dataLayout.bytesPerRow / blockHeight;
+        // Issue one blit per destination slice, advancing the source offset by one image each time.
+        uint64_t bufferOffset = dataLayout.offset;
+        for (uint32_t copyLayer = copyBaseLayer; copyLayer < copyBaseLayer + copyLayerCount;
+             ++copyLayer) {
+            [GetPendingCommandContext()->EnsureBlit()
+                     copyFromBuffer:ToBackend(source)->GetBufferHandle()
+                       sourceOffset:bufferOffset
+                  sourceBytesPerRow:dataLayout.bytesPerRow
+                sourceBytesPerImage:bytesPerImage
+                         sourceSize:MTLSizeMake(clampedSize.width, clampedSize.height, 1)
+                          toTexture:texture->GetMTLTexture()
+                   destinationSlice:copyLayer
+                   destinationLevel:dst->mipLevel
+                  destinationOrigin:MTLOriginMake(dst->origin.x, dst->origin.y, 0)];
+
+            bufferOffset += bytesPerImage;
+        }
+
+        return {};
+    }
+
     TextureBase* Device::CreateTextureWrappingIOSurface(const ExternalImageDescriptor* descriptor,
                                                         IOSurfaceRef ioSurface,
                                                         uint32_t plane) {
diff --git a/src/dawn_native/metal/QueueMTL.h b/src/dawn_native/metal/QueueMTL.h
index 2dd718e..bda47eb 100644
--- a/src/dawn_native/metal/QueueMTL.h
+++ b/src/dawn_native/metal/QueueMTL.h
@@ -28,6 +28,11 @@
 
       private:
         MaybeError SubmitImpl(uint32_t commandCount, CommandBufferBase* const* commands) override;
+        MaybeError WriteTextureImpl(const TextureCopyView* destination,
+                                    const void* data,
+                                    size_t dataSize,
+                                    const TextureDataLayout* dataLayout,
+                                    const Extent3D* writeSize) override;
     };
 
 }}  // namespace dawn_native::metal
diff --git a/src/dawn_native/metal/QueueMTL.mm b/src/dawn_native/metal/QueueMTL.mm
index ffe6ca1..c0245b1 100644
--- a/src/dawn_native/metal/QueueMTL.mm
+++ b/src/dawn_native/metal/QueueMTL.mm
@@ -14,12 +14,62 @@
 
 #include "dawn_native/metal/QueueMTL.h"
 
+#include "common/Math.h"
+#include "dawn_native/Buffer.h"
+#include "dawn_native/CommandValidation.h"
+#include "dawn_native/Commands.h"
+#include "dawn_native/DynamicUploader.h"
 #include "dawn_native/metal/CommandBufferMTL.h"
 #include "dawn_native/metal/DeviceMTL.h"
 #include "dawn_platform/DawnPlatform.h"
 #include "dawn_platform/tracing/TraceEvent.h"
 
 namespace dawn_native { namespace metal {
+    namespace {
+        ResultOrError<UploadHandle> UploadTextureDataAligningBytesPerRow(
+            DeviceBase* device,
+            const void* data,
+            size_t dataSize,
+            uint32_t alignedBytesPerRow,
+            uint32_t alignedRowsPerImage,
+            const TextureDataLayout* dataLayout,
+            const Format& textureFormat,
+            const Extent3D* writeSize) {
+            uint32_t newDataSize = ComputeRequiredBytesInCopy(
+                textureFormat, *writeSize, alignedBytesPerRow, alignedRowsPerImage);
+            // newDataSize is the size of the repacked data; reserve that much upload memory.
+            UploadHandle uploadHandle;
+            DAWN_TRY_ASSIGN(uploadHandle, device->GetDynamicUploader()->Allocate(
+                                              newDataSize, device->GetPendingCommandSerial()));
+            ASSERT(uploadHandle.mappedBuffer != nullptr);
+            // NOTE(review): dataSize is unused; presumably the caller has validated the bounds.
+            // TODO(tommek@google.com): Add an optimization to do a single memcpy if the data
+            // is already correctly packed.
+            uint8_t* dstPointer = static_cast<uint8_t*>(uploadHandle.mappedBuffer);
+            const uint8_t* srcPointer = static_cast<const uint8_t*>(data);
+            srcPointer += dataLayout->offset;
+            // Row counts in block rows; a rowsPerImage of 0 falls back to the write height.
+            uint32_t alignedRowsPerImageInBlock = alignedRowsPerImage / textureFormat.blockHeight;
+            uint32_t dataRowsPerImageInBlock = dataLayout->rowsPerImage / textureFormat.blockHeight;
+            if (dataRowsPerImageInBlock == 0) {
+                dataRowsPerImageInBlock = writeSize->height / textureFormat.blockHeight;
+            }
+            // Source rows past the written height are skipped at the end of every image.
+            ASSERT(dataRowsPerImageInBlock >= alignedRowsPerImageInBlock);
+            uint64_t imageAdditionalStride =
+                dataLayout->bytesPerRow * (dataRowsPerImageInBlock - alignedRowsPerImageInBlock);
+            for (uint32_t d = 0; d < writeSize->depth; ++d) {
+                for (uint32_t h = 0; h < alignedRowsPerImageInBlock; ++h) {
+                    memcpy(dstPointer, srcPointer, alignedBytesPerRow);  // tight row, no padding
+                    dstPointer += alignedBytesPerRow;
+                    srcPointer += dataLayout->bytesPerRow;
+                }
+                srcPointer += imageAdditionalStride;
+            }
+
+            return uploadHandle;
+        }
+    }
 
     Queue::Queue(Device* device) : QueueBase(device) {
     }
@@ -39,4 +89,41 @@
         return {};
     }
 
+    // Metal can write CPU data straight into a texture with replaceRegion, but that requires a
+    // non-private storage mode, and Dawn uses the private storage mode by default for all textures
+    // except IOSurfaces on macOS. The data is therefore uploaded through a staging buffer.
+    MaybeError Queue::WriteTextureImpl(const TextureCopyView* destination,
+                                       const void* data,
+                                       size_t dataSize,
+                                       const TextureDataLayout* dataLayout,
+                                       const Extent3D* writeSize) {
+        uint32_t blockSize = destination->texture->GetFormat().blockByteSize;
+        uint32_t blockWidth = destination->texture->GetFormat().blockWidth;
+        // We are only copying the part of the data that will appear in the texture.
+        // Note that validating texture copy range ensures that writeSize->width and
+        // writeSize->height are multiples of blockWidth and blockHeight respectively.
+        uint32_t alignedBytesPerRow = (writeSize->width) / blockWidth * blockSize;
+        uint32_t alignedRowsPerImage = writeSize->height;
+        // Repack the user data into upload memory using the tight layout computed above.
+        UploadHandle uploadHandle;
+        DAWN_TRY_ASSIGN(uploadHandle,
+                        UploadTextureDataAligningBytesPerRow(
+                            GetDevice(), data, dataSize, alignedBytesPerRow, alignedRowsPerImage,
+                            dataLayout, destination->texture->GetFormat(), writeSize));
+        // Describe the repacked data: same layout struct, but with the upload offset and pitches.
+        TextureDataLayout passDataLayout = *dataLayout;
+        passDataLayout.offset = uploadHandle.startOffset;
+        passDataLayout.bytesPerRow = alignedBytesPerRow;
+        passDataLayout.rowsPerImage = alignedRowsPerImage;
+        // Only the texture, mip level and origin of the destination are forwarded.
+        TextureCopy textureCopy;
+        textureCopy.texture = destination->texture;
+        textureCopy.mipLevel = destination->mipLevel;
+        textureCopy.origin = destination->origin;
+
+        return ToBackend(GetDevice())
+            ->CopyFromStagingToTexture(uploadHandle.stagingBuffer, passDataLayout, &textureCopy,
+                                       *writeSize);
+    }
+
 }}  // namespace dawn_native::metal
diff --git a/src/dawn_native/metal/UtilsMetal.h b/src/dawn_native/metal/UtilsMetal.h
index 091d828..fe0e228 100644
--- a/src/dawn_native/metal/UtilsMetal.h
+++ b/src/dawn_native/metal/UtilsMetal.h
@@ -16,6 +16,8 @@
 #define DAWNNATIVE_METAL_UTILSMETAL_H_
 
 #include "dawn_native/dawn_platform.h"
+#include "dawn_native/metal/DeviceMTL.h"
+#include "dawn_native/metal/TextureMTL.h"
 
 #import <Metal/Metal.h>
 
@@ -23,6 +25,34 @@
 
     MTLCompareFunction ToMetalCompareFunction(wgpu::CompareFunction compareFunction);
 
+    struct TextureBufferCopySplit {
+        static constexpr uint32_t kMaxTextureBufferCopyRegions = 3;  // capacity of |copies|
+        // One copy region: its offset and pitches in the buffer and the texture box it covers.
+        struct CopyInfo {
+            NSUInteger bufferOffset;
+            NSUInteger bytesPerRow;
+            NSUInteger bytesPerImage;
+            Origin3D textureOrigin;
+            Extent3D copyExtent;
+        };
+        // Number of entries of |copies| that are in use.
+        uint32_t count = 0;
+        std::array<CopyInfo, kMaxTextureBufferCopyRegions> copies;
+    };
+
+    TextureBufferCopySplit ComputeTextureBufferCopySplit(const Texture* texture,
+                                                         uint32_t mipLevel,
+                                                         Origin3D origin,
+                                                         Extent3D copyExtent,
+                                                         uint64_t bufferSize,
+                                                         uint64_t bufferOffset,
+                                                         uint32_t bytesPerRow,
+                                                         uint32_t rowsPerImage);
+
+    void EnsureDestinationTextureInitialized(Texture* texture,
+                                             const TextureCopy& dst,
+                                             const Extent3D& size);
+
 }}  // namespace dawn_native::metal
 
 #endif  // DAWNNATIVE_METAL_UTILSMETAL_H_
diff --git a/src/dawn_native/metal/UtilsMetal.mm b/src/dawn_native/metal/UtilsMetal.mm
index aeb4f7a..13b4668 100644
--- a/src/dawn_native/metal/UtilsMetal.mm
+++ b/src/dawn_native/metal/UtilsMetal.mm
@@ -13,6 +13,7 @@
 // limitations under the License.
 
 #include "dawn_native/metal/UtilsMetal.h"
+#include "dawn_native/CommandBuffer.h"
 
 #include "common/Assert.h"
 
@@ -41,4 +42,126 @@
         }
     }
 
+    TextureBufferCopySplit ComputeTextureBufferCopySplit(const Texture* texture,
+                                                         uint32_t mipLevel,
+                                                         Origin3D origin,
+                                                         Extent3D copyExtent,
+                                                         uint64_t bufferSize,
+                                                         uint64_t bufferOffset,
+                                                         uint32_t bytesPerRow,
+                                                         uint32_t rowsPerImage) {
+        TextureBufferCopySplit copy;
+        const Format textureFormat = texture->GetFormat();
+        // Splits one buffer<->texture copy into at most three regions (see the workaround below).
+        // When copying textures from/to an unpacked buffer, the Metal validation layer doesn't
+        // compute the correct range when checking if the buffer is big enough to contain the
+        // data for the whole copy. Instead of looking at the position of the last texel in the
+        // buffer, it computes the volume of the 3D box with bytesPerRow * (rowsPerImage /
+        // format.blockHeight) * copySize.depth. For example considering the pixel buffer below
+        // where in memory, each row data (D) of the texture is followed by some padding data
+        // (P):
+        //     |DDDDDDD|PP|
+        //     |DDDDDDD|PP|
+        //     |DDDDDDD|PP|
+        //     |DDDDDDD|PP|
+        //     |DDDDDDA|PP|
+        // The last pixel read will be A, but the driver will think it is the whole last padding
+        // row, causing it to generate an error when the pixel buffer is just big enough.
+
+        // We work around this limitation by detecting when Metal would complain and copy the
+        // last image and row separately using tight sourceBytesPerRow or sourceBytesPerImage.
+        uint32_t dataRowsPerImage = rowsPerImage / textureFormat.blockHeight;
+        uint64_t bytesPerImage = static_cast<uint64_t>(bytesPerRow) * dataRowsPerImage;
+
+        // Metal validation layer requires that if the texture's pixel format is a compressed
+        // format, the sourceSize must be a multiple of the pixel format's block size or be
+        // clamped to the edge of the texture if the block extends outside the bounds of a
+        // texture.
+        const Extent3D clampedCopyExtent =
+            texture->ClampToMipLevelVirtualSize(mipLevel, origin, copyExtent);
+
+        ASSERT(texture->GetDimension() == wgpu::TextureDimension::e2D);
+        // Check whether buffer size is big enough. The product is evaluated in 64 bits (see
+        // bytesPerImage) so that a large copy cannot wrap around and skip the workaround.
+        bool needWorkaround = bufferSize - bufferOffset < bytesPerImage * copyExtent.depth;
+        if (!needWorkaround) {
+            copy.count = 1;
+            copy.copies[0].bufferOffset = bufferOffset;
+            copy.copies[0].bytesPerRow = bytesPerRow;
+            copy.copies[0].bytesPerImage = bytesPerImage;
+            copy.copies[0].textureOrigin = origin;
+            copy.copies[0].copyExtent = {clampedCopyExtent.width, clampedCopyExtent.height,
+                                         copyExtent.depth};
+            return copy;
+        }
+
+        uint64_t currentOffset = bufferOffset;
+
+        // Doing all the copy except the last image.
+        if (copyExtent.depth > 1) {
+            copy.copies[copy.count].bufferOffset = currentOffset;
+            copy.copies[copy.count].bytesPerRow = bytesPerRow;
+            copy.copies[copy.count].bytesPerImage = bytesPerImage;
+            copy.copies[copy.count].textureOrigin = origin;
+            copy.copies[copy.count].copyExtent = {clampedCopyExtent.width, clampedCopyExtent.height,
+                                                  copyExtent.depth - 1};
+
+            ++copy.count;
+
+            // Update offset to copy to the last image (64-bit because bytesPerImage is 64-bit).
+            currentOffset += (copyExtent.depth - 1) * bytesPerImage;
+        }
+
+        // Doing all the copy in last image except the last row.
+        uint32_t copyBlockRowCount = copyExtent.height / textureFormat.blockHeight;
+        if (copyBlockRowCount > 1) {
+            copy.copies[copy.count].bufferOffset = currentOffset;
+            copy.copies[copy.count].bytesPerRow = bytesPerRow;
+            copy.copies[copy.count].bytesPerImage = bytesPerRow * (copyBlockRowCount - 1);
+            copy.copies[copy.count].textureOrigin = {origin.x, origin.y,
+                                                     origin.z + copyExtent.depth - 1};
+
+            ASSERT(copyExtent.height - textureFormat.blockHeight <
+                   texture->GetMipLevelVirtualSize(mipLevel).height);
+            copy.copies[copy.count].copyExtent = {clampedCopyExtent.width,
+                                                  copyExtent.height - textureFormat.blockHeight, 1};
+
+            ++copy.count;
+
+            // Update offset to copy to the last row, widening before the multiplication.
+            currentOffset += static_cast<uint64_t>(copyBlockRowCount - 1) * bytesPerRow;
+        }
+
+        // Doing the last row copy with the exact number of bytes in last row.
+        // Workaround this issue in a way just like the copy to a 1D texture.
+        uint32_t lastRowDataSize =
+            (copyExtent.width / textureFormat.blockWidth) * textureFormat.blockByteSize;
+        uint32_t lastRowCopyExtentHeight =
+            textureFormat.blockHeight + clampedCopyExtent.height - copyExtent.height;
+        ASSERT(lastRowCopyExtentHeight <= textureFormat.blockHeight);
+
+        copy.copies[copy.count].bufferOffset = currentOffset;
+        copy.copies[copy.count].bytesPerRow = lastRowDataSize;
+        copy.copies[copy.count].bytesPerImage = lastRowDataSize;
+        copy.copies[copy.count].textureOrigin = {
+            origin.x, origin.y + copyExtent.height - textureFormat.blockHeight,
+            origin.z + copyExtent.depth - 1};
+        copy.copies[copy.count].copyExtent = {clampedCopyExtent.width, lastRowCopyExtentHeight, 1};
+        ++copy.count;
+
+        return copy;
+    }
+
+    void EnsureDestinationTextureInitialized(Texture* texture,
+                                             const TextureCopy& dst,
+                                             const Extent3D& size) {
+        ASSERT(texture == dst.texture.Get());
+        SubresourceRange affected = GetSubresourcesAffectedByCopy(dst, size);
+        if (!IsCompleteSubresourceCopiedTo(dst.texture.Get(), size, dst.mipLevel)) {
+            texture->EnsureSubresourceContentInitialized(affected);  // Partial copy.
+            return;
+        }
+        texture->SetIsSubresourceContentInitialized(true, affected);  // Complete copy.
+    }
+
 }}  // namespace dawn_native::metal
diff --git a/src/dawn_wire/client/Queue.cpp b/src/dawn_wire/client/Queue.cpp
index f9ee04e..ad11673 100644
--- a/src/dawn_wire/client/Queue.cpp
+++ b/src/dawn_wire/client/Queue.cpp
@@ -80,6 +80,7 @@
         QueueWriteTextureInternalCmd cmd;
         cmd.queueId = id;
         cmd.destination = destination;
+        cmd.data = static_cast<const uint8_t*>(data);
         cmd.dataSize = dataSize;
         cmd.dataLayout = dataLayout;
         cmd.writeSize = writeSize;
diff --git a/src/tests/end2end/CompressedTextureFormatTests.cpp b/src/tests/end2end/CompressedTextureFormatTests.cpp
index dd0302e..d1a6cd0 100644
--- a/src/tests/end2end/CompressedTextureFormatTests.cpp
+++ b/src/tests/end2end/CompressedTextureFormatTests.cpp
@@ -47,51 +47,54 @@
         return mIsBCFormatSupported;
     }
 
-    // Copy the compressed texture data into the destination texture as is specified in copyConfig.
-    void InitializeDataInCompressedTexture(wgpu::Texture bcCompressedTexture,
-                                           const CopyConfig& copyConfig) {
-        ASSERT(IsBCFormatSupported());
-
-        // Compute the upload buffer size with bytesPerRowAlignment and the copy region.
-        const wgpu::Extent3D textureSize = copyConfig.textureDescriptor.size;
-        uint32_t actualWidthAtLevel = textureSize.width >> copyConfig.viewMipmapLevel;
-        uint32_t actualHeightAtLevel = textureSize.height >> copyConfig.viewMipmapLevel;
-        uint32_t copyWidthInBlockAtLevel =
-            (actualWidthAtLevel + kBCBlockWidthInTexels - 1) / kBCBlockWidthInTexels;
-        uint32_t copyHeightInBlockAtLevel =
-            (actualHeightAtLevel + kBCBlockHeightInTexels - 1) / kBCBlockHeightInTexels;
-        uint32_t bufferRowPitchInBytes = 0;
+    // Build the upload data for copyConfig: zero-filled, with each copied block set to one-block data.
+    std::vector<uint8_t> UploadData(const CopyConfig& copyConfig) {
+        uint32_t copyWidthInBlock = copyConfig.copyExtent3D.width / kBCBlockWidthInTexels;
+        uint32_t copyHeightInBlock = copyConfig.copyExtent3D.height / kBCBlockHeightInTexels;
+        uint32_t rowPitchInBytes = 0;
         if (copyConfig.bytesPerRowAlignment != 0) {
-            bufferRowPitchInBytes = copyConfig.bytesPerRowAlignment;
+            rowPitchInBytes = copyConfig.bytesPerRowAlignment;
         } else {
-            bufferRowPitchInBytes =
-                copyWidthInBlockAtLevel *
-                utils::GetTexelBlockSizeInBytes(copyConfig.textureDescriptor.format);
+            rowPitchInBytes = copyWidthInBlock *
+                              utils::GetTexelBlockSizeInBytes(copyConfig.textureDescriptor.format);
         }
-        uint32_t copyBytesPerImage = bufferRowPitchInBytes * copyHeightInBlockAtLevel;
+        uint32_t copyRowsPerImageInBlock = copyConfig.rowsPerImage / kBCBlockHeightInTexels;
+        if (copyRowsPerImageInBlock == 0) {  // Fall back to the copy height in blocks.
+            copyRowsPerImageInBlock = copyHeightInBlock;
+        }
+        uint32_t copyBytesPerImage = rowPitchInBytes * copyRowsPerImageInBlock;
         uint32_t uploadBufferSize =
             copyConfig.bufferOffset + copyBytesPerImage * copyConfig.copyExtent3D.depth;
 
-        // Fill uploadData with the pre-prepared one-block compressed texture data.
-        std::vector<uint8_t> uploadData(uploadBufferSize, 0);
+        // Fill data with the pre-prepared one-block compressed texture data.
+        std::vector<uint8_t> data(uploadBufferSize, 0);
         std::vector<uint8_t> oneBlockCompressedTextureData =
             GetOneBlockBCFormatTextureData(copyConfig.textureDescriptor.format);
         for (uint32_t layer = 0; layer < copyConfig.copyExtent3D.depth; ++layer) {
-            for (uint32_t h = 0; h < copyHeightInBlockAtLevel; ++h) {
-                for (uint32_t w = 0; w < copyWidthInBlockAtLevel; ++w) {
-                    uint32_t uploadBufferOffset =
-                        copyConfig.bufferOffset + copyBytesPerImage * layer +
-                        bufferRowPitchInBytes * h + oneBlockCompressedTextureData.size() * w;
-                    std::memcpy(&uploadData[uploadBufferOffset],
-                                oneBlockCompressedTextureData.data(),
+            for (uint32_t h = 0; h < copyHeightInBlock; ++h) {
+                for (uint32_t w = 0; w < copyWidthInBlock; ++w) {
+                    uint32_t uploadBufferOffset = copyConfig.bufferOffset +
+                                                  copyBytesPerImage * layer + rowPitchInBytes * h +
+                                                  oneBlockCompressedTextureData.size() * w;
+                    std::memcpy(&data[uploadBufferOffset], oneBlockCompressedTextureData.data(),
                                 oneBlockCompressedTextureData.size() * sizeof(uint8_t));
                 }
             }
         }
 
+        return data;
+    }
+
+    // Copy the compressed texture data into the destination texture as is specified in copyConfig.
+    void InitializeDataInCompressedTexture(wgpu::Texture bcCompressedTexture,
+                                           const CopyConfig& copyConfig) {
+        ASSERT(IsBCFormatSupported());
+
+        std::vector<uint8_t> data = UploadData(copyConfig);
+
         // Copy texture data from a staging buffer to the destination texture.
-        wgpu::Buffer stagingBuffer = utils::CreateBufferFromData(
-            device, uploadData.data(), uploadBufferSize, wgpu::BufferUsage::CopySrc);
+        wgpu::Buffer stagingBuffer = utils::CreateBufferFromData(device, data.data(), data.size(),
+                                                                 wgpu::BufferUsage::CopySrc);
         wgpu::BufferCopyView bufferCopyView =
             utils::CreateBufferCopyView(stagingBuffer, copyConfig.bufferOffset,
                                         copyConfig.bytesPerRowAlignment, copyConfig.rowsPerImage);
@@ -176,7 +179,6 @@
                                             const std::vector<RGBA8>& expected) {
         ASSERT(IsBCFormatSupported());
 
-        ASSERT(expected.size() == renderTargetSize.width * renderTargetSize.height);
         utils::BasicRenderPass renderPass =
             utils::CreateBasicRenderPass(device, renderTargetSize.width, renderTargetSize.height);
 
@@ -204,6 +206,10 @@
 
         wgpu::Texture bcTexture = CreateTextureWithCompressedData(config);
 
+        VerifyBCTexture(config, bcTexture);
+    }
+
+    void VerifyBCTexture(const CopyConfig& config, wgpu::Texture bcTexture) {
         wgpu::RenderPipeline renderPipeline = CreateRenderPipelineForTest();
 
         wgpu::Extent3D virtualSizeAtLevel = GetVirtualSizeAtLevel(config);
@@ -221,7 +227,7 @@
         noPaddingExtent3D.depth = 1u;
 
         std::vector<RGBA8> expectedData =
-            GetExpectedData(config.textureDescriptor.format, virtualSizeAtLevel);
+            GetExpectedData(config.textureDescriptor.format, noPaddingExtent3D);
 
         wgpu::Origin3D firstLayerCopyOrigin = {config.copyOrigin3D.x, config.copyOrigin3D.y, 0};
         for (uint32_t layer = config.copyOrigin3D.z;
@@ -1067,3 +1073,102 @@
                       OpenGLBackend(),
                       VulkanBackend(),
                       VulkanBackend({"use_temporary_buffer_in_texture_to_texture_copy"}));
+
+class CompressedTextureWriteTextureTest : public CompressedTextureBCFormatTest {
+  protected:
+    void SetUp() override {
+        CompressedTextureBCFormatTest::SetUp();
+        DAWN_SKIP_TEST_IF(!IsBCFormatSupported());  // All tests here need BC format support.
+    }
+    // Upload the bytes from UploadData(copyConfig) into bcCompressedTexture with
+    // Queue::WriteTexture, using the data layout, mip level, origin and extent from copyConfig.
+    void WriteToCompressedTexture(wgpu::Texture bcCompressedTexture, const CopyConfig& copyConfig) {
+        ASSERT(IsBCFormatSupported());
+
+        std::vector<uint8_t> data = UploadData(copyConfig);
+
+        wgpu::TextureDataLayout textureDataLayout = utils::CreateTextureDataLayout(
+            copyConfig.bufferOffset, copyConfig.bytesPerRowAlignment, copyConfig.rowsPerImage);
+
+        wgpu::TextureCopyView textureCopyView = utils::CreateTextureCopyView(
+            bcCompressedTexture, copyConfig.viewMipmapLevel, copyConfig.copyOrigin3D);
+
+        queue.WriteTexture(&textureCopyView, data.data(), data.size(), &textureDataLayout,
+                           &copyConfig.copyExtent3D);
+    }
+
+    // Write pre-prepared BC format data into a newly created BC texture, then verify that we
+    // can render correctly with the pixel values sampled from the BC texture.
+    void TestWriteRegionIntoBCFormatTextures(const CopyConfig& config) {
+        ASSERT(IsBCFormatSupported());
+
+        wgpu::Texture bcTexture = device.CreateTexture(&config.textureDescriptor);
+        WriteToCompressedTexture(bcTexture, config);
+
+        VerifyBCTexture(config, bcTexture);
+    }
+};
+
+// Test WriteTexture to a sub-region of a single-layer 2D texture, with a non-zero origin and
+// an explicit bytesPerRow and rowsPerImage, for every BC format.
+TEST_P(CompressedTextureWriteTextureTest, Basic) {
+    CopyConfig config;
+    config.textureDescriptor.usage = kDefaultBCFormatTextureUsage;
+    config.textureDescriptor.size = {20, 24, 1};
+
+    config.copyOrigin3D = {4, 8, 0};
+    config.copyExtent3D = {12, 16, 1};
+    config.bytesPerRowAlignment = 511;  // Passed as bytesPerRow; not a multiple of 256.
+    config.rowsPerImage = 20;           // Larger than the 16-row copy height.
+
+    for (wgpu::TextureFormat format : kBCFormats) {
+        config.textureDescriptor.format = format;
+        TestWriteRegionIntoBCFormatTextures(config);
+    }
+}
+
+// Test writing a sub-region of several 2D texture array layers at once, for every BC format.
+TEST_P(CompressedTextureWriteTextureTest, WriteMultiple2DArrayLayers) {
+    CopyConfig config;
+    config.textureDescriptor.usage = kDefaultBCFormatTextureUsage;
+    config.textureDescriptor.size = {20, 24, 9};
+
+    config.copyOrigin3D = {4, 8, 3};
+    config.copyExtent3D = {12, 16, 6};  // Covers array layers 3 through 8 of the 9 layers.
+    config.bytesPerRowAlignment = 511;  // Passed as bytesPerRow; not a multiple of 256.
+    config.rowsPerImage = 20;           // Larger than the 16-row copy height.
+
+    for (wgpu::TextureFormat format : kBCFormats) {
+        config.textureDescriptor.format = format;
+        TestWriteRegionIntoBCFormatTextures(config);
+    }
+}
+
+// Test BC format write textures where the physical size of the destination
+// subresource is different from its virtual size.
+TEST_P(CompressedTextureWriteTextureTest,
+       WriteIntoSubresourceWithPhysicalSizeNotEqualToVirtualSize) {
+    // Texture virtual size at mipLevel 2 will be {15, 15, 1} while the physical
+    // size will be {16, 16, 1}.
+    // Setting copyExtent.width or copyExtent.height to 16 fits in
+    // the texture physical size, but doesn't fit in the virtual size.
+    for (unsigned int w : {12, 16}) {      // 12 fits the virtual width, 16 does not.
+        for (unsigned int h : {12, 16}) {  // 12 fits the virtual height, 16 does not.
+            for (wgpu::TextureFormat format : kBCFormats) {
+                CopyConfig config;
+                config.textureDescriptor.usage = kDefaultBCFormatTextureUsage;
+                config.textureDescriptor.size = {60, 60, 1};
+                config.textureDescriptor.mipLevelCount = 4;
+                config.viewMipmapLevel = 2;
+
+                config.copyOrigin3D = {0, 0, 0};
+                config.copyExtent3D = {w, h, 1};
+                config.bytesPerRowAlignment = 256;  // Passed as bytesPerRow to WriteTexture.
+                config.textureDescriptor.format = format;
+                TestWriteRegionIntoBCFormatTextures(config);
+            }
+        }
+    }
+}
+
+DAWN_INSTANTIATE_TEST(CompressedTextureWriteTextureTest, MetalBackend());
diff --git a/src/tests/end2end/CopyTests.cpp b/src/tests/end2end/CopyTests.cpp
index 4b9eac7..78ebf83 100644
--- a/src/tests/end2end/CopyTests.cpp
+++ b/src/tests/end2end/CopyTests.cpp
@@ -37,7 +37,7 @@
         uint32_t rowsPerImage;
     };
 
-    static std::vector<RGBA8> GetExpectedTextureData(const utils::BufferTextureCopyLayout& layout) {
+    static std::vector<RGBA8> GetExpectedTextureData(const utils::TextureDataCopyLayout& layout) {
         std::vector<RGBA8> textureData(layout.texelBlockCount);
         for (uint32_t layer = 0; layer < layout.mipSize.depth; ++layer) {
             const uint32_t texelIndexOffsetPerSlice = layout.texelBlocksPerImage * layer;
@@ -97,8 +97,8 @@
         descriptor.usage = wgpu::TextureUsage::CopyDst | wgpu::TextureUsage::CopySrc;
         wgpu::Texture texture = device.CreateTexture(&descriptor);
 
-        const utils::BufferTextureCopyLayout copyLayout =
-            utils::GetBufferTextureCopyLayoutForTexture2DAtLevel(
+        const utils::TextureDataCopyLayout copyLayout =
+            utils::GetTextureDataCopyLayoutForTexture2DAtLevel(
                 kTextureFormat, textureSpec.textureSize, textureSpec.level,
                 bufferSpec.rowsPerImage);
 
@@ -205,8 +205,8 @@
 
         wgpu::CommandEncoder encoder = device.CreateCommandEncoder();
 
-        const utils::BufferTextureCopyLayout copyLayout =
-            utils::GetBufferTextureCopyLayoutForTexture2DAtLevel(
+        const utils::TextureDataCopyLayout copyLayout =
+            utils::GetTextureDataCopyLayoutForTexture2DAtLevel(
                 kTextureFormat, textureSpec.textureSize, textureSpec.level,
                 bufferSpec.rowsPerImage);
 
@@ -281,8 +281,8 @@
 
         // Create an upload buffer and use it to populate the current slice of the texture in
         // `level` mip level
-        const utils::BufferTextureCopyLayout copyLayout =
-            utils::GetBufferTextureCopyLayoutForTexture2DAtLevel(
+        const utils::TextureDataCopyLayout copyLayout =
+            utils::GetTextureDataCopyLayoutForTexture2DAtLevel(
                 kTextureFormat,
                 {srcSpec.textureSize.width, srcSpec.textureSize.height, copySize.depth},
                 srcSpec.level, 0);
diff --git a/src/tests/end2end/QueueTests.cpp b/src/tests/end2end/QueueTests.cpp
index b3f57a4..a145807 100644
--- a/src/tests/end2end/QueueTests.cpp
+++ b/src/tests/end2end/QueueTests.cpp
@@ -19,6 +19,10 @@
 
 #include "tests/DawnTest.h"
 
+#include "common/Math.h"
+#include "utils/TextureFormatUtils.h"
+#include "utils/WGPUHelpers.h"
+
 class QueueTests : public DawnTest {};
 
 // Test that GetDefaultQueue always returns the same object.
@@ -171,3 +175,319 @@
                       MetalBackend(),
                       OpenGLBackend(),
                       VulkanBackend());
+
+class QueueWriteTextureTests : public DawnTest {
+  protected:
+    static constexpr wgpu::TextureFormat kTextureFormat = wgpu::TextureFormat::RGBA8Unorm;
+
+    struct TextureSpec {
+        wgpu::Origin3D copyOrigin;   // Origin of the write inside the texture.
+        wgpu::Extent3D textureSize;  // Size of the texture at mip level 0.
+        uint32_t level;              // Mip level that is written to.
+    };
+
+    struct DataSpec {
+        uint64_t size;          // Total byte size of the data handed to WriteTexture.
+        uint64_t offset;        // Forwarded to the TextureDataLayout.
+        uint32_t bytesPerRow;   // Forwarded to the TextureDataLayout.
+        uint32_t rowsPerImage;  // Forwarded to the TextureDataLayout.
+    };
+    // Build a DataSpec at offset 0 sized for writeSize; zero pitches are replaced by tight ones.
+    static DataSpec MinimumDataSpec(wgpu::Extent3D writeSize,
+                                    uint32_t bytesPerRow = 0,
+                                    uint32_t rowsPerImage = 0) {
+        if (bytesPerRow == 0) {
+            bytesPerRow = writeSize.width * utils::GetTexelBlockSizeInBytes(kTextureFormat);
+        }
+        if (rowsPerImage == 0) {
+            rowsPerImage = writeSize.height;
+        }
+        uint32_t totalDataSize =
+            utils::RequiredBytesInCopy(bytesPerRow, rowsPerImage, writeSize, kTextureFormat);
+        return {totalDataSize, 0, bytesPerRow, rowsPerImage};
+    }
+    // Copy a width x height region of 4-byte texels from srcData into dstData, repacking rows.
+    static void PackTextureData(const uint8_t* srcData,
+                                uint32_t width,
+                                uint32_t height,
+                                uint32_t srcBytesPerRow,
+                                RGBA8* dstData,
+                                uint32_t dstTexelPerRow,
+                                uint32_t texelBlockSize) {
+        for (uint64_t y = 0; y < height; ++y) {
+            for (uint64_t x = 0; x < width; ++x) {
+                uint64_t src = x * texelBlockSize + y * srcBytesPerRow;
+                uint64_t dst = x + y * dstTexelPerRow;
+
+                dstData[dst] = {srcData[src], srcData[src + 1], srcData[src + 2], srcData[src + 3]};
+            }
+        }
+    }
+    // Fill data with a deterministic byte pattern (index modulo 253).
+    static void FillData(uint8_t* data, size_t count) {
+        for (size_t i = 0; i < count; ++i) {
+            data[i] = static_cast<uint8_t>(i % 253);
+        }
+    }
+    // Write generated data into a new texture with WriteTexture and check every written layer.
+    void DoTest(const TextureSpec& textureSpec,
+                const DataSpec& dataSpec,
+                const wgpu::Extent3D& copySize) {
+        // Create data of size `size` and populate it
+        std::vector<uint8_t> data(dataSpec.size);
+        FillData(data.data(), data.size());
+
+        // Create a texture that is `width` x `height` with (`level` + 1) mip levels.
+        wgpu::TextureDescriptor descriptor = {};
+        descriptor.dimension = wgpu::TextureDimension::e2D;
+        descriptor.size = textureSpec.textureSize;
+        descriptor.format = kTextureFormat;
+        descriptor.mipLevelCount = textureSpec.level + 1;
+        descriptor.usage = wgpu::TextureUsage::CopyDst | wgpu::TextureUsage::CopySrc;
+        wgpu::Texture texture = device.CreateTexture(&descriptor);
+
+        wgpu::TextureDataLayout textureDataLayout = utils::CreateTextureDataLayout(
+            dataSpec.offset, dataSpec.bytesPerRow, dataSpec.rowsPerImage);
+
+        wgpu::TextureCopyView textureCopyView =
+            utils::CreateTextureCopyView(texture, textureSpec.level, textureSpec.copyOrigin);
+
+        queue.WriteTexture(&textureCopyView, data.data(), dataSpec.size, &textureDataLayout,
+                           &copySize);
+
+        const uint32_t bytesPerTexel = utils::GetTexelBlockSizeInBytes(kTextureFormat);
+        wgpu::Extent3D mipSize = {textureSpec.textureSize.width >> textureSpec.level,
+                                  textureSpec.textureSize.height >> textureSpec.level,
+                                  textureSpec.textureSize.depth};
+        uint32_t alignedBytesPerRow = Align(dataSpec.bytesPerRow, bytesPerTexel);
+        uint32_t appliedRowsPerImage =
+            dataSpec.rowsPerImage > 0 ? dataSpec.rowsPerImage : copySize.height;  // API default.
+        uint32_t bytesPerImage = dataSpec.bytesPerRow * appliedRowsPerImage;
+
+        const uint32_t maxArrayLayer = textureSpec.copyOrigin.z + copySize.depth;
+
+        uint64_t dataOffset = dataSpec.offset;
+        const uint32_t texelCountLastLayer =
+            (alignedBytesPerRow / bytesPerTexel) * (mipSize.height - 1) + mipSize.width;
+        for (uint32_t slice = textureSpec.copyOrigin.z; slice < maxArrayLayer; ++slice) {
+            // Pack the data in the specified copy region to have the same
+            // format as the expected texture data.
+            std::vector<RGBA8> expected(texelCountLastLayer);
+            PackTextureData(&data[dataOffset], copySize.width, copySize.height,
+                            dataSpec.bytesPerRow, expected.data(), copySize.width, bytesPerTexel);
+
+            EXPECT_TEXTURE_RGBA8_EQ(expected.data(), texture, textureSpec.copyOrigin.x,
+                                    textureSpec.copyOrigin.y, copySize.width, copySize.height,
+                                    textureSpec.level, slice)
+                << "Write to texture failed copying " << dataSpec.size << "-byte data with offset "
+                << dataSpec.offset << " and bytes per row " << dataSpec.bytesPerRow << " to [("
+                << textureSpec.copyOrigin.x << ", " << textureSpec.copyOrigin.y << "), ("
+                << textureSpec.copyOrigin.x + copySize.width << ", "
+                << textureSpec.copyOrigin.y + copySize.height << ")) region of "
+                << textureSpec.textureSize.width << " x " << textureSpec.textureSize.height
+                << " texture at mip level " << textureSpec.level << " layer " << slice << std::endl;
+
+            dataOffset += bytesPerImage;  // Advance to the data of the next array layer.
+        }
+    }
+};
+
+// Test writing the whole texture (level 0, all layers) for varying widths, heights and layer counts.
+TEST_P(QueueWriteTextureTests, VaryingTextureSize) {
+    for (unsigned int w : {127, 128}) {
+        for (unsigned int h : {63, 64}) {
+            for (unsigned int d : {1, 3, 4}) {
+                TextureSpec textureSpec;
+                textureSpec.textureSize = {w, h, d};
+                textureSpec.copyOrigin = {0, 0, 0};
+                textureSpec.level = 0;
+
+                DoTest(textureSpec, MinimumDataSpec({w, h, d}), {w, h, d});
+            }
+        }
+    }
+}
+
+// Test writing a single texel at varying x/y origins of a single-layer texture.
+TEST_P(QueueWriteTextureTests, VaryingTextureOffset) {
+    constexpr uint32_t kWidth = 259;
+    constexpr uint32_t kHeight = 127;
+    DataSpec pixelData = MinimumDataSpec({1, 1, 1});
+
+    constexpr wgpu::Extent3D kCopySize = {1, 1, 1};
+    constexpr wgpu::Extent3D kTextureSize = {kWidth, kHeight, 1};
+    TextureSpec defaultTextureSpec;
+    defaultTextureSpec.textureSize = kTextureSize;
+    defaultTextureSpec.level = 0;
+
+    for (unsigned int w : {0u, kWidth / 7, kWidth / 3, kWidth - 1}) {
+        for (unsigned int h : {0u, kHeight / 7, kHeight / 3, kHeight - 1}) {
+            TextureSpec textureSpec = defaultTextureSpec;
+            textureSpec.copyOrigin = {w, h, 0};  // w and h are used as the x and y origin.
+            DoTest(textureSpec, pixelData, kCopySize);
+        }
+    }
+}
+
+// Test writing a single texel at varying x/y/layer origins of a texture array.
+TEST_P(QueueWriteTextureTests, VaryingTextureArrayOffset) {
+    constexpr uint32_t kWidth = 259;
+    constexpr uint32_t kHeight = 127;
+    constexpr uint32_t kDepth = 62;
+    DataSpec pixelData = MinimumDataSpec({1, 1, 1});
+
+    constexpr wgpu::Extent3D kCopySize = {1, 1, 1};
+    constexpr wgpu::Extent3D kTextureSize = {kWidth, kHeight, kDepth};
+    TextureSpec defaultTextureSpec;
+    defaultTextureSpec.textureSize = kTextureSize;
+    defaultTextureSpec.level = 0;
+
+    for (unsigned int w : {0u, kWidth / 7, kWidth / 3, kWidth - 1}) {
+        for (unsigned int h : {0u, kHeight / 7, kHeight / 3, kHeight - 1}) {
+            for (unsigned int d : {0u, kDepth / 7, kDepth / 3, kDepth - 1}) {
+                TextureSpec textureSpec = defaultTextureSpec;
+                textureSpec.copyOrigin = {w, h, d};  // d selects the array layer written to.
+                DoTest(textureSpec, pixelData, kCopySize);
+            }
+        }
+    }
+}
+
+// Test writes of varying width and height that start at the origin of a single-layer texture.
+TEST_P(QueueWriteTextureTests, VaryingWriteSize) {
+    constexpr uint32_t kWidth = 257;
+    constexpr uint32_t kHeight = 127;
+    for (unsigned int w : {13, 63, 128, 256}) {
+        for (unsigned int h : {16, 19, 32, 63}) {
+            TextureSpec textureSpec;
+            textureSpec.copyOrigin = {0, 0, 0};
+            textureSpec.level = 0;
+            textureSpec.textureSize = {kWidth, kHeight, 1};
+            DoTest(textureSpec, MinimumDataSpec({w, h, 1}), {w, h, 1});
+        }
+    }
+}
+
+// Test writes of varying width, height and layer count that start at the origin of a texture array.
+TEST_P(QueueWriteTextureTests, VaryingArrayWriteSize) {
+    constexpr uint32_t kWidth = 257;
+    constexpr uint32_t kHeight = 127;
+    constexpr uint32_t kDepth = 65;
+    for (unsigned int w : {13, 63, 128, 256}) {
+        for (unsigned int h : {16, 19, 32, 63}) {
+            for (unsigned int d : {3, 6}) {
+                TextureSpec textureSpec;
+                textureSpec.copyOrigin = {0, 0, 0};
+                textureSpec.level = 0;
+                textureSpec.textureSize = {kWidth, kHeight, kDepth};
+                DoTest(textureSpec, MinimumDataSpec({w, h, d}), {w, h, d});
+            }
+        }
+    }
+}
+
+// Test writing a whole mip level, for mip levels 1 through 3.
+TEST_P(QueueWriteTextureTests, TextureWriteToMip) {
+    constexpr uint32_t kWidth = 259;
+    constexpr uint32_t kHeight = 127;
+
+    TextureSpec defaultTextureSpec;
+    defaultTextureSpec.copyOrigin = {0, 0, 0};
+    defaultTextureSpec.textureSize = {kWidth, kHeight, 1};
+
+    for (unsigned int i = 1; i < 4; ++i) {
+        TextureSpec textureSpec = defaultTextureSpec;
+        textureSpec.level = i;
+        DoTest(textureSpec, MinimumDataSpec({kWidth >> i, kHeight >> i, 1}),
+               {kWidth >> i, kHeight >> i, 1});
+    }
+}
+
+// Test writing with different multiples of the texel block size as the data offset.
+TEST_P(QueueWriteTextureTests, VaryingDataOffset) {
+    constexpr uint32_t kWidth = 259;
+    constexpr uint32_t kHeight = 127;
+
+    TextureSpec textureSpec;
+    textureSpec.copyOrigin = {0, 0, 0};
+    textureSpec.textureSize = {kWidth, kHeight, 1};
+    textureSpec.level = 0;
+
+    for (unsigned int i : {1, 2, 4, 17, 64, 128, 300}) {
+        DataSpec dataSpec = MinimumDataSpec({kWidth, kHeight, 1});
+        uint64_t offset = i * utils::GetTexelBlockSizeInBytes(kTextureFormat);
+        dataSpec.size += offset;  // Grow the data by the same amount as the offset.
+        dataSpec.offset += offset;
+        DoTest(textureSpec, dataSpec, {kWidth, kHeight, 1});
+    }
+}
+
+// Test writing several array layers with rowsPerImage greater than the copy height.
+TEST_P(QueueWriteTextureTests, VaryingRowsPerImage) {
+    constexpr uint32_t kWidth = 65;
+    constexpr uint32_t kHeight = 31;
+    constexpr uint32_t kDepth = 17;
+
+    constexpr wgpu::Extent3D copySize = {kWidth - 1, kHeight - 1, kDepth - 1};
+
+    for (unsigned int r : {1, 2, 3, 64, 200}) {
+        TextureSpec textureSpec;
+        textureSpec.copyOrigin = {1, 1, 1};
+        textureSpec.textureSize = {kWidth, kHeight, kDepth};
+        textureSpec.level = 0;
+
+        DataSpec dataSpec = MinimumDataSpec(copySize, 0, copySize.height + r);  // r extra rows.
+        DoTest(textureSpec, dataSpec, copySize);
+    }
+}
+
+// Test with bytesPerRow greater than needed (1 to 4 extra bytes per row) in a single-layer write.
+TEST_P(QueueWriteTextureTests, VaryingBytesPerRow) {
+    constexpr uint32_t kWidth = 257;
+    constexpr uint32_t kHeight = 129;
+
+    TextureSpec textureSpec;
+    textureSpec.textureSize = {kWidth, kHeight, 1};
+    textureSpec.copyOrigin = {1, 2, 0};
+    textureSpec.level = 0;
+
+    constexpr wgpu::Extent3D copyExtent = {17, 19, 1};
+
+    for (unsigned int b : {1, 2, 3, 4}) {
+        uint32_t bytesPerRow =
+            copyExtent.width * utils::GetTexelBlockSizeInBytes(kTextureFormat) + b;
+        DoTest(textureSpec, MinimumDataSpec(copyExtent, bytesPerRow, 0), copyExtent);
+    }
+}
+
+// Test with bytesPerRow greater than needed in a write to a texture array.
+TEST_P(QueueWriteTextureTests, VaryingArrayBytesPerRow) {
+    constexpr uint32_t kWidth = 257;
+    constexpr uint32_t kHeight = 129;
+    constexpr uint32_t kLayers = 65;
+
+    TextureSpec textureSpec;
+    textureSpec.textureSize = {kWidth, kHeight, kLayers};
+    textureSpec.copyOrigin = {1, 2, 3};
+    textureSpec.level = 0;
+
+    constexpr wgpu::Extent3D copyExtent = {17, 19, 21};
+
+    // Test with bytesPerRow that is a multiple of the texel block size in bytes
+    for (unsigned int b : {1, 2, 3, 65, 300}) {
+        uint32_t bytesPerRow =
+            (copyExtent.width + b) * utils::GetTexelBlockSizeInBytes(kTextureFormat);
+        uint32_t rowsPerImage = 23;
+        DoTest(textureSpec, MinimumDataSpec(copyExtent, bytesPerRow, rowsPerImage), copyExtent);
+    }
+
+    // Test with bytesPerRow that is not a multiple of the texel block size in bytes
+    for (unsigned int b : {1, 2, 3, 19, 301}) {
+        uint32_t bytesPerRow =
+            copyExtent.width * utils::GetTexelBlockSizeInBytes(kTextureFormat) + b;
+        uint32_t rowsPerImage = 23;
+        DoTest(textureSpec, MinimumDataSpec(copyExtent, bytesPerRow, rowsPerImage), copyExtent);
+    }
+}
+
+DAWN_INSTANTIATE_TEST(QueueWriteTextureTests, MetalBackend());
diff --git a/src/utils/WGPUHelpers.cpp b/src/utils/WGPUHelpers.cpp
index f728549..0a47b8d 100644
--- a/src/utils/WGPUHelpers.cpp
+++ b/src/utils/WGPUHelpers.cpp
@@ -269,9 +269,7 @@
                                               uint32_t rowsPerImage) {
         wgpu::BufferCopyView bufferCopyView = {};
         bufferCopyView.buffer = buffer;
-        bufferCopyView.layout.offset = offset;
-        bufferCopyView.layout.bytesPerRow = bytesPerRow;
-        bufferCopyView.layout.rowsPerImage = rowsPerImage;
+        bufferCopyView.layout = CreateTextureDataLayout(offset, bytesPerRow, rowsPerImage);
 
         return bufferCopyView;
     }
@@ -287,6 +285,17 @@
         return textureCopyView;
     }
 
+    wgpu::TextureDataLayout CreateTextureDataLayout(uint64_t offset,
+                                                    uint32_t bytesPerRow,
+                                                    uint32_t rowsPerImage) {
+        wgpu::TextureDataLayout textureDataLayout;
+        textureDataLayout.offset = offset;
+        textureDataLayout.bytesPerRow = bytesPerRow;
+        textureDataLayout.rowsPerImage = rowsPerImage;
+
+        return textureDataLayout;
+    }
+
     wgpu::SamplerDescriptor GetDefaultSamplerDescriptor() {
         wgpu::SamplerDescriptor desc = {};
 
@@ -391,12 +400,12 @@
     }
 
     // TODO(jiawei.shao@intel.com): support compressed texture formats
-    BufferTextureCopyLayout GetBufferTextureCopyLayoutForTexture2DAtLevel(
+    TextureDataCopyLayout GetTextureDataCopyLayoutForTexture2DAtLevel(
         wgpu::TextureFormat format,
         wgpu::Extent3D textureSizeAtLevel0,
         uint32_t mipmapLevel,
         uint32_t rowsPerImage) {
-        BufferTextureCopyLayout layout;
+        TextureDataCopyLayout layout;
 
         layout.mipSize = {textureSizeAtLevel0.width >> mipmapLevel,
                           textureSizeAtLevel0.height >> mipmapLevel, textureSizeAtLevel0.depth};
diff --git a/src/utils/WGPUHelpers.h b/src/utils/WGPUHelpers.h
index 5c5f9f2..0c63c4d 100644
--- a/src/utils/WGPUHelpers.h
+++ b/src/utils/WGPUHelpers.h
@@ -55,6 +55,9 @@
     wgpu::TextureCopyView CreateTextureCopyView(wgpu::Texture texture,
                                                 uint32_t level,
                                                 wgpu::Origin3D origin);
+    wgpu::TextureDataLayout CreateTextureDataLayout(uint64_t offset,
+                                                    uint32_t bytesPerRow,
+                                                    uint32_t rowsPerImage);
 
     struct ComboRenderPassDescriptor : public wgpu::RenderPassDescriptor {
       public:
@@ -130,7 +133,7 @@
         const wgpu::BindGroupLayout& layout,
         std::initializer_list<BindingInitializationHelper> entriesInitializer);
 
-    struct BufferTextureCopyLayout {
+    struct TextureDataCopyLayout {
         uint64_t byteLength;
         uint64_t texelBlockCount;
         uint32_t bytesPerRow;
@@ -146,7 +149,7 @@
                                          uint32_t bytesPerRow,
                                          uint32_t rowsPerImage,
                                          uint32_t copyArrayLayerCount);
-    BufferTextureCopyLayout GetBufferTextureCopyLayoutForTexture2DAtLevel(
+    TextureDataCopyLayout GetTextureDataCopyLayoutForTexture2DAtLevel(
         wgpu::TextureFormat format,
         wgpu::Extent3D textureSizeAtLevel0,
         uint32_t mipmapLevel,