src/dawn_native/metal/CommandBufferMTL.mm - dawn - Git at Google

 // Copyright 2017 The Dawn Authors
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
 //
 //     http://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing, software
 // distributed under the License is distributed on an "AS IS" BASIS,
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.

 #include "dawn_native/metal/CommandBufferMTL.h"

 #include "dawn_native/BindGroupTracker.h"
 #include "dawn_native/CommandEncoder.h"
 #include "dawn_native/Commands.h"
 #include "dawn_native/DynamicUploader.h"
 #include "dawn_native/ExternalTexture.h"
 #include "dawn_native/RenderBundle.h"
 #include "dawn_native/metal/BindGroupMTL.h"
 #include "dawn_native/metal/BufferMTL.h"
 #include "dawn_native/metal/ComputePipelineMTL.h"
 #include "dawn_native/metal/DeviceMTL.h"
 #include "dawn_native/metal/PipelineLayoutMTL.h"
 #include "dawn_native/metal/QuerySetMTL.h"
 #include "dawn_native/metal/RenderPipelineMTL.h"
 #include "dawn_native/metal/SamplerMTL.h"
 #include "dawn_native/metal/StagingBufferMTL.h"
 #include "dawn_native/metal/TextureMTL.h"
 #include "dawn_native/metal/UtilsMetal.h"

 #include <tint/tint.h>

 namespace dawn::native::metal {

     namespace {

         // Allows this file to use MTLStoreActionStoreAndMultismapleResolve because the logic is
         // first to compute what the "best" Metal render pass descriptor is, then fix it up if we
         // are not on macOS 10.12 (i.e. the EmulateStoreAndMSAAResolve toggle is on).
 #pragma clang diagnostic push
 #pragma clang diagnostic ignored "-Wunguarded-availability"
         constexpr MTLStoreAction kMTLStoreActionStoreAndMultisampleResolve =
             MTLStoreActionStoreAndMultisampleResolve;
 #pragma clang diagnostic pop

         MTLIndexType MTLIndexFormat(wgpu::IndexFormat format) {
             switch (format) {
                 case wgpu::IndexFormat::Uint16:
                     return MTLIndexTypeUInt16;
                 case wgpu::IndexFormat::Uint32:
                     return MTLIndexTypeUInt32;
                 case wgpu::IndexFormat::Undefined:
                     UNREACHABLE();
             }
         }

         NSRef<MTLRenderPassDescriptor> CreateMTLRenderPassDescriptor(
             BeginRenderPassCmd* renderPass) {
             // Note that this creates a descriptor that's autoreleased so we don't use AcquireNSRef
             NSRef<MTLRenderPassDescriptor> descriptorRef =
                 [MTLRenderPassDescriptor renderPassDescriptor];
             MTLRenderPassDescriptor* descriptor = descriptorRef.Get();

             for (ColorAttachmentIndex attachment :
                  IterateBitSet(renderPass->attachmentState->GetColorAttachmentsMask())) {
                 uint8_t i = static_cast<uint8_t>(attachment);
                 auto& attachmentInfo = renderPass->colorAttachments[attachment];

                 switch (attachmentInfo.loadOp) {
                     case wgpu::LoadOp::Clear:
                         descriptor.colorAttachments[i].loadAction = MTLLoadActionClear;
                         descriptor.colorAttachments[i].clearColor = MTLClearColorMake(
                             attachmentInfo.clearColor.r, attachmentInfo.clearColor.g,
                             attachmentInfo.clearColor.b, attachmentInfo.clearColor.a);
                         break;

                     case wgpu::LoadOp::Load:
                         descriptor.colorAttachments[i].loadAction = MTLLoadActionLoad;
                         break;
                 }

                 descriptor.colorAttachments[i].texture =
                     ToBackend(attachmentInfo.view->GetTexture())->GetMTLTexture();
                 descriptor.colorAttachments[i].level = attachmentInfo.view->GetBaseMipLevel();
                 descriptor.colorAttachments[i].slice = attachmentInfo.view->GetBaseArrayLayer();

                 bool hasResolveTarget = attachmentInfo.resolveTarget != nullptr;
                 if (hasResolveTarget) {
                     descriptor.colorAttachments[i].resolveTexture =
                         ToBackend(attachmentInfo.resolveTarget->GetTexture())->GetMTLTexture();
                     descriptor.colorAttachments[i].resolveLevel =
                         attachmentInfo.resolveTarget->GetBaseMipLevel();
                     descriptor.colorAttachments[i].resolveSlice =
                         attachmentInfo.resolveTarget->GetBaseArrayLayer();

                     switch (attachmentInfo.storeOp) {
                         case wgpu::StoreOp::Store:
                             descriptor.colorAttachments[i].storeAction =
                                 kMTLStoreActionStoreAndMultisampleResolve;
                             break;
                         case wgpu::StoreOp::Discard:
                             descriptor.colorAttachments[i].storeAction =
                                 MTLStoreActionMultisampleResolve;
                             break;
                     }
                 } else {
                     switch (attachmentInfo.storeOp) {
                         case wgpu::StoreOp::Store:
                             descriptor.colorAttachments[i].storeAction = MTLStoreActionStore;
                             break;
                         case wgpu::StoreOp::Discard:
                             descriptor.colorAttachments[i].storeAction = MTLStoreActionDontCare;
                             break;
                     }
                 }
             }

             if (renderPass->attachmentState->HasDepthStencilAttachment()) {
                 auto& attachmentInfo = renderPass->depthStencilAttachment;

                 id<MTLTexture> texture =
                     ToBackend(attachmentInfo.view->GetTexture())->GetMTLTexture();
                 const Format& format = attachmentInfo.view->GetTexture()->GetFormat();

                 if (format.HasDepth()) {
                     descriptor.depthAttachment.texture = texture;
                     descriptor.depthAttachment.level = attachmentInfo.view->GetBaseMipLevel();
                     descriptor.depthAttachment.slice = attachmentInfo.view->GetBaseArrayLayer();

                     switch (attachmentInfo.depthStoreOp) {
                         case wgpu::StoreOp::Store:
                             descriptor.depthAttachment.storeAction = MTLStoreActionStore;
                             break;

                         case wgpu::StoreOp::Discard:
                             descriptor.depthAttachment.storeAction = MTLStoreActionDontCare;
                             break;
                     }

                     switch (attachmentInfo.depthLoadOp) {
                         case wgpu::LoadOp::Clear:
                             descriptor.depthAttachment.loadAction = MTLLoadActionClear;
                             descriptor.depthAttachment.clearDepth = attachmentInfo.clearDepth;
                             break;

                         case wgpu::LoadOp::Load:
                             descriptor.depthAttachment.loadAction = MTLLoadActionLoad;
                             break;
                     }
                 }

                 if (format.HasStencil()) {
                     descriptor.stencilAttachment.texture = texture;
                     descriptor.stencilAttachment.level = attachmentInfo.view->GetBaseMipLevel();
                     descriptor.stencilAttachment.slice = attachmentInfo.view->GetBaseArrayLayer();

                     switch (attachmentInfo.stencilStoreOp) {
                         case wgpu::StoreOp::Store:
                             descriptor.stencilAttachment.storeAction = MTLStoreActionStore;
                             break;

                         case wgpu::StoreOp::Discard:
                             descriptor.stencilAttachment.storeAction = MTLStoreActionDontCare;
                             break;
                     }

                     switch (attachmentInfo.stencilLoadOp) {
                         case wgpu::LoadOp::Clear:
                             descriptor.stencilAttachment.loadAction = MTLLoadActionClear;
                             descriptor.stencilAttachment.clearStencil = attachmentInfo.clearStencil;
                             break;

                         case wgpu::LoadOp::Load:
                             descriptor.stencilAttachment.loadAction = MTLLoadActionLoad;
                             break;
                     }
                 }
             }

             if (renderPass->occlusionQuerySet.Get() != nullptr) {
                 descriptor.visibilityResultBuffer =
                     ToBackend(renderPass->occlusionQuerySet.Get())->GetVisibilityBuffer();
             }

             return descriptorRef;
         }

         // Helper function for Toggle EmulateStoreAndMSAAResolve
         void ResolveInAnotherRenderPass(
             CommandRecordingContext* commandContext,
             const MTLRenderPassDescriptor* mtlRenderPass,
             const std::array<id<MTLTexture>, kMaxColorAttachments>& resolveTextures) {
             // Note that this creates a descriptor that's autoreleased so we don't use AcquireNSRef
             NSRef<MTLRenderPassDescriptor> mtlRenderPassForResolveRef =
                 [MTLRenderPassDescriptor renderPassDescriptor];
             MTLRenderPassDescriptor* mtlRenderPassForResolve = mtlRenderPassForResolveRef.Get();

             for (uint32_t i = 0; i < kMaxColorAttachments; ++i) {
                 if (resolveTextures[i] == nullptr) {
                     continue;
                 }

                 mtlRenderPassForResolve.colorAttachments[i].texture =
                     mtlRenderPass.colorAttachments[i].texture;
                 mtlRenderPassForResolve.colorAttachments[i].loadAction = MTLLoadActionLoad;
                 mtlRenderPassForResolve.colorAttachments[i].storeAction =
                     MTLStoreActionMultisampleResolve;
                 mtlRenderPassForResolve.colorAttachments[i].resolveTexture = resolveTextures[i];
                 mtlRenderPassForResolve.colorAttachments[i].resolveLevel =
                     mtlRenderPass.colorAttachments[i].resolveLevel;
                 mtlRenderPassForResolve.colorAttachments[i].resolveSlice =
                     mtlRenderPass.colorAttachments[i].resolveSlice;
             }

             commandContext->BeginRender(mtlRenderPassForResolve);
             commandContext->EndRender();
         }

         // Helper functions for Toggle AlwaysResolveIntoZeroLevelAndLayer
         ResultOrError<NSPRef<id<MTLTexture>>> CreateResolveTextureForWorkaround(
             Device* device,
             MTLPixelFormat mtlFormat,
             uint32_t width,
             uint32_t height) {
             NSRef<MTLTextureDescriptor> mtlDescRef = AcquireNSRef([MTLTextureDescriptor new]);
             MTLTextureDescriptor* mtlDesc = mtlDescRef.Get();

             mtlDesc.textureType = MTLTextureType2D;
             mtlDesc.usage = MTLTextureUsageRenderTarget;
             mtlDesc.pixelFormat = mtlFormat;
             mtlDesc.width = width;
             mtlDesc.height = height;
             mtlDesc.depth = 1;
             mtlDesc.mipmapLevelCount = 1;
             mtlDesc.arrayLength = 1;
             mtlDesc.storageMode = MTLStorageModePrivate;
             mtlDesc.sampleCount = 1;

             id<MTLTexture> texture = [device->GetMTLDevice() newTextureWithDescriptor:mtlDesc];
             if (texture == nil) {
                 return DAWN_OUT_OF_MEMORY_ERROR("Allocation of temporary texture failed.");
             }

             return AcquireNSPRef(texture);
         }

         void CopyIntoTrueResolveTarget(CommandRecordingContext* commandContext,
                                        id<MTLTexture> mtlTrueResolveTexture,
                                        uint32_t trueResolveLevel,
                                        uint32_t trueResolveSlice,
                                        id<MTLTexture> temporaryResolveTexture,
                                        uint32_t width,
                                        uint32_t height) {
             [commandContext->EnsureBlit() copyFromTexture:temporaryResolveTexture
                                               sourceSlice:0
                                               sourceLevel:0
                                              sourceOrigin:MTLOriginMake(0, 0, 0)
                                                sourceSize:MTLSizeMake(width, height, 1)
                                                 toTexture:mtlTrueResolveTexture
                                          destinationSlice:trueResolveSlice
                                          destinationLevel:trueResolveLevel
                                         destinationOrigin:MTLOriginMake(0, 0, 0)];
         }

         // Metal uses a physical addressing mode which means buffers in the shading language are
         // just pointers to the virtual address of their start. This means there is no way to know
         // the length of a buffer to compute the length() of unsized arrays at the end of storage
         // buffers. SPIRV-Cross implements the length() of unsized arrays by requiring an extra
         // buffer that contains the length of other buffers. This structure that keeps track of the
         // length of storage buffers and can apply them to the reserved "buffer length buffer" when
         // needed for a draw or a dispatch.
         struct StorageBufferLengthTracker {
             wgpu::ShaderStage dirtyStages = wgpu::ShaderStage::None;

             // The lengths of buffers are stored as 32bit integers because that is the width the
             // MSL code generated by SPIRV-Cross expects.
             // UBOs require we align the max buffer count to 4 elements (16 bytes).
             static constexpr size_t MaxBufferCount = ((kGenericMetalBufferSlots + 3) / 4) * 4;
             PerStage<std::array<uint32_t, MaxBufferCount>> data;

             void Apply(id<MTLRenderCommandEncoder> render,
                        RenderPipeline* pipeline,
                        bool enableVertexPulling) {
                 wgpu::ShaderStage stagesToApply =
                     dirtyStages & pipeline->GetStagesRequiringStorageBufferLength();

                 if (stagesToApply == wgpu::ShaderStage::None) {
                     return;
                 }

                 if (stagesToApply & wgpu::ShaderStage::Vertex) {
                     uint32_t bufferCount = ToBackend(pipeline->GetLayout())
                                                ->GetBufferBindingCount(SingleShaderStage::Vertex);

                     if (enableVertexPulling) {
                         bufferCount += pipeline->GetVertexBufferCount();
                     }

                     bufferCount = Align(bufferCount, 4);
                     ASSERT(bufferCount <= data[SingleShaderStage::Vertex].size());

                     [render setVertexBytes:data[SingleShaderStage::Vertex].data()
                                     length:sizeof(uint32_t) * bufferCount
                                    atIndex:kBufferLengthBufferSlot];
                 }

                 if (stagesToApply & wgpu::ShaderStage::Fragment) {
                     uint32_t bufferCount = ToBackend(pipeline->GetLayout())
                                                ->GetBufferBindingCount(SingleShaderStage::Fragment);
                     bufferCount = Align(bufferCount, 4);
                     ASSERT(bufferCount <= data[SingleShaderStage::Fragment].size());

                     [render setFragmentBytes:data[SingleShaderStage::Fragment].data()
                                       length:sizeof(uint32_t) * bufferCount
                                      atIndex:kBufferLengthBufferSlot];
                 }

                 // Only mark clean stages that were actually applied.
                 dirtyStages ^= stagesToApply;
             }

             void Apply(id<MTLComputeCommandEncoder> compute, ComputePipeline* pipeline) {
                 if (!(dirtyStages & wgpu::ShaderStage::Compute)) {
                     return;
                 }

                 if (!pipeline->RequiresStorageBufferLength()) {
                     return;
                 }

                 uint32_t bufferCount = ToBackend(pipeline->GetLayout())
                                            ->GetBufferBindingCount(SingleShaderStage::Compute);
                 bufferCount = Align(bufferCount, 4);
                 ASSERT(bufferCount <= data[SingleShaderStage::Compute].size());

                 [compute setBytes:data[SingleShaderStage::Compute].data()
                            length:sizeof(uint32_t) * bufferCount
                           atIndex:kBufferLengthBufferSlot];

                 dirtyStages ^= wgpu::ShaderStage::Compute;
             }
         };

         // Keeps track of the dirty bind groups so they can be lazily applied when we know the
         // pipeline state.
         // Bind groups may be inherited because bind groups are packed in the buffer /
         // texture tables in contiguous order.
         class BindGroupTracker : public BindGroupTrackerBase<true, uint64_t> {
           public:
             explicit BindGroupTracker(StorageBufferLengthTracker* lengthTracker)
                 : BindGroupTrackerBase(), mLengthTracker(lengthTracker) {
             }

             template <typename Encoder>
             void Apply(Encoder encoder) {
                 BeforeApply();
                 for (BindGroupIndex index :
                      IterateBitSet(mDirtyBindGroupsObjectChangedOrIsDynamic)) {
                     ApplyBindGroup(encoder, index, ToBackend(mBindGroups[index]),
                                    mDynamicOffsetCounts[index], mDynamicOffsets[index].data(),
                                    ToBackend(mPipelineLayout));
                 }
                 AfterApply();
             }

           private:
             // Handles a call to SetBindGroup, directing the commands to the correct encoder.
             // There is a single function that takes both encoders to factor code. Other approaches
             // like templates wouldn't work because the name of methods are different between the
             // two encoder types.
             void ApplyBindGroupImpl(id<MTLRenderCommandEncoder> render,
                                     id<MTLComputeCommandEncoder> compute,
                                     BindGroupIndex index,
                                     BindGroup* group,
                                     uint32_t dynamicOffsetCount,
                                     uint64_t* dynamicOffsets,
                                     PipelineLayout* pipelineLayout) {
                 uint32_t currentDynamicBufferIndex = 0;

                 // TODO(crbug.com/dawn/854): Maintain buffers and offsets arrays in BindGroup
                 // so that we only have to do one setVertexBuffers and one setFragmentBuffers
                 // call here.
                 for (BindingIndex bindingIndex{0};
                      bindingIndex < group->GetLayout()->GetBindingCount(); ++bindingIndex) {
                     const BindingInfo& bindingInfo =
                         group->GetLayout()->GetBindingInfo(bindingIndex);

                     bool hasVertStage =
                         bindingInfo.visibility & wgpu::ShaderStage::Vertex && render != nullptr;
                     bool hasFragStage =
                         bindingInfo.visibility & wgpu::ShaderStage::Fragment && render != nullptr;
                     bool hasComputeStage =
                         bindingInfo.visibility & wgpu::ShaderStage::Compute && compute != nullptr;

                     uint32_t vertIndex = 0;
                     uint32_t fragIndex = 0;
                     uint32_t computeIndex = 0;

                     if (hasVertStage) {
                         vertIndex = pipelineLayout->GetBindingIndexInfo(
                             SingleShaderStage::Vertex)[index][bindingIndex];
                     }
                     if (hasFragStage) {
                         fragIndex = pipelineLayout->GetBindingIndexInfo(
                             SingleShaderStage::Fragment)[index][bindingIndex];
                     }
                     if (hasComputeStage) {
                         computeIndex = pipelineLayout->GetBindingIndexInfo(
                             SingleShaderStage::Compute)[index][bindingIndex];
                     }

                     switch (bindingInfo.bindingType) {
                         case BindingInfoType::Buffer: {
                             const BufferBinding& binding =
                                 group->GetBindingAsBufferBinding(bindingIndex);
                             const id<MTLBuffer> buffer = ToBackend(binding.buffer)->GetMTLBuffer();
                             NSUInteger offset = binding.offset;

                             // TODO(crbug.com/dawn/854): Record bound buffer status to use
                             // setBufferOffset to achieve better performance.
                             if (bindingInfo.buffer.hasDynamicOffset) {
                                 offset += dynamicOffsets[currentDynamicBufferIndex];
                                 currentDynamicBufferIndex++;
                             }

                             if (hasVertStage) {
                                 mLengthTracker->data[SingleShaderStage::Vertex][vertIndex] =
                                     binding.size;
                                 mLengthTracker->dirtyStages |= wgpu::ShaderStage::Vertex;
                                 [render setVertexBuffers:&buffer
                                                  offsets:&offset
                                                withRange:NSMakeRange(vertIndex, 1)];
                             }
                             if (hasFragStage) {
                                 mLengthTracker->data[SingleShaderStage::Fragment][fragIndex] =
                                     binding.size;
                                 mLengthTracker->dirtyStages |= wgpu::ShaderStage::Fragment;
                                 [render setFragmentBuffers:&buffer
                                                    offsets:&offset
                                                  withRange:NSMakeRange(fragIndex, 1)];
                             }
                             if (hasComputeStage) {
                                 mLengthTracker->data[SingleShaderStage::Compute][computeIndex] =
                                     binding.size;
                                 mLengthTracker->dirtyStages |= wgpu::ShaderStage::Compute;
                                 [compute setBuffers:&buffer
                                             offsets:&offset
                                           withRange:NSMakeRange(computeIndex, 1)];
                             }

                             break;
                         }

                         case BindingInfoType::Sampler: {
                             auto sampler = ToBackend(group->GetBindingAsSampler(bindingIndex));
                             if (hasVertStage) {
                                 [render setVertexSamplerState:sampler->GetMTLSamplerState()
                                                       atIndex:vertIndex];
                             }
                             if (hasFragStage) {
                                 [render setFragmentSamplerState:sampler->GetMTLSamplerState()
                                                         atIndex:fragIndex];
                             }
                             if (hasComputeStage) {
                                 [compute setSamplerState:sampler->GetMTLSamplerState()
                                                  atIndex:computeIndex];
                             }
                             break;
                         }

                         case BindingInfoType::Texture:
                         case BindingInfoType::StorageTexture: {
                             auto textureView =
                                 ToBackend(group->GetBindingAsTextureView(bindingIndex));
                             if (hasVertStage) {
                                 [render setVertexTexture:textureView->GetMTLTexture()
                                                  atIndex:vertIndex];
                             }
                             if (hasFragStage) {
                                 [render setFragmentTexture:textureView->GetMTLTexture()
                                                    atIndex:fragIndex];
                             }
                             if (hasComputeStage) {
                                 [compute setTexture:textureView->GetMTLTexture()
                                             atIndex:computeIndex];
                             }
                             break;
                         }

                         case BindingInfoType::ExternalTexture: {
                             const std::array<Ref<TextureViewBase>, kMaxPlanesPerFormat>& views =
                                 group->GetBindingAsExternalTexture(bindingIndex)->GetTextureViews();

                             // Only single-plane formats are supported right now, so assert only one
                             // view exists.
                             ASSERT(views[1].Get() == nullptr);
                             ASSERT(views[2].Get() == nullptr);

                             TextureView* textureView = ToBackend(views[0].Get());

                             if (hasVertStage) {
                                 [render setVertexTexture:textureView->GetMTLTexture()
                                                  atIndex:vertIndex];
                             }
                             if (hasFragStage) {
                                 [render setFragmentTexture:textureView->GetMTLTexture()
                                                    atIndex:fragIndex];
                             }
                             if (hasComputeStage) {
                                 [compute setTexture:textureView->GetMTLTexture()
                                             atIndex:computeIndex];
                             }
                             break;
                         }
                     }
                 }
             }

             template <typename... Args>
             void ApplyBindGroup(id<MTLRenderCommandEncoder> encoder, Args&&... args) {
                 ApplyBindGroupImpl(encoder, nullptr, std::forward<Args&&>(args)...);
             }

             template <typename... Args>
             void ApplyBindGroup(id<MTLComputeCommandEncoder> encoder, Args&&... args) {
                 ApplyBindGroupImpl(nullptr, encoder, std::forward<Args&&>(args)...);
             }

             StorageBufferLengthTracker* mLengthTracker;
         };

         // Keeps track of the dirty vertex buffer values so they can be lazily applied when we know
         // all the relevant state.
         class VertexBufferTracker {
           public:
             explicit VertexBufferTracker(StorageBufferLengthTracker* lengthTracker)
                 : mLengthTracker(lengthTracker) {
             }

             void OnSetVertexBuffer(VertexBufferSlot slot, Buffer* buffer, uint64_t offset) {
                 mVertexBuffers[slot] = buffer->GetMTLBuffer();
                 mVertexBufferOffsets[slot] = offset;

                 ASSERT(buffer->GetSize() < std::numeric_limits<uint32_t>::max());
                 mVertexBufferBindingSizes[slot] =
                     static_cast<uint32_t>(buffer->GetAllocatedSize() - offset);
                 mDirtyVertexBuffers.set(slot);
             }

             void OnSetPipeline(RenderPipeline* lastPipeline, RenderPipeline* pipeline) {
                 // When a new pipeline is bound we must set all the vertex buffers again because
                 // they might have been offset by the pipeline layout, and they might be packed
                 // differently from the previous pipeline.
                 mDirtyVertexBuffers |= pipeline->GetVertexBufferSlotsUsed();
             }

             void Apply(id<MTLRenderCommandEncoder> encoder,
                        RenderPipeline* pipeline,
                        bool enableVertexPulling) {
                 const auto& vertexBuffersToApply =
                     mDirtyVertexBuffers & pipeline->GetVertexBufferSlotsUsed();

                 for (VertexBufferSlot slot : IterateBitSet(vertexBuffersToApply)) {
                     uint32_t metalIndex = pipeline->GetMtlVertexBufferIndex(slot);

                     if (enableVertexPulling) {
                         // Insert lengths for vertex buffers bound as storage buffers
                         mLengthTracker->data[SingleShaderStage::Vertex][metalIndex] =
                             mVertexBufferBindingSizes[slot];
                         mLengthTracker->dirtyStages |= wgpu::ShaderStage::Vertex;
                     }

                     [encoder setVertexBuffers:&mVertexBuffers[slot]
                                       offsets:&mVertexBufferOffsets[slot]
                                     withRange:NSMakeRange(metalIndex, 1)];
                 }

                 mDirtyVertexBuffers.reset();
             }

           private:
             // All the indices in these arrays are Dawn vertex buffer indices
             ityp::bitset<VertexBufferSlot, kMaxVertexBuffers> mDirtyVertexBuffers;
             ityp::array<VertexBufferSlot, id<MTLBuffer>, kMaxVertexBuffers> mVertexBuffers;
             ityp::array<VertexBufferSlot, NSUInteger, kMaxVertexBuffers> mVertexBufferOffsets;
             ityp::array<VertexBufferSlot, uint32_t, kMaxVertexBuffers> mVertexBufferBindingSizes;

             StorageBufferLengthTracker* mLengthTracker;
         };

     }  // anonymous namespace

     void RecordCopyBufferToTexture(CommandRecordingContext* commandContext,
                                    id<MTLBuffer> mtlBuffer,
                                    uint64_t bufferSize,
                                    uint64_t offset,
                                    uint32_t bytesPerRow,
                                    uint32_t rowsPerImage,
                                    Texture* texture,
                                    uint32_t mipLevel,
                                    const Origin3D& origin,
                                    Aspect aspect,
                                    const Extent3D& copySize) {
         TextureBufferCopySplit splitCopies =
             ComputeTextureBufferCopySplit(texture, mipLevel, origin, copySize, bufferSize, offset,
                                           bytesPerRow, rowsPerImage, aspect);

         MTLBlitOption blitOption = ComputeMTLBlitOption(texture->GetFormat(), aspect);

         for (const auto& copyInfo : splitCopies) {
             uint64_t bufferOffset = copyInfo.bufferOffset;
             switch (texture->GetDimension()) {
                 case wgpu::TextureDimension::e2D: {
                     const MTLOrigin textureOrigin =
                         MTLOriginMake(copyInfo.textureOrigin.x, copyInfo.textureOrigin.y, 0);
                     const MTLSize copyExtent =
                         MTLSizeMake(copyInfo.copyExtent.width, copyInfo.copyExtent.height, 1);

                     for (uint32_t z = copyInfo.textureOrigin.z;
                          z < copyInfo.textureOrigin.z + copyInfo.copyExtent.depthOrArrayLayers;
                          ++z) {
                         [commandContext->EnsureBlit() copyFromBuffer:mtlBuffer
                                                         sourceOffset:bufferOffset
                                                    sourceBytesPerRow:copyInfo.bytesPerRow
                                                  sourceBytesPerImage:copyInfo.bytesPerImage
                                                           sourceSize:copyExtent
                                                            toTexture:texture->GetMTLTexture()
                                                     destinationSlice:z
                                                     destinationLevel:mipLevel
                                                    destinationOrigin:textureOrigin
                                                              options:blitOption];
                         bufferOffset += copyInfo.bytesPerImage;
                     }
                     break;
                 }
                 case wgpu::TextureDimension::e3D: {
                     [commandContext->EnsureBlit()
                              copyFromBuffer:mtlBuffer
                                sourceOffset:bufferOffset
                           sourceBytesPerRow:copyInfo.bytesPerRow
                         sourceBytesPerImage:copyInfo.bytesPerImage
                                  sourceSize:MTLSizeMake(copyInfo.copyExtent.width,
                                                         copyInfo.copyExtent.height,
                                                         copyInfo.copyExtent.depthOrArrayLayers)
                                   toTexture:texture->GetMTLTexture()
                            destinationSlice:0
                            destinationLevel:mipLevel
                           destinationOrigin:MTLOriginMake(copyInfo.textureOrigin.x,
                                                           copyInfo.textureOrigin.y,
                                                           copyInfo.textureOrigin.z)
                                     options:blitOption];
                     break;
                 }
                 case wgpu::TextureDimension::e1D:
                     UNREACHABLE();
             }
         }
     }

     // static
     Ref<CommandBuffer> CommandBuffer::Create(CommandEncoder* encoder,
                                              const CommandBufferDescriptor* descriptor) {
         return AcquireRef(new CommandBuffer(encoder, descriptor));
     }

     MaybeError CommandBuffer::FillCommands(CommandRecordingContext* commandContext) {
         size_t nextComputePassNumber = 0;
         size_t nextRenderPassNumber = 0;

         auto LazyClearSyncScope = [](const SyncScopeResourceUsage& scope,
                                      CommandRecordingContext* commandContext) {
             for (size_t i = 0; i < scope.textures.size(); ++i) {
                 Texture* texture = ToBackend(scope.textures[i]);

                 // Clear subresources that are not render attachments. Render attachments will be
                 // cleared in RecordBeginRenderPass by setting the loadop to clear when the texture
                 // subresource has not been initialized before the render pass.
                 scope.textureUsages[i].Iterate(
                     [&](const SubresourceRange& range, wgpu::TextureUsage usage) {
                         if (usage & ~wgpu::TextureUsage::RenderAttachment) {
                             texture->EnsureSubresourceContentInitialized(commandContext, range);
                         }
                     });
             }
             for (BufferBase* bufferBase : scope.buffers) {
                 ToBackend(bufferBase)->EnsureDataInitialized(commandContext);
             }
         };

         Command type;
         while (mCommands.NextCommandId(&type)) {
             switch (type) {
                 case Command::BeginComputePass: {
                     mCommands.NextCommand<BeginComputePassCmd>();

                     for (const SyncScopeResourceUsage& scope :
                          GetResourceUsages().computePasses[nextComputePassNumber].dispatchUsages) {
                         LazyClearSyncScope(scope, commandContext);
                     }
                     commandContext->EndBlit();

                     DAWN_TRY(EncodeComputePass(commandContext));

                     nextComputePassNumber++;
                     break;
                 }

                 case Command::BeginRenderPass: {
                     BeginRenderPassCmd* cmd = mCommands.NextCommand<BeginRenderPassCmd>();

                     LazyClearSyncScope(GetResourceUsages().renderPasses[nextRenderPassNumber],
                                        commandContext);
                     commandContext->EndBlit();

                     LazyClearRenderPassAttachments(cmd);
                     NSRef<MTLRenderPassDescriptor> descriptor = CreateMTLRenderPassDescriptor(cmd);
                     DAWN_TRY(EncodeRenderPass(commandContext, descriptor.Get(), cmd->width,
                                               cmd->height));

                     nextRenderPassNumber++;
                     break;
                 }

                 case Command::CopyBufferToBuffer: {
                     CopyBufferToBufferCmd* copy = mCommands.NextCommand<CopyBufferToBufferCmd>();
                     if (copy->size == 0) {
                         // Skip no-op copies.
                         break;
                     }

                     ToBackend(copy->source)->EnsureDataInitialized(commandContext);
                     ToBackend(copy->destination)
                         ->EnsureDataInitializedAsDestination(commandContext,
                                                              copy->destinationOffset, copy->size);

                     [commandContext->EnsureBlit()
                            copyFromBuffer:ToBackend(copy->source)->GetMTLBuffer()
                              sourceOffset:copy->sourceOffset
                                  toBuffer:ToBackend(copy->destination)->GetMTLBuffer()
                         destinationOffset:copy->destinationOffset
                                      size:copy->size];
                     break;
                 }

                 case Command::CopyBufferToTexture: {
                     CopyBufferToTextureCmd* copy = mCommands.NextCommand<CopyBufferToTextureCmd>();
                     if (copy->copySize.width == 0 || copy->copySize.height == 0 ||
                         copy->copySize.depthOrArrayLayers == 0) {
                         // Skip no-op copies.
                         continue;
                     }
                     auto& src = copy->source;
                     auto& dst = copy->destination;
                     auto& copySize = copy->copySize;
                     Buffer* buffer = ToBackend(src.buffer.Get());
                     Texture* texture = ToBackend(dst.texture.Get());

                     buffer->EnsureDataInitialized(commandContext);
                     EnsureDestinationTextureInitialized(commandContext, texture, dst, copySize);

                     RecordCopyBufferToTexture(commandContext, buffer->GetMTLBuffer(),
                                               buffer->GetSize(), src.offset, src.bytesPerRow,
                                               src.rowsPerImage, texture, dst.mipLevel, dst.origin,
                                               dst.aspect, copySize);
                     break;
                 }

                 case Command::CopyTextureToBuffer: {
                     CopyTextureToBufferCmd* copy = mCommands.NextCommand<CopyTextureToBufferCmd>();
                     if (copy->copySize.width == 0 || copy->copySize.height == 0 ||
                         copy->copySize.depthOrArrayLayers == 0) {
                         // Skip no-op copies.
                         continue;
                     }
                     auto& src = copy->source;
                     auto& dst = copy->destination;
                     auto& copySize = copy->copySize;
                     Texture* texture = ToBackend(src.texture.Get());
                     Buffer* buffer = ToBackend(dst.buffer.Get());

                     buffer->EnsureDataInitializedAsDestination(commandContext, copy);

                     texture->EnsureSubresourceContentInitialized(
                         commandContext, GetSubresourcesAffectedByCopy(src, copySize));

                     TextureBufferCopySplit splitCopies = ComputeTextureBufferCopySplit(
                         texture, src.mipLevel, src.origin, copySize, buffer->GetSize(), dst.offset,
                         dst.bytesPerRow, dst.rowsPerImage, src.aspect);

                     for (const auto& copyInfo : splitCopies) {
                         MTLBlitOption blitOption =
                             ComputeMTLBlitOption(texture->GetFormat(), src.aspect);
                         uint64_t bufferOffset = copyInfo.bufferOffset;

                         switch (texture->GetDimension()) {
                             case wgpu::TextureDimension::e2D: {
                                 const MTLOrigin textureOrigin = MTLOriginMake(
                                     copyInfo.textureOrigin.x, copyInfo.textureOrigin.y, 0);
                                 const MTLSize copyExtent = MTLSizeMake(
                                     copyInfo.copyExtent.width, copyInfo.copyExtent.height, 1);

                                 for (uint32_t z = copyInfo.textureOrigin.z;
                                      z < copyInfo.textureOrigin.z +
                                              copyInfo.copyExtent.depthOrArrayLayers;
                                      ++z) {
                                     [commandContext->EnsureBlit()
                                                  copyFromTexture:texture->GetMTLTexture()
                                                      sourceSlice:z
                                                      sourceLevel:src.mipLevel
                                                     sourceOrigin:textureOrigin
                                                       sourceSize:copyExtent
                                                         toBuffer:buffer->GetMTLBuffer()
                                                destinationOffset:bufferOffset
                                           destinationBytesPerRow:copyInfo.bytesPerRow
                                         destinationBytesPerImage:copyInfo.bytesPerImage
                                                          options:blitOption];
                                     bufferOffset += copyInfo.bytesPerImage;
                                 }
                                 break;
                             }
                             case wgpu::TextureDimension::e3D: {
                                 [commandContext->EnsureBlit()
                                              copyFromTexture:texture->GetMTLTexture()
                                                  sourceSlice:0
                                                  sourceLevel:src.mipLevel
                                                 sourceOrigin:MTLOriginMake(copyInfo.textureOrigin.x,
                                                                            copyInfo.textureOrigin.y,
                                                                            copyInfo.textureOrigin.z)
                                                   sourceSize:MTLSizeMake(copyInfo.copyExtent.width,
                                                                          copyInfo.copyExtent.height,
                                                                          copyInfo.copyExtent
                                                                              .depthOrArrayLayers)
                                                     toBuffer:buffer->GetMTLBuffer()
                                            destinationOffset:bufferOffset
                                       destinationBytesPerRow:copyInfo.bytesPerRow
                                     destinationBytesPerImage:copyInfo.bytesPerImage
                                                      options:blitOption];
                                 break;
                             }
                             case wgpu::TextureDimension::e1D:
                                 UNREACHABLE();
                         }
                     }
                     break;
                 }

                 case Command::CopyTextureToTexture: {
                     CopyTextureToTextureCmd* copy =
                         mCommands.NextCommand<CopyTextureToTextureCmd>();
                     if (copy->copySize.width == 0 || copy->copySize.height == 0 ||
                         copy->copySize.depthOrArrayLayers == 0) {
                         // Skip no-op copies.
                         continue;
                     }
                     Texture* srcTexture = ToBackend(copy->source.texture.Get());
                     Texture* dstTexture = ToBackend(copy->destination.texture.Get());

                     srcTexture->EnsureSubresourceContentInitialized(
                         commandContext,
                         GetSubresourcesAffectedByCopy(copy->source, copy->copySize));
                     EnsureDestinationTextureInitialized(commandContext, dstTexture,
                                                         copy->destination, copy->copySize);

                     // TODO(crbug.com/dawn/814): support copies with 1D textures.
                     ASSERT(srcTexture->GetDimension() != wgpu::TextureDimension::e1D &&
                            dstTexture->GetDimension() != wgpu::TextureDimension::e1D);

                     const MTLSize sizeOneSlice =
                         MTLSizeMake(copy->copySize.width, copy->copySize.height, 1);

                     uint32_t sourceLayer = 0;
                     uint32_t sourceOriginZ = 0;

                     uint32_t destinationLayer = 0;
                     uint32_t destinationOriginZ = 0;

                     uint32_t* sourceZPtr;
                     if (srcTexture->GetDimension() == wgpu::TextureDimension::e2D) {
                         sourceZPtr = &sourceLayer;
                     } else {
                         sourceZPtr = &sourceOriginZ;
                     }

                     uint32_t* destinationZPtr;
                     if (dstTexture->GetDimension() == wgpu::TextureDimension::e2D) {
                         destinationZPtr = &destinationLayer;
                     } else {
                         destinationZPtr = &destinationOriginZ;
                     }

                     // TODO(crbug.com/dawn/782): Do a single T2T copy if both are 3D.
                     for (uint32_t z = 0; z < copy->copySize.depthOrArrayLayers; ++z) {
                         *sourceZPtr = copy->source.origin.z + z;
                         *destinationZPtr = copy->destination.origin.z + z;

                         // Hold the ref until out of scope
                         NSPRef<id<MTLTexture>> dstTextureView =
                             dstTexture->CreateFormatView(srcTexture->GetFormat().format);

                         [commandContext->EnsureBlit()
                               copyFromTexture:srcTexture->GetMTLTexture()
                                   sourceSlice:sourceLayer
                                   sourceLevel:copy->source.mipLevel
                                  sourceOrigin:MTLOriginMake(copy->source.origin.x,
                                                             copy->source.origin.y, sourceOriginZ)
                                    sourceSize:sizeOneSlice
                                     toTexture:dstTextureView.Get()
                              destinationSlice:destinationLayer
                              destinationLevel:copy->destination.mipLevel
                             destinationOrigin:MTLOriginMake(copy->destination.origin.x,
                                                             copy->destination.origin.y,
                                                             destinationOriginZ)];
                     }
                     break;
                 }

                 case Command::ClearBuffer: {
                     ClearBufferCmd* cmd = mCommands.NextCommand<ClearBufferCmd>();
                     if (cmd->size == 0) {
                         // Skip no-op copies.
                         break;
                     }
                     Buffer* dstBuffer = ToBackend(cmd->buffer.Get());

                     bool clearedToZero = dstBuffer->EnsureDataInitializedAsDestination(
                         commandContext, cmd->offset, cmd->size);

                     if (!clearedToZero) {
                         [commandContext->EnsureBlit() fillBuffer:dstBuffer->GetMTLBuffer()
                                                            range:NSMakeRange(cmd->offset, cmd->size)
                                                            value:0u];
                     }

                     break;
                 }

                 case Command::ResolveQuerySet: {
                     ResolveQuerySetCmd* cmd = mCommands.NextCommand<ResolveQuerySetCmd>();
                     QuerySet* querySet = ToBackend(cmd->querySet.Get());
                     Buffer* destination = ToBackend(cmd->destination.Get());

                     destination->EnsureDataInitializedAsDestination(
                         commandContext, cmd->destinationOffset, cmd->queryCount * sizeof(uint64_t));

                     if (querySet->GetQueryType() == wgpu::QueryType::Occlusion) {
                         [commandContext->EnsureBlit()
                                copyFromBuffer:querySet->GetVisibilityBuffer()
                                  sourceOffset:NSUInteger(cmd->firstQuery * sizeof(uint64_t))
                                      toBuffer:destination->GetMTLBuffer()
                             destinationOffset:NSUInteger(cmd->destinationOffset)
                                          size:NSUInteger(cmd->queryCount * sizeof(uint64_t))];
                     } else {
                         if (@available(macos 10.15, iOS 14.0, *)) {
                             [commandContext->EnsureBlit()
                                   resolveCounters:querySet->GetCounterSampleBuffer()
                                           inRange:NSMakeRange(cmd->firstQuery, cmd->queryCount)
                                 destinationBuffer:destination->GetMTLBuffer()
                                 destinationOffset:NSUInteger(cmd->destinationOffset)];
                         } else {
                             UNREACHABLE();
                         }
                     }
                     break;
                 }

                 case Command::WriteTimestamp: {
                     WriteTimestampCmd* cmd = mCommands.NextCommand<WriteTimestampCmd>();
                     QuerySet* querySet = ToBackend(cmd->querySet.Get());

                     if (@available(macos 10.15, iOS 14.0, *)) {
                         [commandContext->EnsureBlit()
                             sampleCountersInBuffer:querySet->GetCounterSampleBuffer()
                                      atSampleIndex:NSUInteger(cmd->queryIndex)
                                        withBarrier:YES];
                     } else {
                         UNREACHABLE();
                     }
                     break;
                 }

                 case Command::InsertDebugMarker: {
                     // MTLCommandBuffer does not implement insertDebugSignpost
                     SkipCommand(&mCommands, type);
                     break;
                 }

                 case Command::PopDebugGroup: {
                     mCommands.NextCommand<PopDebugGroupCmd>();

                     if (@available(macos 10.13, *)) {
                         [commandContext->GetCommands() popDebugGroup];
                     }
                     break;
                 }

                 case Command::PushDebugGroup: {
                     PushDebugGroupCmd* cmd = mCommands.NextCommand<PushDebugGroupCmd>();
                     char* label = mCommands.NextData<char>(cmd->length + 1);

                     if (@available(macos 10.13, *)) {
                         NSRef<NSString> mtlLabel =
                             AcquireNSRef([[NSString alloc] initWithUTF8String:label]);
                         [commandContext->GetCommands() pushDebugGroup:mtlLabel.Get()];
                     }

                     break;
                 }

                 case Command::WriteBuffer: {
                     WriteBufferCmd* write = mCommands.NextCommand<WriteBufferCmd>();
                     const uint64_t offset = write->offset;
                     const uint64_t size = write->size;
                     if (size == 0) {
                         continue;
                     }

                     Buffer* dstBuffer = ToBackend(write->buffer.Get());
                     uint8_t* data = mCommands.NextData<uint8_t>(size);
                     Device* device = ToBackend(GetDevice());

                     UploadHandle uploadHandle;
                     DAWN_TRY_ASSIGN(uploadHandle, device->GetDynamicUploader()->Allocate(
                                                       size, device->GetPendingCommandSerial(),
                                                       kCopyBufferToBufferOffsetAlignment));
                     ASSERT(uploadHandle.mappedBuffer != nullptr);
                     memcpy(uploadHandle.mappedBuffer, data, size);

                     dstBuffer->EnsureDataInitializedAsDestination(commandContext, offset, size);

                     [commandContext->EnsureBlit()
                            copyFromBuffer:ToBackend(uploadHandle.stagingBuffer)->GetBufferHandle()
                              sourceOffset:uploadHandle.startOffset
                                  toBuffer:dstBuffer->GetMTLBuffer()
                         destinationOffset:offset
                                      size:size];
                     break;
                 }

                 default:
                     UNREACHABLE();
             }
         }

         commandContext->EndBlit();
         return {};
     }

     MaybeError CommandBuffer::EncodeComputePass(CommandRecordingContext* commandContext) {
         ComputePipeline* lastPipeline = nullptr;
         StorageBufferLengthTracker storageBufferLengths = {};
         BindGroupTracker bindGroups(&storageBufferLengths);

         id<MTLComputeCommandEncoder> encoder = commandContext->BeginCompute();

         Command type;
         while (mCommands.NextCommandId(&type)) {
             switch (type) {
                 case Command::EndComputePass: {
                     mCommands.NextCommand<EndComputePassCmd>();
                     commandContext->EndCompute();
                     return {};
                 }

                 case Command::Dispatch: {
                     DispatchCmd* dispatch = mCommands.NextCommand<DispatchCmd>();

                     // Skip noop dispatches, it can causes issues on some systems.
                     if (dispatch->x == 0 || dispatch->y == 0 || dispatch->z == 0) {
                         break;
                     }

                     bindGroups.Apply(encoder);
                     storageBufferLengths.Apply(encoder, lastPipeline);

                     [encoder dispatchThreadgroups:MTLSizeMake(dispatch->x, dispatch->y, dispatch->z)
                             threadsPerThreadgroup:lastPipeline->GetLocalWorkGroupSize()];
                     break;
                 }

                 case Command::DispatchIndirect: {
                     DispatchIndirectCmd* dispatch = mCommands.NextCommand<DispatchIndirectCmd>();

                     bindGroups.Apply(encoder);
                     storageBufferLengths.Apply(encoder, lastPipeline);

                     Buffer* buffer = ToBackend(dispatch->indirectBuffer.Get());
                     id<MTLBuffer> indirectBuffer = buffer->GetMTLBuffer();
                     [encoder dispatchThreadgroupsWithIndirectBuffer:indirectBuffer
                                                indirectBufferOffset:dispatch->indirectOffset
                                               threadsPerThreadgroup:lastPipeline
                                                                         ->GetLocalWorkGroupSize()];
                     break;
                 }

                 case Command::SetComputePipeline: {
                     SetComputePipelineCmd* cmd = mCommands.NextCommand<SetComputePipelineCmd>();
                     lastPipeline = ToBackend(cmd->pipeline).Get();

                     bindGroups.OnSetPipeline(lastPipeline);

                     lastPipeline->Encode(encoder);
                     break;
                 }

                 case Command::SetBindGroup: {
                     SetBindGroupCmd* cmd = mCommands.NextCommand<SetBindGroupCmd>();
                     uint32_t* dynamicOffsets = nullptr;
                     if (cmd->dynamicOffsetCount > 0) {
                         dynamicOffsets = mCommands.NextData<uint32_t>(cmd->dynamicOffsetCount);
                     }

                     bindGroups.OnSetBindGroup(cmd->index, ToBackend(cmd->group.Get()),
                                               cmd->dynamicOffsetCount, dynamicOffsets);
                     break;
                 }

                 case Command::InsertDebugMarker: {
                     InsertDebugMarkerCmd* cmd = mCommands.NextCommand<InsertDebugMarkerCmd>();
                     char* label = mCommands.NextData<char>(cmd->length + 1);
                     NSRef<NSString> mtlLabel =
                         AcquireNSRef([[NSString alloc] initWithUTF8String:label]);
                     [encoder insertDebugSignpost:mtlLabel.Get()];
                     break;
                 }

                 case Command::PopDebugGroup: {
                     mCommands.NextCommand<PopDebugGroupCmd>();

                     [encoder popDebugGroup];
                     break;
                 }

                 case Command::PushDebugGroup: {
                     PushDebugGroupCmd* cmd = mCommands.NextCommand<PushDebugGroupCmd>();
                     char* label = mCommands.NextData<char>(cmd->length + 1);
                     NSRef<NSString> mtlLabel =
                         AcquireNSRef([[NSString alloc] initWithUTF8String:label]);
                     [encoder pushDebugGroup:mtlLabel.Get()];
                     break;
                 }

                 case Command::WriteTimestamp: {
                     WriteTimestampCmd* cmd = mCommands.NextCommand<WriteTimestampCmd>();
                     QuerySet* querySet = ToBackend(cmd->querySet.Get());

                     if (@available(macos 10.15, iOS 14.0, *)) {
                         [encoder sampleCountersInBuffer:querySet->GetCounterSampleBuffer()
                                           atSampleIndex:NSUInteger(cmd->queryIndex)
                                             withBarrier:YES];
                     } else {
                         UNREACHABLE();
                     }
                     break;
                 }

                 default: {
                     UNREACHABLE();
                     break;
                 }
             }
         }

         // EndComputePass should have been called
         UNREACHABLE();
     }

     MaybeError CommandBuffer::EncodeRenderPass(CommandRecordingContext* commandContext,
                                                MTLRenderPassDescriptor* mtlRenderPass,
                                                uint32_t width,
                                                uint32_t height) {
         ASSERT(mtlRenderPass);

         Device* device = ToBackend(GetDevice());

         // Handle Toggle AlwaysResolveIntoZeroLevelAndLayer. We must handle this before applying
         // the store + MSAA resolve workaround, otherwise this toggle will never be handled because
         // the resolve texture is removed when applying the store + MSAA resolve workaround.
         if (device->IsToggleEnabled(Toggle::AlwaysResolveIntoZeroLevelAndLayer)) {
             std::array<id<MTLTexture>, kMaxColorAttachments> trueResolveTextures = {};
             std::array<uint32_t, kMaxColorAttachments> trueResolveLevels = {};
             std::array<uint32_t, kMaxColorAttachments> trueResolveSlices = {};

             // Use temporary resolve texture on the resolve targets with non-zero resolveLevel or
             // resolveSlice.
             bool useTemporaryResolveTexture = false;
             std::array<NSPRef<id<MTLTexture>>, kMaxColorAttachments> temporaryResolveTextures = {};
             for (uint32_t i = 0; i < kMaxColorAttachments; ++i) {
                 if (mtlRenderPass.colorAttachments[i].resolveTexture == nullptr) {
                     continue;
                 }

                 if (mtlRenderPass.colorAttachments[i].resolveLevel == 0 &&
                     mtlRenderPass.colorAttachments[i].resolveSlice == 0) {
                     continue;
                 }

                 trueResolveTextures[i] = mtlRenderPass.colorAttachments[i].resolveTexture;
                 trueResolveLevels[i] = mtlRenderPass.colorAttachments[i].resolveLevel;
                 trueResolveSlices[i] = mtlRenderPass.colorAttachments[i].resolveSlice;

                 const MTLPixelFormat mtlFormat = trueResolveTextures[i].pixelFormat;
                 DAWN_TRY_ASSIGN(temporaryResolveTextures[i], CreateResolveTextureForWorkaround(
                                                                  device, mtlFormat, width, height));

                 mtlRenderPass.colorAttachments[i].resolveTexture =
                     temporaryResolveTextures[i].Get();
                 mtlRenderPass.colorAttachments[i].resolveLevel = 0;
                 mtlRenderPass.colorAttachments[i].resolveSlice = 0;
                 useTemporaryResolveTexture = true;
             }

             // If we need to use a temporary resolve texture we need to copy the result of MSAA
             // resolve back to the true resolve targets.
             if (useTemporaryResolveTexture) {
                 DAWN_TRY(EncodeRenderPass(commandContext, mtlRenderPass, width, height));
                 for (uint32_t i = 0; i < kMaxColorAttachments; ++i) {
                     if (trueResolveTextures[i] == nullptr) {
                         continue;
                     }

                     ASSERT(temporaryResolveTextures[i] != nullptr);
                     CopyIntoTrueResolveTarget(commandContext, trueResolveTextures[i],
                                               trueResolveLevels[i], trueResolveSlices[i],
                                               temporaryResolveTextures[i].Get(), width, height);
                 }
                 return {};
             }
         }

         // Handle Store + MSAA resolve workaround (Toggle EmulateStoreAndMSAAResolve).
         if (device->IsToggleEnabled(Toggle::EmulateStoreAndMSAAResolve)) {
             bool hasStoreAndMSAAResolve = false;

             // Remove any store + MSAA resolve and remember them.
             std::array<id<MTLTexture>, kMaxColorAttachments> resolveTextures = {};
             for (uint32_t i = 0; i < kMaxColorAttachments; ++i) {
                 if (mtlRenderPass.colorAttachments[i].storeAction ==
                     kMTLStoreActionStoreAndMultisampleResolve) {
                     hasStoreAndMSAAResolve = true;
                     resolveTextures[i] = mtlRenderPass.colorAttachments[i].resolveTexture;

                     mtlRenderPass.colorAttachments[i].storeAction = MTLStoreActionStore;
                     mtlRenderPass.colorAttachments[i].resolveTexture = nullptr;
                 }
             }

             // If we found a store + MSAA resolve we need to resolve in a different render pass.
             if (hasStoreAndMSAAResolve) {
                 DAWN_TRY(EncodeRenderPass(commandContext, mtlRenderPass, width, height));
                 ResolveInAnotherRenderPass(commandContext, mtlRenderPass, resolveTextures);
                 return {};
             }
         }

         DAWN_TRY(EncodeRenderPassInternal(commandContext, mtlRenderPass, width, height));
         return {};
     }

     MaybeError CommandBuffer::EncodeRenderPassInternal(CommandRecordingContext* commandContext,
                                                        MTLRenderPassDescriptor* mtlRenderPass,
                                                        uint32_t width,
                                                        uint32_t height) {
         bool enableVertexPulling = GetDevice()->IsToggleEnabled(Toggle::MetalEnableVertexPulling);
         RenderPipeline* lastPipeline = nullptr;
         id<MTLBuffer> indexBuffer = nullptr;
         uint32_t indexBufferBaseOffset = 0;
         MTLIndexType indexBufferType;
         uint64_t indexFormatSize = 0;

         StorageBufferLengthTracker storageBufferLengths = {};
         VertexBufferTracker vertexBuffers(&storageBufferLengths);
         BindGroupTracker bindGroups(&storageBufferLengths);

         id<MTLRenderCommandEncoder> encoder = commandContext->BeginRender(mtlRenderPass);

         auto EncodeRenderBundleCommand = [&](CommandIterator* iter, Command type) {
             switch (type) {
                 case Command::Draw: {
                     DrawCmd* draw = iter->NextCommand<DrawCmd>();

                     vertexBuffers.Apply(encoder, lastPipeline, enableVertexPulling);
                     bindGroups.Apply(encoder);
                     storageBufferLengths.Apply(encoder, lastPipeline, enableVertexPulling);

                     // The instance count must be non-zero, otherwise no-op
                     if (draw->instanceCount != 0) {
                         // MTLFeatureSet_iOS_GPUFamily3_v1 does not support baseInstance
                         if (draw->firstInstance == 0) {
                             [encoder drawPrimitives:lastPipeline->GetMTLPrimitiveTopology()
                                         vertexStart:draw->firstVertex
                                         vertexCount:draw->vertexCount
                                       instanceCount:draw->instanceCount];
                         } else {
                             [encoder drawPrimitives:lastPipeline->GetMTLPrimitiveTopology()
                                         vertexStart:draw->firstVertex
                                         vertexCount:draw->vertexCount
                                       instanceCount:draw->instanceCount
                                        baseInstance:draw->firstInstance];
                         }
                     }
                     break;
                 }

                 case Command::DrawIndexed: {
                     DrawIndexedCmd* draw = iter->NextCommand<DrawIndexedCmd>();

                     vertexBuffers.Apply(encoder, lastPipeline, enableVertexPulling);
                     bindGroups.Apply(encoder);
                     storageBufferLengths.Apply(encoder, lastPipeline, enableVertexPulling);

                     // The index and instance count must be non-zero, otherwise no-op
                     if (draw->indexCount != 0 && draw->instanceCount != 0) {
                         // MTLFeatureSet_iOS_GPUFamily3_v1 does not support baseInstance and
                         // baseVertex.
                         if (draw->baseVertex == 0 && draw->firstInstance == 0) {
                             [encoder drawIndexedPrimitives:lastPipeline->GetMTLPrimitiveTopology()
                                                 indexCount:draw->indexCount
                                                  indexType:indexBufferType
                                                indexBuffer:indexBuffer
                                          indexBufferOffset:indexBufferBaseOffset +
                                                            draw->firstIndex * indexFormatSize
                                              instanceCount:draw->instanceCount];
                         } else {
                             [encoder drawIndexedPrimitives:lastPipeline->GetMTLPrimitiveTopology()
                                                 indexCount:draw->indexCount
                                                  indexType:indexBufferType
                                                indexBuffer:indexBuffer
                                          indexBufferOffset:indexBufferBaseOffset +
                                                            draw->firstIndex * indexFormatSize
                                              instanceCount:draw->instanceCount
                                                 baseVertex:draw->baseVertex
                                               baseInstance:draw->firstInstance];
                         }
                     }
                     break;
                 }

                 case Command::DrawIndirect: {
                     DrawIndirectCmd* draw = iter->NextCommand<DrawIndirectCmd>();

                     vertexBuffers.Apply(encoder, lastPipeline, enableVertexPulling);
                     bindGroups.Apply(encoder);
                     storageBufferLengths.Apply(encoder, lastPipeline, enableVertexPulling);

                     Buffer* buffer = ToBackend(draw->indirectBuffer.Get());
                     id<MTLBuffer> indirectBuffer = buffer->GetMTLBuffer();
                     [encoder drawPrimitives:lastPipeline->GetMTLPrimitiveTopology()
                               indirectBuffer:indirectBuffer
                         indirectBufferOffset:draw->indirectOffset];
                     break;
                 }

                 case Command::DrawIndexedIndirect: {
                     DrawIndexedIndirectCmd* draw = iter->NextCommand<DrawIndexedIndirectCmd>();

                     vertexBuffers.Apply(encoder, lastPipeline, enableVertexPulling);
                     bindGroups.Apply(encoder);
                     storageBufferLengths.Apply(encoder, lastPipeline, enableVertexPulling);

                     Buffer* buffer = ToBackend(draw->indirectBuffer.Get());
                     ASSERT(buffer != nullptr);

                     id<MTLBuffer> indirectBuffer = buffer->GetMTLBuffer();
                     [encoder drawIndexedPrimitives:lastPipeline->GetMTLPrimitiveTopology()
                                          indexType:indexBufferType
                                        indexBuffer:indexBuffer
                                  indexBufferOffset:indexBufferBaseOffset
                                     indirectBuffer:indirectBuffer
                               indirectBufferOffset:draw->indirectOffset];
                     break;
                 }

                 case Command::InsertDebugMarker: {
                     InsertDebugMarkerCmd* cmd = iter->NextCommand<InsertDebugMarkerCmd>();
                     char* label = iter->NextData<char>(cmd->length + 1);
                     NSRef<NSString> mtlLabel =
                         AcquireNSRef([[NSString alloc] initWithUTF8String:label]);
                     [encoder insertDebugSignpost:mtlLabel.Get()];
                     break;
                 }

                 case Command::PopDebugGroup: {
                     iter->NextCommand<PopDebugGroupCmd>();

                     [encoder popDebugGroup];
                     break;
                 }

                 case Command::PushDebugGroup: {
                     PushDebugGroupCmd* cmd = iter->NextCommand<PushDebugGroupCmd>();
                     char* label = iter->NextData<char>(cmd->length + 1);
                     NSRef<NSString> mtlLabel =
                         AcquireNSRef([[NSString alloc] initWithUTF8String:label]);
                     [encoder pushDebugGroup:mtlLabel.Get()];
                     break;
                 }

                 case Command::SetRenderPipeline: {
                     SetRenderPipelineCmd* cmd = iter->NextCommand<SetRenderPipelineCmd>();
                     RenderPipeline* newPipeline = ToBackend(cmd->pipeline).Get();

                     vertexBuffers.OnSetPipeline(lastPipeline, newPipeline);
                     bindGroups.OnSetPipeline(newPipeline);

                     [encoder setDepthStencilState:newPipeline->GetMTLDepthStencilState()];
                     [encoder setFrontFacingWinding:newPipeline->GetMTLFrontFace()];
                     [encoder setCullMode:newPipeline->GetMTLCullMode()];
                     [encoder setDepthBias:newPipeline->GetDepthBias()
                                slopeScale:newPipeline->GetDepthBiasSlopeScale()
                                     clamp:newPipeline->GetDepthBiasClamp()];
                     if (@available(macOS 10.11, iOS 11.0, *)) {
                         MTLDepthClipMode clipMode = newPipeline->ShouldClampDepth() ?
                             MTLDepthClipModeClamp : MTLDepthClipModeClip;
                         [encoder setDepthClipMode:clipMode];
                     }
                     newPipeline->Encode(encoder);

                     lastPipeline = newPipeline;
                     break;
                 }

                 case Command::SetBindGroup: {
                     SetBindGroupCmd* cmd = iter->NextCommand<SetBindGroupCmd>();
                     uint32_t* dynamicOffsets = nullptr;
                     if (cmd->dynamicOffsetCount > 0) {
                         dynamicOffsets = iter->NextData<uint32_t>(cmd->dynamicOffsetCount);
                     }

                     bindGroups.OnSetBindGroup(cmd->index, ToBackend(cmd->group.Get()),
                                               cmd->dynamicOffsetCount, dynamicOffsets);
                     break;
                 }

                 case Command::SetIndexBuffer: {
                     SetIndexBufferCmd* cmd = iter->NextCommand<SetIndexBufferCmd>();
                     auto b = ToBackend(cmd->buffer.Get());
                     indexBuffer = b->GetMTLBuffer();
                     indexBufferBaseOffset = cmd->offset;
                     indexBufferType = MTLIndexFormat(cmd->format);
                     indexFormatSize = IndexFormatSize(cmd->format);
                     break;
                 }

                 case Command::SetVertexBuffer: {
                     SetVertexBufferCmd* cmd = iter->NextCommand<SetVertexBufferCmd>();

                     vertexBuffers.OnSetVertexBuffer(cmd->slot, ToBackend(cmd->buffer.Get()),
                                                     cmd->offset);
                     break;
                 }

                 default:
                     UNREACHABLE();
                     break;
             }
         };

         Command type;
         while (mCommands.NextCommandId(&type)) {
             switch (type) {
                 case Command::EndRenderPass: {
                     mCommands.NextCommand<EndRenderPassCmd>();
                     commandContext->EndRender();
                     return {};
                 }

                 case Command::SetStencilReference: {
                     SetStencilReferenceCmd* cmd = mCommands.NextCommand<SetStencilReferenceCmd>();
                     [encoder setStencilReferenceValue:cmd->reference];
                     break;
                 }

                 case Command::SetViewport: {
                     SetViewportCmd* cmd = mCommands.NextCommand<SetViewportCmd>();
                     MTLViewport viewport;
                     viewport.originX = cmd->x;
                     viewport.originY = cmd->y;
                     viewport.width = cmd->width;
                     viewport.height = cmd->height;
                     viewport.znear = cmd->minDepth;
                     viewport.zfar = cmd->maxDepth;

                     [encoder setViewport:viewport];
                     break;
                 }

                 case Command::SetScissorRect: {
                     SetScissorRectCmd* cmd = mCommands.NextCommand<SetScissorRectCmd>();
                     MTLScissorRect rect;
                     rect.x = cmd->x;
                     rect.y = cmd->y;
                     rect.width = cmd->width;
                     rect.height = cmd->height;

                     [encoder setScissorRect:rect];
                     break;
                 }

                 case Command::SetBlendConstant: {
                     SetBlendConstantCmd* cmd = mCommands.NextCommand<SetBlendConstantCmd>();
                     [encoder setBlendColorRed:cmd->color.r
                                         green:cmd->color.g
                                          blue:cmd->color.b
                                         alpha:cmd->color.a];
                     break;
                 }

                 case Command::ExecuteBundles: {
                     ExecuteBundlesCmd* cmd = mCommands.NextCommand<ExecuteBundlesCmd>();
                     auto bundles = mCommands.NextData<Ref<RenderBundleBase>>(cmd->count);

                     for (uint32_t i = 0; i < cmd->count; ++i) {
                         CommandIterator* iter = bundles[i]->GetCommands();
                         iter->Reset();
                         while (iter->NextCommandId(&type)) {
                             EncodeRenderBundleCommand(iter, type);
                         }
                     }
                     break;
                 }

                 case Command::BeginOcclusionQuery: {
                     BeginOcclusionQueryCmd* cmd = mCommands.NextCommand<BeginOcclusionQueryCmd>();

                     [encoder setVisibilityResultMode:MTLVisibilityResultModeBoolean
                                               offset:cmd->queryIndex * sizeof(uint64_t)];
                     break;
                 }

                 case Command::EndOcclusionQuery: {
                     EndOcclusionQueryCmd* cmd = mCommands.NextCommand<EndOcclusionQueryCmd>();

                     [encoder setVisibilityResultMode:MTLVisibilityResultModeDisabled
                                               offset:cmd->queryIndex * sizeof(uint64_t)];
                     break;
                 }

                 case Command::WriteTimestamp: {
                     WriteTimestampCmd* cmd = mCommands.NextCommand<WriteTimestampCmd>();
                     QuerySet* querySet = ToBackend(cmd->querySet.Get());

                     if (@available(macos 10.15, iOS 14.0, *)) {
                         [encoder sampleCountersInBuffer:querySet->GetCounterSampleBuffer()
                                           atSampleIndex:NSUInteger(cmd->queryIndex)
                                             withBarrier:YES];
                     } else {
                         UNREACHABLE();
                     }
                     break;
                 }

                 default: {
                     EncodeRenderBundleCommand(&mCommands, type);
                     break;
                 }
             }
         }

         // EndRenderPass should have been called
         UNREACHABLE();
     }

 }  // namespace dawn::native::metal