Implement OpArrayLength on Metal

Metal uses a physical addressing mode and buffers are just pointers in
GPU memory that don't have a size. This prevents implementing
OpArrayLength without any additional information. When a shader uses
OpArrayLength on unsized arrays, SPIRV-Cross requires an extra buffer
argument that gets the length of the buffers.

Use that workaround mechanism in the Metal backend by keeping track of
the storage buffer sizes and applying the extra "buffer length buffer"
when a pipeline requires it.

Also adds tests that OpArraySize works in all shader stages.

BUG=dawn:195

Change-Id: I6aa6089aaea85d0589ccad1756e55dd0befefcb5
Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/9386
Commit-Queue: Kai Ninomiya <kainino@chromium.org>
Reviewed-by: Kai Ninomiya <kainino@chromium.org>
diff --git a/BUILD.gn b/BUILD.gn
index 2b53fd3..b07b3b1 100644
--- a/BUILD.gn
+++ b/BUILD.gn
@@ -705,6 +705,7 @@
     "src/tests/end2end/MultisampledRenderingTests.cpp",
     "src/tests/end2end/NonzeroTextureCreationTests.cpp",
     "src/tests/end2end/ObjectCachingTests.cpp",
+    "src/tests/end2end/OpArrayLengthTests.cpp",
     "src/tests/end2end/PrimitiveTopologyTests.cpp",
     "src/tests/end2end/RenderPassLoadOpTests.cpp",
     "src/tests/end2end/RenderPassTests.cpp",
diff --git a/src/dawn_native/metal/CommandBufferMTL.mm b/src/dawn_native/metal/CommandBufferMTL.mm
index 6892fe5..a096073 100644
--- a/src/dawn_native/metal/CommandBufferMTL.mm
+++ b/src/dawn_native/metal/CommandBufferMTL.mm
@@ -191,6 +191,67 @@
                           destinationOrigin:MTLOriginMake(0, 0, 0)];
         }
 
+        // Metal uses a physical addressing mode which means buffers in the shading language are
+        // just pointers to the virtual address of their start. This means there is no way to know
+        // the length of a buffer to compute the length() of unsized arrays at the end of storage
+        // buffers. SPIRV-Cross implements the length() of unsized arrays by requiring an extra
+        // buffer that contains the length of other buffers. This structure that keeps track of the
+        // length of storage buffers and can apply them to the reserved "buffer length buffer" when
+        // needed for a draw or a dispatch.
+        struct StorageBufferLengthTracker {
+            dawn::ShaderStageBit dirtyStages = dawn::ShaderStageBit::None;
+
+            // The lengths of buffers are stored as 32bit integers because that is the width the
+            // MSL code generated by SPIRV-Cross expects.
+            PerStage<std::array<uint32_t, kGenericMetalBufferSlots>> data;
+
+            void Apply(RenderPipeline* pipeline, id<MTLRenderCommandEncoder> render) {
+                dawn::ShaderStageBit stagesToApply =
+                    dirtyStages & pipeline->GetStagesRequiringStorageBufferLength();
+
+                if (stagesToApply == dawn::ShaderStageBit::None) {
+                    return;
+                }
+
+                if (stagesToApply & dawn::ShaderStageBit::Vertex) {
+                    uint32_t bufferCount = ToBackend(pipeline->GetLayout())
+                                               ->GetBufferBindingCount(ShaderStage::Vertex);
+                    [render setVertexBytes:data[ShaderStage::Vertex].data()
+                                    length:sizeof(uint32_t) * bufferCount
+                                   atIndex:kBufferLengthBufferSlot];
+                }
+
+                if (stagesToApply & dawn::ShaderStageBit::Fragment) {
+                    uint32_t bufferCount = ToBackend(pipeline->GetLayout())
+                                               ->GetBufferBindingCount(ShaderStage::Fragment);
+                    [render setFragmentBytes:data[ShaderStage::Fragment].data()
+                                      length:sizeof(uint32_t) * bufferCount
+                                     atIndex:kBufferLengthBufferSlot];
+                }
+
+                // Only mark clean stages that were actually applied.
+                dirtyStages ^= stagesToApply;
+            }
+
+            void Apply(ComputePipeline* pipeline, id<MTLComputeCommandEncoder> compute) {
+                if (!(dirtyStages & dawn::ShaderStageBit::Compute)) {
+                    return;
+                }
+
+                if (!pipeline->RequiresStorageBufferLength()) {
+                    return;
+                }
+
+                uint32_t bufferCount =
+                    ToBackend(pipeline->GetLayout())->GetBufferBindingCount(ShaderStage::Compute);
+                [compute setBytes:data[ShaderStage::Compute].data()
+                           length:sizeof(uint32_t) * bufferCount
+                          atIndex:kBufferLengthBufferSlot];
+
+                dirtyStages ^= dawn::ShaderStageBit::Compute;
+            }
+        };
+
         // Handles a call to SetBindGroup, directing the commands to the correct encoder.
         // There is a single function that takes both encoders to factor code. Other approaches like
         // templates wouldn't work because the name of methods are different between the two encoder
@@ -200,6 +261,7 @@
                             uint32_t dynamicOffsetCount,
                             uint64_t* dynamicOffsets,
                             PipelineLayout* pipelineLayout,
+                            StorageBufferLengthTracker* lengthTracker,
                             id<MTLRenderCommandEncoder> render,
                             id<MTLComputeCommandEncoder> compute) {
             const auto& layout = group->GetLayout()->GetBindingInfo();
@@ -234,7 +296,8 @@
                 switch (layout.types[bindingIndex]) {
                     case dawn::BindingType::UniformBuffer:
                     case dawn::BindingType::StorageBuffer: {
-                        BufferBinding binding = group->GetBindingAsBufferBinding(bindingIndex);
+                        const BufferBinding& binding =
+                            group->GetBindingAsBufferBinding(bindingIndex);
                         const id<MTLBuffer> buffer = ToBackend(binding.buffer)->GetMTLBuffer();
                         NSUInteger offset = binding.offset;
 
@@ -246,16 +309,22 @@
                         }
 
                         if (hasVertStage) {
+                            lengthTracker->data[ShaderStage::Vertex][vertIndex] = binding.size;
+                            lengthTracker->dirtyStages |= dawn::ShaderStageBit::Vertex;
                             [render setVertexBuffers:&buffer
                                              offsets:&offset
                                            withRange:NSMakeRange(vertIndex, 1)];
                         }
                         if (hasFragStage) {
+                            lengthTracker->data[ShaderStage::Fragment][fragIndex] = binding.size;
+                            lengthTracker->dirtyStages |= dawn::ShaderStageBit::Fragment;
                             [render setFragmentBuffers:&buffer
                                                offsets:&offset
                                              withRange:NSMakeRange(fragIndex, 1)];
                         }
                         if (hasComputeStage) {
+                            lengthTracker->data[ShaderStage::Compute][computeIndex] = binding.size;
+                            lengthTracker->dirtyStages |= dawn::ShaderStageBit::Compute;
                             [compute setBuffers:&buffer
                                         offsets:&offset
                                       withRange:NSMakeRange(computeIndex, 1)];
@@ -611,6 +680,7 @@
 
     void CommandBuffer::EncodeComputePass(id<MTLCommandBuffer> commandBuffer) {
         ComputePipeline* lastPipeline = nullptr;
+        StorageBufferLengthTracker storageBufferLengths = {};
 
         // Will be autoreleased
         id<MTLComputeCommandEncoder> encoder = [commandBuffer computeCommandEncoder];
@@ -626,12 +696,15 @@
 
                 case Command::Dispatch: {
                     DispatchCmd* dispatch = mCommands.NextCommand<DispatchCmd>();
+                    storageBufferLengths.Apply(lastPipeline, encoder);
+
                     [encoder dispatchThreadgroups:MTLSizeMake(dispatch->x, dispatch->y, dispatch->z)
                             threadsPerThreadgroup:lastPipeline->GetLocalWorkGroupSize()];
                 } break;
 
                 case Command::DispatchIndirect: {
                     DispatchIndirectCmd* dispatch = mCommands.NextCommand<DispatchIndirectCmd>();
+                    storageBufferLengths.Apply(lastPipeline, encoder);
 
                     Buffer* buffer = ToBackend(dispatch->indirectBuffer.Get());
                     id<MTLBuffer> indirectBuffer = buffer->GetMTLBuffer();
@@ -656,8 +729,8 @@
                     }
 
                     ApplyBindGroup(cmd->index, ToBackend(cmd->group.Get()), cmd->dynamicOffsetCount,
-                                   dynamicOffsets, ToBackend(lastPipeline->GetLayout()), nil,
-                                   encoder);
+                                   dynamicOffsets, ToBackend(lastPipeline->GetLayout()),
+                                   &storageBufferLengths, nil, encoder);
                 } break;
 
                 case Command::InsertDebugMarker: {
@@ -792,6 +865,7 @@
         id<MTLBuffer> indexBuffer = nil;
         uint32_t indexBufferBaseOffset = 0;
         VertexInputBufferTracker vertexInputBuffers;
+        StorageBufferLengthTracker storageBufferLengths = {};
 
         // This will be autoreleased
         id<MTLRenderCommandEncoder> encoder =
@@ -810,6 +884,7 @@
                     DrawCmd* draw = mCommands.NextCommand<DrawCmd>();
 
                     vertexInputBuffers.Apply(encoder, lastPipeline);
+                    storageBufferLengths.Apply(lastPipeline, encoder);
 
                     // The instance count must be non-zero, otherwise no-op
                     if (draw->instanceCount != 0) {
@@ -827,6 +902,7 @@
                         IndexFormatSize(lastPipeline->GetVertexInputDescriptor()->indexFormat);
 
                     vertexInputBuffers.Apply(encoder, lastPipeline);
+                    storageBufferLengths.Apply(lastPipeline, encoder);
 
                     // The index and instance count must be non-zero, otherwise no-op
                     if (draw->indexCount != 0 && draw->instanceCount != 0) {
@@ -846,6 +922,7 @@
                     DrawIndirectCmd* draw = mCommands.NextCommand<DrawIndirectCmd>();
 
                     vertexInputBuffers.Apply(encoder, lastPipeline);
+                    storageBufferLengths.Apply(lastPipeline, encoder);
 
                     Buffer* buffer = ToBackend(draw->indirectBuffer.Get());
                     id<MTLBuffer> indirectBuffer = buffer->GetMTLBuffer();
@@ -858,6 +935,7 @@
                     DrawIndirectCmd* draw = mCommands.NextCommand<DrawIndirectCmd>();
 
                     vertexInputBuffers.Apply(encoder, lastPipeline);
+                    storageBufferLengths.Apply(lastPipeline, encoder);
 
                     Buffer* buffer = ToBackend(draw->indirectBuffer.Get());
                     id<MTLBuffer> indirectBuffer = buffer->GetMTLBuffer();
@@ -960,8 +1038,8 @@
                     }
 
                     ApplyBindGroup(cmd->index, ToBackend(cmd->group.Get()), cmd->dynamicOffsetCount,
-                                   dynamicOffsets, ToBackend(lastPipeline->GetLayout()), encoder,
-                                   nil);
+                                   dynamicOffsets, ToBackend(lastPipeline->GetLayout()),
+                                   &storageBufferLengths, encoder, nil);
                 } break;
 
                 case Command::SetIndexBuffer: {
diff --git a/src/dawn_native/metal/ComputePipelineMTL.h b/src/dawn_native/metal/ComputePipelineMTL.h
index 6f3aca9..71b5ba3 100644
--- a/src/dawn_native/metal/ComputePipelineMTL.h
+++ b/src/dawn_native/metal/ComputePipelineMTL.h
@@ -30,10 +30,12 @@
 
         void Encode(id<MTLComputeCommandEncoder> encoder);
         MTLSize GetLocalWorkGroupSize() const;
+        bool RequiresStorageBufferLength() const;
 
       private:
         id<MTLComputePipelineState> mMtlComputePipelineState = nil;
         MTLSize mLocalWorkgroupSize;
+        bool mRequiresStorageBufferLength;
     };
 
 }}  // namespace dawn_native::metal
diff --git a/src/dawn_native/metal/ComputePipelineMTL.mm b/src/dawn_native/metal/ComputePipelineMTL.mm
index 66f6da8..ef4f8a3 100644
--- a/src/dawn_native/metal/ComputePipelineMTL.mm
+++ b/src/dawn_native/metal/ComputePipelineMTL.mm
@@ -39,6 +39,7 @@
 
         // Copy over the local workgroup size as it is passed to dispatch explicitly in Metal
         mLocalWorkgroupSize = computeData.localWorkgroupSize;
+        mRequiresStorageBufferLength = computeData.needsStorageBufferLength;
     }
 
     ComputePipeline::~ComputePipeline() {
@@ -53,4 +54,8 @@
         return mLocalWorkgroupSize;
     }
 
+    bool ComputePipeline::RequiresStorageBufferLength() const {
+        return mRequiresStorageBufferLength;
+    }
+
 }}  // namespace dawn_native::metal
diff --git a/src/dawn_native/metal/PipelineLayoutMTL.h b/src/dawn_native/metal/PipelineLayoutMTL.h
index 59ba3b7..59091ac 100644
--- a/src/dawn_native/metal/PipelineLayoutMTL.h
+++ b/src/dawn_native/metal/PipelineLayoutMTL.h
@@ -29,6 +29,13 @@
 
     class Device;
 
+    // The number of Metal buffers usable by applications in general
+    static constexpr size_t kMetalBufferTableSize = 31;
+    // The Metal buffer slot that Dawn reserves for its own use to pass more data to shaders
+    static constexpr size_t kBufferLengthBufferSlot = kMetalBufferTableSize - 1;
+    // The number of Metal buffers Dawn can use in a generic way (i.e. that aren't reserved)
+    static constexpr size_t kGenericMetalBufferSlots = kMetalBufferTableSize - 1;
+
     class PipelineLayout : public PipelineLayoutBase {
       public:
         PipelineLayout(Device* device, const PipelineLayoutDescriptor* descriptor);
diff --git a/src/dawn_native/metal/RenderPipelineMTL.h b/src/dawn_native/metal/RenderPipelineMTL.h
index 10d7525..8574356 100644
--- a/src/dawn_native/metal/RenderPipelineMTL.h
+++ b/src/dawn_native/metal/RenderPipelineMTL.h
@@ -41,6 +41,8 @@
         // vertex buffer table.
         uint32_t GetMtlVertexBufferIndex(uint32_t dawnIndex) const;
 
+        dawn::ShaderStageBit GetStagesRequiringStorageBufferLength() const;
+
       private:
         MTLVertexDescriptor* MakeVertexDesc();
 
@@ -51,6 +53,8 @@
         id<MTLRenderPipelineState> mMtlRenderPipelineState = nil;
         id<MTLDepthStencilState> mMtlDepthStencilState = nil;
         std::array<uint32_t, kMaxVertexBuffers> mMtlVertexBufferIndices;
+
+        dawn::ShaderStageBit mStagesRequiringStorageBufferLength = dawn::ShaderStageBit::None;
     };
 
 }}  // namespace dawn_native::metal
diff --git a/src/dawn_native/metal/RenderPipelineMTL.mm b/src/dawn_native/metal/RenderPipelineMTL.mm
index ac6ab35..ef54f40 100644
--- a/src/dawn_native/metal/RenderPipelineMTL.mm
+++ b/src/dawn_native/metal/RenderPipelineMTL.mm
@@ -319,12 +319,18 @@
         ShaderModule::MetalFunctionData vertexData = vertexModule->GetFunction(
             vertexEntryPoint, ShaderStage::Vertex, ToBackend(GetLayout()));
         descriptorMTL.vertexFunction = vertexData.function;
+        if (vertexData.needsStorageBufferLength) {
+            mStagesRequiringStorageBufferLength |= dawn::ShaderStageBit::Vertex;
+        }
 
         const ShaderModule* fragmentModule = ToBackend(descriptor->fragmentStage->module);
         const char* fragmentEntryPoint = descriptor->fragmentStage->entryPoint;
         ShaderModule::MetalFunctionData fragmentData = fragmentModule->GetFunction(
             fragmentEntryPoint, ShaderStage::Fragment, ToBackend(GetLayout()));
         descriptorMTL.fragmentFunction = fragmentData.function;
+        if (fragmentData.needsStorageBufferLength) {
+            mStagesRequiringStorageBufferLength |= dawn::ShaderStageBit::Fragment;
+        }
 
         if (HasDepthStencilAttachment()) {
             // TODO(kainino@chromium.org): Handle depth-only and stencil-only formats.
@@ -405,6 +411,10 @@
         return mMtlVertexBufferIndices[dawnIndex];
     }
 
+    dawn::ShaderStageBit RenderPipeline::GetStagesRequiringStorageBufferLength() const {
+        return mStagesRequiringStorageBufferLength;
+    }
+
     MTLVertexDescriptor* RenderPipeline::MakeVertexDesc() {
         MTLVertexDescriptor* mtlVertexDescriptor = [MTLVertexDescriptor new];
 
diff --git a/src/dawn_native/metal/ShaderModuleMTL.h b/src/dawn_native/metal/ShaderModuleMTL.h
index 69f9007..65767c6 100644
--- a/src/dawn_native/metal/ShaderModuleMTL.h
+++ b/src/dawn_native/metal/ShaderModuleMTL.h
@@ -35,6 +35,7 @@
         struct MetalFunctionData {
             id<MTLFunction> function;
             MTLSize localWorkgroupSize;
+            bool needsStorageBufferLength;
             ~MetalFunctionData() {
                 [function release];
             }
diff --git a/src/dawn_native/metal/ShaderModuleMTL.mm b/src/dawn_native/metal/ShaderModuleMTL.mm
index b525eba..929fc5f 100644
--- a/src/dawn_native/metal/ShaderModuleMTL.mm
+++ b/src/dawn_native/metal/ShaderModuleMTL.mm
@@ -58,13 +58,19 @@
         options_glsl.vertex.flip_vert_y = true;
         compiler.spirv_cross::CompilerGLSL::set_common_options(options_glsl);
 
+        spirv_cross::CompilerMSL::Options options_msl;
+
         // Disable PointSize builtin for https://bugs.chromium.org/p/dawn/issues/detail?id=146
-        // Becuase Metal will reject PointSize builtin if the shader is compiled into a render
+        // Because Metal will reject PointSize builtin if the shader is compiled into a render
         // pipeline that uses a non-point topology.
         // TODO (hao.x.li@intel.com): Remove this once WebGPU requires there is no
         // gl_PointSize builtin (https://github.com/gpuweb/gpuweb/issues/332).
-        spirv_cross::CompilerMSL::Options options_msl;
         options_msl.enable_point_size_builtin = false;
+
+        // Always use vertex buffer 30 (the last one in the vertex buffer table) to contain
+        // the shader storage buffer lengths.
+        options_msl.buffer_size_buffer_index = kBufferLengthBufferSlot;
+
         compiler.set_msl_options(options_msl);
 
         // By default SPIRV-Cross will give MSL resources indices in increasing order.
@@ -135,6 +141,8 @@
             [library release];
         }
 
+        result.needsStorageBufferLength = compiler.needs_buffer_size_buffer();
+
         return result;
     }
 
diff --git a/src/tests/end2end/OpArrayLengthTests.cpp b/src/tests/end2end/OpArrayLengthTests.cpp
new file mode 100644
index 0000000..be402ea
--- /dev/null
+++ b/src/tests/end2end/OpArrayLengthTests.cpp
@@ -0,0 +1,265 @@
+// Copyright 2019 The Dawn Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "tests/DawnTest.h"
+
+#include "common/Assert.h"
+#include "utils/ComboRenderPipelineDescriptor.h"
+#include "utils/DawnHelpers.h"
+
+class OpArrayLengthTest : public DawnTest {
+  protected:
+    void SetUp() {
+        DawnTest::SetUp();
+
+        // Create buffers of various size to check the length() implementation
+        dawn::BufferDescriptor bufferDesc;
+        bufferDesc.size = 4;
+        bufferDesc.usage = dawn::BufferUsageBit::Storage;
+        mStorageBuffer4 = device.CreateBuffer(&bufferDesc);
+
+        bufferDesc.size = 256;
+        mStorageBuffer256 = device.CreateBuffer(&bufferDesc);
+
+        bufferDesc.size = 512;
+        mStorageBuffer512 = device.CreateBuffer(&bufferDesc);
+
+        // Put them all in a bind group for tests to bind them easily.
+        dawn::ShaderStageBit kAllStages = dawn::ShaderStageBit::Fragment |
+                                          dawn::ShaderStageBit::Vertex |
+                                          dawn::ShaderStageBit::Compute;
+        mBindGroupLayout =
+            utils::MakeBindGroupLayout(device, {{0, kAllStages, dawn::BindingType::StorageBuffer},
+                                                {1, kAllStages, dawn::BindingType::StorageBuffer},
+                                                {2, kAllStages, dawn::BindingType::StorageBuffer}});
+
+        mBindGroup = utils::MakeBindGroup(device, mBindGroupLayout,
+                                          {
+                                              {0, mStorageBuffer4, 0, 4},
+                                              {1, mStorageBuffer256, 0, dawn::kWholeSize},
+                                              {2, mStorageBuffer512, 0, 512},
+                                          });
+
+        // Common shader code to use these buffers in shaders, assuming they are in bindgroup index
+        // 0.
+        mShaderInterface = R"(
+            // The length should be 1 because the buffer is 4-byte long.
+            layout(std430, set = 0, binding = 0) buffer Buffer1 {
+                float data[];
+            } buffer1;
+
+            // The length should be 64 because the buffer is 256 bytes long.
+            layout(std430, set = 0, binding = 1) buffer Buffer2 {
+                float data[];
+            } buffer2;
+
+            // The length should be (512 - 16*4) / 8 = 56 because the buffer is 512 bytes long
+            // and the structure is 8 bytes big.
+            struct Buffer3Data {float a; int b;};
+            layout(std430, set = 0, binding = 2) buffer Buffer3 {
+                mat4 garbage;
+                Buffer3Data data[];
+            } buffer3;
+        )";
+
+        // See comments in the shader for an explanation of these values
+        mExpectedLengths = {1, 64, 56};
+    }
+
+    dawn::Buffer mStorageBuffer4;
+    dawn::Buffer mStorageBuffer256;
+    dawn::Buffer mStorageBuffer512;
+
+    dawn::BindGroupLayout mBindGroupLayout;
+    dawn::BindGroup mBindGroup;
+    std::string mShaderInterface;
+    std::array<uint32_t, 3> mExpectedLengths;
+};
+
+// Test OpArrayLength in the compute stage
+TEST_P(OpArrayLengthTest, Compute) {
+    // TODO(cwallez@chromium.org): The computations for length() of unsized buffer is broken on
+    // Nvidia OpenGL. See https://bugs.chromium.org/p/dawn/issues/detail?id=197
+    DAWN_SKIP_TEST_IF(IsNvidia() && IsOpenGL());
+
+    // Create a buffer to hold the result sizes and create a bindgroup for it.
+    dawn::BufferDescriptor bufferDesc;
+    bufferDesc.usage = dawn::BufferUsageBit::Storage | dawn::BufferUsageBit::CopySrc;
+    bufferDesc.size = sizeof(uint32_t) * mExpectedLengths.size();
+    dawn::Buffer resultBuffer = device.CreateBuffer(&bufferDesc);
+
+    dawn::BindGroupLayout resultLayout = utils::MakeBindGroupLayout(
+        device, {{0, dawn::ShaderStageBit::Compute, dawn::BindingType::StorageBuffer}});
+
+    dawn::BindGroup resultBindGroup =
+        utils::MakeBindGroup(device, resultLayout, {{0, resultBuffer, 0, dawn::kWholeSize}});
+
+    // Create the compute pipeline that stores the length()s in the result buffer.
+    dawn::BindGroupLayout bgls[] = {mBindGroupLayout, resultLayout};
+    dawn::PipelineLayoutDescriptor plDesc;
+    plDesc.bindGroupLayoutCount = 2;
+    plDesc.bindGroupLayouts = bgls;
+    dawn::PipelineLayout pl = device.CreatePipelineLayout(&plDesc);
+
+    dawn::PipelineStageDescriptor computeStage;
+    computeStage.entryPoint = "main";
+    computeStage.module = utils::CreateShaderModule(device, utils::ShaderStage::Compute,
+                                                    (R"(#version 450
+            layout(std430, set = 1, binding = 0) buffer ResultBuffer {
+                uint result[3];
+            };
+            )" + mShaderInterface + R"(
+            void main() {
+                result[0] = buffer1.data.length();
+                result[1] = buffer2.data.length();
+                result[2] = buffer3.data.length();
+            })")
+                                                        .c_str());
+
+    dawn::ComputePipelineDescriptor pipelineDesc;
+    pipelineDesc.layout = pl;
+    pipelineDesc.computeStage = &computeStage;
+    dawn::ComputePipeline pipeline = device.CreateComputePipeline(&pipelineDesc);
+
+    // Run a single instance of the compute shader
+    dawn::CommandEncoder encoder = device.CreateCommandEncoder();
+    dawn::ComputePassEncoder pass = encoder.BeginComputePass();
+    pass.SetPipeline(pipeline);
+    pass.SetBindGroup(0, mBindGroup, 0, nullptr);
+    pass.SetBindGroup(1, resultBindGroup, 0, nullptr);
+    pass.Dispatch(1, 1, 1);
+    pass.EndPass();
+
+    dawn::CommandBuffer commands = encoder.Finish();
+    queue.Submit(1, &commands);
+
+    EXPECT_BUFFER_U32_RANGE_EQ(mExpectedLengths.data(), resultBuffer, 0, 3);
+}
+
+// Test OpArrayLength in the fragment stage
+TEST_P(OpArrayLengthTest, Fragment) {
+    // TODO(cwallez@chromium.org): The computations for length() of unsized buffer is broken on
+    // Nvidia OpenGL. See https://bugs.chromium.org/p/dawn/issues/detail?id=197
+    DAWN_SKIP_TEST_IF(IsNvidia() && IsOpenGL());
+
+    utils::BasicRenderPass renderPass = utils::CreateBasicRenderPass(device, 1, 1);
+
+    // Create the pipeline that computes the length of the buffers and writes it to the only render
+    // pass pixel.
+    dawn::ShaderModule vsModule = utils::CreateShaderModule(device, utils::ShaderStage::Vertex, R"(
+        #version 450
+        void main() {
+            gl_Position = vec4(0.0f, 0.0f, 0.0f, 1.0f);
+            gl_PointSize = 1.0;
+        })");
+
+    dawn::ShaderModule fsModule = utils::CreateShaderModule(device, utils::ShaderStage::Fragment,
+                                                            (R"(
+        #version 450
+        )" + mShaderInterface + R"(
+        layout(location = 0) out vec4 fragColor;
+        void main() {
+            fragColor.r = buffer1.data.length() / 255.0f;
+            fragColor.g = buffer2.data.length() / 255.0f;
+            fragColor.b = buffer3.data.length() / 255.0f;
+            fragColor.a = 0.0f;
+        })")
+                                                                .c_str());
+
+    utils::ComboRenderPipelineDescriptor descriptor(device);
+    descriptor.cVertexStage.module = vsModule;
+    descriptor.cFragmentStage.module = fsModule;
+    descriptor.primitiveTopology = dawn::PrimitiveTopology::PointList;
+    descriptor.cColorStates[0]->format = renderPass.colorFormat;
+    descriptor.layout = utils::MakeBasicPipelineLayout(device, &mBindGroupLayout);
+    dawn::RenderPipeline pipeline = device.CreateRenderPipeline(&descriptor);
+
+    // "Draw" the lengths to the texture.
+    dawn::CommandEncoder encoder = device.CreateCommandEncoder();
+    {
+        dawn::RenderPassEncoder pass = encoder.BeginRenderPass(&renderPass.renderPassInfo);
+        pass.SetPipeline(pipeline);
+        pass.SetBindGroup(0, mBindGroup, 0, nullptr);
+        pass.Draw(1, 1, 0, 0);
+        pass.EndPass();
+    }
+
+    dawn::CommandBuffer commands = encoder.Finish();
+    queue.Submit(1, &commands);
+
+    RGBA8 expectedColor = RGBA8(mExpectedLengths[0], mExpectedLengths[1], mExpectedLengths[2], 0);
+    EXPECT_PIXEL_RGBA8_EQ(expectedColor, renderPass.color, 0, 0);
+}
+
+// Test OpArrayLength in the vertex stage
+TEST_P(OpArrayLengthTest, Vertex) {
+    // TODO(cwallez@chromium.org): The computations for length() of unsized buffer is broken on
+    // Nvidia OpenGL. See https://bugs.chromium.org/p/dawn/issues/detail?id=197
+    DAWN_SKIP_TEST_IF(IsNvidia() && IsOpenGL());
+
+    utils::BasicRenderPass renderPass = utils::CreateBasicRenderPass(device, 1, 1);
+
+    // Create the pipeline that computes the length of the buffers and writes it to the only render
+    // pass pixel.
+    dawn::ShaderModule vsModule = utils::CreateShaderModule(device, utils::ShaderStage::Vertex,
+                                                            (R"(
+        #version 450
+        )" + mShaderInterface + R"(
+        layout(location = 0) out vec4 pointColor;
+        void main() {
+            pointColor.r = buffer1.data.length() / 255.0f;
+            pointColor.g = buffer2.data.length() / 255.0f;
+            pointColor.b = buffer3.data.length() / 255.0f;
+            pointColor.a = 0.0f;
+
+            gl_Position = vec4(0.0f, 0.0f, 0.0f, 1.0f);
+            gl_PointSize = 1.0;
+        })")
+                                                                .c_str());
+
+    dawn::ShaderModule fsModule =
+        utils::CreateShaderModule(device, utils::ShaderStage::Fragment, R"(
+        #version 450
+        layout(location = 0) out vec4 fragColor;
+        layout(location = 0) in vec4 pointColor;
+        void main() {
+            fragColor = pointColor;
+        })");
+
+    utils::ComboRenderPipelineDescriptor descriptor(device);
+    descriptor.cVertexStage.module = vsModule;
+    descriptor.cFragmentStage.module = fsModule;
+    descriptor.primitiveTopology = dawn::PrimitiveTopology::PointList;
+    descriptor.cColorStates[0]->format = renderPass.colorFormat;
+    descriptor.layout = utils::MakeBasicPipelineLayout(device, &mBindGroupLayout);
+    dawn::RenderPipeline pipeline = device.CreateRenderPipeline(&descriptor);
+
+    // "Draw" the lengths to the texture.
+    dawn::CommandEncoder encoder = device.CreateCommandEncoder();
+    {
+        dawn::RenderPassEncoder pass = encoder.BeginRenderPass(&renderPass.renderPassInfo);
+        pass.SetPipeline(pipeline);
+        pass.SetBindGroup(0, mBindGroup, 0, nullptr);
+        pass.Draw(1, 1, 0, 0);
+        pass.EndPass();
+    }
+
+    dawn::CommandBuffer commands = encoder.Finish();
+    queue.Submit(1, &commands);
+
+    RGBA8 expectedColor = RGBA8(mExpectedLengths[0], mExpectedLengths[1], mExpectedLengths[2], 0);
+    EXPECT_PIXEL_RGBA8_EQ(expectedColor, renderPass.color, 0, 0);
+}
+
+DAWN_INSTANTIATE_TEST(OpArrayLengthTest, D3D12Backend, MetalBackend, OpenGLBackend, VulkanBackend);