Add tests for some limits

tested:
 - maxComputeWorkgroupStorageSize
 - maxUniformBufferBindingSize
 - maxStorageBufferBindingSize

Two of these limits are exposed as configurable to the JS API
so it's important they are tested to work before we expose
them. maxUniformBufferBindingSize came along as well because
the test for storageBufferBindingSize was easy to parameterize.

Bug: dawn:685
Change-Id: I08de6df9d70a22aca0f48ac3fef0038f7aec727b
Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/66480
Reviewed-by: Corentin Wallez <cwallez@chromium.org>
Reviewed-by: Brandon Jones <bajones@chromium.org>
Commit-Queue: Austin Eng <enga@chromium.org>
diff --git a/src/tests/BUILD.gn b/src/tests/BUILD.gn
index 3f236fe..e1e658a 100644
--- a/src/tests/BUILD.gn
+++ b/src/tests/BUILD.gn
@@ -355,6 +355,7 @@
     "end2end/FirstIndexOffsetTests.cpp",
     "end2end/GpuMemorySynchronizationTests.cpp",
     "end2end/IndexFormatTests.cpp",
+    "end2end/MaxLimitTests.cpp",
     "end2end/MemoryAllocationStressTests.cpp",
     "end2end/MultisampledRenderingTests.cpp",
     "end2end/MultisampledSamplingTests.cpp",
diff --git a/src/tests/DawnTest.cpp b/src/tests/DawnTest.cpp
index 0dff126..cb59775 100644
--- a/src/tests/DawnTest.cpp
+++ b/src/tests/DawnTest.cpp
@@ -858,6 +858,10 @@
     return {};
 }
 
+wgpu::RequiredLimits DawnTestBase::GetRequiredLimits(const wgpu::SupportedLimits&) {
+    return {};
+}
+
 const wgpu::AdapterProperties& DawnTestBase::GetAdapterProperties() const {
     return mParam.adapterProperties;
 }
@@ -921,6 +925,11 @@
     deviceDescriptor.forceDisabledToggles = mParam.forceDisabledWorkarounds;
     deviceDescriptor.requiredFeatures = GetRequiredFeatures();
 
+    wgpu::SupportedLimits supportedLimits;
+    mBackendAdapter.GetLimits(reinterpret_cast<WGPUSupportedLimits*>(&supportedLimits));
+    wgpu::RequiredLimits requiredLimits = GetRequiredLimits(supportedLimits);
+    deviceDescriptor.requiredLimits = reinterpret_cast<WGPURequiredLimits*>(&requiredLimits);
+
     // Disabled disallowing unsafe APIs so we can test them.
     deviceDescriptor.forceDisabledToggles.push_back("disallow_unsafe_apis");
 
diff --git a/src/tests/DawnTest.h b/src/tests/DawnTest.h
index 2d96e1f..db13ff6 100644
--- a/src/tests/DawnTest.h
+++ b/src/tests/DawnTest.h
@@ -483,6 +483,8 @@
     // code path to handle the situation when not all features are supported.
     virtual std::vector<const char*> GetRequiredFeatures();
 
+    virtual wgpu::RequiredLimits GetRequiredLimits(const wgpu::SupportedLimits&);
+
     const wgpu::AdapterProperties& GetAdapterProperties() const;
 
     // TODO(crbug.com/dawn/689): Use limits returned from the wire
diff --git a/src/tests/end2end/MaxLimitTests.cpp b/src/tests/end2end/MaxLimitTests.cpp
new file mode 100644
index 0000000..a083cf0
--- /dev/null
+++ b/src/tests/end2end/MaxLimitTests.cpp
@@ -0,0 +1,226 @@
+// Copyright 2021 The Dawn Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "tests/DawnTest.h"
+
+#include "common/Math.h"
+#include "utils/WGPUHelpers.h"
+
+class MaxLimitTests : public DawnTest {
+  public:
+    wgpu::RequiredLimits GetRequiredLimits(const wgpu::SupportedLimits& supported) override {
+        wgpu::RequiredLimits required = {};
+        required.limits = supported.limits;
+        return required;
+    }
+};
+
+// Test using the maximum amount of workgroup memory works
+TEST_P(MaxLimitTests, MaxComputeWorkgroupStorageSize) {
+    uint32_t maxComputeWorkgroupStorageSize =
+        GetSupportedLimits().limits.maxComputeWorkgroupStorageSize;
+
+    std::string shader = R"(
+        [[block]] struct Dst {
+            value0 : u32;
+            value1 : u32;
+        };
+
+        [[group(0), binding(0)]] var<storage, write> dst : Dst;
+
+        struct WGData {
+          value0 : u32;
+          // padding such that value0 and value1 are the first and last bytes of the memory.
+          [[size()" + std::to_string(maxComputeWorkgroupStorageSize / 4 - 2) +
+                         R"()]] padding : u32;
+          value1 : u32;
+        };
+        var<workgroup> wg_data : WGData;
+
+        [[stage(compute), workgroup_size(2,1,1)]]
+        fn main([[builtin(local_invocation_index)]] LocalInvocationIndex : u32) {
+            if (LocalInvocationIndex == 0u) {
+                // Put data into the first and last byte of workgroup memory.
+                wg_data.value0 = 79u;
+                wg_data.value1 = 42u;
+            }
+
+            workgroupBarrier();
+
+            if (LocalInvocationIndex == 1u) {
+                // Read data out of workgroup memory into a storage buffer.
+                dst.value0 = wg_data.value0;
+                dst.value1 = wg_data.value1;
+            }
+        }
+    )";
+    wgpu::ComputePipelineDescriptor csDesc;
+    csDesc.compute.module = utils::CreateShaderModule(device, shader.c_str());
+    csDesc.compute.entryPoint = "main";
+    wgpu::ComputePipeline pipeline = device.CreateComputePipeline(&csDesc);
+
+    // Set up dst storage buffer
+    wgpu::BufferDescriptor dstDesc;
+    dstDesc.size = 8;
+    dstDesc.usage =
+        wgpu::BufferUsage::Storage | wgpu::BufferUsage::CopySrc | wgpu::BufferUsage::CopyDst;
+    wgpu::Buffer dst = device.CreateBuffer(&dstDesc);
+
+    // Set up bind group and issue dispatch
+    wgpu::BindGroup bindGroup = utils::MakeBindGroup(device, pipeline.GetBindGroupLayout(0),
+                                                     {
+                                                         {0, dst},
+                                                     });
+
+    wgpu::CommandEncoder encoder = device.CreateCommandEncoder();
+    wgpu::ComputePassEncoder pass = encoder.BeginComputePass();
+    pass.SetPipeline(pipeline);
+    pass.SetBindGroup(0, bindGroup);
+    pass.Dispatch(1);
+    pass.EndPass();
+    wgpu::CommandBuffer commands = encoder.Finish();
+    queue.Submit(1, &commands);
+
+    EXPECT_BUFFER_U32_EQ(79, dst, 0);
+    EXPECT_BUFFER_U32_EQ(42, dst, 4);
+}
+
+// Test using the maximum uniform/storage buffer binding size works
+TEST_P(MaxLimitTests, MaxBufferBindingSize) {
+    // The uniform buffer layout used in this test is not supported on ES.
+    DAWN_TEST_UNSUPPORTED_IF(IsOpenGLES());
+
+    for (wgpu::BufferUsage usage : {wgpu::BufferUsage::Storage, wgpu::BufferUsage::Uniform}) {
+        uint64_t maxBufferBindingSize;
+        std::string shader;
+        switch (usage) {
+            case wgpu::BufferUsage::Storage:
+                maxBufferBindingSize = GetSupportedLimits().limits.maxStorageBufferBindingSize;
+                // TODO(crbug.com/dawn/1160): Usually can't actually allocate a buffer this large
+                // because allocating the buffer for zero-initialization fails.
+                maxBufferBindingSize =
+                    std::min(maxBufferBindingSize, uint64_t(2) * 1024 * 1024 * 1024);
+                if (IsWARP()) {
+                    maxBufferBindingSize =
+                        std::min(maxBufferBindingSize, uint64_t(1) * 1024 * 1024 * 1024);
+                }
+                shader = R"(
+                  [[block]] struct Buf {
+                      value0 : u32;
+                      // padding such that value0 and value1 are the first and last bytes of the memory.
+                      [[size()" +
+                         std::to_string(maxBufferBindingSize - 8) + R"()]] padding : u32;
+                      value1 : u32;
+                  };
+
+                  [[block]] struct Result {
+                      value0 : u32;
+                      value1 : u32;
+                  };
+
+                  [[group(0), binding(0)]] var<storage, read> buf : Buf;
+                  [[group(0), binding(1)]] var<storage, write> result : Result;
+
+                  [[stage(compute), workgroup_size(1,1,1)]]
+                  fn main() {
+                      result.value0 = buf.value0;
+                      result.value1 = buf.value1;
+                  }
+              )";
+                break;
+            case wgpu::BufferUsage::Uniform:
+                maxBufferBindingSize = GetSupportedLimits().limits.maxUniformBufferBindingSize;
+                shader = R"(
+                  [[block]] struct Buf {
+                      value0 : u32;
+                      // padding such that value0 and value1 are the first and last bytes of the memory.
+                      [[size()" +
+                         std::to_string(maxBufferBindingSize - 8) + R"()]] padding : u32;
+                      value1 : u32;
+                  };
+
+                  [[block]] struct Result {
+                      value0 : u32;
+                      value1 : u32;
+                  };
+
+                  [[group(0), binding(0)]] var<uniform> buf : Buf;
+                  [[group(0), binding(1)]] var<storage, write> result : Result;
+
+                  [[stage(compute), workgroup_size(1,1,1)]]
+                  fn main() {
+                      result.value0 = buf.value0;
+                      result.value1 = buf.value1;
+                  }
+              )";
+                break;
+            default:
+                UNREACHABLE();
+        }
+
+        device.PushErrorScope(wgpu::ErrorFilter::OutOfMemory);
+
+        wgpu::BufferDescriptor bufDesc;
+        bufDesc.size = maxBufferBindingSize;
+        bufDesc.usage = usage | wgpu::BufferUsage::CopyDst;
+        wgpu::Buffer buffer = device.CreateBuffer(&bufDesc);
+
+        WGPUErrorType oomResult;
+        device.PopErrorScope([](WGPUErrorType type, const char*,
+                                void* userdata) { *static_cast<WGPUErrorType*>(userdata) = type; },
+                             &oomResult);
+        FlushWire();
+        // Max buffer size is smaller than the max buffer binding size.
+        DAWN_TEST_UNSUPPORTED_IF(oomResult == WGPUErrorType_OutOfMemory);
+
+        wgpu::BufferDescriptor resultBufDesc;
+        resultBufDesc.size = 8;
+        resultBufDesc.usage = wgpu::BufferUsage::Storage | wgpu::BufferUsage::CopySrc;
+        wgpu::Buffer resultBuffer = device.CreateBuffer(&resultBufDesc);
+
+        uint32_t value0 = 89234;
+        queue.WriteBuffer(buffer, 0, &value0, sizeof(value0));
+
+        uint32_t value1 = 234;
+        uint64_t value1Offset = Align(maxBufferBindingSize - sizeof(value1), 4);
+        queue.WriteBuffer(buffer, value1Offset, &value1, sizeof(value1));
+
+        wgpu::ComputePipelineDescriptor csDesc;
+        csDesc.compute.module = utils::CreateShaderModule(device, shader.c_str());
+        csDesc.compute.entryPoint = "main";
+        wgpu::ComputePipeline pipeline = device.CreateComputePipeline(&csDesc);
+
+        wgpu::BindGroup bindGroup = utils::MakeBindGroup(device, pipeline.GetBindGroupLayout(0),
+                                                         {{0, buffer}, {1, resultBuffer}});
+
+        wgpu::CommandEncoder encoder = device.CreateCommandEncoder();
+        wgpu::ComputePassEncoder pass = encoder.BeginComputePass();
+        pass.SetPipeline(pipeline);
+        pass.SetBindGroup(0, bindGroup);
+        pass.Dispatch(1);
+        pass.EndPass();
+        wgpu::CommandBuffer commands = encoder.Finish();
+        queue.Submit(1, &commands);
+
+        EXPECT_BUFFER_U32_EQ(value0, resultBuffer, 0);
+        EXPECT_BUFFER_U32_EQ(value1, resultBuffer, 4);
+    }
+}
+
+DAWN_INSTANTIATE_TEST(MaxLimitTests,
+                      D3D12Backend(),
+                      MetalBackend(),
+                      OpenGLBackend(),
+                      OpenGLESBackend(),
+                      VulkanBackend());