Add per-stage and per-pipeline-layout limits and validation

This CL adds the per-stage and per-pipeline-layout binding limits
from the WebGPU spec and validates them when creating bind group
layouts and pipeline layouts. It also increases kMaxBindingsPerGroup
from 16 to 24 so that these limits can be tested without hitting
kMaxBindingsPerGroup first. kMaxBindingsPerGroup is not a real WebGPU
limit and will be removed in future patches.

Bug: dawn:443
Change-Id: I72be062cd31dea4ebd851f2d9f8274a77f286846
Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/24481
Reviewed-by: Stephen White <senorblanco@chromium.org>
Commit-Queue: Austin Eng <enga@chromium.org>
diff --git a/src/common/Constants.h b/src/common/Constants.h
index ab37303..3d2c0da 100644
--- a/src/common/Constants.h
+++ b/src/common/Constants.h
@@ -19,7 +19,7 @@
 
 static constexpr uint32_t kMaxBindGroups = 4u;
 // TODO(cwallez@chromium.org): investigate bindgroup limits
-static constexpr uint32_t kMaxBindingsPerGroup = 16u;
+static constexpr uint32_t kMaxBindingsPerGroup = 24u;
 static constexpr uint32_t kMaxVertexAttributes = 16u;
 // Vulkan has a standalone limit named maxVertexInputAttributeOffset (2047u at least) for vertex
 // attribute offset. The limit might be meaningless because Vulkan has another limit named
@@ -35,15 +35,21 @@
 static constexpr uint32_t kTextureBytesPerRowAlignment = 256u;
 // Dynamic buffer offsets require offset to be divisible by 256
 static constexpr uint64_t kMinDynamicBufferOffsetAlignment = 256u;
-// Max numbers of dynamic uniform buffers
-static constexpr uint32_t kMaxDynamicUniformBufferCount = 8u;
-// Max numbers of dynamic storage buffers
-static constexpr uint32_t kMaxDynamicStorageBufferCount = 4u;
-// Max numbers of dynamic buffers
-static constexpr uint32_t kMaxDynamicBufferCount =
-    kMaxDynamicUniformBufferCount + kMaxDynamicStorageBufferCount;
+
+// Per-shader-stage limits on the number of bindings of each kind
+static constexpr uint32_t kMaxSampledTexturesPerShaderStage = 16u;
+static constexpr uint32_t kMaxSamplersPerShaderStage = 16u;
+static constexpr uint32_t kMaxStorageBuffersPerShaderStage = 4u;
+static constexpr uint32_t kMaxStorageTexturesPerShaderStage = 4u;
+static constexpr uint32_t kMaxUniformBuffersPerShaderStage = 12u;
+
+// Per pipeline layout limits
+static constexpr uint32_t kMaxDynamicUniformBuffersPerPipelineLayout = 8u;
+static constexpr uint32_t kMaxDynamicStorageBuffersPerPipelineLayout = 4u;
+
 // Max size of uniform buffer binding
 static constexpr uint64_t kMaxUniformBufferBindingSize = 16384u;
+
 // Indirect command sizes
 static constexpr uint64_t kDispatchIndirectSize = 3 * sizeof(uint32_t);
 static constexpr uint64_t kDrawIndirectSize = 4 * sizeof(uint32_t);
diff --git a/src/dawn_native/BUILD.gn b/src/dawn_native/BUILD.gn
index f6974c8..41f6d4e 100644
--- a/src/dawn_native/BUILD.gn
+++ b/src/dawn_native/BUILD.gn
@@ -163,6 +163,7 @@
     "BindGroupLayout.cpp",
     "BindGroupLayout.h",
     "BindGroupTracker.h",
+    "BindingInfo.cpp",
     "BindingInfo.h",
     "BuddyAllocator.cpp",
     "BuddyAllocator.h",
diff --git a/src/dawn_native/BindGroupLayout.cpp b/src/dawn_native/BindGroupLayout.cpp
index 54a8804..3476520 100644
--- a/src/dawn_native/BindGroupLayout.cpp
+++ b/src/dawn_native/BindGroupLayout.cpp
@@ -17,6 +17,7 @@
 #include "common/BitSetIterator.h"
 #include "common/HashUtils.h"
 #include "dawn_native/Device.h"
+#include "dawn_native/PerStage.h"
 #include "dawn_native/ValidationUtils_autogen.h"
 
 #include <algorithm>
@@ -194,8 +195,7 @@
         }
 
         std::set<BindingNumber> bindingsSet;
-        uint32_t dynamicUniformBufferCount = 0;
-        uint32_t dynamicStorageBufferCount = 0;
+        BindingCounts bindingCounts = {};
         for (uint32_t i = 0; i < descriptor->entryCount; ++i) {
             const BindGroupLayoutEntry& entry = descriptor->entries[i];
             BindingNumber bindingNumber = BindingNumber(entry.binding);
@@ -226,29 +226,35 @@
 
             switch (entry.type) {
                 case wgpu::BindingType::UniformBuffer:
-                    if (entry.hasDynamicOffset) {
-                        ++dynamicUniformBufferCount;
-                    }
-                    break;
                 case wgpu::BindingType::StorageBuffer:
                 case wgpu::BindingType::ReadonlyStorageBuffer:
-                    if (entry.hasDynamicOffset) {
-                        ++dynamicStorageBufferCount;
-                    }
                     break;
                 case wgpu::BindingType::SampledTexture:
+                    if (entry.hasDynamicOffset) {
+                        return DAWN_VALIDATION_ERROR("Sampled textures cannot be dynamic");
+                    }
+                    break;
                 case wgpu::BindingType::Sampler:
                 case wgpu::BindingType::ComparisonSampler:
+                    if (entry.hasDynamicOffset) {
+                        return DAWN_VALIDATION_ERROR("Samplers cannot be dynamic");
+                    }
+                    break;
                 case wgpu::BindingType::ReadonlyStorageTexture:
                 case wgpu::BindingType::WriteonlyStorageTexture:
                     if (entry.hasDynamicOffset) {
-                        return DAWN_VALIDATION_ERROR("Samplers and textures cannot be dynamic");
+                        return DAWN_VALIDATION_ERROR("Storage textures cannot be dynamic");
                     }
                     break;
                 case wgpu::BindingType::StorageTexture:
                     return DAWN_VALIDATION_ERROR("storage textures aren't supported (yet)");
+                default:
+                    UNREACHABLE();
+                    break;
             }
 
+            IncrementBindingCounts(&bindingCounts, entry);
+
             bindingsSet.insert(bindingNumber);
         }
 
@@ -256,15 +262,7 @@
             return DAWN_VALIDATION_ERROR("The number of bindings exceeds kMaxBindingsPerGroup.");
         }
 
-        if (dynamicUniformBufferCount > kMaxDynamicUniformBufferCount) {
-            return DAWN_VALIDATION_ERROR(
-                "The number of dynamic uniform buffer exceeds the maximum value");
-        }
-
-        if (dynamicStorageBufferCount > kMaxDynamicStorageBufferCount) {
-            return DAWN_VALIDATION_ERROR(
-                "The number of dynamic storage buffer exceeds the maximum value");
-        }
+        DAWN_TRY(ValidateBindingCounts(bindingCounts));
 
         return {};
     }
@@ -383,13 +381,13 @@
 
     BindGroupLayoutBase::BindGroupLayoutBase(DeviceBase* device,
                                              const BindGroupLayoutDescriptor* descriptor)
-        : CachedObject(device), mBindingCount(descriptor->entryCount) {
+        : CachedObject(device) {
         std::vector<BindGroupLayoutEntry> sortedBindings(
             descriptor->entries, descriptor->entries + descriptor->entryCount);
 
         std::sort(sortedBindings.begin(), sortedBindings.end(), SortBindingsCompare);
 
-        for (BindingIndex i{0}; i < mBindingCount; ++i) {
+        for (BindingIndex i{0}; i < BindingIndex(descriptor->entryCount); ++i) {
             const BindGroupLayoutEntry& binding = sortedBindings[static_cast<uint32_t>(i)];
             mBindingInfo[i].binding = BindingNumber(binding.binding);
             mBindingInfo[i].type = binding.type;
@@ -399,21 +397,6 @@
             mBindingInfo[i].storageTextureFormat = binding.storageTextureFormat;
             mBindingInfo[i].minBufferBindingSize = binding.minBufferBindingSize;
 
-            switch (binding.type) {
-                case wgpu::BindingType::UniformBuffer:
-                case wgpu::BindingType::StorageBuffer:
-                case wgpu::BindingType::ReadonlyStorageBuffer:
-                    // Buffers must be contiguously packed at the start of the binding info.
-                    ASSERT(mBufferCount == i);
-                    ++mBufferCount;
-                    if (binding.minBufferBindingSize == 0) {
-                        ++mUnverifiedBufferCount;
-                    }
-                    break;
-                default:
-                    break;
-            }
-
             if (binding.viewDimension == wgpu::TextureViewDimension::Undefined) {
                 mBindingInfo[i].viewDimension = wgpu::TextureViewDimension::e2D;
             } else {
@@ -422,30 +405,17 @@
 
             mBindingInfo[i].multisampled = binding.multisampled;
             mBindingInfo[i].hasDynamicOffset = binding.hasDynamicOffset;
-            if (binding.hasDynamicOffset) {
-                switch (binding.type) {
-                    case wgpu::BindingType::UniformBuffer:
-                        ++mDynamicUniformBufferCount;
-                        break;
-                    case wgpu::BindingType::StorageBuffer:
-                    case wgpu::BindingType::ReadonlyStorageBuffer:
-                        ++mDynamicStorageBufferCount;
-                        break;
-                    case wgpu::BindingType::SampledTexture:
-                    case wgpu::BindingType::Sampler:
-                    case wgpu::BindingType::ComparisonSampler:
-                    case wgpu::BindingType::StorageTexture:
-                    case wgpu::BindingType::ReadonlyStorageTexture:
-                    case wgpu::BindingType::WriteonlyStorageTexture:
-                        UNREACHABLE();
-                        break;
-                }
+
+            if (IsBufferBinding(binding.type)) {
+                // Buffers must be contiguously packed at the start of the binding info.
+                ASSERT(GetBufferCount() == i);
             }
+            IncrementBindingCounts(&mBindingCounts, binding);
 
             const auto& it = mBindingMap.emplace(BindingNumber(binding.binding), i);
             ASSERT(it.second);
         }
-        ASSERT(CheckBufferBindingsFirst({mBindingInfo.data(), mBindingCount}));
+        ASSERT(CheckBufferBindingsFirst({mBindingInfo.data(), GetBindingCount()}));
     }
 
     BindGroupLayoutBase::BindGroupLayoutBase(DeviceBase* device, ObjectBase::ErrorTag tag)
@@ -501,29 +471,26 @@
     }
 
     BindingIndex BindGroupLayoutBase::GetBindingCount() const {
-        return mBindingCount;
+        return BindingIndex(mBindingCounts.totalCount);
     }
 
     BindingIndex BindGroupLayoutBase::GetBufferCount() const {
-        return mBufferCount;
+        return BindingIndex(mBindingCounts.bufferCount);
     }
 
     BindingIndex BindGroupLayoutBase::GetDynamicBufferCount() const {
         // This is a binding index because dynamic buffers are packed at the front of the binding
         // info.
-        return static_cast<BindingIndex>(mDynamicStorageBufferCount + mDynamicUniformBufferCount);
-    }
-
-    uint32_t BindGroupLayoutBase::GetDynamicUniformBufferCount() const {
-        return mDynamicUniformBufferCount;
-    }
-
-    uint32_t BindGroupLayoutBase::GetDynamicStorageBufferCount() const {
-        return mDynamicStorageBufferCount;
+        return static_cast<BindingIndex>(mBindingCounts.dynamicStorageBufferCount +
+                                         mBindingCounts.dynamicUniformBufferCount);
     }
 
     uint32_t BindGroupLayoutBase::GetUnverifiedBufferCount() const {
-        return mUnverifiedBufferCount;
+        return mBindingCounts.unverifiedBufferCount;
+    }
+
+    const BindingCounts& BindGroupLayoutBase::GetBindingCountInfo() const {
+        return mBindingCounts;
     }
 
     size_t BindGroupLayoutBase::GetBindingDataSize() const {
@@ -532,31 +499,29 @@
         // Followed by:
         // |---------buffer size array--------|
         // |-uint64_t[mUnverifiedBufferCount]-|
-        size_t objectPointerStart = static_cast<uint32_t>(mBufferCount) * sizeof(BufferBindingData);
+        size_t objectPointerStart = mBindingCounts.bufferCount * sizeof(BufferBindingData);
         ASSERT(IsAligned(objectPointerStart, alignof(Ref<ObjectBase>)));
-        size_t bufferSizeArrayStart = Align(
-            objectPointerStart + static_cast<uint32_t>(mBindingCount) * sizeof(Ref<ObjectBase>),
-            sizeof(uint64_t));
+        size_t bufferSizeArrayStart =
+            Align(objectPointerStart + mBindingCounts.totalCount * sizeof(Ref<ObjectBase>),
+                  sizeof(uint64_t));
         ASSERT(IsAligned(bufferSizeArrayStart, alignof(uint64_t)));
-        return bufferSizeArrayStart + mUnverifiedBufferCount * sizeof(uint64_t);
+        return bufferSizeArrayStart + mBindingCounts.unverifiedBufferCount * sizeof(uint64_t);
     }
 
     BindGroupLayoutBase::BindingDataPointers BindGroupLayoutBase::ComputeBindingDataPointers(
         void* dataStart) const {
         BufferBindingData* bufferData = reinterpret_cast<BufferBindingData*>(dataStart);
-        auto bindings =
-            reinterpret_cast<Ref<ObjectBase>*>(bufferData + static_cast<uint32_t>(mBufferCount));
-        uint64_t* unverifiedBufferSizes =
-            AlignPtr(reinterpret_cast<uint64_t*>(bindings + static_cast<uint32_t>(mBindingCount)),
-                     sizeof(uint64_t));
+        auto bindings = reinterpret_cast<Ref<ObjectBase>*>(bufferData + mBindingCounts.bufferCount);
+        uint64_t* unverifiedBufferSizes = AlignPtr(
+            reinterpret_cast<uint64_t*>(bindings + mBindingCounts.totalCount), sizeof(uint64_t));
 
         ASSERT(IsPtrAligned(bufferData, alignof(BufferBindingData)));
         ASSERT(IsPtrAligned(bindings, alignof(Ref<ObjectBase>)));
         ASSERT(IsPtrAligned(unverifiedBufferSizes, alignof(uint64_t)));
 
-        return {{bufferData, mBufferCount},
-                {bindings, mBindingCount},
-                {unverifiedBufferSizes, mUnverifiedBufferCount}};
+        return {{bufferData, GetBufferCount()},
+                {bindings, GetBindingCount()},
+                {unverifiedBufferSizes, mBindingCounts.unverifiedBufferCount}};
     }
 
 }  // namespace dawn_native
diff --git a/src/dawn_native/BindGroupLayout.h b/src/dawn_native/BindGroupLayout.h
index 4c3c4c6..5e50e75 100644
--- a/src/dawn_native/BindGroupLayout.h
+++ b/src/dawn_native/BindGroupLayout.h
@@ -79,13 +79,16 @@
         };
 
         BindingIndex GetBindingCount() const;
+        // Returns |BindingIndex| because buffers are packed at the front.
         BindingIndex GetBufferCount() const;
         // Returns |BindingIndex| because dynamic buffers are packed at the front.
         BindingIndex GetDynamicBufferCount() const;
-        uint32_t GetDynamicUniformBufferCount() const;
-        uint32_t GetDynamicStorageBufferCount() const;
         uint32_t GetUnverifiedBufferCount() const;
 
+        // Used to get counts and validate them in pipeline layout creation. Other getters
+        // should be used to get typed integer counts.
+        const BindingCounts& GetBindingCountInfo() const;
+
         struct BufferBindingData {
             uint64_t offset;
             uint64_t size;
@@ -120,12 +123,7 @@
       private:
         BindGroupLayoutBase(DeviceBase* device, ObjectBase::ErrorTag tag);
 
-        BindingIndex mBindingCount;
-        BindingIndex mBufferCount{0};  // |BindingIndex| because buffers are packed at the front.
-        uint32_t mUnverifiedBufferCount = 0;  // Buffers with minimum buffer size unspecified
-        uint32_t mDynamicUniformBufferCount = 0;
-        uint32_t mDynamicStorageBufferCount = 0;
-
+        BindingCounts mBindingCounts = {};
         ityp::array<BindingIndex, BindingInfo, kMaxBindingsPerGroup> mBindingInfo;
 
         // Map from BindGroupLayoutEntry.binding to packed indices.
diff --git a/src/dawn_native/BindingInfo.cpp b/src/dawn_native/BindingInfo.cpp
new file mode 100644
index 0000000..6ade32b
--- /dev/null
+++ b/src/dawn_native/BindingInfo.cpp
@@ -0,0 +1,137 @@
+// Copyright 2020 The Dawn Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "dawn_native/BindingInfo.h"
+
+namespace dawn_native {
+
+    void IncrementBindingCounts(BindingCounts* bindingCounts, const BindGroupLayoutEntry& entry) {
+        bindingCounts->totalCount += 1;  // Every entry counts toward the total.
+        // Pointer to the per-stage counter matching |entry.type|; chosen by the switch below.
+        uint32_t PerStageBindingCounts::*perStageBindingCountMember = nullptr;
+        switch (entry.type) {
+            case wgpu::BindingType::UniformBuffer:
+                ++bindingCounts->bufferCount;
+                if (entry.hasDynamicOffset) {
+                    ++bindingCounts->dynamicUniformBufferCount;
+                }
+                if (entry.minBufferBindingSize == 0) {  // No minimum size was specified.
+                    ++bindingCounts->unverifiedBufferCount;
+                }
+                perStageBindingCountMember = &PerStageBindingCounts::uniformBufferCount;
+                break;
+
+            case wgpu::BindingType::StorageBuffer:
+            case wgpu::BindingType::ReadonlyStorageBuffer:  // Shares the storage buffer counters.
+                ++bindingCounts->bufferCount;
+                if (entry.hasDynamicOffset) {
+                    ++bindingCounts->dynamicStorageBufferCount;
+                }
+                if (entry.minBufferBindingSize == 0) {  // No minimum size was specified.
+                    ++bindingCounts->unverifiedBufferCount;
+                }
+                perStageBindingCountMember = &PerStageBindingCounts::storageBufferCount;
+                break;
+
+            case wgpu::BindingType::SampledTexture:
+                perStageBindingCountMember = &PerStageBindingCounts::sampledTextureCount;
+                break;
+
+            case wgpu::BindingType::Sampler:
+            case wgpu::BindingType::ComparisonSampler:  // Both sampler kinds share one counter.
+                perStageBindingCountMember = &PerStageBindingCounts::samplerCount;
+                break;
+
+            case wgpu::BindingType::ReadonlyStorageTexture:
+            case wgpu::BindingType::WriteonlyStorageTexture:  // Both share one counter.
+                perStageBindingCountMember = &PerStageBindingCounts::storageTextureCount;
+                break;
+
+            case wgpu::BindingType::StorageTexture:
+            default:
+                UNREACHABLE();
+                break;
+        }
+        // Bump the chosen counter for each stage IterateStages yields for |entry.visibility|.
+        ASSERT(perStageBindingCountMember != nullptr);
+        for (SingleShaderStage stage : IterateStages(entry.visibility)) {
+            ++(bindingCounts->perStage[stage].*perStageBindingCountMember);
+        }
+    }
+
+    void AccumulateBindingCounts(BindingCounts* bindingCounts, const BindingCounts& rhs) {
+        // Fold the stage-independent totals of |rhs| into the running counts.
+        bindingCounts->totalCount += rhs.totalCount;
+        bindingCounts->bufferCount += rhs.bufferCount;
+        bindingCounts->unverifiedBufferCount += rhs.unverifiedBufferCount;
+        bindingCounts->dynamicUniformBufferCount += rhs.dynamicUniformBufferCount;
+        bindingCounts->dynamicStorageBufferCount += rhs.dynamicStorageBufferCount;
+
+        // Then fold in the per-type counts of every shader stage.
+        for (SingleShaderStage stage : IterateStages(kAllStages)) {
+            PerStageBindingCounts& total = bindingCounts->perStage[stage];
+            const PerStageBindingCounts& added = rhs.perStage[stage];
+            total.sampledTextureCount += added.sampledTextureCount;
+            total.samplerCount += added.samplerCount;
+            total.storageBufferCount += added.storageBufferCount;
+            total.storageTextureCount += added.storageTextureCount;
+            total.uniformBufferCount += added.uniformBufferCount;
+        }
+    }
+
+    MaybeError ValidateBindingCounts(const BindingCounts& bindingCounts) {
+        // Returns a validation error for the first limit that |bindingCounts| exceeds. The
+        // dynamic buffer limits are checked against the counts as a whole.
+        if (bindingCounts.dynamicUniformBufferCount > kMaxDynamicUniformBuffersPerPipelineLayout) {
+            return DAWN_VALIDATION_ERROR(
+                "The number of dynamic uniform buffers exceeds the maximum per-pipeline-layout "
+                "limit");
+        }
+
+        if (bindingCounts.dynamicStorageBufferCount > kMaxDynamicStorageBuffersPerPipelineLayout) {
+            return DAWN_VALIDATION_ERROR(
+                "The number of dynamic storage buffers exceeds the maximum per-pipeline-layout "
+                "limit");
+        }
+
+        // The remaining limits are checked separately for each shader stage.
+        for (SingleShaderStage stage : IterateStages(kAllStages)) {
+            const PerStageBindingCounts& counts = bindingCounts.perStage[stage];
+
+            if (counts.sampledTextureCount > kMaxSampledTexturesPerShaderStage) {
+                return DAWN_VALIDATION_ERROR(
+                    "The number of sampled textures exceeds the maximum per-stage limit.");
+            }
+            if (counts.samplerCount > kMaxSamplersPerShaderStage) {
+                return DAWN_VALIDATION_ERROR(
+                    "The number of samplers exceeds the maximum per-stage limit.");
+            }
+            if (counts.storageBufferCount > kMaxStorageBuffersPerShaderStage) {
+                return DAWN_VALIDATION_ERROR(
+                    "The number of storage buffers exceeds the maximum per-stage limit.");
+            }
+            if (counts.storageTextureCount > kMaxStorageTexturesPerShaderStage) {
+                return DAWN_VALIDATION_ERROR(
+                    "The number of storage textures exceeds the maximum per-stage limit.");
+            }
+            if (counts.uniformBufferCount > kMaxUniformBuffersPerShaderStage) {
+                return DAWN_VALIDATION_ERROR(
+                    "The number of uniform buffers exceeds the maximum per-stage limit.");
+            }
+        }
+
+        return {};
+    }
+
+}  // namespace dawn_native
diff --git a/src/dawn_native/BindingInfo.h b/src/dawn_native/BindingInfo.h
index cac4f4c..f518ed8 100644
--- a/src/dawn_native/BindingInfo.h
+++ b/src/dawn_native/BindingInfo.h
@@ -18,7 +18,10 @@
 #include "common/Constants.h"
 #include "common/TypedInteger.h"
 #include "common/ityp_array.h"
+#include "dawn_native/Error.h"
 #include "dawn_native/Format.h"
+#include "dawn_native/PerStage.h"
+
 #include "dawn_native/dawn_platform.h"
 
 #include <cstdint>
@@ -48,6 +51,27 @@
         uint64_t minBufferBindingSize = 0;
     };
 
+    struct PerStageBindingCounts {  // Binding counts for a single shader stage.
+        uint32_t sampledTextureCount = 0;  // Counters start at zero even when default-initialized.
+        uint32_t samplerCount = 0;         // Sampler and ComparisonSampler bindings
+        uint32_t storageBufferCount = 0;   // StorageBuffer and ReadonlyStorageBuffer bindings
+        uint32_t storageTextureCount = 0;  // Readonly- and WriteonlyStorageTexture bindings
+        uint32_t uniformBufferCount = 0;
+    };
+
+    struct BindingCounts {  // Counters default to zero, i.e. no bindings counted yet.
+        uint32_t totalCount = 0;
+        uint32_t bufferCount = 0;
+        uint32_t unverifiedBufferCount = 0;  // Buffers with minimum buffer size unspecified
+        uint32_t dynamicUniformBufferCount = 0;
+        uint32_t dynamicStorageBufferCount = 0;
+        PerStage<PerStageBindingCounts> perStage = {};  // Value-initialized per-stage counts.
+    };
+
+    void IncrementBindingCounts(BindingCounts* bindingCounts, const BindGroupLayoutEntry& entry);
+    void AccumulateBindingCounts(BindingCounts* bindingCounts, const BindingCounts& rhs);
+    MaybeError ValidateBindingCounts(const BindingCounts& bindingCounts);
+
     // For buffer size validation
     using RequiredBufferSizes = ityp::array<BindGroupIndex, std::vector<uint64_t>, kMaxBindGroups>;
 
diff --git a/src/dawn_native/CMakeLists.txt b/src/dawn_native/CMakeLists.txt
index cabbc1a..7abd5d9 100644
--- a/src/dawn_native/CMakeLists.txt
+++ b/src/dawn_native/CMakeLists.txt
@@ -35,6 +35,7 @@
     "BindGroupLayout.cpp"
     "BindGroupLayout.h"
     "BindGroupTracker.h"
+    "BindingInfo.cpp"
     "BindingInfo.h"
     "BuddyAllocator.cpp"
     "BuddyAllocator.h"
diff --git a/src/dawn_native/PipelineLayout.cpp b/src/dawn_native/PipelineLayout.cpp
index 0cccabf..5b47d9d 100644
--- a/src/dawn_native/PipelineLayout.cpp
+++ b/src/dawn_native/PipelineLayout.cpp
@@ -70,24 +70,14 @@
             return DAWN_VALIDATION_ERROR("too many bind group layouts");
         }
 
-        uint32_t totalDynamicUniformBufferCount = 0;
-        uint32_t totalDynamicStorageBufferCount = 0;
+        BindingCounts bindingCounts = {};
         for (uint32_t i = 0; i < descriptor->bindGroupLayoutCount; ++i) {
             DAWN_TRY(device->ValidateObject(descriptor->bindGroupLayouts[i]));
-            totalDynamicUniformBufferCount +=
-                descriptor->bindGroupLayouts[i]->GetDynamicUniformBufferCount();
-            totalDynamicStorageBufferCount +=
-                descriptor->bindGroupLayouts[i]->GetDynamicStorageBufferCount();
+            AccumulateBindingCounts(&bindingCounts,
+                                    descriptor->bindGroupLayouts[i]->GetBindingCountInfo());
         }
 
-        if (totalDynamicUniformBufferCount > kMaxDynamicUniformBufferCount) {
-            return DAWN_VALIDATION_ERROR("too many dynamic uniform buffers in pipeline layout");
-        }
-
-        if (totalDynamicStorageBufferCount > kMaxDynamicStorageBufferCount) {
-            return DAWN_VALIDATION_ERROR("too many dynamic storage buffers in pipeline layout");
-        }
-
+        DAWN_TRY(ValidateBindingCounts(bindingCounts));
         return {};
     }
 
@@ -140,6 +130,7 @@
         // A counter of how many bindings we've populated in |entryData|
         ityp::array<BindGroupIndex, BindingIndex, kMaxBindGroups> entryCounts = {};
 
+        BindingCounts bindingCounts = {};
         BindGroupIndex bindGroupLayoutCount(0);
         for (uint32_t moduleIndex = 0; moduleIndex < count; ++moduleIndex) {
             const ShaderModuleBase* module = modules[moduleIndex];
@@ -201,6 +192,7 @@
                         }
                     }
 
+                    IncrementBindingCounts(&bindingCounts, bindingSlot);
                     BindingIndex currentBindingCount = entryCounts[group];
                     entryData[group][currentBindingCount] = bindingSlot;
 
@@ -214,6 +206,8 @@
             }
         }
 
+        DAWN_TRY(ValidateBindingCounts(bindingCounts));
+
         ityp::array<BindGroupIndex, BindGroupLayoutBase*, kMaxBindGroups> bindGroupLayouts = {};
         for (BindGroupIndex group(0); group < bindGroupLayoutCount; ++group) {
             BindGroupLayoutDescriptor desc = {};
diff --git a/src/tests/end2end/PipelineLayoutTests.cpp b/src/tests/end2end/PipelineLayoutTests.cpp
index a4a02e9..5c8eeeb 100644
--- a/src/tests/end2end/PipelineLayoutTests.cpp
+++ b/src/tests/end2end/PipelineLayoutTests.cpp
@@ -28,7 +28,7 @@
     wgpu::BindGroupLayout bglA;
     {
         std::vector<wgpu::BindGroupLayoutEntry> entries;
-        for (uint32_t i = 0; i < kMaxDynamicStorageBufferCount; i++) {
+        for (uint32_t i = 0; i < kMaxDynamicStorageBuffersPerPipelineLayout; i++) {
             entries.push_back(
                 {i, wgpu::ShaderStage::Compute, wgpu::BindingType::StorageBuffer, true});
         }
@@ -40,10 +40,11 @@
     }
 
     // Create the second bind group layout that has one non-dynamic buffer binding.
+    // It is in the fragment stage to avoid the max per-stage storage buffer limit.
     wgpu::BindGroupLayout bglB;
     {
         wgpu::BindGroupLayoutDescriptor descriptor;
-        wgpu::BindGroupLayoutEntry entry = {0, wgpu::ShaderStage::Compute,
+        wgpu::BindGroupLayoutEntry entry = {0, wgpu::ShaderStage::Fragment,
                                             wgpu::BindingType::StorageBuffer, false};
         descriptor.entryCount = 1;
         descriptor.entries = &entry;
diff --git a/src/tests/unittests/validation/BindGroupValidationTests.cpp b/src/tests/unittests/validation/BindGroupValidationTests.cpp
index 7187be3..577bd67 100644
--- a/src/tests/unittests/validation/BindGroupValidationTests.cpp
+++ b/src/tests/unittests/validation/BindGroupValidationTests.cpp
@@ -485,6 +485,13 @@
 
 class BindGroupLayoutValidationTest : public ValidationTest {
   public:
+    wgpu::BindGroupLayout MakeBindGroupLayout(wgpu::BindGroupLayoutEntry* binding, uint32_t count) {
+        wgpu::BindGroupLayoutDescriptor descriptor;
+        descriptor.entryCount = count;
+        descriptor.entries = binding;
+        return device.CreateBindGroupLayout(&descriptor);
+    }
+
     void TestCreateBindGroupLayout(wgpu::BindGroupLayoutEntry* binding,
                                    uint32_t count,
                                    bool expected) {
@@ -545,8 +552,14 @@
 TEST_F(BindGroupLayoutValidationTest, BindGroupLayoutMaxBindings) {
     wgpu::BindGroupLayoutEntry entries[kMaxBindingsPerGroup + 1];
 
+    wgpu::BindingType bindingsTypes[3] = {wgpu::BindingType::UniformBuffer,
+                                          wgpu::BindingType::SampledTexture,
+                                          wgpu::BindingType::Sampler};
     for (uint32_t i = 0; i < kMaxBindingsPerGroup + 1; i++) {
-        entries[i].type = wgpu::BindingType::UniformBuffer;
+        // Alternate between uniform/sampled tex/sampler to avoid per-stage limits.
+        // Note: This is a temporary test and will be removed once the kMaxBindingsPerGroup
+        // limit is lifted.
+        entries[i].type = bindingsTypes[i % 3];
         entries[i].binding = i;
         entries[i].visibility = wgpu::ShaderStage::Compute;
     }
@@ -634,6 +647,96 @@
     ASSERT_DEVICE_ERROR(utils::MakeBindGroup(device, bgl, {{0, buffer}}));
 }
 
+TEST_F(BindGroupLayoutValidationTest, PerStageLimits) {
+    struct TestInfo {
+        uint32_t maxCount;  // Per-stage limit that applies to |bindingType|.
+        wgpu::BindingType bindingType;
+        wgpu::BindingType otherBindingType;  // A type counted against a different limit.
+    };
+
+    constexpr TestInfo kTestInfos[] = {
+        {kMaxSampledTexturesPerShaderStage, wgpu::BindingType::SampledTexture,
+         wgpu::BindingType::UniformBuffer},
+        {kMaxSamplersPerShaderStage, wgpu::BindingType::Sampler, wgpu::BindingType::UniformBuffer},
+        {kMaxSamplersPerShaderStage, wgpu::BindingType::ComparisonSampler,
+         wgpu::BindingType::UniformBuffer},
+        {kMaxStorageBuffersPerShaderStage, wgpu::BindingType::StorageBuffer,
+         wgpu::BindingType::UniformBuffer},
+        {kMaxStorageTexturesPerShaderStage, wgpu::BindingType::ReadonlyStorageTexture,
+         wgpu::BindingType::UniformBuffer},
+        {kMaxStorageTexturesPerShaderStage, wgpu::BindingType::WriteonlyStorageTexture,
+         wgpu::BindingType::UniformBuffer},
+        {kMaxUniformBuffersPerShaderStage, wgpu::BindingType::UniformBuffer,
+         wgpu::BindingType::SampledTexture},
+    };
+
+    for (TestInfo info : kTestInfos) {
+        wgpu::BindGroupLayout bgl[2];
+        std::vector<wgpu::BindGroupLayoutEntry> maxBindings;
+        // Storage texture entries additionally get a storage texture format.
+        auto PopulateEntry = [](wgpu::BindGroupLayoutEntry entry) {
+            switch (entry.type) {
+                case wgpu::BindingType::ReadonlyStorageTexture:
+                case wgpu::BindingType::WriteonlyStorageTexture:
+                    entry.storageTextureFormat = wgpu::TextureFormat::RGBA8Unorm;
+                    break;
+                default:
+                    break;
+            }
+            return entry;
+        };
+
+        for (uint32_t i = 0; i < info.maxCount; ++i) {
+            maxBindings.push_back(PopulateEntry({i, wgpu::ShaderStage::Compute, info.bindingType}));
+        }
+
+        // Creating a BGL with exactly the maximum number of bindings of the type works.
+        bgl[0] = MakeBindGroupLayout(maxBindings.data(), maxBindings.size());
+
+        // Adding an extra binding of a different type in the same stage works.
+        {
+            std::vector<wgpu::BindGroupLayoutEntry> bindings = maxBindings;
+            bindings.push_back(
+                PopulateEntry({info.maxCount, wgpu::ShaderStage::Compute, info.otherBindingType}));
+            MakeBindGroupLayout(bindings.data(), bindings.size());
+        }
+
+        // Adding an extra binding of the maxed type in a different stage works.
+        {
+            std::vector<wgpu::BindGroupLayoutEntry> bindings = maxBindings;
+            bindings.push_back(
+                PopulateEntry({info.maxCount, wgpu::ShaderStage::Fragment, info.bindingType}));
+            MakeBindGroupLayout(bindings.data(), bindings.size());
+        }
+
+        // Adding an extra binding of the maxed type and stage exceeds the per-stage limit.
+        {
+            std::vector<wgpu::BindGroupLayoutEntry> bindings = maxBindings;
+            bindings.push_back(
+                PopulateEntry({info.maxCount, wgpu::ShaderStage::Compute, info.bindingType}));
+            ASSERT_DEVICE_ERROR(MakeBindGroupLayout(bindings.data(), bindings.size()));
+        }
+
+        // Creating a pipeline layout from the valid BGL works.
+        TestCreatePipelineLayout(bgl, 1, true);
+
+        // Adding an extra binding of a different type in a second BGL works.
+        bgl[1] = utils::MakeBindGroupLayout(
+            device, {PopulateEntry({0, wgpu::ShaderStage::Compute, info.otherBindingType})});
+        TestCreatePipelineLayout(bgl, 2, true);
+
+        // Adding an extra binding of the maxed type in a second BGL and a different stage works.
+        bgl[1] = utils::MakeBindGroupLayout(
+            device, {PopulateEntry({0, wgpu::ShaderStage::Fragment, info.bindingType})});
+        TestCreatePipelineLayout(bgl, 2, true);
+
+        // Adding an extra binding of the maxed type in a second BGL exceeds the per-stage limit.
+        bgl[1] = utils::MakeBindGroupLayout(
+            device, {PopulateEntry({0, wgpu::ShaderStage::Compute, info.bindingType})});
+        TestCreatePipelineLayout(bgl, 2, false);
+    }
+}
+
 // Check that dynamic buffer numbers exceed maximum value in one bind group layout.
 TEST_F(BindGroupLayoutValidationTest, DynamicBufferNumberLimit) {
     wgpu::BindGroupLayout bgl[2];
@@ -641,49 +744,49 @@
     std::vector<wgpu::BindGroupLayoutEntry> maxStorageDB;
     std::vector<wgpu::BindGroupLayoutEntry> maxReadonlyStorageDB;
 
-    for (uint32_t i = 0; i < kMaxDynamicUniformBufferCount; ++i) {
+    // All bindings in this test use the same shader stage. Ensure that the dynamic buffer
+    // limits do not exceed the per-stage limits.
+    static_assert(kMaxDynamicUniformBuffersPerPipelineLayout <= kMaxUniformBuffersPerShaderStage,
+                  "");
+    static_assert(kMaxDynamicStorageBuffersPerPipelineLayout <= kMaxStorageBuffersPerShaderStage,
+                  "");
+
+    for (uint32_t i = 0; i < kMaxDynamicUniformBuffersPerPipelineLayout; ++i) {
         maxUniformDB.push_back(
             {i, wgpu::ShaderStage::Compute, wgpu::BindingType::UniformBuffer, true});
     }
 
-    for (uint32_t i = 0; i < kMaxDynamicStorageBufferCount; ++i) {
+    for (uint32_t i = 0; i < kMaxDynamicStorageBuffersPerPipelineLayout; ++i) {
         maxStorageDB.push_back(
             {i, wgpu::ShaderStage::Compute, wgpu::BindingType::StorageBuffer, true});
     }
 
-    for (uint32_t i = 0; i < kMaxDynamicStorageBufferCount; ++i) {
+    for (uint32_t i = 0; i < kMaxDynamicStorageBuffersPerPipelineLayout; ++i) {
         maxReadonlyStorageDB.push_back(
             {i, wgpu::ShaderStage::Compute, wgpu::BindingType::ReadonlyStorageBuffer, true});
     }
 
-    auto MakeBindGroupLayout = [&](wgpu::BindGroupLayoutEntry* binding,
-                                   uint32_t count) -> wgpu::BindGroupLayout {
-        wgpu::BindGroupLayoutDescriptor descriptor;
-        descriptor.entryCount = count;
-        descriptor.entries = binding;
-        return device.CreateBindGroupLayout(&descriptor);
-    };
-
+    // Creating a pipeline layout with the max count of each dynamic buffer type works.
     {
         bgl[0] = MakeBindGroupLayout(maxUniformDB.data(), maxUniformDB.size());
-        bgl[1] = MakeBindGroupLayout(maxStorageDB.data(), maxStorageDB.size());
+        TestCreatePipelineLayout(bgl, 1, true);
 
-        TestCreatePipelineLayout(bgl, 2, true);
+        bgl[0] = MakeBindGroupLayout(maxStorageDB.data(), maxStorageDB.size());
+        TestCreatePipelineLayout(bgl, 1, true);
+
+        bgl[0] = MakeBindGroupLayout(maxReadonlyStorageDB.data(), maxReadonlyStorageDB.size());
+        TestCreatePipelineLayout(bgl, 1, true);
     }
 
-    {
-        bgl[0] = MakeBindGroupLayout(maxUniformDB.data(), maxUniformDB.size());
-        bgl[1] = MakeBindGroupLayout(maxReadonlyStorageDB.data(), maxReadonlyStorageDB.size());
-
-        TestCreatePipelineLayout(bgl, 2, true);
-    }
+    // The following tests exceed the per-pipeline layout limits. We use the Fragment stage to
+    // ensure we don't hit the per-stage limit.
 
     // Check dynamic uniform buffers exceed maximum in pipeline layout.
     {
         bgl[0] = MakeBindGroupLayout(maxUniformDB.data(), maxUniformDB.size());
         bgl[1] = utils::MakeBindGroupLayout(
             device, {
-                        {0, wgpu::ShaderStage::Compute, wgpu::BindingType::UniformBuffer, true},
+                        {0, wgpu::ShaderStage::Fragment, wgpu::BindingType::UniformBuffer, true},
                     });
 
         TestCreatePipelineLayout(bgl, 2, false);
@@ -694,7 +797,7 @@
         bgl[0] = MakeBindGroupLayout(maxStorageDB.data(), maxStorageDB.size());
         bgl[1] = utils::MakeBindGroupLayout(
             device, {
-                        {0, wgpu::ShaderStage::Compute, wgpu::BindingType::StorageBuffer, true},
+                        {0, wgpu::ShaderStage::Fragment, wgpu::BindingType::StorageBuffer, true},
                     });
 
         TestCreatePipelineLayout(bgl, 2, false);
@@ -706,7 +809,7 @@
         bgl[1] = utils::MakeBindGroupLayout(
             device,
             {
-                {0, wgpu::ShaderStage::Compute, wgpu::BindingType::ReadonlyStorageBuffer, true},
+                {0, wgpu::ShaderStage::Fragment, wgpu::BindingType::ReadonlyStorageBuffer, true},
             });
 
         TestCreatePipelineLayout(bgl, 2, false);
@@ -719,7 +822,7 @@
         bgl[1] = utils::MakeBindGroupLayout(
             device,
             {
-                {0, wgpu::ShaderStage::Compute, wgpu::BindingType::ReadonlyStorageBuffer, true},
+                {0, wgpu::ShaderStage::Fragment, wgpu::BindingType::ReadonlyStorageBuffer, true},
             });
 
         TestCreatePipelineLayout(bgl, 2, false);
@@ -727,21 +830,24 @@
 
     // Check dynamic uniform buffers exceed maximum in bind group layout.
     {
-        maxUniformDB.push_back({kMaxDynamicUniformBufferCount, wgpu::ShaderStage::Compute,
-                                wgpu::BindingType::UniformBuffer, true});
+        maxUniformDB.push_back({kMaxDynamicUniformBuffersPerPipelineLayout,
+                                wgpu::ShaderStage::Fragment, wgpu::BindingType::UniformBuffer,
+                                true});
         TestCreateBindGroupLayout(maxUniformDB.data(), maxUniformDB.size(), false);
     }
 
     // Check dynamic storage buffers exceed maximum in bind group layout.
     {
-        maxStorageDB.push_back({kMaxDynamicStorageBufferCount, wgpu::ShaderStage::Compute,
-                                wgpu::BindingType::StorageBuffer, true});
+        maxStorageDB.push_back({kMaxDynamicStorageBuffersPerPipelineLayout,
+                                wgpu::ShaderStage::Fragment, wgpu::BindingType::StorageBuffer,
+                                true});
         TestCreateBindGroupLayout(maxStorageDB.data(), maxStorageDB.size(), false);
     }
 
     // Check dynamic readonly storage buffers exceed maximum in bind group layout.
     {
-        maxReadonlyStorageDB.push_back({kMaxDynamicStorageBufferCount, wgpu::ShaderStage::Compute,
+        maxReadonlyStorageDB.push_back({kMaxDynamicStorageBuffersPerPipelineLayout,
+                                        wgpu::ShaderStage::Fragment,
                                         wgpu::BindingType::ReadonlyStorageBuffer, true});
         TestCreateBindGroupLayout(maxReadonlyStorageDB.data(), maxReadonlyStorageDB.size(), false);
     }
diff --git a/src/tests/unittests/validation/MinimumBufferSizeValidationTests.cpp b/src/tests/unittests/validation/MinimumBufferSizeValidationTests.cpp
index 9b61e57..b3ba571 100644
--- a/src/tests/unittests/validation/MinimumBufferSizeValidationTests.cpp
+++ b/src/tests/unittests/validation/MinimumBufferSizeValidationTests.cpp
@@ -521,20 +521,18 @@
 
 // Various bindings in std140 have correct minimum size reflection
 TEST_F(MinBufferSizeDefaultLayoutTests, std140Inferred) {
-    CheckShaderBindingSizeReflection("std140", {{{0, 0, "float a", 4},
-                                                 {0, 1, "float b[]", 16},
-                                                 {0, 2, "mat2 c", 32},
-                                                 {0, 3, "int d; float e[]", 32},
+    CheckShaderBindingSizeReflection(
+        "std140", {{{0, 0, "float a", 4}, {0, 1, "float b[]", 16}, {0, 2, "mat2 c", 32}}});
+    CheckShaderBindingSizeReflection("std140", {{{0, 3, "int d; float e[]", 32},
                                                  {0, 4, "ThreeFloats f", 12},
                                                  {0, 5, "ThreeFloats g[]", 16}}});
 }
 
 // Various bindings in std430 have correct minimum size reflection
 TEST_F(MinBufferSizeDefaultLayoutTests, std430Inferred) {
-    CheckShaderBindingSizeReflection("std430", {{{0, 0, "float a", 4},
-                                                 {0, 1, "float b[]", 4},
-                                                 {0, 2, "mat2 c", 16},
-                                                 {0, 3, "int d; float e[]", 8},
+    CheckShaderBindingSizeReflection(
+        "std430", {{{0, 0, "float a", 4}, {0, 1, "float b[]", 4}, {0, 2, "mat2 c", 16}}});
+    CheckShaderBindingSizeReflection("std430", {{{0, 3, "int d; float e[]", 8},
                                                  {0, 4, "ThreeFloats f", 12},
                                                  {0, 5, "ThreeFloats g[]", 12}}});
 }
@@ -558,18 +556,20 @@
 TEST_F(MinBufferSizeDefaultLayoutTests, std140MultipleBindGroups) {
     CheckShaderBindingSizeReflection("std140",
                                      {{{0, 0, "float a", 4}, {0, 1, "float b[]", 16}},
-                                      {{1, 2, "mat2 c", 32}, {1, 3, "int d; float e[]", 32}},
-                                      {{2, 4, "ThreeFloats f", 12}},
-                                      {{3, 5, "ThreeFloats g[]", 16}}});
+                                      {{1, 2, "mat2 c", 32}, {1, 3, "int d; float e[]", 32}}});
+    CheckShaderBindingSizeReflection(
+        "std140", {{{0, 4, "ThreeFloats f", 12}, {0, 1, "float b[]", 16}},
+                   {{1, 5, "ThreeFloats g[]", 16}, {1, 3, "int d; float e[]", 32}}});
 }
 
 // Various bindings have correct size across multiple groups
 TEST_F(MinBufferSizeDefaultLayoutTests, std430MultipleBindGroups) {
     CheckShaderBindingSizeReflection("std430",
                                      {{{0, 0, "float a", 4}, {0, 1, "float b[]", 4}},
-                                      {{1, 2, "mat2 c", 16}, {1, 3, "int d; float e[]", 8}},
-                                      {{2, 4, "ThreeFloats f", 12}},
-                                      {{3, 5, "ThreeFloats g[]", 12}}});
+                                      {{1, 2, "mat2 c", 16}, {1, 3, "int d; float e[]", 8}}});
+    CheckShaderBindingSizeReflection(
+        "std430", {{{0, 4, "ThreeFloats f", 12}, {0, 1, "float b[]", 4}},
+                   {{1, 5, "ThreeFloats g[]", 12}, {1, 3, "int d; float e[]", 8}}});
 }
 
 // Minimum size should be the max requirement of both vertex and fragment stages