[dawn][vk] Freshen up comments in vulkan::{BGL, DescriptorSetAllocator}

These were outdated, had TODOs with too few details that were
associated with people instead of issues, etc.

Also makes `DescriptorSetAllocator` a `RefCounted` as it doesn't need to
be `ObjectBase` with the error monad mechanism.

Also pass in the VkDescriptorSetLayout instead of the BindGroupLayout to
DescriptorSetAllocator::Allocate to make it more clear exactly what will
be used.

Bug: 439522242
Change-Id: Iac766c4f0368dadab4ee7db67deda63b1023fbca
Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/257754
Commit-Queue: Corentin Wallez <cwallez@chromium.org>
Reviewed-by: dan sinclair <dsinclair@chromium.org>
Reviewed-by: Brandon Jones <bajones@chromium.org>
diff --git a/src/dawn/native/vulkan/BindGroupLayoutVk.cpp b/src/dawn/native/vulkan/BindGroupLayoutVk.cpp
index b05c9f6..3d5ce2e 100644
--- a/src/dawn/native/vulkan/BindGroupLayoutVk.cpp
+++ b/src/dawn/native/vulkan/BindGroupLayoutVk.cpp
@@ -228,8 +228,6 @@
         descriptorCountPerType[vulkanType] += numVkDescriptors;
     }
 
-    // TODO(enga): Consider deduping allocators for layouts with the same descriptor type
-    // counts.
     mDescriptorSetAllocator =
         DescriptorSetAllocator::Create(device, std::move(descriptorCountPerType));
 
@@ -252,10 +250,6 @@
 
     // DescriptorSetLayout aren't used by execution on the GPU and can be deleted at any time,
     // so we can destroy mHandle immediately instead of using the FencedDeleter.
-    // (Swiftshader implements this wrong b/154522740).
-    // In practice, the GPU is done with all descriptor sets because bind group deallocation
-    // refs the bind group layout so that once the bind group is finished being used, we can
-    // recycle its descriptor set.
     if (mHandle != VK_NULL_HANDLE) {
         device->fn.DestroyDescriptorSetLayout(device->GetVkDevice(), mHandle, nullptr);
         mHandle = VK_NULL_HANDLE;
@@ -271,7 +265,7 @@
     Device* device,
     const UnpackedPtr<BindGroupDescriptor>& descriptor) {
     DescriptorSetAllocation descriptorSetAllocation;
-    DAWN_TRY_ASSIGN(descriptorSetAllocation, mDescriptorSetAllocator->Allocate(this));
+    DAWN_TRY_ASSIGN(descriptorSetAllocation, mDescriptorSetAllocator->Allocate(mHandle));
 
     return AcquireRef(mBindGroupAllocator->Allocate(device, descriptor, descriptorSetAllocation));
 }
diff --git a/src/dawn/native/vulkan/BindGroupLayoutVk.h b/src/dawn/native/vulkan/BindGroupLayoutVk.h
index 7d69e1d..bbedb85 100644
--- a/src/dawn/native/vulkan/BindGroupLayoutVk.h
+++ b/src/dawn/native/vulkan/BindGroupLayoutVk.h
@@ -48,18 +48,9 @@
 
 VkDescriptorType VulkanDescriptorType(const BindingInfo& bindingInfo);
 
-// In Vulkan descriptor pools have to be sized to an exact number of descriptors. This means
-// it's hard to have something where we can mix different types of descriptor sets because
-// we don't know if their vector of number of descriptors will be similar.
-//
-// That's why that in addition to containing the VkDescriptorSetLayout to create
-// VkDescriptorSets for its bindgroups, the layout also acts as an allocator for the descriptor
-// sets.
-//
-// The allocations is done with one pool per descriptor set, which is inefficient, but at least
-// the pools are reused when no longer used. Minimizing the number of descriptor pool allocation
-// is important because creating them can incur GPU memory allocation which is usually an
-// expensive syscall.
+// Backend BindGroupLayout implementation for Vulkan. In addition to containing a BindGroupAllocator
+// for the CPU-side tracking data, it has a DescriptorSetAllocator that handles efficient allocation
+// of the corresponding VkDescriptorSets.
 class BindGroupLayout final : public BindGroupLayoutInternalBase {
   public:
     static ResultOrError<Ref<BindGroupLayout>> Create(
diff --git a/src/dawn/native/vulkan/DescriptorSetAllocator.cpp b/src/dawn/native/vulkan/DescriptorSetAllocator.cpp
index 6b70b4f..520427d 100644
--- a/src/dawn/native/vulkan/DescriptorSetAllocator.cpp
+++ b/src/dawn/native/vulkan/DescriptorSetAllocator.cpp
@@ -30,27 +30,27 @@
 #include <utility>
 
 #include "dawn/native/Queue.h"
-#include "dawn/native/vulkan/BindGroupLayoutVk.h"
 #include "dawn/native/vulkan/DeviceVk.h"
 #include "dawn/native/vulkan/FencedDeleter.h"
 #include "dawn/native/vulkan/VulkanError.h"
 
 namespace dawn::native::vulkan {
 
-// TODO(enga): Figure out this value.
+// TODO(https://crbug.com/439522242): Consider adding some better heuristic, like an exponential
+// increase up to a larger constant.
 static constexpr uint32_t kMaxDescriptorsPerPool = 512;
 
 // static
 Ref<DescriptorSetAllocator> DescriptorSetAllocator::Create(
-    DeviceBase* device,
+    Device* device,
     absl::flat_hash_map<VkDescriptorType, uint32_t> descriptorCountPerType) {
     return AcquireRef(new DescriptorSetAllocator(device, descriptorCountPerType));
 }
 
 DescriptorSetAllocator::DescriptorSetAllocator(
-    DeviceBase* device,
+    Device* device,
     absl::flat_hash_map<VkDescriptorType, uint32_t> descriptorCountPerType)
-    : ObjectBase(device) {
+    : mDevice(device) {
     // Compute the total number of descriptors for this layout.
     uint32_t totalDescriptorCount = 0;
     mPoolSizes.reserve(descriptorCountPerType.size());
@@ -87,17 +87,17 @@
     for (auto& pool : mDescriptorPools) {
         DAWN_ASSERT(pool.freeSetIndices.size() == mMaxSets);
         if (pool.vkPool != VK_NULL_HANDLE) {
-            Device* device = ToBackend(GetDevice());
-            device->GetFencedDeleter()->DeleteWhenUnused(pool.vkPool);
+            mDevice->GetFencedDeleter()->DeleteWhenUnused(pool.vkPool);
         }
     }
 }
 
-ResultOrError<DescriptorSetAllocation> DescriptorSetAllocator::Allocate(BindGroupLayout* layout) {
+ResultOrError<DescriptorSetAllocation> DescriptorSetAllocator::Allocate(
+    VkDescriptorSetLayout dsLayout) {
     Mutex::AutoLock lock(&mMutex);
 
     if (mAvailableDescriptorPoolIndices.empty()) {
-        DAWN_TRY(AllocateDescriptorPool(layout));
+        DAWN_TRY(AllocateDescriptorPool(dsLayout));
     }
 
     DAWN_ASSERT(!mAvailableDescriptorPoolIndices.empty());
@@ -119,7 +119,6 @@
 
 void DescriptorSetAllocator::Deallocate(DescriptorSetAllocation* allocationInfo) {
     bool enqueueDeferredDeallocation = false;
-    Device* device = ToBackend(GetDevice());
 
     {
         Mutex::AutoLock lock(&mMutex);
@@ -129,8 +128,10 @@
 
         // We can't reuse the descriptor set right away because the Vulkan spec says in the
         // documentation for vkCmdBindDescriptorSets that the set may be consumed any time between
-        // host execution of the command and the end of the draw/dispatch.
-        const ExecutionSerial serial = device->GetQueue()->GetPendingCommandSerial();
+        // host execution of the command (on "bindful" GPUs that inline the descriptors in the
+        // command stream) and the end of the draw/dispatch (for "bindless" GPUs where shaders read
+        // directly from the VkDescriptorPool).
+        const ExecutionSerial serial = mDevice->GetQueue()->GetPendingCommandSerial();
         mPendingDeallocations.Enqueue({allocationInfo->poolIndex, allocationInfo->setIndex},
                                       serial);
 
@@ -146,7 +147,7 @@
     if (enqueueDeferredDeallocation) {
         // Release lock before calling EnqueueDeferredDeallocation() to avoid lock acquisition
         // order inversion with lock used there.
-        device->EnqueueDeferredDeallocation(this);
+        mDevice->EnqueueDeferredDeallocation(this);
     }
 }
 
@@ -165,7 +166,7 @@
     mPendingDeallocations.ClearUpTo(completedSerial);
 }
 
-MaybeError DescriptorSetAllocator::AllocateDescriptorPool(BindGroupLayout* layout) {
+MaybeError DescriptorSetAllocator::AllocateDescriptorPool(VkDescriptorSetLayout dsLayout) {
     VkDescriptorPoolCreateInfo createInfo;
     createInfo.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO;
     createInfo.pNext = nullptr;
@@ -174,14 +175,12 @@
     createInfo.poolSizeCount = static_cast<PoolIndex>(mPoolSizes.size());
     createInfo.pPoolSizes = mPoolSizes.data();
 
-    Device* device = ToBackend(GetDevice());
-
     VkDescriptorPool descriptorPool;
-    DAWN_TRY(CheckVkSuccess(device->fn.CreateDescriptorPool(device->GetVkDevice(), &createInfo,
-                                                            nullptr, &*descriptorPool),
+    DAWN_TRY(CheckVkSuccess(mDevice->fn.CreateDescriptorPool(mDevice->GetVkDevice(), &createInfo,
+                                                             nullptr, &*descriptorPool),
                             "CreateDescriptorPool"));
 
-    std::vector<VkDescriptorSetLayout> layouts(mMaxSets, layout->GetHandle());
+    std::vector<VkDescriptorSetLayout> layouts(mMaxSets, dsLayout);
 
     VkDescriptorSetAllocateInfo allocateInfo;
     allocateInfo.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO;
@@ -192,12 +191,12 @@
 
     std::vector<VkDescriptorSet> sets(mMaxSets);
     MaybeError result =
-        CheckVkSuccess(device->fn.AllocateDescriptorSets(device->GetVkDevice(), &allocateInfo,
-                                                         AsVkArray(sets.data())),
+        CheckVkSuccess(mDevice->fn.AllocateDescriptorSets(mDevice->GetVkDevice(), &allocateInfo,
+                                                          AsVkArray(sets.data())),
                        "AllocateDescriptorSets");
     if (result.IsError()) {
         // On an error we can destroy the pool immediately because no command references it.
-        device->fn.DestroyDescriptorPool(device->GetVkDevice(), descriptorPool, nullptr);
+        mDevice->fn.DestroyDescriptorPool(mDevice->GetVkDevice(), descriptorPool, nullptr);
         DAWN_TRY(std::move(result));
     }
 
diff --git a/src/dawn/native/vulkan/DescriptorSetAllocator.h b/src/dawn/native/vulkan/DescriptorSetAllocator.h
index c1b9cd8..0f97ce4 100644
--- a/src/dawn/native/vulkan/DescriptorSetAllocator.h
+++ b/src/dawn/native/vulkan/DescriptorSetAllocator.h
@@ -32,37 +32,54 @@
 
 #include "absl/container/flat_hash_map.h"
 #include "dawn/common/Mutex.h"
+#include "dawn/common/RefCounted.h"
 #include "dawn/common/SerialQueue.h"
 #include "dawn/common/vulkan_platform.h"
 #include "dawn/native/Error.h"
 #include "dawn/native/IntegerTypes.h"
-#include "dawn/native/ObjectBase.h"
 #include "dawn/native/vulkan/DescriptorSetAllocation.h"
 #include "partition_alloc/pointers/raw_ptr.h"
 
 namespace dawn::native::vulkan {
 
-class BindGroupLayout;
+class Device;
 
-class DescriptorSetAllocator : public ObjectBase {
+// Vulkan requires that descriptor sets are sub-allocated from pre-created pools of descriptors.
+// Creating one pool per descriptor set is inefficient as each pool might be a different GPU
+// allocation (causing syscalls etc). This class handles allocating larger pools and reusing
+// sub-allocations when descriptor sets are no longer in use.
+//
+// Pools are allocated to be a multiple of the size required by a BindGroupLayout (multidimensional,
+// one per type of descriptor) and all the descriptor sets for a layout are created immediately. This is
+// necessary because the Vulkan spec doesn't require drivers to implement anything for
+// VkDescriptorPool but a simple bump allocator (with the ability to roll back one allocation).
+// Instead we allocate all the descriptor sets up front and overwrite them over time.
+// TODO(https://crbug.com/439522242): We could reuse allocators between BindGroupLayouts with the
+// same amount of descriptors if we added logic to be resilient to vkAllocateDescriptorSet failures
+// and some form of GCing / repointing of the dawn::native::vulkan::BindGroupLayout.
+//
+// It is RefCounted because when a descriptor set is destroyed, the allocator is added to a
+// notification queue on the device to get called back when the queue serial is completed. As such
+// it has multiple owners: the BindGroupLayout it corresponds to, and sometimes the device as well.
+class DescriptorSetAllocator : public RefCounted {
     using PoolIndex = uint32_t;
     using SetIndex = uint16_t;
 
   public:
     static Ref<DescriptorSetAllocator> Create(
-        DeviceBase* device,
+        Device* device,
         absl::flat_hash_map<VkDescriptorType, uint32_t> descriptorCountPerType);
 
-    ResultOrError<DescriptorSetAllocation> Allocate(BindGroupLayout* layout);
+    ResultOrError<DescriptorSetAllocation> Allocate(VkDescriptorSetLayout dsLayout);
     void Deallocate(DescriptorSetAllocation* allocationInfo);
     void FinishDeallocation(ExecutionSerial completedSerial);
 
   private:
-    DescriptorSetAllocator(DeviceBase* device,
+    DescriptorSetAllocator(Device* device,
                            absl::flat_hash_map<VkDescriptorType, uint32_t> descriptorCountPerType);
     ~DescriptorSetAllocator() override;
 
-    MaybeError AllocateDescriptorPool(BindGroupLayout* layout);
+    MaybeError AllocateDescriptorPool(VkDescriptorSetLayout dsLayout);
 
     std::vector<VkDescriptorPoolSize> mPoolSizes;
     SetIndex mMaxSets;
@@ -85,6 +102,8 @@
 
     // Used to guard all public member functions.
     Mutex mMutex;
+
+    const raw_ptr<Device> mDevice;
 };
 
 }  // namespace dawn::native::vulkan
diff --git a/src/dawn/native/vulkan/DeviceVk.cpp b/src/dawn/native/vulkan/DeviceVk.cpp
index 89ae921..b24fcda7 100644
--- a/src/dawn/native/vulkan/DeviceVk.cpp
+++ b/src/dawn/native/vulkan/DeviceVk.cpp
@@ -454,7 +454,9 @@
     if (IsRobustnessEnabled()) {
         usedKnobs.features.robustBufferAccess = VK_TRUE;
 
-        // Enable the robustness 2 guarantees that better implement the WebGPU semantics.
+        // Enable the robustness 2 guarantees that better implement the WebGPU semantics. When
+        // enabled, it forces tight bounds checking on buffers (instead of potentially extending
+        // them by 16 bytes or a vertex stride). It also exposes driver support for image robustness.
         if (mDeviceInfo.HasExt(DeviceExt::Robustness2)) {
             DAWN_ASSERT(usedKnobs.HasExt(DeviceExt::Robustness2));