Dawn native/wire: Add experimental subgroup limits

This CL adds the experimental subgroup limits by introducing a new
structure DawnExperimentalSubgroupLimits chained after SupportedLimits.
The structure can be used for querying adapter or device limits, and the
result would be WGPU_LIMIT_U32_UNDEFINED if AllowUnsafeAPIs is not
enabled.

Issue: dawn:464
Change-Id: I2ad591d4c5cf88ad69df48da8c2b01bbc1f38656
Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/151282
Reviewed-by: Loko Kung <lokokung@google.com>
Kokoro: Kokoro <noreply+kokoro@google.com>
Reviewed-by: Corentin Wallez <cwallez@chromium.org>
Commit-Queue: Zhaoming Jiang <zhaoming.jiang@intel.com>
diff --git a/dawn.json b/dawn.json
index 8756252..dcb8d10 100644
--- a/dawn.json
+++ b/dawn.json
@@ -1390,6 +1390,15 @@
             {"name": "max compute workgroups per dimension", "type": "uint32_t", "default": "WGPU_LIMIT_U32_UNDEFINED"}
         ]
     },
+    "dawn experimental subgroup limits": {
+        "category": "structure",
+        "chained": "out",
+        "chain roots": ["supported limits"],
+        "members": [
+            {"name": "min subgroup size", "type": "uint32_t", "default": "WGPU_LIMIT_U32_UNDEFINED"},
+            {"name": "max subgroup size", "type": "uint32_t", "default": "WGPU_LIMIT_U32_UNDEFINED"}
+        ]
+    },
     "required limits": {
         "category": "structure",
         "extensible": "in",
@@ -3263,6 +3272,7 @@
             {"value": 1014, "name": "render pass pixel local storage", "tags": ["dawn"]},
             {"value": 1015, "name": "pipeline layout pixel local storage", "tags": ["dawn"]},
             {"value": 1016, "name": "buffer host mapped pointer", "tags": ["dawn"]},
+            {"value": 1017, "name": "dawn experimental subgroup limits", "tags": ["dawn"]},
 
             {"value": 1100, "name": "shared texture memory vk image descriptor", "tags": ["dawn", "native"]},
             {"value": 1101, "name": "shared texture memory vk dedicated allocation descriptor", "tags": ["dawn", "native"]},
diff --git a/src/dawn/native/Adapter.cpp b/src/dawn/native/Adapter.cpp
index 6c52628..1706458 100644
--- a/src/dawn/native/Adapter.cpp
+++ b/src/dawn/native/Adapter.cpp
@@ -65,7 +65,10 @@
 
 bool AdapterBase::APIGetLimits(SupportedLimits* limits) const {
     DAWN_ASSERT(limits != nullptr);
-    if (limits->nextInChain != nullptr) {
+    // TODO(dawn:1955): Revisit after deciding how to improve the validation for ChainedStructOut.
+    MaybeError result =
+        ValidateSTypes(limits->nextInChain, {{wgpu::SType::DawnExperimentalSubgroupLimits}});
+    if (mPhysicalDevice->GetInstance()->ConsumedError(std::move(result))) {
         return false;
     }
     if (mUseTieredLimits) {
@@ -73,6 +76,29 @@
     } else {
         limits->limits = mPhysicalDevice->GetLimits().v1;
     }
+    for (auto* chain = limits->nextInChain; chain; chain = chain->nextInChain) {
+        wgpu::ChainedStructOut originalChain = *chain;
+        switch (chain->sType) {
+            case (wgpu::SType::DawnExperimentalSubgroupLimits): {
+                DawnExperimentalSubgroupLimits* subgroupLimits =
+                    reinterpret_cast<DawnExperimentalSubgroupLimits*>(chain);
+                if (!mTogglesState.IsEnabled(Toggle::AllowUnsafeAPIs)) {
+                    // If AllowUnsafeAPIs is not enabled, return the default-initialized
+                    // DawnExperimentalSubgroupLimits object, where minSubgroupSize and
+                    // maxSubgroupSize are WGPU_LIMIT_U32_UNDEFINED.
+                    *subgroupLimits = DawnExperimentalSubgroupLimits{};
+                } else {
+                    *subgroupLimits = mPhysicalDevice->GetLimits().experimentalSubgroupLimits;
+                }
+                break;
+            }
+            default:
+                // ValidateSTypes ensures that all chained sTypes are known.
+                DAWN_UNREACHABLE();
+        }
+        // Recover the original chain
+        *chain = originalChain;
+    }
     return true;
 }
 
@@ -181,15 +207,17 @@
     }
 
     if (descriptor->requiredLimits != nullptr) {
+        // Only the limits in the RequiredLimits structure are considered; chained structures are
+        // not supported yet.
+        DAWN_INVALID_IF(descriptor->requiredLimits->nextInChain != nullptr,
+                        "can not chain after requiredLimits.");
+
         SupportedLimits supportedLimits;
         bool success = APIGetLimits(&supportedLimits);
         DAWN_ASSERT(success);
 
         DAWN_TRY_CONTEXT(ValidateLimits(supportedLimits.limits, descriptor->requiredLimits->limits),
                          "validating required limits");
-
-        DAWN_INVALID_IF(descriptor->requiredLimits->nextInChain != nullptr,
-                        "nextInChain is not nullptr.");
     }
 
     return mPhysicalDevice->CreateDevice(this, descriptor, deviceToggles);
diff --git a/src/dawn/native/Device.cpp b/src/dawn/native/Device.cpp
index 808f453..7a9bd34c 100644
--- a/src/dawn/native/Device.cpp
+++ b/src/dawn/native/Device.cpp
@@ -219,6 +219,9 @@
     } else {
         GetDefaultLimits(&mLimits.v1, adapter->GetFeatureLevel());
     }
+    // Get experimentalSubgroupLimits from physical device
+    mLimits.experimentalSubgroupLimits =
+        GetPhysicalDevice()->GetLimits().experimentalSubgroupLimits;
 
     mFormatTable = BuildFormatTable(this);
 
@@ -1505,10 +1508,37 @@
 
 bool DeviceBase::APIGetLimits(SupportedLimits* limits) const {
     DAWN_ASSERT(limits != nullptr);
-    if (limits->nextInChain != nullptr) {
+    // TODO(dawn:1955): Revisit after deciding how to improve the validation for ChainedStructOut.
+    MaybeError result =
+        ValidateSTypes(limits->nextInChain, {{wgpu::SType::DawnExperimentalSubgroupLimits}});
+    if (GetPhysicalDevice()->GetInstance()->ConsumedError(std::move(result))) {
         return false;
     }
+
     limits->limits = mLimits.v1;
+
+    for (auto* chain = limits->nextInChain; chain; chain = chain->nextInChain) {
+        wgpu::ChainedStructOut originalChain = *chain;
+        switch (chain->sType) {
+            case (wgpu::SType::DawnExperimentalSubgroupLimits): {
+                DawnExperimentalSubgroupLimits* subgroupLimits =
+                    reinterpret_cast<DawnExperimentalSubgroupLimits*>(chain);
+                if (!mToggles.IsEnabled(Toggle::AllowUnsafeAPIs)) {
+                    // If AllowUnsafeAPIs is not enabled, return the default-initialized
+                    // DawnExperimentalSubgroupLimits object, where minSubgroupSize and
+                    // maxSubgroupSize are WGPU_LIMIT_U32_UNDEFINED.
+                    *subgroupLimits = DawnExperimentalSubgroupLimits{};
+                } else {
+                    *subgroupLimits = mLimits.experimentalSubgroupLimits;
+                }
+                break;
+            }
+            default:
+                DAWN_UNREACHABLE();
+        }
+        // Recover the original chain
+        *chain = originalChain;
+    }
     return true;
 }
 
diff --git a/src/dawn/native/Limits.h b/src/dawn/native/Limits.h
index 83ae805..798b1da 100644
--- a/src/dawn/native/Limits.h
+++ b/src/dawn/native/Limits.h
@@ -24,6 +24,7 @@
 
 struct CombinedLimits {
     Limits v1;
+    DawnExperimentalSubgroupLimits experimentalSubgroupLimits;
 };
 
 // Populate |limits| with the default limits.
diff --git a/src/dawn/native/d3d12/PhysicalDeviceD3D12.cpp b/src/dawn/native/d3d12/PhysicalDeviceD3D12.cpp
index 2424039..d93b1fe 100644
--- a/src/dawn/native/d3d12/PhysicalDeviceD3D12.cpp
+++ b/src/dawn/native/d3d12/PhysicalDeviceD3D12.cpp
@@ -348,6 +348,13 @@
     // TODO(crbug.com/dawn/1448):
     // - maxInterStageShaderVariables
 
+    // Experimental limits for subgroups
+    limits->experimentalSubgroupLimits.minSubgroupSize = mDeviceInfo.waveLaneCountMin;
+    // Currently the WaveLaneCountMax queried from D3D12 API is not reliable and the meaning is
+    // unclear. Use 128 instead, which is the largest possible size. Reference:
+    // https://github.com/Microsoft/DirectXShaderCompiler/wiki/Wave-Intrinsics#:~:text=UINT%20WaveLaneCountMax
+    limits->experimentalSubgroupLimits.maxSubgroupSize = 128u;
+
     return {};
 }
 
diff --git a/src/dawn/native/metal/BackendMTL.mm b/src/dawn/native/metal/BackendMTL.mm
index 434f8bb..69cf2d4 100644
--- a/src/dawn/native/metal/BackendMTL.mm
+++ b/src/dawn/native/metal/BackendMTL.mm
@@ -805,6 +805,10 @@
         // TODO(crbug.com/dawn/1448):
         // - maxInterStageShaderVariables
 
+        // Experimental limits for subgroups
+        limits->experimentalSubgroupLimits.minSubgroupSize = 4;
+        limits->experimentalSubgroupLimits.maxSubgroupSize = 64;
+
         return {};
     }
 
diff --git a/src/dawn/native/vulkan/PhysicalDeviceVk.cpp b/src/dawn/native/vulkan/PhysicalDeviceVk.cpp
index e0ddd4c..bb92142 100644
--- a/src/dawn/native/vulkan/PhysicalDeviceVk.cpp
+++ b/src/dawn/native/vulkan/PhysicalDeviceVk.cpp
@@ -518,6 +518,12 @@
     // TODO(crbug.com/dawn/1448):
     // - maxInterStageShaderVariables
 
+    // Experimental limits for subgroups
+    limits->experimentalSubgroupLimits.minSubgroupSize =
+        mDeviceInfo.subgroupSizeControlProperties.minSubgroupSize;
+    limits->experimentalSubgroupLimits.maxSubgroupSize =
+        mDeviceInfo.subgroupSizeControlProperties.maxSubgroupSize;
+
     return {};
 }
 
diff --git a/src/dawn/wire/client/LimitsAndFeatures.cpp b/src/dawn/wire/client/LimitsAndFeatures.cpp
index 6983b34..9cea113 100644
--- a/src/dawn/wire/client/LimitsAndFeatures.cpp
+++ b/src/dawn/wire/client/LimitsAndFeatures.cpp
@@ -25,10 +25,28 @@
 
 bool LimitsAndFeatures::GetLimits(WGPUSupportedLimits* limits) const {
     DAWN_ASSERT(limits != nullptr);
-    if (limits->nextInChain != nullptr) {
-        return false;
-    }
+    auto* originalNextInChain = limits->nextInChain;
     *limits = mLimits;
+    limits->nextInChain = originalNextInChain;
+    // Handle other requested limits that are chained after WGPUSupportedLimits.
+    for (auto* chain = limits->nextInChain; chain; chain = chain->next) {
+        // Store the WGPUChainedStructOut to restore the chain after assignment.
+        WGPUChainedStructOut originalChainedStructOut = *chain;
+        switch (chain->sType) {
+            case (WGPUSType_DawnExperimentalSubgroupLimits): {
+                auto* experimentalSubgroupLimits =
+                    reinterpret_cast<WGPUDawnExperimentalSubgroupLimits*>(chain);
+                // This assignment breaks the next field of the WGPUChainedStructOut head.
+                *experimentalSubgroupLimits = mExperimentalSubgroupLimits;
+                break;
+            }
+            default:
+                // Fail if unknown sType found.
+                return false;
+        }
+        // Restore the chain.
+        *chain = originalChainedStructOut;
+    }
     return true;
 }
 
@@ -50,6 +68,20 @@
     DAWN_ASSERT(limits != nullptr);
     mLimits = *limits;
     mLimits.nextInChain = nullptr;
+    // Handle other limits that are chained after WGPUSupportedLimits.
+    for (auto* chain = limits->nextInChain; chain; chain = chain->next) {
+        switch (chain->sType) {
+            case (WGPUSType_DawnExperimentalSubgroupLimits): {
+                auto* experimentalSubgroupLimits =
+                    reinterpret_cast<WGPUDawnExperimentalSubgroupLimits*>(chain);
+                mExperimentalSubgroupLimits = *experimentalSubgroupLimits;
+                mExperimentalSubgroupLimits.chain.next = nullptr;
+                break;
+            }
+            default:
+                DAWN_UNREACHABLE();
+        }
+    }
 }
 
 void LimitsAndFeatures::SetFeatures(const WGPUFeatureName* features, uint32_t featuresCount) {
diff --git a/src/dawn/wire/client/LimitsAndFeatures.h b/src/dawn/wire/client/LimitsAndFeatures.h
index cfe9353..379e7e6 100644
--- a/src/dawn/wire/client/LimitsAndFeatures.h
+++ b/src/dawn/wire/client/LimitsAndFeatures.h
@@ -35,6 +35,7 @@
 
   private:
     WGPUSupportedLimits mLimits;
+    WGPUDawnExperimentalSubgroupLimits mExperimentalSubgroupLimits;
     std::unordered_set<WGPUFeatureName> mFeatures;
 };
 
diff --git a/src/dawn/wire/server/ServerAdapter.cpp b/src/dawn/wire/server/ServerAdapter.cpp
index 3bac8a1..dbee88e 100644
--- a/src/dawn/wire/server/ServerAdapter.cpp
+++ b/src/dawn/wire/server/ServerAdapter.cpp
@@ -83,6 +83,10 @@
     cmd.features = features.data();
 
     WGPUSupportedLimits limits = {};
+    // Also query the DawnExperimentalSubgroupLimits and report to client.
+    WGPUDawnExperimentalSubgroupLimits experimentalSubgroupLimits = {};
+    experimentalSubgroupLimits.chain.sType = WGPUSType_DawnExperimentalSubgroupLimits;
+    limits.nextInChain = &experimentalSubgroupLimits.chain;
     mProcs.deviceGetLimits(device, &limits);
     cmd.limits = &limits;
 
diff --git a/src/dawn/wire/server/ServerInstance.cpp b/src/dawn/wire/server/ServerInstance.cpp
index 9d7cf94..8f0d5ea 100644
--- a/src/dawn/wire/server/ServerInstance.cpp
+++ b/src/dawn/wire/server/ServerInstance.cpp
@@ -56,13 +56,12 @@
         return;
     }
 
-    WGPUAdapterProperties properties = {};
-    WGPUSupportedLimits limits = {};
-    std::vector<WGPUFeatureName> features;
-
     // Assign the handle and allocated status if the adapter is created successfully.
     AdapterObjects().FillReservation(data->adapterObjectId, adapter);
 
+    // Query and report the adapter supported features.
+    std::vector<WGPUFeatureName> features;
+
     size_t featuresCount = mProcs.adapterEnumerateFeatures(adapter, nullptr);
     features.resize(featuresCount);
     mProcs.adapterEnumerateFeatures(adapter, features.data());
@@ -73,9 +72,19 @@
     cmd.featuresCount = std::distance(features.begin(), it);
     cmd.features = features.data();
 
+    // Query and report the adapter properties.
+    WGPUAdapterProperties properties = {};
     mProcs.adapterGetProperties(adapter, &properties);
-    mProcs.adapterGetLimits(adapter, &limits);
     cmd.properties = &properties;
+
+    // Query and report the adapter limits, including DawnExperimentalSubgroupLimits.
+    WGPUSupportedLimits limits = {};
+
+    WGPUDawnExperimentalSubgroupLimits experimentalSubgroupLimits = {};
+    experimentalSubgroupLimits.chain.sType = WGPUSType_DawnExperimentalSubgroupLimits;
+    limits.nextInChain = &experimentalSubgroupLimits.chain;
+
+    mProcs.adapterGetLimits(adapter, &limits);
     cmd.limits = &limits;
 
     SerializeCommand(cmd);