Reland: Vulkan: Enforce fixed subgroup size for compute shaders. Reland after a refactor of how the extension handling works in the Vulkan backend. The original author is David Turner <david.turner.dev@gmail.com>. This CL ensures that, on architectures with a varying subgroup size, compute shaders are always compiled with a fixed subgroup size to avoid consistency issues when one shader writes data in a subgroup-size dependent layout to GPU memory, to be read by another shader in a future dispatch. At the moment, only Intel ICDs are known to implement this [1], and the code uses a heuristic to choose the size of 16, which seems to be the sweet spot according to Intel engineers. + Update the PNextChainBuilder class to deal with the fact that VkComputePipelineCreateInfo::pNext is defined as a const void*, which created compiler errors in the previous implementation. [1] https://bugs.freedesktop.org/show_bug.cgi?id=108875 Bug: dawn:464 Change-Id: I035ee06084fcc964742f0bff4c54cff257c742ae Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/23202 Commit-Queue: Corentin Wallez <cwallez@chromium.org> Reviewed-by: Austin Eng <enga@chromium.org> Reviewed-by: Stephen White <senorblanco@chromium.org>

commit: fefb452f2969e9e012c332e58a04e3b72abb8ae3 [log] [tgz]
author: Corentin Wallez <cwallez@chromium.org> Tue Jun 16 09:17:48 2020 +0000
committer: Commit Bot service account <commit-bot@chromium.org> Tue Jun 16 09:17:48 2020 +0000
tree: e1903714867712eddff2628b29e2530d38d1f2c7
parent: 5cef1162b9af6c9377cc8f41c019b5cdc456c77f [diff]
diff --git a/src/dawn_native/vulkan/ComputePipelineVk.cpp b/src/dawn_native/vulkan/ComputePipelineVk.cpp
index b4a9dda..f681547 100644
--- a/src/dawn_native/vulkan/ComputePipelineVk.cpp
+++ b/src/dawn_native/vulkan/ComputePipelineVk.cpp

@@ -18,6 +18,7 @@
 #include "dawn_native/vulkan/FencedDeleter.h"
 #include "dawn_native/vulkan/PipelineLayoutVk.h"
 #include "dawn_native/vulkan/ShaderModuleVk.h"
+#include "dawn_native/vulkan/UtilsVulkan.h"
 #include "dawn_native/vulkan/VulkanError.h"
 
 namespace dawn_native { namespace vulkan {
@@ -49,6 +50,19 @@
         createInfo.stage.pSpecializationInfo = nullptr;
 
         Device* device = ToBackend(GetDevice());
+
+        PNextChainBuilder extChain(&createInfo);
+
+        VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT subgroupSizeInfo = {};
+        uint32_t computeSubgroupSize = device->GetComputeSubgroupSize();
+        if (computeSubgroupSize != 0u) {
+            ASSERT(device->GetDeviceInfo().HasExt(DeviceExt::SubgroupSizeControl));
+            subgroupSizeInfo.requiredSubgroupSize = computeSubgroupSize;
+            extChain.Add(
+                &subgroupSizeInfo,
+                VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT);
+        }
+
         return CheckVkSuccess(
             device->fn.CreateComputePipelines(device->GetVkDevice(), ::VK_NULL_HANDLE, 1,
                                               &createInfo, nullptr, &*mHandle),

diff --git a/src/dawn_native/vulkan/DeviceVk.cpp b/src/dawn_native/vulkan/DeviceVk.cpp
index df356f6..70183c4 100644
--- a/src/dawn_native/vulkan/DeviceVk.cpp
+++ b/src/dawn_native/vulkan/DeviceVk.cpp

@@ -305,6 +305,8 @@
             // Always request all the features from VK_EXT_subgroup_size_control when available.
             usedKnobs.subgroupSizeControlFeatures = mDeviceInfo.subgroupSizeControlFeatures;
             featuresChain.Add(&usedKnobs.subgroupSizeControlFeatures);
+
+            mComputeSubgroupSize = FindComputeSubgroupSize();
         }
 
         if (IsExtensionEnabled(Extension::TextureCompressionBC)) {
@@ -386,6 +388,32 @@
         return usedKnobs;
     }
 
+    uint32_t Device::FindComputeSubgroupSize() const {
+        if (!mDeviceInfo.HasExt(DeviceExt::SubgroupSizeControl)) {
+            return 0;
+        }
+
+        const VkPhysicalDeviceSubgroupSizeControlPropertiesEXT& ext =
+            mDeviceInfo.subgroupSizeControlProperties;
+
+        if (ext.minSubgroupSize == ext.maxSubgroupSize) {
+            return 0;
+        }
+
+        // At the moment, only Intel devices support varying subgroup sizes and 16, which is the
+        // next value after the minimum of 8, is the sweet spot according to [1]. Hence the
+        // following heuristics, which may need to be adjusted in the future for other
+        // architectures, or if a specific API is added to let client code select the size.
+        //
+        // [1] https://bugs.freedesktop.org/show_bug.cgi?id=108875
+        uint32_t subgroupSize = ext.minSubgroupSize * 2;
+        if (subgroupSize <= ext.maxSubgroupSize) {
+            return subgroupSize;
+        } else {
+            return ext.minSubgroupSize;
+        }
+    }
+
     void Device::GatherQueueFromDevice() {
         fn.GetDeviceQueue(mVkDevice, mQueueFamily, 0, &mQueue);
     }
@@ -712,6 +740,10 @@
         return mResourceMemoryAllocator.get();
     }
 
+    uint32_t Device::GetComputeSubgroupSize() const {
+        return mComputeSubgroupSize;
+    }
+
     MaybeError Device::WaitForIdleForDestruction() {
         // Immediately tag the recording context as unused so we don't try to submit it in Tick.
         // Move the mRecordingContext.used to mUnusedCommands so it can be cleaned up in

diff --git a/src/dawn_native/vulkan/DeviceVk.h b/src/dawn_native/vulkan/DeviceVk.h
index b38eb77..9e9ded9 100644
--- a/src/dawn_native/vulkan/DeviceVk.h
+++ b/src/dawn_native/vulkan/DeviceVk.h

@@ -96,6 +96,10 @@
 
         ResourceMemoryAllocator* GetResourceMemoryAllocatorForTesting() const;
 
+        // Return the fixed subgroup size to use for compute shaders on this device or 0 if none
+        // needs to be set.
+        uint32_t GetComputeSubgroupSize() const;
+
       private:
         Device(Adapter* adapter, const DeviceDescriptor* descriptor);
 
@@ -130,6 +134,7 @@
         ResultOrError<VulkanDeviceKnobs> CreateDevice(VkPhysicalDevice physicalDevice);
         void GatherQueueFromDevice();
 
+        uint32_t FindComputeSubgroupSize() const;
         void InitTogglesFromDriver();
         void ApplyDepth24PlusS8Toggle();
 
@@ -144,6 +149,7 @@
         VkDevice mVkDevice = VK_NULL_HANDLE;
         uint32_t mQueueFamily = 0;
         VkQueue mQueue = VK_NULL_HANDLE;
+        uint32_t mComputeSubgroupSize = 0;
 
         SerialQueue<Ref<BindGroupLayout>> mBindGroupLayoutsPendingDeallocation;
         std::unique_ptr<FencedDeleter> mDeleter;
commit	fefb452f2969e9e012c332e58a04e3b72abb8ae3	[log] [tgz]
author	Corentin Wallez <cwallez@chromium.org>	Tue Jun 16 09:17:48 2020 +0000
committer	Commit Bot service account <commit-bot@chromium.org>	Tue Jun 16 09:17:48 2020 +0000
tree	e1903714867712eddff2628b29e2530d38d1f2c7
parent	5cef1162b9af6c9377cc8f41c019b5cdc456c77f [diff]