Vulkan: Enforce fixed subgroup size for compute shaders.
This CL ensures that, on architectures with a varying subgroup size,
compute shaders are always compiled with a fixed subgroup size to
avoid consistency issues when one shader writes data in a subgroup-size
dependent layout to GPU memory, to be read by another shader in a
future dispatch.
At the moment, only Intel ICDs are known to implement this [1],
and the code uses a heuristic to choose a size of 16, which seems to
be the sweet spot according to Intel engineers.
+ Update the PNextChainBuilder class to deal with the fact that
VkComputePipelineCreateInfo::pNext is defined as a const void*,
which created compiler errors in the previous implementation.
[1] https://bugs.freedesktop.org/show_bug.cgi?id=108875
Change-Id: I332faa53b9f854a8abe43a7271f30d8c5deb2142
Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/16021
Commit-Queue: Corentin Wallez <cwallez@chromium.org>
Reviewed-by: Corentin Wallez <cwallez@chromium.org>
diff --git a/src/dawn_native/vulkan/ComputePipelineVk.cpp b/src/dawn_native/vulkan/ComputePipelineVk.cpp
index 16dd8e7..0db1421 100644
--- a/src/dawn_native/vulkan/ComputePipelineVk.cpp
+++ b/src/dawn_native/vulkan/ComputePipelineVk.cpp
@@ -18,6 +18,7 @@
#include "dawn_native/vulkan/FencedDeleter.h"
#include "dawn_native/vulkan/PipelineLayoutVk.h"
#include "dawn_native/vulkan/ShaderModuleVk.h"
+#include "dawn_native/vulkan/UtilsVulkan.h"
#include "dawn_native/vulkan/VulkanError.h"
namespace dawn_native { namespace vulkan {
@@ -50,6 +51,18 @@
createInfo.stage.pSpecializationInfo = nullptr;
Device* device = ToBackend(GetDevice());
+
+ // VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT extends
+ // VkPipelineShaderStageCreateInfo (not VkComputePipelineCreateInfo), so the
+ // extension chain has to be rooted at the shader stage description.
+ // NOTE(review): assumes createInfo.stage.pNext is nullptr at this point - confirm.
+ PNextChainBuilder stageExtChain(&createInfo.stage);
+
+ // Declared outside of the conditional below so that the structure is still
+ // alive when |createInfo| is passed to CreateComputePipelines().
+ VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT subgroupSizeInfo = {};
+ // A value of 0 means that no fixed subgroup size needs to be set.
+ const uint32_t computeSubgroupSize = device->GetComputeSubgroupSize();
+ if (computeSubgroupSize != 0u) {
+     stageExtChain.Add(
+         &subgroupSizeInfo,
+         VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT);
+     subgroupSizeInfo.requiredSubgroupSize = computeSubgroupSize;
+ }
+
return CheckVkSuccess(
device->fn.CreateComputePipelines(device->GetVkDevice(), ::VK_NULL_HANDLE, 1,
&createInfo, nullptr, &*mHandle),
diff --git a/src/dawn_native/vulkan/DeviceVk.cpp b/src/dawn_native/vulkan/DeviceVk.cpp
index 2c7e40f..a2b291b 100644
--- a/src/dawn_native/vulkan/DeviceVk.cpp
+++ b/src/dawn_native/vulkan/DeviceVk.cpp
@@ -388,6 +388,8 @@
*dst = mDeviceInfo.featuresExtensions.subgroupSizeControl;
featuresChain.Add(dst);
+
+ mComputeSubgroupSize = FindComputeSubgroupSize();
}
// Always require independentBlend because it is a core Dawn feature
@@ -464,6 +466,33 @@
return usedKnobs;
}
+ // Computes the fixed subgroup size that compute shaders should be compiled
+ // with on this device. Returns 0 when no size should be enforced.
+ uint32_t Device::FindComputeSubgroupSize() const {
+     // Nothing can be enforced without subgroup size control.
+     if (!mDeviceInfo.subgroupSizeControl) {
+         return 0;
+     }
+
+     const VkPhysicalDeviceSubgroupSizeControlPropertiesEXT& ext =
+         mDeviceInfo.propertiesExtensions.subgroupSizeControl;
+
+     // The subgroup size does not vary on this device, so there is nothing to fix.
+     if (ext.minSubgroupSize == ext.maxSubgroupSize) {
+         return 0;
+     }
+
+     // A required subgroup size may only be specified for the shader stages
+     // listed in requiredSubgroupSizeStages. Do not request one if the compute
+     // stage is not among them.
+     if ((ext.requiredSubgroupSizeStages & VK_SHADER_STAGE_COMPUTE_BIT) == 0) {
+         return 0;
+     }
+
+     // At the moment, only Intel devices support varying subgroup sizes
+     // and 16, which is the next value after the minimum of 8, is the sweet
+     // spot according to [1]. Hence the following heuristic, which may
+     // need to be adjusted in the future for other architectures, or if
+     // a specific API is added to let client code select the size.
+     //
+     // [1] https://bugs.freedesktop.org/show_bug.cgi?id=108875
+     const uint32_t subgroupSize = ext.minSubgroupSize * 2;
+     if (subgroupSize <= ext.maxSubgroupSize) {
+         return subgroupSize;
+     }
+     // Doubling the minimum would exceed the maximum: fall back to the minimum.
+     return ext.minSubgroupSize;
+ }
+
void Device::GatherQueueFromDevice() {
fn.GetDeviceQueue(mVkDevice, mQueueFamily, 0, &mQueue);
}
diff --git a/src/dawn_native/vulkan/DeviceVk.h b/src/dawn_native/vulkan/DeviceVk.h
index a4445f2..44ad7cb 100644
--- a/src/dawn_native/vulkan/DeviceVk.h
+++ b/src/dawn_native/vulkan/DeviceVk.h
@@ -100,6 +100,12 @@
ResourceMemoryAllocator* GetResourceMemoryAllocatorForTesting() const;
+ // Return the fixed subgroup size to use for compute shaders on this device,
+ // or 0 if none needs to be set.
+ uint32_t GetComputeSubgroupSize() const { return mComputeSubgroupSize; }
+
private:
ResultOrError<BindGroupBase*> CreateBindGroupImpl(
const BindGroupDescriptor* descriptor) override;
@@ -130,6 +136,7 @@
ResultOrError<VulkanDeviceKnobs> CreateDevice(VkPhysicalDevice physicalDevice);
void GatherQueueFromDevice();
+ uint32_t FindComputeSubgroupSize() const;
void InitTogglesFromDriver();
void ApplyDepth24PlusS8Toggle();
@@ -144,6 +151,7 @@
VkDevice mVkDevice = VK_NULL_HANDLE;
uint32_t mQueueFamily = 0;
VkQueue mQueue = VK_NULL_HANDLE;
+ uint32_t mComputeSubgroupSize = 0;
std::unique_ptr<DescriptorSetService> mDescriptorSetService;
std::unique_ptr<FencedDeleter> mDeleter;