Update maxFragmentCombinedOutputResources on the different backends.
- Adds an end-to-end test that writes to the fragment output resources when at the limit.
Bug: dawn:1665
Change-Id: I2b2b9c2d700be0e454dc945ed8e3e1fe6b191974
Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/122801
Reviewed-by: Corentin Wallez <cwallez@chromium.org>
Commit-Queue: Loko Kung <lokokung@google.com>
Kokoro: Kokoro <noreply+kokoro@google.com>
diff --git a/src/dawn/native/d3d12/AdapterD3D12.cpp b/src/dawn/native/d3d12/AdapterD3D12.cpp
index bd261c4..2d39752 100644
--- a/src/dawn/native/d3d12/AdapterD3D12.cpp
+++ b/src/dawn/native/d3d12/AdapterD3D12.cpp
@@ -262,6 +262,9 @@
limits->v1.maxSamplersPerShaderStage = maxSamplersPerStage;
limits->v1.maxColorAttachments = D3D12_SIMULTANEOUS_RENDER_TARGET_COUNT;
+ limits->v1.maxFragmentCombinedOutputResources = limits->v1.maxColorAttachments +
+ limits->v1.maxStorageBuffersPerShaderStage +
+ limits->v1.maxStorageTexturesPerShaderStage;
// https://docs.microsoft.com/en-us/windows/win32/direct3d12/root-signature-limits
// In DWORDS. Descriptor tables cost 1, Root constants cost 1, Root descriptors cost 2.
diff --git a/src/dawn/native/metal/BackendMTL.mm b/src/dawn/native/metal/BackendMTL.mm
index 512ab99..bc4438d 100644
--- a/src/dawn/native/metal/BackendMTL.mm
+++ b/src/dawn/native/metal/BackendMTL.mm
@@ -706,6 +706,10 @@
limits->v1.maxStorageTexturesPerShaderStage += (additional - additional / 2);
}
+ limits->v1.maxFragmentCombinedOutputResources = limits->v1.maxColorAttachments +
+ limits->v1.maxStorageBuffersPerShaderStage +
+ limits->v1.maxStorageTexturesPerShaderStage;
+
limits->v1.maxSamplersPerShaderStage = mtlLimits.maxSamplerStateArgumentEntriesPerFunc;
// Metal limits are per-function, so the layout limits are the same as the stage
diff --git a/src/dawn/native/vulkan/AdapterVk.cpp b/src/dawn/native/vulkan/AdapterVk.cpp
index 556f5f8..80d6112 100644
--- a/src/dawn/native/vulkan/AdapterVk.cpp
+++ b/src/dawn/native/vulkan/AdapterVk.cpp
@@ -326,6 +326,8 @@
maxUniformBuffersPerShaderStage);
CHECK_AND_SET_V1_MAX_LIMIT(maxUniformBufferRange, maxUniformBufferBindingSize);
CHECK_AND_SET_V1_MAX_LIMIT(maxStorageBufferRange, maxStorageBufferBindingSize);
+ CHECK_AND_SET_V1_MAX_LIMIT(maxFragmentCombinedOutputResources,
+ maxFragmentCombinedOutputResources);
CHECK_AND_SET_V1_MIN_LIMIT(minUniformBufferOffsetAlignment, minUniformBufferOffsetAlignment);
CHECK_AND_SET_V1_MIN_LIMIT(minStorageBufferOffsetAlignment, minStorageBufferOffsetAlignment);
@@ -382,58 +384,6 @@
limits->v1.maxBufferSize = kAssumedMaxBufferSize;
}
- // Only check maxFragmentCombinedOutputResources on mobile GPUs. Desktop GPUs drivers seem
- // to put incorrect values for this limit with things like 8 or 16 when they can do bindless
- // storage buffers. Mesa llvmpipe driver also puts 8 here.
- uint32_t vendorId = mDeviceInfo.properties.vendorID;
- if (!gpu_info::IsAMD(vendorId) && !gpu_info::IsIntel(vendorId) && !gpu_info::IsMesa(vendorId) &&
- !gpu_info::IsNvidia(vendorId)) {
- if (vkLimits.maxFragmentCombinedOutputResources <
- kMaxColorAttachments + baseLimits.v1.maxStorageTexturesPerShaderStage +
- baseLimits.v1.maxStorageBuffersPerShaderStage) {
- return DAWN_INTERNAL_ERROR(
- "Insufficient Vulkan maxFragmentCombinedOutputResources limit");
- }
-
- uint32_t maxFragmentCombinedOutputResources = kMaxColorAttachments +
- limits->v1.maxStorageTexturesPerShaderStage +
- limits->v1.maxStorageBuffersPerShaderStage;
-
- if (maxFragmentCombinedOutputResources > vkLimits.maxFragmentCombinedOutputResources) {
- // WebGPU's maxFragmentCombinedOutputResources exceeds the Vulkan limit.
- // Decrease |maxStorageTexturesPerShaderStage| and |maxStorageBuffersPerShaderStage|
- // to fit within the Vulkan limit.
- uint32_t countOverLimit =
- maxFragmentCombinedOutputResources - vkLimits.maxFragmentCombinedOutputResources;
-
- uint32_t maxStorageTexturesOverBase = limits->v1.maxStorageTexturesPerShaderStage -
- baseLimits.v1.maxStorageTexturesPerShaderStage;
- uint32_t maxStorageBuffersOverBase = limits->v1.maxStorageBuffersPerShaderStage -
- baseLimits.v1.maxStorageBuffersPerShaderStage;
-
- // Reduce the number of resources by half the overage count, but clamp to
- // to ensure we don't go below the base limits.
- uint32_t numFewerStorageTextures =
- std::min(countOverLimit / 2, maxStorageTexturesOverBase);
- uint32_t numFewerStorageBuffers =
- std::min((countOverLimit + 1) / 2, maxStorageBuffersOverBase);
-
- if (numFewerStorageTextures == maxStorageTexturesOverBase) {
- // If |numFewerStorageTextures| was clamped, subtract the remaining
- // from the storage buffers.
- numFewerStorageBuffers = countOverLimit - numFewerStorageTextures;
- ASSERT(numFewerStorageBuffers <= maxStorageBuffersOverBase);
- } else if (numFewerStorageBuffers == maxStorageBuffersOverBase) {
- // If |numFewerStorageBuffers| was clamped, subtract the remaining
- // from the storage textures.
- numFewerStorageTextures = countOverLimit - numFewerStorageBuffers;
- ASSERT(numFewerStorageTextures <= maxStorageTexturesOverBase);
- }
- limits->v1.maxStorageTexturesPerShaderStage -= numFewerStorageTextures;
- limits->v1.maxStorageBuffersPerShaderStage -= numFewerStorageBuffers;
- }
- }
-
// Using base limits for:
// TODO(crbug.com/dawn/1448):
// - maxInterStageShaderVariables
diff --git a/src/dawn/tests/end2end/MaxLimitTests.cpp b/src/dawn/tests/end2end/MaxLimitTests.cpp
index 204f262..0632318 100644
--- a/src/dawn/tests/end2end/MaxLimitTests.cpp
+++ b/src/dawn/tests/end2end/MaxLimitTests.cpp
@@ -541,6 +541,160 @@
EXPECT_BUFFER_U32_EQ(1, result, 0);
}
+// Verifies that a single fragment shader invocation can write to storage buffers, storage
+// textures and color attachments together, using as many of them as the
+// maxFragmentCombinedOutputResources limit and the per-type limits allow.
+TEST_P(MaxLimitTests, WriteToMaxFragmentCombinedOutputResources) {
+    // TODO(dawn:1692) Currently does not work on GL and GLES.
+    DAWN_SUPPRESS_TEST_IF(IsOpenGL() || IsOpenGLES());
+
+    // Start from one color attachment plus the per-stage maximum of storage buffers and storage
+    // textures, then shrink the buffer/texture counts if that total would go over the combined
+    // limit.
+    wgpu::Limits limits = GetSupportedLimits().limits;
+    uint32_t attachmentCount = 1;
+    uint32_t storageBuffers = limits.maxStorageBuffersPerShaderStage;
+    uint32_t storageTextures = limits.maxStorageTexturesPerShaderStage;
+    uint32_t maxCombinedResources = limits.maxFragmentCombinedOutputResources;
+
+    // Guard the unsigned subtraction below: the one attachment must fit in the combined limit.
+    ASSERT_GE(maxCombinedResources, attachmentCount);
+    // Number of slots left for storage buffers and storage textures once the attachment is
+    // accounted for.
+    const uint32_t storageBudget = maxCombinedResources - attachmentCount;
+    if (uint64_t(storageBuffers) + uint64_t(storageTextures) > uint64_t(storageBudget)) {
+        // Give textures at most half of the budget, and buffers the rest, but never more
+        // buffers than the per-stage buffer limit allows.
+        storageTextures = std::min(storageTextures, storageBudget / 2);
+        storageBuffers = std::min(storageBuffers, storageBudget - storageTextures);
+        // If the buffer count was capped by its own limit, hand the unused slots back to the
+        // textures (still bounded by the per-stage texture limit).
+        storageTextures =
+            std::min(limits.maxStorageTexturesPerShaderStage, storageBudget - storageBuffers);
+    }
+    if (maxCombinedResources > attachmentCount + storageBuffers + storageTextures) {
+        // There are still unused slots after maximizing the number of buffers and textures, so
+        // spend them on additional color attachments.
+        attachmentCount = std::min(limits.maxColorAttachments,
+                                   maxCombinedResources - storageBuffers - storageTextures);
+    }
+    ASSERT_LE(attachmentCount + storageBuffers + storageTextures, maxCombinedResources);
+
+    // Builds the fragment shader source that writes (index + 1) to every resource.
+    auto CreateShader = [&]() -> wgpu::ShaderModule {
+        // One binding declaration and one store per storage buffer, all in bind group 0.
+        std::ostringstream bufferBindings;
+        std::ostringstream bufferOutputs;
+        for (uint32_t i = 0; i < storageBuffers; i++) {
+            bufferBindings << "@group(0) @binding(" << i << ") var<storage, read_write> b" << i
+                           << ": u32;\n";
+            bufferOutputs << " b" << i << " = " << i << "u + 1u;\n";
+        }
+
+        // One binding declaration and one textureStore per storage texture, all in bind group 1.
+        std::ostringstream textureBindings;
+        std::ostringstream textureOutputs;
+        for (uint32_t i = 0; i < storageTextures; i++) {
+            textureBindings << "@group(1) @binding(" << i << ") var t" << i
+                            << ": texture_storage_2d<rgba8uint, write>;\n";
+            textureOutputs << " textureStore(t" << i << ", vec2u(0, 0), vec4u(" << i
+                           << "u + 1u));\n";
+        }
+
+        // One member of the Outputs struct and one constructor argument per color attachment.
+        std::ostringstream targetBindings;
+        std::ostringstream targetOutputs;
+        for (uint32_t i = 0; i < attachmentCount; i++) {
+            targetBindings << "@location(" << i << ") o" << i << " : u32, ";
+            targetOutputs << i << "u + 1u, ";
+        }
+
+        // Assemble the pieces: declarations first, then the entry point body.
+        std::ostringstream fsShader;
+        fsShader << bufferBindings.str();
+        fsShader << textureBindings.str();
+        fsShader << "struct Outputs { " << targetBindings.str() << "}\n";
+        fsShader << "@fragment fn main() -> Outputs {\n";
+        fsShader << bufferOutputs.str();
+        fsShader << textureOutputs.str();
+        fsShader << " return Outputs(" << targetOutputs.str() << ");\n";
+        fsShader << "}";
+        return utils::CreateShaderModule(device, fsShader.str().c_str());
+    };
+
+    // Color target state shared by every attachment of the render pipeline.
+    wgpu::ColorTargetState kColorTargetState = {};
+    kColorTargetState.format = wgpu::TextureFormat::R8Uint;
+
+    // Create the render pipeline: a single point is drawn so the fragment shader runs.
+    utils::ComboRenderPipelineDescriptor pipelineDesc;
+    pipelineDesc.vertex.module = utils::CreateShaderModule(device, R"(
+ @vertex fn main() -> @builtin(position) vec4f {
+ return vec4f(0.0, 0.0, 0.0, 1.0);
+ })");
+    pipelineDesc.vertex.entryPoint = "main";
+    pipelineDesc.primitive.topology = wgpu::PrimitiveTopology::PointList;
+    pipelineDesc.cFragment.module = CreateShader();
+    pipelineDesc.cFragment.entryPoint = "main";
+    pipelineDesc.cTargets.fill(kColorTargetState);
+    pipelineDesc.cFragment.targetCount = attachmentCount;
+    wgpu::RenderPipeline renderPipeline = device.CreateRenderPipeline(&pipelineDesc);
+
+    // Create the storage buffers (4 bytes each, one u32) and the bind group for group 0.
+    std::vector<wgpu::Buffer> buffers;
+    std::vector<wgpu::BindGroupEntry> bufferEntries;
+    wgpu::BufferDescriptor bufferDesc = {};
+    bufferDesc.size = 4;
+    bufferDesc.usage = wgpu::BufferUsage::Storage | wgpu::BufferUsage::CopySrc;
+    for (uint32_t i = 0; i < storageBuffers; i++) {
+        buffers.push_back(device.CreateBuffer(&bufferDesc));
+        bufferEntries.push_back(utils::BindingInitializationHelper(i, buffers[i]).GetAsBinding());
+    }
+    wgpu::BindGroupDescriptor bufferBindGroupDesc = {};
+    bufferBindGroupDesc.layout = renderPipeline.GetBindGroupLayout(0);
+    bufferBindGroupDesc.entryCount = storageBuffers;
+    bufferBindGroupDesc.entries = bufferEntries.data();
+    wgpu::BindGroup bufferBindGroup = device.CreateBindGroup(&bufferBindGroupDesc);
+
+    // Create the 1x1 storage textures and the bind group for group 1.
+    std::vector<wgpu::Texture> textures;
+    std::vector<wgpu::BindGroupEntry> textureEntries;
+    wgpu::TextureDescriptor textureDesc = {};
+    textureDesc.size.width = 1;
+    textureDesc.size.height = 1;
+    textureDesc.format = wgpu::TextureFormat::RGBA8Uint;
+    textureDesc.usage = wgpu::TextureUsage::StorageBinding | wgpu::TextureUsage::CopySrc;
+    for (uint32_t i = 0; i < storageTextures; i++) {
+        textures.push_back(device.CreateTexture(&textureDesc));
+        textureEntries.push_back(
+            utils::BindingInitializationHelper(i, textures[i].CreateView()).GetAsBinding());
+    }
+    wgpu::BindGroupDescriptor textureBindGroupDesc = {};
+    textureBindGroupDesc.layout = renderPipeline.GetBindGroupLayout(1);
+    textureBindGroupDesc.entryCount = storageTextures;
+    textureBindGroupDesc.entries = textureEntries.data();
+    wgpu::BindGroup textureBindGroup = device.CreateBindGroup(&textureBindGroupDesc);
+
+    // Create the 1x1 color attachments and their views.
+    std::vector<wgpu::Texture> attachments;
+    std::vector<wgpu::TextureView> attachmentViews;
+    wgpu::TextureDescriptor attachmentDesc = {};
+    attachmentDesc.size = {1, 1};
+    attachmentDesc.format = wgpu::TextureFormat::R8Uint;
+    attachmentDesc.usage = wgpu::TextureUsage::RenderAttachment | wgpu::TextureUsage::CopySrc;
+    for (uint32_t i = 0; i < attachmentCount; i++) {
+        attachments.push_back(device.CreateTexture(&attachmentDesc));
+        attachmentViews.push_back(attachments[i].CreateView());
+    }
+
+    // Execute the pipeline.
+    utils::ComboRenderPassDescriptor passDesc(attachmentViews);
+    wgpu::CommandEncoder encoder = device.CreateCommandEncoder();
+    wgpu::RenderPassEncoder pass = encoder.BeginRenderPass(&passDesc);
+    pass.SetBindGroup(0, bufferBindGroup);
+    pass.SetBindGroup(1, textureBindGroup);
+    pass.SetPipeline(renderPipeline);
+    pass.Draw(1);
+    pass.End();
+    wgpu::CommandBuffer commands = encoder.Finish();
+    queue.Submit(1, &commands);
+
+    // Verify the results: resource number i of each kind must hold the value i + 1.
+    for (uint32_t i = 0; i < storageBuffers; i++) {
+        EXPECT_BUFFER_U32_EQ(i + 1, buffers[i], 0);
+    }
+    for (uint32_t i = 0; i < storageTextures; i++) {
+        const uint32_t res = i + 1;
+        EXPECT_PIXEL_RGBA8_EQ(utils::RGBA8(res, res, res, res), textures[i], 0, 0);
+    }
+    for (uint32_t i = 0; i < attachmentCount; i++) {
+        EXPECT_PIXEL_RGBA8_EQ(utils::RGBA8(i + 1, 0, 0, 0), attachments[i], 0, 0);
+    }
+}
+
// Verifies that supported buffer limits do not exceed maxBufferSize.
TEST_P(MaxLimitTests, MaxBufferSizes) {
// Base limits without tiering.