Fix D3D12 shader interstage truncating transform
Don't skip the TruncateInterstageVariables transform when
user defined interstage attribute input for fragment stage
is empty. Because builtin inputs could also cause register
mismatch for D3D12 HLSL compiler.
Add a boolean flag to Tint hlsl generator option to indicate
whether to run TruncateInterstageVariables or not.
This defaults to false in Tint, while Dawn always set
this to true for vertex stage.
Bug: dawn:1733
Change-Id: Ie4c3648b226513bf15f0e03ae4ce7f3cc09fdef4
Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/127206
Reviewed-by: Ben Clayton <bclayton@google.com>
Kokoro: Kokoro <noreply+kokoro@google.com>
Commit-Queue: Shrek Shao <shrekshao@google.com>
diff --git a/src/dawn/native/d3d/ShaderUtils.cpp b/src/dawn/native/d3d/ShaderUtils.cpp
index 222dc34..a0e13d4 100644
--- a/src/dawn/native/d3d/ShaderUtils.cpp
+++ b/src/dawn/native/d3d/ShaderUtils.cpp
@@ -232,6 +232,7 @@
// Pass in the actually used interstage locations for tint to potentially truncate unused
// outputs.
options.interstage_locations = r.interstageLocations;
+ options.truncate_interstage_variables = true;
}
options.polyfill_reflect_vec2_f32 = r.polyfillReflectVec2F32;
diff --git a/src/dawn/tests/end2end/ShaderTests.cpp b/src/dawn/tests/end2end/ShaderTests.cpp
index 2e91325..b81f4ee 100644
--- a/src/dawn/tests/end2end/ShaderTests.cpp
+++ b/src/dawn/tests/end2end/ShaderTests.cpp
@@ -549,6 +549,42 @@
wgpu::RenderPipeline pipeline = device.CreateRenderPipeline(&rpDesc);
}
+// Regression test for crbug.com/dawn/1733. Even when user defined attribute input is empty,
+// Builtin input for the next stage could still cause register mismatch issue on D3D12 HLSL
+// compiler. So the TruncateInterstageVariables transform should still be run. This test is not in
+// dawn_unittests/RenderPipelineValidationTests because we want to test the compilation of the
+// pipeline in D3D12 backend.
+TEST_P(ShaderTests, WGSLInterstageVariablesEmptyUserAttributeSubset) {
+ std::string shader = R"(
+struct VertexOut {
+ @builtin(position) position : vec4f,
+ @location(1) attribute1 : f32,
+ @location(3) attribute3 : vec4f,
+}
+
+@vertex
+fn vertexMain() -> VertexOut {
+ var output : VertexOut;
+ output.position = vec4f(0.0, 0.0, 0.0, 1.0);
+ output.attribute1 = 1.0;
+ output.attribute3 = vec4f(0.0, 0.0, 0.0, 1.0);
+ return output;
+}
+
+@fragment
+fn fragmentMain(@builtin(position) position : vec4<f32>) -> @location(0) vec4f {
+ return vec4f(0.0, 0.0, 0.0, 1.0);
+})";
+ wgpu::ShaderModule shaderModule = utils::CreateShaderModule(device, shader.c_str());
+
+ utils::ComboRenderPipelineDescriptor rpDesc;
+ rpDesc.vertex.module = shaderModule;
+ rpDesc.vertex.entryPoint = "vertexMain";
+ rpDesc.cFragment.module = shaderModule;
+ rpDesc.cFragment.entryPoint = "fragmentMain";
+ wgpu::RenderPipeline pipeline = device.CreateRenderPipeline(&rpDesc);
+}
+
// This is a regression test for an issue caused by the FirstIndexOffset transfrom being done before
// the BindingRemapper, causing an intermediate AST to be invalid (and fail the overall
// compilation).
diff --git a/src/tint/writer/hlsl/generator.h b/src/tint/writer/hlsl/generator.h
index 9b79ad1..804b449 100644
--- a/src/tint/writer/hlsl/generator.h
+++ b/src/tint/writer/hlsl/generator.h
@@ -74,6 +74,9 @@
/// This is potentially used for truncating unused interstage outputs at current shader stage.
std::bitset<16> interstage_locations;
+ /// Set to `true` to run the TruncateInterstageVariables transform.
+ bool truncate_interstage_variables = false;
+
/// Set to `true` to generate polyfill for `reflect` builtin for vec2<f32>
bool polyfill_reflect_vec2_f32 = false;
diff --git a/src/tint/writer/hlsl/generator_impl.cc b/src/tint/writer/hlsl/generator_impl.cc
index 4d07b4f..5e4ae33 100644
--- a/src/tint/writer/hlsl/generator_impl.cc
+++ b/src/tint/writer/hlsl/generator_impl.cc
@@ -241,13 +241,11 @@
// CanonicalizeEntryPointIO must come after Robustness
manager.Add<transform::CanonicalizeEntryPointIO>();
- if (options.interstage_locations.any()) {
+ if (options.truncate_interstage_variables) {
// When interstage_locations is empty, it means there's no user-defined interstage variables
- // being used in the next stage. This is treated as a special case.
- // TruncateInterstageVariables transform is trying to solve the HLSL compiler register
- // mismatch issue. So it is not needed if no register is assigned to any interstage
- // variables. As a result we only add this transform when there is at least one interstage
- // locations being used.
+ // being used in the next stage. Still, HLSL compiler register mismatch could happen, if
+ // there's builtin inputs used in the next stage. So we still run
+ // TruncateInterstageVariables transform.
// TruncateInterstageVariables itself will skip when interstage_locations matches exactly
// with the current stage output.