Update inter stage variable subsetting validation and add tests
Sync up with current WebGPU spec to allow FS input being a
subset of VS output instead of requiring a strict match.
This patch involves changing the validation and adding tests,
together with using the TruncateInterstageVariables for hlsl
generator to workaround the extra limit for D3D12 backend.
Bug: dawn:1493
Change-Id: I2d4ba7f43dbe57f17ecd5c5d659f4ca93bb682a3
Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/109460
Commit-Queue: Shrek Shao <shrekshao@google.com>
Reviewed-by: Corentin Wallez <cwallez@chromium.org>
Auto-Submit: Shrek Shao <shrekshao@google.com>
Kokoro: Kokoro <noreply+kokoro@google.com>
diff --git a/src/dawn/native/RenderPipeline.cpp b/src/dawn/native/RenderPipeline.cpp
index 033231b..baa6c85 100644
--- a/src/dawn/native/RenderPipeline.cpp
+++ b/src/dawn/native/RenderPipeline.cpp
@@ -382,12 +382,24 @@
const EntryPointMetadata& fragmentMetadata =
fragmentState.module->GetEntryPoint(fragmentState.entryPoint);
- // TODO(dawn:563): Can this message give more details?
- DAWN_INVALID_IF(
- vertexMetadata.usedInterStageVariables != fragmentMetadata.usedInterStageVariables,
- "One or more fragment inputs and vertex outputs are not one-to-one matching");
+ if (DAWN_UNLIKELY(
+ (vertexMetadata.usedInterStageVariables | fragmentMetadata.usedInterStageVariables) !=
+ vertexMetadata.usedInterStageVariables)) {
+ for (size_t i : IterateBitSet(fragmentMetadata.usedInterStageVariables)) {
+ if (!vertexMetadata.usedInterStageVariables.test(i)) {
+ return DAWN_VALIDATION_ERROR(
+ "The fragment input at location %u doesn't have a corresponding vertex output.",
+ i);
+ }
+ }
+ UNREACHABLE();
+ }
for (size_t i : IterateBitSet(vertexMetadata.usedInterStageVariables)) {
+ if (!fragmentMetadata.usedInterStageVariables.test(i)) {
+ // It is valid that fragment output is a subset of vertex input
+ continue;
+ }
const auto& vertexOutputInfo = vertexMetadata.interStageVariables[i];
const auto& fragmentInputInfo = fragmentMetadata.interStageVariables[i];
DAWN_INVALID_IF(
diff --git a/src/dawn/native/d3d12/RenderPipelineD3D12.cpp b/src/dawn/native/d3d12/RenderPipelineD3D12.cpp
index 80bc6fc..6c8a6fb 100644
--- a/src/dawn/native/d3d12/RenderPipelineD3D12.cpp
+++ b/src/dawn/native/d3d12/RenderPipelineD3D12.cpp
@@ -357,11 +357,20 @@
PerStage<CompiledShader> compiledShader;
+ std::bitset<kMaxInterStageShaderVariables>* usedInterstageVariables = nullptr;
+ if (GetStageMask() & wgpu::ShaderStage::Fragment) {
+ // Now that only fragment shader can have interstage inputs.
+ const ProgrammableStage& programmableStage = GetStage(SingleShaderStage::Fragment);
+ auto entryPoint = programmableStage.module->GetEntryPoint(programmableStage.entryPoint);
+ usedInterstageVariables = &entryPoint.usedInterStageVariables;
+ }
+
for (auto stage : IterateStages(GetStageMask())) {
const ProgrammableStage& programmableStage = GetStage(stage);
- DAWN_TRY_ASSIGN(compiledShader[stage], ToBackend(programmableStage.module)
- ->Compile(programmableStage, stage,
- ToBackend(GetLayout()), compileFlags));
+ DAWN_TRY_ASSIGN(compiledShader[stage],
+ ToBackend(programmableStage.module)
+ ->Compile(programmableStage, stage, ToBackend(GetLayout()),
+ compileFlags, usedInterstageVariables));
*shaders[stage] = compiledShader[stage].GetD3D12ShaderBytecode();
}
diff --git a/src/dawn/native/d3d12/ShaderModuleD3D12.cpp b/src/dawn/native/d3d12/ShaderModuleD3D12.cpp
index 18e50eb..4aa4d3c 100644
--- a/src/dawn/native/d3d12/ShaderModuleD3D12.cpp
+++ b/src/dawn/native/d3d12/ShaderModuleD3D12.cpp
@@ -90,6 +90,7 @@
X(tint::transform::BindingRemapper::BindingPoints, remappedBindingPoints) \
X(tint::transform::BindingRemapper::AccessControls, remappedAccessControls) \
X(std::optional<tint::transform::SubstituteOverride::Config>, substituteOverrideConfig) \
+ X(std::bitset<kMaxInterStageShaderVariables>, interstageLocations) \
X(LimitsForCompilationRequest, limits) \
X(bool, disableSymbolRenaming) \
X(bool, isRobustnessEnabled) \
@@ -392,6 +393,14 @@
// them as well. This would allow us to only upload root constants that are actually
// read by the shader.
options.array_length_from_uniform = r.arrayLengthFromUniform;
+
+ if (r.stage == SingleShaderStage::Vertex) {
+ // Now that only vertex shader can have interstage outputs.
+ // Pass in the actually used interstage locations for tint to potentially truncate unused
+ // outputs.
+ options.interstage_locations = r.interstageLocations;
+ }
+
TRACE_EVENT0(tracePlatform.UnsafeGetValue(), General, "tint::writer::hlsl::Generate");
auto result = tint::writer::hlsl::Generate(&transformedProgram, options);
DAWN_INVALID_IF(!result.success, "An error occured while generating HLSL: %s", result.error);
@@ -456,10 +465,12 @@
return InitializeBase(parseResult, compilationMessages);
}
-ResultOrError<CompiledShader> ShaderModule::Compile(const ProgrammableStage& programmableStage,
- SingleShaderStage stage,
- const PipelineLayout* layout,
- uint32_t compileFlags) {
+ResultOrError<CompiledShader> ShaderModule::Compile(
+ const ProgrammableStage& programmableStage,
+ SingleShaderStage stage,
+ const PipelineLayout* layout,
+ uint32_t compileFlags,
+ const std::bitset<kMaxInterStageShaderVariables>* usedInterstageVariables) {
Device* device = ToBackend(GetDevice());
TRACE_EVENT0(device->GetPlatform(), General, "ShaderModuleD3D12::Compile");
ASSERT(!IsError());
@@ -475,6 +486,10 @@
req.hlsl.disableWorkgroupInit = device->IsToggleEnabled(Toggle::DisableWorkgroupInit);
req.hlsl.dumpShaders = device->IsToggleEnabled(Toggle::DumpShaders);
+ if (usedInterstageVariables) {
+ req.hlsl.interstageLocations = *usedInterstageVariables;
+ }
+
req.bytecode.hasShaderF16Feature = device->HasFeature(Feature::ShaderF16);
req.bytecode.compileFlags = compileFlags;
@@ -596,7 +611,6 @@
std::ostringstream dumpedMsg;
dumpedMsg << "/* Dumped generated HLSL */" << std::endl
<< compiledShader->hlslSource << std::endl;
- device->EmitLog(WGPULoggingType_Info, dumpedMsg.str().c_str());
if (device->IsToggleEnabled(Toggle::UseDXC)) {
dumpedMsg << "/* Dumped disassembled DXIL */" << std::endl;
diff --git a/src/dawn/native/d3d12/ShaderModuleD3D12.h b/src/dawn/native/d3d12/ShaderModuleD3D12.h
index 8c70bb4..f646117 100644
--- a/src/dawn/native/d3d12/ShaderModuleD3D12.h
+++ b/src/dawn/native/d3d12/ShaderModuleD3D12.h
@@ -52,10 +52,12 @@
ShaderModuleParseResult* parseResult,
OwnedCompilationMessages* compilationMessages);
- ResultOrError<CompiledShader> Compile(const ProgrammableStage& programmableStage,
- SingleShaderStage stage,
- const PipelineLayout* layout,
- uint32_t compileFlags);
+ ResultOrError<CompiledShader> Compile(
+ const ProgrammableStage& programmableStage,
+ SingleShaderStage stage,
+ const PipelineLayout* layout,
+ uint32_t compileFlags,
+ const std::bitset<kMaxInterStageShaderVariables>* usedInterstageVariables = nullptr);
private:
ShaderModule(Device* device, const ShaderModuleDescriptor* descriptor);
diff --git a/src/dawn/tests/end2end/ShaderTests.cpp b/src/dawn/tests/end2end/ShaderTests.cpp
index 8909e6e..57f10b3 100644
--- a/src/dawn/tests/end2end/ShaderTests.cpp
+++ b/src/dawn/tests/end2end/ShaderTests.cpp
@@ -318,6 +318,237 @@
wgpu::RenderPipeline pipeline = device.CreateRenderPipeline(&rpDesc);
}
+// Tests that sparse input output locations should work properly.
+// This test is not in dawn_unittests/RenderPipelineValidationTests because we want to test the
+// compilation of the pipeline in D3D12 backend.
+TEST_P(ShaderTests, WGSLInterstageVariablesSparse) {
+ std::string shader = R"(
+struct ShaderIO {
+ @builtin(position) position : vec4<f32>,
+ @location(1) attribute1 : vec4<f32>,
+ @location(3) attribute3 : vec4<f32>,
+}
+
+@vertex
+fn vertexMain() -> ShaderIO {
+ var output : ShaderIO;
+ output.position = vec4<f32>(0.0, 0.0, 0.0, 1.0);
+ output.attribute1 = vec4<f32>(0.0, 0.0, 0.0, 1.0);
+ output.attribute3 = vec4<f32>(0.0, 0.0, 0.0, 1.0);
+ return output;
+}
+
+@fragment
+fn fragmentMain(input : ShaderIO) -> @location(0) vec4<f32> {
+ return input.attribute1;
+})";
+ wgpu::ShaderModule shaderModule = utils::CreateShaderModule(device, shader.c_str());
+
+ utils::ComboRenderPipelineDescriptor rpDesc;
+ rpDesc.vertex.module = shaderModule;
+ rpDesc.vertex.entryPoint = "vertexMain";
+ rpDesc.cFragment.module = shaderModule;
+ rpDesc.cFragment.entryPoint = "fragmentMain";
+ wgpu::RenderPipeline pipeline = device.CreateRenderPipeline(&rpDesc);
+}
+
+// Tests that interstage built-in inputs and outputs usage mismatch don't mess up with input-output
+// locations.
+// This test is not in dawn_unittests/RenderPipelineValidationTests because we want to test the
+// compilation of the pipeline in D3D12 backend.
+TEST_P(ShaderTests, WGSLInterstageVariablesBuiltinsMismatched) {
+ std::string shader = R"(
+struct VertexOut {
+ @builtin(position) position : vec4<f32>,
+ @location(1) attribute1 : f32,
+ @location(3) attribute3 : vec4<f32>,
+}
+
+struct FragmentIn {
+ @location(3) attribute3 : vec4<f32>,
+ @builtin(front_facing) front_facing : bool,
+ @location(1) attribute1 : f32,
+ @builtin(position) position : vec4<f32>,
+}
+
+@vertex
+fn vertexMain() -> VertexOut {
+ var output : VertexOut;
+ output.position = vec4<f32>(0.0, 0.0, 0.0, 1.0);
+ output.attribute1 = 1.0;
+ output.attribute3 = vec4<f32>(0.0, 0.0, 0.0, 1.0);
+ return output;
+}
+
+@fragment
+fn fragmentMain(input : FragmentIn) -> @location(0) vec4<f32> {
+ _ = input.front_facing;
+ _ = input.position.x;
+ return input.attribute3;
+})";
+ wgpu::ShaderModule shaderModule = utils::CreateShaderModule(device, shader.c_str());
+
+ utils::ComboRenderPipelineDescriptor rpDesc;
+ rpDesc.vertex.module = shaderModule;
+ rpDesc.vertex.entryPoint = "vertexMain";
+ rpDesc.cFragment.module = shaderModule;
+ rpDesc.cFragment.entryPoint = "fragmentMain";
+ wgpu::RenderPipeline pipeline = device.CreateRenderPipeline(&rpDesc);
+}
+
+// Tests that interstage inputs could be a prefix subset of the outputs.
+// This test is not in dawn_unittests/RenderPipelineValidationTests because we want to test the
+// compilation of the pipeline in D3D12 backend.
+TEST_P(ShaderTests, WGSLInterstageVariablesPrefixSubset) {
+ std::string shader = R"(
+struct VertexOut {
+ @builtin(position) position : vec4<f32>,
+ @location(1) attribute1 : f32,
+ @location(3) attribute3 : vec4<f32>,
+}
+
+struct FragmentIn {
+ @location(1) attribute1 : f32,
+ @builtin(position) position : vec4<f32>,
+}
+
+@vertex
+fn vertexMain() -> VertexOut {
+ var output : VertexOut;
+ output.position = vec4<f32>(0.0, 0.0, 0.0, 1.0);
+ output.attribute1 = 1.0;
+ output.attribute3 = vec4<f32>(0.0, 0.0, 0.0, 1.0);
+ return output;
+}
+
+@fragment
+fn fragmentMain(input : FragmentIn) -> @location(0) vec4<f32> {
+ _ = input.position.x;
+ return vec4<f32>(input.attribute1, 0.0, 0.0, 1.0);
+})";
+ wgpu::ShaderModule shaderModule = utils::CreateShaderModule(device, shader.c_str());
+
+ utils::ComboRenderPipelineDescriptor rpDesc;
+ rpDesc.vertex.module = shaderModule;
+ rpDesc.vertex.entryPoint = "vertexMain";
+ rpDesc.cFragment.module = shaderModule;
+ rpDesc.cFragment.entryPoint = "fragmentMain";
+ wgpu::RenderPipeline pipeline = device.CreateRenderPipeline(&rpDesc);
+}
+
+// Tests that interstage inputs could be a sparse non-prefix subset of the outputs.
+// This test is not in dawn_unittests/RenderPipelineValidationTests because we want to test the
+// compilation of the pipeline in D3D12 backend.
+TEST_P(ShaderTests, WGSLInterstageVariablesSparseSubset) {
+ std::string shader = R"(
+struct VertexOut {
+ @builtin(position) position : vec4<f32>,
+ @location(1) attribute1 : f32,
+ @location(3) attribute3 : vec4<f32>,
+}
+
+struct FragmentIn {
+ @location(3) attribute3 : vec4<f32>,
+ @builtin(position) position : vec4<f32>,
+}
+
+@vertex
+fn vertexMain() -> VertexOut {
+ var output : VertexOut;
+ output.position = vec4<f32>(0.0, 0.0, 0.0, 1.0);
+ output.attribute1 = 1.0;
+ output.attribute3 = vec4<f32>(0.0, 0.0, 0.0, 1.0);
+ return output;
+}
+
+@fragment
+fn fragmentMain(input : FragmentIn) -> @location(0) vec4<f32> {
+ _ = input.position.x;
+ return input.attribute3;
+})";
+ wgpu::ShaderModule shaderModule = utils::CreateShaderModule(device, shader.c_str());
+
+ utils::ComboRenderPipelineDescriptor rpDesc;
+ rpDesc.vertex.module = shaderModule;
+ rpDesc.vertex.entryPoint = "vertexMain";
+ rpDesc.cFragment.module = shaderModule;
+ rpDesc.cFragment.entryPoint = "fragmentMain";
+ wgpu::RenderPipeline pipeline = device.CreateRenderPipeline(&rpDesc);
+}
+
+// Tests that interstage inputs could be a sparse non-prefix subset of the outputs, and that
+// fragment inputs are unused. This test is not in dawn_unittests/RenderPipelineValidationTests
+// because we want to test the compilation of the pipeline in D3D12 backend.
+TEST_P(ShaderTests, WGSLInterstageVariablesSparseSubsetUnused) {
+ std::string shader = R"(
+struct VertexOut {
+ @builtin(position) position : vec4<f32>,
+ @location(1) attribute1 : f32,
+ @location(3) attribute3 : vec4<f32>,
+}
+
+struct FragmentIn {
+ @location(3) attribute3 : vec4<f32>,
+ @builtin(position) position : vec4<f32>,
+}
+
+@vertex
+fn vertexMain() -> VertexOut {
+ var output : VertexOut;
+ output.position = vec4<f32>(0.0, 0.0, 0.0, 1.0);
+ output.attribute1 = 1.0;
+ output.attribute3 = vec4<f32>(0.0, 0.0, 0.0, 1.0);
+ return output;
+}
+
+@fragment
+fn fragmentMain(input : FragmentIn) -> @location(0) vec4<f32> {
+ return vec4<f32>(0.0, 0.0, 0.0, 1.0);
+})";
+ wgpu::ShaderModule shaderModule = utils::CreateShaderModule(device, shader.c_str());
+
+ utils::ComboRenderPipelineDescriptor rpDesc;
+ rpDesc.vertex.module = shaderModule;
+ rpDesc.vertex.entryPoint = "vertexMain";
+ rpDesc.cFragment.module = shaderModule;
+ rpDesc.cFragment.entryPoint = "fragmentMain";
+ wgpu::RenderPipeline pipeline = device.CreateRenderPipeline(&rpDesc);
+}
+
+// Tests that interstage inputs could be empty when outputs are not.
+// This test is not in dawn_unittests/RenderPipelineValidationTests because we want to test the
+// compilation of the pipeline in D3D12 backend.
+TEST_P(ShaderTests, WGSLInterstageVariablesEmptySubset) {
+ std::string shader = R"(
+struct VertexOut {
+ @builtin(position) position : vec4<f32>,
+ @location(1) attribute1 : f32,
+ @location(3) attribute3 : vec4<f32>,
+}
+
+@vertex
+fn vertexMain() -> VertexOut {
+ var output : VertexOut;
+ output.position = vec4<f32>(0.0, 0.0, 0.0, 1.0);
+ output.attribute1 = 1.0;
+ output.attribute3 = vec4<f32>(0.0, 0.0, 0.0, 1.0);
+ return output;
+}
+
+@fragment
+fn fragmentMain() -> @location(0) vec4<f32> {
+ return vec4<f32>(0.0, 0.0, 0.0, 1.0);
+})";
+ wgpu::ShaderModule shaderModule = utils::CreateShaderModule(device, shader.c_str());
+
+ utils::ComboRenderPipelineDescriptor rpDesc;
+ rpDesc.vertex.module = shaderModule;
+ rpDesc.vertex.entryPoint = "vertexMain";
+ rpDesc.cFragment.module = shaderModule;
+ rpDesc.cFragment.entryPoint = "fragmentMain";
+ wgpu::RenderPipeline pipeline = device.CreateRenderPipeline(&rpDesc);
+}
+
// This is a regression test for an issue caused by the FirstIndexOffset transfrom being done before
// the BindingRemapper, causing an intermediate AST to be invalid (and fail the overall
// compilation).
@@ -949,8 +1180,195 @@
EXPECT_BUFFER_U32_EQ(2, buf, 0);
}
+// Test that when fragment input is a subset of the vertex output, the render pipeline should be
+// valid.
+TEST_P(ShaderTests, FragmentInputIsSubsetOfVertexOutput) {
+ wgpu::ShaderModule vsModule = utils::CreateShaderModule(device, R"(
+struct ShaderIO {
+ @location(1) var1: f32,
+ @location(3) @interpolate(flat) var3: u32,
+ @location(5) @interpolate(flat) var5: i32,
+ @location(7) var7: f32,
+ @location(9) @interpolate(flat) var9: u32,
+ @builtin(position) pos: vec4<f32>,
+}
+
+@vertex fn main(@builtin(vertex_index) VertexIndex : u32)
+ -> ShaderIO {
+ var pos = array<vec2<f32>, 3>(
+ vec2<f32>(-1.0, 3.0),
+ vec2<f32>(-1.0, -3.0),
+ vec2<f32>(3.0, 0.0));
+
+ var shaderIO: ShaderIO;
+ shaderIO.var1 = 0.0;
+ shaderIO.var3 = 1u;
+ shaderIO.var5 = -9;
+ shaderIO.var7 = 1.0;
+ shaderIO.var9 = 0u;
+ shaderIO.pos = vec4<f32>(pos[VertexIndex], 0.0, 1.0);
+
+ return shaderIO;
+})");
+
+ wgpu::ShaderModule fsModule = utils::CreateShaderModule(device, R"(
+struct ShaderIO {
+ @location(3) @interpolate(flat) var3: u32,
+ @location(7) var7: f32,
+}
+
+@fragment fn main(io: ShaderIO)
+ -> @location(0) vec4<f32> {
+ return vec4<f32>(f32(io.var3), io.var7, 1.0, 1.0);
+})");
+
+ utils::BasicRenderPass renderPass = utils::CreateBasicRenderPass(device, 1, 1);
+
+ utils::ComboRenderPipelineDescriptor descriptor;
+ descriptor.vertex.module = vsModule;
+ descriptor.cFragment.module = fsModule;
+ descriptor.primitive.topology = wgpu::PrimitiveTopology::TriangleList;
+ descriptor.cTargets[0].format = renderPass.colorFormat;
+
+ wgpu::RenderPipeline pipeline = device.CreateRenderPipeline(&descriptor);
+
+ wgpu::CommandEncoder encoder = device.CreateCommandEncoder();
+ wgpu::RenderPassEncoder pass = encoder.BeginRenderPass(&renderPass.renderPassInfo);
+ pass.SetPipeline(pipeline);
+ pass.Draw(3);
+ pass.End();
+ wgpu::CommandBuffer commands = encoder.Finish();
+ queue.Submit(1, &commands);
+
+ EXPECT_PIXEL_RGBA8_EQ(utils::RGBA8(255, 255, 255, 255), renderPass.color, 0, 0);
+}
+
+// Test that when fragment input is a subset of the vertex output and the order of them is
+// different, the render pipeline should be valid.
+TEST_P(ShaderTests, FragmentInputIsSubsetOfVertexOutputWithDifferentOrder) {
+ wgpu::ShaderModule vsModule = utils::CreateShaderModule(device, R"(
+struct ShaderIO {
+ @location(5) @align(16) var5: f32,
+ @location(1) var1: f32,
+ @location(2) var2: f32,
+ @location(3) @align(8) var3: f32,
+ @location(4) var4: vec4<f32>,
+ @builtin(position) pos: vec4<f32>,
+}
+
+@vertex fn main(@builtin(vertex_index) VertexIndex : u32)
+ -> ShaderIO {
+ var pos = array<vec2<f32>, 3>(
+ vec2<f32>(-1.0, 3.0),
+ vec2<f32>(-1.0, -3.0),
+ vec2<f32>(3.0, 0.0));
+
+ var shaderIO: ShaderIO;
+ shaderIO.var1 = 0.0;
+ shaderIO.var2 = 0.0;
+ shaderIO.var3 = 1.0;
+ shaderIO.var4 = vec4<f32>(0.4, 0.4, 0.4, 0.4);
+ shaderIO.var5 = 1.0;
+ shaderIO.pos = vec4<f32>(pos[VertexIndex], 0.0, 1.0);
+
+ return shaderIO;
+})");
+
+ wgpu::ShaderModule fsModule = utils::CreateShaderModule(device, R"(
+struct ShaderIO {
+ @location(4) var4: vec4<f32>,
+ @location(1) var1: f32,
+ @location(5) @align(16) var5: f32,
+}
+
+@fragment fn main(io: ShaderIO)
+ -> @location(0) vec4<f32> {
+ return vec4<f32>(io.var1, io.var5, io.var4.x, 1.0);
+})");
+
+ utils::BasicRenderPass renderPass = utils::CreateBasicRenderPass(device, 1, 1);
+
+ utils::ComboRenderPipelineDescriptor descriptor;
+ descriptor.vertex.module = vsModule;
+ descriptor.cFragment.module = fsModule;
+ descriptor.primitive.topology = wgpu::PrimitiveTopology::TriangleList;
+ descriptor.cTargets[0].format = renderPass.colorFormat;
+
+ wgpu::RenderPipeline pipeline = device.CreateRenderPipeline(&descriptor);
+
+ wgpu::CommandEncoder encoder = device.CreateCommandEncoder();
+ wgpu::RenderPassEncoder pass = encoder.BeginRenderPass(&renderPass.renderPassInfo);
+ pass.SetPipeline(pipeline);
+ pass.Draw(3);
+ pass.End();
+ wgpu::CommandBuffer commands = encoder.Finish();
+ queue.Submit(1, &commands);
+
+ EXPECT_PIXEL_RGBA8_EQ(utils::RGBA8(0, 255, 102, 255), renderPass.color, 0, 0);
+}
+
+// Test that when fragment input is a subset of the vertex output and that when the builtin
+// interstage variables may mess up with the order, the render pipeline should be valid.
+TEST_P(ShaderTests, FragmentInputIsSubsetOfVertexOutputBuiltinOrder) {
+ wgpu::ShaderModule vsModule = utils::CreateShaderModule(device, R"(
+struct ShaderIO {
+ @location(1) var1: f32,
+ @builtin(position) pos: vec4<f32>,
+ @location(8) var8: vec3<f32>,
+ @location(7) var7: f32,
+}
+
+@vertex fn main(@builtin(vertex_index) VertexIndex : u32)
+ -> ShaderIO {
+ var pos = array<vec2<f32>, 3>(
+ vec2<f32>(-1.0, 3.0),
+ vec2<f32>(-1.0, -3.0),
+ vec2<f32>(3.0, 0.0));
+
+ var shaderIO: ShaderIO;
+ shaderIO.var1 = 0.0;
+ shaderIO.var7 = 1.0;
+ shaderIO.var8 = vec3<f32>(1.0, 0.4, 0.0);
+ shaderIO.pos = vec4<f32>(pos[VertexIndex], 0.0, 1.0);
+
+ return shaderIO;
+})");
+
+ wgpu::ShaderModule fsModule = utils::CreateShaderModule(device, R"(
+struct ShaderIO {
+ @builtin(position) pos: vec4<f32>,
+ @location(7) var7: f32,
+}
+
+@fragment fn main(io: ShaderIO)
+ -> @location(0) vec4<f32> {
+ return vec4<f32>(0.0, io.var7, 0.4, 1.0);
+})");
+
+ utils::BasicRenderPass renderPass = utils::CreateBasicRenderPass(device, 1, 1);
+
+ utils::ComboRenderPipelineDescriptor descriptor;
+ descriptor.vertex.module = vsModule;
+ descriptor.cFragment.module = fsModule;
+ descriptor.primitive.topology = wgpu::PrimitiveTopology::TriangleList;
+ descriptor.cTargets[0].format = renderPass.colorFormat;
+
+ wgpu::RenderPipeline pipeline = device.CreateRenderPipeline(&descriptor);
+
+ wgpu::CommandEncoder encoder = device.CreateCommandEncoder();
+ wgpu::RenderPassEncoder pass = encoder.BeginRenderPass(&renderPass.renderPassInfo);
+ pass.SetPipeline(pipeline);
+ pass.Draw(3);
+ pass.End();
+ wgpu::CommandBuffer commands = encoder.Finish();
+ queue.Submit(1, &commands);
+
+ EXPECT_PIXEL_RGBA8_EQ(utils::RGBA8(0, 255, 102, 255), renderPass.color, 0, 0);
+}
+
DAWN_INSTANTIATE_TEST(ShaderTests,
D3D12Backend(),
+ D3D12Backend({"use_dxc"}),
MetalBackend(),
OpenGLBackend(),
OpenGLESBackend(),
diff --git a/src/dawn/tests/unittests/validation/RenderPipelineValidationTests.cpp b/src/dawn/tests/unittests/validation/RenderPipelineValidationTests.cpp
index 5271228..675b1d9 100644
--- a/src/dawn/tests/unittests/validation/RenderPipelineValidationTests.cpp
+++ b/src/dawn/tests/unittests/validation/RenderPipelineValidationTests.cpp
@@ -1390,8 +1390,7 @@
}
};
-// Tests that creating render pipeline should fail when there is a vertex output that doesn't have
-// its corresponding fragment input at the same location, and there is a fragment input that
+// Tests that creating render pipeline should fail when there is a fragment input that
// doesn't have its corresponding vertex output at the same location.
TEST_F(InterStageVariableMatchingValidationTest, MissingDeclarationAtSameLocation) {
wgpu::ShaderModule vertexModuleOutputAtLocation0 = utils::CreateShaderModule(device, R"(
@@ -1430,7 +1429,10 @@
})");
{
- CheckCreatingRenderPipeline(vertexModuleOutputAtLocation0, fsModule, false);
+ // It is okay if the fragment output is a subset of the vertex input.
+ CheckCreatingRenderPipeline(vertexModuleOutputAtLocation0, fsModule, true);
+ }
+ {
CheckCreatingRenderPipeline(vsModule, fragmentModuleAtLocation0, false);
CheckCreatingRenderPipeline(vertexModuleOutputAtLocation0, fragmentModuleInputAtLocation1,
false);
diff --git a/src/dawn/tests/unittests/validation/ShaderModuleValidationTests.cpp b/src/dawn/tests/unittests/validation/ShaderModuleValidationTests.cpp
index 63aafac..4d788e3 100644
--- a/src/dawn/tests/unittests/validation/ShaderModuleValidationTests.cpp
+++ b/src/dawn/tests/unittests/validation/ShaderModuleValidationTests.cpp
@@ -278,10 +278,13 @@
}
if (success) {
- ASSERT_DEVICE_ERROR(
- device.CreateRenderPipeline(&pDesc),
- testing::HasSubstr(
- "One or more fragment inputs and vertex outputs are not one-to-one matching"));
+ if (failingShaderStage == wgpu::ShaderStage::Vertex) {
+ // It is allowed that fragment inputs are a subset of the vertex output variables.
+ device.CreateRenderPipeline(&pDesc);
+ } else {
+ ASSERT_DEVICE_ERROR(device.CreateRenderPipeline(&pDesc),
+ testing::HasSubstr("The fragment input at location"));
+ }
} else {
ASSERT_DEVICE_ERROR(device.CreateRenderPipeline(&pDesc),
testing::HasSubstr(errorMatcher));
@@ -401,10 +404,13 @@
}
if (success) {
- ASSERT_DEVICE_ERROR(
- device.CreateRenderPipeline(&pDesc),
- testing::HasSubstr(
- "One or more fragment inputs and vertex outputs are not one-to-one matching"));
+ if (failingShaderStage == wgpu::ShaderStage::Vertex) {
+ // It is allowed that fragment inputs are a subset of the vertex output variables.
+ device.CreateRenderPipeline(&pDesc);
+ } else {
+ ASSERT_DEVICE_ERROR(device.CreateRenderPipeline(&pDesc),
+ testing::HasSubstr("The fragment input at location"));
+ }
} else {
ASSERT_DEVICE_ERROR(device.CreateRenderPipeline(&pDesc),
testing::HasSubstr(errorMatcher));
diff --git a/src/tint/writer/hlsl/generator.h b/src/tint/writer/hlsl/generator.h
index 9974cde..c624943 100644
--- a/src/tint/writer/hlsl/generator.h
+++ b/src/tint/writer/hlsl/generator.h
@@ -15,6 +15,7 @@
#ifndef SRC_TINT_WRITER_HLSL_GENERATOR_H_
#define SRC_TINT_WRITER_HLSL_GENERATOR_H_
+#include <bitset>
#include <memory>
#include <optional>
#include <string>
@@ -25,6 +26,7 @@
#include "src/tint/ast/pipeline_stage.h"
#include "src/tint/reflection.h"
#include "src/tint/sem/binding_point.h"
+#include "src/tint/utils/bitset.h"
#include "src/tint/writer/array_length_from_uniform_options.h"
#include "src/tint/writer/text.h"
@@ -56,6 +58,9 @@
/// Options used to specify a mapping of binding points to indices into a UBO
/// from which to load buffer sizes.
ArrayLengthFromUniformOptions array_length_from_uniform = {};
+ /// Interstage locations actually used as inputs in the next stage of the pipeline.
+ /// This is potentially used for truncating unused interstage outputs at current shader stage.
+ std::bitset<16> interstage_locations;
/// Reflect the fields of this class so that it can be used by tint::ForeachField()
TINT_REFLECT(root_constant_binding_point,
diff --git a/src/tint/writer/hlsl/generator_impl.cc b/src/tint/writer/hlsl/generator_impl.cc
index 9cfe5d7..e3abc51 100644
--- a/src/tint/writer/hlsl/generator_impl.cc
+++ b/src/tint/writer/hlsl/generator_impl.cc
@@ -64,6 +64,7 @@
#include "src/tint/transform/remove_continue_in_switch.h"
#include "src/tint/transform/remove_phonies.h"
#include "src/tint/transform/simplify_pointers.h"
+#include "src/tint/transform/truncate_interstage_variables.h"
#include "src/tint/transform/unshadow.h"
#include "src/tint/transform/vectorize_scalar_matrix_initializers.h"
#include "src/tint/transform/zero_init_workgroup_memory.h"
@@ -210,6 +211,27 @@
manager.Add<transform::ZeroInitWorkgroupMemory>();
}
manager.Add<transform::CanonicalizeEntryPointIO>();
+
+ if (options.interstage_locations.any()) {
+ // When interstage_locations is empty, it means there's no user-defined interstage variables
+ // being used in the next stage. This is treated as a special case.
+ // TruncateInterstageVariables transform is trying to solve the HLSL compiler register
+ // mismatch issue. So it is not needed if no register is assigned to any interstage
+ // variables. As a result we only add this transform when there is at least one interstage
+ // locations being used.
+
+ // TruncateInterstageVariables itself will skip when interstage_locations matches exactly
+ // with the current stage output.
+
+ // Build the config for internal TruncateInterstageVariables transform.
+ transform::TruncateInterstageVariables::Config truncate_interstage_variables_cfg;
+ truncate_interstage_variables_cfg.interstage_locations =
+ std::move(options.interstage_locations);
+ manager.Add<transform::TruncateInterstageVariables>();
+ data.Add<transform::TruncateInterstageVariables::Config>(
+ std::move(truncate_interstage_variables_cfg));
+ }
+
// NumWorkgroupsFromUniform must come after CanonicalizeEntryPointIO, as it
// assumes that num_workgroups builtins only appear as struct members and are
// only accessed directly via member accessors.