Update inter stage variable subsetting validation and add tests Sync up with current WebGPU spec to allow FS input being a subset of VS output instead of requiring a strict match. This patch involves changing the validation and adding tests, together with using the TruncateInterstageVariables for hlsl generator to workaround the extra limit for D3D12 backend. Bug: dawn:1493 Change-Id: I2d4ba7f43dbe57f17ecd5c5d659f4ca93bb682a3 Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/109460 Commit-Queue: Shrek Shao <shrekshao@google.com> Reviewed-by: Corentin Wallez <cwallez@chromium.org> Auto-Submit: Shrek Shao <shrekshao@google.com> Kokoro: Kokoro <noreply+kokoro@google.com>
diff --git a/src/dawn/native/RenderPipeline.cpp b/src/dawn/native/RenderPipeline.cpp index 033231b..baa6c85 100644 --- a/src/dawn/native/RenderPipeline.cpp +++ b/src/dawn/native/RenderPipeline.cpp
@@ -382,12 +382,24 @@ const EntryPointMetadata& fragmentMetadata = fragmentState.module->GetEntryPoint(fragmentState.entryPoint); - // TODO(dawn:563): Can this message give more details? - DAWN_INVALID_IF( - vertexMetadata.usedInterStageVariables != fragmentMetadata.usedInterStageVariables, - "One or more fragment inputs and vertex outputs are not one-to-one matching"); + if (DAWN_UNLIKELY( + (vertexMetadata.usedInterStageVariables | fragmentMetadata.usedInterStageVariables) != + vertexMetadata.usedInterStageVariables)) { + for (size_t i : IterateBitSet(fragmentMetadata.usedInterStageVariables)) { + if (!vertexMetadata.usedInterStageVariables.test(i)) { + return DAWN_VALIDATION_ERROR( + "The fragment input at location %u doesn't have a corresponding vertex output.", + i); + } + } + UNREACHABLE(); + } for (size_t i : IterateBitSet(vertexMetadata.usedInterStageVariables)) { + if (!fragmentMetadata.usedInterStageVariables.test(i)) { + // It is valid that fragment output is a subset of vertex input + continue; + } const auto& vertexOutputInfo = vertexMetadata.interStageVariables[i]; const auto& fragmentInputInfo = fragmentMetadata.interStageVariables[i]; DAWN_INVALID_IF(
diff --git a/src/dawn/native/d3d12/RenderPipelineD3D12.cpp b/src/dawn/native/d3d12/RenderPipelineD3D12.cpp index 80bc6fc..6c8a6fb 100644 --- a/src/dawn/native/d3d12/RenderPipelineD3D12.cpp +++ b/src/dawn/native/d3d12/RenderPipelineD3D12.cpp
@@ -357,11 +357,20 @@ PerStage<CompiledShader> compiledShader; + std::bitset<kMaxInterStageShaderVariables>* usedInterstageVariables = nullptr; + if (GetStageMask() & wgpu::ShaderStage::Fragment) { + // Now that only fragment shader can have interstage inputs. + const ProgrammableStage& programmableStage = GetStage(SingleShaderStage::Fragment); + auto entryPoint = programmableStage.module->GetEntryPoint(programmableStage.entryPoint); + usedInterstageVariables = &entryPoint.usedInterStageVariables; + } + for (auto stage : IterateStages(GetStageMask())) { const ProgrammableStage& programmableStage = GetStage(stage); - DAWN_TRY_ASSIGN(compiledShader[stage], ToBackend(programmableStage.module) - ->Compile(programmableStage, stage, - ToBackend(GetLayout()), compileFlags)); + DAWN_TRY_ASSIGN(compiledShader[stage], + ToBackend(programmableStage.module) + ->Compile(programmableStage, stage, ToBackend(GetLayout()), + compileFlags, usedInterstageVariables)); *shaders[stage] = compiledShader[stage].GetD3D12ShaderBytecode(); }
diff --git a/src/dawn/native/d3d12/ShaderModuleD3D12.cpp b/src/dawn/native/d3d12/ShaderModuleD3D12.cpp index 18e50eb..4aa4d3c 100644 --- a/src/dawn/native/d3d12/ShaderModuleD3D12.cpp +++ b/src/dawn/native/d3d12/ShaderModuleD3D12.cpp
@@ -90,6 +90,7 @@ X(tint::transform::BindingRemapper::BindingPoints, remappedBindingPoints) \ X(tint::transform::BindingRemapper::AccessControls, remappedAccessControls) \ X(std::optional<tint::transform::SubstituteOverride::Config>, substituteOverrideConfig) \ + X(std::bitset<kMaxInterStageShaderVariables>, interstageLocations) \ X(LimitsForCompilationRequest, limits) \ X(bool, disableSymbolRenaming) \ X(bool, isRobustnessEnabled) \ @@ -392,6 +393,14 @@ // them as well. This would allow us to only upload root constants that are actually // read by the shader. options.array_length_from_uniform = r.arrayLengthFromUniform; + + if (r.stage == SingleShaderStage::Vertex) { + // Now that only vertex shader can have interstage outputs. + // Pass in the actually used interstage locations for tint to potentially truncate unused + // outputs. + options.interstage_locations = r.interstageLocations; + } + TRACE_EVENT0(tracePlatform.UnsafeGetValue(), General, "tint::writer::hlsl::Generate"); auto result = tint::writer::hlsl::Generate(&transformedProgram, options); DAWN_INVALID_IF(!result.success, "An error occured while generating HLSL: %s", result.error); @@ -456,10 +465,12 @@ return InitializeBase(parseResult, compilationMessages); } -ResultOrError<CompiledShader> ShaderModule::Compile(const ProgrammableStage& programmableStage, - SingleShaderStage stage, - const PipelineLayout* layout, - uint32_t compileFlags) { +ResultOrError<CompiledShader> ShaderModule::Compile( + const ProgrammableStage& programmableStage, + SingleShaderStage stage, + const PipelineLayout* layout, + uint32_t compileFlags, + const std::bitset<kMaxInterStageShaderVariables>* usedInterstageVariables) { Device* device = ToBackend(GetDevice()); TRACE_EVENT0(device->GetPlatform(), General, "ShaderModuleD3D12::Compile"); ASSERT(!IsError()); @@ -475,6 +486,10 @@ req.hlsl.disableWorkgroupInit = device->IsToggleEnabled(Toggle::DisableWorkgroupInit); req.hlsl.dumpShaders = device->IsToggleEnabled(Toggle::DumpShaders); + if (usedInterstageVariables) { + req.hlsl.interstageLocations = *usedInterstageVariables; + } + req.bytecode.hasShaderF16Feature = device->HasFeature(Feature::ShaderF16); req.bytecode.compileFlags = compileFlags; @@ -596,7 +611,6 @@ std::ostringstream dumpedMsg; dumpedMsg << "/* Dumped generated HLSL */" << std::endl << compiledShader->hlslSource << std::endl; - device->EmitLog(WGPULoggingType_Info, dumpedMsg.str().c_str()); if (device->IsToggleEnabled(Toggle::UseDXC)) { dumpedMsg << "/* Dumped disassembled DXIL */" << std::endl;
diff --git a/src/dawn/native/d3d12/ShaderModuleD3D12.h b/src/dawn/native/d3d12/ShaderModuleD3D12.h index 8c70bb4..f646117 100644 --- a/src/dawn/native/d3d12/ShaderModuleD3D12.h +++ b/src/dawn/native/d3d12/ShaderModuleD3D12.h
@@ -52,10 +52,12 @@ ShaderModuleParseResult* parseResult, OwnedCompilationMessages* compilationMessages); - ResultOrError<CompiledShader> Compile(const ProgrammableStage& programmableStage, - SingleShaderStage stage, - const PipelineLayout* layout, - uint32_t compileFlags); + ResultOrError<CompiledShader> Compile( + const ProgrammableStage& programmableStage, + SingleShaderStage stage, + const PipelineLayout* layout, + uint32_t compileFlags, + const std::bitset<kMaxInterStageShaderVariables>* usedInterstageVariables = nullptr); private: ShaderModule(Device* device, const ShaderModuleDescriptor* descriptor);
diff --git a/src/dawn/tests/end2end/ShaderTests.cpp b/src/dawn/tests/end2end/ShaderTests.cpp index 8909e6e..57f10b3 100644 --- a/src/dawn/tests/end2end/ShaderTests.cpp +++ b/src/dawn/tests/end2end/ShaderTests.cpp
@@ -318,6 +318,237 @@ wgpu::RenderPipeline pipeline = device.CreateRenderPipeline(&rpDesc); } +// Tests that sparse input output locations should work properly. +// This test is not in dawn_unittests/RenderPipelineValidationTests because we want to test the +// compilation of the pipeline in D3D12 backend. +TEST_P(ShaderTests, WGSLInterstageVariablesSparse) { + std::string shader = R"( +struct ShaderIO { + @builtin(position) position : vec4<f32>, + @location(1) attribute1 : vec4<f32>, + @location(3) attribute3 : vec4<f32>, +} + +@vertex +fn vertexMain() -> ShaderIO { + var output : ShaderIO; + output.position = vec4<f32>(0.0, 0.0, 0.0, 1.0); + output.attribute1 = vec4<f32>(0.0, 0.0, 0.0, 1.0); + output.attribute3 = vec4<f32>(0.0, 0.0, 0.0, 1.0); + return output; +} + +@fragment +fn fragmentMain(input : ShaderIO) -> @location(0) vec4<f32> { + return input.attribute1; +})"; + wgpu::ShaderModule shaderModule = utils::CreateShaderModule(device, shader.c_str()); + + utils::ComboRenderPipelineDescriptor rpDesc; + rpDesc.vertex.module = shaderModule; + rpDesc.vertex.entryPoint = "vertexMain"; + rpDesc.cFragment.module = shaderModule; + rpDesc.cFragment.entryPoint = "fragmentMain"; + wgpu::RenderPipeline pipeline = device.CreateRenderPipeline(&rpDesc); +} + +// Tests that interstage built-in inputs and outputs usage mismatch don't mess up with input-output +// locations. +// This test is not in dawn_unittests/RenderPipelineValidationTests because we want to test the +// compilation of the pipeline in D3D12 backend. +TEST_P(ShaderTests, WGSLInterstageVariablesBuiltinsMismatched) { + std::string shader = R"( +struct VertexOut { + @builtin(position) position : vec4<f32>, + @location(1) attribute1 : f32, + @location(3) attribute3 : vec4<f32>, +} + +struct FragmentIn { + @location(3) attribute3 : vec4<f32>, + @builtin(front_facing) front_facing : bool, + @location(1) attribute1 : f32, + @builtin(position) position : vec4<f32>, +} + +@vertex +fn vertexMain() -> VertexOut { + var output : VertexOut; + output.position = vec4<f32>(0.0, 0.0, 0.0, 1.0); + output.attribute1 = 1.0; + output.attribute3 = vec4<f32>(0.0, 0.0, 0.0, 1.0); + return output; +} + +@fragment +fn fragmentMain(input : FragmentIn) -> @location(0) vec4<f32> { + _ = input.front_facing; + _ = input.position.x; + return input.attribute3; +})"; + wgpu::ShaderModule shaderModule = utils::CreateShaderModule(device, shader.c_str()); + + utils::ComboRenderPipelineDescriptor rpDesc; + rpDesc.vertex.module = shaderModule; + rpDesc.vertex.entryPoint = "vertexMain"; + rpDesc.cFragment.module = shaderModule; + rpDesc.cFragment.entryPoint = "fragmentMain"; + wgpu::RenderPipeline pipeline = device.CreateRenderPipeline(&rpDesc); +} + +// Tests that interstage inputs could be a prefix subset of the outputs. +// This test is not in dawn_unittests/RenderPipelineValidationTests because we want to test the +// compilation of the pipeline in D3D12 backend. +TEST_P(ShaderTests, WGSLInterstageVariablesPrefixSubset) { + std::string shader = R"( +struct VertexOut { + @builtin(position) position : vec4<f32>, + @location(1) attribute1 : f32, + @location(3) attribute3 : vec4<f32>, +} + +struct FragmentIn { + @location(1) attribute1 : f32, + @builtin(position) position : vec4<f32>, +} + +@vertex +fn vertexMain() -> VertexOut { + var output : VertexOut; + output.position = vec4<f32>(0.0, 0.0, 0.0, 1.0); + output.attribute1 = 1.0; + output.attribute3 = vec4<f32>(0.0, 0.0, 0.0, 1.0); + return output; +} + +@fragment +fn fragmentMain(input : FragmentIn) -> @location(0) vec4<f32> { + _ = input.position.x; + return vec4<f32>(input.attribute1, 0.0, 0.0, 1.0); +})"; + wgpu::ShaderModule shaderModule = utils::CreateShaderModule(device, shader.c_str()); + + utils::ComboRenderPipelineDescriptor rpDesc; + rpDesc.vertex.module = shaderModule; + rpDesc.vertex.entryPoint = "vertexMain"; + rpDesc.cFragment.module = shaderModule; + rpDesc.cFragment.entryPoint = "fragmentMain"; + wgpu::RenderPipeline pipeline = device.CreateRenderPipeline(&rpDesc); +} + +// Tests that interstage inputs could be a sparse non-prefix subset of the outputs. +// This test is not in dawn_unittests/RenderPipelineValidationTests because we want to test the +// compilation of the pipeline in D3D12 backend. +TEST_P(ShaderTests, WGSLInterstageVariablesSparseSubset) { + std::string shader = R"( +struct VertexOut { + @builtin(position) position : vec4<f32>, + @location(1) attribute1 : f32, + @location(3) attribute3 : vec4<f32>, +} + +struct FragmentIn { + @location(3) attribute3 : vec4<f32>, + @builtin(position) position : vec4<f32>, +} + +@vertex +fn vertexMain() -> VertexOut { + var output : VertexOut; + output.position = vec4<f32>(0.0, 0.0, 0.0, 1.0); + output.attribute1 = 1.0; + output.attribute3 = vec4<f32>(0.0, 0.0, 0.0, 1.0); + return output; +} + +@fragment +fn fragmentMain(input : FragmentIn) -> @location(0) vec4<f32> { + _ = input.position.x; + return input.attribute3; +})"; + wgpu::ShaderModule shaderModule = utils::CreateShaderModule(device, shader.c_str()); + + utils::ComboRenderPipelineDescriptor rpDesc; + rpDesc.vertex.module = shaderModule; + rpDesc.vertex.entryPoint = "vertexMain"; + rpDesc.cFragment.module = shaderModule; + rpDesc.cFragment.entryPoint = "fragmentMain"; + wgpu::RenderPipeline pipeline = device.CreateRenderPipeline(&rpDesc); +} + +// Tests that interstage inputs could be a sparse non-prefix subset of the outputs, and that +// fragment inputs are unused. This test is not in dawn_unittests/RenderPipelineValidationTests +// because we want to test the compilation of the pipeline in D3D12 backend. +TEST_P(ShaderTests, WGSLInterstageVariablesSparseSubsetUnused) { + std::string shader = R"( +struct VertexOut { + @builtin(position) position : vec4<f32>, + @location(1) attribute1 : f32, + @location(3) attribute3 : vec4<f32>, +} + +struct FragmentIn { + @location(3) attribute3 : vec4<f32>, + @builtin(position) position : vec4<f32>, +} + +@vertex +fn vertexMain() -> VertexOut { + var output : VertexOut; + output.position = vec4<f32>(0.0, 0.0, 0.0, 1.0); + output.attribute1 = 1.0; + output.attribute3 = vec4<f32>(0.0, 0.0, 0.0, 1.0); + return output; +} + +@fragment +fn fragmentMain(input : FragmentIn) -> @location(0) vec4<f32> { + return vec4<f32>(0.0, 0.0, 0.0, 1.0); +})"; + wgpu::ShaderModule shaderModule = utils::CreateShaderModule(device, shader.c_str()); + + utils::ComboRenderPipelineDescriptor rpDesc; + rpDesc.vertex.module = shaderModule; + rpDesc.vertex.entryPoint = "vertexMain"; + rpDesc.cFragment.module = shaderModule; + rpDesc.cFragment.entryPoint = "fragmentMain"; + wgpu::RenderPipeline pipeline = device.CreateRenderPipeline(&rpDesc); +} + +// Tests that interstage inputs could be empty when outputs are not. +// This test is not in dawn_unittests/RenderPipelineValidationTests because we want to test the +// compilation of the pipeline in D3D12 backend. +TEST_P(ShaderTests, WGSLInterstageVariablesEmptySubset) { + std::string shader = R"( +struct VertexOut { + @builtin(position) position : vec4<f32>, + @location(1) attribute1 : f32, + @location(3) attribute3 : vec4<f32>, +} + +@vertex +fn vertexMain() -> VertexOut { + var output : VertexOut; + output.position = vec4<f32>(0.0, 0.0, 0.0, 1.0); + output.attribute1 = 1.0; + output.attribute3 = vec4<f32>(0.0, 0.0, 0.0, 1.0); + return output; +} + +@fragment +fn fragmentMain() -> @location(0) vec4<f32> { + return vec4<f32>(0.0, 0.0, 0.0, 1.0); +})"; + wgpu::ShaderModule shaderModule = utils::CreateShaderModule(device, shader.c_str()); + + utils::ComboRenderPipelineDescriptor rpDesc; + rpDesc.vertex.module = shaderModule; + rpDesc.vertex.entryPoint = "vertexMain"; + rpDesc.cFragment.module = shaderModule; + rpDesc.cFragment.entryPoint = "fragmentMain"; + wgpu::RenderPipeline pipeline = device.CreateRenderPipeline(&rpDesc); +} + // This is a regression test for an issue caused by the FirstIndexOffset transfrom being done before // the BindingRemapper, causing an intermediate AST to be invalid (and fail the overall // compilation). @@ -949,8 +1180,195 @@ EXPECT_BUFFER_U32_EQ(2, buf, 0); } +// Test that when fragment input is a subset of the vertex output, the render pipeline should be +// valid. +TEST_P(ShaderTests, FragmentInputIsSubsetOfVertexOutput) { + wgpu::ShaderModule vsModule = utils::CreateShaderModule(device, R"( +struct ShaderIO { + @location(1) var1: f32, + @location(3) @interpolate(flat) var3: u32, + @location(5) @interpolate(flat) var5: i32, + @location(7) var7: f32, + @location(9) @interpolate(flat) var9: u32, + @builtin(position) pos: vec4<f32>, +} + +@vertex fn main(@builtin(vertex_index) VertexIndex : u32) + -> ShaderIO { + var pos = array<vec2<f32>, 3>( + vec2<f32>(-1.0, 3.0), + vec2<f32>(-1.0, -3.0), + vec2<f32>(3.0, 0.0)); + + var shaderIO: ShaderIO; + shaderIO.var1 = 0.0; + shaderIO.var3 = 1u; + shaderIO.var5 = -9; + shaderIO.var7 = 1.0; + shaderIO.var9 = 0u; + shaderIO.pos = vec4<f32>(pos[VertexIndex], 0.0, 1.0); + + return shaderIO; +})"); + + wgpu::ShaderModule fsModule = utils::CreateShaderModule(device, R"( +struct ShaderIO { + @location(3) @interpolate(flat) var3: u32, + @location(7) var7: f32, +} + +@fragment fn main(io: ShaderIO) + -> @location(0) vec4<f32> { + return vec4<f32>(f32(io.var3), io.var7, 1.0, 1.0); +})"); + + utils::BasicRenderPass renderPass = utils::CreateBasicRenderPass(device, 1, 1); + + utils::ComboRenderPipelineDescriptor descriptor; + descriptor.vertex.module = vsModule; + descriptor.cFragment.module = fsModule; + descriptor.primitive.topology = wgpu::PrimitiveTopology::TriangleList; + descriptor.cTargets[0].format = renderPass.colorFormat; + + wgpu::RenderPipeline pipeline = device.CreateRenderPipeline(&descriptor); + + wgpu::CommandEncoder encoder = device.CreateCommandEncoder(); + wgpu::RenderPassEncoder pass = encoder.BeginRenderPass(&renderPass.renderPassInfo); + pass.SetPipeline(pipeline); + pass.Draw(3); + pass.End(); + wgpu::CommandBuffer commands = encoder.Finish(); + queue.Submit(1, &commands); + + EXPECT_PIXEL_RGBA8_EQ(utils::RGBA8(255, 255, 255, 255), renderPass.color, 0, 0); +} + +// Test that when fragment input is a subset of the vertex output and the order of them is +// different, the render pipeline should be valid. +TEST_P(ShaderTests, FragmentInputIsSubsetOfVertexOutputWithDifferentOrder) { + wgpu::ShaderModule vsModule = utils::CreateShaderModule(device, R"( +struct ShaderIO { + @location(5) @align(16) var5: f32, + @location(1) var1: f32, + @location(2) var2: f32, + @location(3) @align(8) var3: f32, + @location(4) var4: vec4<f32>, + @builtin(position) pos: vec4<f32>, +} + +@vertex fn main(@builtin(vertex_index) VertexIndex : u32) + -> ShaderIO { + var pos = array<vec2<f32>, 3>( + vec2<f32>(-1.0, 3.0), + vec2<f32>(-1.0, -3.0), + vec2<f32>(3.0, 0.0)); + + var shaderIO: ShaderIO; + shaderIO.var1 = 0.0; + shaderIO.var2 = 0.0; + shaderIO.var3 = 1.0; + shaderIO.var4 = vec4<f32>(0.4, 0.4, 0.4, 0.4); + shaderIO.var5 = 1.0; + shaderIO.pos = vec4<f32>(pos[VertexIndex], 0.0, 1.0); + + return shaderIO; +})"); + + wgpu::ShaderModule fsModule = utils::CreateShaderModule(device, R"( +struct ShaderIO { + @location(4) var4: vec4<f32>, + @location(1) var1: f32, + @location(5) @align(16) var5: f32, +} + +@fragment fn main(io: ShaderIO) + -> @location(0) vec4<f32> { + return vec4<f32>(io.var1, io.var5, io.var4.x, 1.0); +})"); + + utils::BasicRenderPass renderPass = utils::CreateBasicRenderPass(device, 1, 1); + + utils::ComboRenderPipelineDescriptor descriptor; + descriptor.vertex.module = vsModule; + descriptor.cFragment.module = fsModule; + descriptor.primitive.topology = wgpu::PrimitiveTopology::TriangleList; + descriptor.cTargets[0].format = renderPass.colorFormat; + + wgpu::RenderPipeline pipeline = device.CreateRenderPipeline(&descriptor); + + wgpu::CommandEncoder encoder = device.CreateCommandEncoder(); + wgpu::RenderPassEncoder pass = encoder.BeginRenderPass(&renderPass.renderPassInfo); + pass.SetPipeline(pipeline); + pass.Draw(3); + pass.End(); + wgpu::CommandBuffer commands = encoder.Finish(); + queue.Submit(1, &commands); + + EXPECT_PIXEL_RGBA8_EQ(utils::RGBA8(0, 255, 102, 255), renderPass.color, 0, 0); +} + +// Test that when fragment input is a subset of the vertex output and that when the builtin +// interstage variables may mess up with the order, the render pipeline should be valid. +TEST_P(ShaderTests, FragmentInputIsSubsetOfVertexOutputBuiltinOrder) { + wgpu::ShaderModule vsModule = utils::CreateShaderModule(device, R"( +struct ShaderIO { + @location(1) var1: f32, + @builtin(position) pos: vec4<f32>, + @location(8) var8: vec3<f32>, + @location(7) var7: f32, +} + +@vertex fn main(@builtin(vertex_index) VertexIndex : u32) + -> ShaderIO { + var pos = array<vec2<f32>, 3>( + vec2<f32>(-1.0, 3.0), + vec2<f32>(-1.0, -3.0), + vec2<f32>(3.0, 0.0)); + + var shaderIO: ShaderIO; + shaderIO.var1 = 0.0; + shaderIO.var7 = 1.0; + shaderIO.var8 = vec3<f32>(1.0, 0.4, 0.0); + shaderIO.pos = vec4<f32>(pos[VertexIndex], 0.0, 1.0); + + return shaderIO; +})"); + + wgpu::ShaderModule fsModule = utils::CreateShaderModule(device, R"( +struct ShaderIO { + @builtin(position) pos: vec4<f32>, + @location(7) var7: f32, +} + +@fragment fn main(io: ShaderIO) + -> @location(0) vec4<f32> { + return vec4<f32>(0.0, io.var7, 0.4, 1.0); +})"); + + utils::BasicRenderPass renderPass = utils::CreateBasicRenderPass(device, 1, 1); + + utils::ComboRenderPipelineDescriptor descriptor; + descriptor.vertex.module = vsModule; + descriptor.cFragment.module = fsModule; + descriptor.primitive.topology = wgpu::PrimitiveTopology::TriangleList; + descriptor.cTargets[0].format = renderPass.colorFormat; + + wgpu::RenderPipeline pipeline = device.CreateRenderPipeline(&descriptor); + + wgpu::CommandEncoder encoder = device.CreateCommandEncoder(); + wgpu::RenderPassEncoder pass = encoder.BeginRenderPass(&renderPass.renderPassInfo); + pass.SetPipeline(pipeline); + pass.Draw(3); + pass.End(); + wgpu::CommandBuffer commands = encoder.Finish(); + queue.Submit(1, &commands); + + EXPECT_PIXEL_RGBA8_EQ(utils::RGBA8(0, 255, 102, 255), renderPass.color, 0, 0); +} + DAWN_INSTANTIATE_TEST(ShaderTests, D3D12Backend(), + D3D12Backend({"use_dxc"}), MetalBackend(), OpenGLBackend(), OpenGLESBackend(),
diff --git a/src/dawn/tests/unittests/validation/RenderPipelineValidationTests.cpp b/src/dawn/tests/unittests/validation/RenderPipelineValidationTests.cpp index 5271228..675b1d9 100644 --- a/src/dawn/tests/unittests/validation/RenderPipelineValidationTests.cpp +++ b/src/dawn/tests/unittests/validation/RenderPipelineValidationTests.cpp
@@ -1390,8 +1390,7 @@ } }; -// Tests that creating render pipeline should fail when there is a vertex output that doesn't have -// its corresponding fragment input at the same location, and there is a fragment input that +// Tests that creating render pipeline should fail when there is a fragment input that // doesn't have its corresponding vertex output at the same location. TEST_F(InterStageVariableMatchingValidationTest, MissingDeclarationAtSameLocation) { wgpu::ShaderModule vertexModuleOutputAtLocation0 = utils::CreateShaderModule(device, R"( @@ -1430,7 +1429,10 @@ })"); { - CheckCreatingRenderPipeline(vertexModuleOutputAtLocation0, fsModule, false); + // It is okay if the fragment output is a subset of the vertex input. + CheckCreatingRenderPipeline(vertexModuleOutputAtLocation0, fsModule, true); + } + { CheckCreatingRenderPipeline(vsModule, fragmentModuleAtLocation0, false); CheckCreatingRenderPipeline(vertexModuleOutputAtLocation0, fragmentModuleInputAtLocation1, false);
diff --git a/src/dawn/tests/unittests/validation/ShaderModuleValidationTests.cpp b/src/dawn/tests/unittests/validation/ShaderModuleValidationTests.cpp index 63aafac..4d788e3 100644 --- a/src/dawn/tests/unittests/validation/ShaderModuleValidationTests.cpp +++ b/src/dawn/tests/unittests/validation/ShaderModuleValidationTests.cpp
@@ -278,10 +278,13 @@ } if (success) { - ASSERT_DEVICE_ERROR( - device.CreateRenderPipeline(&pDesc), - testing::HasSubstr( - "One or more fragment inputs and vertex outputs are not one-to-one matching")); + if (failingShaderStage == wgpu::ShaderStage::Vertex) { + // It is allowed that fragment inputs are a subset of the vertex output variables. + device.CreateRenderPipeline(&pDesc); + } else { + ASSERT_DEVICE_ERROR(device.CreateRenderPipeline(&pDesc), + testing::HasSubstr("The fragment input at location")); + } } else { ASSERT_DEVICE_ERROR(device.CreateRenderPipeline(&pDesc), testing::HasSubstr(errorMatcher)); @@ -401,10 +404,13 @@ } if (success) { - ASSERT_DEVICE_ERROR( - device.CreateRenderPipeline(&pDesc), - testing::HasSubstr( - "One or more fragment inputs and vertex outputs are not one-to-one matching")); + if (failingShaderStage == wgpu::ShaderStage::Vertex) { + // It is allowed that fragment inputs are a subset of the vertex output variables. + device.CreateRenderPipeline(&pDesc); + } else { + ASSERT_DEVICE_ERROR(device.CreateRenderPipeline(&pDesc), + testing::HasSubstr("The fragment input at location")); + } } else { ASSERT_DEVICE_ERROR(device.CreateRenderPipeline(&pDesc), testing::HasSubstr(errorMatcher));
diff --git a/src/tint/writer/hlsl/generator.h b/src/tint/writer/hlsl/generator.h index 9974cde..c624943 100644 --- a/src/tint/writer/hlsl/generator.h +++ b/src/tint/writer/hlsl/generator.h
@@ -15,6 +15,7 @@ #ifndef SRC_TINT_WRITER_HLSL_GENERATOR_H_ #define SRC_TINT_WRITER_HLSL_GENERATOR_H_ +#include <bitset> #include <memory> #include <optional> #include <string> @@ -25,6 +26,7 @@ #include "src/tint/ast/pipeline_stage.h" #include "src/tint/reflection.h" #include "src/tint/sem/binding_point.h" +#include "src/tint/utils/bitset.h" #include "src/tint/writer/array_length_from_uniform_options.h" #include "src/tint/writer/text.h" @@ -56,6 +58,9 @@ /// Options used to specify a mapping of binding points to indices into a UBO /// from which to load buffer sizes. ArrayLengthFromUniformOptions array_length_from_uniform = {}; + /// Interstage locations actually used as inputs in the next stage of the pipeline. + /// This is potentially used for truncating unused interstage outputs at current shader stage. + std::bitset<16> interstage_locations; /// Reflect the fields of this class so that it can be used by tint::ForeachField() TINT_REFLECT(root_constant_binding_point,
diff --git a/src/tint/writer/hlsl/generator_impl.cc b/src/tint/writer/hlsl/generator_impl.cc index 9cfe5d7..e3abc51 100644 --- a/src/tint/writer/hlsl/generator_impl.cc +++ b/src/tint/writer/hlsl/generator_impl.cc
@@ -64,6 +64,7 @@ #include "src/tint/transform/remove_continue_in_switch.h" #include "src/tint/transform/remove_phonies.h" #include "src/tint/transform/simplify_pointers.h" +#include "src/tint/transform/truncate_interstage_variables.h" #include "src/tint/transform/unshadow.h" #include "src/tint/transform/vectorize_scalar_matrix_initializers.h" #include "src/tint/transform/zero_init_workgroup_memory.h" @@ -210,6 +211,27 @@ manager.Add<transform::ZeroInitWorkgroupMemory>(); } manager.Add<transform::CanonicalizeEntryPointIO>(); + + if (options.interstage_locations.any()) { + // When interstage_locations is empty, it means there's no user-defined interstage variables + // being used in the next stage. This is treated as a special case. + // TruncateInterstageVariables transform is trying to solve the HLSL compiler register + // mismatch issue. So it is not needed if no register is assigned to any interstage + // variables. As a result we only add this transform when there is at least one interstage + // locations being used. + + // TruncateInterstageVariables itself will skip when interstage_locations matches exactly + // with the current stage output. + + // Build the config for internal TruncateInterstageVariables transform. + transform::TruncateInterstageVariables::Config truncate_interstage_variables_cfg; + truncate_interstage_variables_cfg.interstage_locations = + std::move(options.interstage_locations); + manager.Add<transform::TruncateInterstageVariables>(); + data.Add<transform::TruncateInterstageVariables::Config>( + std::move(truncate_interstage_variables_cfg)); + } + // NumWorkgroupsFromUniform must come after CanonicalizeEntryPointIO, as it // assumes that num_workgroups builtins only appear as struct members and are // only accessed directly via member accessors.