// blob: 9b8badf4658c90b54738e025c89a051fbe579484 [file] [log] [blame] [edit]
// Copyright 2023 The Dawn Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "dawn/native/d3d11/RenderPipelineD3D11.h"
#include <d3dcompiler.h>
#include <array>
#include <memory>
#include <utility>
#include "dawn/native/CreatePipelineAsyncTask.h"
#include "dawn/native/d3d/D3DError.h"
#include "dawn/native/d3d/ShaderUtils.h"
#include "dawn/native/d3d11/DeviceD3D11.h"
#include "dawn/native/d3d11/Forward.h"
#include "dawn/native/d3d11/PipelineLayoutD3D11.h"
#include "dawn/native/d3d11/ShaderModuleD3D11.h"
#include "dawn/native/d3d11/UtilsD3D11.h"
namespace dawn::native::d3d11 {
namespace {
// Maps a WebGPU vertex step mode onto the D3D11 input classification stored in input layout
// elements. VertexBufferNotUsed has no mapping here and hits UNREACHABLE().
D3D11_INPUT_CLASSIFICATION VertexStepModeFunction(wgpu::VertexStepMode mode) {
    switch (mode) {
        case wgpu::VertexStepMode::Instance:
            return D3D11_INPUT_PER_INSTANCE_DATA;
        case wgpu::VertexStepMode::Vertex:
            return D3D11_INPUT_PER_VERTEX_DATA;
        case wgpu::VertexStepMode::VertexBufferNotUsed:
            UNREACHABLE();
    }
}
// Maps a WebGPU primitive topology onto the equivalent D3D primitive topology. Any value
// without an explicit case below hits UNREACHABLE().
D3D_PRIMITIVE_TOPOLOGY D3DPrimitiveTopology(wgpu::PrimitiveTopology topology) {
    switch (topology) {
        // Triangles.
        case wgpu::PrimitiveTopology::TriangleList:
            return D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST;
        case wgpu::PrimitiveTopology::TriangleStrip:
            return D3D_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP;
        // Lines.
        case wgpu::PrimitiveTopology::LineList:
            return D3D_PRIMITIVE_TOPOLOGY_LINELIST;
        case wgpu::PrimitiveTopology::LineStrip:
            return D3D_PRIMITIVE_TOPOLOGY_LINESTRIP;
        // Points.
        case wgpu::PrimitiveTopology::PointList:
            return D3D_PRIMITIVE_TOPOLOGY_POINTLIST;
        default:
            UNREACHABLE();
    }
}
// Maps a WebGPU cull mode onto the equivalent D3D11 cull mode. Any value without an explicit
// case below hits UNREACHABLE().
D3D11_CULL_MODE D3DCullMode(wgpu::CullMode cullMode) {
    switch (cullMode) {
        case wgpu::CullMode::Back:
            return D3D11_CULL_BACK;
        case wgpu::CullMode::Front:
            return D3D11_CULL_FRONT;
        case wgpu::CullMode::None:
            return D3D11_CULL_NONE;
        default:
            UNREACHABLE();
    }
}
// Maps a WebGPU blend factor onto the D3D11 blend factor used for the color channels. The
// Src1-based factors have no mapping here and share the UNREACHABLE() default.
D3D11_BLEND D3DBlendFactor(wgpu::BlendFactor blendFactor) {
    switch (blendFactor) {
        // Fixed factors.
        case wgpu::BlendFactor::Zero:
            return D3D11_BLEND_ZERO;
        case wgpu::BlendFactor::One:
            return D3D11_BLEND_ONE;

        // Source-based factors.
        case wgpu::BlendFactor::Src:
            return D3D11_BLEND_SRC_COLOR;
        case wgpu::BlendFactor::OneMinusSrc:
            return D3D11_BLEND_INV_SRC_COLOR;
        case wgpu::BlendFactor::SrcAlpha:
            return D3D11_BLEND_SRC_ALPHA;
        case wgpu::BlendFactor::OneMinusSrcAlpha:
            return D3D11_BLEND_INV_SRC_ALPHA;
        case wgpu::BlendFactor::SrcAlphaSaturated:
            return D3D11_BLEND_SRC_ALPHA_SAT;

        // Destination-based factors.
        case wgpu::BlendFactor::Dst:
            return D3D11_BLEND_DEST_COLOR;
        case wgpu::BlendFactor::OneMinusDst:
            return D3D11_BLEND_INV_DEST_COLOR;
        case wgpu::BlendFactor::DstAlpha:
            return D3D11_BLEND_DEST_ALPHA;
        case wgpu::BlendFactor::OneMinusDstAlpha:
            return D3D11_BLEND_INV_DEST_ALPHA;

        // Blend-constant-based factors.
        case wgpu::BlendFactor::Constant:
            return D3D11_BLEND_BLEND_FACTOR;
        case wgpu::BlendFactor::OneMinusConstant:
            return D3D11_BLEND_INV_BLEND_FACTOR;

        // Not translated by this function.
        case wgpu::BlendFactor::Src1:
        case wgpu::BlendFactor::OneMinusSrc1:
        case wgpu::BlendFactor::Src1Alpha:
        case wgpu::BlendFactor::OneMinusSrc1Alpha:
        default:
            UNREACHABLE();
    }
}
// Maps a WebGPU blend factor onto the D3D11 blend factor used for the alpha channel.
//
// A factor that does not explicitly say it applies to alpha must be treated as its
// explicitly-alpha equivalent when it is used for the alpha channel.
// See: https://github.com/gpuweb/gpuweb/issues/65
D3D11_BLEND D3DBlendAlphaFactor(wgpu::BlendFactor factor) {
    switch (factor) {
        // Source factors become their alpha variants.
        case wgpu::BlendFactor::Src:
            return D3D11_BLEND_SRC_ALPHA;
        case wgpu::BlendFactor::OneMinusSrc:
            return D3D11_BLEND_INV_SRC_ALPHA;

        // Destination factors become their alpha variants.
        case wgpu::BlendFactor::Dst:
            return D3D11_BLEND_DEST_ALPHA;
        case wgpu::BlendFactor::OneMinusDst:
            return D3D11_BLEND_INV_DEST_ALPHA;

        // Every other factor uses the same D3D11 enum as for the color channels.
        default:
            return D3DBlendFactor(factor);
    }
}
// Maps a WebGPU blend operation onto the equivalent D3D11 blend operation. Any value without
// an explicit case below hits UNREACHABLE().
D3D11_BLEND_OP D3DBlendOperation(wgpu::BlendOperation blendOperation) {
    switch (blendOperation) {
        // Arithmetic operations.
        case wgpu::BlendOperation::Add:
            return D3D11_BLEND_OP_ADD;
        case wgpu::BlendOperation::Subtract:
            return D3D11_BLEND_OP_SUBTRACT;
        case wgpu::BlendOperation::ReverseSubtract:
            return D3D11_BLEND_OP_REV_SUBTRACT;
        // Selection operations.
        case wgpu::BlendOperation::Max:
            return D3D11_BLEND_OP_MAX;
        case wgpu::BlendOperation::Min:
            return D3D11_BLEND_OP_MIN;
        default:
            UNREACHABLE();
    }
}
// Converts a WebGPU color write mask into the D3D11 render target write mask. The static
// asserts check that each WebGPU channel bit has the same value as its D3D11 counterpart, which
// is what makes the plain cast at the end valid.
UINT D3DColorWriteMask(wgpu::ColorWriteMask colorWriteMask) {
    static_assert(static_cast<UINT>(wgpu::ColorWriteMask::Red) == D3D11_COLOR_WRITE_ENABLE_RED,
                  "Red write bit differs between WebGPU and D3D11");
    static_assert(static_cast<UINT>(wgpu::ColorWriteMask::Green) == D3D11_COLOR_WRITE_ENABLE_GREEN,
                  "Green write bit differs between WebGPU and D3D11");
    static_assert(static_cast<UINT>(wgpu::ColorWriteMask::Blue) == D3D11_COLOR_WRITE_ENABLE_BLUE,
                  "Blue write bit differs between WebGPU and D3D11");
    static_assert(static_cast<UINT>(wgpu::ColorWriteMask::Alpha) == D3D11_COLOR_WRITE_ENABLE_ALPHA,
                  "Alpha write bit differs between WebGPU and D3D11");

    return static_cast<UINT>(colorWriteMask);
}
// Maps a WebGPU stencil operation onto the equivalent D3D11 stencil operation. The switch has
// no default case; each enumerator listed below returns directly.
D3D11_STENCIL_OP StencilOp(wgpu::StencilOperation op) {
    switch (op) {
        // Operations that do not depend on the stored value's magnitude.
        case wgpu::StencilOperation::Keep:
            return D3D11_STENCIL_OP_KEEP;
        case wgpu::StencilOperation::Zero:
            return D3D11_STENCIL_OP_ZERO;
        case wgpu::StencilOperation::Replace:
            return D3D11_STENCIL_OP_REPLACE;
        case wgpu::StencilOperation::Invert:
            return D3D11_STENCIL_OP_INVERT;

        // Increments: clamping maps to the _SAT variant, wrapping to the plain variant.
        case wgpu::StencilOperation::IncrementClamp:
            return D3D11_STENCIL_OP_INCR_SAT;
        case wgpu::StencilOperation::IncrementWrap:
            return D3D11_STENCIL_OP_INCR;

        // Decrements: clamping maps to the _SAT variant, wrapping to the plain variant.
        case wgpu::StencilOperation::DecrementClamp:
            return D3D11_STENCIL_OP_DECR_SAT;
        case wgpu::StencilOperation::DecrementWrap:
            return D3D11_STENCIL_OP_DECR;
    }
}
// Builds the D3D11 per-face stencil description from a stencil face state: the comparison
// function plus the operations for the fail, depth-fail and pass outcomes.
D3D11_DEPTH_STENCILOP_DESC StencilOpDesc(const StencilFaceState& descriptor) {
    D3D11_DEPTH_STENCILOP_DESC faceDesc = {};

    faceDesc.StencilFunc = ToD3D11ComparisonFunc(descriptor.compare);
    faceDesc.StencilFailOp = StencilOp(descriptor.failOp);
    faceDesc.StencilDepthFailOp = StencilOp(descriptor.depthFailOp);
    faceDesc.StencilPassOp = StencilOp(descriptor.passOp);

    return faceDesc;
}
} // namespace
// static
// Allocates a RenderPipeline and hands its reference to the caller. Only the constructor runs
// here; Initialize() is not called by this function.
Ref<RenderPipeline> RenderPipeline::CreateUninitialized(
    Device* device,
    const RenderPipelineDescriptor* descriptor) {
    RenderPipeline* pipeline = new RenderPipeline(device, descriptor);
    return AcquireRef(pipeline);
}
// Forwards the device and descriptor to RenderPipelineBase and caches the D3D primitive
// topology translated from GetPrimitiveTopology(). No D3D11 objects are created here.
RenderPipeline::RenderPipeline(Device* device, const RenderPipelineDescriptor* descriptor)
: RenderPipelineBase(device, descriptor),
mD3DPrimitiveTopology(D3DPrimitiveTopology(GetPrimitiveTopology())) {}
// Creates the rasterizer, blend and depth-stencil states and the shaders for this pipeline,
// then validates that the color attachments fit in the slots left over by the UAVs.
// Returns the first error produced by any of those steps, or success after labelling the
// created objects.
MaybeError RenderPipeline::Initialize() {
    DAWN_TRY(InitializeRasterizerState());
    DAWN_TRY(InitializeBlendState());
    DAWN_TRY(InitializeShaders());
    DAWN_TRY(InitializeDepthStencilState());

    // RTVs and UAVs share the same resource slots. Make sure here we are not going to run out
    // of slots.
    auto* layout = ToBackend(GetLayout());
    const uint32_t unusedUAVs = layout->GetUnusedUAVBindingCount();
    const uint32_t usedUAVs = layout->GetTotalUAVBindingCount() - unusedUAVs;
    const uint32_t colorAttachments =
        static_cast<uint8_t>(GetHighestBitIndexPlusOne(GetColorAttachmentsMask()));

    // TODO(dawn:1814): Move the validation to the frontend, if we eventually regard it as a
    // compat restriction.
    DAWN_INVALID_IF(colorAttachments > unusedUAVs,
                    "The pipeline uses up to color attachment %u, but there are only %u remaining "
                    "slots because the pipeline uses %u UAVs",
                    colorAttachments, unusedUAVs, usedUAVs);

    SetLabelImpl();
    return {};
}
// Defaulted destructor: this file adds no explicit teardown logic for the pipeline.
RenderPipeline::~RenderPipeline() = default;
// Binds every piece of this pipeline's state on the D3D11 device context of |commandContext|:
// primitive topology, input layout, rasterizer state, vertex and pixel shaders, then the blend
// state (with |blendColor|) and the depth-stencil state (with |stencilReference|).
void RenderPipeline::ApplyNow(CommandRecordingContext* commandContext,
                              const std::array<float, 4>& blendColor,
                              uint32_t stencilReference) {
    ID3D11DeviceContext1* context = commandContext->GetD3D11DeviceContext1();

    context->IASetPrimitiveTopology(mD3DPrimitiveTopology);

    // TODO(dawn:1753): deduplicate these objects in the backend eventually, and to avoid
    // redundant state setting.
    context->IASetInputLayout(mInputLayout.Get());
    context->RSSetState(mRasterizerState.Get());
    context->VSSetShader(mVertexShader.Get(), nullptr, 0);
    context->PSSetShader(mPixelShader.Get(), nullptr, 0);

    // Output-merger state goes through the dedicated helpers.
    ApplyBlendState(commandContext, blendColor);
    ApplyDepthStencilState(commandContext, stencilReference);
}
// Sets this pipeline's blend state on the output merger, passing |blendColor| as the blend
// factor and the pipeline's sample mask.
void RenderPipeline::ApplyBlendState(CommandRecordingContext* commandContext,
                                     const std::array<float, 4>& blendColor) {
    commandContext->GetD3D11DeviceContext1()->OMSetBlendState(mBlendState.Get(),
                                                              blendColor.data(), GetSampleMask());
}
// Sets this pipeline's depth-stencil state on the output merger together with
// |stencilReference|.
void RenderPipeline::ApplyDepthStencilState(CommandRecordingContext* commandContext,
                                            uint32_t stencilReference) {
    commandContext->GetD3D11DeviceContext1()->OMSetDepthStencilState(mDepthStencilState.Get(),
                                                                     stencilReference);
}
void RenderPipeline::SetLabelImpl() {
SetDebugName(ToBackend(GetDevice()), mRasterizerState.Get(), "Dawn_RenderPipeline", GetLabel());
SetDebugName(ToBackend(GetDevice()), mInputLayout.Get(), "Dawn_RenderPipeline", GetLabel());
SetDebugName(ToBackend(GetDevice()), mVertexShader.Get(), "Dawn_RenderPipeline", GetLabel());
SetDebugName(ToBackend(GetDevice()), mPixelShader.Get(), "Dawn_RenderPipeline", GetLabel());
SetDebugName(ToBackend(GetDevice()), mBlendState.Get(), "Dawn_RenderPipeline", GetLabel());
SetDebugName(ToBackend(GetDevice()), mDepthStencilState.Get(), "Dawn_RenderPipeline",
GetLabel());
}
// Fills a D3D11 rasterizer description from the pipeline's primitive and depth-bias settings
// and creates mRasterizerState from it. Returns an error if the D3D11 call fails.
MaybeError RenderPipeline::InitializeRasterizerState() {
    const bool frontIsCounterClockwise = GetFrontFace() == wgpu::FrontFace::CCW;
    const bool isMultisampled = GetSampleCount() > 1;

    D3D11_RASTERIZER_DESC desc;

    // Fixed settings.
    desc.FillMode = D3D11_FILL_SOLID;
    desc.ScissorEnable = TRUE;
    desc.AntialiasedLineEnable = FALSE;

    // Face culling and winding.
    desc.CullMode = D3DCullMode(GetCullMode());
    desc.FrontCounterClockwise = frontIsCounterClockwise ? TRUE : FALSE;

    // Depth bias and clipping.
    desc.DepthBias = GetDepthBias();
    desc.DepthBiasClamp = GetDepthBiasClamp();
    desc.SlopeScaledDepthBias = GetDepthBiasSlopeScale();
    desc.DepthClipEnable = !HasUnclippedDepth();

    desc.MultisampleEnable = isMultisampled ? TRUE : FALSE;

    Device* device = ToBackend(GetDevice());
    DAWN_TRY(CheckHRESULT(device->GetD3D11Device()->CreateRasterizerState(&desc, &mRasterizerState),
                          "ID3D11Device::CreateRasterizerState"));
    return {};
}
// Creates mInputLayout from the vertex attributes used by the pipeline, validated by D3D11
// against the compiled |vertexShader| blob. When no attribute location is used, nothing is
// created and success is returned. Returns an error if the D3D11 call fails.
MaybeError RenderPipeline::InitializeInputLayout(const Blob& vertexShader) {
    const auto& usedLocations = GetAttributeLocationsUsed();
    if (!usedLocations.any()) {
        return {};
    }

    std::array<D3D11_INPUT_ELEMENT_DESC, kMaxVertexAttributes> elements;
    UINT elementCount = 0;
    for (VertexAttributeLocation loc : IterateBitSet(usedLocations)) {
        const VertexAttributeInfo& attribute = GetAttribute(loc);
        const VertexBufferInfo& vertexBuffer = GetVertexBuffer(attribute.vertexBufferSlot);

        // Elements are packed densely in iteration order.
        D3D11_INPUT_ELEMENT_DESC& element = elements[elementCount];
        ++elementCount;

        // If the HLSL semantic is TEXCOORDN the SemanticName should be "TEXCOORD" and the
        // SemanticIndex N
        element.SemanticName = "TEXCOORD";
        element.SemanticIndex = static_cast<uint8_t>(loc);
        element.Format = d3d::DXGIVertexFormat(attribute.format);
        element.InputSlot = static_cast<uint8_t>(attribute.vertexBufferSlot);
        element.AlignedByteOffset = attribute.offset;
        element.InputSlotClass = VertexStepModeFunction(vertexBuffer.stepMode);
        // Per-vertex elements use a step rate of 0, all others a step rate of 1.
        element.InstanceDataStepRate =
            (element.InputSlotClass == D3D11_INPUT_PER_VERTEX_DATA) ? 0 : 1;
    }

    ID3D11Device* d3d11Device = ToBackend(GetDevice())->GetD3D11Device();
    DAWN_TRY(CheckHRESULT(
        d3d11Device->CreateInputLayout(elements.data(), elementCount, vertexShader.Data(),
                                       vertexShader.Size(), &mInputLayout),
        "ID3D11Device::CreateInputLayout"));
    return {};
}
// Builds the D3D11 blend description, with one independent render target blend description
// per color attachment slot, and creates mBlendState from it. Returns an error if the D3D11
// call fails.
MaybeError RenderPipeline::InitializeBlendState() {
    Device* device = ToBackend(GetDevice());

    CD3D11_BLEND_DESC blendDesc(D3D11_DEFAULT);
    blendDesc.AlphaToCoverageEnable = IsAlphaToCoverageEnabled();
    blendDesc.IndependentBlendEnable = TRUE;

    static_assert(kMaxColorAttachments == std::size(blendDesc.RenderTarget));
    // The counter starts from a standard uint8_t zero rather than the Microsoft-specific `Ui8`
    // literal suffix, so the loop does not depend on a compiler extension.
    for (ColorAttachmentIndex i(static_cast<uint8_t>(0));
         i < ColorAttachmentIndex(kMaxColorAttachments); ++i) {
        D3D11_RENDER_TARGET_BLEND_DESC& rtBlendDesc =
            blendDesc.RenderTarget[static_cast<uint8_t>(i)];
        const ColorTargetState* descriptor = GetColorTargetState(i);

        // Blending is enabled only for targets that carry a blend description.
        rtBlendDesc.BlendEnable = descriptor->blend != nullptr;
        if (rtBlendDesc.BlendEnable) {
            rtBlendDesc.SrcBlend = D3DBlendFactor(descriptor->blend->color.srcFactor);
            if (device->GetValidInternalFormat(descriptor->format).componentCount < 4 &&
                rtBlendDesc.SrcBlend == D3D11_BLEND_DEST_ALPHA) {
                // According to the D3D SPEC, the default value for missing components in an
                // element format is "0" for any component except A, which gets "1". So here
                // D3D11_BLEND_DEST_ALPHA should have same effect as D3D11_BLEND_ONE.
                // Note that this replacement can be an optimization as using D3D11_BLEND_ONE
                // means the GPU hardware no longer needs to get pixels from the destination
                // texture. It can also be served as a workaround against an Intel driver issue
                // about alpha blending (see http://crbug.com/dawn/1579 for more details).
                rtBlendDesc.SrcBlend = D3D11_BLEND_ONE;
            }
            rtBlendDesc.DestBlend = D3DBlendFactor(descriptor->blend->color.dstFactor);
            rtBlendDesc.BlendOp = D3DBlendOperation(descriptor->blend->color.operation);
            rtBlendDesc.SrcBlendAlpha = D3DBlendAlphaFactor(descriptor->blend->alpha.srcFactor);
            rtBlendDesc.DestBlendAlpha = D3DBlendAlphaFactor(descriptor->blend->alpha.dstFactor);
            rtBlendDesc.BlendOpAlpha = D3DBlendOperation(descriptor->blend->alpha.operation);
        }
        // The write mask applies whether or not blending is enabled.
        rtBlendDesc.RenderTargetWriteMask = D3DColorWriteMask(descriptor->writeMask);
    }

    DAWN_TRY(CheckHRESULT(device->GetD3D11Device()->CreateBlendState(&blendDesc, &mBlendState),
                          "ID3D11Device::CreateBlendState"));
    return {};
}
// Fills a D3D11 depth-stencil description from the pipeline's depth-stencil state and creates
// mDepthStencilState from it. Returns an error if the D3D11 call fails.
MaybeError RenderPipeline::InitializeDepthStencilState() {
    const DepthStencilState* state = GetDepthStencilState();

    D3D11_DEPTH_STENCIL_DESC desc = {};

    // Depth: the test is disabled only when it always passes and depth is never written.
    desc.DepthEnable =
        (state->depthCompare != wgpu::CompareFunction::Always || state->depthWriteEnabled) ? TRUE
                                                                                           : FALSE;
    desc.DepthFunc = ToD3D11ComparisonFunc(state->depthCompare);
    if (state->depthWriteEnabled) {
        desc.DepthWriteMask = D3D11_DEPTH_WRITE_MASK_ALL;
    } else {
        desc.DepthWriteMask = D3D11_DEPTH_WRITE_MASK_ZERO;
    }

    // Stencil: masks are shared by both faces, operations are described per face.
    desc.StencilEnable = StencilTestEnabled(state) ? TRUE : FALSE;
    desc.StencilReadMask = static_cast<UINT8>(state->stencilReadMask);
    desc.StencilWriteMask = static_cast<UINT8>(state->stencilWriteMask);
    desc.FrontFace = StencilOpDesc(state->stencilFront);
    desc.BackFace = StencilOpDesc(state->stencilBack);

    Device* device = ToBackend(GetDevice());
    DAWN_TRY(CheckHRESULT(
        device->GetD3D11Device()->CreateDepthStencilState(&desc, &mDepthStencilState),
        "ID3D11Device::CreateDepthStencilState"));
    return {};
}
// Compiles the vertex and fragment stages present in the pipeline and creates the matching
// D3D11 shader objects. For the vertex stage it also creates the input layout from the compiled
// blob and records whether the shader uses the vertex and instance indices. Returns the first
// error produced by compilation or by a D3D11 call.
MaybeError RenderPipeline::InitializeShaders() {
    Device* device = ToBackend(GetDevice());
    ID3D11Device* d3d11Device = device->GetD3D11Device();

    // Flags that are always set:
    //  - Tint does matrix multiplication expecting row major matrices.
    //  - FXC can miscompile code that depends on special float values (NaN, INF, etc) when IEEE
    //    strictness is not enabled. See crbug.com/tint/976.
    uint32_t compileFlags = D3DCOMPILE_PACK_MATRIX_ROW_MAJOR | D3DCOMPILE_IEEE_STRICTNESS;
    // Flags that depend on device toggles.
    if (!(device->IsToggleEnabled(Toggle::UseDXC) ||
          device->IsToggleEnabled(Toggle::FxcOptimizations))) {
        compileFlags |= D3DCOMPILE_OPTIMIZATION_LEVEL0;
    }
    if (device->IsToggleEnabled(Toggle::EmitHLSLDebugSymbols)) {
        compileFlags |= D3DCOMPILE_DEBUG | D3DCOMPILE_SKIP_OPTIMIZATION;
    }

    // The fragment entry point metadata is copied into a local so that the pointer to its used
    // inter-stage variables stays valid for both Compile() calls below. The pointer stays null
    // when the pipeline has no fragment stage.
    dawn::native::EntryPointMetadata fragmentEntryPoint;
    std::bitset<kMaxInterStageShaderVariables>* usedInterstageVariables = nullptr;
    if (GetStageMask() & wgpu::ShaderStage::Fragment) {
        // Now that only fragment shader can have inter-stage inputs.
        const ProgrammableStage& fragmentStage = GetStage(SingleShaderStage::Fragment);
        fragmentEntryPoint = fragmentStage.module->GetEntryPoint(fragmentStage.entryPoint);
        usedInterstageVariables = &fragmentEntryPoint.usedInterStageVariables;
    }

    PerStage<d3d::CompiledShader> compiledShader;

    if (GetStageMask() & wgpu::ShaderStage::Vertex) {
        const ProgrammableStage& vertexStage = GetStage(SingleShaderStage::Vertex);
        DAWN_TRY_ASSIGN(
            compiledShader[SingleShaderStage::Vertex],
            ToBackend(vertexStage.module)
                ->Compile(vertexStage, SingleShaderStage::Vertex, ToBackend(GetLayout()),
                          compileFlags, usedInterstageVariables));

        const d3d::CompiledShader& compiledVertex = compiledShader[SingleShaderStage::Vertex];
        const Blob& vertexBlob = compiledVertex.shaderBlob;
        DAWN_TRY(CheckHRESULT(d3d11Device->CreateVertexShader(vertexBlob.Data(), vertexBlob.Size(),
                                                              nullptr, &mVertexShader),
                              "D3D11 create vertex shader"));
        DAWN_TRY(InitializeInputLayout(vertexBlob));

        mUsesVertexIndex = compiledVertex.usesVertexIndex;
        mUsesInstanceIndex = compiledVertex.usesInstanceIndex;
    }

    if (GetStageMask() & wgpu::ShaderStage::Fragment) {
        const ProgrammableStage& fragmentStage = GetStage(SingleShaderStage::Fragment);
        DAWN_TRY_ASSIGN(
            compiledShader[SingleShaderStage::Fragment],
            ToBackend(fragmentStage.module)
                ->Compile(fragmentStage, SingleShaderStage::Fragment, ToBackend(GetLayout()),
                          compileFlags, usedInterstageVariables));

        const Blob& fragmentBlob = compiledShader[SingleShaderStage::Fragment].shaderBlob;
        DAWN_TRY(CheckHRESULT(d3d11Device->CreatePixelShader(fragmentBlob.Data(),
                                                             fragmentBlob.Size(), nullptr,
                                                             &mPixelShader),
                              "D3D11 create pixel shader"));
    }

    return {};
}
// Wraps |renderPipeline|, |callback| and |userdata| in a CreateRenderPipelineAsyncTask and
// hands the task over to CreateRenderPipelineAsyncTask::RunAsync.
void RenderPipeline::InitializeAsync(Ref<RenderPipelineBase> renderPipeline,
                                     WGPUCreateRenderPipelineAsyncCallback callback,
                                     void* userdata) {
    auto task = std::make_unique<CreateRenderPipelineAsyncTask>(std::move(renderPipeline),
                                                                callback, userdata);
    CreateRenderPipelineAsyncTask::RunAsync(std::move(task));
}
} // namespace dawn::native::d3d11