Dawn/Tint: Polyfill reflect vec2<f32> for D3D12 FXC on Intel This CL adds a toggle-controlled Tint polyfill for reflect on vec2<f32>, and enables this toggle by default on D3D12 Intel devices when using FXC. This CL works around issue tint:1798. Bug: tint:1798 Change-Id: If2f4de836eaf5e7374bc2c1ae3fbe06b91a5bbd5 Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/121160 Reviewed-by: Corentin Wallez <cwallez@chromium.org> Kokoro: Kokoro <noreply+kokoro@google.com> Commit-Queue: Zhaoming Jiang <zhaoming.jiang@intel.com>
diff --git a/src/dawn/native/Toggles.cpp b/src/dawn/native/Toggles.cpp index 72f59a1..39e1fc2 100644 --- a/src/dawn/native/Toggles.cpp +++ b/src/dawn/native/Toggles.cpp
@@ -387,6 +387,11 @@ "This toggle is off by default. It is expected to turn on or get removed when WebGPU V1 " "ships and stays stable.", "https://crbug.com/dawn/1563", ToggleStage::Device}}, + {Toggle::D3D12PolyfillReflectVec2F32, + {"d3d12_polyfill_reflect_vec2_f32", + "Polyfill the reflect builtin for vec2<f32> for D3D12. This toggle is enabled by default on " + "D3D12 backends using FXC on Intel GPUs due to a driver issue on Intel D3D12 driver.", + "https://crbug.com/tint/1798", ToggleStage::Device}}, {Toggle::NoWorkaroundSampleMaskBecomesZeroForAllButLastColorTarget, {"no_workaround_sample_mask_becomes_zero_for_all_but_last_color_target", "MacOS 12.0+ Intel has a bug where the sample mask is only applied for the last color "
diff --git a/src/dawn/native/Toggles.h b/src/dawn/native/Toggles.h index 536a3fb..6d75fff 100644 --- a/src/dawn/native/Toggles.h +++ b/src/dawn/native/Toggles.h
@@ -92,6 +92,7 @@ UseBlitForDepthTextureToTextureCopyToNonzeroSubresource, D3D12ReplaceAddWithMinusWhenDstFactorIsZeroAndSrcFactorIsDstAlpha, DisallowDeprecatedAPIs, + D3D12PolyfillReflectVec2F32, // Unresolved issues. NoWorkaroundSampleMaskBecomesZeroForAllButLastColorTarget,
diff --git a/src/dawn/native/d3d12/AdapterD3D12.cpp b/src/dawn/native/d3d12/AdapterD3D12.cpp index d05061e..8fd7993 100644 --- a/src/dawn/native/d3d12/AdapterD3D12.cpp +++ b/src/dawn/native/d3d12/AdapterD3D12.cpp
@@ -595,6 +595,12 @@ deviceToggles->ForceSet( Toggle::D3D12UseTempBufferInTextureToTextureCopyBetweenDifferentDimensions, true); } + + // Polyfill reflect builtin for vec2<f32> on Intel devices when using FXC. + // See https://crbug.com/tint/1798 for more information. + if (gpu_info::IsIntel(vendorId) && !deviceToggles->IsEnabled(Toggle::UseDXC)) { + deviceToggles->Default(Toggle::D3D12PolyfillReflectVec2F32, true); + } } ResultOrError<Ref<DeviceBase>> Adapter::CreateDeviceImpl(const DeviceDescriptor* descriptor,
diff --git a/src/dawn/native/d3d12/ShaderModuleD3D12.cpp b/src/dawn/native/d3d12/ShaderModuleD3D12.cpp index d821488..6b8d28f 100644 --- a/src/dawn/native/d3d12/ShaderModuleD3D12.cpp +++ b/src/dawn/native/d3d12/ShaderModuleD3D12.cpp
@@ -95,6 +95,7 @@ X(bool, disableSymbolRenaming) \ X(bool, isRobustnessEnabled) \ X(bool, disableWorkgroupInit) \ + X(bool, polyfillReflectVec2F32) \ X(bool, dumpShaders) #define D3D_BYTECODE_COMPILATION_REQUEST_MEMBERS(X) \ @@ -401,6 +402,8 @@ options.interstage_locations = r.interstageLocations; } + options.polyfill_reflect_vec2_f32 = r.polyfillReflectVec2F32; + TRACE_EVENT0(tracePlatform.UnsafeGetValue(), General, "tint::writer::hlsl::Generate"); auto result = tint::writer::hlsl::Generate(&transformedProgram, options); DAWN_INVALID_IF(!result.success, "An error occured while generating HLSL: %s", result.error); @@ -606,6 +609,8 @@ req.hlsl.arrayLengthFromUniform = std::move(arrayLengthFromUniform); req.hlsl.substituteOverrideConfig = std::move(substituteOverrideConfig); + req.hlsl.polyfillReflectVec2F32 = device->IsToggleEnabled(Toggle::D3D12PolyfillReflectVec2F32); + const CombinedLimits& limits = device->GetLimits(); req.hlsl.limits = LimitsForCompilationRequest::Create(limits.v1);
diff --git a/src/tint/transform/builtin_polyfill.cc b/src/tint/transform/builtin_polyfill.cc index 606c6ce..259eb11 100644 --- a/src/tint/transform/builtin_polyfill.cc +++ b/src/tint/transform/builtin_polyfill.cc
@@ -582,6 +582,33 @@ return name; } + /// Builds the polyfill function for the `reflect` builtin + /// @param ty the parameter and return type for the function + /// @return the polyfill function name + Symbol reflect(const type::Type* ty) { + auto name = b.Symbols().New("tint_reflect"); + + // WGSL polyfill function: + // fn tint_reflect(e1 : T, e2 : T) -> T { + // let factor = (-2.0 * dot(e1, e2)); + // return (e1 + (factor * e2)); + // } + // Using -2.0 instead of 2.0 in factor to prevent the optimization that causes wrong results. + // See https://crbug.com/tint/1798 for more details. + auto body = utils::Vector{ + b.Decl(b.Let("factor", b.Mul(-2.0_a, b.Call("dot", "e1", "e2")))), + b.Return(b.Add("e1", b.Mul("factor", "e2"))), + }; + b.Func(name, + utils::Vector{ + b.Param("e1", T(ty)), + b.Param("e2", T(ty)), + }, + T(ty), body); + + return name; + } + /// Builds the polyfill function for the `saturate` builtin /// @param ty the parameter and return type for the function /// @return the polyfill function name @@ -1007,6 +1034,18 @@ builtin, [&] { return s.insertBits(builtin->ReturnType()); }); } break; + case sem::BuiltinType::kReflect: + // Only polyfill for vec2<f32>. See https://crbug.com/tint/1798 for more + // details. + if (polyfill.reflect_vec2_f32) { + auto& sig = builtin->Signature(); + auto* vec = sig.return_type->As<type::Vector>(); + if (vec && vec->Width() == 2 && vec->type()->Is<type::F32>()) { + fn = builtin_polyfills.GetOrCreate( + builtin, [&] { return s.reflect(builtin->ReturnType()); }); + } + } + break; case sem::BuiltinType::kSaturate: if (polyfill.saturate) { fn = builtin_polyfills.GetOrCreate(
diff --git a/src/tint/transform/builtin_polyfill.h b/src/tint/transform/builtin_polyfill.h index 521940c..b070248 100644 --- a/src/tint/transform/builtin_polyfill.h +++ b/src/tint/transform/builtin_polyfill.h
@@ -70,6 +70,8 @@ bool int_div_mod = false; /// Should float modulos be polyfilled to emit a precise modulo operation as per the spec? bool precise_float_mod = false; + /// Should `reflect()` be polyfilled for vec2<f32>? + bool reflect_vec2_f32 = false; /// Should `saturate()` be polyfilled? bool saturate = false; /// Should `sign()` be polyfilled for integer types?
diff --git a/src/tint/transform/builtin_polyfill_test.cc b/src/tint/transform/builtin_polyfill_test.cc index 09d17b8..8c0d787 100644 --- a/src/tint/transform/builtin_polyfill_test.cc +++ b/src/tint/transform/builtin_polyfill_test.cc
@@ -3045,6 +3045,177 @@ } //////////////////////////////////////////////////////////////////////////////// +// reflect for vec2<f32> +//////////////////////////////////////////////////////////////////////////////// +DataMap polyfillReflectVec2F32() { + BuiltinPolyfill::Builtins builtins; + builtins.reflect_vec2_f32 = true; + DataMap data; + data.Add<BuiltinPolyfill::Config>(builtins); + return data; +} + +TEST_F(BuiltinPolyfillTest, ShouldRunReflect_vec2_f32) { + auto* src = R"( +fn f() { + let e1 = vec2<f32>(1.0f); + let e2 = vec2<f32>(1.0f); + let x = reflect(e1, e2); +} +)"; + + EXPECT_FALSE(ShouldRun<BuiltinPolyfill>(src)); + EXPECT_TRUE(ShouldRun<BuiltinPolyfill>(src, polyfillReflectVec2F32())); +} + +TEST_F(BuiltinPolyfillTest, ShouldRunReflect_vec2_f16) { + auto* src = R"( +enable f16; + +fn f() { + let e1 = vec2<f16>(1.0h); + let e2 = vec2<f16>(1.0h); + let x = reflect(e1, e2); +} +)"; + + EXPECT_FALSE(ShouldRun<BuiltinPolyfill>(src)); + EXPECT_FALSE(ShouldRun<BuiltinPolyfill>(src, polyfillReflectVec2F32())); +} + +TEST_F(BuiltinPolyfillTest, ShouldRunReflect_vec3_f32) { + auto* src = R"( +fn f() { + let e1 = vec3<f32>(1.0f); + let e2 = vec3<f32>(1.0f); + let x = reflect(e1, e2); +} +)"; + + EXPECT_FALSE(ShouldRun<BuiltinPolyfill>(src)); + EXPECT_FALSE(ShouldRun<BuiltinPolyfill>(src, polyfillReflectVec2F32())); +} + +TEST_F(BuiltinPolyfillTest, ShouldRunReflect_vec3_f16) { + auto* src = R"( +enable f16; + +fn f() { + let e1 = vec3<f16>(1.0h); + let e2 = vec3<f16>(1.0h); + let x = reflect(e1, e2); +} +)"; + + EXPECT_FALSE(ShouldRun<BuiltinPolyfill>(src)); + EXPECT_FALSE(ShouldRun<BuiltinPolyfill>(src, polyfillReflectVec2F32())); +} + +TEST_F(BuiltinPolyfillTest, ShouldRunReflect_vec4_f32) { + auto* src = R"( +fn f() { + let e1 = vec4<f32>(1.0f); + let e2 = vec4<f32>(1.0f); + let x = reflect(e1, e2); +} +)"; + + EXPECT_FALSE(ShouldRun<BuiltinPolyfill>(src)); + EXPECT_FALSE(ShouldRun<BuiltinPolyfill>(src, polyfillReflectVec2F32())); +} + 
+TEST_F(BuiltinPolyfillTest, ShouldRunReflect_vec4_f16) { + auto* src = R"( +enable f16; + +fn f() { + let e1 = vec4<f16>(1.0h); + let e2 = vec4<f16>(1.0h); + let x = reflect(e1, e2); +} +)"; + + EXPECT_FALSE(ShouldRun<BuiltinPolyfill>(src)); + EXPECT_FALSE(ShouldRun<BuiltinPolyfill>(src, polyfillReflectVec2F32())); +} + +TEST_F(BuiltinPolyfillTest, Reflect_ConstantExpression) { + auto* src = R"( +fn f() { + let r : vec2<f32> = reflect(vec2<f32>(1.0), vec2<f32>(1.0)); +} +)"; + + EXPECT_FALSE(ShouldRun<BuiltinPolyfill>(src, polyfillReflectVec2F32())); +} + +TEST_F(BuiltinPolyfillTest, Reflect_vec2_f32) { + auto* src = R"( +fn f() { + let v = 0.5f; + let r : vec2<f32> = reflect(vec2<f32>(v), vec2<f32>(v)); +} +)"; + + auto* expect = R"( +fn tint_reflect(e1 : vec2<f32>, e2 : vec2<f32>) -> vec2<f32> { + let factor = (-2.0 * dot(e1, e2)); + return (e1 + (factor * e2)); +} + +fn f() { + let v = 0.5f; + let r : vec2<f32> = tint_reflect(vec2<f32>(v), vec2<f32>(v)); +} +)"; + + auto got = Run<BuiltinPolyfill>(src, polyfillReflectVec2F32()); + + EXPECT_EQ(expect, str(got)); +} + +TEST_F(BuiltinPolyfillTest, Reflect_multiple_types) { + auto* src = R"( +enable f16; + +fn f() { + let in_f32 = 0.5f; + let out_f32_vec2 : vec2<f32> = reflect(vec2<f32>(in_f32), vec2<f32>(in_f32)); + let out_f32_vec3 : vec3<f32> = reflect(vec3<f32>(in_f32), vec3<f32>(in_f32)); + let out_f32_vec4 : vec4<f32> = reflect(vec4<f32>(in_f32), vec4<f32>(in_f32)); + let in_f16 = 0.5h; + let out_f16_vec2 : vec2<f16> = reflect(vec2<f16>(in_f16), vec2<f16>(in_f16)); + let out_f16_vec3 : vec3<f16> = reflect(vec3<f16>(in_f16), vec3<f16>(in_f16)); + let out_f16_vec4 : vec4<f16> = reflect(vec4<f16>(in_f16), vec4<f16>(in_f16)); +} +)"; + + auto* expect = R"( +enable f16; + +fn tint_reflect(e1 : vec2<f32>, e2 : vec2<f32>) -> vec2<f32> { + let factor = (-2.0 * dot(e1, e2)); + return (e1 + (factor * e2)); +} + +fn f() { + let in_f32 = 0.5f; + let out_f32_vec2 : vec2<f32> = tint_reflect(vec2<f32>(in_f32), 
vec2<f32>(in_f32)); + let out_f32_vec3 : vec3<f32> = reflect(vec3<f32>(in_f32), vec3<f32>(in_f32)); + let out_f32_vec4 : vec4<f32> = reflect(vec4<f32>(in_f32), vec4<f32>(in_f32)); + let in_f16 = 0.5h; + let out_f16_vec2 : vec2<f16> = reflect(vec2<f16>(in_f16), vec2<f16>(in_f16)); + let out_f16_vec3 : vec3<f16> = reflect(vec3<f16>(in_f16), vec3<f16>(in_f16)); + let out_f16_vec4 : vec4<f16> = reflect(vec4<f16>(in_f16), vec4<f16>(in_f16)); +} +)"; + + auto got = Run<BuiltinPolyfill>(src, polyfillReflectVec2F32()); + + EXPECT_EQ(expect, str(got)); +} + +//////////////////////////////////////////////////////////////////////////////// // saturate //////////////////////////////////////////////////////////////////////////////// DataMap polyfillSaturate() {
diff --git a/src/tint/writer/hlsl/generator.h b/src/tint/writer/hlsl/generator.h index c624943..80dd518 100644 --- a/src/tint/writer/hlsl/generator.h +++ b/src/tint/writer/hlsl/generator.h
@@ -61,6 +61,8 @@ /// Interstage locations actually used as inputs in the next stage of the pipeline. /// This is potentially used for truncating unused interstage outputs at current shader stage. std::bitset<16> interstage_locations; + /// Set to `true` to generate polyfill for `reflect` builtin for vec2<f32> + bool polyfill_reflect_vec2_f32 = false; /// Reflect the fields of this class so that it can be used by tint::ForeachField() TINT_REFLECT(root_constant_binding_point,
diff --git a/src/tint/writer/hlsl/generator_impl.cc b/src/tint/writer/hlsl/generator_impl.cc index 787962b..14f1d0c 100644 --- a/src/tint/writer/hlsl/generator_impl.cc +++ b/src/tint/writer/hlsl/generator_impl.cc
@@ -181,6 +181,7 @@ polyfills.insert_bits = transform::BuiltinPolyfill::Level::kFull; polyfills.int_div_mod = true; polyfills.precise_float_mod = true; + polyfills.reflect_vec2_f32 = options.polyfill_reflect_vec2_f32; polyfills.texture_sample_base_clamp_to_edge_2d_f32 = true; polyfills.workgroup_uniform_load = true; data.Add<transform::BuiltinPolyfill::Config>(polyfills);
diff --git a/webgpu-cts/expectations.txt b/webgpu-cts/expectations.txt index 38fbcca..c1adddb 100644 --- a/webgpu-cts/expectations.txt +++ b/webgpu-cts/expectations.txt
@@ -233,9 +233,6 @@ crbug.com/tint/1796 [ intel-gen-9 ubuntu ] webgpu:shader,execution,expression,call,builtin,asin:f32:inputSource="uniform";vectorize=4 [ Failure ] crbug.com/tint/1796 [ intel-gen-9 win10 ] webgpu:shader,execution,expression,call,builtin,asin:f32:inputSource="uniform";vectorize=4 [ Failure ] crbug.com/tint/1796 [ nvidia-0x2184 win10 ] webgpu:shader,execution,expression,call,builtin,asin:f32:inputSource="uniform";vectorize=4 [ Failure ] -crbug.com/tint/1798 [ intel-gen-9 win10 ] webgpu:shader,execution,expression,call,builtin,reflect:f32_vec2:inputSource="storage_r" [ Failure ] -crbug.com/tint/1798 [ intel-gen-9 win10 ] webgpu:shader,execution,expression,call,builtin,reflect:f32_vec2:inputSource="storage_rw" [ Failure ] -crbug.com/tint/1798 [ intel-gen-9 win10 ] webgpu:shader,execution,expression,call,builtin,reflect:f32_vec2:inputSource="uniform" [ Failure ] crbug.com/tint/1801 [ nvidia-0x2184 target-cpu-32 ] webgpu:shader,execution,shader_io,compute_builtins:inputs:method="mixed";dispatch="indirect";groupSize={"x":1,"y":1,"z":1};numGroups={"x":1,"y":1,"z":1} [ Failure ] crbug.com/tint/1801 [ nvidia-0x2184 target-cpu-32 ] webgpu:shader,execution,shader_io,compute_builtins:inputs:method="mixed";dispatch="indirect";groupSize={"x":1,"y":1,"z":1};numGroups={"x":8,"y":4,"z":2} [ Failure ] crbug.com/tint/1801 [ nvidia-0x2184 target-cpu-32 ] webgpu:shader,execution,shader_io,compute_builtins:inputs:method="mixed";dispatch="indirect";groupSize={"x":3,"y":7,"z":5};numGroups={"x":13,"y":9,"z":11} [ Failure ]