d3d11: Use D3D11 multithread protection

With Graphite on Dawn D3D11 backend, Chrome's media stack uses the same
D3D11 device as Graphite and has no way of synchronizing access to the
immediate context which is otherwise protected internally in Dawn using
a device scoped mutex.

Using ID3D11Multithread::SetMultithreadProtected(TRUE) enables locking
in D3D11 which helps with this, but it has significant overhead since it
acquires/releases a mutex for every API call. However, ID3D11Multithread
provides Enter/Leave methods which allow putting entire sequences of
calls in a critical section minimizing the overhead of using the mutex.

This CL does the following:

1) Call SetMultithreadProtected(TRUE) if D3D11MultithreadProtected
   feature is enabled.

2) Provide a ScopedCriticalSection helper for CommandRecordingContext
   that calls Enter/Leave. This is used by d3d11 CommandBuffer::Execute
   to mark the entire command processing loop as a critical section.

3) In TickImpl, add a debug layer error collection call before executing
   the pending commands so it's not skipped if execution or NextSerial
   fails.

Bug: dawn:1927, chromium:1464550
Change-Id: Ic0a3185d9e3d623a82058142fef4e010da054125
Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/142540
Auto-Submit: Sunny Sachanandani <sunnyps@chromium.org>
Reviewed-by: Austin Eng <enga@chromium.org>
Kokoro: Kokoro <noreply+kokoro@google.com>
Commit-Queue: Sunny Sachanandani <sunnyps@chromium.org>
diff --git a/dawn.json b/dawn.json
index 72908b7..c306b7c 100644
--- a/dawn.json
+++ b/dawn.json
@@ -1462,10 +1462,11 @@
             {"value": 1005, "name": "chromium experimental dp4a", "tags": ["dawn"]},
             {"value": 1006, "name": "timestamp query inside passes", "tags": ["dawn"]},
             {"value": 1007, "name": "implicit device synchronization", "tags": ["dawn", "native"]},
-            {"value": 1008, "name": "surface capabilities", "tags": ["dawn"]},
+            {"value": 1008, "name": "surface capabilities", "tags": ["dawn", "native"]},
             {"value": 1009, "name": "transient attachments", "tags": ["dawn"]},
             {"value": 1010, "name": "MSAA render to single sampled", "tags": ["dawn"]},
-            {"value": 1011, "name": "dual source blending", "tags": ["dawn"]}
+            {"value": 1011, "name": "dual source blending", "tags": ["dawn"]},
+            {"value": 1012, "name": "D3D11 multithread protected", "tags": ["dawn", "native"]}
         ]
     },
     "filter mode": {
diff --git a/src/dawn/native/Features.cpp b/src/dawn/native/Features.cpp
index b50e22d..33a9d42 100644
--- a/src/dawn/native/Features.cpp
+++ b/src/dawn/native/Features.cpp
@@ -128,6 +128,10 @@
       "https://dawn.googlesource.com/dawn/+/refs/heads/main/docs/dawn/features/"
       "dual_source_blending.md",
       FeatureInfo::FeatureState::Experimental}},
+    {Feature::D3D11MultithreadProtected,
+     {"d3d11-multithread-protected",
+      "Enable ID3D11Multithread protection for interop with external users of the D3D11 device.",
+      "https://bugs.chromium.org/p/dawn/issues/detail?id=1927", FeatureInfo::FeatureState::Stable}},
 }};
 
 Feature FromAPIFeature(wgpu::FeatureName feature) {
@@ -182,6 +186,8 @@
             return Feature::MSAARenderToSingleSampled;
         case wgpu::FeatureName::DualSourceBlending:
             return Feature::DualSourceBlending;
+        case wgpu::FeatureName::D3D11MultithreadProtected:
+            return Feature::D3D11MultithreadProtected;
     }
     return Feature::InvalidEnum;
 }
@@ -232,6 +238,8 @@
             return wgpu::FeatureName::MSAARenderToSingleSampled;
         case Feature::DualSourceBlending:
             return wgpu::FeatureName::DualSourceBlending;
+        case Feature::D3D11MultithreadProtected:
+            return wgpu::FeatureName::D3D11MultithreadProtected;
         case Feature::EnumCount:
             break;
     }
diff --git a/src/dawn/native/Features.h b/src/dawn/native/Features.h
index 7a2fc2d..af93479 100644
--- a/src/dawn/native/Features.h
+++ b/src/dawn/native/Features.h
@@ -51,6 +51,7 @@
     TransientAttachments,
     MSAARenderToSingleSampled,
     DualSourceBlending,
+    D3D11MultithreadProtected,
 
     EnumCount,
     InvalidEnum = EnumCount,
diff --git a/src/dawn/native/d3d11/CommandBufferD3D11.cpp b/src/dawn/native/d3d11/CommandBufferD3D11.cpp
index 09b25cb..dd87fa0 100644
--- a/src/dawn/native/d3d11/CommandBufferD3D11.cpp
+++ b/src/dawn/native/d3d11/CommandBufferD3D11.cpp
@@ -105,6 +105,10 @@
 MaybeError CommandBuffer::Execute() {
     CommandRecordingContext* commandContext = ToBackend(GetDevice())->GetPendingCommandContext();
 
+    // Mark a critical section for this entire scope to minimize the cost of mutex acquire/release
+    // when ID3D11Multithread protection is enabled.
+    auto scopedCriticalSection = commandContext->EnterScopedCriticalSection();
+
     auto LazyClearSyncScope = [commandContext](const SyncScopeResourceUsage& scope) -> MaybeError {
         for (size_t i = 0; i < scope.textures.size(); i++) {
             Texture* texture = ToBackend(scope.textures[i]);
diff --git a/src/dawn/native/d3d11/CommandRecordingContextD3D11.cpp b/src/dawn/native/d3d11/CommandRecordingContextD3D11.cpp
index 8500f58..76b4157 100644
--- a/src/dawn/native/d3d11/CommandRecordingContextD3D11.cpp
+++ b/src/dawn/native/d3d11/CommandRecordingContextD3D11.cpp
@@ -43,9 +43,15 @@
                           "D3D11 querying immediate context for ID3D11DeviceContext4 interface"));
 
     DAWN_TRY(
-        CheckHRESULT(d3d11DeviceContext4.As(&mD3D11UserDefinedAnnotation),
+        CheckHRESULT(d3d11DeviceContext4.As(&mD3DUserDefinedAnnotation),
                      "D3D11 querying immediate context for ID3DUserDefinedAnnotation interface"));
 
+    if (device->HasFeature(Feature::D3D11MultithreadProtected)) {
+        DAWN_TRY(CheckHRESULT(d3d11DeviceContext.As(&mD3D11Multithread),
+                              "D3D11 querying immediate context for ID3D11Multithread interface"));
+        mD3D11Multithread->SetMultithreadProtected(TRUE);
+    }
+
     mD3D11Device = d3d11Device;
     mD3D11DeviceContext4 = std::move(d3d11DeviceContext4);
     mIsOpen = true;
@@ -102,7 +108,7 @@
 }
 
 ID3DUserDefinedAnnotation* CommandRecordingContext::GetD3DUserDefinedAnnotation() const {
-    return mD3D11UserDefinedAnnotation.Get();
+    return mD3DUserDefinedAnnotation.Get();
 }
 
 Buffer* CommandRecordingContext::GetUniformBuffer() const {
@@ -143,6 +149,25 @@
     mNeedsSubmit = true;
 }
 
+CommandRecordingContext::ScopedCriticalSection::ScopedCriticalSection(
+    ComPtr<ID3D11Multithread> d3d11Multithread)
+    : mD3D11Multithread(std::move(d3d11Multithread)) {
+    if (mD3D11Multithread) {
+        mD3D11Multithread->Enter();
+    }
+}
+
+CommandRecordingContext::ScopedCriticalSection::~ScopedCriticalSection() {
+    if (mD3D11Multithread) {
+        mD3D11Multithread->Leave();
+    }
+}
+
+CommandRecordingContext::ScopedCriticalSection
+CommandRecordingContext::EnterScopedCriticalSection() {
+    return ScopedCriticalSection(mD3D11Multithread);
+}
+
 void CommandRecordingContext::WriteUniformBuffer(uint32_t offset, uint32_t element) {
     ASSERT(offset < kMaxNumBuiltinElements);
     if (mUniformBufferData[offset] != element) {
diff --git a/src/dawn/native/d3d11/CommandRecordingContextD3D11.h b/src/dawn/native/d3d11/CommandRecordingContextD3D11.h
index 7ceb3e4..6e72d88 100644
--- a/src/dawn/native/d3d11/CommandRecordingContextD3D11.h
+++ b/src/dawn/native/d3d11/CommandRecordingContextD3D11.h
@@ -15,6 +15,7 @@
 #ifndef SRC_DAWN_NATIVE_D3D11_COMMANDRECORDINGCONTEXT_D3D11_H_
 #define SRC_DAWN_NATIVE_D3D11_COMMANDRECORDINGCONTEXT_D3D11_H_
 
+#include "dawn/common/NonCopyable.h"
 #include "dawn/common/Ref.h"
 #include "dawn/native/Error.h"
 #include "dawn/native/d3d/d3d_platform.h"
@@ -43,6 +44,21 @@
     Buffer* GetUniformBuffer() const;
     Device* GetDevice() const;
 
+    struct ScopedCriticalSection : NonMovable {
+        explicit ScopedCriticalSection(ComPtr<ID3D11Multithread>);
+        ~ScopedCriticalSection();
+
+      private:
+        ComPtr<ID3D11Multithread> mD3D11Multithread;
+    };
+    // Returns a scoped object that marks a critical section using the
+    // ID3D11Multithread Enter and Leave methods. This allows minimizing the
+    // cost of D3D11 multithread protection by allowing a single mutex Acquire
+    // and Release call for an entire set of operations on the immediate context
+    // e.g. when executing command buffers. This only has an effect if the
+    // D3D11MultithreadProtected feature is enabled.
+    ScopedCriticalSection EnterScopedCriticalSection();
+
     // Write the built-in variable value to the uniform buffer.
     void WriteUniformBuffer(uint32_t offset, uint32_t element);
     MaybeError FlushUniformBuffer();
@@ -52,7 +68,8 @@
     bool mNeedsSubmit = false;
     ComPtr<ID3D11Device> mD3D11Device;
     ComPtr<ID3D11DeviceContext4> mD3D11DeviceContext4;
-    ComPtr<ID3DUserDefinedAnnotation> mD3D11UserDefinedAnnotation;
+    ComPtr<ID3D11Multithread> mD3D11Multithread;
+    ComPtr<ID3DUserDefinedAnnotation> mD3DUserDefinedAnnotation;
 
     // The maximum number of builtin elements is 4 (vec4). It must be multiple of 4.
     static constexpr size_t kMaxNumBuiltinElements = 4;
diff --git a/src/dawn/native/d3d11/DeviceD3D11.cpp b/src/dawn/native/d3d11/DeviceD3D11.cpp
index 18c9c94..74a5cd3 100644
--- a/src/dawn/native/d3d11/DeviceD3D11.cpp
+++ b/src/dawn/native/d3d11/DeviceD3D11.cpp
@@ -154,11 +154,13 @@
     // Perform cleanup operations to free unused objects
     [[maybe_unused]] ExecutionSerial completedSerial = GetCompletedCommandSerial();
 
+    // Check for debug layer messages before executing the command context in case we encounter an
+    // error during execution and early out as a result.
+    DAWN_TRY(CheckDebugLayerAndGenerateErrors());
     if (mPendingCommands.IsOpen() && mPendingCommands.NeedsSubmit()) {
         DAWN_TRY(ExecutePendingCommandContext());
         DAWN_TRY(NextSerial());
     }
-
     DAWN_TRY(CheckDebugLayerAndGenerateErrors());
 
     return {};
diff --git a/src/dawn/native/d3d11/PhysicalDeviceD3D11.cpp b/src/dawn/native/d3d11/PhysicalDeviceD3D11.cpp
index b6e2a08..d30825e 100644
--- a/src/dawn/native/d3d11/PhysicalDeviceD3D11.cpp
+++ b/src/dawn/native/d3d11/PhysicalDeviceD3D11.cpp
@@ -137,6 +137,7 @@
     EnableFeature(Feature::DepthClipControl);
     EnableFeature(Feature::TextureCompressionBC);
     EnableFeature(Feature::SurfaceCapabilities);
+    EnableFeature(Feature::D3D11MultithreadProtected);
 
     // To import multi planar textures, we need to at least tier 2 support.
     if (mDeviceInfo.supportsSharedResourceCapabilityTier2) {
diff --git a/src/dawn/wire/SupportedFeatures.cpp b/src/dawn/wire/SupportedFeatures.cpp
index 416b63f..2710f01 100644
--- a/src/dawn/wire/SupportedFeatures.cpp
+++ b/src/dawn/wire/SupportedFeatures.cpp
@@ -26,6 +26,7 @@
         case WGPUFeatureName_DawnShaderFloat16:  // Deprecated
         case WGPUFeatureName_ImplicitDeviceSynchronization:
         case WGPUFeatureName_SurfaceCapabilities:
+        case WGPUFeatureName_D3D11MultithreadProtected:
             return false;
         case WGPUFeatureName_Depth32FloatStencil8:
         case WGPUFeatureName_TimestampQuery: