Refactors Dawn benchmarks to pull out infra and fix existing benchmarks. - Updates the BGL benchmarks so that they more accurately reflect the expected benchmarks. - Moves device setup into helper fixture. - Renames the files in preparation for follow up change to add more benchmarks. Bug: dawn:1769 Change-Id: I0076d3e468d6292cc13f700beac3a14a408b0699 Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/141280 Commit-Queue: Loko Kung <lokokung@google.com> Kokoro: Kokoro <noreply+kokoro@google.com> Reviewed-by: Austin Eng <enga@chromium.org>

commit: da67ff851b427d743138c1b70d1d78e3798f0be9 [log] [tgz]
author: Loko Kung <lokokung@google.com> Fri Jul 14 23:37:48 2023 +0000
committer: Dawn LUCI CQ <dawn-scoped@luci-project-accounts.iam.gserviceaccount.com> Fri Jul 14 23:37:48 2023 +0000
tree: db358bd9e24afb2ec431fafdb8f870db3ddd7151
parent: 69ec4d7e9dc14be9707afc151616c3ea9e02cc00 [diff]
diff --git a/src/dawn/tests/benchmarks/BGLCreation.cpp b/src/dawn/tests/benchmarks/BGLCreation.cpp
deleted file mode 100644
index 36ff003..0000000
--- a/src/dawn/tests/benchmarks/BGLCreation.cpp
+++ /dev/null

@@ -1,115 +0,0 @@
-// Copyright 2023 The Dawn Authors
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include <benchmark/benchmark.h>
-#include <dawn/webgpu_cpp.h>
-#include <array>
-#include <vector>
-
-#include "dawn/common/Log.h"
-#include "dawn/tests/benchmarks/NullDeviceSetup.h"
-
-static void RedundantBGLCreation(benchmark::State& state) {
-    static wgpu::Device device = nullptr;
-
-    if (state.thread_index() == 0) {
-        std::vector<wgpu::FeatureName> requiredFeatures;
-        if (state.threads() > 1) {
-            requiredFeatures.push_back(wgpu::FeatureName::ImplicitDeviceSynchronization);
-        }
-
-        wgpu::DeviceDescriptor deviceDesc = {};
-        deviceDesc.requiredFeatures = requiredFeatures.data();
-        deviceDesc.requiredFeaturesCount = requiredFeatures.size();
-        device = CreateNullDevice(deviceDesc);
-    }
-
-    std::vector<wgpu::BindGroupLayoutEntry> entries(state.range(0));
-    for (uint32_t i = 0; i < entries.size(); ++i) {
-        entries[i].binding = i;
-        entries[i].visibility = wgpu::ShaderStage::Vertex | wgpu::ShaderStage::Fragment;
-        entries[i].buffer.type = wgpu::BufferBindingType::Uniform;
-    }
-
-    wgpu::BindGroupLayoutDescriptor bglDesc = {};
-    bglDesc.entryCount = entries.size();
-    bglDesc.entries = entries.data();
-
-    thread_local std::vector<wgpu::BindGroupLayout> bgls;
-    bgls.reserve(100000);
-    for (auto _ : state) {
-        bgls.push_back(device.CreateBindGroupLayout(&bglDesc));
-    }
-    bgls.clear();
-
-    if (state.thread_index() == 0) {
-        device = nullptr;
-    }
-}
-
-static void UniqueBGLCreation(benchmark::State& state) {
-    static wgpu::Device device = nullptr;
-
-    if (state.thread_index() == 0) {
-        std::vector<wgpu::FeatureName> requiredFeatures;
-        if (state.threads() > 1) {
-            requiredFeatures.push_back(wgpu::FeatureName::ImplicitDeviceSynchronization);
-        }
-
-        wgpu::DeviceDescriptor deviceDesc = {};
-        deviceDesc.requiredFeatures = requiredFeatures.data();
-        deviceDesc.requiredFeaturesCount = requiredFeatures.size();
-        device = CreateNullDevice(deviceDesc);
-    }
-
-    std::vector<wgpu::BindGroupLayoutEntry> entries(state.range(0));
-    for (uint32_t i = 0; i < entries.size(); ++i) {
-        entries[i].binding = i;
-        entries[i].visibility = wgpu::ShaderStage::Vertex | wgpu::ShaderStage::Fragment;
-        entries[i].buffer.type = wgpu::BufferBindingType::Uniform;
-    }
-    entries[0].buffer.minBindingSize = 4u;
-
-    wgpu::BindGroupLayoutDescriptor bglDesc = {};
-    bglDesc.entryCount = entries.size();
-    bglDesc.entries = entries.data();
-
-    thread_local std::vector<wgpu::BindGroupLayout> bgls;
-    bgls.reserve(100000);
-    for (auto _ : state) {
-        entries[0].buffer.minBindingSize += 4;
-        bgls.push_back(device.CreateBindGroupLayout(&bglDesc));
-    }
-    bgls.clear();
-
-    if (state.thread_index() == 0) {
-        device = nullptr;
-    }
-}
-
-BENCHMARK(RedundantBGLCreation)
-    ->Setup(SetupNullBackend)
-    ->Arg(1)
-    ->Arg(12)
-    ->Threads(1)
-    ->Threads(4)
-    ->Threads(16);
-
-BENCHMARK(UniqueBGLCreation)
-    ->Setup(SetupNullBackend)
-    ->Arg(1)
-    ->Arg(12)
-    ->Threads(1)
-    ->Threads(4)
-    ->Threads(16);

diff --git a/src/dawn/tests/benchmarks/BUILD.gn b/src/dawn/tests/benchmarks/BUILD.gn
index d8ea27b..813370f 100644
--- a/src/dawn/tests/benchmarks/BUILD.gn
+++ b/src/dawn/tests/benchmarks/BUILD.gn

@@ -27,9 +27,9 @@
     "//third_party/google_benchmark:benchmark_main",
   ]
   sources = [
-    "BGLCreation.cpp",
     "NullDeviceSetup.cpp",
     "NullDeviceSetup.h",
+    "ObjectCreation.cpp",
   ]
   configs += [ "${dawn_root}/include/dawn:public" ]
 }

diff --git a/src/dawn/tests/benchmarks/CMakeLists.txt b/src/dawn/tests/benchmarks/CMakeLists.txt
index f575866..9b49055 100644
--- a/src/dawn/tests/benchmarks/CMakeLists.txt
+++ b/src/dawn/tests/benchmarks/CMakeLists.txt

@@ -14,9 +14,9 @@
 
 if (${DAWN_BUILD_BENCHMARKS})
   add_executable(dawn_benchmarks
-    "BGLCreation.cpp"
     "NullDeviceSetup.cpp"
     "NullDeviceSetup.h"
+    "ObjectCreation.cpp"
   )
   set_target_properties(dawn_benchmarks PROPERTIES FOLDER "Benchmarks")
 

diff --git a/src/dawn/tests/benchmarks/NullDeviceSetup.cpp b/src/dawn/tests/benchmarks/NullDeviceSetup.cpp
index c7057fc..a280d52 100644
--- a/src/dawn/tests/benchmarks/NullDeviceSetup.cpp
+++ b/src/dawn/tests/benchmarks/NullDeviceSetup.cpp

@@ -23,58 +23,63 @@
 #include "dawn/dawn_proc.h"
 #include "dawn/native/DawnNative.h"
 
-namespace {
-std::unique_ptr<dawn::native::Instance> nativeInstance = nullptr;
-wgpu::Adapter nullBackendAdapter = nullptr;
-}  // namespace
+namespace dawn {
 
-void SetupNullBackend(const benchmark::State& state) {
-    dawnProcSetProcs(&dawn::native::GetProcs());
+void NullDeviceBenchmarkFixture::SetUp(const benchmark::State& state) {  // Per-run setup: thread 0 builds the Null adapter and device, every other thread waits for the device.
+    // Function-local static, so the lambda runs only on the first SetUp call: it installs the native proc table and creates the instance.
+    static std::unique_ptr<dawn::native::Instance> nativeInstance = []() {
+        dawnProcSetProcs(&dawn::native::GetProcs());
+        return std::make_unique<dawn::native::Instance>();
+    }();
 
-    if (!nativeInstance) {
-        nativeInstance = std::make_unique<dawn::native::Instance>();
-    }
+    if (state.thread_index() == 0) {
+        // Only thread 0 creates the adapter and device, and it does so on every SetUp call rather than once per process.
+        {
+            std::lock_guard lock(mMutex);  // Held until the closing brace below, i.e. while adapter, device and both callbacks are set.
 
-    if (!nullBackendAdapter) {
-        wgpu::RequestAdapterOptions options = {};
-        options.backendType = wgpu::BackendType::Null;
+            // Ask the instance for Null-backend adapters and wrap the first one as the fixture adapter.
+            wgpu::RequestAdapterOptions options = {};
+            options.backendType = wgpu::BackendType::Null;
+            auto nativeAdapter = nativeInstance->EnumerateAdapters(&options)[0];  // Index 0 is taken unchecked; assumes at least one adapter is returned.
+            adapter = wgpu::Adapter(nativeAdapter.Get());
+            ASSERT(adapter != nullptr);
 
-        auto nativeAdapter = nativeInstance->EnumerateAdapters(&options)[0];
-        nullBackendAdapter = wgpu::Adapter(nativeAdapter.Get());
-    }
-    ASSERT(nullBackendAdapter != nullptr);
-}
-
-wgpu::Device CreateNullDevice(const wgpu::DeviceDescriptor& desc) {
-    wgpu::Device device;
-
-    nullBackendAdapter.RequestDevice(
-        &desc,
-        [](WGPURequestDeviceStatus status, WGPUDevice cDevice, char const* message,
-           void* userdata) {
-            ASSERT(status == WGPURequestDeviceStatus_Success);
-            *reinterpret_cast<wgpu::Device*>(userdata) = wgpu::Device::Acquire(cDevice);
-        },
-        &device);
-    while (!device) {
-        wgpuInstanceProcessEvents(nativeInstance->Get());
-    }
-
-    device.SetUncapturedErrorCallback(
-        [](WGPUErrorType, char const* message, void* userdata) {
-            dawn::ErrorLog() << message;
-            UNREACHABLE();
-        },
-        nullptr);
-
-    device.SetDeviceLostCallback(
-        [](WGPUDeviceLostReason reason, char const* message, void* userdata) {
-            if (reason == WGPUDeviceLostReason_Undefined) {
-                dawn::ErrorLog() << message;
-                UNREACHABLE();
+            // Request a device using the descriptor supplied by the subclass; the callback stores it into the device member.
+            wgpu::DeviceDescriptor desc = GetDeviceDescriptor();
+            adapter.RequestDevice(
+                &desc,
+                [](WGPURequestDeviceStatus status, WGPUDevice cDevice, char const* message,
+                   void* userdata) {
+                    ASSERT(status == WGPURequestDeviceStatus_Success);
+                    *reinterpret_cast<wgpu::Device*>(userdata) = wgpu::Device::Acquire(cDevice);
+                },
+                &device);  // userdata: address of the device member that the callback assigns.
+            while (!device) {  // Pump instance events until the callback above has stored a device.
+                wgpuInstanceProcessEvents(nativeInstance->Get());
             }
-        },
-        nullptr);
 
-    return device;
+            device.SetUncapturedErrorCallback(  // Any uncaptured error is logged and then hits UNREACHABLE().
+                [](WGPUErrorType, char const* message, void* userdata) {
+                    dawn::ErrorLog() << message;
+                    UNREACHABLE();
+                },
+                nullptr);
+
+            device.SetDeviceLostCallback(  // Only a loss with reason Undefined is logged and hits UNREACHABLE(); other reasons are ignored.
+                [](WGPUDeviceLostReason reason, char const* message, void* userdata) {
+                    if (reason == WGPUDeviceLostReason_Undefined) {
+                        dawn::ErrorLog() << message;
+                        UNREACHABLE();
+                    }
+                },
+                nullptr);
+        }
+        mCv.notify_all();  // Issued after the lock scope has closed; wakes threads blocked in the else branch.
+    } else {
+        // Every other thread blocks here until the device member is non-null.
+        std::unique_lock lock(mMutex);
+        mCv.wait(lock, [this] { return device != nullptr; });  // NOTE(review): nothing visible here resets device between runs; if the fixture object is reused, this predicate may already hold from the previous run. Confirm that a TearDown clears it.
+    }
 }
+
+}  // namespace dawn

diff --git a/src/dawn/tests/benchmarks/NullDeviceSetup.h b/src/dawn/tests/benchmarks/NullDeviceSetup.h
index d8c7cad..f46775b 100644
--- a/src/dawn/tests/benchmarks/NullDeviceSetup.h
+++ b/src/dawn/tests/benchmarks/NullDeviceSetup.h

@@ -15,17 +15,33 @@
 #ifndef DAWN_TESTS_BENCHMARKS_NULLDEVICESETUP
 #define DAWN_TESTS_BENCHMARKS_NULLDEVICESETUP
 
+#include <benchmark/benchmark.h>
 #include <dawn/webgpu_cpp.h>
-
-namespace benchmark {
-class State;
-}
+#include <condition_variable>
+#include <mutex>
 
 namespace wgpu {
 struct DeviceDescriptor;
-}
+}  // namespace wgpu
 
-void SetupNullBackend(const benchmark::State& state);
-wgpu::Device CreateNullDevice(const wgpu::DeviceDescriptor& desc);
+namespace dawn {
+
+class NullDeviceBenchmarkFixture : public benchmark::Fixture {  // Benchmark fixture whose SetUp provides a Null-backend adapter and device to the benchmark body.
+  public:
+    void SetUp(const benchmark::State& state) override;  // Thread 0 creates adapter and device; other threads block until device is non-null.
+
+  protected:
+    wgpu::Adapter adapter = nullptr;  // Null-backend adapter, assigned by thread 0 in SetUp.
+    wgpu::Device device = nullptr;  // Device requested from adapter with the descriptor from GetDeviceDescriptor().
+
+  private:
+    virtual wgpu::DeviceDescriptor GetDeviceDescriptor() const = 0;  // Implemented by subclasses; SetUp passes the result to RequestDevice.
+
+    // Mutex and condition variable that make the other threads wait in SetUp until thread 0 has created the device.
+    std::mutex mMutex;
+    std::condition_variable mCv;
+};
+
+}  // namespace dawn
 
 #endif  // DAWN_TESTS_BENCHMARKS_NULLDEVICESETUP

diff --git a/src/dawn/tests/benchmarks/ObjectCreation.cpp b/src/dawn/tests/benchmarks/ObjectCreation.cpp
new file mode 100644
index 0000000..bc5aac0
--- /dev/null
+++ b/src/dawn/tests/benchmarks/ObjectCreation.cpp

@@ -0,0 +1,115 @@
+// Copyright 2023 The Dawn Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <benchmark/benchmark.h>
+#include <dawn/webgpu_cpp.h>
+#include <array>
+#include <vector>
+
+#include "dawn/common/Log.h"
+#include "dawn/tests/benchmarks/NullDeviceSetup.h"
+
+namespace dawn {
+namespace {
+
+// Fixture for benchmarks that create and recreate Dawn objects on the device provided by NullDeviceBenchmarkFixture.
+class ObjectCreation : public NullDeviceBenchmarkFixture {
+  protected:
+    ObjectCreation() {
+        // Always request ImplicitDeviceSynchronization, regardless of thread count. Object creation
+        // still needs it even though the frontend cache is thread-safe; once other parts of Dawn,
+        // i.e. memory management, are thread-safe, these benchmarks should work without it.
+        requiredFeatures.push_back(wgpu::FeatureName::ImplicitDeviceSynchronization);
+    }
+
+  private:
+    wgpu::DeviceDescriptor GetDeviceDescriptor() const override {  // Builds a descriptor that requests exactly the features in requiredFeatures.
+        wgpu::DeviceDescriptor deviceDesc = {};
+        deviceDesc.requiredFeatures = requiredFeatures.data();  // Pointer into the member vector: the descriptor borrows this storage, it does not copy it.
+        deviceDesc.requiredFeaturesCount = requiredFeatures.size();
+        return deviceDesc;
+    }
+
+    std::vector<wgpu::FeatureName> requiredFeatures;  // Filled once in the constructor; backs the pointer handed out above.
+};
+
+BENCHMARK_DEFINE_F(ObjectCreation, SameBindGroupLayout)  // Times repeated CreateBindGroupLayout calls that all use one unchanged descriptor.
+(benchmark::State& state) {
+    std::vector<wgpu::BindGroupLayoutEntry> entries(state.range(0));  // range(0) is the number of layout entries.
+    for (uint32_t i = 0; i < entries.size(); ++i) {
+        entries[i].binding = i;  // Binding numbers are 0..N-1, one uniform buffer per binding.
+        entries[i].visibility = wgpu::ShaderStage::Vertex | wgpu::ShaderStage::Fragment;
+        entries[i].buffer.type = wgpu::BufferBindingType::Uniform;
+    }
+
+    wgpu::BindGroupLayoutDescriptor bglDesc = {};
+    bglDesc.entryCount = entries.size();
+    bglDesc.entries = entries.data();
+
+    std::vector<wgpu::BindGroupLayout> bgls;  // Holds every created layout until the function returns.
+    bgls.reserve(100000);  // Capacity requested up front; presumably to keep vector growth out of the timed loop (confirm).
+    bgls.push_back(device.CreateBindGroupLayout(&bglDesc));  // Created once before timing, so each timed call repeats a descriptor that was already used.
+    for (auto _ : state) {
+        bgls.push_back(device.CreateBindGroupLayout(&bglDesc));  // Timed: create from the same descriptor and store the result.
+    }
+}
+BENCHMARK_REGISTER_F(ObjectCreation, SameBindGroupLayout)  // Registered with entry counts 1 and 12 and thread counts 1, 4 and 16.
+    ->Arg(1)
+    ->Arg(12)
+    ->Threads(1)
+    ->Threads(4)
+    ->Threads(16);
+
+BENCHMARK_DEFINE_F(ObjectCreation, UniqueBindGroupLayout)  // Times CreateBindGroupLayout calls where the descriptor is changed before every call.
+(benchmark::State& state) {
+    std::vector<wgpu::BindGroupLayoutEntry> entries(state.range(0));  // range(0) is the number of layout entries.
+    for (uint32_t i = 0; i < entries.size(); ++i) {
+        entries[i].binding = i;
+        entries[i].visibility = wgpu::ShaderStage::Vertex | wgpu::ShaderStage::Fragment;
+        entries[i].buffer.type = wgpu::BufferBindingType::Uniform;
+        entries[i].buffer.minBindingSize = 4u;  // Common starting size; selected entries are bumped in the timed loop.
+    }
+
+    wgpu::BindGroupLayoutDescriptor bglDesc = {};
+    bglDesc.entryCount = entries.size();
+    bglDesc.entries = entries.data();  // Points at entries, so later edits to entries change what the descriptor describes.
+
+    // Each thread bumps its own subset of minBindingSize values so that its descriptors differ from
+    // those of other threads. A thread whose index is below Arg bumps only the entry at its index;
+    // any other thread bumps the entry at (index modulo Arg) and also the last entry.
+    std::vector<size_t> entryIndices;
+    if (state.thread_index() < state.range(0)) {
+        entryIndices.push_back(state.thread_index());
+    } else {
+        entryIndices.push_back(state.thread_index() % state.range(0));
+        entryIndices.push_back(state.range(0) - 1);
+    }
+
+    std::vector<wgpu::BindGroupLayout> bgls;  // Holds every created layout until the function returns.
+    bgls.reserve(100000);  // Capacity requested up front; presumably to keep vector growth out of the timed loop (confirm).
+    for (auto _ : state) {
+        for (size_t index : entryIndices) {
+            entries[index].buffer.minBindingSize += 4;  // Runs inside the timed loop; makes this call differ from the previous one on this thread.
+        }
+        bgls.push_back(device.CreateBindGroupLayout(&bglDesc));
+    }
+}
+BENCHMARK_REGISTER_F(ObjectCreation, UniqueBindGroupLayout)  // Registered with 12 entries only, at thread counts 1, 4 and 16.
+    ->Arg(12)  // NOTE(review): with Arg(1) every thread would select entry 0, so descriptors would collide across threads; presumably why 1 is omitted (confirm).
+    ->Threads(1)
+    ->Threads(4)
+    ->Threads(16);
+
+}  // namespace
+}  // namespace dawn
commit	da67ff851b427d743138c1b70d1d78e3798f0be9	[log] [tgz]
author	Loko Kung <lokokung@google.com>	Fri Jul 14 23:37:48 2023 +0000
committer	Dawn LUCI CQ <dawn-scoped@luci-project-accounts.iam.gserviceaccount.com>	Fri Jul 14 23:37:48 2023 +0000
tree	db358bd9e24afb2ec431fafdb8f870db3ddd7151
parent	69ec4d7e9dc14be9707afc151616c3ea9e02cc00 [diff]