Refactors Dawn benchmarks to pull out infra and fix existing benchmarks.
- Updates the BGL benchmarks so that they more accurately reflect the
expected benchmarks.
- Moves device setup into helper fixture.
- Renames the files in preparation for a follow-up change to add more
benchmarks.
Bug: dawn:1769
Change-Id: I0076d3e468d6292cc13f700beac3a14a408b0699
Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/141280
Commit-Queue: Loko Kung <lokokung@google.com>
Kokoro: Kokoro <noreply+kokoro@google.com>
Reviewed-by: Austin Eng <enga@chromium.org>
diff --git a/src/dawn/tests/benchmarks/BGLCreation.cpp b/src/dawn/tests/benchmarks/BGLCreation.cpp
deleted file mode 100644
index 36ff003..0000000
--- a/src/dawn/tests/benchmarks/BGLCreation.cpp
+++ /dev/null
@@ -1,115 +0,0 @@
-// Copyright 2023 The Dawn Authors
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include <benchmark/benchmark.h>
-#include <dawn/webgpu_cpp.h>
-#include <array>
-#include <vector>
-
-#include "dawn/common/Log.h"
-#include "dawn/tests/benchmarks/NullDeviceSetup.h"
-
-static void RedundantBGLCreation(benchmark::State& state) {
- static wgpu::Device device = nullptr;
-
- if (state.thread_index() == 0) {
- std::vector<wgpu::FeatureName> requiredFeatures;
- if (state.threads() > 1) {
- requiredFeatures.push_back(wgpu::FeatureName::ImplicitDeviceSynchronization);
- }
-
- wgpu::DeviceDescriptor deviceDesc = {};
- deviceDesc.requiredFeatures = requiredFeatures.data();
- deviceDesc.requiredFeaturesCount = requiredFeatures.size();
- device = CreateNullDevice(deviceDesc);
- }
-
- std::vector<wgpu::BindGroupLayoutEntry> entries(state.range(0));
- for (uint32_t i = 0; i < entries.size(); ++i) {
- entries[i].binding = i;
- entries[i].visibility = wgpu::ShaderStage::Vertex | wgpu::ShaderStage::Fragment;
- entries[i].buffer.type = wgpu::BufferBindingType::Uniform;
- }
-
- wgpu::BindGroupLayoutDescriptor bglDesc = {};
- bglDesc.entryCount = entries.size();
- bglDesc.entries = entries.data();
-
- thread_local std::vector<wgpu::BindGroupLayout> bgls;
- bgls.reserve(100000);
- for (auto _ : state) {
- bgls.push_back(device.CreateBindGroupLayout(&bglDesc));
- }
- bgls.clear();
-
- if (state.thread_index() == 0) {
- device = nullptr;
- }
-}
-
-static void UniqueBGLCreation(benchmark::State& state) {
- static wgpu::Device device = nullptr;
-
- if (state.thread_index() == 0) {
- std::vector<wgpu::FeatureName> requiredFeatures;
- if (state.threads() > 1) {
- requiredFeatures.push_back(wgpu::FeatureName::ImplicitDeviceSynchronization);
- }
-
- wgpu::DeviceDescriptor deviceDesc = {};
- deviceDesc.requiredFeatures = requiredFeatures.data();
- deviceDesc.requiredFeaturesCount = requiredFeatures.size();
- device = CreateNullDevice(deviceDesc);
- }
-
- std::vector<wgpu::BindGroupLayoutEntry> entries(state.range(0));
- for (uint32_t i = 0; i < entries.size(); ++i) {
- entries[i].binding = i;
- entries[i].visibility = wgpu::ShaderStage::Vertex | wgpu::ShaderStage::Fragment;
- entries[i].buffer.type = wgpu::BufferBindingType::Uniform;
- }
- entries[0].buffer.minBindingSize = 4u;
-
- wgpu::BindGroupLayoutDescriptor bglDesc = {};
- bglDesc.entryCount = entries.size();
- bglDesc.entries = entries.data();
-
- thread_local std::vector<wgpu::BindGroupLayout> bgls;
- bgls.reserve(100000);
- for (auto _ : state) {
- entries[0].buffer.minBindingSize += 4;
- bgls.push_back(device.CreateBindGroupLayout(&bglDesc));
- }
- bgls.clear();
-
- if (state.thread_index() == 0) {
- device = nullptr;
- }
-}
-
-BENCHMARK(RedundantBGLCreation)
- ->Setup(SetupNullBackend)
- ->Arg(1)
- ->Arg(12)
- ->Threads(1)
- ->Threads(4)
- ->Threads(16);
-
-BENCHMARK(UniqueBGLCreation)
- ->Setup(SetupNullBackend)
- ->Arg(1)
- ->Arg(12)
- ->Threads(1)
- ->Threads(4)
- ->Threads(16);
diff --git a/src/dawn/tests/benchmarks/BUILD.gn b/src/dawn/tests/benchmarks/BUILD.gn
index d8ea27b..813370f 100644
--- a/src/dawn/tests/benchmarks/BUILD.gn
+++ b/src/dawn/tests/benchmarks/BUILD.gn
@@ -27,9 +27,9 @@
"//third_party/google_benchmark:benchmark_main",
]
sources = [
- "BGLCreation.cpp",
"NullDeviceSetup.cpp",
"NullDeviceSetup.h",
+ "ObjectCreation.cpp",
]
configs += [ "${dawn_root}/include/dawn:public" ]
}
diff --git a/src/dawn/tests/benchmarks/CMakeLists.txt b/src/dawn/tests/benchmarks/CMakeLists.txt
index f575866..9b49055 100644
--- a/src/dawn/tests/benchmarks/CMakeLists.txt
+++ b/src/dawn/tests/benchmarks/CMakeLists.txt
@@ -14,9 +14,9 @@
if (${DAWN_BUILD_BENCHMARKS})
add_executable(dawn_benchmarks
- "BGLCreation.cpp"
"NullDeviceSetup.cpp"
"NullDeviceSetup.h"
+ "ObjectCreation.cpp"
)
set_target_properties(dawn_benchmarks PROPERTIES FOLDER "Benchmarks")
diff --git a/src/dawn/tests/benchmarks/NullDeviceSetup.cpp b/src/dawn/tests/benchmarks/NullDeviceSetup.cpp
index c7057fc..a280d52 100644
--- a/src/dawn/tests/benchmarks/NullDeviceSetup.cpp
+++ b/src/dawn/tests/benchmarks/NullDeviceSetup.cpp
@@ -23,58 +23,63 @@
#include "dawn/dawn_proc.h"
#include "dawn/native/DawnNative.h"
-namespace {
-std::unique_ptr<dawn::native::Instance> nativeInstance = nullptr;
-wgpu::Adapter nullBackendAdapter = nullptr;
-} // namespace
+namespace dawn {
-void SetupNullBackend(const benchmark::State& state) {
- dawnProcSetProcs(&dawn::native::GetProcs());
+void NullDeviceBenchmarkFixture::SetUp(const benchmark::State& state) {
+ // Static initialization that only happens the first time SetUp is called for any fixture.
+ static std::unique_ptr<dawn::native::Instance> nativeInstance = []() {
+ dawnProcSetProcs(&dawn::native::GetProcs());
+ return std::make_unique<dawn::native::Instance>();
+ }();
- if (!nativeInstance) {
- nativeInstance = std::make_unique<dawn::native::Instance>();
- }
+ if (state.thread_index() == 0) {
+ // Only thread 0 is responsible for initializing the device on each iteration.
+ {
+ std::lock_guard lock(mMutex);
- if (!nullBackendAdapter) {
- wgpu::RequestAdapterOptions options = {};
- options.backendType = wgpu::BackendType::Null;
+ // Get an adapter to create the device with.
+ wgpu::RequestAdapterOptions options = {};
+ options.backendType = wgpu::BackendType::Null;
+ auto nativeAdapter = nativeInstance->EnumerateAdapters(&options)[0];
+ adapter = wgpu::Adapter(nativeAdapter.Get());
+ ASSERT(adapter != nullptr);
- auto nativeAdapter = nativeInstance->EnumerateAdapters(&options)[0];
- nullBackendAdapter = wgpu::Adapter(nativeAdapter.Get());
- }
- ASSERT(nullBackendAdapter != nullptr);
-}
-
-wgpu::Device CreateNullDevice(const wgpu::DeviceDescriptor& desc) {
- wgpu::Device device;
-
- nullBackendAdapter.RequestDevice(
- &desc,
- [](WGPURequestDeviceStatus status, WGPUDevice cDevice, char const* message,
- void* userdata) {
- ASSERT(status == WGPURequestDeviceStatus_Success);
- *reinterpret_cast<wgpu::Device*>(userdata) = wgpu::Device::Acquire(cDevice);
- },
- &device);
- while (!device) {
- wgpuInstanceProcessEvents(nativeInstance->Get());
- }
-
- device.SetUncapturedErrorCallback(
- [](WGPUErrorType, char const* message, void* userdata) {
- dawn::ErrorLog() << message;
- UNREACHABLE();
- },
- nullptr);
-
- device.SetDeviceLostCallback(
- [](WGPUDeviceLostReason reason, char const* message, void* userdata) {
- if (reason == WGPUDeviceLostReason_Undefined) {
- dawn::ErrorLog() << message;
- UNREACHABLE();
+ // Create the device.
+ wgpu::DeviceDescriptor desc = GetDeviceDescriptor();
+ adapter.RequestDevice(
+ &desc,
+ [](WGPURequestDeviceStatus status, WGPUDevice cDevice, char const* message,
+ void* userdata) {
+ ASSERT(status == WGPURequestDeviceStatus_Success);
+ *reinterpret_cast<wgpu::Device*>(userdata) = wgpu::Device::Acquire(cDevice);
+ },
+ &device);
+ while (!device) {
+ wgpuInstanceProcessEvents(nativeInstance->Get());
}
- },
- nullptr);
- return device;
+ device.SetUncapturedErrorCallback(
+ [](WGPUErrorType, char const* message, void* userdata) {
+ dawn::ErrorLog() << message;
+ UNREACHABLE();
+ },
+ nullptr);
+
+ device.SetDeviceLostCallback(
+ [](WGPUDeviceLostReason reason, char const* message, void* userdata) {
+ if (reason == WGPUDeviceLostReason_Undefined) {
+ dawn::ErrorLog() << message;
+ UNREACHABLE();
+ }
+ },
+ nullptr);
+ }
+ mCv.notify_all();
+ } else {
+ // All other threads wait here and proceed only once the device is ready.
+ std::unique_lock lock(mMutex);
+ mCv.wait(lock, [this] { return device != nullptr; });
+ }
}
+
+} // namespace dawn
diff --git a/src/dawn/tests/benchmarks/NullDeviceSetup.h b/src/dawn/tests/benchmarks/NullDeviceSetup.h
index d8c7cad..f46775b 100644
--- a/src/dawn/tests/benchmarks/NullDeviceSetup.h
+++ b/src/dawn/tests/benchmarks/NullDeviceSetup.h
@@ -15,17 +15,33 @@
#ifndef DAWN_TESTS_BENCHMARKS_NULLDEVICESETUP
#define DAWN_TESTS_BENCHMARKS_NULLDEVICESETUP
+#include <benchmark/benchmark.h>
#include <dawn/webgpu_cpp.h>
-
-namespace benchmark {
-class State;
-}
+#include <condition_variable>
+#include <mutex>
namespace wgpu {
struct DeviceDescriptor;
-}
+} // namespace wgpu
-void SetupNullBackend(const benchmark::State& state);
-wgpu::Device CreateNullDevice(const wgpu::DeviceDescriptor& desc);
+namespace dawn {
+
+class NullDeviceBenchmarkFixture : public benchmark::Fixture {
+ public:
+ void SetUp(const benchmark::State& state) override;
+
+ protected:
+ wgpu::Adapter adapter = nullptr;
+ wgpu::Device device = nullptr;
+
+ private:
+ virtual wgpu::DeviceDescriptor GetDeviceDescriptor() const = 0;
+
+ // Lock and condition variable used to synchronize the benchmark global adapter/device.
+ std::mutex mMutex;
+ std::condition_variable mCv;
+};
+
+} // namespace dawn
#endif // DAWN_TESTS_BENCHMARKS_NULLDEVICESETUP
diff --git a/src/dawn/tests/benchmarks/ObjectCreation.cpp b/src/dawn/tests/benchmarks/ObjectCreation.cpp
new file mode 100644
index 0000000..bc5aac0
--- /dev/null
+++ b/src/dawn/tests/benchmarks/ObjectCreation.cpp
@@ -0,0 +1,115 @@
+// Copyright 2023 The Dawn Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <benchmark/benchmark.h>
+#include <dawn/webgpu_cpp.h>
+#include <array>
+#include <vector>
+
+#include "dawn/common/Log.h"
+#include "dawn/tests/benchmarks/NullDeviceSetup.h"
+
+namespace dawn {
+namespace {
+
+// Benchmarks for creation and recreation of objects in Dawn.
+class ObjectCreation : public NullDeviceBenchmarkFixture {
+ protected:
+ ObjectCreation() {
+ // Currently, object creation still needs to be implicitly synchronized even though the
+ // frontend cache is thread-safe. Once other parts of Dawn are thread-safe, i.e. memory
+ // management, these tests should work without synchronization.
+ requiredFeatures.push_back(wgpu::FeatureName::ImplicitDeviceSynchronization);
+ }
+
+ private:
+ wgpu::DeviceDescriptor GetDeviceDescriptor() const override {
+ wgpu::DeviceDescriptor deviceDesc = {};
+ deviceDesc.requiredFeatures = requiredFeatures.data();
+ deviceDesc.requiredFeaturesCount = requiredFeatures.size();
+ return deviceDesc;
+ }
+
+ std::vector<wgpu::FeatureName> requiredFeatures;
+};
+
+BENCHMARK_DEFINE_F(ObjectCreation, SameBindGroupLayout)
+(benchmark::State& state) {
+ std::vector<wgpu::BindGroupLayoutEntry> entries(state.range(0));
+ for (uint32_t i = 0; i < entries.size(); ++i) {
+ entries[i].binding = i;
+ entries[i].visibility = wgpu::ShaderStage::Vertex | wgpu::ShaderStage::Fragment;
+ entries[i].buffer.type = wgpu::BufferBindingType::Uniform;
+ }
+
+ wgpu::BindGroupLayoutDescriptor bglDesc = {};
+ bglDesc.entryCount = entries.size();
+ bglDesc.entries = entries.data();
+
+ std::vector<wgpu::BindGroupLayout> bgls;
+ bgls.reserve(100000);
+ bgls.push_back(device.CreateBindGroupLayout(&bglDesc));
+ for (auto _ : state) {
+ bgls.push_back(device.CreateBindGroupLayout(&bglDesc));
+ }
+}
+BENCHMARK_REGISTER_F(ObjectCreation, SameBindGroupLayout)
+ ->Arg(1)
+ ->Arg(12)
+ ->Threads(1)
+ ->Threads(4)
+ ->Threads(16);
+
+BENCHMARK_DEFINE_F(ObjectCreation, UniqueBindGroupLayout)
+(benchmark::State& state) {
+ std::vector<wgpu::BindGroupLayoutEntry> entries(state.range(0));
+ for (uint32_t i = 0; i < entries.size(); ++i) {
+ entries[i].binding = i;
+ entries[i].visibility = wgpu::ShaderStage::Vertex | wgpu::ShaderStage::Fragment;
+ entries[i].buffer.type = wgpu::BufferBindingType::Uniform;
+ entries[i].buffer.minBindingSize = 4u;
+ }
+
+ wgpu::BindGroupLayoutDescriptor bglDesc = {};
+ bglDesc.entryCount = entries.size();
+ bglDesc.entries = entries.data();
+
+ // Depending on the thread index, we increment a subset of the binding sizes to ensure we create
+ // a new unique bind group descriptor. For now, this is just the thread_index if it's smaller
+ // than Arg, otherwise it's the last index AND the modulo index.
+ std::vector<size_t> entryIndices;
+ if (state.thread_index() < state.range(0)) {
+ entryIndices.push_back(state.thread_index());
+ } else {
+ entryIndices.push_back(state.thread_index() % state.range(0));
+ entryIndices.push_back(state.range(0) - 1);
+ }
+
+ std::vector<wgpu::BindGroupLayout> bgls;
+ bgls.reserve(100000);
+ for (auto _ : state) {
+ for (size_t index : entryIndices) {
+ entries[index].buffer.minBindingSize += 4;
+ }
+ bgls.push_back(device.CreateBindGroupLayout(&bglDesc));
+ }
+}
+BENCHMARK_REGISTER_F(ObjectCreation, UniqueBindGroupLayout)
+ ->Arg(12)
+ ->Threads(1)
+ ->Threads(4)
+ ->Threads(16);
+
+} // namespace
+} // namespace dawn