Make perf test tracing thread-safe
The Metal driver calls command buffer completion callbacks on a
separate thread so enqueueing these trace events needs to be made
thread-safe. In the future, Dawn will probably have other threads
that also require thread-safe tracing.
In this CL, each thread records trace events into its own buffer,
and all buffers are concatenated when trace events are acquired.
Bug: dawn:254, dawn:208
Change-Id: I0f1abd404568d838091066a8f27a3bf98fa764b9
Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/13080
Reviewed-by: Corentin Wallez <cwallez@chromium.org>
Commit-Queue: Austin Eng <enga@chromium.org>
diff --git a/src/tests/perf_tests/DawnPerfTest.cpp b/src/tests/perf_tests/DawnPerfTest.cpp
index c9d683a..20068ee 100644
--- a/src/tests/perf_tests/DawnPerfTest.cpp
+++ b/src/tests/perf_tests/DawnPerfTest.cpp
@@ -65,6 +65,7 @@
}
value["ph"] = &phase[0];
value["id"] = traceEvent.id;
+ value["tid"] = traceEvent.threadId;
value["ts"] = microseconds;
value["pid"] = "Dawn";
@@ -265,6 +266,9 @@
}
// Wait for all GPU commands to complete.
+ // TODO(enga): When Dawn has multiple background threads, add a Device::WaitForIdleForTesting()
+ // which waits for all threads to stop doing work. When we output results, there should
+ // be no additional incoming trace events.
while (signaledFenceValue != fence.GetCompletedValue()) {
mTest->WaitABit();
}
@@ -273,6 +277,9 @@
}
void DawnPerfTestBase::OutputResults() {
+ // TODO(enga): When Dawn has multiple background threads, add a Device::WaitForIdleForTesting()
+ // which waits for all threads to stop doing work. When we output results, there should
+ // be no additional incoming trace events.
DawnPerfTestPlatform* platform =
reinterpret_cast<DawnPerfTestPlatform*>(gTestEnv->GetInstance()->GetPlatform());
diff --git a/src/tests/perf_tests/DawnPerfTestPlatform.cpp b/src/tests/perf_tests/DawnPerfTestPlatform.cpp
index 850b930..016211f 100644
--- a/src/tests/perf_tests/DawnPerfTestPlatform.cpp
+++ b/src/tests/perf_tests/DawnPerfTestPlatform.cpp
@@ -15,6 +15,7 @@
#include "tests/perf_tests/DawnPerfTestPlatform.h"
#include "common/Assert.h"
+#include "common/HashUtils.h"
#include "dawn_platform/tracing/TraceEvent.h"
#include "tests/perf_tests/DawnPerfTest.h"
#include "utils/Timer.h"
@@ -67,6 +68,22 @@
return mTimer->GetAbsoluteTime() - origin;
}
+std::vector<DawnPerfTestPlatform::TraceEvent>* DawnPerfTestPlatform::GetLocalTraceEventBuffer() {
+ // Per-thread cache of the calling thread's buffer; the mutex is only taken while it is null.
+ thread_local std::vector<TraceEvent>* traceEventBuffer = nullptr;
+
+ if (traceEventBuffer == nullptr) {
+ auto buffer = std::make_unique<std::vector<TraceEvent>>();
+ traceEventBuffer = buffer.get();
+
+ // Hand ownership of the new buffer to the map, keyed by the calling thread's id.
+ std::lock_guard<std::mutex> guard(mTraceEventBufferMapMutex);
+ mTraceEventBuffers[std::this_thread::get_id()] = std::move(buffer);
+ }
+
+ return traceEventBuffer;
+}
+
// TODO(enga): Simplify this API.
uint64_t DawnPerfTestPlatform::AddTraceEvent(char phase,
const unsigned char* categoryGroupEnabled,
@@ -90,8 +107,13 @@
const TraceCategoryInfo* info =
reinterpret_cast<const TraceCategoryInfo*>(categoryGroupEnabled);
- mTraceEventBuffer.emplace_back(phase, info->category, name, id, timestamp);
- return static_cast<uint64_t>(mTraceEventBuffer.size());
+ std::vector<TraceEvent>* buffer = GetLocalTraceEventBuffer();
+ buffer->emplace_back(phase, info->category, name, id, timestamp);
+
+ size_t hash = 0;
+ HashCombine(&hash, buffer->size());
+ HashCombine(&hash, std::this_thread::get_id());
+ return static_cast<uint64_t>(hash);
}
void DawnPerfTestPlatform::EnableTraceEventRecording(bool enable) {
@@ -99,7 +121,27 @@
}
std::vector<DawnPerfTestPlatform::TraceEvent> DawnPerfTestPlatform::AcquireTraceEventBuffer() {
- std::vector<DawnPerfTestPlatform::TraceEvent> buffer = mTraceEventBuffer;
- mTraceEventBuffer.clear();
- return buffer;
+ std::vector<TraceEvent> traceEventBuffer;
+ {
+ // AcquireTraceEventBuffer should only be called when Dawn is completely idle. There should
+ // be no threads inserting trace events.
+ // Right now, this is safe because AcquireTraceEventBuffer is called after waiting on a
+ // fence for all GPU commands to finish executing. When Dawn has multiple background threads
+ // for other work (creation, validation, submission, residency, etc), we will need to ensure
+ // all work on those threads is stopped as well.
+ std::lock_guard<std::mutex> guard(mTraceEventBufferMapMutex);
+ for (auto it = mTraceEventBuffers.begin(); it != mTraceEventBuffers.end(); ++it) {
+ std::ostringstream stream;
+ stream << it->first;
+ std::string threadId = stream.str();
+
+ std::transform(it->second->begin(), it->second->end(),
+ std::back_inserter(traceEventBuffer), [&threadId](TraceEvent ev) {
+ ev.threadId = threadId;
+ return ev;
+ });
+ it->second->clear();
+ }
+ }
+ return traceEventBuffer;
}
diff --git a/src/tests/perf_tests/DawnPerfTestPlatform.h b/src/tests/perf_tests/DawnPerfTestPlatform.h
index b835e34..3fef9e9 100644
--- a/src/tests/perf_tests/DawnPerfTestPlatform.h
+++ b/src/tests/perf_tests/DawnPerfTestPlatform.h
@@ -18,6 +18,9 @@
#include <dawn_platform/DawnPlatform.h>
#include <memory>
+#include <mutex>
+#include <thread>
+#include <unordered_map>
#include <vector>
namespace utils {
@@ -44,6 +47,7 @@
dawn_platform::TraceCategory category;
const char* name = nullptr;
uint64_t id = 0;
+ std::string threadId;
double timestamp = 0;
};
@@ -59,6 +63,8 @@
double MonotonicallyIncreasingTime() override;
+ std::vector<TraceEvent>* GetLocalTraceEventBuffer();
+
uint64_t AddTraceEvent(char phase,
const unsigned char* categoryGroupEnabled,
const char* name,
@@ -74,7 +80,12 @@
std::unique_ptr<utils::Timer> mTimer;
// Trace event record.
- std::vector<TraceEvent> mTraceEventBuffer;
+ // Each thread uses its own trace event buffer, but the PerfTestPlatform owns all of them in
+ // this map. The map stores all of them so we can iterate through them and flush when
+ // AcquireTraceEventBuffer is called.
+ std::unordered_map<std::thread::id, std::unique_ptr<std::vector<TraceEvent>>>
+ mTraceEventBuffers;
+ std::mutex mTraceEventBufferMapMutex;
};
#endif // TESTS_PERFTESTS_DAWNPERFTESTPLATFORM_H_