Improve timing quality in Dawn perf test harness

 - Use compute pass timestamps in ShaderRobustnessPerf.
   These are more accurate than WriteTimestamp.
 - Accumulate the reported GPU time instead of overwriting it. Each
   step's measured GPU time is added to a running total, so the harness
   can report the average time across the steps.

Change-Id: I2f61e93c403591a9e5eefc5a8072c67bb0b30d65
Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/178342
Kokoro: Kokoro <noreply+kokoro@google.com>
Reviewed-by: David Neto <dneto@google.com>
Commit-Queue: Austin Eng <enga@chromium.org>
diff --git a/src/dawn/tests/perf_tests/DawnPerfTest.cpp b/src/dawn/tests/perf_tests/DawnPerfTest.cpp
index 95c9269..8e45625 100644
--- a/src/dawn/tests/perf_tests/DawnPerfTest.cpp
+++ b/src/dawn/tests/perf_tests/DawnPerfTest.cpp
@@ -249,6 +249,7 @@
 
     mNumStepsPerformed = 0;
     mCpuTime = 0;
+    mGPUTime = std::nullopt;
     mRunning = true;
 
     uint64_t finishedIterations = 0;
@@ -373,8 +374,12 @@
     }
 }
 
-void DawnPerfTestBase::SetGPUTime(double GPUTime) {
-    mGPUTime = GPUTime;
+void DawnPerfTestBase::AddGPUTime(double time) {
+    if (!mGPUTime.has_value()) {
+        mGPUTime = time;
+    } else {
+        *mGPUTime += time;
+    }
 }
 
 void DawnPerfTestBase::PrintPerIterationResultFromSeconds(const std::string& trace,
diff --git a/src/dawn/tests/perf_tests/DawnPerfTest.h b/src/dawn/tests/perf_tests/DawnPerfTest.h
index fb95263..2ad3379 100644
--- a/src/dawn/tests/perf_tests/DawnPerfTest.h
+++ b/src/dawn/tests/perf_tests/DawnPerfTest.h
@@ -106,7 +106,7 @@
                      unsigned int value,
                      const std::string& units,
                      bool important) const;
-    void SetGPUTime(double GPUTime);
+    void AddGPUTime(double time);  // Accumulates |time| into the running GPU time total.
 
   private:
     void DoRunLoop(double maxRunTime);
@@ -166,6 +166,18 @@
 
     void RecordEndTimestampAndResolveQuerySet(wgpu::CommandEncoder encoder) {
         encoder.WriteTimestamp(mTimestampQuerySet, 1);
+        ResolveTimestamps(encoder);
+    }
+
+    wgpu::ComputePassTimestampWrites GetComputePassTimestampWrites() const {
+        wgpu::ComputePassTimestampWrites timestampWrites;
+        timestampWrites.querySet = mTimestampQuerySet;
+        timestampWrites.beginningOfPassWriteIndex = 0;
+        timestampWrites.endOfPassWriteIndex = 1;
+        return timestampWrites;
+    }
+
+    void ResolveTimestamps(wgpu::CommandEncoder encoder) {
         encoder.ResolveQuerySet(mTimestampQuerySet, 0, kTimestampQueryCount, mResolveBuffer, 0);
         encoder.CopyBufferToBuffer(mResolveBuffer, 0, mReadbackBuffer, 0,
                                    sizeof(uint64_t) * kTimestampQueryCount);
@@ -186,7 +198,7 @@
             static_cast<const uint64_t*>(mReadbackBuffer.GetConstMappedRange());
         ASSERT_EQ(2u, kTimestampQueryCount);
         double gpuTimeElapsed = (readbackValues[1] - readbackValues[0]) / 1e9;
-        SetGPUTime(gpuTimeElapsed);
+        AddGPUTime(gpuTimeElapsed);
         mReadbackBuffer.Unmap();
     }
 
diff --git a/src/dawn/tests/perf_tests/ShaderRobustnessPerf.cpp b/src/dawn/tests/perf_tests/ShaderRobustnessPerf.cpp
index 48d5dde..8057e4f 100644
--- a/src/dawn/tests/perf_tests/ShaderRobustnessPerf.cpp
+++ b/src/dawn/tests/perf_tests/ShaderRobustnessPerf.cpp
@@ -509,13 +509,18 @@
 }
 
 void ShaderRobustnessPerf::Step() {
+    bool useTimestamps = SupportsTimestampQuery();
+
     wgpu::CommandBuffer commands;
     {
         wgpu::CommandEncoder encoder = device.CreateCommandEncoder();
-        if (SupportsTimestampQuery()) {
-            RecordBeginTimestamp(encoder);
+        wgpu::ComputePassDescriptor computePassDesc;
+        wgpu::ComputePassTimestampWrites timestampWrites;
+        if (useTimestamps) {
+            timestampWrites = GetComputePassTimestampWrites();
+            computePassDesc.timestampWrites = &timestampWrites;
         }
-        wgpu::ComputePassEncoder pass = encoder.BeginComputePass();
+        wgpu::ComputePassEncoder pass = encoder.BeginComputePass(&computePassDesc);
         pass.SetPipeline(mPipeline);
         pass.SetBindGroup(0, mBindGroup);
         for (unsigned int i = 0; i < kNumIterations; ++i) {
@@ -523,8 +528,8 @@
                                     ceil(static_cast<float>(mDimAOuter) / float{kTileSize}), 1);
         }
         pass.End();
-        if (SupportsTimestampQuery()) {
-            RecordEndTimestampAndResolveQuerySet(encoder);
+        if (useTimestamps) {
+            ResolveTimestamps(encoder);
         }
 
         commands = encoder.Finish();
@@ -532,7 +537,7 @@
 
     queue.Submit(1, &commands);
 
-    if (SupportsTimestampQuery()) {
+    if (useTimestamps) {
         ComputeGPUElapsedTime();
     }
 }