Fix query index of availability in timestamp shader Currently we use offset to calculate the index of the queries in timestamp compute shader, which is incorrect. The offset is the buffer offset where we start to write the query results, and has nothing to do with query index. In the query availability detection, the query index should be based on the parameter firstQuery. Add new test for resolving a timestamp query twice to the same destination buffer with potentially overlapping ranges. Bug: dawn:434 Change-Id: I2b5c5b192cf5d987ac48187e8240a25937957f51 Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/50760 Reviewed-by: Austin Eng <enga@chromium.org> Commit-Queue: Hao Li <hao.x.li@intel.com>

commit: 880a3d6311971046304506bd18dc2e936447cb15 [log] [tgz]
author: Hao Li <hao.x.li@intel.com> Tue May 18 01:13:08 2021 +0000
committer: Commit Bot service account <commit-bot@chromium.org> Tue May 18 01:13:08 2021 +0000
tree: d4f700c5921cece383299a8b111d8884654a165b
parent: 21ce5d2965485853c2f87e386fd356689486b7c2 [diff]
diff --git a/src/dawn_native/CommandEncoder.cpp b/src/dawn_native/CommandEncoder.cpp
index fa1013f..05d0656 100644
--- a/src/dawn_native/CommandEncoder.cpp
+++ b/src/dawn_native/CommandEncoder.cpp

@@ -424,6 +424,7 @@
 
         MaybeError EncodeTimestampsToNanosecondsConversion(CommandEncoder* encoder,
                                                            QuerySetBase* querySet,
+                                                           uint32_t firstQuery,
                                                            uint32_t queryCount,
                                                            BufferBase* destination,
                                                            uint64_t destinationOffset) {
@@ -447,7 +448,8 @@
                                                      availability.size() * sizeof(uint32_t)));
 
             // Timestamp params uniform buffer
-            TimestampParams params = {queryCount, static_cast<uint32_t>(destinationOffset),
+            TimestampParams params = {firstQuery, queryCount,
+                                      static_cast<uint32_t>(destinationOffset),
                                       device->GetTimestampPeriodInNS()};
 
             BufferDescriptor parmsDesc = {};
@@ -882,8 +884,8 @@
 
             // Encode internal compute pipeline for timestamp query
             if (querySet->GetQueryType() == wgpu::QueryType::Timestamp) {
-                DAWN_TRY(EncodeTimestampsToNanosecondsConversion(this, querySet, queryCount, destination,
-                                                        destinationOffset));
+                DAWN_TRY(EncodeTimestampsToNanosecondsConversion(
+                    this, querySet, firstQuery, queryCount, destination, destinationOffset));
             }
 
             return {};

diff --git a/src/dawn_native/QueryHelper.cpp b/src/dawn_native/QueryHelper.cpp
index efc851b..b9f58c5 100644
--- a/src/dawn_native/QueryHelper.cpp
+++ b/src/dawn_native/QueryHelper.cpp

@@ -28,9 +28,10 @@
     namespace {
 
         // Assert the offsets in dawn_native::TimestampParams are same with the ones in the shader
-        static_assert(offsetof(dawn_native::TimestampParams, count) == 0, "");
-        static_assert(offsetof(dawn_native::TimestampParams, offset) == 4, "");
-        static_assert(offsetof(dawn_native::TimestampParams, period) == 8, "");
+        static_assert(offsetof(dawn_native::TimestampParams, first) == 0, "");
+        static_assert(offsetof(dawn_native::TimestampParams, count) == 4, "");
+        static_assert(offsetof(dawn_native::TimestampParams, offset) == 8, "");
+        static_assert(offsetof(dawn_native::TimestampParams, period) == 12, "");
 
         static const char sConvertTimestampsToNanoseconds[] = R"(
             struct Timestamp {
@@ -47,6 +48,7 @@
             };
 
             [[block]] struct TimestampParams {
+                first  : u32;
                 count  : u32;
                 offset : u32;
                 period : f32;
@@ -70,7 +72,7 @@
                 var timestamp : Timestamp = timestamps.t[index];
 
                 // Return 0 for the unavailable value.
-                if (availability.v[index] == 0u) {
+                if (availability.v[GlobalInvocationID.x + params.first] == 0u) {
                     timestamps.t[index].low = 0u;
                     timestamps.t[index].high = 0u;
                     return;

diff --git a/src/dawn_native/QueryHelper.h b/src/dawn_native/QueryHelper.h
index 82ed5e1..90f3398 100644
--- a/src/dawn_native/QueryHelper.h
+++ b/src/dawn_native/QueryHelper.h

@@ -24,6 +24,7 @@
     class CommandEncoder;
 
     struct TimestampParams {
+        uint32_t first;
         uint32_t count;
         uint32_t offset;
         float period;

diff --git a/src/tests/end2end/QueryTests.cpp b/src/tests/end2end/QueryTests.cpp
index 861a3f1..55aab8a 100644
--- a/src/tests/end2end/QueryTests.cpp
+++ b/src/tests/end2end/QueryTests.cpp

@@ -755,10 +755,10 @@
 
 // Test resolving timestamp query to one slot in the buffer
 TEST_P(TimestampQueryTests, ResolveToBufferWithOffset) {
-    // TODO(hao.x.li@intel.com): Fail to resolve query to buffer with offset on Windows Vulkan and
-    // Metal on Intel platforms, need investigation.
-    DAWN_SKIP_TEST_IF(IsWindows() && IsIntel() && IsVulkan());
-    DAWN_SKIP_TEST_IF(IsIntel() && IsMetal());
+    // TODO(hao.x.li@intel.com): Fails on Intel Windows Vulkan due to a driver issue where
+    // vkCmdFillBuffer and vkCmdCopyQueryPoolResults are not executed in order. Skip it until
+    // the issue is fixed.
+    DAWN_SKIP_TEST_IF(IsWindows() && IsVulkan() && IsIntel());
 
     // TODO(hao.x.li@intel.com): Crash occurs if we only call WriteTimestamp in a command encoder
     // without any copy commands on Metal on AMD GPU. See https://crbug.com/dawn/545.
@@ -774,7 +774,6 @@
         wgpu::Buffer destination = CreateResolveBuffer(kQueryCount * sizeof(uint64_t));
         wgpu::CommandEncoder encoder = device.CreateCommandEncoder();
         encoder.WriteTimestamp(querySet, 0);
-        encoder.WriteTimestamp(querySet, 1);
         encoder.ResolveQuerySet(querySet, 0, 1, destination, 0);
         wgpu::CommandBuffer commands = encoder.Finish();
         queue.Submit(1, &commands);
@@ -789,7 +788,6 @@
         wgpu::Buffer destination = CreateResolveBuffer(kQueryCount * sizeof(uint64_t));
         wgpu::CommandEncoder encoder = device.CreateCommandEncoder();
         encoder.WriteTimestamp(querySet, 0);
-        encoder.WriteTimestamp(querySet, 1);
         encoder.ResolveQuerySet(querySet, 0, 1, destination, sizeof(uint64_t));
         wgpu::CommandBuffer commands = encoder.Finish();
         queue.Submit(1, &commands);
@@ -799,6 +797,31 @@
     }
 }
 
+// Test resolving a query set twice into the same destination buffer with potentially overlapping
+// ranges
+TEST_P(TimestampQueryTests, ResolveTwiceToSameBuffer) {
+    // TODO(hao.x.li@intel.com): Fails on Intel Windows Vulkan due to a driver issue where
+    // vkCmdFillBuffer and vkCmdCopyQueryPoolResults are not executed in order. Skip it until
+    // the issue is fixed.
+    DAWN_SKIP_TEST_IF(IsWindows() && IsVulkan() && IsIntel());
+
+    constexpr uint32_t kQueryCount = 3;
+
+    wgpu::QuerySet querySet = CreateQuerySetForTimestamp(kQueryCount);
+    wgpu::Buffer destination = CreateResolveBuffer(kQueryCount * sizeof(uint64_t));
+
+    wgpu::CommandEncoder encoder = device.CreateCommandEncoder();
+    encoder.WriteTimestamp(querySet, 0);
+    encoder.WriteTimestamp(querySet, 1);
+    encoder.WriteTimestamp(querySet, 2);
+    encoder.ResolveQuerySet(querySet, 0, 2, destination, 0);
+    encoder.ResolveQuerySet(querySet, 1, 2, destination, sizeof(uint64_t));
+    wgpu::CommandBuffer commands = encoder.Finish();
+    queue.Submit(1, &commands);
+
+    EXPECT_BUFFER(destination, 0, kQueryCount * sizeof(uint64_t), new TimestampExpectation);
+}
+
 DAWN_INSTANTIATE_TEST(TimestampQueryTests,
                       D3D12Backend(),
                       MetalBackend(),

diff --git a/src/tests/white_box/QueryInternalShaderTests.cpp b/src/tests/white_box/QueryInternalShaderTests.cpp
index 11b9a12..4dad29c 100644
--- a/src/tests/white_box/QueryInternalShaderTests.cpp
+++ b/src/tests/white_box/QueryInternalShaderTests.cpp

@@ -125,12 +125,13 @@
         wgpu::BufferUsage::Storage | wgpu::BufferUsage::CopySrc | wgpu::BufferUsage::CopyDst;
     wgpu::Buffer timestampsBuffer = device.CreateBuffer(&timestampsDesc);
 
-    auto PrepareExpectedResults = [&](uint32_t offset) -> std::vector<uint64_t> {
+    auto PrepareExpectedResults = [&](uint32_t first, uint32_t count,
+                                      uint32_t offset) -> std::vector<uint64_t> {
         ASSERT(offset % sizeof(uint64_t) == 0);
         std::vector<uint64_t> expected;
         for (size_t i = 0; i < kTimestampCount; i++) {
-            // The data before offset remains as it is
-            if (i < offset / sizeof(uint64_t)) {
+            // The data outside the range [first, first + count) remains as it is
+            if (i < first || i >= first + count) {
                 expected.push_back(timestamps[i]);
                 continue;
             }
@@ -149,7 +150,9 @@
     };
 
     // Convert timestamps in timestamps buffer with offset 0
+    // Test for ResolveQuerySet(querySet, 0, kTimestampCount, timestampsBuffer, 0)
     {
+        constexpr uint32_t kFirst = 0u;
         constexpr uint32_t kOffset = 0u;
 
         // Write orignal timestamps to timestamps buffer
@@ -157,7 +160,7 @@
                           kTimestampCount * sizeof(uint64_t));
 
         // The params uniform buffer
-        dawn_native::TimestampParams params = {kTimestampCount, kOffset, kPeriod};
+        dawn_native::TimestampParams params = {kFirst, kTimestampCount, kOffset, kPeriod};
         wgpu::Buffer paramsBuffer = utils::CreateBufferFromData(device, &params, sizeof(params),
                                                                 wgpu::BufferUsage::Uniform);
 
@@ -168,13 +171,15 @@
         queue.Submit(1, &commands);
 
         // Expected results: Timestamp * period
-        std::vector<uint64_t> expected = PrepareExpectedResults(kOffset);
+        std::vector<uint64_t> expected = PrepareExpectedResults(0, kTimestampCount, kOffset);
         EXPECT_BUFFER(timestampsBuffer, 0, kTimestampCount * sizeof(uint64_t),
                       new InternalShaderExpectation(expected.data(), kTimestampCount));
     }
 
     // Convert timestamps in timestamps buffer with offset 8
+    // Test for ResolveQuerySet(querySet, 1, kTimestampCount - 1, timestampsBuffer, 8)
     {
+        constexpr uint32_t kFirst = 1u;
         constexpr uint32_t kOffset = 8u;
 
         // Write orignal timestamps to timestamps buffer
@@ -182,7 +187,7 @@
                           kTimestampCount * sizeof(uint64_t));
 
         // The params uniform buffer
-        dawn_native::TimestampParams params = {kTimestampCount, kOffset, kPeriod};
+        dawn_native::TimestampParams params = {kFirst, kTimestampCount - kFirst, kOffset, kPeriod};
         wgpu::Buffer paramsBuffer = utils::CreateBufferFromData(device, &params, sizeof(params),
                                                                 wgpu::BufferUsage::Uniform);
 
@@ -193,7 +198,36 @@
         queue.Submit(1, &commands);
 
         // Expected results: Timestamp * period
-        std::vector<uint64_t> expected = PrepareExpectedResults(kOffset);
+        std::vector<uint64_t> expected =
+            PrepareExpectedResults(kFirst, kTimestampCount - kFirst, kOffset);
+        EXPECT_BUFFER(timestampsBuffer, 0, kTimestampCount * sizeof(uint64_t),
+                      new InternalShaderExpectation(expected.data(), kTimestampCount));
+    }
+
+    // Convert partial timestamps in timestamps buffer with offset 8
+    // Test for ResolveQuerySet(querySet, 1, 3, timestampsBuffer, 8)
+    {
+        constexpr uint32_t kFirst = 1u;
+        constexpr uint32_t kCount = 3u;
+        constexpr uint32_t kOffset = 8u;
+
+        // Write original timestamps to timestamps buffer
+        queue.WriteBuffer(timestampsBuffer, 0, timestamps.data(),
+                          kTimestampCount * sizeof(uint64_t));
+
+        // The params uniform buffer
+        dawn_native::TimestampParams params = {kFirst, kCount, kOffset, kPeriod};
+        wgpu::Buffer paramsBuffer = utils::CreateBufferFromData(device, &params, sizeof(params),
+                                                                wgpu::BufferUsage::Uniform);
+
+        wgpu::CommandEncoder encoder = device.CreateCommandEncoder();
+        EncodeConvertTimestampsToNanoseconds(encoder, timestampsBuffer, availabilityBuffer,
+                                             paramsBuffer);
+        wgpu::CommandBuffer commands = encoder.Finish();
+        queue.Submit(1, &commands);
+
+        // Expected results: Timestamp * period
+        std::vector<uint64_t> expected = PrepareExpectedResults(kFirst, kCount, kOffset);
         EXPECT_BUFFER(timestampsBuffer, 0, kTimestampCount * sizeof(uint64_t),
                       new InternalShaderExpectation(expected.data(), kTimestampCount));
     }
commit	880a3d6311971046304506bd18dc2e936447cb15	[log] [tgz]
author	Hao Li <hao.x.li@intel.com>	Tue May 18 01:13:08 2021 +0000
committer	Commit Bot service account <commit-bot@chromium.org>	Tue May 18 01:13:08 2021 +0000
tree	d4f700c5921cece383299a8b111d8884654a165b
parent	21ce5d2965485853c2f87e386fd356689486b7c2 [diff]