Query API: Timestamp Query on Vulkan

- Add WriteTimestamp and ResolveQuerySet on Vulkan
- Enable end2end tests of timestamp query on Vulkan
- Lazily initialize the destination buffer in ResolveQuerySet

Bug: dawn:434
Change-Id: Idbd117d6c198f6726ed922c08d030bb6fbae8be6
Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/27540
Reviewed-by: Corentin Wallez <cwallez@chromium.org>
Reviewed-by: Austin Eng <enga@chromium.org>
Commit-Queue: Hao Li <hao.x.li@intel.com>
diff --git a/src/dawn_native/vulkan/BufferVk.cpp b/src/dawn_native/vulkan/BufferVk.cpp
index 21f7db2..c31db7a 100644
--- a/src/dawn_native/vulkan/BufferVk.cpp
+++ b/src/dawn_native/vulkan/BufferVk.cpp
@@ -51,6 +51,12 @@
             if (usage & wgpu::BufferUsage::Indirect) {
                 flags |= VK_BUFFER_USAGE_INDIRECT_BUFFER_BIT;
             }
+            if (usage & wgpu::BufferUsage::QueryResolve) {
+                // VK_BUFFER_USAGE_TRANSFER_DST_BIT is required by vkCmdCopyQueryPoolResults
+                // but we also add VK_BUFFER_USAGE_STORAGE_BUFFER_BIT because the queries will
+                // be post-processed by a compute shader and written to this buffer.
+                flags |= (VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT);
+            }
 
             return flags;
         }
@@ -76,6 +82,9 @@
             if (usage & wgpu::BufferUsage::Indirect) {
                 flags |= VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT;
             }
+            if (usage & wgpu::BufferUsage::QueryResolve) {
+                flags |= VK_PIPELINE_STAGE_TRANSFER_BIT;
+            }
 
             return flags;
         }
@@ -110,6 +119,9 @@
             if (usage & wgpu::BufferUsage::Indirect) {
                 flags |= VK_ACCESS_INDIRECT_COMMAND_READ_BIT;
             }
+            if (usage & wgpu::BufferUsage::QueryResolve) {
+                flags |= VK_ACCESS_TRANSFER_WRITE_BIT;
+            }
 
             return flags;
         }
diff --git a/src/dawn_native/vulkan/CommandBufferVk.cpp b/src/dawn_native/vulkan/CommandBufferVk.cpp
index b238fc0..5c22de0 100644
--- a/src/dawn_native/vulkan/CommandBufferVk.cpp
+++ b/src/dawn_native/vulkan/CommandBufferVk.cpp
@@ -27,6 +27,7 @@
 #include "dawn_native/vulkan/DeviceVk.h"
 #include "dawn_native/vulkan/FencedDeleter.h"
 #include "dawn_native/vulkan/PipelineLayoutVk.h"
+#include "dawn_native/vulkan/QuerySetVk.h"
 #include "dawn_native/vulkan/RenderPassCache.h"
 #include "dawn_native/vulkan/RenderPipelineVk.h"
 #include "dawn_native/vulkan/TextureVk.h"
@@ -400,6 +401,26 @@
 
             return {};
         }
+
+        void ResetUsedQuerySets(Device* device,
+                                VkCommandBuffer commands,
+                                const std::set<QuerySetBase*>& usedQuerySets) {
+            // TODO(hao.x.li@intel.com): Reset the queries based on the used indexes.
+            for (QuerySetBase* querySet : usedQuerySets) {
+                device->fn.CmdResetQueryPool(commands, ToBackend(querySet)->GetHandle(), 0,
+                                             querySet->GetQueryCount());
+            }
+        }
+
+        void RecordWriteTimestampCmd(CommandRecordingContext* recordingContext,
+                                     Device* device,
+                                     WriteTimestampCmd* cmd) {
+            VkCommandBuffer commands = recordingContext->commandBuffer;
+            QuerySet* querySet = ToBackend(cmd->querySet.Get());
+
+            device->fn.CmdWriteTimestamp(commands, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
+                                         querySet->GetHandle(), cmd->queryIndex);
+        }
     }  // anonymous namespace
 
     // static
@@ -533,6 +554,9 @@
         const std::vector<PassResourceUsage>& passResourceUsages = GetResourceUsages().perPass;
         size_t nextPassNumber = 0;
 
+        // QuerySet must be reset between uses.
+        ResetUsedQuerySets(device, commands, GetResourceUsages().usedQuerySets);
+
         Command type;
         while (mCommands.NextCommandId(&type)) {
             switch (type) {
@@ -730,11 +754,27 @@
                 }
 
                 case Command::ResolveQuerySet: {
-                    return DAWN_UNIMPLEMENTED_ERROR("Waiting for implementation.");
+                    ResolveQuerySetCmd* cmd = mCommands.NextCommand<ResolveQuerySetCmd>();
+                    QuerySet* querySet = ToBackend(cmd->querySet.Get());
+                    Buffer* destination = ToBackend(cmd->destination.Get());
+
+                    destination->EnsureDataInitializedAsDestination(
+                        recordingContext, cmd->destinationOffset,
+                        cmd->queryCount * sizeof(uint64_t));
+                    destination->TransitionUsageNow(recordingContext, wgpu::BufferUsage::CopyDst);
+
+                    device->fn.CmdCopyQueryPoolResults(
+                        commands, querySet->GetHandle(), cmd->firstQuery, cmd->queryCount,
+                        destination->GetHandle(), cmd->destinationOffset, sizeof(uint64_t),
+                        VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WAIT_BIT);
+                    break;
                 }
 
                 case Command::WriteTimestamp: {
-                    return DAWN_UNIMPLEMENTED_ERROR("Waiting for implementation.");
+                    WriteTimestampCmd* cmd = mCommands.NextCommand<WriteTimestampCmd>();
+
+                    RecordWriteTimestampCmd(recordingContext, device, cmd);
+                    break;
                 }
 
                 default: {
@@ -857,7 +897,10 @@
                 }
 
                 case Command::WriteTimestamp: {
-                    return DAWN_UNIMPLEMENTED_ERROR("Waiting for implementation.");
+                    WriteTimestampCmd* cmd = mCommands.NextCommand<WriteTimestampCmd>();
+
+                    RecordWriteTimestampCmd(recordingContext, device, cmd);
+                    break;
                 }
 
                 default: {
@@ -1130,7 +1173,10 @@
                 }
 
                 case Command::WriteTimestamp: {
-                    return DAWN_UNIMPLEMENTED_ERROR("Waiting for implementation.");
+                    WriteTimestampCmd* cmd = mCommands.NextCommand<WriteTimestampCmd>();
+
+                    RecordWriteTimestampCmd(recordingContext, device, cmd);
+                    break;
                 }
 
                 default: {
diff --git a/src/tests/end2end/BufferZeroInitTests.cpp b/src/tests/end2end/BufferZeroInitTests.cpp
index abb181c..c8a07e0 100644
--- a/src/tests/end2end/BufferZeroInitTests.cpp
+++ b/src/tests/end2end/BufferZeroInitTests.cpp
@@ -1174,9 +1174,8 @@
     // Timestamp query is not supported on OpenGL
     DAWN_SKIP_TEST_IF(IsOpenGL());
 
-    // TODO(hao.x.li@intel.com): Remove it after timestamp query is implementated on Vulkan and
-    // Metal
-    DAWN_SKIP_TEST_IF(IsVulkan() || IsMetal());
+    // TODO(hao.x.li@intel.com): Remove it after timestamp query is implemented on Metal
+    DAWN_SKIP_TEST_IF(IsMetal());
 
     // Skip if timestamp extension is not supported on device
     DAWN_SKIP_TEST_IF(!SupportsExtensions({"timestamp_query"}));
diff --git a/src/tests/end2end/QueryTests.cpp b/src/tests/end2end/QueryTests.cpp
index 00522af..943ead2 100644
--- a/src/tests/end2end/QueryTests.cpp
+++ b/src/tests/end2end/QueryTests.cpp
@@ -194,6 +194,9 @@
 
 // Test resolving timestamp query to one slot in the buffer
 TEST_P(TimestampQueryTests, ResolveToBufferWithOffset) {
+    // TODO(hao.x.li@intel.com): Fails on old Intel Vulkan driver on Windows, needs investigation.
+    DAWN_SKIP_TEST_IF(IsWindows() && IsIntel() && IsVulkan());
+
     constexpr uint32_t kQueryCount = 2;
     constexpr uint64_t kZero = 0;
 
@@ -229,4 +232,4 @@
     }
 }
 
-DAWN_INSTANTIATE_TEST(TimestampQueryTests, D3D12Backend());
+DAWN_INSTANTIATE_TEST(TimestampQueryTests, D3D12Backend(), VulkanBackend());