Add internal compute pipeline in ResolveQuerySet for Timestamp Query

- Enable internal compute pipeline in ResolveQuerySet for Timestamp
  Query.
- Known issue:
  The user-provided resolve buffer cannot be used as binding resources
  due to missing STORAGE usage. Currently implictly add STORAGE usage
  if the buffer is created with QUERY_RESOLVE usage as a workaround.
  Next we will add STORAGE_INTERNAL usage instead.

Bug: dawn:434
Change-Id: Ie66090de38bc3a04a58986669cd2a128b528f960
Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/36222
Reviewed-by: Corentin Wallez <cwallez@chromium.org>
Reviewed-by: Austin Eng <enga@chromium.org>
Commit-Queue: Hao Li <hao.x.li@intel.com>
diff --git a/src/dawn_native/Buffer.cpp b/src/dawn_native/Buffer.cpp
index 79ecaba..74d74eb 100644
--- a/src/dawn_native/Buffer.cpp
+++ b/src/dawn_native/Buffer.cpp
@@ -135,6 +135,14 @@
         if (mUsage & wgpu::BufferUsage::Storage) {
             mUsage |= kReadOnlyStorageBuffer;
         }
+
+        // TODO(hao.x.li@intel.com): This is just a workaround to make QueryResolve buffer pass the
+        // binding group validation when used as an internal resource. Instead the buffer made with
+        // QueryResolve usage would implicitly get StorageInternal usage which is only compatible
+        // with StorageBufferInternal binding type in BGL, not StorageBuffer binding type.
+        if (mUsage & wgpu::BufferUsage::QueryResolve) {
+            mUsage |= wgpu::BufferUsage::Storage;
+        }
     }
 
     BufferBase::BufferBase(DeviceBase* device,
diff --git a/src/dawn_native/CommandEncoder.cpp b/src/dawn_native/CommandEncoder.cpp
index 4af35ca..329f4ab 100644
--- a/src/dawn_native/CommandEncoder.cpp
+++ b/src/dawn_native/CommandEncoder.cpp
@@ -25,7 +25,9 @@
 #include "dawn_native/ComputePassEncoder.h"
 #include "dawn_native/Device.h"
 #include "dawn_native/ErrorData.h"
+#include "dawn_native/QueryHelper.h"
 #include "dawn_native/QuerySet.h"
+#include "dawn_native/Queue.h"
 #include "dawn_native/RenderPassEncoder.h"
 #include "dawn_native/RenderPipeline.h"
 #include "dawn_native/ValidationUtils_autogen.h"
@@ -390,6 +392,43 @@
             return {};
         }
 
+        void EncodeTimestampsToNanosecondsConversion(CommandEncoder* encoder,
+                                                     QuerySetBase* querySet,
+                                                     uint32_t queryCount,
+                                                     BufferBase* destination,
+                                                     uint64_t destinationOffset) {
+            DeviceBase* device = encoder->GetDevice();
+
+            std::vector<uint32_t> availability;
+            auto it = encoder->GetQueryAvailabilityMap().find(querySet);
+            if (it != encoder->GetQueryAvailabilityMap().end()) {
+                availability = {it->second.begin(), it->second.end()};
+            } else {
+                availability.resize(querySet->GetQueryCount());
+            }
+
+            // Timestamp availability storage buffer
+            BufferDescriptor availabilityDesc = {};
+            availabilityDesc.usage = wgpu::BufferUsage::Storage | wgpu::BufferUsage::CopyDst;
+            availabilityDesc.size = querySet->GetQueryCount() * sizeof(uint32_t);
+            Ref<BufferBase> availabilityBuffer =
+                AcquireRef(device->CreateBuffer(&availabilityDesc));
+            device->GetDefaultQueue()->WriteBuffer(availabilityBuffer.Get(), 0, availability.data(),
+                                                   querySet->GetQueryCount() * sizeof(uint32_t));
+
+            // Timestamp params uniform buffer
+            TimestampParams params = {queryCount, static_cast<uint32_t>(destinationOffset),
+                                      device->GetTimestampPeriodInNS()};
+            BufferDescriptor parmsDesc = {};
+            parmsDesc.usage = wgpu::BufferUsage::Uniform | wgpu::BufferUsage::CopyDst;
+            parmsDesc.size = sizeof(params);
+            Ref<BufferBase> paramsBuffer = AcquireRef(device->CreateBuffer(&parmsDesc));
+            device->GetDefaultQueue()->WriteBuffer(paramsBuffer.Get(), 0, &params, sizeof(params));
+
+            EncodeConvertTimestampsToNanoseconds(encoder, destination, availabilityBuffer.Get(),
+                                                 paramsBuffer.Get());
+        }
+
     }  // namespace
 
     CommandEncoder::CommandEncoder(DeviceBase* device, const CommandEncoderDescriptor*)
@@ -791,6 +830,12 @@
             cmd->destination = destination;
             cmd->destinationOffset = destinationOffset;
 
+            // Encode internal compute pipeline for timestamp query
+            if (querySet->GetQueryType() == wgpu::QueryType::Timestamp) {
+                EncodeTimestampsToNanosecondsConversion(this, querySet, queryCount, destination,
+                                                        destinationOffset);
+            }
+
             return {};
         });
     }
diff --git a/src/tests/end2end/BufferZeroInitTests.cpp b/src/tests/end2end/BufferZeroInitTests.cpp
index 00f76cc..b927329 100644
--- a/src/tests/end2end/BufferZeroInitTests.cpp
+++ b/src/tests/end2end/BufferZeroInitTests.cpp
@@ -1206,6 +1206,10 @@
     // Skip if timestamp extension is not supported on device
     DAWN_SKIP_TEST_IF(!SupportsExtensions({"timestamp_query"}));
 
+    // TODO(crbug.com/tint/255, crbug.com/tint/256, crbug.com/tint/400, crbug.com/tint/417):
+    // Skip use_tint_generator due to runtime array not supported in WGSL
+    DAWN_SKIP_TEST_IF(HasToggleEnabled("use_tint_generator"));
+
     constexpr uint64_t kBufferSize = 16u;
     constexpr wgpu::BufferUsage kBufferUsage =
         wgpu::BufferUsage::QueryResolve | wgpu::BufferUsage::CopyDst;
diff --git a/src/tests/end2end/QueryTests.cpp b/src/tests/end2end/QueryTests.cpp
index 5406b08..ff64300 100644
--- a/src/tests/end2end/QueryTests.cpp
+++ b/src/tests/end2end/QueryTests.cpp
@@ -284,6 +284,10 @@
 
         // Skip all tests if timestamp extension is not supported
         DAWN_SKIP_TEST_IF(!SupportsExtensions({"timestamp_query"}));
+
+        // TODO(crbug.com/tint/255, crbug.com/tint/256, crbug.com/tint/400, crbug.com/tint/417):
+        // Skip use_tint_generator due to runtime array not supported in WGSL
+        DAWN_SKIP_TEST_IF(HasToggleEnabled("use_tint_generator"));
     }
 
     std::vector<const char*> GetRequiredExtensions() override {
diff --git a/src/tests/unittests/validation/BindGroupValidationTests.cpp b/src/tests/unittests/validation/BindGroupValidationTests.cpp
index 5b07aeb..99ba07c 100644
--- a/src/tests/unittests/validation/BindGroupValidationTests.cpp
+++ b/src/tests/unittests/validation/BindGroupValidationTests.cpp
@@ -434,6 +434,23 @@
     ASSERT_DEVICE_ERROR(utils::MakeBindGroup(device, layout, {{0, mUBO, 0, 256}}));
 }
 
+// Check that a resolve buffer with internal storge usage cannot be used as SSBO
+// TODO(hao.x.li@intel.com): Disable until internal storage usage is implemented
+TEST_F(BindGroupValidationTest, DISABLED_BufferUsageQueryResolve) {
+    wgpu::BindGroupLayout layout = utils::MakeBindGroupLayout(
+        device, {{0, wgpu::ShaderStage::Fragment, wgpu::BufferBindingType::Storage}});
+
+    // Control case: using a buffer with the storage usage works
+    utils::MakeBindGroup(device, layout, {{0, mSSBO, 0, 256}});
+
+    // Using a resolve buffer with the internal storage usage fails
+    wgpu::BufferDescriptor descriptor;
+    descriptor.size = 1024;
+    descriptor.usage = wgpu::BufferUsage::QueryResolve;
+    wgpu::Buffer buffer = device.CreateBuffer(&descriptor);
+    ASSERT_DEVICE_ERROR(utils::MakeBindGroup(device, layout, {{0, buffer, 0, 256}}));
+}
+
 // Tests constraints on the buffer offset for bind groups.
 TEST_F(BindGroupValidationTest, BufferOffsetAlignment) {
     wgpu::BindGroupLayout layout = utils::MakeBindGroupLayout(