Query API: ResolveQuerySet

Add ResolveQuerySet on CommandEncoder and its validation tests.

Bug: dawn:434
Change-Id: Ibba166dd11e15430cd5f6647676a47ce67481844
Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/24303
Reviewed-by: Corentin Wallez <cwallez@chromium.org>
Reviewed-by: Austin Eng <enga@chromium.org>
Commit-Queue: Hao Li <hao.x.li@intel.com>
diff --git a/dawn.json b/dawn.json
index 2e3d4b5..3005dfa 100644
--- a/dawn.json
+++ b/dawn.json
@@ -290,7 +290,8 @@
             {"value": 32, "name": "vertex"},
             {"value": 64, "name": "uniform"},
             {"value": 128, "name": "storage"},
-            {"value": 256, "name": "indirect"}
+            {"value": 256, "name": "indirect"},
+            {"value": 512, "name": "query resolve"}
         ]
     },
     "char": {
@@ -412,6 +413,16 @@
                 ]
             },
             {
+                "name": "resolve query set",
+                "args": [
+                    {"name": "query set", "type": "query set"},
+                    {"name": "first query", "type": "uint32_t"},
+                    {"name": "query count", "type": "uint32_t"},
+                    {"name": "destination", "type": "buffer"},
+                    {"name": "destination offset", "type": "uint64_t"}
+                ]
+            },
+            {
                 "name": "write timestamp",
                 "args": [
                     {"name": "query set", "type": "query set"},
diff --git a/src/dawn_native/CommandEncoder.cpp b/src/dawn_native/CommandEncoder.cpp
index f228cb3..da99bbd 100644
--- a/src/dawn_native/CommandEncoder.cpp
+++ b/src/dawn_native/CommandEncoder.cpp
@@ -435,6 +435,44 @@
             return fixedView;
         }
 
+        MaybeError ValidateQuerySetResolve(const QuerySetBase* querySet,
+                                           uint32_t firstQuery,
+                                           uint32_t queryCount,
+                                           const BufferBase* destination,
+                                           uint64_t destinationOffset) {
+            if (firstQuery >= querySet->GetQueryCount()) {
+                return DAWN_VALIDATION_ERROR("Query index out of bounds");
+            }
+
+            if (queryCount > querySet->GetQueryCount() - firstQuery) {
+                return DAWN_VALIDATION_ERROR(
+                    "The sum of firstQuery and queryCount exceeds the number of queries in query "
+                    "set");
+            }
+
+            // TODO(hao.x.li@intel.com): Validate that the queries in [firstQuery, firstQuery +
+            // queryCount - 1] are available (written by query operations).
+
+            // The destinationOffset must be a multiple of 8 bytes on D3D12 and Vulkan
+            if (destinationOffset % 8 != 0) {
+                return DAWN_VALIDATION_ERROR(
+                    "The alignment offset into the destination buffer must be a multiple of 8 "
+                    "bytes");
+            }
+
+            uint64_t bufferSize = destination->GetSize();
+            // The destination buffer must have enough storage, from destination offset, to contain
+            // the result of resolved queries
+            bool fitsInBuffer = destinationOffset <= bufferSize &&
+                                (static_cast<uint64_t>(queryCount) * sizeof(uint64_t) <=
+                                 (bufferSize - destinationOffset));
+            if (!fitsInBuffer) {
+                return DAWN_VALIDATION_ERROR("The resolved query data would overflow the buffer");
+            }
+
+            return {};
+        }
+
     }  // namespace
 
     CommandEncoder::CommandEncoder(DeviceBase* device, const CommandEncoderDescriptor*)
@@ -783,6 +821,37 @@
         });
     }
 
+    void CommandEncoder::ResolveQuerySet(QuerySetBase* querySet,
+                                         uint32_t firstQuery,
+                                         uint32_t queryCount,
+                                         BufferBase* destination,
+                                         uint64_t destinationOffset) {
+        mEncodingContext.TryEncode(this, [&](CommandAllocator* allocator) -> MaybeError {
+            if (GetDevice()->IsValidationEnabled()) {
+                DAWN_TRY(GetDevice()->ValidateObject(querySet));
+                DAWN_TRY(GetDevice()->ValidateObject(destination));
+
+                DAWN_TRY(ValidateQuerySetResolve(querySet, firstQuery, queryCount, destination,
+                                                 destinationOffset));
+
+                DAWN_TRY(ValidateCanUseAs(destination, wgpu::BufferUsage::QueryResolve));
+
+                TrackUsedQuerySet(querySet);
+                mTopLevelBuffers.insert(destination);
+            }
+
+            ResolveQuerySetCmd* cmd =
+                allocator->Allocate<ResolveQuerySetCmd>(Command::ResolveQuerySet);
+            cmd->querySet = querySet;
+            cmd->firstQuery = firstQuery;
+            cmd->queryCount = queryCount;
+            cmd->destination = destination;
+            cmd->destinationOffset = destinationOffset;
+
+            return {};
+        });
+    }
+
     void CommandEncoder::WriteTimestamp(QuerySetBase* querySet, uint32_t queryIndex) {
         mEncodingContext.TryEncode(this, [&](CommandAllocator* allocator) -> MaybeError {
             if (GetDevice()->IsValidationEnabled()) {
@@ -884,6 +953,11 @@
                     break;
                 }
 
+                case Command::ResolveQuerySet: {
+                    commands->NextCommand<ResolveQuerySetCmd>();
+                    break;
+                }
+
                 case Command::WriteTimestamp: {
                     commands->NextCommand<WriteTimestampCmd>();
                     break;
diff --git a/src/dawn_native/CommandEncoder.h b/src/dawn_native/CommandEncoder.h
index 9caa1b2..2fe80104 100644
--- a/src/dawn_native/CommandEncoder.h
+++ b/src/dawn_native/CommandEncoder.h
@@ -60,6 +60,11 @@
         void PopDebugGroup();
         void PushDebugGroup(const char* groupLabel);
 
+        void ResolveQuerySet(QuerySetBase* querySet,
+                             uint32_t firstQuery,
+                             uint32_t queryCount,
+                             BufferBase* destination,
+                             uint64_t destinationOffset);
         void WriteTimestamp(QuerySetBase* querySet, uint32_t queryIndex);
 
         CommandBufferBase* Finish(const CommandBufferDescriptor* descriptor);
diff --git a/src/dawn_native/Commands.cpp b/src/dawn_native/Commands.cpp
index 7da04ad..9528e82 100644
--- a/src/dawn_native/Commands.cpp
+++ b/src/dawn_native/Commands.cpp
@@ -128,6 +128,11 @@
                     cmd->~PushDebugGroupCmd();
                     break;
                 }
+                case Command::ResolveQuerySet: {
+                    ResolveQuerySetCmd* cmd = commands->NextCommand<ResolveQuerySetCmd>();
+                    cmd->~ResolveQuerySetCmd();
+                    break;
+                }
                 case Command::SetComputePipeline: {
                     SetComputePipelineCmd* cmd = commands->NextCommand<SetComputePipelineCmd>();
                     cmd->~SetComputePipelineCmd();
@@ -266,6 +271,11 @@
                 break;
             }
 
+            case Command::ResolveQuerySet: {
+                commands->NextCommand<ResolveQuerySetCmd>();
+                break;
+            }
+
             case Command::SetComputePipeline:
                 commands->NextCommand<SetComputePipelineCmd>();
                 break;
diff --git a/src/dawn_native/Commands.h b/src/dawn_native/Commands.h
index 70214ff..85e462b 100644
--- a/src/dawn_native/Commands.h
+++ b/src/dawn_native/Commands.h
@@ -51,6 +51,7 @@
         InsertDebugMarker,
         PopDebugGroup,
         PushDebugGroup,
+        ResolveQuerySet,
         SetComputePipeline,
         SetRenderPipeline,
         SetStencilReference,
@@ -186,6 +187,14 @@
         uint32_t length;
     };
 
+    struct ResolveQuerySetCmd {
+        Ref<QuerySetBase> querySet;
+        uint32_t firstQuery;
+        uint32_t queryCount;
+        Ref<BufferBase> destination;
+        uint64_t destinationOffset;
+    };
+
     struct SetComputePipelineCmd {
         Ref<ComputePipelineBase> pipeline;
     };
diff --git a/src/dawn_native/d3d12/CommandBufferD3D12.cpp b/src/dawn_native/d3d12/CommandBufferD3D12.cpp
index 834b57a..1c301d2 100644
--- a/src/dawn_native/d3d12/CommandBufferD3D12.cpp
+++ b/src/dawn_native/d3d12/CommandBufferD3D12.cpp
@@ -836,6 +836,10 @@
                     break;
                 }
 
+                case Command::ResolveQuerySet: {
+                    return DAWN_UNIMPLEMENTED_ERROR("Waiting for implementation.");
+                }
+
                 case Command::WriteTimestamp: {
                     return DAWN_UNIMPLEMENTED_ERROR("Waiting for implementation.");
                 }
diff --git a/src/dawn_native/metal/CommandBufferMTL.mm b/src/dawn_native/metal/CommandBufferMTL.mm
index 97c0ad1..0e4d439 100644
--- a/src/dawn_native/metal/CommandBufferMTL.mm
+++ b/src/dawn_native/metal/CommandBufferMTL.mm
@@ -716,6 +716,10 @@
                     break;
                 }
 
+                case Command::ResolveQuerySet: {
+                    return DAWN_UNIMPLEMENTED_ERROR("Waiting for implementation.");
+                }
+
                 case Command::WriteTimestamp: {
                     return DAWN_UNIMPLEMENTED_ERROR("Waiting for implementation.");
                 }
diff --git a/src/dawn_native/opengl/CommandBufferGL.cpp b/src/dawn_native/opengl/CommandBufferGL.cpp
index 3689277..b06dac4 100644
--- a/src/dawn_native/opengl/CommandBufferGL.cpp
+++ b/src/dawn_native/opengl/CommandBufferGL.cpp
@@ -716,6 +716,12 @@
                     break;
                 }
 
+                case Command::ResolveQuerySet: {
+                    // TODO(hao.x.li@intel.com): Resolve non-precise occlusion query.
+                    SkipCommand(&mCommands, type);
+                    break;
+                }
+
                 case Command::WriteTimestamp: {
                     // WriteTimestamp is not supported on OpenGL
                     UNREACHABLE();
diff --git a/src/dawn_native/vulkan/CommandBufferVk.cpp b/src/dawn_native/vulkan/CommandBufferVk.cpp
index 1159a3e..f7fb0a3 100644
--- a/src/dawn_native/vulkan/CommandBufferVk.cpp
+++ b/src/dawn_native/vulkan/CommandBufferVk.cpp
@@ -599,6 +599,10 @@
                     break;
                 }
 
+                case Command::ResolveQuerySet: {
+                    return DAWN_UNIMPLEMENTED_ERROR("Waiting for implementation.");
+                }
+
                 case Command::WriteTimestamp: {
                     return DAWN_UNIMPLEMENTED_ERROR("Waiting for implementation.");
                 }
diff --git a/src/tests/unittests/validation/QuerySetValidationTests.cpp b/src/tests/unittests/validation/QuerySetValidationTests.cpp
index 185c047..5bf7bab 100644
--- a/src/tests/unittests/validation/QuerySetValidationTests.cpp
+++ b/src/tests/unittests/validation/QuerySetValidationTests.cpp
@@ -314,3 +314,149 @@
         ASSERT_DEVICE_ERROR(queue.Submit(1, &commands));
     }
 }
+
+class ResolveQuerySetValidationTest : public QuerySetValidationTest {
+  protected:
+    wgpu::Buffer CreateBuffer(wgpu::Device cDevice, uint64_t size, wgpu::BufferUsage usage) {
+        wgpu::BufferDescriptor descriptor;
+        descriptor.size = size;
+        descriptor.usage = usage;
+
+        return cDevice.CreateBuffer(&descriptor);
+    }
+};
+
+// Test resolve query set with invalid query set, first query and query count
+TEST_F(ResolveQuerySetValidationTest, ResolveInvalidQuerySetAndIndexCount) {
+    constexpr uint32_t kQueryCount = 4;
+
+    wgpu::QuerySet querySet = CreateQuerySet(device, wgpu::QueryType::Occlusion, kQueryCount);
+    wgpu::Buffer destination =
+        CreateBuffer(device, kQueryCount * sizeof(uint64_t), wgpu::BufferUsage::QueryResolve);
+
+    // Success
+    {
+        wgpu::CommandEncoder encoder = device.CreateCommandEncoder();
+        encoder.ResolveQuerySet(querySet, 0, kQueryCount, destination, 0);
+        wgpu::CommandBuffer commands = encoder.Finish();
+
+        wgpu::Queue queue = device.GetDefaultQueue();
+        queue.Submit(1, &commands);
+    }
+
+    // Fail to resolve query set from another device
+    {
+        wgpu::CommandEncoder encoder = deviceWithTimestamp.CreateCommandEncoder();
+        encoder.ResolveQuerySet(querySet, 0, kQueryCount, destination, 0);
+        ASSERT_DEVICE_ERROR(encoder.Finish());
+    }
+
+    //  Fail to resolve query set if first query out of range
+    {
+        wgpu::CommandEncoder encoder = device.CreateCommandEncoder();
+        encoder.ResolveQuerySet(querySet, kQueryCount, 0, destination, 0);
+        ASSERT_DEVICE_ERROR(encoder.Finish());
+    }
+
+    //  Fail to resolve query set if the sum of first query and query count exceeds the number of
+    //  queries in the query set
+    {
+        wgpu::CommandEncoder encoder = device.CreateCommandEncoder();
+        encoder.ResolveQuerySet(querySet, 1, kQueryCount, destination, 0);
+        ASSERT_DEVICE_ERROR(encoder.Finish());
+    }
+
+    // Fail to resolve a destroyed query set
+    {
+        wgpu::CommandEncoder encoder = device.CreateCommandEncoder();
+        encoder.ResolveQuerySet(querySet, 0, kQueryCount, destination, 0);
+        wgpu::CommandBuffer commands = encoder.Finish();
+
+        wgpu::Queue queue = device.GetDefaultQueue();
+        querySet.Destroy();
+        ASSERT_DEVICE_ERROR(queue.Submit(1, &commands));
+    }
+}
+
+// Test resolve query set with invalid destination buffer and destination offset
+TEST_F(ResolveQuerySetValidationTest, ResolveToInvalidBufferAndOffset) {
+    constexpr uint32_t kQueryCount = 4;
+    constexpr uint64_t kBufferSize = kQueryCount * sizeof(uint64_t);
+
+    wgpu::QuerySet querySet = CreateQuerySet(device, wgpu::QueryType::Occlusion, kQueryCount);
+    wgpu::Buffer destination = CreateBuffer(device, kBufferSize, wgpu::BufferUsage::QueryResolve);
+
+    // Success
+    {
+        wgpu::CommandEncoder encoder = device.CreateCommandEncoder();
+        encoder.ResolveQuerySet(querySet, 1, kQueryCount - 1, destination, 8);
+        wgpu::CommandBuffer commands = encoder.Finish();
+
+        wgpu::Queue queue = device.GetDefaultQueue();
+        queue.Submit(1, &commands);
+    }
+
+    // Fail to resolve query set to a buffer created from another device
+    {
+        wgpu::Buffer bufferOnTimestamp =
+            CreateBuffer(deviceWithTimestamp, kBufferSize, wgpu::BufferUsage::QueryResolve);
+        wgpu::CommandEncoder encoder = deviceWithTimestamp.CreateCommandEncoder();
+        encoder.ResolveQuerySet(querySet, 0, kQueryCount, bufferOnTimestamp, 0);
+        ASSERT_DEVICE_ERROR(encoder.Finish());
+    }
+
+    //  Fail to resolve query set to a buffer if offset is not a multiple of 8 bytes
+    {
+        wgpu::CommandEncoder encoder = device.CreateCommandEncoder();
+        encoder.ResolveQuerySet(querySet, 0, kQueryCount, destination, 4);
+        ASSERT_DEVICE_ERROR(encoder.Finish());
+    }
+
+    //  Fail to resolve query set to a buffer if the resolved data would overflow the buffer
+    {
+        wgpu::CommandEncoder encoder = device.CreateCommandEncoder();
+        encoder.ResolveQuerySet(querySet, 0, kQueryCount, destination, 8);
+        ASSERT_DEVICE_ERROR(encoder.Finish());
+    }
+
+    //  Fail to resolve query set to a buffer if the offset is past the end of the buffer
+    {
+        wgpu::CommandEncoder encoder = device.CreateCommandEncoder();
+        encoder.ResolveQuerySet(querySet, 0, 1, destination, kBufferSize);
+        ASSERT_DEVICE_ERROR(encoder.Finish());
+    }
+
+    //  Fail to resolve query set to a buffer that does not have the QueryResolve usage
+    {
+        wgpu::Buffer dstBuffer = CreateBuffer(device, kBufferSize, wgpu::BufferUsage::CopyDst);
+        wgpu::CommandEncoder encoder = device.CreateCommandEncoder();
+        encoder.ResolveQuerySet(querySet, 0, kQueryCount, dstBuffer, 0);
+        ASSERT_DEVICE_ERROR(encoder.Finish());
+    }
+
+    // Fail to resolve query set to a destroyed buffer.
+    {
+        wgpu::CommandEncoder encoder = device.CreateCommandEncoder();
+        encoder.ResolveQuerySet(querySet, 0, kQueryCount, destination, 0);
+        wgpu::CommandBuffer commands = encoder.Finish();
+
+        wgpu::Queue queue = device.GetDefaultQueue();
+        destination.Destroy();
+        ASSERT_DEVICE_ERROR(queue.Submit(1, &commands));
+    }
+}
+
+// Check that in 32bit mode the computation of queryCount * sizeof(uint64_t) doesn't overflow (which
+// would skip validation).
+TEST_F(ResolveQuerySetValidationTest, BufferOverflowOn32Bits) {
+    // If compiling for 32-bits mode, the data size calculated by queryCount * sizeof(uint64_t)
+    // is 8, which is less than the buffer size.
+    constexpr uint32_t kQueryCount = std::numeric_limits<uint32_t>::max() / sizeof(uint64_t) + 2;
+
+    wgpu::QuerySet querySet = CreateQuerySet(device, wgpu::QueryType::Occlusion, kQueryCount);
+    wgpu::Buffer destination = CreateBuffer(device, 1024, wgpu::BufferUsage::QueryResolve);
+    wgpu::CommandEncoder encoder = device.CreateCommandEncoder();
+    encoder.ResolveQuerySet(querySet, 0, kQueryCount, destination, 0);
+
+    ASSERT_DEVICE_ERROR(encoder.Finish());
+}