Fix internal storage buffer usage

TimestampQueryTests.ResolveTwiceToSameBuffer fails on Intel latest
driver on Windows, because the kInternalStorageBuffer is not treated
in buffer usage when adding resource barrier.

Add missed kInternalStorageBuffer in buffer usage and remove
D3D12_RESOURCE_STATE_UNORDERED_ACCESS from QueryResolve, which will be
added by kInternalStorageBuffer.

Bug: dawn:797
Change-Id: I78607002179ba443b0db09c9c3bbc85fcc97a85b
Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/56523
Reviewed-by: Corentin Wallez <cwallez@chromium.org>
Commit-Queue: Hao Li <hao.x.li@intel.com>
diff --git a/src/dawn_native/BindGroupLayout.cpp b/src/dawn_native/BindGroupLayout.cpp
index 875d940..ddfc2e9 100644
--- a/src/dawn_native/BindGroupLayout.cpp
+++ b/src/dawn_native/BindGroupLayout.cpp
@@ -62,7 +62,8 @@
     }  // anonymous namespace
 
     MaybeError ValidateBindGroupLayoutDescriptor(DeviceBase* device,
-                                                 const BindGroupLayoutDescriptor* descriptor) {
+                                                 const BindGroupLayoutDescriptor* descriptor,
+                                                 bool allowInternalBinding) {
         if (descriptor->nextInChain != nullptr) {
             return DAWN_VALIDATION_ERROR("nextInChain must be nullptr");
         }
@@ -88,7 +89,11 @@
 
                 // The kInternalStorageBufferBinding is used internally and not a value
                 // in wgpu::BufferBindingType.
-                if (buffer.type != kInternalStorageBufferBinding) {
+                if (buffer.type == kInternalStorageBufferBinding) {
+                    if (!allowInternalBinding) {
+                        return DAWN_VALIDATION_ERROR("Internal binding types are disallowed");
+                    }
+                } else {
                     DAWN_TRY(ValidateBufferBindingType(buffer.type));
                 }
 
diff --git a/src/dawn_native/BindGroupLayout.h b/src/dawn_native/BindGroupLayout.h
index 641bf45..8db6492 100644
--- a/src/dawn_native/BindGroupLayout.h
+++ b/src/dawn_native/BindGroupLayout.h
@@ -33,7 +33,8 @@
 namespace dawn_native {
 
     MaybeError ValidateBindGroupLayoutDescriptor(DeviceBase* device,
-                                                 const BindGroupLayoutDescriptor* descriptor);
+                                                 const BindGroupLayoutDescriptor* descriptor,
+                                                 bool allowInternalBinding = false);
 
     // Bindings are specified as a |BindingNumber| in the BindGroupLayoutDescriptor.
     // These numbers may be arbitrary and sparse. Internally, Dawn packs these numbers
diff --git a/src/dawn_native/Buffer.cpp b/src/dawn_native/Buffer.cpp
index 56c6ea6..7bb1633 100644
--- a/src/dawn_native/Buffer.cpp
+++ b/src/dawn_native/Buffer.cpp
@@ -138,9 +138,12 @@
             mUsage |= kReadOnlyStorageBuffer;
         }
 
-        // The buffer made with QueryResolve usage implicitly get InternalStorage usage which is
-        // only compatible with InternalStorageBuffer binding type in BGL, not StorageBuffer binding
-        // type.
+        // The query resolve buffer need to be used as a storage buffer in the internal compute
+        // pipeline which does timestamp uint conversion for timestamp query, it requires the buffer
+        // has Storage usage in the binding group. Implicitly add an InternalStorage usage which is
+        // only compatible with InternalStorageBuffer binding type in BGL. It shouldn't be
+        // compatible with StorageBuffer binding type and the query resolve buffer cannot be bound
+        // as storage buffer if it's created without Storage usage.
         if (mUsage & wgpu::BufferUsage::QueryResolve) {
             mUsage |= kInternalStorageBuffer;
         }
diff --git a/src/dawn_native/Device.cpp b/src/dawn_native/Device.cpp
index dd01d25..ac9c51b 100644
--- a/src/dawn_native/Device.cpp
+++ b/src/dawn_native/Device.cpp
@@ -1034,10 +1034,11 @@
     }
 
     ResultOrError<Ref<BindGroupLayoutBase>> DeviceBase::CreateBindGroupLayout(
-        const BindGroupLayoutDescriptor* descriptor) {
+        const BindGroupLayoutDescriptor* descriptor,
+        bool allowInternalBinding) {
         DAWN_TRY(ValidateIsAlive());
         if (IsValidationEnabled()) {
-            DAWN_TRY(ValidateBindGroupLayoutDescriptor(this, descriptor));
+            DAWN_TRY(ValidateBindGroupLayoutDescriptor(this, descriptor, allowInternalBinding));
         }
         return GetOrCreateBindGroupLayout(descriptor);
     }
diff --git a/src/dawn_native/Device.h b/src/dawn_native/Device.h
index bd59142..06b9f8e 100644
--- a/src/dawn_native/Device.h
+++ b/src/dawn_native/Device.h
@@ -147,7 +147,8 @@
         // Object creation methods that be used in a reentrant manner.
         ResultOrError<Ref<BindGroupBase>> CreateBindGroup(const BindGroupDescriptor* descriptor);
         ResultOrError<Ref<BindGroupLayoutBase>> CreateBindGroupLayout(
-            const BindGroupLayoutDescriptor* descriptor);
+            const BindGroupLayoutDescriptor* descriptor,
+            bool allowInternalBinding = false);
         ResultOrError<Ref<BufferBase>> CreateBuffer(const BufferDescriptor* descriptor);
         ResultOrError<Ref<ComputePipelineBase>> CreateComputePipeline(
             const ComputePipelineDescriptor* descriptor);
diff --git a/src/dawn_native/QueryHelper.cpp b/src/dawn_native/QueryHelper.cpp
index cc504df..2614269 100644
--- a/src/dawn_native/QueryHelper.cpp
+++ b/src/dawn_native/QueryHelper.cpp
@@ -135,7 +135,7 @@
                 bglDesc.entryCount = static_cast<uint32_t>(entries.size());
                 bglDesc.entries = entries.data();
                 Ref<BindGroupLayoutBase> bgl;
-                DAWN_TRY_ASSIGN(bgl, device->CreateBindGroupLayout(&bglDesc));
+                DAWN_TRY_ASSIGN(bgl, device->CreateBindGroupLayout(&bglDesc, true));
 
                 // Create pipeline layout
                 PipelineLayoutDescriptor plDesc;
diff --git a/src/dawn_native/d3d12/BufferD3D12.cpp b/src/dawn_native/d3d12/BufferD3D12.cpp
index 5f73b7e..779bec0 100644
--- a/src/dawn_native/d3d12/BufferD3D12.cpp
+++ b/src/dawn_native/d3d12/BufferD3D12.cpp
@@ -53,7 +53,7 @@
             if (usage & wgpu::BufferUsage::Index) {
                 resourceState |= D3D12_RESOURCE_STATE_INDEX_BUFFER;
             }
-            if (usage & wgpu::BufferUsage::Storage) {
+            if (usage & (wgpu::BufferUsage::Storage | kInternalStorageBuffer)) {
                 resourceState |= D3D12_RESOURCE_STATE_UNORDERED_ACCESS;
             }
             if (usage & kReadOnlyStorageBuffer) {
@@ -64,11 +64,7 @@
                 resourceState |= D3D12_RESOURCE_STATE_INDIRECT_ARGUMENT;
             }
             if (usage & wgpu::BufferUsage::QueryResolve) {
-                // D3D12_RESOURCE_STATE_COPY_DEST is required by ResolveQueryData but we also add
-                // D3D12_RESOURCE_STATE_UNORDERED_ACCESS because the queries will be post-processed
-                // by a compute shader and written to this buffer via a UAV.
-                resourceState |=
-                    (D3D12_RESOURCE_STATE_UNORDERED_ACCESS | D3D12_RESOURCE_STATE_COPY_DEST);
+                resourceState |= D3D12_RESOURCE_STATE_COPY_DEST;
             }
 
             return resourceState;
diff --git a/src/dawn_native/vulkan/BufferVk.cpp b/src/dawn_native/vulkan/BufferVk.cpp
index f0d70b4..2763caf 100644
--- a/src/dawn_native/vulkan/BufferVk.cpp
+++ b/src/dawn_native/vulkan/BufferVk.cpp
@@ -45,17 +45,15 @@
             if (usage & wgpu::BufferUsage::Uniform) {
                 flags |= VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT;
             }
-            if (usage & (wgpu::BufferUsage::Storage | kReadOnlyStorageBuffer)) {
+            if (usage &
+                (wgpu::BufferUsage::Storage | kInternalStorageBuffer | kReadOnlyStorageBuffer)) {
                 flags |= VK_BUFFER_USAGE_STORAGE_BUFFER_BIT;
             }
             if (usage & wgpu::BufferUsage::Indirect) {
                 flags |= VK_BUFFER_USAGE_INDIRECT_BUFFER_BIT;
             }
             if (usage & wgpu::BufferUsage::QueryResolve) {
-                // VK_BUFFER_USAGE_TRANSFER_DST_BIT is required by vkCmdCopyQueryPoolResults
-                // but we also add VK_BUFFER_USAGE_STORAGE_BUFFER_BIT because the queries will
-                // be post-processed by a compute shader and written to this buffer.
-                flags |= (VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT);
+                flags |= VK_BUFFER_USAGE_TRANSFER_DST_BIT;
             }
 
             return flags;
diff --git a/src/tests/BUILD.gn b/src/tests/BUILD.gn
index 99dc365..74f47a4 100644
--- a/src/tests/BUILD.gn
+++ b/src/tests/BUILD.gn
@@ -435,6 +435,7 @@
 
   sources += [
     "white_box/InternalResourceUsageTests.cpp",
+    "white_box/InternalStorageBufferBindingTests.cpp",
     "white_box/QueryInternalShaderTests.cpp",
   ]
 
diff --git a/src/tests/white_box/InternalResourceUsageTests.cpp b/src/tests/white_box/InternalResourceUsageTests.cpp
index 267f5dd..92c28a4 100644
--- a/src/tests/white_box/InternalResourceUsageTests.cpp
+++ b/src/tests/white_box/InternalResourceUsageTests.cpp
@@ -16,18 +16,25 @@
 
 #include "dawn_native/dawn_platform.h"
 
-class InternalResourceUsageTests : public DawnTest {};
+class InternalResourceUsageTests : public DawnTest {
+  protected:
+    wgpu::Buffer CreateBuffer(wgpu::BufferUsage usage) {
+        wgpu::BufferDescriptor descriptor;
+        descriptor.size = 4;
+        descriptor.usage = usage;
+
+        return device.CreateBuffer(&descriptor);
+    }
+};
 
 // Verify it is an error to create a buffer with a buffer usage that should only be used
 // internally.
 TEST_P(InternalResourceUsageTests, InternalBufferUsage) {
     DAWN_TEST_UNSUPPORTED_IF(HasToggleEnabled("skip_validation"));
 
-    wgpu::BufferDescriptor descriptor;
-    descriptor.size = 4;
-    descriptor.usage = dawn_native::kReadOnlyStorageBuffer;
+    ASSERT_DEVICE_ERROR(CreateBuffer(dawn_native::kReadOnlyStorageBuffer));
 
-    ASSERT_DEVICE_ERROR(device.CreateBuffer(&descriptor));
+    ASSERT_DEVICE_ERROR(CreateBuffer(dawn_native::kInternalStorageBuffer));
 }
 
 // Verify it is an error to create a texture with a texture usage that should only be used
@@ -43,3 +50,23 @@
 }
 
 DAWN_INSTANTIATE_TEST(InternalResourceUsageTests, NullBackend());
+
+class InternalBindingTypeTests : public DawnTest {};
+
+// Verify it is an error to create a bind group layout with a buffer binding type that should only
+// be used internally.
+TEST_P(InternalBindingTypeTests, InternalStorageBufferBindingType) {
+    DAWN_TEST_UNSUPPORTED_IF(HasToggleEnabled("skip_validation"));
+
+    wgpu::BindGroupLayoutEntry bglEntry;
+    bglEntry.binding = 0;
+    bglEntry.buffer.type = dawn_native::kInternalStorageBufferBinding;
+    bglEntry.visibility = wgpu::ShaderStage::Compute;
+
+    wgpu::BindGroupLayoutDescriptor bglDesc;
+    bglDesc.entryCount = 1;
+    bglDesc.entries = &bglEntry;
+    ASSERT_DEVICE_ERROR(device.CreateBindGroupLayout(&bglDesc));
+}
+
+DAWN_INSTANTIATE_TEST(InternalBindingTypeTests, NullBackend());
diff --git a/src/tests/white_box/InternalStorageBufferBindingTests.cpp b/src/tests/white_box/InternalStorageBufferBindingTests.cpp
new file mode 100644
index 0000000..44c56c9
--- /dev/null
+++ b/src/tests/white_box/InternalStorageBufferBindingTests.cpp
@@ -0,0 +1,113 @@
+// Copyright 2021 The Dawn Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "tests/DawnTest.h"
+
+#include "dawn_native/BindGroupLayout.h"
+#include "dawn_native/Device.h"
+#include "dawn_native/dawn_platform.h"
+#include "utils/WGPUHelpers.h"
+
+class InternalStorageBufferBindingTests : public DawnTest {
+  protected:
+    static constexpr uint32_t kNumValues = 4;
+    static constexpr uint32_t kIterations = 4;
+
+    void SetUp() override {
+        DawnTest::SetUp();
+        DAWN_TEST_UNSUPPORTED_IF(UsesWire());
+    }
+
+    wgpu::ComputePipeline CreateComputePipelineWithInternalStorage() {
+        wgpu::ShaderModule module = utils::CreateShaderModule(device, R"(
+            [[block]] struct Buf {
+                data : array<u32, 4>;
+            };
+
+            [[group(0), binding(0)]] var<storage, read_write> buf : Buf;
+
+            [[stage(compute), workgroup_size(1)]]
+            fn main([[builtin(global_invocation_id)]] GlobalInvocationID : vec3<u32>) {
+                buf.data[GlobalInvocationID.x] = buf.data[GlobalInvocationID.x] + 0x1234u;
+            }
+        )");
+
+        // Create binding group layout with internal storage buffer binding type
+        dawn_native::BindGroupLayoutEntry bglEntry;
+        bglEntry.binding = 0;
+        bglEntry.buffer.type = dawn_native::kInternalStorageBufferBinding;
+        bglEntry.visibility = wgpu::ShaderStage::Compute;
+
+        dawn_native::BindGroupLayoutDescriptor bglDesc;
+        bglDesc.entryCount = 1;
+        bglDesc.entries = &bglEntry;
+
+        dawn_native::DeviceBase* nativeDevice =
+            reinterpret_cast<dawn_native::DeviceBase*>(device.Get());
+
+        Ref<dawn_native::BindGroupLayoutBase> bglRef =
+            nativeDevice->CreateBindGroupLayout(&bglDesc, true).AcquireSuccess();
+
+        wgpu::BindGroupLayout bgl =
+            wgpu::BindGroupLayout::Acquire(reinterpret_cast<WGPUBindGroupLayout>(bglRef.Detach()));
+
+        // Create pipeline layout
+        wgpu::PipelineLayoutDescriptor plDesc;
+        plDesc.bindGroupLayoutCount = 1;
+        plDesc.bindGroupLayouts = &bgl;
+        wgpu::PipelineLayout layout = device.CreatePipelineLayout(&plDesc);
+
+        wgpu::ComputePipelineDescriptor pipelineDesc = {};
+        pipelineDesc.layout = layout;
+        pipelineDesc.compute.module = module;
+        pipelineDesc.compute.entryPoint = "main";
+
+        return device.CreateComputePipeline(&pipelineDesc);
+    }
+};
+
+// Test that query resolve buffer can be bound as internal storage buffer, multiple dispatches to
+// increment values in the query resolve buffer are synchronized.
+TEST_P(InternalStorageBufferBindingTests, QueryResolveBufferBoundAsInternalStorageBuffer) {
+    std::vector<uint32_t> data(kNumValues, 0);
+    std::vector<uint32_t> expected(kNumValues, 0x1234u * kIterations);
+
+    uint64_t bufferSize = static_cast<uint64_t>(data.size() * sizeof(uint32_t));
+    wgpu::Buffer buffer =
+        utils::CreateBufferFromData(device, data.data(), bufferSize,
+                                    wgpu::BufferUsage::QueryResolve | wgpu::BufferUsage::CopySrc);
+
+    wgpu::ComputePipeline pipeline = CreateComputePipelineWithInternalStorage();
+
+    wgpu::BindGroup bindGroup =
+        utils::MakeBindGroup(device, pipeline.GetBindGroupLayout(0), {{0, buffer, 0, bufferSize}});
+
+    wgpu::CommandEncoder encoder = device.CreateCommandEncoder();
+    wgpu::ComputePassEncoder pass = encoder.BeginComputePass();
+    pass.SetPipeline(pipeline);
+    pass.SetBindGroup(0, bindGroup);
+    for (uint32_t i = 0; i < kIterations; ++i) {
+        pass.Dispatch(kNumValues);
+    }
+    pass.EndPass();
+    wgpu::CommandBuffer commands = encoder.Finish();
+    queue.Submit(1, &commands);
+
+    EXPECT_BUFFER_U32_RANGE_EQ(expected.data(), buffer, 0, kNumValues);
+}
+
+DAWN_INSTANTIATE_TEST(InternalStorageBufferBindingTests,
+                      D3D12Backend(),
+                      MetalBackend(),
+                      VulkanBackend());