Add UniformBufferUpdatePerf.cpp

This perf test draws triangles with the color values stored in a uniform
buffer, measuring various ways of updating the uniform buffer, such as
partial or full size updating, and WriteBuffer or copying from a staging
buffer.

Bug: chromium:1485789
Change-Id: Ia1726c2f2a7e3a0cd28b5ccf18d6ed791a4c0171
Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/172740
Reviewed-by: Austin Eng <enga@chromium.org>
Reviewed-by: Corentin Wallez <cwallez@chromium.org>
Kokoro: Kokoro <noreply+kokoro@google.com>
Commit-Queue: Jie A Chen <jie.a.chen@intel.com>
diff --git a/src/dawn/tests/BUILD.gn b/src/dawn/tests/BUILD.gn
index b67165d..05fe619 100644
--- a/src/dawn/tests/BUILD.gn
+++ b/src/dawn/tests/BUILD.gn
@@ -881,6 +881,7 @@
     "perf_tests/DrawCallPerf.cpp",
     "perf_tests/ShaderRobustnessPerf.cpp",
     "perf_tests/SubresourceTrackingPerf.cpp",
+    "perf_tests/UniformBufferUpdatePerf.cpp",
     "perf_tests/VulkanZeroInitializeWorkgroupMemoryPerf.cpp",
   ]
 
diff --git a/src/dawn/tests/perf_tests/UniformBufferUpdatePerf.cpp b/src/dawn/tests/perf_tests/UniformBufferUpdatePerf.cpp
new file mode 100644
index 0000000..b7663b0
--- /dev/null
+++ b/src/dawn/tests/perf_tests/UniformBufferUpdatePerf.cpp
@@ -0,0 +1,351 @@
+// Copyright 2024 The Dawn & Tint Authors
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// 1. Redistributions of source code must retain the above copyright notice, this
+//    list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright notice,
+//    this list of conditions and the following disclaimer in the documentation
+//    and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the copyright holder nor the names of its
+//    contributors may be used to endorse or promote products derived from
+//    this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include <queue>
+#include <vector>
+
+#include "dawn/common/MutexProtected.h"
+#include "dawn/tests/perf_tests/DawnPerfTest.h"
+#include "dawn/utils/ComboRenderPipelineDescriptor.h"
+#include "dawn/utils/WGPUHelpers.h"
+
+// This is for developers only to ensure the triangle color drawn is as expected.
+// #define PIXEL_CHECK 1
+
+namespace dawn {
+namespace {
+
+constexpr unsigned int kNumIterations = 100;  // Draws per Step(); also the shader's color divisor.
+
+constexpr uint32_t kTextureSize = 128;  // Width and height of the render attachments.
+constexpr size_t kUniformDataSize = 4 * sizeof(float);  // Bytes written on every update.
+constexpr size_t kUniformBufferSize = 256;  // Buffer size in bytes when the upload is partial.
+
+constexpr float kVertexData[12] = {  // Three vec4f positions, one per triangle vertex.
+    0.0f, 0.5f, 0.0f, 1.0f, -0.5f, -0.5f, 0.0f, 1.0f, 0.5f, -0.5f, 0.0f, 1.0f,
+};
+
+constexpr char kVertexShader[] = R"(
+        @vertex fn main(
+            @location(0) pos : vec4f
+        ) -> @builtin(position) vec4f {
+            return pos;
+        })";  // Forwards the vertex attribute unchanged as the output position.
+
+constexpr char kFragmentShader[] = R"(
+        @group(0) @binding(0) var<uniform> color : vec3f;
+        @fragment fn main() -> @location(0) vec4f {
+            return vec4f(color * (1.0 / %d), 1.0);
+        })";  // printf-style template: SetUp() formats kNumIterations into the %d.
+
+enum class UploadMethod {
+    WriteBuffer,    // Upload with queue.WriteBuffer() straight into the uniform buffer.
+    StagingBuffer,  // memcpy into a mapped staging buffer, then copy buffer-to-buffer.
+};
+
+enum class UploadSize {
+    Partial,  // Buffer is kUniformBufferSize bytes; only kUniformDataSize bytes are updated.
+    Full,     // Buffer is exactly kUniformDataSize bytes, so an update covers all of it.
+};
+
+enum class UniformBuffer {
+    Single,    // Reuse the same uniform buffer for every draw.
+    Multiple,  // Recycle a uniform buffer only after its submitted work has completed.
+};
+
+struct UniformBufferUpdateParams : AdapterTestParam {
+    UniformBufferUpdateParams(const AdapterTestParam& adapterParam,
+                              UploadMethod method,
+                              UploadSize size,
+                              UniformBuffer bufferMode)
+        : AdapterTestParam(adapterParam),
+          uploadMethod(method),
+          uploadSize(size),
+          uniformBuffer(bufferMode) {}
+
+    UploadMethod uploadMethod;    // How the new data reaches the uniform buffer.
+    UploadSize uploadSize;        // Whether an update covers the whole buffer or part of it.
+    UniformBuffer uniformBuffer;  // Whether one buffer is reused or several are cycled.
+};
+
+std::ostream& operator<<(std::ostream& ostream, const UniformBufferUpdateParams& param) {
+    // Streams the base adapter description followed by one "_Name" suffix per test
+    // dimension, in the order: upload method, upload size, uniform buffer mode.
+    // A dimension whose value matches no known enumerator adds nothing to the stream.
+    ostream << static_cast<const AdapterTestParam&>(param);
+
+    // How the data is uploaded.
+    const UploadMethod method = param.uploadMethod;
+    if (method == UploadMethod::WriteBuffer) {
+        ostream << "_WriteBuffer";
+    } else if (method == UploadMethod::StagingBuffer) {
+        ostream << "_StagingBuffer";
+    }
+
+    // How much of the buffer an upload covers.
+    const UploadSize size = param.uploadSize;
+    if (size == UploadSize::Partial) {
+        ostream << "_Partial";
+    } else if (size == UploadSize::Full) {
+        ostream << "_Full";
+    }
+
+    // Whether a single buffer is reused or several are cycled.
+    const UniformBuffer bufferMode = param.uniformBuffer;
+    if (bufferMode == UniformBuffer::Single) {
+        ostream << "_SingleUniformBuffer";
+    } else if (bufferMode == UniformBuffer::Multiple) {
+        ostream << "_MultipleUniformBuffer";
+    }
+
+    return ostream;
+}
+
+// Perf test: each Step() updates a uniform buffer and draws with it |kNumIterations| times.
+class UniformBufferUpdatePerf : public DawnPerfTestWithParams<UniformBufferUpdateParams> {
+  public:
+    UniformBufferUpdatePerf() : DawnPerfTestWithParams(kNumIterations, 1) {}
+    ~UniformBufferUpdatePerf() override = default;
+
+    void SetUp() override;  // Creates the attachments, vertex buffer, layout and pipeline.
+
+  private:
+    // Userdata for async callbacks that return |buffer| to a free queue of |self|.
+    struct CallbackData {
+        UniformBufferUpdatePerf* self;  // Test instance that owns the free queues.
+        wgpu::Buffer buffer;            // Buffer handed back when the callback reports success.
+    };
+    void Step() override;  // Runs |kNumIterations| rounds of update, draw and submit.
+
+    size_t GetBufferSize();  // Size in bytes of the buffers this test configuration creates.
+    wgpu::Buffer FindOrCreateUniformBuffer();
+    void ReturnUniformBuffer(wgpu::Buffer buffer);
+    wgpu::Buffer FindOrCreateStagingBuffer();
+    void ReturnStagingBuffer(wgpu::Buffer buffer);
+
+    wgpu::Texture mColorAttachmentTexture;
+    wgpu::TextureView mColorAttachmentTextureView;
+    wgpu::TextureView mDepthStencilAttachment;
+    wgpu::Buffer mVertexBuffer;
+    wgpu::BindGroupLayout mUniformBindGroupLayout;
+    wgpu::RenderPipeline mPipeline;
+
+    // Uniform buffers that are free to be re-used by a later iteration.
+    MutexProtected<std::queue<wgpu::Buffer>> mUniformBuffers;
+    // Staging buffers free for re-use; each is mapped (at creation or by a finished MapAsync).
+    MutexProtected<std::queue<wgpu::Buffer>> mStagingBuffers;
+};
+
+size_t UniformBufferUpdatePerf::GetBufferSize() {
+    const bool isFullUpload = GetParam().uploadSize == UploadSize::Full;  // Data fills the buffer.
+    return isFullUpload ? kUniformDataSize : kUniformBufferSize;
+}
+
+// Takes a uniform buffer from the free queue, or creates a new one when the queue is empty.
+wgpu::Buffer UniformBufferUpdatePerf::FindOrCreateUniformBuffer() {
+    if (mUniformBuffers->empty()) {
+        wgpu::BufferDescriptor desc;
+        desc.usage = wgpu::BufferUsage::Uniform | wgpu::BufferUsage::CopyDst;
+        desc.size = GetBufferSize();
+        return device.CreateBuffer(&desc);
+    }
+    wgpu::Buffer recycled = mUniformBuffers->front();
+    mUniformBuffers->pop();
+    return recycled;
+}
+
+// Puts |buffer| back on the free uniform-buffer queue so a later iteration can pick it up.
+void UniformBufferUpdatePerf::ReturnUniformBuffer(wgpu::Buffer buffer) {
+    mUniformBuffers->emplace(buffer);
+}
+
+// Takes a staging buffer from the free queue, or creates one that is mapped at creation.
+wgpu::Buffer UniformBufferUpdatePerf::FindOrCreateStagingBuffer() {
+    if (mStagingBuffers->empty()) {
+        wgpu::BufferDescriptor desc;
+        desc.usage = wgpu::BufferUsage::MapWrite | wgpu::BufferUsage::CopySrc;
+        desc.size = GetBufferSize();
+        desc.mappedAtCreation = true;  // Step() writes through GetMappedRange() right away.
+        return device.CreateBuffer(&desc);
+    }
+    wgpu::Buffer recycled = mStagingBuffers->front();
+    mStagingBuffers->pop();
+    return recycled;
+}
+
+// Puts |buffer| back on the free staging-buffer queue so a later iteration can pick it up.
+void UniformBufferUpdatePerf::ReturnStagingBuffer(wgpu::Buffer buffer) {
+    mStagingBuffers->emplace(buffer);
+}
+
+void UniformBufferUpdatePerf::SetUp() {  // Builds the GPU objects that Step() reuses.
+    DawnPerfTestWithParams<UniformBufferUpdateParams>::SetUp();
+
+    // Create the color and depth-stencil attachments from one shared texture descriptor.
+    wgpu::TextureDescriptor descriptor = {};
+    descriptor.dimension = wgpu::TextureDimension::e2D;
+    descriptor.size.width = kTextureSize;
+    descriptor.size.height = kTextureSize;
+    descriptor.size.depthOrArrayLayers = 1;
+    descriptor.usage = wgpu::TextureUsage::RenderAttachment;
+#ifdef PIXEL_CHECK
+    descriptor.usage |= wgpu::TextureUsage::CopySrc;  // Extra usage for the developer-only check.
+#endif
+    descriptor.format = wgpu::TextureFormat::RGBA8Unorm;
+    mColorAttachmentTexture = device.CreateTexture(&descriptor);
+    mColorAttachmentTextureView = mColorAttachmentTexture.CreateView();
+
+    descriptor.format = wgpu::TextureFormat::Depth24PlusStencil8;  // Only the format changes.
+    mDepthStencilAttachment = device.CreateTexture(&descriptor).CreateView();
+
+    // Create the vertex buffer holding the triangle positions.
+    mVertexBuffer = utils::CreateBufferFromData(device, kVertexData, sizeof(kVertexData),
+                                                wgpu::BufferUsage::Vertex);
+
+    // Create the bind group layout: one uniform buffer at binding 0, fragment stage only.
+    mUniformBindGroupLayout = utils::MakeBindGroupLayout(
+        device, {
+                    {0, wgpu::ShaderStage::Fragment, wgpu::BufferBindingType::Uniform, false},
+                });
+
+    // Describe the pipeline: one Float32x4 vertex attribute, depth-stencil, RGBA8Unorm target.
+    utils::ComboRenderPipelineDescriptor renderPipelineDesc;
+    renderPipelineDesc.vertex.bufferCount = 1;
+    renderPipelineDesc.cBuffers[0].arrayStride = 4 * sizeof(float);
+    renderPipelineDesc.cBuffers[0].attributeCount = 1;
+    renderPipelineDesc.cAttributes[0].format = wgpu::VertexFormat::Float32x4;
+    renderPipelineDesc.EnableDepthStencil(wgpu::TextureFormat::Depth24PlusStencil8);
+    renderPipelineDesc.cTargets[0].format = wgpu::TextureFormat::RGBA8Unorm;
+
+    // Create the pipeline layout, which contains only the uniform bind group layout.
+    wgpu::PipelineLayoutDescriptor pipelineLayoutDesc = {};
+    pipelineLayoutDesc.bindGroupLayouts = &mUniformBindGroupLayout;
+    pipelineLayoutDesc.bindGroupLayoutCount = 1;
+    wgpu::PipelineLayout pipelineLayout = device.CreatePipelineLayout(&pipelineLayoutDesc);
+
+    // Create the shader modules for the pipeline.
+    wgpu::ShaderModule vsModule = utils::CreateShaderModule(device, kVertexShader);
+    // Format kNumIterations into the fragment shader's %d; the +16 leaves room for the digits.
+    char fragmentShader[sizeof(kFragmentShader) + 16];
+    snprintf(fragmentShader, sizeof(fragmentShader), kFragmentShader, kNumIterations);
+    wgpu::ShaderModule fsModule = utils::CreateShaderModule(device, fragmentShader);
+
+    // Create the pipeline from the layout and the two shader modules.
+    renderPipelineDesc.layout = pipelineLayout;
+    renderPipelineDesc.vertex.module = vsModule;
+    renderPipelineDesc.cFragment.module = fsModule;
+    mPipeline = device.CreateRenderPipeline(&renderPipelineDesc);
+}
+
+void UniformBufferUpdatePerf::Step() {  // kNumIterations rounds of update, draw, submit.
+    for (unsigned int i = 0; i < kNumIterations; ++i) {
+        std::vector<float> data(kUniformDataSize / sizeof(float), 1.0f * i);  // Count, not bytes.
+        wgpu::CommandEncoder commands = device.CreateCommandEncoder();
+        wgpu::Buffer uniformBuffer = FindOrCreateUniformBuffer();
+        wgpu::Buffer stagingBuffer = nullptr;
+        switch (GetParam().uploadMethod) {
+            case UploadMethod::WriteBuffer:
+                queue.WriteBuffer(uniformBuffer, 0, data.data(), kUniformDataSize);
+                break;
+            case UploadMethod::StagingBuffer:
+                stagingBuffer = FindOrCreateStagingBuffer();
+                memcpy(stagingBuffer.GetMappedRange(0, kUniformDataSize), data.data(),
+                       kUniformDataSize);
+                stagingBuffer.Unmap();
+                commands.CopyBufferToBuffer(stagingBuffer, 0, uniformBuffer, 0, kUniformDataSize);
+                break;
+        }
+        utils::ComboRenderPassDescriptor renderPass({mColorAttachmentTextureView},
+                                                    mDepthStencilAttachment);
+        wgpu::RenderPassEncoder pass = commands.BeginRenderPass(&renderPass);
+        pass.SetPipeline(mPipeline);
+        pass.SetVertexBuffer(0, mVertexBuffer);
+        wgpu::BindGroup bindGroup = utils::MakeBindGroup(device, mUniformBindGroupLayout,
+                                                         {{0, uniformBuffer, 0, GetBufferSize()}});
+        pass.SetBindGroup(0, bindGroup);
+        pass.Draw(3);
+        pass.End();
+        wgpu::CommandBuffer commandBuffer = commands.Finish();
+        queue.Submit(1, &commandBuffer);
+
+        // Re-map the staging buffer; it is returned to the free queue once the map succeeds.
+        if (GetParam().uploadMethod == UploadMethod::StagingBuffer) {
+            CallbackData* callbackData = new CallbackData({this, stagingBuffer});
+            stagingBuffer.MapAsync(
+                wgpu::MapMode::Write, 0, GetBufferSize(),
+                [](WGPUBufferMapAsyncStatus status, void* userdata) {
+                    CallbackData* data = static_cast<CallbackData*>(userdata);
+                    if (status == WGPUBufferMapAsyncStatus::WGPUBufferMapAsyncStatus_Success) {
+                        data->self->ReturnStagingBuffer(data->buffer);
+                    }
+                    delete data;  // The callback owns the userdata regardless of the status.
+                },
+                callbackData);
+        }
+
+        switch (GetParam().uniformBuffer) {
+            case UniformBuffer::Single:
+                // Return the uniform buffer immediately so that we always use the same one.
+                ReturnUniformBuffer(uniformBuffer);
+                break;
+            case UniformBuffer::Multiple:
+                // Return the uniform buffer only once the work submitted so far has completed.
+                CallbackData* callbackData = new CallbackData({this, uniformBuffer});
+                queue.OnSubmittedWorkDone(
+                    [](WGPUQueueWorkDoneStatus status, void* userdata) {
+                        CallbackData* data = static_cast<CallbackData*>(userdata);
+                        if (status == WGPUQueueWorkDoneStatus::WGPUQueueWorkDoneStatus_Success) {
+                            data->self->ReturnUniformBuffer(data->buffer);
+                        }
+                        delete data;  // The callback owns the userdata regardless of the status.
+                    },
+                    callbackData);
+                break;
+        }
+#ifdef PIXEL_CHECK
+        uint8_t u8 = std::floor(i * 255.0 / kNumIterations);
+        utils::RGBA8 color0(u8, u8, u8, 255);
+        utils::RGBA8 color1(u8 + 1, u8 + 1, u8 + 1, 255);
+        EXPECT_PIXEL_RGBA8_BETWEEN(color0, color1, mColorAttachmentTexture, kTextureSize / 2,
+                                   kTextureSize / 2);
+#endif
+    }
+}
+
+TEST_P(UniformBufferUpdatePerf, Run) {
+    RunTest();  // Not defined in this file; presumably the base class drives Step() - confirm.
+}
+
+DAWN_INSTANTIATE_TEST_P(UniformBufferUpdatePerf,  // Backends, then one list per test dimension.
+                        {D3D11Backend(), D3D12Backend(), MetalBackend(), OpenGLBackend(),
+                         OpenGLESBackend(), VulkanBackend()},
+                        {UploadMethod::WriteBuffer, UploadMethod::StagingBuffer},
+                        {UploadSize::Partial, UploadSize::Full},
+                        {UniformBuffer::Single, UniformBuffer::Multiple});
+
+}  // anonymous namespace
+}  // namespace dawn