| // Copyright 2026 The Dawn & Tint Authors |
| // |
| // Redistribution and use in source and binary forms, with or without |
| // modification, are permitted provided that the following conditions are met: |
| // |
| // 1. Redistributions of source code must retain the above copyright notice, this |
| // list of conditions and the following disclaimer. |
| // |
| // 2. Redistributions in binary form must reproduce the above copyright notice, |
| // this list of conditions and the following disclaimer in the documentation |
| // and/or other materials provided with the distribution. |
| // |
| // 3. Neither the name of the copyright holder nor the names of its |
| // contributors may be used to endorse or promote products derived from |
| // this software without specific prior written permission. |
| // |
| // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
| // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
| // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE |
| // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE |
| // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
| // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR |
| // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER |
| // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, |
| // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
| // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| |
| #ifdef UNSAFE_BUFFERS_BUILD |
| // TODO(crbug.com/40285824): Remove this and convert code to safer constructs. |
| #pragma allow_unsafe_buffers |
| #endif |
| |
| #include <algorithm> |
| #include <ostream> |
| #include <vector> |
| |
| #include "dawn/tests/perf_tests/DawnPerfTest.h" |
| #include "dawn/utils/ComboRenderPipelineDescriptor.h" |
| #include "dawn/utils/WGPUHelpers.h" |
| |
| namespace dawn { |
| namespace { |
| |
| constexpr unsigned int kNumIterations = 20; |
| |
| enum class LoadStoreOp { |
| ClearAndDiscard, // LoadOp=Clear + StoreOp=Discard (MSAA attachments only) |
| ClearAndStore, // LoadOp=Clear + StoreOp=Store |
| LoadAndStore, // LoadOp=Load + StoreOp=Store |
| }; |
| |
| std::ostream& operator<<(std::ostream& o, LoadStoreOp op) { |
| switch (op) { |
| case LoadStoreOp::ClearAndDiscard: |
| return o << "ClearAndDiscard"; |
| case LoadStoreOp::ClearAndStore: |
| return o << "ClearAndStore"; |
| case LoadStoreOp::LoadAndStore: |
| return o << "LoadAndStore"; |
| } |
| } |
| |
| DAWN_TEST_PARAM_STRUCT(LoadStoreOpParams, LoadStoreOp); |
| |
| // Perf test comparing load+store configurations on MSAA attachments: |
| // - LoadOp=Clear + StoreOp=Discard |
| // - LoadOp=Clear + StoreOp=Store |
| // - LoadOp=Load + StoreOp=Store |
| // Each step renders multiple passes across several large (4096x4096) MSAA + resolve texture pairs |
| // to measure how the choice of load/store ops affects GPU throughput in a realistic multi-pass |
| // scenario. We use multiple textures (kNumTextures) to avoid the driver potentially caching results |
| // and effectively skipping work when the same operation is performed repeatedly. |
| class LoadStoreOpPerfTest : public DawnPerfTestWithParams<LoadStoreOpParams> { |
| public: |
| LoadStoreOpPerfTest() : DawnPerfTestWithParams<LoadStoreOpParams>(kNumIterations, 1) {} |
| ~LoadStoreOpPerfTest() override = default; |
| |
| private: |
| void SetUpPerfTest() override; |
| void Step() override; |
| |
| wgpu::Texture CreateTexture(wgpu::TextureFormat format, uint32_t sampleCount) { |
| wgpu::TextureDescriptor descriptor; |
| descriptor.dimension = wgpu::TextureDimension::e2D; |
| descriptor.size.width = kWidth; |
| descriptor.size.height = kHeight; |
| descriptor.size.depthOrArrayLayers = 1; |
| descriptor.sampleCount = sampleCount; |
| descriptor.format = format; |
| descriptor.mipLevelCount = 1; |
| |
| descriptor.usage = |
| wgpu::TextureUsage::RenderAttachment | wgpu::TextureUsage::TextureBinding; |
| |
| return device.CreateTexture(&descriptor); |
| } |
| |
| static constexpr uint32_t kWidth = 4096; |
| static constexpr uint32_t kHeight = 4096; |
| static constexpr uint32_t kNumTextures = 5; |
| |
| wgpu::Texture msaaTexture[kNumTextures]; |
| wgpu::TextureView msaaTextureView[kNumTextures]; |
| wgpu::Texture resolveTexture[kNumTextures]; |
| wgpu::TextureView resolveTextureView[kNumTextures]; |
| |
| wgpu::Texture srcTexture1[kNumTextures]; |
| wgpu::Texture srcTexture2[kNumTextures]; |
| wgpu::TextureView srcTextureView1[kNumTextures]; |
| wgpu::TextureView srcTextureView2[kNumTextures]; |
| |
| wgpu::RenderPipeline msaaPipeline; |
| wgpu::RenderPipeline singleSampledPipeline; |
| wgpu::BindGroup blitBindGroup1[kNumTextures]; |
| wgpu::BindGroup blitBindGroup2[kNumTextures]; |
| }; |
| |
| void LoadStoreOpPerfTest::SetUpPerfTest() { |
| wgpu::BindGroupLayout bgl = utils::MakeBindGroupLayout( |
| device, { |
| {0, wgpu::ShaderStage::Fragment, wgpu::TextureSampleType::Float}, |
| }); |
| |
| wgpu::PipelineLayout pipelineLayout = utils::MakePipelineLayout(device, {bgl}); |
| |
| const char* vs = R"( |
| @vertex |
| fn main(@builtin(vertex_index) VertexIndex : u32) -> @builtin(position) vec4f { |
| const pos = array( |
| vec2f(-1.0, -1.0), |
| vec2f( 3.0, -1.0), |
| vec2f(-1.0, 3.0)); |
| return vec4f(pos[VertexIndex], 0.0, 1.0); |
| })"; |
| constexpr char fs[] = R"( |
| @group(0) @binding(0) var colorMap: texture_2d<f32>; |
| @fragment |
| fn main(@builtin(position) fragPosition : vec4<f32>) -> @location(0) vec4<f32> { |
| let coords = vec2<i32>(i32(fragPosition.x), i32(fragPosition.y)); |
| return textureLoad(colorMap, coords, 0); |
| })"; |
| utils::ComboRenderPipelineDescriptor pipelineDescriptor; |
| |
| pipelineDescriptor.vertex.module = utils::CreateShaderModule(device, vs); |
| pipelineDescriptor.cFragment.module = utils::CreateShaderModule(device, fs); |
| |
| pipelineDescriptor.cFragment.targetCount = 1; |
| pipelineDescriptor.cTargets[0].writeMask = wgpu::ColorWriteMask::All; |
| pipelineDescriptor.cTargets[0].format = wgpu::TextureFormat::RGBA8Unorm; |
| |
| pipelineDescriptor.layout = pipelineLayout; |
| |
| pipelineDescriptor.multisample.mask = 0xFFFFFFFF; |
| pipelineDescriptor.multisample.count = 4; |
| msaaPipeline = device.CreateRenderPipeline(&pipelineDescriptor); |
| |
| pipelineDescriptor.multisample.count = 1; |
| singleSampledPipeline = device.CreateRenderPipeline(&pipelineDescriptor); |
| |
| for (uint32_t i = 0; i < kNumTextures; ++i) { |
| msaaTexture[i] = CreateTexture(wgpu::TextureFormat::RGBA8Unorm, 4); |
| resolveTexture[i] = CreateTexture(wgpu::TextureFormat::RGBA8Unorm, 1); |
| srcTexture1[i] = CreateTexture(wgpu::TextureFormat::RGBA8Unorm, 1); |
| srcTexture2[i] = CreateTexture(wgpu::TextureFormat::RGBA8Unorm, 1); |
| |
| msaaTextureView[i] = msaaTexture[i].CreateView(); |
| resolveTextureView[i] = resolveTexture[i].CreateView(); |
| srcTextureView1[i] = srcTexture1[i].CreateView(); |
| srcTextureView2[i] = srcTexture2[i].CreateView(); |
| |
| blitBindGroup1[i] = utils::MakeBindGroup(device, bgl, {{0, srcTextureView1[i]}}); |
| blitBindGroup2[i] = utils::MakeBindGroup(device, bgl, {{0, srcTextureView2[i]}}); |
| |
| // Clear the textures |
| wgpu::CommandEncoder encoder = device.CreateCommandEncoder(); |
| float colorScale = std::max(0.1f, i / float(kNumTextures)); |
| { |
| utils::ComboRenderPassDescriptor renderPass({msaaTextureView[i]}); |
| renderPass.cColorAttachments[0].loadOp = wgpu::LoadOp::Clear; |
| renderPass.cColorAttachments[0].clearValue = {0.0f, colorScale, 0.0f, 0.0f}; |
| |
| wgpu::RenderPassEncoder renderPassEncoder = encoder.BeginRenderPass(&renderPass); |
| renderPassEncoder.End(); |
| } |
| { |
| utils::ComboRenderPassDescriptor renderPass({srcTextureView1[i]}); |
| renderPass.cColorAttachments[0].loadOp = wgpu::LoadOp::Clear; |
| renderPass.cColorAttachments[0].clearValue = {colorScale, 0.0f, colorScale, 0.0f}; |
| |
| wgpu::RenderPassEncoder renderPassEncoder = encoder.BeginRenderPass(&renderPass); |
| renderPassEncoder.End(); |
| } |
| { |
| utils::ComboRenderPassDescriptor renderPass({srcTextureView2[i]}); |
| renderPass.cColorAttachments[0].loadOp = wgpu::LoadOp::Clear; |
| renderPass.cColorAttachments[0].clearValue = {0.0f, colorScale, colorScale, 0.0f}; |
| |
| wgpu::RenderPassEncoder renderPassEncoder = encoder.BeginRenderPass(&renderPass); |
| renderPassEncoder.End(); |
| } |
| wgpu::CommandBuffer commands = encoder.Finish(); |
| queue.Submit(1, &commands); |
| } |
| } |
| |
| void LoadStoreOpPerfTest::Step() { |
| wgpu::CommandEncoder encoder = device.CreateCommandEncoder(); |
| |
| if (SupportsTimestampQuery()) { |
| RecordBeginTimestamp(encoder); |
| } |
| |
| for (unsigned int iteration = 0; iteration < kNumIterations; ++iteration) { |
| for (uint32_t i = 0; i < kNumTextures; ++i) { |
| const bool isClear = GetParam().mLoadStoreOp != LoadStoreOp::LoadAndStore; |
| const bool isDiscard = GetParam().mLoadStoreOp == LoadStoreOp::ClearAndDiscard; |
| |
| // We perform two passes to mimic typical Skia multi-pass use cases where a resolve |
| // texture is used both as a resolve target in an MSAA pass and directly as a |
| // render attachment in a single-sampled pass. This structure also prevents the |
| // driver from merging multiple identical render passes into one. |
| |
| // 1st pass: blit the src texture 1 to the MSAA texture. |
| { |
| utils::ComboRenderPassDescriptor renderPass({msaaTextureView[i]}); |
| renderPass.cColorAttachments[0].resolveTarget = resolveTextureView[i]; |
| renderPass.cColorAttachments[0].loadOp = |
| isClear ? wgpu::LoadOp::Clear : wgpu::LoadOp::Load; |
| renderPass.cColorAttachments[0].storeOp = |
| isDiscard ? wgpu::StoreOp::Discard : wgpu::StoreOp::Store; |
| renderPass.cColorAttachments[0].clearValue = {0.0f, 0.0f, 0.0f, 0.0f}; |
| |
| wgpu::RenderPassEncoder renderPassEncoder = encoder.BeginRenderPass(&renderPass); |
| renderPassEncoder.SetBindGroup(0, blitBindGroup1[i]); |
| renderPassEncoder.SetPipeline(msaaPipeline); |
| renderPassEncoder.Draw(3); |
| renderPassEncoder.End(); |
| } |
| // 2nd pass: blit the src texture 2 to the resolve texture. |
| // This is a non-MSAA attachment so we always use StoreOp=Store — discarding it |
| // would throw away the resolve output that subsequent passes depend on. |
| { |
| utils::ComboRenderPassDescriptor renderPass({resolveTextureView[i]}); |
| renderPass.cColorAttachments[0].loadOp = |
| isClear ? wgpu::LoadOp::Clear : wgpu::LoadOp::Load; |
| renderPass.cColorAttachments[0].clearValue = {0.0f, 0.0f, 0.0f, 0.0f}; |
| |
| wgpu::RenderPassEncoder renderPassEncoder = encoder.BeginRenderPass(&renderPass); |
| renderPassEncoder.SetBindGroup(0, blitBindGroup2[i]); |
| renderPassEncoder.SetPipeline(singleSampledPipeline); |
| renderPassEncoder.Draw(3); |
| renderPassEncoder.End(); |
| } |
| } |
| } |
| |
| if (SupportsTimestampQuery()) { |
| RecordEndTimestampAndResolveQuerySet(encoder); |
| } |
| |
| wgpu::CommandBuffer commands = encoder.Finish(); |
| queue.Submit(1, &commands); |
| |
| if (SupportsTimestampQuery()) { |
| ComputeGPUElapsedTime(); |
| } |
| } |
| |
| TEST_P(LoadStoreOpPerfTest, Run) { |
| RunTest(); |
| } |
| |
| DAWN_INSTANTIATE_TEST_P(LoadStoreOpPerfTest, |
| {D3D11Backend(), D3D12Backend(), MetalBackend(), OpenGLBackend(), |
| VulkanBackend()}, |
| {LoadStoreOp::ClearAndDiscard, LoadStoreOp::ClearAndStore, |
| LoadStoreOp::LoadAndStore}); |
| |
| using StoreOp = wgpu::StoreOp; |
| DAWN_TEST_PARAM_STRUCT(StoreOpParams, StoreOp); |
| |
| // Measures the cost of rendering with a depth attachment using loadOp=Clear and configurable |
| // storeOp (Discard vs Store). |
| class StoreOpDepthPerfTest : public DawnPerfTestWithParams<StoreOpParams> { |
| public: |
| StoreOpDepthPerfTest() : DawnPerfTestWithParams(kNumIterations, 1) {} |
| ~StoreOpDepthPerfTest() override = default; |
| |
| private: |
| void SetUpPerfTest() override; |
| void Step() override; |
| |
| wgpu::Texture CreateColorTexture() { |
| wgpu::TextureDescriptor descriptor; |
| descriptor.dimension = wgpu::TextureDimension::e2D; |
| descriptor.size.width = kWidth; |
| descriptor.size.height = kHeight; |
| descriptor.size.depthOrArrayLayers = 1; |
| descriptor.sampleCount = 1; |
| descriptor.format = wgpu::TextureFormat::RGBA8Unorm; |
| descriptor.mipLevelCount = 1; |
| descriptor.usage = wgpu::TextureUsage::RenderAttachment; |
| return device.CreateTexture(&descriptor); |
| } |
| |
| wgpu::Texture CreateDepthTexture() { |
| wgpu::TextureDescriptor descriptor; |
| descriptor.dimension = wgpu::TextureDimension::e2D; |
| descriptor.size.width = kWidth; |
| descriptor.size.height = kHeight; |
| descriptor.size.depthOrArrayLayers = 1; |
| descriptor.sampleCount = 1; |
| descriptor.format = wgpu::TextureFormat::Depth24Plus; |
| descriptor.mipLevelCount = 1; |
| descriptor.usage = wgpu::TextureUsage::RenderAttachment; |
| return device.CreateTexture(&descriptor); |
| } |
| |
| static constexpr uint32_t kWidth = 4096; |
| static constexpr uint32_t kHeight = 4096; |
| static constexpr uint32_t kNumTextures = 5; |
| |
| wgpu::Texture colorTexture[kNumTextures]; |
| wgpu::TextureView colorTextureView[kNumTextures]; |
| wgpu::Texture depthTexture[kNumTextures]; |
| wgpu::TextureView depthTextureView[kNumTextures]; |
| |
| wgpu::RenderPipeline pipeline; |
| }; |
| |
| void StoreOpDepthPerfTest::SetUpPerfTest() { |
| const char* vs = R"( |
| @vertex |
| fn main(@builtin(vertex_index) VertexIndex : u32) -> @builtin(position) vec4f { |
| const pos = array( |
| vec2f(-1.0, -1.0), |
| vec2f( 3.0, -1.0), |
| vec2f(-1.0, 3.0)); |
| return vec4f(pos[VertexIndex], 0.0, 1.0); |
| })"; |
| const char* fs = R"( |
| @fragment |
| fn main() -> @location(0) vec4f { |
| return vec4f(0.0, 1.0, 0.0, 1.0); |
| })"; |
| |
| utils::ComboRenderPipelineDescriptor pipelineDescriptor; |
| pipelineDescriptor.vertex.module = utils::CreateShaderModule(device, vs); |
| pipelineDescriptor.cFragment.module = utils::CreateShaderModule(device, fs); |
| pipelineDescriptor.cFragment.targetCount = 1; |
| pipelineDescriptor.cTargets[0].writeMask = wgpu::ColorWriteMask::All; |
| pipelineDescriptor.cTargets[0].format = wgpu::TextureFormat::RGBA8Unorm; |
| pipelineDescriptor.EnableDepthStencil(wgpu::TextureFormat::Depth24Plus); |
| pipelineDescriptor.cDepthStencil.depthWriteEnabled = wgpu::OptionalBool::True; |
| pipelineDescriptor.cDepthStencil.depthCompare = wgpu::CompareFunction::Less; |
| pipeline = device.CreateRenderPipeline(&pipelineDescriptor); |
| |
| for (uint32_t i = 0; i < kNumTextures; ++i) { |
| colorTexture[i] = CreateColorTexture(); |
| depthTexture[i] = CreateDepthTexture(); |
| colorTextureView[i] = colorTexture[i].CreateView(); |
| depthTextureView[i] = depthTexture[i].CreateView(); |
| } |
| } |
| |
| void StoreOpDepthPerfTest::Step() { |
| wgpu::CommandEncoder encoder = device.CreateCommandEncoder(); |
| |
| if (SupportsTimestampQuery()) { |
| RecordBeginTimestamp(encoder); |
| } |
| |
| for (unsigned int iteration = 0; iteration < kNumIterations; ++iteration) { |
| for (uint32_t i = 0; i < kNumTextures; ++i) { |
| utils::ComboRenderPassDescriptor renderPass({colorTextureView[i]}, depthTextureView[i]); |
| renderPass.UnsetDepthStencilLoadStoreOpsForFormat(wgpu::TextureFormat::Depth24Plus); |
| |
| // Color attachment |
| renderPass.cColorAttachments[0].loadOp = wgpu::LoadOp::Clear; |
| renderPass.cColorAttachments[0].storeOp = wgpu::StoreOp::Store; |
| renderPass.cColorAttachments[0].clearValue = {0.0f, 0.0f, 0.0f, 0.0f}; |
| |
| // Depth attachment: loadOp=Clear, storeOp=configurable |
| renderPass.cDepthStencilAttachmentInfo.depthLoadOp = wgpu::LoadOp::Clear; |
| renderPass.cDepthStencilAttachmentInfo.depthStoreOp = GetParam().mStoreOp; |
| renderPass.cDepthStencilAttachmentInfo.depthClearValue = 1.0f; |
| |
| wgpu::RenderPassEncoder renderPassEncoder = encoder.BeginRenderPass(&renderPass); |
| renderPassEncoder.SetPipeline(pipeline); |
| renderPassEncoder.Draw(3); |
| renderPassEncoder.End(); |
| } |
| } |
| |
| if (SupportsTimestampQuery()) { |
| RecordEndTimestampAndResolveQuerySet(encoder); |
| } |
| |
| wgpu::CommandBuffer commands = encoder.Finish(); |
| queue.Submit(1, &commands); |
| |
| if (SupportsTimestampQuery()) { |
| ComputeGPUElapsedTime(); |
| } |
| } |
| |
| TEST_P(StoreOpDepthPerfTest, Run) { |
| RunTest(); |
| } |
| |
| DAWN_INSTANTIATE_TEST_P(StoreOpDepthPerfTest, |
| {D3D11Backend(), D3D12Backend(), MetalBackend(), OpenGLBackend(), |
| VulkanBackend()}, |
| {wgpu::StoreOp::Discard, wgpu::StoreOp::Store}); |
| |
| } // anonymous namespace |
| } // namespace dawn |