src/dawn_native/QueryHelper.cpp - dawn - Git at Google

 // Copyright 2020 The Dawn Authors
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
 //
 //     http://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing, software
 // distributed under the License is distributed on an "AS IS" BASIS,
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.

 #include "dawn_native/QueryHelper.h"

 #include "dawn_native/BindGroup.h"
 #include "dawn_native/BindGroupLayout.h"
 #include "dawn_native/Buffer.h"
 #include "dawn_native/CommandEncoder.h"
 #include "dawn_native/ComputePassEncoder.h"
 #include "dawn_native/ComputePipeline.h"
 #include "dawn_native/Device.h"
 #include "dawn_native/InternalPipelineStore.h"
 #include "dawn_native/utils/WGPUHelpers.h"

 #include <cmath>

 namespace dawn::native {

     namespace {

         // Assert the offsets in dawn::native::TimestampParams are same with the ones in the shader
         static_assert(offsetof(dawn::native::TimestampParams, first) == 0);
         static_assert(offsetof(dawn::native::TimestampParams, count) == 4);
         static_assert(offsetof(dawn::native::TimestampParams, offset) == 8);
         static_assert(offsetof(dawn::native::TimestampParams, multiplier) == 12);
         static_assert(offsetof(dawn::native::TimestampParams, rightShift) == 16);

         static const char sConvertTimestampsToNanoseconds[] = R"(
             struct Timestamp {
                 low  : u32;
                 high : u32;
             };

             struct TimestampArr {
                 t : array<Timestamp>;
             };

             struct AvailabilityArr {
                 v : array<u32>;
             };

             struct TimestampParams {
                 first  : u32;
                 count  : u32;
                 offset : u32;
                 multiplier : u32;
                 right_shift  : u32;
             };

             @group(0) @binding(0) var<storage, read_write> timestamps : TimestampArr;
             @group(0) @binding(1) var<storage, read> availability : AvailabilityArr;
             @group(0) @binding(2) var<uniform> params : TimestampParams;

             let sizeofTimestamp : u32 = 8u;

             @stage(compute) @workgroup_size(8, 1, 1)
             fn main(@builtin(global_invocation_id) GlobalInvocationID : vec3<u32>) {
                 if (GlobalInvocationID.x >= params.count) { return; }

                 var index = GlobalInvocationID.x + params.offset / sizeofTimestamp;

                 // Return 0 for the unavailable value.
                 if (availability.v[GlobalInvocationID.x + params.first] == 0u) {
                     timestamps.t[index].low = 0u;
                     timestamps.t[index].high = 0u;
                     return;
                 }

                 var timestamp = timestamps.t[index];

                 // TODO(dawn:1250): Consider using the umulExtended and uaddCarry intrinsics once
                 // available.
                 var chunks : array<u32, 5>;
                 chunks[0] = timestamp.low & 0xFFFFu;
                 chunks[1] = timestamp.low >> 16u;
                 chunks[2] = timestamp.high & 0xFFFFu;
                 chunks[3] = timestamp.high >> 16u;
                 chunks[4] = 0u;

                 // Multiply all the chunks with the integer period.
                 for (var i = 0u; i < 4u; i = i + 1u) {
                     chunks[i] = chunks[i] * params.multiplier;
                 }

                 // Propagate the carry
                 var carry = 0u;
                 for (var i = 0u; i < 4u; i = i + 1u) {
                     var chunk_with_carry = chunks[i] + carry;
                     carry = chunk_with_carry >> 16u;
                     chunks[i] = chunk_with_carry & 0xFFFFu;
                 }
                 chunks[4] = carry;

                 // Apply the right shift.
                 for (var i = 0u; i < 4u; i = i + 1u) {
                     var low = chunks[i] >> params.right_shift;
                     var high = (chunks[i + 1u] << (16u - params.right_shift)) & 0xFFFFu;
                     chunks[i] = low | high;
                 }

                 timestamps.t[index].low = chunks[0] | (chunks[1] << 16u);
                 timestamps.t[index].high = chunks[2] | (chunks[3] << 16u);
             }
         )";

         ResultOrError<ComputePipelineBase*> GetOrCreateTimestampComputePipeline(
             DeviceBase* device) {
             InternalPipelineStore* store = device->GetInternalPipelineStore();

             if (store->timestampComputePipeline == nullptr) {
                 // Create compute shader module if not cached before.
                 if (store->timestampCS == nullptr) {
                     DAWN_TRY_ASSIGN(
                         store->timestampCS,
                         utils::CreateShaderModule(device, sConvertTimestampsToNanoseconds));
                 }

                 // Create binding group layout
                 Ref<BindGroupLayoutBase> bgl;
                 DAWN_TRY_ASSIGN(
                     bgl, utils::MakeBindGroupLayout(
                              device,
                              {
                                  {0, wgpu::ShaderStage::Compute, kInternalStorageBufferBinding},
                                  {1, wgpu::ShaderStage::Compute,
                                   wgpu::BufferBindingType::ReadOnlyStorage},
                                  {2, wgpu::ShaderStage::Compute, wgpu::BufferBindingType::Uniform},
                              },
                              /* allowInternalBinding */ true));

                 // Create pipeline layout
                 Ref<PipelineLayoutBase> layout;
                 DAWN_TRY_ASSIGN(layout, utils::MakeBasicPipelineLayout(device, bgl));

                 // Create ComputePipeline.
                 ComputePipelineDescriptor computePipelineDesc = {};
                 // Generate the layout based on shader module.
                 computePipelineDesc.layout = layout.Get();
                 computePipelineDesc.compute.module = store->timestampCS.Get();
                 computePipelineDesc.compute.entryPoint = "main";

                 DAWN_TRY_ASSIGN(store->timestampComputePipeline,
                                 device->CreateComputePipeline(&computePipelineDesc));
             }

             return store->timestampComputePipeline.Get();
         }

     }  // anonymous namespace

     TimestampParams::TimestampParams(uint32_t first, uint32_t count, uint32_t offset, float period)
         : first(first), count(count), offset(offset) {
         // The overall conversion happening, if p is the period, m the multiplier, s the shift, is::
         //
         //   m = round(p * 2^s)
         //
         // Then in the shader we compute:
         //
         //   m / 2^s = round(p * 2^s) / 2*s ~= p
         //
         // The goal is to find the best shift to keep the precision of computations. The
         // conversion shader uses chunks of 16 bits to compute the multiplication with the perios,
         // so we need to keep the multiplier under 2^16. At the same time, the larger the
         // multiplier, the better the precision, so we maximize the value of the right shift while
         // keeping the multiplier under 2 ^ 16
         uint32_t upperLog2 = ceil(log2(period));

         // Clamp the shift to 16 because we're doing computations in 16bit chunks. The
         // multiplication by the period will overflow the chunks, but timestamps are mostly
         // informational so that's ok.
         rightShift = 16u - std::min(upperLog2, 16u);
         multiplier = uint32_t(period * (1 << rightShift));
     }

     MaybeError EncodeConvertTimestampsToNanoseconds(CommandEncoder* encoder,
                                                     BufferBase* timestamps,
                                                     BufferBase* availability,
                                                     BufferBase* params) {
         DeviceBase* device = encoder->GetDevice();

         ComputePipelineBase* pipeline;
         DAWN_TRY_ASSIGN(pipeline, GetOrCreateTimestampComputePipeline(device));

         // Prepare bind group layout.
         Ref<BindGroupLayoutBase> layout;
         DAWN_TRY_ASSIGN(layout, pipeline->GetBindGroupLayout(0));

         // Create bind group after all binding entries are set.
         Ref<BindGroupBase> bindGroup;
         DAWN_TRY_ASSIGN(bindGroup,
                         utils::MakeBindGroup(device, layout,
                                              {{0, timestamps}, {1, availability}, {2, params}}));

         // Create compute encoder and issue dispatch.
         ComputePassDescriptor passDesc = {};
         // TODO(dawn:723): change to not use AcquireRef for reentrant object creation.
         Ref<ComputePassEncoder> pass = AcquireRef(encoder->APIBeginComputePass(&passDesc));
         pass->APISetPipeline(pipeline);
         pass->APISetBindGroup(0, bindGroup.Get());
         pass->APIDispatch(
             static_cast<uint32_t>((timestamps->GetSize() / sizeof(uint64_t) + 7) / 8));
         pass->APIEndPass();

         return {};
     }

 }  // namespace dawn::native
	// Copyright 2020 The Dawn Authors
	//
	// Licensed under the Apache License, Version 2.0 (the "License");
	// you may not use this file except in compliance with the License.
	// You may obtain a copy of the License at
	//
	// http://www.apache.org/licenses/LICENSE-2.0
	//
	// Unless required by applicable law or agreed to in writing, software
	// distributed under the License is distributed on an "AS IS" BASIS,
	// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	// See the License for the specific language governing permissions and
	// limitations under the License.

	#include "dawn_native/QueryHelper.h"

	#include "dawn_native/BindGroup.h"
	#include "dawn_native/BindGroupLayout.h"
	#include "dawn_native/Buffer.h"
	#include "dawn_native/CommandEncoder.h"
	#include "dawn_native/ComputePassEncoder.h"
	#include "dawn_native/ComputePipeline.h"
	#include "dawn_native/Device.h"
	#include "dawn_native/InternalPipelineStore.h"
	#include "dawn_native/utils/WGPUHelpers.h"

	#include <cmath>

	namespace dawn::native {

	namespace {

	// Assert the offsets in dawn::native::TimestampParams are same with the ones in the shader
	static_assert(offsetof(dawn::native::TimestampParams, first) == 0);
	static_assert(offsetof(dawn::native::TimestampParams, count) == 4);
	static_assert(offsetof(dawn::native::TimestampParams, offset) == 8);
	static_assert(offsetof(dawn::native::TimestampParams, multiplier) == 12);
	static_assert(offsetof(dawn::native::TimestampParams, rightShift) == 16);

	static const char sConvertTimestampsToNanoseconds[] = R"(
	struct Timestamp {
	low : u32;
	high : u32;
	};

	struct TimestampArr {
	t : array<Timestamp>;
	};

	struct AvailabilityArr {
	v : array<u32>;
	};

	struct TimestampParams {
	first : u32;
	count : u32;
	offset : u32;
	multiplier : u32;
	right_shift : u32;
	};

	@group(0) @binding(0) var<storage, read_write> timestamps : TimestampArr;
	@group(0) @binding(1) var<storage, read> availability : AvailabilityArr;
	@group(0) @binding(2) var<uniform> params : TimestampParams;

	let sizeofTimestamp : u32 = 8u;

	@stage(compute) @workgroup_size(8, 1, 1)
	fn main(@builtin(global_invocation_id) GlobalInvocationID : vec3<u32>) {
	if (GlobalInvocationID.x >= params.count) { return; }

	var index = GlobalInvocationID.x + params.offset / sizeofTimestamp;

	// Return 0 for the unavailable value.
	if (availability.v[GlobalInvocationID.x + params.first] == 0u) {
	timestamps.t[index].low = 0u;
	timestamps.t[index].high = 0u;
	return;
	}

	var timestamp = timestamps.t[index];

	// TODO(dawn:1250): Consider using the umulExtended and uaddCarry intrinsics once
	// available.
	var chunks : array<u32, 5>;
	chunks[0] = timestamp.low & 0xFFFFu;
	chunks[1] = timestamp.low >> 16u;
	chunks[2] = timestamp.high & 0xFFFFu;
	chunks[3] = timestamp.high >> 16u;
	chunks[4] = 0u;

	// Multiply all the chunks with the integer period.
	for (var i = 0u; i < 4u; i = i + 1u) {
	chunks[i] = chunks[i] * params.multiplier;
	}

	// Propagate the carry
	var carry = 0u;
	for (var i = 0u; i < 4u; i = i + 1u) {
	var chunk_with_carry = chunks[i] + carry;
	carry = chunk_with_carry >> 16u;
	chunks[i] = chunk_with_carry & 0xFFFFu;
	}
	chunks[4] = carry;

	// Apply the right shift.
	for (var i = 0u; i < 4u; i = i + 1u) {
	var low = chunks[i] >> params.right_shift;
	var high = (chunks[i + 1u] << (16u - params.right_shift)) & 0xFFFFu;
	chunks[i] = low \| high;
	}

	timestamps.t[index].low = chunks[0] \| (chunks[1] << 16u);
	timestamps.t[index].high = chunks[2] \| (chunks[3] << 16u);
	}
	)";

	ResultOrError<ComputePipelineBase*> GetOrCreateTimestampComputePipeline(
	DeviceBase* device) {
	InternalPipelineStore* store = device->GetInternalPipelineStore();

	if (store->timestampComputePipeline == nullptr) {
	// Create compute shader module if not cached before.
	if (store->timestampCS == nullptr) {
	DAWN_TRY_ASSIGN(
	store->timestampCS,
	utils::CreateShaderModule(device, sConvertTimestampsToNanoseconds));
	}

	// Create binding group layout
	Ref<BindGroupLayoutBase> bgl;
	DAWN_TRY_ASSIGN(
	bgl, utils::MakeBindGroupLayout(
	device,
	{
	{0, wgpu::ShaderStage::Compute, kInternalStorageBufferBinding},
	{1, wgpu::ShaderStage::Compute,
	wgpu::BufferBindingType::ReadOnlyStorage},
	{2, wgpu::ShaderStage::Compute, wgpu::BufferBindingType::Uniform},
	},
	/* allowInternalBinding */ true));

	// Create pipeline layout
	Ref<PipelineLayoutBase> layout;
	DAWN_TRY_ASSIGN(layout, utils::MakeBasicPipelineLayout(device, bgl));

	// Create ComputePipeline.
	ComputePipelineDescriptor computePipelineDesc = {};
	// Generate the layout based on shader module.
	computePipelineDesc.layout = layout.Get();
	computePipelineDesc.compute.module = store->timestampCS.Get();
	computePipelineDesc.compute.entryPoint = "main";

	DAWN_TRY_ASSIGN(store->timestampComputePipeline,
	device->CreateComputePipeline(&computePipelineDesc));
	}

	return store->timestampComputePipeline.Get();
	}

	} // anonymous namespace

	TimestampParams::TimestampParams(uint32_t first, uint32_t count, uint32_t offset, float period)
	: first(first), count(count), offset(offset) {
	// The overall conversion happening, if p is the period, m the multiplier, s the shift, is::
	//
	// m = round(p * 2^s)
	//
	// Then in the shader we compute:
	//
	// m / 2^s = round(p * 2^s) / 2*s ~= p
	//
	// The goal is to find the best shift to keep the precision of computations. The
	// conversion shader uses chunks of 16 bits to compute the multiplication with the perios,
	// so we need to keep the multiplier under 2^16. At the same time, the larger the
	// multiplier, the better the precision, so we maximize the value of the right shift while
	// keeping the multiplier under 2 ^ 16
	uint32_t upperLog2 = ceil(log2(period));

	// Clamp the shift to 16 because we're doing computations in 16bit chunks. The
	// multiplication by the period will overflow the chunks, but timestamps are mostly
	// informational so that's ok.
	rightShift = 16u - std::min(upperLog2, 16u);
	multiplier = uint32_t(period * (1 << rightShift));
	}

	MaybeError EncodeConvertTimestampsToNanoseconds(CommandEncoder* encoder,
	BufferBase* timestamps,
	BufferBase* availability,
	BufferBase* params) {
	DeviceBase* device = encoder->GetDevice();

	ComputePipelineBase* pipeline;
	DAWN_TRY_ASSIGN(pipeline, GetOrCreateTimestampComputePipeline(device));

	// Prepare bind group layout.
	Ref<BindGroupLayoutBase> layout;
	DAWN_TRY_ASSIGN(layout, pipeline->GetBindGroupLayout(0));

	// Create bind group after all binding entries are set.
	Ref<BindGroupBase> bindGroup;
	DAWN_TRY_ASSIGN(bindGroup,
	utils::MakeBindGroup(device, layout,
	{{0, timestamps}, {1, availability}, {2, params}}));

	// Create compute encoder and issue dispatch.
	ComputePassDescriptor passDesc = {};
	// TODO(dawn:723): change to not use AcquireRef for reentrant object creation.
	Ref<ComputePassEncoder> pass = AcquireRef(encoder->APIBeginComputePass(&passDesc));
	pass->APISetPipeline(pipeline);
	pass->APISetBindGroup(0, bindGroup.Get());
	pass->APIDispatch(
	static_cast<uint32_t>((timestamps->GetSize() / sizeof(uint64_t) + 7) / 8));
	pass->APIEndPass();

	return {};
	}

	} // namespace dawn::native