Properly implement per-dispatch synchronization scopes.
Below are the list of all the individual changes, which are a good order
in which to review this CL.
Core changes:
- Change the tracking in the frontend to produce a synchronization
scope per dispatch instead of per compute pass. Some bindgroups might
not be part of any synchronization scopes so we also track all the
referenced resources on the side so they can be checked during
Queue::Submit validation.
- Fix clearing in the GL and Metal backends to use the per-dispatch
synchronization scopes.
- Fix the Vulkan backend to use the per dispatch synchronization scopes
to produce the correct pipeline barriers. This allows the removal of
previous logic that was subtly incorrect for Indirect buffer. This
allows the merging of the Compute and Render DescriptorSetTracker into
a single small helper class.
- D3D12 changes are similar to Vulkan, but the simplification is just a
the suppression of a branch with a lot of code in
BindGroupStateTracker.
Test changes:
- Fixup all the ResourceUsageTracking tests to follow the WebGPU spec
for synchronization scopes (fixing a lot of TODOs).
- Add additional tests checking that Indirect buffers are not allowed
to be used as a writeable storage in the same synchronization scope.
- Add tests for Queue::Submit validation correctly taking into account
resources that are bound but unused in compute passes.
- Add an end2end test for using a buffer as Indirect and Storage at the
same time in a DispatchIndirect, which would previously produce
incorrect barriers in the Vulkan and D3D12 backends.
Other small changes (that I was to lazy to put in a different CL):
- Add the utils::MakePipelineLayout helper function.
- Fix Indirect not being in the list of readonly buffer usages (caught
by a test added in this CL).
Bug: dawn:632
Change-Id: I77263c3535a4ba995faccbf26255da9a2f6ed3b5
Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/49887
Commit-Queue: Corentin Wallez <cwallez@chromium.org>
Reviewed-by: Stephen White <senorblanco@chromium.org>
diff --git a/src/dawn_native/Buffer.h b/src/dawn_native/Buffer.h
index 8e848da..ade139e 100644
--- a/src/dawn_native/Buffer.h
+++ b/src/dawn_native/Buffer.h
@@ -34,7 +34,8 @@
static constexpr wgpu::BufferUsage kReadOnlyBufferUsages =
wgpu::BufferUsage::MapRead | wgpu::BufferUsage::CopySrc | wgpu::BufferUsage::Index |
- wgpu::BufferUsage::Vertex | wgpu::BufferUsage::Uniform | kReadOnlyStorageBuffer;
+ wgpu::BufferUsage::Vertex | wgpu::BufferUsage::Uniform | kReadOnlyStorageBuffer |
+ wgpu::BufferUsage::Indirect;
class BufferBase : public ObjectBase {
enum class BufferState {
diff --git a/src/dawn_native/CommandBufferStateTracker.cpp b/src/dawn_native/CommandBufferStateTracker.cpp
index 7b6d367..24d8342 100644
--- a/src/dawn_native/CommandBufferStateTracker.cpp
+++ b/src/dawn_native/CommandBufferStateTracker.cpp
@@ -227,4 +227,12 @@
mAspects &= ~kLazyAspects;
}
+ BindGroupBase* CommandBufferStateTracker::GetBindGroup(BindGroupIndex index) const {
+ return mBindgroups[index];
+ }
+
+ PipelineLayoutBase* CommandBufferStateTracker::GetPipelineLayout() const {
+ return mLastPipelineLayout;
+ }
+
} // namespace dawn_native
diff --git a/src/dawn_native/CommandBufferStateTracker.h b/src/dawn_native/CommandBufferStateTracker.h
index 84a3e6d..8c223cf 100644
--- a/src/dawn_native/CommandBufferStateTracker.h
+++ b/src/dawn_native/CommandBufferStateTracker.h
@@ -50,6 +50,8 @@
wgpu::IndexFormat GetIndexFormat() {
return mIndexFormat;
}
+ BindGroupBase* GetBindGroup(BindGroupIndex index) const;
+ PipelineLayoutBase* GetPipelineLayout() const;
private:
MaybeError ValidateOperation(ValidationAspects requiredAspects);
diff --git a/src/dawn_native/CommandEncoder.cpp b/src/dawn_native/CommandEncoder.cpp
index f71cad8..54621f6 100644
--- a/src/dawn_native/CommandEncoder.cpp
+++ b/src/dawn_native/CommandEncoder.cpp
@@ -949,8 +949,12 @@
for (const RenderPassResourceUsage& passUsage : mEncodingContext.GetRenderPassUsages()) {
DAWN_TRY(ValidateSyncScopeResourceUsage(passUsage));
}
- // TODO(dawn:632): The synchronization scopes of compute passes should be validated here
- // once they are tracked per-dispatch.
+
+ for (const ComputePassResourceUsage& passUsage : mEncodingContext.GetComputePassUsages()) {
+ for (const SyncScopeResourceUsage& scope : passUsage.dispatchUsages) {
+ DAWN_TRY(ValidateSyncScopeResourceUsage(scope));
+ }
+ }
if (mDebugGroupStackSize != 0) {
return DAWN_VALIDATION_ERROR("Each Push must be balanced by a corresponding Pop.");
diff --git a/src/dawn_native/CommandValidation.cpp b/src/dawn_native/CommandValidation.cpp
index 05e3a9e..9b647db 100644
--- a/src/dawn_native/CommandValidation.cpp
+++ b/src/dawn_native/CommandValidation.cpp
@@ -37,7 +37,8 @@
if (!readOnly && !singleUse) {
return DAWN_VALIDATION_ERROR(
- "Buffer used as writable usage and another usage in pass");
+ "Buffer used as writable usage and another usage in the same synchronization "
+ "scope");
}
}
@@ -50,7 +51,8 @@
bool singleUse = wgpu::HasZeroOrOneBits(usage);
if (!readOnly && !singleUse && !error.IsError()) {
error = DAWN_VALIDATION_ERROR(
- "Texture used as writable usage and another usage in render pass");
+ "Texture used as writable usage and another usage in the same "
+ "synchronization scope");
}
});
DAWN_TRY(std::move(error));
diff --git a/src/dawn_native/ComputePassEncoder.cpp b/src/dawn_native/ComputePassEncoder.cpp
index dbc6f41..88c7e69 100644
--- a/src/dawn_native/ComputePassEncoder.cpp
+++ b/src/dawn_native/ComputePassEncoder.cpp
@@ -20,6 +20,7 @@
#include "dawn_native/Commands.h"
#include "dawn_native/ComputePipeline.h"
#include "dawn_native/Device.h"
+#include "dawn_native/PassResourceUsageTracker.h"
#include "dawn_native/QuerySet.h"
namespace dawn_native {
@@ -64,6 +65,9 @@
DAWN_TRY(mCommandBufferState.ValidateCanDispatch());
}
+ // Record the synchronization scope for Dispatch, which is just the current bindgroups.
+ AddDispatchSyncScope();
+
DispatchCmd* dispatch = allocator->Allocate<DispatchCmd>(Command::Dispatch);
dispatch->x = x;
dispatch->y = y;
@@ -101,13 +105,18 @@
}
}
+ // Record the synchronization scope for Dispatch, both the bindgroups and the indirect
+ // buffer.
+ SyncScopeUsageTracker scope;
+ scope.BufferUsedAs(indirectBuffer, wgpu::BufferUsage::Indirect);
+ mUsageTracker.AddReferencedBuffer(indirectBuffer);
+ AddDispatchSyncScope(std::move(scope));
+
DispatchIndirectCmd* dispatch =
allocator->Allocate<DispatchIndirectCmd>(Command::DispatchIndirect);
dispatch->indirectBuffer = indirectBuffer;
dispatch->indirectOffset = indirectOffset;
- mUsageTracker.BufferUsedAs(indirectBuffer, wgpu::BufferUsage::Indirect);
-
return {};
});
}
@@ -140,13 +149,11 @@
ValidateSetBindGroup(groupIndex, group, dynamicOffsetCount, dynamicOffsets));
}
+ mUsageTracker.AddResourcesReferencedByBindGroup(group);
+
RecordSetBindGroup(allocator, groupIndex, group, dynamicOffsetCount, dynamicOffsets);
mCommandBufferState.SetBindGroup(groupIndex, group);
- // TODO(dawn:632): This doesn't match the WebGPU specification. Instead the
- // synchronization scopes should be created on Dispatch().
- mUsageTracker.AddBindGroup(group);
-
return {};
});
}
@@ -169,4 +176,12 @@
});
}
+ void ComputePassEncoder::AddDispatchSyncScope(SyncScopeUsageTracker scope) {
+ PipelineLayoutBase* layout = mCommandBufferState.GetPipelineLayout();
+ for (BindGroupIndex i : IterateBitSet(layout->GetBindGroupLayoutsMask())) {
+ scope.AddBindGroup(mCommandBufferState.GetBindGroup(i));
+ }
+ mUsageTracker.AddDispatch(scope.AcquireSyncScopeUsage());
+ }
+
} // namespace dawn_native
diff --git a/src/dawn_native/ComputePassEncoder.h b/src/dawn_native/ComputePassEncoder.h
index 18305d8..cbf4f61 100644
--- a/src/dawn_native/ComputePassEncoder.h
+++ b/src/dawn_native/ComputePassEncoder.h
@@ -22,6 +22,8 @@
namespace dawn_native {
+ class SyncScopeUsageTracker;
+
class ComputePassEncoder final : public ProgrammablePassEncoder {
public:
ComputePassEncoder(DeviceBase* device,
@@ -53,6 +55,10 @@
private:
CommandBufferStateTracker mCommandBufferState;
+
+ // Adds the bindgroups used for the current dispatch to the SyncScopeResourceUsage and
+ // records it in mUsageTracker.
+ void AddDispatchSyncScope(SyncScopeUsageTracker scope = {});
ComputePassResourceUsageTracker mUsageTracker;
// For render and compute passes, the encoding context is borrowed from the command encoder.
diff --git a/src/dawn_native/PassResourceUsage.h b/src/dawn_native/PassResourceUsage.h
index 701319f..3168c39 100644
--- a/src/dawn_native/PassResourceUsage.h
+++ b/src/dawn_native/PassResourceUsage.h
@@ -48,12 +48,23 @@
// Contains all the resource usage data for a compute pass.
//
- // TODO(dawn:632) Not now, but in the future, compute passes will contain a list of
- // SyncScopeResourceUsage, one per Dispatch as required by the WebGPU specification. They will
- // also store inline the set of all buffers and textures used, because some unused BindGroups
- // may not be used at all in synchronization scope but their resources still need to be
- // validated on Queue::Submit.
- struct ComputePassResourceUsage : public SyncScopeResourceUsage {};
+ // Essentially a list of SyncScopeResourceUsage, one per Dispatch as required by the WebGPU
+ // specification. ComputePassResourceUsage also stores nline the set of all buffers and
+ // textures used, because some unused BindGroups may not be used at all in synchronization
+ // scope but their resources still need to be validated on Queue::Submit.
+ struct ComputePassResourceUsage {
+ // Somehow without this defaulted constructor, MSVC or its STDlib have an issue where they
+ // use the copy constructor (that's deleted) when doing operations on a
+ // vector<ComputePassResourceUsage>
+ ComputePassResourceUsage(ComputePassResourceUsage&&) = default;
+ ComputePassResourceUsage() = default;
+
+ std::vector<SyncScopeResourceUsage> dispatchUsages;
+
+ // All the resources referenced by this compute pass for validation in Queue::Submit.
+ std::set<BufferBase*> referencedBuffers;
+ std::set<TextureBase*> referencedTextures;
+ };
// Contains all the resource usage data for a render pass.
//
diff --git a/src/dawn_native/PassResourceUsageTracker.cpp b/src/dawn_native/PassResourceUsageTracker.cpp
index 25ebe67..75db456 100644
--- a/src/dawn_native/PassResourceUsageTracker.cpp
+++ b/src/dawn_native/PassResourceUsageTracker.cpp
@@ -138,10 +138,39 @@
return result;
}
+ void ComputePassResourceUsageTracker::AddDispatch(SyncScopeResourceUsage scope) {
+ mUsage.dispatchUsages.push_back(std::move(scope));
+ }
+
+ void ComputePassResourceUsageTracker::AddReferencedBuffer(BufferBase* buffer) {
+ mUsage.referencedBuffers.insert(buffer);
+ }
+
+ void ComputePassResourceUsageTracker::AddResourcesReferencedByBindGroup(BindGroupBase* group) {
+ for (BindingIndex index{0}; index < group->GetLayout()->GetBindingCount(); ++index) {
+ const BindingInfo& bindingInfo = group->GetLayout()->GetBindingInfo(index);
+
+ switch (bindingInfo.bindingType) {
+ case BindingInfoType::Buffer: {
+ mUsage.referencedBuffers.insert(group->GetBindingAsBufferBinding(index).buffer);
+ break;
+ }
+
+ case BindingInfoType::Texture: {
+ mUsage.referencedTextures.insert(
+ group->GetBindingAsTextureView(index)->GetTexture());
+ break;
+ }
+
+ case BindingInfoType::StorageTexture:
+ case BindingInfoType::Sampler:
+ break;
+ }
+ }
+ }
+
ComputePassResourceUsage ComputePassResourceUsageTracker::AcquireResourceUsage() {
- ComputePassResourceUsage result;
- *static_cast<SyncScopeResourceUsage*>(&result) = AcquireSyncScopeUsage();
- return result;
+ return std::move(mUsage);
}
RenderPassResourceUsage RenderPassResourceUsageTracker::AcquireResourceUsage() {
diff --git a/src/dawn_native/PassResourceUsageTracker.h b/src/dawn_native/PassResourceUsageTracker.h
index 691d302..56b5854 100644
--- a/src/dawn_native/PassResourceUsageTracker.h
+++ b/src/dawn_native/PassResourceUsageTracker.h
@@ -49,14 +49,16 @@
};
// Helper class to build ComputePassResourceUsages
- class ComputePassResourceUsageTracker : public SyncScopeUsageTracker {
+ class ComputePassResourceUsageTracker {
public:
+ void AddDispatch(SyncScopeResourceUsage scope);
+ void AddReferencedBuffer(BufferBase* buffer);
+ void AddResourcesReferencedByBindGroup(BindGroupBase* group);
+
ComputePassResourceUsage AcquireResourceUsage();
private:
- // Hide AcquireSyncScopeUsage since users of this class should use AcquireResourceUsage
- // instead.
- using SyncScopeUsageTracker::AcquireSyncScopeUsage;
+ ComputePassResourceUsage mUsage;
};
// Helper class to build RenderPassResourceUsages
diff --git a/src/dawn_native/Queue.cpp b/src/dawn_native/Queue.cpp
index c555d99..c48a46f 100644
--- a/src/dawn_native/Queue.cpp
+++ b/src/dawn_native/Queue.cpp
@@ -37,18 +37,6 @@
namespace {
- MaybeError ValidateSyncScopeUsedInSubmit(const SyncScopeResourceUsage& scope) {
- for (const BufferBase* buffer : scope.buffers) {
- DAWN_TRY(buffer->ValidateCanUseOnQueueNow());
- }
-
- for (const TextureBase* texture : scope.textures) {
- DAWN_TRY(texture->ValidateCanUseInSubmitNow());
- }
-
- return {};
- }
-
void CopyTextureData(uint8_t* dstPointer,
const uint8_t* srcPointer,
uint32_t depth,
@@ -423,10 +411,22 @@
const CommandBufferResourceUsage& usages = commands[i]->GetResourceUsages();
for (const SyncScopeResourceUsage& scope : usages.renderPasses) {
- DAWN_TRY(ValidateSyncScopeUsedInSubmit(scope));
+ for (const BufferBase* buffer : scope.buffers) {
+ DAWN_TRY(buffer->ValidateCanUseOnQueueNow());
+ }
+
+ for (const TextureBase* texture : scope.textures) {
+ DAWN_TRY(texture->ValidateCanUseInSubmitNow());
+ }
}
- for (const SyncScopeResourceUsage& scope : usages.computePasses) {
- DAWN_TRY(ValidateSyncScopeUsedInSubmit(scope));
+
+ for (const ComputePassResourceUsage& pass : usages.computePasses) {
+ for (const BufferBase* buffer : pass.referencedBuffers) {
+ DAWN_TRY(buffer->ValidateCanUseOnQueueNow());
+ }
+ for (const TextureBase* texture : pass.referencedTextures) {
+ DAWN_TRY(texture->ValidateCanUseInSubmitNow());
+ }
}
for (const BufferBase* buffer : usages.topLevelBuffers) {
diff --git a/src/dawn_native/d3d12/CommandBufferD3D12.cpp b/src/dawn_native/d3d12/CommandBufferD3D12.cpp
index 367fbb0..b0b8e46 100644
--- a/src/dawn_native/d3d12/CommandBufferD3D12.cpp
+++ b/src/dawn_native/d3d12/CommandBufferD3D12.cpp
@@ -193,6 +193,62 @@
return {};
}
+
+ // Records the necessary barriers for a synchronization scope using the resource usage
+ // data pre-computed in the frontend. Also performs lazy initialization if required.
+ // Returns whether any UAV are used in the synchronization scope.
+ bool TransitionAndClearForSyncScope(CommandRecordingContext* commandContext,
+ const SyncScopeResourceUsage& usages) {
+ std::vector<D3D12_RESOURCE_BARRIER> barriers;
+
+ ID3D12GraphicsCommandList* commandList = commandContext->GetCommandList();
+
+ wgpu::BufferUsage bufferUsages = wgpu::BufferUsage::None;
+
+ for (size_t i = 0; i < usages.buffers.size(); ++i) {
+ Buffer* buffer = ToBackend(usages.buffers[i]);
+
+ // TODO(jiawei.shao@intel.com): clear storage buffers with
+ // ClearUnorderedAccessView*().
+ buffer->GetDevice()->ConsumedError(buffer->EnsureDataInitialized(commandContext));
+
+ D3D12_RESOURCE_BARRIER barrier;
+ if (buffer->TrackUsageAndGetResourceBarrier(commandContext, &barrier,
+ usages.bufferUsages[i])) {
+ barriers.push_back(barrier);
+ }
+ bufferUsages |= usages.bufferUsages[i];
+ }
+
+ wgpu::TextureUsage textureUsages = wgpu::TextureUsage::None;
+
+ for (size_t i = 0; i < usages.textures.size(); ++i) {
+ Texture* texture = ToBackend(usages.textures[i]);
+
+ // Clear subresources that are not render attachments. Render attachments will be
+ // cleared in RecordBeginRenderPass by setting the loadop to clear when the texture
+ // subresource has not been initialized before the render pass.
+ usages.textureUsages[i].Iterate(
+ [&](const SubresourceRange& range, wgpu::TextureUsage usage) {
+ if (usage & ~wgpu::TextureUsage::RenderAttachment) {
+ texture->EnsureSubresourceContentInitialized(commandContext, range);
+ }
+ textureUsages |= usage;
+ });
+
+ ToBackend(usages.textures[i])
+ ->TrackUsageAndGetResourceBarrierForPass(commandContext, &barriers,
+ usages.textureUsages[i]);
+ }
+
+ if (barriers.size()) {
+ commandList->ResourceBarrier(barriers.size(), barriers.data());
+ }
+
+ return (bufferUsages & wgpu::BufferUsage::Storage ||
+ textureUsages & wgpu::TextureUsage::Storage);
+ }
+
} // anonymous namespace
class BindGroupStateTracker : public BindGroupTrackerBase<false, uint64_t> {
@@ -274,81 +330,6 @@
mDynamicOffsetCounts[index], mDynamicOffsets[index].data());
}
- if (mInCompute) {
- std::vector<D3D12_RESOURCE_BARRIER> barriers;
- for (BindGroupIndex index : IterateBitSet(mBindGroupLayoutsMask)) {
- BindGroupLayoutBase* layout = mBindGroups[index]->GetLayout();
- for (BindingIndex binding{0}; binding < layout->GetBindingCount(); ++binding) {
- const BindingInfo& bindingInfo = layout->GetBindingInfo(binding);
- switch (bindingInfo.bindingType) {
- case BindingInfoType::Buffer: {
- D3D12_RESOURCE_BARRIER barrier;
- wgpu::BufferUsage usage;
- switch (bindingInfo.buffer.type) {
- case wgpu::BufferBindingType::Uniform:
- usage = wgpu::BufferUsage::Uniform;
- break;
- case wgpu::BufferBindingType::Storage:
- usage = wgpu::BufferUsage::Storage;
- break;
- case wgpu::BufferBindingType::ReadOnlyStorage:
- usage = kReadOnlyStorageBuffer;
- break;
- case wgpu::BufferBindingType::Undefined:
- UNREACHABLE();
- }
- if (ToBackend(mBindGroups[index]
- ->GetBindingAsBufferBinding(binding)
- .buffer)
- ->TrackUsageAndGetResourceBarrier(commandContext, &barrier,
- usage)) {
- barriers.push_back(barrier);
- }
- break;
- }
-
- case BindingInfoType::StorageTexture: {
- TextureViewBase* view =
- mBindGroups[index]->GetBindingAsTextureView(binding);
- wgpu::TextureUsage usage;
- switch (bindingInfo.storageTexture.access) {
- case wgpu::StorageTextureAccess::ReadOnly:
- usage = kReadOnlyStorageTexture;
- break;
- case wgpu::StorageTextureAccess::WriteOnly:
- usage = wgpu::TextureUsage::Storage;
- break;
- case wgpu::StorageTextureAccess::Undefined:
- UNREACHABLE();
- }
- ToBackend(view->GetTexture())
- ->TransitionUsageAndGetResourceBarrier(
- commandContext, &barriers, usage,
- view->GetSubresourceRange());
- break;
- }
-
- case BindingInfoType::Texture: {
- TextureViewBase* view =
- mBindGroups[index]->GetBindingAsTextureView(binding);
- ToBackend(view->GetTexture())
- ->TransitionUsageAndGetResourceBarrier(
- commandContext, &barriers, wgpu::TextureUsage::Sampled,
- view->GetSubresourceRange());
- break;
- }
-
- case BindingInfoType::Sampler:
- // Don't require barriers.
- break;
- }
- }
- }
-
- if (!barriers.empty()) {
- commandList->ResourceBarrier(barriers.size(), barriers.data());
- }
- }
DidApply();
return {};
@@ -610,78 +591,6 @@
// actual command list but here is ok because there should be few command buffers.
bindingTracker.SetID3D12DescriptorHeaps(commandList);
- // Records the necessary barriers for the resource usage pre-computed by the frontend
- auto PrepareResourcesForRenderPass = [](CommandRecordingContext* commandContext,
- const RenderPassResourceUsage& usages) -> bool {
- std::vector<D3D12_RESOURCE_BARRIER> barriers;
-
- ID3D12GraphicsCommandList* commandList = commandContext->GetCommandList();
-
- wgpu::BufferUsage bufferUsages = wgpu::BufferUsage::None;
-
- for (size_t i = 0; i < usages.buffers.size(); ++i) {
- Buffer* buffer = ToBackend(usages.buffers[i]);
-
- // TODO(jiawei.shao@intel.com): clear storage buffers with
- // ClearUnorderedAccessView*().
- buffer->GetDevice()->ConsumedError(buffer->EnsureDataInitialized(commandContext));
-
- D3D12_RESOURCE_BARRIER barrier;
- if (buffer->TrackUsageAndGetResourceBarrier(commandContext, &barrier,
- usages.bufferUsages[i])) {
- barriers.push_back(barrier);
- }
- bufferUsages |= usages.bufferUsages[i];
- }
-
- wgpu::TextureUsage textureUsages = wgpu::TextureUsage::None;
-
- for (size_t i = 0; i < usages.textures.size(); ++i) {
- Texture* texture = ToBackend(usages.textures[i]);
-
- // Clear subresources that are not render attachments. Render attachments will be
- // cleared in RecordBeginRenderPass by setting the loadop to clear when the texture
- // subresource has not been initialized before the render pass.
- usages.textureUsages[i].Iterate(
- [&](const SubresourceRange& range, wgpu::TextureUsage usage) {
- if (usage & ~wgpu::TextureUsage::RenderAttachment) {
- texture->EnsureSubresourceContentInitialized(commandContext, range);
- }
- textureUsages |= usage;
- });
-
- ToBackend(usages.textures[i])
- ->TrackUsageAndGetResourceBarrierForPass(commandContext, &barriers,
- usages.textureUsages[i]);
- }
-
- if (barriers.size()) {
- commandList->ResourceBarrier(barriers.size(), barriers.data());
- }
-
- return (bufferUsages & wgpu::BufferUsage::Storage ||
- textureUsages & wgpu::TextureUsage::Storage);
- };
-
- // TODO(jiawei.shao@intel.com): move the resource lazy clearing inside the barrier tracking
- // for compute passes.
- auto PrepareResourcesForComputePass = [](CommandRecordingContext* commandContext,
- const ComputePassResourceUsage& usages) {
- for (size_t i = 0; i < usages.buffers.size(); ++i) {
- Buffer* buffer = ToBackend(usages.buffers[i]);
-
- // TODO(jiawei.shao@intel.com): clear storage buffers with
- // ClearUnorderedAccessView*().
- buffer->GetDevice()->ConsumedError(buffer->EnsureDataInitialized(commandContext));
- }
-
- for (size_t i = 0; i < usages.textures.size(); ++i) {
- Texture* texture = ToBackend(usages.textures[i]);
- texture->EnsureSubresourceContentInitialized(commandContext,
- texture->GetAllSubresources());
- }
- };
-
size_t nextComputePassNumber = 0;
size_t nextRenderPassNumber = 0;
@@ -691,10 +600,10 @@
case Command::BeginComputePass: {
mCommands.NextCommand<BeginComputePassCmd>();
- PrepareResourcesForComputePass(
- commandContext, GetResourceUsages().computePasses[nextComputePassNumber]);
bindingTracker.SetInComputePass(true);
- DAWN_TRY(RecordComputePass(commandContext, &bindingTracker));
+ DAWN_TRY(RecordComputePass(
+ commandContext, &bindingTracker,
+ GetResourceUsages().computePasses[nextComputePassNumber]));
nextComputePassNumber++;
break;
@@ -704,7 +613,7 @@
BeginRenderPassCmd* beginRenderPassCmd =
mCommands.NextCommand<BeginRenderPassCmd>();
- const bool passHasUAV = PrepareResourcesForRenderPass(
+ const bool passHasUAV = TransitionAndClearForSyncScope(
commandContext, GetResourceUsages().renderPasses[nextRenderPassNumber]);
bindingTracker.SetInComputePass(false);
@@ -945,7 +854,9 @@
}
MaybeError CommandBuffer::RecordComputePass(CommandRecordingContext* commandContext,
- BindGroupStateTracker* bindingTracker) {
+ BindGroupStateTracker* bindingTracker,
+ const ComputePassResourceUsage& resourceUsages) {
+ uint64_t currentDispatch = 0;
PipelineLayout* lastLayout = nullptr;
ID3D12GraphicsCommandList* commandList = commandContext->GetCommandList();
@@ -955,21 +866,28 @@
case Command::Dispatch: {
DispatchCmd* dispatch = mCommands.NextCommand<DispatchCmd>();
+ TransitionAndClearForSyncScope(commandContext,
+ resourceUsages.dispatchUsages[currentDispatch]);
DAWN_TRY(bindingTracker->Apply(commandContext));
+
commandList->Dispatch(dispatch->x, dispatch->y, dispatch->z);
+ currentDispatch++;
break;
}
case Command::DispatchIndirect: {
DispatchIndirectCmd* dispatch = mCommands.NextCommand<DispatchIndirectCmd>();
-
- DAWN_TRY(bindingTracker->Apply(commandContext));
Buffer* buffer = ToBackend(dispatch->indirectBuffer.Get());
- buffer->TrackUsageAndTransitionNow(commandContext, wgpu::BufferUsage::Indirect);
+
+ TransitionAndClearForSyncScope(commandContext,
+ resourceUsages.dispatchUsages[currentDispatch]);
+ DAWN_TRY(bindingTracker->Apply(commandContext));
+
ComPtr<ID3D12CommandSignature> signature =
ToBackend(GetDevice())->GetDispatchIndirectSignature();
commandList->ExecuteIndirect(signature.Get(), 1, buffer->GetD3D12Resource(),
dispatch->indirectOffset, nullptr, 0);
+ currentDispatch++;
break;
}
diff --git a/src/dawn_native/d3d12/CommandBufferD3D12.h b/src/dawn_native/d3d12/CommandBufferD3D12.h
index 342f851..51dc527 100644
--- a/src/dawn_native/d3d12/CommandBufferD3D12.h
+++ b/src/dawn_native/d3d12/CommandBufferD3D12.h
@@ -39,7 +39,8 @@
CommandBuffer(CommandEncoder* encoder, const CommandBufferDescriptor* descriptor);
MaybeError RecordComputePass(CommandRecordingContext* commandContext,
- BindGroupStateTracker* bindingTracker);
+ BindGroupStateTracker* bindingTracker,
+ const ComputePassResourceUsage& resourceUsages);
MaybeError RecordRenderPass(CommandRecordingContext* commandContext,
BindGroupStateTracker* bindingTracker,
BeginRenderPassCmd* renderPass,
diff --git a/src/dawn_native/metal/CommandBufferMTL.mm b/src/dawn_native/metal/CommandBufferMTL.mm
index 1e9a33f..5e40668 100644
--- a/src/dawn_native/metal/CommandBufferMTL.mm
+++ b/src/dawn_native/metal/CommandBufferMTL.mm
@@ -554,8 +554,8 @@
size_t nextComputePassNumber = 0;
size_t nextRenderPassNumber = 0;
- auto LazyClearForPass = [](const SyncScopeResourceUsage& scope,
- CommandRecordingContext* commandContext) {
+ auto LazyClearSyncScope = [](const SyncScopeResourceUsage& scope,
+ CommandRecordingContext* commandContext) {
for (size_t i = 0; i < scope.textures.size(); ++i) {
Texture* texture = ToBackend(scope.textures[i]);
@@ -580,8 +580,10 @@
case Command::BeginComputePass: {
mCommands.NextCommand<BeginComputePassCmd>();
- LazyClearForPass(GetResourceUsages().computePasses[nextComputePassNumber],
- commandContext);
+ for (const SyncScopeResourceUsage& scope :
+ GetResourceUsages().computePasses[nextComputePassNumber].dispatchUsages) {
+ LazyClearSyncScope(scope, commandContext);
+ }
commandContext->EndBlit();
DAWN_TRY(EncodeComputePass(commandContext));
@@ -593,8 +595,8 @@
case Command::BeginRenderPass: {
BeginRenderPassCmd* cmd = mCommands.NextCommand<BeginRenderPassCmd>();
- LazyClearForPass(GetResourceUsages().renderPasses[nextRenderPassNumber],
- commandContext);
+ LazyClearSyncScope(GetResourceUsages().renderPasses[nextRenderPassNumber],
+ commandContext);
commandContext->EndBlit();
LazyClearRenderPassAttachments(cmd);
diff --git a/src/dawn_native/opengl/CommandBufferGL.cpp b/src/dawn_native/opengl/CommandBufferGL.cpp
index 54687a0..1f988cf 100644
--- a/src/dawn_native/opengl/CommandBufferGL.cpp
+++ b/src/dawn_native/opengl/CommandBufferGL.cpp
@@ -536,7 +536,7 @@
MaybeError CommandBuffer::Execute() {
const OpenGLFunctions& gl = ToBackend(GetDevice())->gl;
- auto LazyClearForPass = [](const SyncScopeResourceUsage& scope) {
+ auto LazyClearSyncScope = [](const SyncScopeResourceUsage& scope) {
for (size_t i = 0; i < scope.textures.size(); i++) {
Texture* texture = ToBackend(scope.textures[i]);
@@ -564,7 +564,10 @@
switch (type) {
case Command::BeginComputePass: {
mCommands.NextCommand<BeginComputePassCmd>();
- LazyClearForPass(GetResourceUsages().computePasses[nextComputePassNumber]);
+ for (const SyncScopeResourceUsage& scope :
+ GetResourceUsages().computePasses[nextComputePassNumber].dispatchUsages) {
+ LazyClearSyncScope(scope);
+ }
DAWN_TRY(ExecuteComputePass());
nextComputePassNumber++;
@@ -573,7 +576,7 @@
case Command::BeginRenderPass: {
auto* cmd = mCommands.NextCommand<BeginRenderPassCmd>();
- LazyClearForPass(GetResourceUsages().renderPasses[nextRenderPassNumber]);
+ LazyClearSyncScope(GetResourceUsages().renderPasses[nextRenderPassNumber]);
LazyClearRenderPassAttachments(cmd);
DAWN_TRY(ExecuteRenderPass(cmd));
diff --git a/src/dawn_native/vulkan/CommandBufferVk.cpp b/src/dawn_native/vulkan/CommandBufferVk.cpp
index f926029..48427ba 100644
--- a/src/dawn_native/vulkan/CommandBufferVk.cpp
+++ b/src/dawn_native/vulkan/CommandBufferVk.cpp
@@ -100,130 +100,73 @@
return region;
}
- void ApplyDescriptorSets(
- Device* device,
- VkCommandBuffer commands,
- VkPipelineBindPoint bindPoint,
- VkPipelineLayout pipelineLayout,
- const BindGroupLayoutMask& bindGroupsToApply,
- const ityp::array<BindGroupIndex, BindGroupBase*, kMaxBindGroups>& bindGroups,
- const ityp::array<BindGroupIndex, uint32_t, kMaxBindGroups>& dynamicOffsetCounts,
- const ityp::array<BindGroupIndex,
- std::array<uint32_t, kMaxDynamicBuffersPerPipelineLayout>,
- kMaxBindGroups>& dynamicOffsets) {
- for (BindGroupIndex dirtyIndex : IterateBitSet(bindGroupsToApply)) {
- VkDescriptorSet set = ToBackend(bindGroups[dirtyIndex])->GetHandle();
- const uint32_t* dynamicOffset = dynamicOffsetCounts[dirtyIndex] > 0
- ? dynamicOffsets[dirtyIndex].data()
- : nullptr;
- device->fn.CmdBindDescriptorSets(commands, bindPoint, pipelineLayout,
- static_cast<uint32_t>(dirtyIndex), 1, &*set,
- dynamicOffsetCounts[dirtyIndex], dynamicOffset);
+ class DescriptorSetTracker : public BindGroupTrackerBase<true, uint32_t> {
+ public:
+ DescriptorSetTracker() = default;
+
+ void Apply(Device* device,
+ CommandRecordingContext* recordingContext,
+ VkPipelineBindPoint bindPoint) {
+ for (BindGroupIndex dirtyIndex :
+ IterateBitSet(mDirtyBindGroupsObjectChangedOrIsDynamic)) {
+ VkDescriptorSet set = ToBackend(mBindGroups[dirtyIndex])->GetHandle();
+ const uint32_t* dynamicOffset = mDynamicOffsetCounts[dirtyIndex] > 0
+ ? mDynamicOffsets[dirtyIndex].data()
+ : nullptr;
+ device->fn.CmdBindDescriptorSets(
+ recordingContext->commandBuffer, bindPoint,
+ ToBackend(mPipelineLayout)->GetHandle(), static_cast<uint32_t>(dirtyIndex),
+ 1, &*set, mDynamicOffsetCounts[dirtyIndex], dynamicOffset);
+ }
+ DidApply();
+ }
+ };
+
+ // Records the necessary barriers for a synchronization scope using the resource usage
+ // data pre-computed in the frontend. Also performs lazy initialization if required.
+ void TransitionAndClearForSyncScope(Device* device,
+ CommandRecordingContext* recordingContext,
+ const SyncScopeResourceUsage& scope) {
+ std::vector<VkBufferMemoryBarrier> bufferBarriers;
+ std::vector<VkImageMemoryBarrier> imageBarriers;
+ VkPipelineStageFlags srcStages = 0;
+ VkPipelineStageFlags dstStages = 0;
+
+ for (size_t i = 0; i < scope.buffers.size(); ++i) {
+ Buffer* buffer = ToBackend(scope.buffers[i]);
+ buffer->EnsureDataInitialized(recordingContext);
+
+ VkBufferMemoryBarrier bufferBarrier;
+ if (buffer->TransitionUsageAndGetResourceBarrier(
+ scope.bufferUsages[i], &bufferBarrier, &srcStages, &dstStages)) {
+ bufferBarriers.push_back(bufferBarrier);
+ }
+ }
+
+ for (size_t i = 0; i < scope.textures.size(); ++i) {
+ Texture* texture = ToBackend(scope.textures[i]);
+
+ // Clear subresources that are not render attachments. Render attachments will be
+ // cleared in RecordBeginRenderPass by setting the loadop to clear when the texture
+ // subresource has not been initialized before the render pass.
+ scope.textureUsages[i].Iterate(
+ [&](const SubresourceRange& range, wgpu::TextureUsage usage) {
+ if (usage & ~wgpu::TextureUsage::RenderAttachment) {
+ texture->EnsureSubresourceContentInitialized(recordingContext, range);
+ }
+ });
+ texture->TransitionUsageForPass(recordingContext, scope.textureUsages[i],
+ &imageBarriers, &srcStages, &dstStages);
+ }
+
+ if (bufferBarriers.size() || imageBarriers.size()) {
+ device->fn.CmdPipelineBarrier(recordingContext->commandBuffer, srcStages, dstStages,
+ 0, 0, nullptr, bufferBarriers.size(),
+ bufferBarriers.data(), imageBarriers.size(),
+ imageBarriers.data());
}
}
- class RenderDescriptorSetTracker : public BindGroupTrackerBase<true, uint32_t> {
- public:
- RenderDescriptorSetTracker() = default;
-
- void Apply(Device* device,
- CommandRecordingContext* recordingContext,
- VkPipelineBindPoint bindPoint) {
- ApplyDescriptorSets(device, recordingContext->commandBuffer, bindPoint,
- ToBackend(mPipelineLayout)->GetHandle(),
- mDirtyBindGroupsObjectChangedOrIsDynamic, mBindGroups,
- mDynamicOffsetCounts, mDynamicOffsets);
- DidApply();
- }
- };
-
- class ComputeDescriptorSetTracker : public BindGroupTrackerBase<true, uint32_t> {
- public:
- ComputeDescriptorSetTracker() = default;
-
- void Apply(Device* device,
- CommandRecordingContext* recordingContext,
- VkPipelineBindPoint bindPoint) {
- ApplyDescriptorSets(device, recordingContext->commandBuffer, bindPoint,
- ToBackend(mPipelineLayout)->GetHandle(),
- mDirtyBindGroupsObjectChangedOrIsDynamic, mBindGroups,
- mDynamicOffsetCounts, mDynamicOffsets);
-
- std::vector<VkBufferMemoryBarrier> bufferBarriers;
- std::vector<VkImageMemoryBarrier> imageBarriers;
- VkPipelineStageFlags srcStages = 0;
- VkPipelineStageFlags dstStages = 0;
-
- for (BindGroupIndex index : IterateBitSet(mBindGroupLayoutsMask)) {
- BindGroupLayoutBase* layout = mBindGroups[index]->GetLayout();
- for (BindingIndex binding{0}; binding < layout->GetBindingCount(); ++binding) {
- const BindingInfo& bindingInfo = layout->GetBindingInfo(binding);
-
- switch (bindingInfo.bindingType) {
- case BindingInfoType::Buffer: {
- wgpu::BufferUsage usage;
- switch (bindingInfo.buffer.type) {
- case wgpu::BufferBindingType::Uniform:
- usage = wgpu::BufferUsage::Uniform;
- break;
- case wgpu::BufferBindingType::Storage:
- case wgpu::BufferBindingType::ReadOnlyStorage:
- usage = wgpu::BufferUsage::Storage;
- break;
- case wgpu::BufferBindingType::Undefined:
- UNREACHABLE();
- }
-
- VkBufferMemoryBarrier bufferBarrier;
- if (ToBackend(mBindGroups[index]
- ->GetBindingAsBufferBinding(binding)
- .buffer)
- ->TransitionUsageAndGetResourceBarrier(
- usage, &bufferBarrier, &srcStages, &dstStages)) {
- bufferBarriers.push_back(bufferBarrier);
- }
- break;
- }
-
- case BindingInfoType::StorageTexture: {
- TextureViewBase* view =
- mBindGroups[index]->GetBindingAsTextureView(binding);
- ToBackend(view->GetTexture())
- ->TransitionUsageAndGetResourceBarrier(
- wgpu::TextureUsage::Storage, view->GetSubresourceRange(),
- &imageBarriers, &srcStages, &dstStages);
- break;
- }
-
- case BindingInfoType::Texture: {
- TextureViewBase* view =
- mBindGroups[index]->GetBindingAsTextureView(binding);
- ToBackend(view->GetTexture())
- ->TransitionUsageAndGetResourceBarrier(
- wgpu::TextureUsage::Sampled, view->GetSubresourceRange(),
- &imageBarriers, &srcStages, &dstStages);
- break;
- }
-
- case BindingInfoType::Sampler:
- // Don't require barriers.
- break;
- }
- }
- }
-
- if (!bufferBarriers.empty() || !imageBarriers.empty()) {
- ASSERT(srcStages != 0 && dstStages != 0);
- device->fn.CmdPipelineBarrier(recordingContext->commandBuffer, srcStages,
- dstStages, 0, 0, nullptr, bufferBarriers.size(),
- bufferBarriers.data(), imageBarriers.size(),
- imageBarriers.data());
- }
-
- DidApply();
- }
- };
-
MaybeError RecordBeginRenderPass(CommandRecordingContext* recordingContext,
Device* device,
BeginRenderPassCmd* renderPass) {
@@ -532,44 +475,7 @@
auto PrepareResourcesForRenderPass = [](Device* device,
CommandRecordingContext* recordingContext,
const RenderPassResourceUsage& usages) {
- std::vector<VkBufferMemoryBarrier> bufferBarriers;
- std::vector<VkImageMemoryBarrier> imageBarriers;
- VkPipelineStageFlags srcStages = 0;
- VkPipelineStageFlags dstStages = 0;
-
- for (size_t i = 0; i < usages.buffers.size(); ++i) {
- Buffer* buffer = ToBackend(usages.buffers[i]);
- buffer->EnsureDataInitialized(recordingContext);
-
- VkBufferMemoryBarrier bufferBarrier;
- if (buffer->TransitionUsageAndGetResourceBarrier(
- usages.bufferUsages[i], &bufferBarrier, &srcStages, &dstStages)) {
- bufferBarriers.push_back(bufferBarrier);
- }
- }
-
- for (size_t i = 0; i < usages.textures.size(); ++i) {
- Texture* texture = ToBackend(usages.textures[i]);
-
- // Clear subresources that are not render attachments. Render attachments will be
- // cleared in RecordBeginRenderPass by setting the loadop to clear when the texture
- // subresource has not been initialized before the render pass.
- usages.textureUsages[i].Iterate(
- [&](const SubresourceRange& range, wgpu::TextureUsage usage) {
- if (usage & ~wgpu::TextureUsage::RenderAttachment) {
- texture->EnsureSubresourceContentInitialized(recordingContext, range);
- }
- });
- texture->TransitionUsageForPass(recordingContext, usages.textureUsages[i],
- &imageBarriers, &srcStages, &dstStages);
- }
-
- if (bufferBarriers.size() || imageBarriers.size()) {
- device->fn.CmdPipelineBarrier(recordingContext->commandBuffer, srcStages, dstStages,
- 0, 0, nullptr, bufferBarriers.size(),
- bufferBarriers.data(), imageBarriers.size(),
- imageBarriers.data());
- }
+ TransitionAndClearForSyncScope(device, recordingContext, usages);
// Reset all query set used on current render pass together before beginning render pass
// because the reset command must be called outside render pass
@@ -579,23 +485,6 @@
}
};
- // TODO(jiawei.shao@intel.com): move the resource lazy clearing inside the barrier tracking
- // for compute passes.
- auto PrepareResourcesForComputePass = [](Device* device,
- CommandRecordingContext* recordingContext,
- const ComputePassResourceUsage& usages) {
- for (size_t i = 0; i < usages.buffers.size(); ++i) {
- Buffer* buffer = ToBackend(usages.buffers[i]);
- buffer->EnsureDataInitialized(recordingContext);
- }
-
- for (size_t i = 0; i < usages.textures.size(); ++i) {
- Texture* texture = ToBackend(usages.textures[i]);
- texture->EnsureSubresourceContentInitialized(recordingContext,
- texture->GetAllSubresources());
- }
- };
-
size_t nextComputePassNumber = 0;
size_t nextRenderPassNumber = 0;
@@ -788,10 +677,9 @@
case Command::BeginComputePass: {
mCommands.NextCommand<BeginComputePassCmd>();
- PrepareResourcesForComputePass(
- device, recordingContext,
- GetResourceUsages().computePasses[nextComputePassNumber]);
- DAWN_TRY(RecordComputePass(recordingContext));
+ DAWN_TRY(RecordComputePass(
+ recordingContext,
+ GetResourceUsages().computePasses[nextComputePassNumber]));
nextComputePassNumber++;
break;
@@ -903,11 +791,13 @@
return {};
}
- MaybeError CommandBuffer::RecordComputePass(CommandRecordingContext* recordingContext) {
+ MaybeError CommandBuffer::RecordComputePass(CommandRecordingContext* recordingContext,
+ const ComputePassResourceUsage& resourceUsages) {
Device* device = ToBackend(GetDevice());
VkCommandBuffer commands = recordingContext->commandBuffer;
- ComputeDescriptorSetTracker descriptorSets = {};
+ uint64_t currentDispatch = 0;
+ DescriptorSetTracker descriptorSets = {};
Command type;
while (mCommands.NextCommandId(&type)) {
@@ -920,21 +810,27 @@
case Command::Dispatch: {
DispatchCmd* dispatch = mCommands.NextCommand<DispatchCmd>();
+ TransitionAndClearForSyncScope(device, recordingContext,
+ resourceUsages.dispatchUsages[currentDispatch]);
descriptorSets.Apply(device, recordingContext, VK_PIPELINE_BIND_POINT_COMPUTE);
+
device->fn.CmdDispatch(commands, dispatch->x, dispatch->y, dispatch->z);
+ currentDispatch++;
break;
}
case Command::DispatchIndirect: {
DispatchIndirectCmd* dispatch = mCommands.NextCommand<DispatchIndirectCmd>();
- ToBackend(dispatch->indirectBuffer)
- ->TransitionUsageNow(recordingContext, wgpu::BufferUsage::Indirect);
VkBuffer indirectBuffer = ToBackend(dispatch->indirectBuffer)->GetHandle();
+ TransitionAndClearForSyncScope(device, recordingContext,
+ resourceUsages.dispatchUsages[currentDispatch]);
descriptorSets.Apply(device, recordingContext, VK_PIPELINE_BIND_POINT_COMPUTE);
+
device->fn.CmdDispatchIndirect(
commands, indirectBuffer,
static_cast<VkDeviceSize>(dispatch->indirectOffset));
+ currentDispatch++;
break;
}
@@ -1072,7 +968,7 @@
device->fn.CmdSetScissor(commands, 0, 1, &scissorRect);
}
- RenderDescriptorSetTracker descriptorSets = {};
+ DescriptorSetTracker descriptorSets = {};
RenderPipeline* lastPipeline = nullptr;
auto EncodeRenderBundleCommand = [&](CommandIterator* iter, Command type) {
diff --git a/src/dawn_native/vulkan/CommandBufferVk.h b/src/dawn_native/vulkan/CommandBufferVk.h
index edc35ff..d5d603b 100644
--- a/src/dawn_native/vulkan/CommandBufferVk.h
+++ b/src/dawn_native/vulkan/CommandBufferVk.h
@@ -40,7 +40,8 @@
private:
CommandBuffer(CommandEncoder* encoder, const CommandBufferDescriptor* descriptor);
- MaybeError RecordComputePass(CommandRecordingContext* recordingContext);
+ MaybeError RecordComputePass(CommandRecordingContext* recordingContext,
+ const ComputePassResourceUsage& resourceUsages);
MaybeError RecordRenderPass(CommandRecordingContext* recordingContext,
BeginRenderPassCmd* renderPass);
void RecordCopyImageWithTemporaryBuffer(CommandRecordingContext* recordingContext,
diff --git a/src/tests/end2end/ComputeStorageBufferBarrierTests.cpp b/src/tests/end2end/ComputeStorageBufferBarrierTests.cpp
index 3c85edf..fedaf5e 100644
--- a/src/tests/end2end/ComputeStorageBufferBarrierTests.cpp
+++ b/src/tests/end2end/ComputeStorageBufferBarrierTests.cpp
@@ -343,6 +343,84 @@
EXPECT_BUFFER_U32_RANGE_EQ(expectedB.data(), bufferB, 0, kNumValues);
}
+// Test that barriers for dispatches correctly combine Indirect | Storage in backends with explicit
+// barriers. Do this by:
+// 1 - Initializing an indirect buffer with zeros.
+// 2 - Write ones into it with a compute shader.
+// 3 - Use the indirect buffer in a Dispatch while also reading its data.
+TEST_P(ComputeStorageBufferBarrierTests, IndirectBufferCorrectBarrier) {
+ // For some reason SPIRV-Cross crashes when translating the step3 shader to HLSL. Suppress the
+ // failure since we'll remove SPIRV-Cross at some point.
+ DAWN_SKIP_TEST_IF(IsD3D12() && !HasToggleEnabled("use_tint_generator"));
+
+ wgpu::ComputePipelineDescriptor step2PipelineDesc;
+ step2PipelineDesc.computeStage.entryPoint = "main";
+ step2PipelineDesc.computeStage.module = utils::CreateShaderModule(device, R"(
+ [[block]] struct Buf {
+ data : array<u32, 3>;
+ };
+ [[group(0), binding(0)]] var<storage> buf : [[access(read_write)]] Buf;
+
+ [[stage(compute)]] fn main() {
+ buf.data = array<u32, 3>(1u, 1u, 1u);
+ }
+ )");
+ wgpu::ComputePipeline step2Pipeline = device.CreateComputePipeline(&step2PipelineDesc);
+
+ wgpu::ComputePipelineDescriptor step3PipelineDesc;
+ step3PipelineDesc.computeStage.entryPoint = "main";
+ step3PipelineDesc.computeStage.module = utils::CreateShaderModule(device, R"(
+ [[block]] struct Buf {
+ data : array<u32, 3>;
+ };
+ [[group(0), binding(0)]] var<storage> buf : [[access(read)]] Buf;
+
+ [[block]] struct Result {
+ data : u32;
+ };
+ [[group(0), binding(1)]] var<storage> result : [[access(read_write)]] Result;
+
+ [[stage(compute)]] fn main() {
+ result.data = 2u;
+ if (buf.data[0] == 1u && buf.data[1] == 1u && buf.data[2] == 1u) {
+ result.data = 1u;
+ }
+ }
+ )");
+ wgpu::ComputePipeline step3Pipeline = device.CreateComputePipeline(&step3PipelineDesc);
+
+ // 1 - Initializing an indirect buffer with zeros.
+ wgpu::Buffer buf = utils::CreateBufferFromData<uint32_t>(
+ device, wgpu::BufferUsage::Storage | wgpu::BufferUsage::Indirect, {0u, 0u, 0u});
+
+ // 2 - Write ones into it with a compute shader.
+ wgpu::CommandEncoder encoder = device.CreateCommandEncoder();
+ wgpu::ComputePassEncoder pass = encoder.BeginComputePass();
+
+ wgpu::BindGroup step2Group =
+ utils::MakeBindGroup(device, step2Pipeline.GetBindGroupLayout(0), {{0, buf}});
+
+ pass.SetPipeline(step2Pipeline);
+ pass.SetBindGroup(0, step2Group);
+ pass.Dispatch(1);
+
+ // 3 - Use the indirect buffer in a Dispatch while also reading its data.
+ wgpu::Buffer resultBuffer = utils::CreateBufferFromData<uint32_t>(
+ device, wgpu::BufferUsage::Storage | wgpu::BufferUsage::CopySrc, {0u});
+ wgpu::BindGroup step3Group = utils::MakeBindGroup(device, step3Pipeline.GetBindGroupLayout(0),
+ {{0, buf}, {1, resultBuffer}});
+
+ pass.SetPipeline(step3Pipeline);
+ pass.SetBindGroup(0, step3Group);
+ pass.DispatchIndirect(buf, 0);
+
+ pass.EndPass();
+ wgpu::CommandBuffer commands = encoder.Finish();
+ queue.Submit(1, &commands);
+
+ EXPECT_BUFFER_U32_EQ(1u, resultBuffer, 0);
+}
+
DAWN_INSTANTIATE_TEST(ComputeStorageBufferBarrierTests,
D3D12Backend(),
MetalBackend(),
diff --git a/src/tests/unittests/validation/QueueSubmitValidationTests.cpp b/src/tests/unittests/validation/QueueSubmitValidationTests.cpp
index 8bf0f4d..cff2c82 100644
--- a/src/tests/unittests/validation/QueueSubmitValidationTests.cpp
+++ b/src/tests/unittests/validation/QueueSubmitValidationTests.cpp
@@ -219,4 +219,147 @@
WaitForAllOperations(device);
}
+ // Test that buffers in unused compute pass bindgroups are still checked for in
+ // Queue::Submit validation.
+ TEST_F(QueueSubmitValidationTest, SubmitWithUnusedComputeBuffer) {
+ wgpu::Queue queue = device.GetQueue();
+
+ wgpu::BindGroupLayout emptyBGL = utils::MakeBindGroupLayout(device, {});
+ wgpu::BindGroup emptyBG = utils::MakeBindGroup(device, emptyBGL, {});
+
+ wgpu::BindGroupLayout testBGL = utils::MakeBindGroupLayout(
+ device, {{0, wgpu::ShaderStage::Compute, wgpu::BufferBindingType::Storage}});
+
+ // In this test we check that BindGroup 1 is checked, the texture test will check
+ // BindGroup 2. This is to provide coverage of for loops in validation code.
+ wgpu::ComputePipelineDescriptor cpDesc;
+ cpDesc.layout = utils::MakePipelineLayout(device, {emptyBGL, testBGL});
+ cpDesc.computeStage.entryPoint = "main";
+ cpDesc.computeStage.module =
+ utils::CreateShaderModule(device, "[[stage(compute)]] fn main() {}");
+ wgpu::ComputePipeline pipeline = device.CreateComputePipeline(&cpDesc);
+
+ wgpu::BufferDescriptor bufDesc;
+ bufDesc.size = 4;
+ bufDesc.usage = wgpu::BufferUsage::Storage;
+
+ // Test that completely unused bindgroups still have their buffers checked.
+ for (bool destroy : {true, false}) {
+ wgpu::Buffer unusedBuffer = device.CreateBuffer(&bufDesc);
+ wgpu::BindGroup unusedBG = utils::MakeBindGroup(device, testBGL, {{0, unusedBuffer}});
+
+ wgpu::CommandEncoder encoder = device.CreateCommandEncoder();
+ wgpu::ComputePassEncoder pass = encoder.BeginComputePass();
+ pass.SetBindGroup(1, unusedBG);
+ pass.EndPass();
+ wgpu::CommandBuffer commands = encoder.Finish();
+
+ if (destroy) {
+ unusedBuffer.Destroy();
+ ASSERT_DEVICE_ERROR(queue.Submit(1, &commands));
+ } else {
+ queue.Submit(1, &commands);
+ }
+ }
+
+ // Test that unused bindgroups because they were replaced still have their buffers checked.
+ for (bool destroy : {true, false}) {
+ wgpu::Buffer unusedBuffer = device.CreateBuffer(&bufDesc);
+ wgpu::BindGroup unusedBG = utils::MakeBindGroup(device, testBGL, {{0, unusedBuffer}});
+
+ wgpu::Buffer usedBuffer = device.CreateBuffer(&bufDesc);
+ wgpu::BindGroup usedBG = utils::MakeBindGroup(device, testBGL, {{0, unusedBuffer}});
+
+ wgpu::CommandEncoder encoder = device.CreateCommandEncoder();
+ wgpu::ComputePassEncoder pass = encoder.BeginComputePass();
+ pass.SetBindGroup(0, emptyBG);
+ pass.SetBindGroup(1, unusedBG);
+ pass.SetBindGroup(1, usedBG);
+ pass.SetPipeline(pipeline);
+ pass.Dispatch(1);
+ pass.EndPass();
+ wgpu::CommandBuffer commands = encoder.Finish();
+
+ if (destroy) {
+ unusedBuffer.Destroy();
+ ASSERT_DEVICE_ERROR(queue.Submit(1, &commands));
+ } else {
+ queue.Submit(1, &commands);
+ }
+ }
+ }
+
+ // Test that textures in unused compute pass bindgroups are still checked for in
+ // Queue::Submit validation.
+ TEST_F(QueueSubmitValidationTest, SubmitWithUnusedComputeTextures) {
+ wgpu::Queue queue = device.GetQueue();
+
+ wgpu::BindGroupLayout emptyBGL = utils::MakeBindGroupLayout(device, {});
+ wgpu::BindGroup emptyBG = utils::MakeBindGroup(device, emptyBGL, {});
+
+ wgpu::BindGroupLayout testBGL = utils::MakeBindGroupLayout(
+ device, {{0, wgpu::ShaderStage::Compute, wgpu::TextureSampleType::Float}});
+
+ wgpu::ComputePipelineDescriptor cpDesc;
+ cpDesc.layout = utils::MakePipelineLayout(device, {emptyBGL, emptyBGL, testBGL});
+ cpDesc.computeStage.entryPoint = "main";
+ cpDesc.computeStage.module =
+ utils::CreateShaderModule(device, "[[stage(compute)]] fn main() {}");
+ wgpu::ComputePipeline pipeline = device.CreateComputePipeline(&cpDesc);
+
+ wgpu::TextureDescriptor texDesc;
+ texDesc.size = {1, 1, 1};
+ texDesc.usage = wgpu::TextureUsage::Sampled;
+ texDesc.format = wgpu::TextureFormat::RGBA8Unorm;
+
+ // Test that completely unused bindgroups still have their buffers checked.
+ for (bool destroy : {true, false}) {
+ wgpu::Texture unusedTexture = device.CreateTexture(&texDesc);
+ wgpu::BindGroup unusedBG =
+ utils::MakeBindGroup(device, testBGL, {{0, unusedTexture.CreateView()}});
+
+ wgpu::CommandEncoder encoder = device.CreateCommandEncoder();
+ wgpu::ComputePassEncoder pass = encoder.BeginComputePass();
+ pass.SetBindGroup(2, unusedBG);
+ pass.EndPass();
+ wgpu::CommandBuffer commands = encoder.Finish();
+
+ if (destroy) {
+ unusedTexture.Destroy();
+ ASSERT_DEVICE_ERROR(queue.Submit(1, &commands));
+ } else {
+ queue.Submit(1, &commands);
+ }
+ }
+
+ // Test that unused bindgroups because they were replaced still have their buffers checked.
+ for (bool destroy : {true, false}) {
+ wgpu::Texture unusedTexture = device.CreateTexture(&texDesc);
+ wgpu::BindGroup unusedBG =
+ utils::MakeBindGroup(device, testBGL, {{0, unusedTexture.CreateView()}});
+
+ wgpu::Texture usedTexture = device.CreateTexture(&texDesc);
+ wgpu::BindGroup usedBG =
+ utils::MakeBindGroup(device, testBGL, {{0, unusedTexture.CreateView()}});
+
+ wgpu::CommandEncoder encoder = device.CreateCommandEncoder();
+ wgpu::ComputePassEncoder pass = encoder.BeginComputePass();
+ pass.SetBindGroup(0, emptyBG);
+ pass.SetBindGroup(1, emptyBG);
+ pass.SetBindGroup(2, unusedBG);
+ pass.SetBindGroup(2, usedBG);
+ pass.SetPipeline(pipeline);
+ pass.Dispatch(1);
+ pass.EndPass();
+ wgpu::CommandBuffer commands = encoder.Finish();
+
+ if (destroy) {
+ unusedTexture.Destroy();
+ ASSERT_DEVICE_ERROR(queue.Submit(1, &commands));
+ } else {
+ queue.Submit(1, &commands);
+ }
+ }
+ }
+
} // anonymous namespace
diff --git a/src/tests/unittests/validation/ResourceUsageTrackingTests.cpp b/src/tests/unittests/validation/ResourceUsageTrackingTests.cpp
index 918411a..10bec0a 100644
--- a/src/tests/unittests/validation/ResourceUsageTrackingTests.cpp
+++ b/src/tests/unittests/validation/ResourceUsageTrackingTests.cpp
@@ -61,12 +61,12 @@
return device.CreateRenderPipeline2(&pipelineDescriptor);
}
- wgpu::ComputePipeline CreateNoOpComputePipeline() {
+ wgpu::ComputePipeline CreateNoOpComputePipeline(std::vector<wgpu::BindGroupLayout> bgls) {
wgpu::ShaderModule csModule = utils::CreateShaderModule(device, R"(
[[stage(compute)]] fn main() {
})");
wgpu::ComputePipelineDescriptor pipelineDescriptor;
- pipelineDescriptor.layout = utils::MakeBasicPipelineLayout(device, nullptr);
+ pipelineDescriptor.layout = utils::MakePipelineLayout(device, std::move(bgls));
pipelineDescriptor.computeStage.module = csModule;
pipelineDescriptor.computeStage.entryPoint = "main";
return device.CreateComputePipeline(&pipelineDescriptor);
@@ -149,7 +149,7 @@
utils::MakeBindGroup(device, bgl, {{0, buffer, 0, 4}, {1, buffer, 256, 4}});
// Create a no-op compute pipeline
- wgpu::ComputePipeline cp = CreateNoOpComputePipeline();
+ wgpu::ComputePipeline cp = CreateNoOpComputePipeline({bgl});
// It is valid to use the buffer as both storage and readonly storage in a single
// compute pass if dispatch command is not called.
@@ -170,15 +170,14 @@
pass.SetBindGroup(0, bg);
pass.Dispatch(1);
pass.EndPass();
- // TODO (yunchao.he@intel.com): add buffer usage tracking for compute
- // ASSERT_DEVICE_ERROR(encoder.Finish());
- encoder.Finish();
+ ASSERT_DEVICE_ERROR(encoder.Finish());
}
}
}
- // Test using multiple writable usages on the same buffer in a single pass/dispatch
- TEST_F(ResourceUsageTrackingTest, BufferWithMultipleWriteUsage) {
+ // Test the use of a buffer as a storage buffer multiple times in the same synchronization
+ // scope.
+ TEST_F(ResourceUsageTrackingTest, BufferUsedAsStorageMultipleTimes) {
// Create buffer and bind group
wgpu::Buffer buffer = CreateBuffer(512, wgpu::BufferUsage::Storage);
@@ -203,32 +202,16 @@
// test compute pass
{
- // Create a no-op compute pipeline
- wgpu::ComputePipeline cp = CreateNoOpComputePipeline();
+ // It is valid to use multiple storage usages on the same buffer in a dispatch
+ wgpu::ComputePipeline cp = CreateNoOpComputePipeline({bgl});
- // It is valid to use the same buffer as multiple writeable usages in a single compute
- // pass if dispatch command is not called.
- {
- wgpu::CommandEncoder encoder = device.CreateCommandEncoder();
- wgpu::ComputePassEncoder pass = encoder.BeginComputePass();
- pass.SetBindGroup(0, bg);
- pass.EndPass();
- encoder.Finish();
- }
-
- // It is invalid to use the same buffer as multiple writeable usages in a single
- // dispatch
- {
- wgpu::CommandEncoder encoder = device.CreateCommandEncoder();
- wgpu::ComputePassEncoder pass = encoder.BeginComputePass();
- pass.SetPipeline(cp);
- pass.SetBindGroup(0, bg);
- pass.Dispatch(1);
- pass.EndPass();
- // TODO (yunchao.he@intel.com): add buffer usage tracking for compute
- // ASSERT_DEVICE_ERROR(encoder.Finish());
- encoder.Finish();
- }
+ wgpu::CommandEncoder encoder = device.CreateCommandEncoder();
+ wgpu::ComputePassEncoder pass = encoder.BeginComputePass();
+ pass.SetPipeline(cp);
+ pass.SetBindGroup(0, bg);
+ pass.Dispatch(1);
+ pass.EndPass();
+ encoder.Finish();
}
}
@@ -368,17 +351,19 @@
wgpu::BindGroup bg1 = utils::MakeBindGroup(device, bgl1, {{0, buffer}});
// Create a no-op compute pipeline.
- wgpu::ComputePipeline cp = CreateNoOpComputePipeline();
+ wgpu::ComputePipeline cp0 = CreateNoOpComputePipeline({bgl0});
+ wgpu::ComputePipeline cp1 = CreateNoOpComputePipeline({bgl1});
// It is valid to use the same buffer as both readable and writable in different
// dispatches within the same compute pass.
wgpu::CommandEncoder encoder = device.CreateCommandEncoder();
wgpu::ComputePassEncoder pass = encoder.BeginComputePass();
- pass.SetPipeline(cp);
+ pass.SetPipeline(cp0);
pass.SetBindGroup(0, bg0);
pass.Dispatch(1);
+ pass.SetPipeline(cp1);
pass.SetBindGroup(0, bg1);
pass.Dispatch(1);
@@ -431,7 +416,7 @@
wgpu::BindGroup writeBG = utils::MakeBindGroup(device, writeBGL, {{0, buffer}});
// Create a no-op compute pipeline.
- wgpu::ComputePipeline cp = CreateNoOpComputePipeline();
+ wgpu::ComputePipeline cp = CreateNoOpComputePipeline({readBGL, writeBGL});
// It is invalid to use the same buffer as both readable and writable usages in a single
// dispatch
@@ -444,9 +429,7 @@
pass.Dispatch(1);
pass.EndPass();
- // TODO (yunchao.he@intel.com): add buffer usage tracking for compute
- // ASSERT_DEVICE_ERROR(encoder.Finish());
- encoder.Finish();
+ ASSERT_DEVICE_ERROR(encoder.Finish());
}
}
@@ -479,11 +462,17 @@
// Use the buffer as both copy dst and readonly storage in compute pass
{
+ wgpu::ComputePipeline cp = CreateNoOpComputePipeline({bgl1});
+
wgpu::CommandEncoder encoder = device.CreateCommandEncoder();
encoder.CopyBufferToBuffer(bufferSrc, 0, bufferDst, 0, 4);
+
wgpu::ComputePassEncoder pass = encoder.BeginComputePass();
pass.SetBindGroup(0, bg1);
+ pass.SetPipeline(cp);
+ pass.Dispatch(1);
pass.EndPass();
+
encoder.Finish();
}
}
@@ -601,7 +590,7 @@
// test compute pass
{
- // Create buffers that will be used as index and storage buffers
+ // Create buffers that will be used as readonly and writable storage buffers
wgpu::Buffer buffer0 = CreateBuffer(512, wgpu::BufferUsage::Storage);
wgpu::Buffer buffer1 = CreateBuffer(4, wgpu::BufferUsage::Storage);
@@ -616,11 +605,11 @@
wgpu::BindGroup readBG1 = utils::MakeBindGroup(device, readBGL, {{0, buffer1, 0, 4}});
// Create a no-op compute pipeline.
- wgpu::ComputePipeline cp = CreateNoOpComputePipeline();
+ wgpu::ComputePipeline cp = CreateNoOpComputePipeline({writeBGL, readBGL});
// Set bind group against the same index twice. The second one overwrites the first one.
- // Then no buffer is used as both read and write in the same pass. But the overwritten
- // bind group still take effect.
+ // Then no buffer is used as both read and write in the same dispatch. But the
+ // overwritten bind group still take effect.
{
wgpu::CommandEncoder encoder = device.CreateCommandEncoder();
wgpu::ComputePassEncoder pass = encoder.BeginComputePass();
@@ -630,13 +619,11 @@
pass.SetPipeline(cp);
pass.Dispatch(1);
pass.EndPass();
- // TODO (yunchao.he@intel.com): add buffer usage tracking for compute
- // ASSERT_DEVICE_ERROR(encoder.Finish());
encoder.Finish();
}
// Set bind group against the same index twice. The second one overwrites the first one.
- // Then buffer0 is used as both read and write in the same pass
+ // Then buffer0 is used as both read and write in the same dispatch
{
wgpu::CommandEncoder encoder = device.CreateCommandEncoder();
wgpu::ComputePassEncoder pass = encoder.BeginComputePass();
@@ -646,9 +633,7 @@
pass.SetPipeline(cp);
pass.Dispatch(1);
pass.EndPass();
- // TODO (yunchao.he@intel.com): add buffer usage tracking for compute
- // ASSERT_DEVICE_ERROR(encoder.Finish());
- encoder.Finish();
+ ASSERT_DEVICE_ERROR(encoder.Finish());
}
}
}
@@ -686,18 +671,16 @@
wgpu::BindGroup bg = utils::MakeBindGroup(device, bgl, {{0, buffer}, {1, buffer}});
// Create a no-op compute pipeline.
- wgpu::ComputePipeline cp = CreateNoOpComputePipeline();
+ wgpu::ComputePipeline cp = CreateNoOpComputePipeline({bgl});
- // These two bindings are invisible in compute pass. But we still track these bindings.
+ // These two bindings are invisible in the dispatch. But we still track these bindings.
wgpu::CommandEncoder encoder = device.CreateCommandEncoder();
wgpu::ComputePassEncoder pass = encoder.BeginComputePass();
pass.SetPipeline(cp);
pass.SetBindGroup(0, bg);
pass.Dispatch(1);
pass.EndPass();
- // TODO (yunchao.he@intel.com): add buffer usage tracking for compute
- // ASSERT_DEVICE_ERROR(encoder.Finish());
- encoder.Finish();
+ ASSERT_DEVICE_ERROR(encoder.Finish());
}
}
@@ -727,7 +710,7 @@
// Test compute pass for bind group. The conflict of readonly storage and storage buffer
// usage resides between compute stage and fragment stage. But the fragment stage binding is
- // not visible in compute pass.
+ // not visible in the dispatch.
{
wgpu::Buffer buffer = CreateBuffer(4, wgpu::BufferUsage::Storage);
wgpu::BindGroupLayout bgl = utils::MakeBindGroupLayout(
@@ -736,10 +719,10 @@
wgpu::BindGroup bg = utils::MakeBindGroup(device, bgl, {{0, buffer}, {1, buffer}});
// Create a no-op compute pipeline.
- wgpu::ComputePipeline cp = CreateNoOpComputePipeline();
+ wgpu::ComputePipeline cp = CreateNoOpComputePipeline({bgl});
// Buffer usage in compute stage conflicts with buffer usage in fragment stage. And
- // binding for fragment stage is not visible in compute pass. But we still track this
+ // binding for fragment stage is not visible in the dispatch. But we still track this
// invisible binding.
wgpu::CommandEncoder encoder = device.CreateCommandEncoder();
wgpu::ComputePassEncoder pass = encoder.BeginComputePass();
@@ -747,9 +730,7 @@
pass.SetBindGroup(0, bg);
pass.Dispatch(1);
pass.EndPass();
- // TODO (yunchao.he@intel.com): add buffer usage tracking for compute
- // ASSERT_DEVICE_ERROR(encoder.Finish());
- encoder.Finish();
+ ASSERT_DEVICE_ERROR(encoder.Finish());
}
}
@@ -803,9 +784,8 @@
ASSERT_DEVICE_ERROR(encoder.Finish());
}
- // Test compute pass for bind groups with unused bindings. The conflict of readonly storage
- // and storage usages resides in different bind groups, although some bindings may not be
- // used because its bind group layout is not designated in pipeline layout.
+ // Test that an unused bind group is not used to detect conflicts between bindings in
+ // compute passes.
{
// Create bind groups. The bindings are visible for compute pass.
wgpu::BindGroupLayout bgl0 = utils::MakeBindGroupLayout(
@@ -816,23 +796,11 @@
wgpu::BindGroup bg0 = utils::MakeBindGroup(device, bgl0, {{0, buffer}});
wgpu::BindGroup bg1 = utils::MakeBindGroup(device, bgl1, {{0, buffer}});
- // Create a passthrough compute pipeline with a readonly buffer
- wgpu::ShaderModule csModule = utils::CreateShaderModule(device, R"(
- [[block]] struct RBuffer {
- value : f32;
- };
- [[group(0), binding(0)]] var<storage> rBuffer : [[access(read)]] RBuffer;
- [[stage(compute)]] fn main() {
- })");
- wgpu::ComputePipelineDescriptor pipelineDescriptor;
- pipelineDescriptor.layout = utils::MakeBasicPipelineLayout(device, &bgl0);
- pipelineDescriptor.computeStage.module = csModule;
- pipelineDescriptor.computeStage.entryPoint = "main";
- wgpu::ComputePipeline cp = device.CreateComputePipeline(&pipelineDescriptor);
+ // Create a compute pipeline with only one of the two BGLs.
+ wgpu::ComputePipeline cp = CreateNoOpComputePipeline({bgl0});
// Resource in bg1 conflicts with resources used in bg0. However, the binding in bg1 is
- // not used in pipeline. But we still track this binding and read/write usage in one
- // dispatch is not allowed.
+ // not used in pipeline so no error is produced in the dispatch.
wgpu::CommandEncoder encoder = device.CreateCommandEncoder();
wgpu::ComputePassEncoder pass = encoder.BeginComputePass();
pass.SetBindGroup(0, bg0);
@@ -840,8 +808,6 @@
pass.SetPipeline(cp);
pass.Dispatch(1);
pass.EndPass();
- // TODO (yunchao.he@intel.com): add resource tracking per dispatch for compute pass
- // ASSERT_DEVICE_ERROR(encoder.Finish());
encoder.Finish();
}
}
@@ -875,9 +841,13 @@
// Test compute pass
{
// Use the texture as both sampled and readonly storage in the same compute pass
+ wgpu::ComputePipeline cp = CreateNoOpComputePipeline({bgl});
+
wgpu::CommandEncoder encoder = device.CreateCommandEncoder();
wgpu::ComputePassEncoder pass = encoder.BeginComputePass();
pass.SetBindGroup(0, bg);
+ pass.SetPipeline(cp);
+ pass.Dispatch(1);
pass.EndPass();
encoder.Finish();
}
@@ -924,7 +894,7 @@
wgpu::BindGroup bg = utils::MakeBindGroup(device, bgl, {{0, view}, {1, view}});
// Create a no-op compute pipeline
- wgpu::ComputePipeline cp = CreateNoOpComputePipeline();
+ wgpu::ComputePipeline cp = CreateNoOpComputePipeline({bgl});
// It is valid to use the texture as both sampled and writeonly storage in a single
// compute pass if dispatch command is not called.
@@ -945,9 +915,7 @@
pass.SetBindGroup(0, bg);
pass.Dispatch(1);
pass.EndPass();
- // TODO (yunchao.he@intel.com): add texture usage tracking for compute
- // ASSERT_DEVICE_ERROR(encoder.Finish());
- encoder.Finish();
+ ASSERT_DEVICE_ERROR(encoder.Finish());
}
}
}
@@ -1008,31 +976,17 @@
wgpu::BindGroup bg = utils::MakeBindGroup(device, bgl, {{0, view}, {1, view}});
// Create a no-op compute pipeline
- wgpu::ComputePipeline cp = CreateNoOpComputePipeline();
+ wgpu::ComputePipeline cp = CreateNoOpComputePipeline({bgl});
// It is valid to use the texture as multiple writeonly storage usages in a single
- // compute pass if dispatch command is not called.
- {
- wgpu::CommandEncoder encoder = device.CreateCommandEncoder();
- wgpu::ComputePassEncoder pass = encoder.BeginComputePass();
- pass.SetBindGroup(0, bg);
- pass.EndPass();
- encoder.Finish();
- }
-
- // It is invalid to use the texture as multiple writeonly storage usages in a single
// dispatch
- {
- wgpu::CommandEncoder encoder = device.CreateCommandEncoder();
- wgpu::ComputePassEncoder pass = encoder.BeginComputePass();
- pass.SetPipeline(cp);
- pass.SetBindGroup(0, bg);
- pass.Dispatch(1);
- pass.EndPass();
- // TODO (yunchao.he@intel.com): add texture usage tracking for compute
- // ASSERT_DEVICE_ERROR(encoder.Finish());
- encoder.Finish();
- }
+ wgpu::CommandEncoder encoder = device.CreateCommandEncoder();
+ wgpu::ComputePassEncoder pass = encoder.BeginComputePass();
+ pass.SetPipeline(cp);
+ pass.SetBindGroup(0, bg);
+ pass.Dispatch(1);
+ pass.EndPass();
+ encoder.Finish();
}
}
@@ -1189,17 +1143,19 @@
wgpu::BindGroup writeBG = utils::MakeBindGroup(device, writeBGL, {{0, view}});
// Create a no-op compute pipeline.
- wgpu::ComputePipeline cp = CreateNoOpComputePipeline();
+ wgpu::ComputePipeline readCp = CreateNoOpComputePipeline({readBGL});
+ wgpu::ComputePipeline writeCp = CreateNoOpComputePipeline({writeBGL});
// It is valid to use the same texture as both readable and writable in different
// dispatches within the same compute pass.
wgpu::CommandEncoder encoder = device.CreateCommandEncoder();
wgpu::ComputePassEncoder pass = encoder.BeginComputePass();
- pass.SetPipeline(cp);
+ pass.SetPipeline(readCp);
pass.SetBindGroup(0, readBG);
pass.Dispatch(1);
+ pass.SetPipeline(writeCp);
pass.SetBindGroup(0, writeBG);
pass.Dispatch(1);
@@ -1258,7 +1214,7 @@
wgpu::BindGroup writeBG = utils::MakeBindGroup(device, writeBGL, {{0, view}});
// Create a no-op compute pipeline.
- wgpu::ComputePipeline cp = CreateNoOpComputePipeline();
+ wgpu::ComputePipeline cp = CreateNoOpComputePipeline({readBGL, writeBGL});
// It is invalid to use the same texture as both readable and writable usages in a
// single dispatch
@@ -1271,9 +1227,7 @@
pass.Dispatch(1);
pass.EndPass();
- // TODO (yunchao.he@intel.com): add texture usage tracking for compute
- // ASSERT_DEVICE_ERROR(encoder.Finish());
- encoder.Finish();
+ ASSERT_DEVICE_ERROR(encoder.Finish());
}
}
@@ -1309,10 +1263,14 @@
device, {{0, wgpu::ShaderStage::Compute, wgpu::TextureSampleType::Float}});
wgpu::BindGroup bg = utils::MakeBindGroup(device, bgl, {{0, view1}});
+ wgpu::ComputePipeline cp = CreateNoOpComputePipeline({bgl});
+
wgpu::CommandEncoder encoder = device.CreateCommandEncoder();
encoder.CopyTextureToTexture(&srcView, &dstView, ©Size);
wgpu::ComputePassEncoder pass = encoder.BeginComputePass();
pass.SetBindGroup(0, bg);
+ pass.SetPipeline(cp);
+ pass.Dispatch(1);
pass.EndPass();
encoder.Finish();
}
@@ -1386,11 +1344,11 @@
wgpu::BindGroup readBG1 = utils::MakeBindGroup(device, readBGL, {{0, view1}});
// Create a no-op compute pipeline.
- wgpu::ComputePipeline cp = CreateNoOpComputePipeline();
+ wgpu::ComputePipeline cp = CreateNoOpComputePipeline({writeBGL, readBGL});
// Set bind group on the same index twice. The second one overwrites the first one.
- // No texture is used as both readonly and writeonly storage in the same pass. But the
- // overwritten texture still take effect during resource tracking.
+ // No texture is used as both readonly and writeonly storage in the same dispatch so
+ // there are no errors.
{
wgpu::CommandEncoder encoder = device.CreateCommandEncoder();
wgpu::ComputePassEncoder pass = encoder.BeginComputePass();
@@ -1400,13 +1358,12 @@
pass.SetPipeline(cp);
pass.Dispatch(1);
pass.EndPass();
- // TODO (yunchao.he@intel.com): add texture usage tracking for compute
- // ASSERT_DEVICE_ERROR(encoder.Finish());
encoder.Finish();
}
// Set bind group on the same index twice. The second one overwrites the first one.
- // texture0 is used as both writeonly and readonly storage in the same pass.
+ // texture0 is used as both writeonly and readonly storage in the same dispatch, which
+ // is an error.
{
wgpu::CommandEncoder encoder = device.CreateCommandEncoder();
wgpu::ComputePassEncoder pass = encoder.BeginComputePass();
@@ -1416,9 +1373,7 @@
pass.SetPipeline(cp);
pass.Dispatch(1);
pass.EndPass();
- // TODO (yunchao.he@intel.com): add texture usage tracking for compute
- // ASSERT_DEVICE_ERROR(encoder.Finish());
- encoder.Finish();
+ ASSERT_DEVICE_ERROR(encoder.Finish());
}
}
}
@@ -1460,7 +1415,7 @@
wgpu::BindGroup bg = utils::MakeBindGroup(device, bgl, {{0, view}, {1, view}});
// Create a no-op compute pipeline.
- wgpu::ComputePipeline cp = CreateNoOpComputePipeline();
+ wgpu::ComputePipeline cp = CreateNoOpComputePipeline({bgl});
// These two bindings are invisible in compute pass. But we still track these bindings.
wgpu::CommandEncoder encoder = device.CreateCommandEncoder();
@@ -1469,9 +1424,7 @@
pass.SetBindGroup(0, bg);
pass.Dispatch(1);
pass.EndPass();
- // TODO (yunchao.he@intel.com): add texture usage tracking for compute
- // ASSERT_DEVICE_ERROR(encoder.Finish());
- encoder.Finish();
+ ASSERT_DEVICE_ERROR(encoder.Finish());
}
}
@@ -1514,7 +1467,7 @@
wgpu::BindGroup bg = utils::MakeBindGroup(device, bgl, {{0, view}, {1, view}});
// Create a no-op compute pipeline.
- wgpu::ComputePipeline cp = CreateNoOpComputePipeline();
+ wgpu::ComputePipeline cp = CreateNoOpComputePipeline({bgl});
// Texture usage in compute stage conflicts with texture usage in fragment stage. And
// binding for fragment stage is not visible in compute pass. But we still track this
@@ -1525,9 +1478,7 @@
pass.SetBindGroup(0, bg);
pass.Dispatch(1);
pass.EndPass();
- // TODO (yunchao.he@intel.com): add texture usage tracking for compute
- // ASSERT_DEVICE_ERROR(encoder.Finish());
- encoder.Finish();
+ ASSERT_DEVICE_ERROR(encoder.Finish());
}
}
@@ -1581,19 +1532,10 @@
// Test compute pass
{
- // Create a passthrough compute pipeline with a readonly storage texture
- wgpu::ShaderModule csModule = utils::CreateShaderModule(device, R"(
- [[group(0), binding(0)]] var tex : [[access(read)]] texture_storage_2d<rgba8unorm>;
- [[stage(compute)]] fn main() {
- })");
- wgpu::ComputePipelineDescriptor pipelineDescriptor;
- pipelineDescriptor.layout = utils::MakeBasicPipelineLayout(device, &readBGL);
- pipelineDescriptor.computeStage.module = csModule;
- pipelineDescriptor.computeStage.entryPoint = "main";
- wgpu::ComputePipeline cp = device.CreateComputePipeline(&pipelineDescriptor);
+ wgpu::ComputePipeline cp = CreateNoOpComputePipeline({readBGL});
// Texture binding in readBG conflicts with texture binding in writeBG. The binding
- // in writeBG is not used in pipeline. But we still track this binding.
+ // in writeBG is not used in pipeline's layout so it isn't an error.
wgpu::CommandEncoder encoder = device.CreateCommandEncoder();
wgpu::ComputePassEncoder pass = encoder.BeginComputePass();
pass.SetBindGroup(0, readBG);
@@ -1601,19 +1543,81 @@
pass.SetPipeline(cp);
pass.Dispatch(1);
pass.EndPass();
- // TODO (yunchao.he@intel.com): add resource tracking per dispatch for compute pass
- // ASSERT_DEVICE_ERROR(encoder.Finish());
encoder.Finish();
}
}
+ // Test that using an indirect buffer is disallowed with a writable usage (like storage) but
+ // allowed with a readable usage (like readonly storage).
+ TEST_F(ResourceUsageTrackingTest, IndirectBufferWithReadOrWriteStorage) {
+ wgpu::Buffer buffer =
+ CreateBuffer(20, wgpu::BufferUsage::Indirect | wgpu::BufferUsage::Storage);
+
+ wgpu::BindGroupLayout readBGL = utils::MakeBindGroupLayout(
+ device, {{0, wgpu::ShaderStage::Compute, wgpu::BufferBindingType::ReadOnlyStorage}});
+ wgpu::BindGroupLayout writeBGL = utils::MakeBindGroupLayout(
+ device, {{0, wgpu::ShaderStage::Compute, wgpu::BufferBindingType::Storage}});
+
+ wgpu::BindGroup readBG = utils::MakeBindGroup(device, readBGL, {{0, buffer}});
+ wgpu::BindGroup writeBG = utils::MakeBindGroup(device, writeBGL, {{0, buffer}});
+
+ // Test pipelines
+ wgpu::RenderPipeline rp = CreateNoOpRenderPipeline();
+ wgpu::ComputePipeline readCp = CreateNoOpComputePipeline({readBGL});
+ wgpu::ComputePipeline writeCp = CreateNoOpComputePipeline({writeBGL});
+
+ // Test that indirect + readonly is allowed in the same render pass.
+ {
+ wgpu::CommandEncoder encoder = device.CreateCommandEncoder();
+ DummyRenderPass dummyRenderPass(device);
+ wgpu::RenderPassEncoder pass = encoder.BeginRenderPass(&dummyRenderPass);
+ pass.SetPipeline(rp);
+ pass.SetBindGroup(0, readBG);
+ pass.DrawIndirect(buffer, 0);
+ pass.EndPass();
+ encoder.Finish();
+ }
+
+ // Test that indirect + writable is disallowed in the same render pass.
+ {
+ wgpu::CommandEncoder encoder = device.CreateCommandEncoder();
+ DummyRenderPass dummyRenderPass(device);
+ wgpu::RenderPassEncoder pass = encoder.BeginRenderPass(&dummyRenderPass);
+ pass.SetPipeline(rp);
+ pass.SetBindGroup(0, writeBG);
+ pass.DrawIndirect(buffer, 0);
+ pass.EndPass();
+ ASSERT_DEVICE_ERROR(encoder.Finish());
+ }
+
+ // Test that indirect + readonly is allowed in the same dispatch
+ {
+ wgpu::CommandEncoder encoder = device.CreateCommandEncoder();
+ wgpu::ComputePassEncoder pass = encoder.BeginComputePass();
+ pass.SetPipeline(readCp);
+ pass.SetBindGroup(0, readBG);
+ pass.DispatchIndirect(buffer, 0);
+ pass.EndPass();
+ encoder.Finish();
+ }
+
+ // Test that indirect + writable is disallowed in the same dispatch
+ {
+ wgpu::CommandEncoder encoder = device.CreateCommandEncoder();
+ wgpu::ComputePassEncoder pass = encoder.BeginComputePass();
+ pass.SetPipeline(writeCp);
+ pass.SetBindGroup(0, writeBG);
+ pass.DispatchIndirect(buffer, 0);
+ pass.EndPass();
+ ASSERT_DEVICE_ERROR(encoder.Finish());
+ }
+ }
+
// TODO (yunchao.he@intel.com):
//
// * Add tests for multiple encoders upon the same resource simultaneously. This situation fits
// some cases like VR, multi-threading, etc.
//
- // * Add tests for indirect buffer
- //
// * Add tests for bundle
} // anonymous namespace
diff --git a/src/utils/WGPUHelpers.cpp b/src/utils/WGPUHelpers.cpp
index c120966..c560c39 100644
--- a/src/utils/WGPUHelpers.cpp
+++ b/src/utils/WGPUHelpers.cpp
@@ -226,6 +226,14 @@
return device.CreatePipelineLayout(&descriptor);
}
+ wgpu::PipelineLayout MakePipelineLayout(const wgpu::Device& device,
+ std::vector<wgpu::BindGroupLayout> bgls) {
+ wgpu::PipelineLayoutDescriptor descriptor;
+ descriptor.bindGroupLayoutCount = uint32_t(bgls.size());
+ descriptor.bindGroupLayouts = bgls.data();
+ return device.CreatePipelineLayout(&descriptor);
+ }
+
wgpu::BindGroupLayout MakeBindGroupLayout(
const wgpu::Device& device,
std::initializer_list<BindingLayoutEntryInitializationHelper> entriesInitializer) {
diff --git a/src/utils/WGPUHelpers.h b/src/utils/WGPUHelpers.h
index 54ebc3f..26c01fe 100644
--- a/src/utils/WGPUHelpers.h
+++ b/src/utils/WGPUHelpers.h
@@ -94,6 +94,9 @@
wgpu::PipelineLayout MakeBasicPipelineLayout(const wgpu::Device& device,
const wgpu::BindGroupLayout* bindGroupLayout);
+ wgpu::PipelineLayout MakePipelineLayout(const wgpu::Device& device,
+ std::vector<wgpu::BindGroupLayout> bgls);
+
// Helpers to make creating bind group layouts look nicer:
//
// utils::MakeBindGroupLayout(device, {