optimization: BlockAllocator: Actually allocate in blocks
Instead of hitting the heap for each and every call to Create()
Significantly improves performance for heavy loads. Slight performance
loss for lighter loads.
A: base.bench
B: new.bench
Test name | Δ (A → B) | % (A → B)
--------------------------------------+--------------+-----------
GenerateSPIRV/"simple_fragment.wgsl" | 27.021µs | +6.4%
GenerateMSL/"simple_compute.wgsl" | 35.592µs | +6.1%
GenerateMSL/"simple_vertex.wgsl" | 37.64µs | +5.5%
GenerateHLSL/"simple_fragment.wgsl" | 42.145µs | +5.2%
GenerateGLSL/"simple_fragment.wgsl" | 31.506µs | +4.9%
GenerateHLSL/"simple_vertex.wgsl" | 38.843µs | +4.7%
GenerateMSL/"simple_fragment.wgsl" | 29.977µs | +4.5%
GenerateSPIRV/"simple_vertex.wgsl" | 19.882µs | +4.2%
GenerateGLSL/"simple_vertex.wgsl" | 24.702µs | +3.7%
GenerateSPIRV/"simple_compute.wgsl" | 17.652µs | +3.2%
GenerateHLSL/"simple_compute.wgsl" | 26.826µs | +2.7%
GenerateGLSL/"simple_compute.wgsl" | 11.952µs | +1.8%
ParseWGSL/"particles.wgsl" | -104.83µs | -4.2%
GenerateMSL/"particles.wgsl" | -1.079243ms | -9.4%
GenerateSPIRV/"particles.wgsl" | -1.012483ms | -9.4%
GenerateGLSL/"particles.wgsl" | -3.522106ms | -9.5%
GenerateHLSL/"particles.wgsl" | -1.849666ms | -10.6%
Issue: tint:1383
Change-Id: Ib691328538c597c06a75dfba392c99d2afbd5442
Reviewed-on: https://dawn-review.googlesource.com/c/tint/+/76961
Reviewed-by: Antonio Maiorano <amaiorano@google.com>
Kokoro: Kokoro <noreply+kokoro@google.com>
Commit-Queue: Ben Clayton <bclayton@google.com>
diff --git a/fuzzers/tint_ast_clone_fuzzer.cc b/fuzzers/tint_ast_clone_fuzzer.cc
index bdaf643..85d7a17 100644
--- a/fuzzers/tint_ast_clone_fuzzer.cc
+++ b/fuzzers/tint_ast_clone_fuzzer.cc
@@ -73,11 +73,11 @@
ASSERT_EQ(tint::Program::printer(&src), tint::Program::printer(&dst));
// Check that none of the AST nodes or type pointers in dst are found in src
- std::unordered_set<tint::ast::Node*> src_nodes;
+ std::unordered_set<const tint::ast::Node*> src_nodes;
for (auto* src_node : src.ASTNodes().Objects()) {
src_nodes.emplace(src_node);
}
- std::unordered_set<tint::sem::Type*> src_types;
+ std::unordered_set<const tint::sem::Type*> src_types;
for (auto* src_type : src.Types()) {
src_types.emplace(src_type);
}
diff --git a/src/ast/module_clone_test.cc b/src/ast/module_clone_test.cc
index 29e9891..70b9116 100644
--- a/src/ast/module_clone_test.cc
+++ b/src/ast/module_clone_test.cc
@@ -132,11 +132,11 @@
EXPECT_EQ(Program::printer(&src), Program::printer(&dst));
// Check that none of the AST nodes or type pointers in dst are found in src
- std::unordered_set<ast::Node*> src_nodes;
+ std::unordered_set<const ast::Node*> src_nodes;
for (auto* src_node : src.ASTNodes().Objects()) {
src_nodes.emplace(src_node);
}
- std::unordered_set<sem::Type*> src_types;
+ std::unordered_set<const sem::Type*> src_types;
for (auto* src_type : src.Types()) {
src_types.emplace(src_type);
}
diff --git a/src/block_allocator.h b/src/block_allocator.h
index 44de75c..4a8fbf2 100644
--- a/src/block_allocator.h
+++ b/src/block_allocator.h
@@ -15,130 +15,155 @@
#ifndef SRC_BLOCK_ALLOCATOR_H_
#define SRC_BLOCK_ALLOCATOR_H_
-#include <memory>
+#include <array>
#include <utility>
-#include <vector>
+
+#include "src/utils/math.h"
namespace tint {
-/// A container and allocator of objects of (or deriving from) the template type
-/// `T`.
-/// Objects are allocated by calling Create(), and are owned by the
+/// A container and allocator of objects of (or deriving from) the template
+/// type `T`. Objects are allocated by calling Create(), and are owned by the
/// BlockAllocator. When the BlockAllocator is destructed, all constructed
/// objects are automatically destructed and freed.
///
-/// Objects held by the BlockAllocator can be iterated over using a
-/// View or ConstView.
-template <typename T>
+/// Objects held by the BlockAllocator can be iterated over using a View.
+template <typename T,
+ size_t BLOCK_SIZE = 64 * 1024,
+ size_t BLOCK_ALIGNMENT = 16>
class BlockAllocator {
- using InternalVector = std::vector<std::unique_ptr<T>>;
- using InternalIterator = typename InternalVector::const_iterator;
+ /// Pointers is a chunk of T* pointers, forming a linked list.
+ /// The list of Pointers are used to maintain the list of allocated objects.
+ /// Pointers are allocated out of the block memory.
+ struct Pointers {
+ static constexpr size_t kMax = 32;
+ std::array<T*, kMax> ptrs;
+ Pointers* next;
+ };
- public:
- class View;
- class ConstView;
+ /// Block is linked list of memory blocks.
+ /// Blocks are allocated out of heap memory.
+ ///
+ /// Note: We're not using std::aligned_storage here as this warns / errors
+ /// on MSVC.
+ struct alignas(BLOCK_ALIGNMENT) Block {
+ uint8_t data[BLOCK_SIZE];
+ Block* next;
+ };
- /// Constructor
- BlockAllocator() = default;
- /// Move constructor
- BlockAllocator(BlockAllocator&&) = default;
- /// Move assignment operator
- /// @return this BlockAllocator
- BlockAllocator& operator=(BlockAllocator&&) = default;
+ // Forward declaration
+ template <bool IS_CONST>
+ class TView;
/// An iterator for the objects owned by the BlockAllocator.
- class Iterator {
+ template <bool IS_CONST>
+ class TIterator {
+ using PointerTy = std::conditional_t<IS_CONST, const T*, T*>;
+
public:
/// Equality operator
/// @param other the iterator to compare this iterator to
/// @returns true if this iterator is equal to other
- bool operator==(const Iterator& other) const { return it_ == other.it_; }
+ bool operator==(const TIterator& other) const {
+ return ptrs == other.ptrs && idx == other.idx;
+ }
+
/// Inequality operator
/// @param other the iterator to compare this iterator to
/// @returns true if this iterator is not equal to other
- bool operator!=(const Iterator& other) const { return it_ != other.it_; }
+ bool operator!=(const TIterator& other) const { return !(*this == other); }
+
/// Advances the iterator
/// @returns this iterator
- Iterator& operator++() {
- ++it_;
+ TIterator& operator++() {
+ if (ptrs != nullptr) {
+ ++idx;
+ if (idx == Pointers::kMax) {
+ idx = 0;
+ ptrs = ptrs->next;
+ }
+ }
return *this;
}
+
/// @returns the pointer to the object at the current iterator position
- T* operator*() const { return it_->get(); }
+ PointerTy operator*() const { return ptrs ? ptrs->ptrs[idx] : nullptr; }
private:
- friend View; // Keep internal iterator impl private.
- explicit Iterator(InternalIterator it) : it_(it) {}
- InternalIterator it_;
+ friend TView<IS_CONST>; // Keep internal iterator impl private.
+ explicit TIterator(const Pointers* p, size_t i) : ptrs(p), idx(i) {}
+
+ const Pointers* ptrs;
+ size_t idx;
};
- /// A const iterator for the objects owned by the BlockAllocator.
- class ConstIterator {
- public:
- /// Equality operator
- /// @param other the iterator to compare this iterator to
- /// @returns true if this iterator is equal to other
- bool operator==(const ConstIterator& other) const {
- return it_ == other.it_;
- }
- /// Inequality operator
- /// @param other the iterator to compare this iterator to
- /// @returns true if this iterator is not equal to other
- bool operator!=(const ConstIterator& other) const {
- return it_ != other.it_;
- }
- /// Advances the iterator
- /// @returns this iterator
- ConstIterator& operator++() {
- ++it_;
- return *this;
- }
- /// @returns the pointer to the object at the current iterator position
- T* operator*() const { return it_->get(); }
-
- private:
- friend ConstView; // Keep internal iterator impl private.
- explicit ConstIterator(InternalIterator it) : it_(it) {}
- InternalIterator it_;
- };
-
- /// View provides begin() and end() methods for looping over the objects owned
- /// by the BlockAllocator.
- class View {
+ /// View provides begin() and end() methods for looping over the objects
+ /// owned by the BlockAllocator.
+ template <bool IS_CONST>
+ class TView {
public:
/// @returns an iterator to the beginning of the view
- Iterator begin() const { return Iterator(allocator_->objects_.begin()); }
+ TIterator<IS_CONST> begin() const {
+ return TIterator<IS_CONST>{allocator_->pointers_.root, 0};
+ }
+
/// @returns an iterator to the end of the view
- Iterator end() const { return Iterator(allocator_->objects_.end()); }
+ TIterator<IS_CONST> end() const {
+ return allocator_->pointers_.current_index >= Pointers::kMax
+ ? TIterator<IS_CONST>(nullptr, 0)
+ : TIterator<IS_CONST>(allocator_->pointers_.current,
+ allocator_->pointers_.current_index);
+ }
private:
friend BlockAllocator; // For BlockAllocator::operator View()
- explicit View(BlockAllocator const* allocator) : allocator_(allocator) {}
+ explicit TView(BlockAllocator const* allocator) : allocator_(allocator) {}
BlockAllocator const* const allocator_;
};
+ public:
+ /// An iterator type over the objects of the BlockAllocator
+ using Iterator = TIterator<false>;
+
+ /// An immutable iterator type over the objects of the BlockAllocator
+ using ConstIterator = TIterator<true>;
+
+ /// View provides begin() and end() methods for looping over the objects
+ /// owned by the BlockAllocator.
+ using View = TView<false>;
+
/// ConstView provides begin() and end() methods for looping over the objects
/// owned by the BlockAllocator.
- class ConstView {
- public:
- /// @returns an iterator to the beginning of the view
- ConstIterator begin() const {
- return ConstIterator(allocator_->objects_.begin());
- }
- /// @returns an iterator to the end of the view
- ConstIterator end() const {
- return ConstIterator(allocator_->objects_.end());
- }
+ using ConstView = TView<true>;
- private:
- friend BlockAllocator; // For BlockAllocator::operator ConstView()
- explicit ConstView(BlockAllocator const* allocator)
- : allocator_(allocator) {}
- BlockAllocator const* const allocator_;
- };
+ /// Constructor
+ BlockAllocator() = default;
+
+ /// Move constructor
+ /// @param rhs the BlockAllocator to move
+ BlockAllocator(BlockAllocator&& rhs) {
+ std::swap(block_, rhs.block_);
+ std::swap(pointers_, rhs.pointers_);
+ }
+
+ /// Move assignment operator
+ /// @param rhs the BlockAllocator to move
+ /// @return this BlockAllocator
+ BlockAllocator& operator=(BlockAllocator&& rhs) {
+ if (this != &rhs) {
+ Reset();
+ std::swap(block_, rhs.block_);
+ std::swap(pointers_, rhs.pointers_);
+ }
+ return *this;
+ }
+
+ /// Destructor
+ ~BlockAllocator() { Reset(); }
/// @return a View of all objects owned by this BlockAllocator
View Objects() { return View(this); }
+
/// @return a ConstView of all objects owned by this BlockAllocator
ConstView Objects() const { return ConstView(this); }
@@ -152,17 +177,116 @@
static_assert(
std::is_same<T, TYPE>::value || std::is_base_of<T, TYPE>::value,
"TYPE does not derive from T");
- auto uptr = std::make_unique<TYPE>(std::forward<ARGS>(args)...);
- auto* ptr = uptr.get();
- objects_.emplace_back(std::move(uptr));
+ static_assert(
+ std::is_same<T, TYPE>::value || std::has_virtual_destructor<T>::value,
+ "TYPE requires a virtual destructor when calling Create() for a type "
+ "that is not T");
+
+ auto* ptr = Allocate<TYPE>();
+ new (ptr) TYPE(std::forward<ARGS>(args)...);
+ AddObjectPointer(ptr);
+
return ptr;
}
+ /// Frees all allocations from the allocator.
+ void Reset() {
+ for (auto ptr : Objects()) {
+ ptr->~T();
+ }
+ auto* block = block_.root;
+ while (block != nullptr) {
+ auto* next = block->next;
+ delete block;
+ block = next;
+ }
+ block_ = {};
+ pointers_ = {};
+ }
+
private:
BlockAllocator(const BlockAllocator&) = delete;
BlockAllocator& operator=(const BlockAllocator&) = delete;
- std::vector<std::unique_ptr<T>> objects_;
+ /// Allocates an instance of TYPE from the current block, or from a newly
+ /// allocated block if the current block is full.
+ template <typename TYPE>
+ TYPE* Allocate() {
+ static_assert(sizeof(TYPE) <= BLOCK_SIZE,
+ "Cannot construct TYPE with size greater than BLOCK_SIZE");
+ static_assert(alignof(TYPE) <= BLOCK_ALIGNMENT,
+ "alignof(TYPE) is greater than ALIGNMENT");
+
+ block_.current_offset =
+ utils::RoundUp(alignof(TYPE), block_.current_offset);
+ if (block_.current_offset + sizeof(TYPE) > BLOCK_SIZE) {
+ // Allocate a new block from the heap
+ auto* prev_block = block_.current;
+ block_.current = new Block;
+ if (!block_.current) {
+ return nullptr; // out of memory
+ }
+ block_.current->next = nullptr;
+ block_.current_offset = 0;
+ if (prev_block) {
+ prev_block->next = block_.current;
+ } else {
+ block_.root = block_.current;
+ }
+ }
+
+ auto* base = &block_.current->data[0];
+ auto* ptr = reinterpret_cast<TYPE*>(base + block_.current_offset);
+ block_.current_offset += sizeof(TYPE);
+ return ptr;
+ }
+
+ /// Adds `ptr` to the linked list of objects owned by this BlockAllocator.
+ /// Once added, `ptr` will be tracked for destruction when the BlockAllocator
+ /// is destructed.
+ void AddObjectPointer(T* ptr) {
+ if (pointers_.current_index >= Pointers::kMax) {
+ auto* prev_pointers = pointers_.current;
+ pointers_.current = Allocate<Pointers>();
+ if (!pointers_.current) {
+ return; // out of memory
+ }
+ pointers_.current->next = nullptr;
+ pointers_.current_index = 0;
+
+ if (prev_pointers) {
+ prev_pointers->next = pointers_.current;
+ } else {
+ pointers_.root = pointers_.current;
+ }
+ }
+
+ pointers_.current->ptrs[pointers_.current_index++] = ptr;
+ }
+
+ struct {
+ /// The root block of the block linked list
+ Block* root = nullptr;
+ /// The current (end) block of the blocked linked list.
+ /// New allocations come from this block
+ Block* current = nullptr;
+ /// The byte offset in #current for the next allocation.
+ /// Initialized with BLOCK_SIZE so that the first allocation triggers a
+ /// block allocation.
+ size_t current_offset = BLOCK_SIZE;
+ } block_;
+
+ struct {
+ /// The root Pointers structure of the pointers linked list
+ Pointers* root = nullptr;
+ /// The current (end) Pointers structure of the pointers linked list.
+ /// AddObjectPointer() adds to this structure.
+ Pointers* current = nullptr;
+ /// The array index in #current for the next append.
+ /// Initialized with Pointers::kMax so that the first append triggers a
+ /// allocation of the Pointers structure.
+ size_t current_index = Pointers::kMax;
+ } pointers_;
};
} // namespace tint
diff --git a/src/block_allocator_test.cc b/src/block_allocator_test.cc
index e468907..69834da 100644
--- a/src/block_allocator_test.cc
+++ b/src/block_allocator_test.cc
@@ -39,7 +39,7 @@
FAIL() << "BlockAllocator should be empty";
}
}
- for (int* i : static_cast<const Allocator&>(allocator).Objects()) {
+ for (const int* i : static_cast<const Allocator&>(allocator).Objects()) {
(void)i;
if ((true)) { // Workaround for "error: loop will run at most once"
FAIL() << "BlockAllocator should be empty";
@@ -67,48 +67,53 @@
TEST_F(BlockAllocatorTest, MoveConstruct) {
using Allocator = BlockAllocator<LifetimeCounter>;
- size_t count = 0;
+ for (size_t n :
+ {0, 1, 10, 16, 20, 32, 50, 64, 100, 256, 300, 512, 500, 512}) {
+ size_t count = 0;
+ {
+ Allocator allocator_a;
+ for (size_t i = 0; i < n; i++) {
+ allocator_a.Create(&count);
+ }
+ EXPECT_EQ(count, n);
- {
- Allocator allocator_a;
- for (int i = 0; i < 10; i++) {
- allocator_a.Create(&count);
+ Allocator allocator_b{std::move(allocator_a)};
+ EXPECT_EQ(count, n);
}
- EXPECT_EQ(count, 10u);
- Allocator allocator_b{std::move(allocator_a)};
- EXPECT_EQ(count, 10u);
+ EXPECT_EQ(count, 0u);
}
-
- EXPECT_EQ(count, 0u);
}
TEST_F(BlockAllocatorTest, MoveAssign) {
using Allocator = BlockAllocator<LifetimeCounter>;
- size_t count_a = 0;
- size_t count_b = 0;
+ for (size_t n :
+ {0, 1, 10, 16, 20, 32, 50, 64, 100, 256, 300, 512, 500, 512}) {
+ size_t count_a = 0;
+ size_t count_b = 0;
- {
- Allocator allocator_a;
- for (int i = 0; i < 10; i++) {
- allocator_a.Create(&count_a);
+ {
+ Allocator allocator_a;
+ for (size_t i = 0; i < n; i++) {
+ allocator_a.Create(&count_a);
+ }
+ EXPECT_EQ(count_a, n);
+
+ Allocator allocator_b;
+ for (size_t i = 0; i < n; i++) {
+ allocator_b.Create(&count_b);
+ }
+ EXPECT_EQ(count_b, n);
+
+ allocator_b = std::move(allocator_a);
+ EXPECT_EQ(count_a, n);
+ EXPECT_EQ(count_b, 0u);
}
- EXPECT_EQ(count_a, 10u);
- Allocator allocator_b;
- for (int i = 0; i < 10; i++) {
- allocator_b.Create(&count_b);
- }
- EXPECT_EQ(count_b, 10u);
-
- allocator_b = std::move(allocator_a);
- EXPECT_EQ(count_a, 10u);
+ EXPECT_EQ(count_a, 0u);
EXPECT_EQ(count_b, 0u);
}
-
- EXPECT_EQ(count_a, 0u);
- EXPECT_EQ(count_b, 0u);
}
TEST_F(BlockAllocatorTest, ObjectOrder) {
@@ -130,7 +135,7 @@
}
{
int i = 0;
- for (int* p : static_cast<const Allocator&>(allocator).Objects()) {
+ for (const int* p : static_cast<const Allocator&>(allocator).Objects()) {
EXPECT_EQ(*p, i);
i++;
}
diff --git a/src/transform/decompose_memory_access.cc b/src/transform/decompose_memory_access.cc
index af20c6d..fecb76f 100644
--- a/src/transform/decompose_memory_access.cc
+++ b/src/transform/decompose_memory_access.cc
@@ -286,8 +286,8 @@
/// Store describes a single storage or uniform buffer write
struct Store {
- ast::AssignmentStatement* assignment; // The AST assignment statement
- BufferAccess target; // The target for the write
+ const ast::AssignmentStatement* assignment; // The AST assignment statement
+ BufferAccess target; // The target for the write
};
} // namespace
diff --git a/src/transform/multiplanar_external_texture.cc b/src/transform/multiplanar_external_texture.cc
index 387604b..74ef782 100644
--- a/src/transform/multiplanar_external_texture.cc
+++ b/src/transform/multiplanar_external_texture.cc
@@ -246,14 +246,14 @@
/// Constructs a StatementList containing all the statements making up the
/// bodies of the textureSampleExternal and textureLoadExternal functions.
- /// @param callType determines which function body to generate
+ /// @param call_type determines which function body to generate
/// @returns a statement list that makes of the body of the chosen function
- ast::StatementList createTexFnExtStatementList(sem::IntrinsicType callType) {
+ ast::StatementList createTexFnExtStatementList(sem::IntrinsicType call_type) {
using f32 = ProgramBuilder::f32;
- const ast::CallExpression* single_plane_call;
- const ast::CallExpression* plane_0_call;
- const ast::CallExpression* plane_1_call;
- if (callType == sem::IntrinsicType::kTextureSampleLevel) {
+ const ast::CallExpression* single_plane_call = nullptr;
+ const ast::CallExpression* plane_0_call = nullptr;
+ const ast::CallExpression* plane_1_call = nullptr;
+ if (call_type == sem::IntrinsicType::kTextureSampleLevel) {
// textureSampleLevel(plane0, smp, coord.xy, 0.0);
single_plane_call =
b.Call("textureSampleLevel", "plane0", "smp", "coord", 0.0f);
@@ -263,13 +263,16 @@
// textureSampleLevel(plane1, smp, coord.xy, 0.0);
plane_1_call =
b.Call("textureSampleLevel", "plane1", "smp", "coord", 0.0f);
- } else if (callType == sem::IntrinsicType::kTextureLoad) {
+ } else if (call_type == sem::IntrinsicType::kTextureLoad) {
// textureLoad(plane0, coords.xy, 0);
single_plane_call = b.Call("textureLoad", "plane0", "coord", 0);
// textureLoad(plane0, coords.xy, 0);
plane_0_call = b.Call("textureLoad", "plane0", "coord", 0);
// textureLoad(plane1, coords.xy, 0);
plane_1_call = b.Call("textureLoad", "plane1", "coord", 0);
+ } else {
+ TINT_ICE(Transform, b.Diagnostics())
+ << "unhandled intrinsic: " << call_type;
}
return {