optimization: BlockAllocator: Actually allocate in blocks

Instead of hitting the heap for each and every call to Create()

Significantly improves performance for heavy loads. Slight performance
loss for lighter loads.

A: base.bench
B: new.bench

Test name                             | Δ (A → B)    | % (A → B)
--------------------------------------+--------------+-----------
GenerateSPIRV/"simple_fragment.wgsl"  | 27.021µs     | +6.4%
GenerateMSL/"simple_compute.wgsl"     | 35.592µs     | +6.1%
GenerateMSL/"simple_vertex.wgsl"      | 37.64µs      | +5.5%
GenerateHLSL/"simple_fragment.wgsl"   | 42.145µs     | +5.2%
GenerateGLSL/"simple_fragment.wgsl"   | 31.506µs     | +4.9%
GenerateHLSL/"simple_vertex.wgsl"     | 38.843µs     | +4.7%
GenerateMSL/"simple_fragment.wgsl"    | 29.977µs     | +4.5%
GenerateSPIRV/"simple_vertex.wgsl"    | 19.882µs     | +4.2%
GenerateGLSL/"simple_vertex.wgsl"     | 24.702µs     | +3.7%
GenerateSPIRV/"simple_compute.wgsl"   | 17.652µs     | +3.2%
GenerateHLSL/"simple_compute.wgsl"    | 26.826µs     | +2.7%
GenerateGLSL/"simple_compute.wgsl"    | 11.952µs     | +1.8%
ParseWGSL/"particles.wgsl"            | -104.83µs    | -4.2%
GenerateMSL/"particles.wgsl"          | -1.079243ms  | -9.4%
GenerateSPIRV/"particles.wgsl"        | -1.012483ms  | -9.4%
GenerateGLSL/"particles.wgsl"         | -3.522106ms  | -9.5%
GenerateHLSL/"particles.wgsl"         | -1.849666ms  | -10.6%

Issue: tint:1383
Change-Id: Ib691328538c597c06a75dfba392c99d2afbd5442
Reviewed-on: https://dawn-review.googlesource.com/c/tint/+/76961
Reviewed-by: Antonio Maiorano <amaiorano@google.com>
Kokoro: Kokoro <noreply+kokoro@google.com>
Commit-Queue: Ben Clayton <bclayton@google.com>
diff --git a/fuzzers/tint_ast_clone_fuzzer.cc b/fuzzers/tint_ast_clone_fuzzer.cc
index bdaf643..85d7a17 100644
--- a/fuzzers/tint_ast_clone_fuzzer.cc
+++ b/fuzzers/tint_ast_clone_fuzzer.cc
@@ -73,11 +73,11 @@
   ASSERT_EQ(tint::Program::printer(&src), tint::Program::printer(&dst));
 
   // Check that none of the AST nodes or type pointers in dst are found in src
-  std::unordered_set<tint::ast::Node*> src_nodes;
+  std::unordered_set<const tint::ast::Node*> src_nodes;
   for (auto* src_node : src.ASTNodes().Objects()) {
     src_nodes.emplace(src_node);
   }
-  std::unordered_set<tint::sem::Type*> src_types;
+  std::unordered_set<const tint::sem::Type*> src_types;
   for (auto* src_type : src.Types()) {
     src_types.emplace(src_type);
   }
diff --git a/src/ast/module_clone_test.cc b/src/ast/module_clone_test.cc
index 29e9891..70b9116 100644
--- a/src/ast/module_clone_test.cc
+++ b/src/ast/module_clone_test.cc
@@ -132,11 +132,11 @@
   EXPECT_EQ(Program::printer(&src), Program::printer(&dst));
 
   // Check that none of the AST nodes or type pointers in dst are found in src
-  std::unordered_set<ast::Node*> src_nodes;
+  std::unordered_set<const ast::Node*> src_nodes;
   for (auto* src_node : src.ASTNodes().Objects()) {
     src_nodes.emplace(src_node);
   }
-  std::unordered_set<sem::Type*> src_types;
+  std::unordered_set<const sem::Type*> src_types;
   for (auto* src_type : src.Types()) {
     src_types.emplace(src_type);
   }
diff --git a/src/block_allocator.h b/src/block_allocator.h
index 44de75c..4a8fbf2 100644
--- a/src/block_allocator.h
+++ b/src/block_allocator.h
@@ -15,130 +15,155 @@
 #ifndef SRC_BLOCK_ALLOCATOR_H_
 #define SRC_BLOCK_ALLOCATOR_H_
 
-#include <memory>
+#include <array>
 #include <utility>
-#include <vector>
+
+#include "src/utils/math.h"
 
 namespace tint {
 
-/// A container and allocator of objects of (or deriving from) the template type
-/// `T`.
-/// Objects are allocated by calling Create(), and are owned by the
+/// A container and allocator of objects of (or deriving from) the template
+/// type `T`. Objects are allocated by calling Create(), and are owned by the
 /// BlockAllocator. When the BlockAllocator is destructed, all constructed
 /// objects are automatically destructed and freed.
 ///
-/// Objects held by the BlockAllocator can be iterated over using a
-/// View or ConstView.
-template <typename T>
+/// Objects held by the BlockAllocator can be iterated over using a View.
+template <typename T,
+          size_t BLOCK_SIZE = 64 * 1024,
+          size_t BLOCK_ALIGNMENT = 16>
 class BlockAllocator {
-  using InternalVector = std::vector<std::unique_ptr<T>>;
-  using InternalIterator = typename InternalVector::const_iterator;
+  /// Pointers is a chunk of T* pointers, forming a linked list.
+  /// The list of Pointers is used to maintain the list of allocated objects.
+  /// Pointers are allocated out of the block memory.
+  struct Pointers {
+    static constexpr size_t kMax = 32;
+    std::array<T*, kMax> ptrs;
+    Pointers* next;
+  };
 
- public:
-  class View;
-  class ConstView;
+  /// Block is a linked list of memory blocks.
+  /// Blocks are allocated out of heap memory.
+  ///
+  /// Note: We're not using std::aligned_storage here as this warns / errors
+  /// on MSVC.
+  struct alignas(BLOCK_ALIGNMENT) Block {
+    uint8_t data[BLOCK_SIZE];
+    Block* next;
+  };
 
-  /// Constructor
-  BlockAllocator() = default;
-  /// Move constructor
-  BlockAllocator(BlockAllocator&&) = default;
-  /// Move assignment operator
-  /// @return this BlockAllocator
-  BlockAllocator& operator=(BlockAllocator&&) = default;
+  // Forward declaration
+  template <bool IS_CONST>
+  class TView;
 
   /// An iterator for the objects owned by the BlockAllocator.
-  class Iterator {
+  template <bool IS_CONST>
+  class TIterator {
+    using PointerTy = std::conditional_t<IS_CONST, const T*, T*>;
+
    public:
     /// Equality operator
     /// @param other the iterator to compare this iterator to
     /// @returns true if this iterator is equal to other
-    bool operator==(const Iterator& other) const { return it_ == other.it_; }
+    bool operator==(const TIterator& other) const {
+      return ptrs == other.ptrs && idx == other.idx;
+    }
+
     /// Inequality operator
     /// @param other the iterator to compare this iterator to
     /// @returns true if this iterator is not equal to other
-    bool operator!=(const Iterator& other) const { return it_ != other.it_; }
+    bool operator!=(const TIterator& other) const { return !(*this == other); }
+
     /// Advances the iterator
     /// @returns this iterator
-    Iterator& operator++() {
-      ++it_;
+    TIterator& operator++() {
+      if (ptrs != nullptr) {
+        ++idx;
+        if (idx == Pointers::kMax) {
+          idx = 0;
+          ptrs = ptrs->next;
+        }
+      }
       return *this;
     }
+
     /// @returns the pointer to the object at the current iterator position
-    T* operator*() const { return it_->get(); }
+    PointerTy operator*() const { return ptrs ? ptrs->ptrs[idx] : nullptr; }
 
    private:
-    friend View;  // Keep internal iterator impl private.
-    explicit Iterator(InternalIterator it) : it_(it) {}
-    InternalIterator it_;
+    friend TView<IS_CONST>;  // Keep internal iterator impl private.
+    explicit TIterator(const Pointers* p, size_t i) : ptrs(p), idx(i) {}
+
+    const Pointers* ptrs;
+    size_t idx;
   };
 
-  /// A const iterator for the objects owned by the BlockAllocator.
-  class ConstIterator {
-   public:
-    /// Equality operator
-    /// @param other the iterator to compare this iterator to
-    /// @returns true if this iterator is equal to other
-    bool operator==(const ConstIterator& other) const {
-      return it_ == other.it_;
-    }
-    /// Inequality operator
-    /// @param other the iterator to compare this iterator to
-    /// @returns true if this iterator is not equal to other
-    bool operator!=(const ConstIterator& other) const {
-      return it_ != other.it_;
-    }
-    /// Advances the iterator
-    /// @returns this iterator
-    ConstIterator& operator++() {
-      ++it_;
-      return *this;
-    }
-    /// @returns the pointer to the object at the current iterator position
-    T* operator*() const { return it_->get(); }
-
-   private:
-    friend ConstView;  // Keep internal iterator impl private.
-    explicit ConstIterator(InternalIterator it) : it_(it) {}
-    InternalIterator it_;
-  };
-
-  /// View provides begin() and end() methods for looping over the objects owned
-  /// by the BlockAllocator.
-  class View {
+  /// View provides begin() and end() methods for looping over the objects
+  /// owned by the BlockAllocator.
+  template <bool IS_CONST>
+  class TView {
    public:
     /// @returns an iterator to the beginning of the view
-    Iterator begin() const { return Iterator(allocator_->objects_.begin()); }
+    TIterator<IS_CONST> begin() const {
+      return TIterator<IS_CONST>{allocator_->pointers_.root, 0};
+    }
+
     /// @returns an iterator to the end of the view
-    Iterator end() const { return Iterator(allocator_->objects_.end()); }
+    TIterator<IS_CONST> end() const {
+      return allocator_->pointers_.current_index >= Pointers::kMax
+                 ? TIterator<IS_CONST>(nullptr, 0)
+                 : TIterator<IS_CONST>(allocator_->pointers_.current,
+                                       allocator_->pointers_.current_index);
+    }
 
    private:
     friend BlockAllocator;  // For BlockAllocator::operator View()
-    explicit View(BlockAllocator const* allocator) : allocator_(allocator) {}
+    explicit TView(BlockAllocator const* allocator) : allocator_(allocator) {}
     BlockAllocator const* const allocator_;
   };
 
+ public:
+  /// An iterator type over the objects of the BlockAllocator
+  using Iterator = TIterator<false>;
+
+  /// An immutable iterator type over the objects of the BlockAllocator
+  using ConstIterator = TIterator<true>;
+
+  /// View provides begin() and end() methods for looping over the objects
+  /// owned by the BlockAllocator.
+  using View = TView<false>;
+
   /// ConstView provides begin() and end() methods for looping over the objects
   /// owned by the BlockAllocator.
-  class ConstView {
-   public:
-    /// @returns an iterator to the beginning of the view
-    ConstIterator begin() const {
-      return ConstIterator(allocator_->objects_.begin());
-    }
-    /// @returns an iterator to the end of the view
-    ConstIterator end() const {
-      return ConstIterator(allocator_->objects_.end());
-    }
+  using ConstView = TView<true>;
 
-   private:
-    friend BlockAllocator;  // For BlockAllocator::operator ConstView()
-    explicit ConstView(BlockAllocator const* allocator)
-        : allocator_(allocator) {}
-    BlockAllocator const* const allocator_;
-  };
+  /// Constructor
+  BlockAllocator() = default;
+
+  /// Move constructor
+  /// @param rhs the BlockAllocator to move
+  BlockAllocator(BlockAllocator&& rhs) {
+    std::swap(block_, rhs.block_);
+    std::swap(pointers_, rhs.pointers_);
+  }
+
+  /// Move assignment operator
+  /// @param rhs the BlockAllocator to move
+  /// @return this BlockAllocator
+  BlockAllocator& operator=(BlockAllocator&& rhs) {
+    if (this != &rhs) {
+      Reset();
+      std::swap(block_, rhs.block_);
+      std::swap(pointers_, rhs.pointers_);
+    }
+    return *this;
+  }
+
+  /// Destructor
+  ~BlockAllocator() { Reset(); }
 
   /// @return a View of all objects owned by this BlockAllocator
   View Objects() { return View(this); }
+
   /// @return a ConstView of all objects owned by this BlockAllocator
   ConstView Objects() const { return ConstView(this); }
 
@@ -152,17 +177,116 @@
     static_assert(
         std::is_same<T, TYPE>::value || std::is_base_of<T, TYPE>::value,
         "TYPE does not derive from T");
-    auto uptr = std::make_unique<TYPE>(std::forward<ARGS>(args)...);
-    auto* ptr = uptr.get();
-    objects_.emplace_back(std::move(uptr));
+    static_assert(
+        std::is_same<T, TYPE>::value || std::has_virtual_destructor<T>::value,
+        "TYPE requires a virtual destructor when calling Create() for a type "
+        "that is not T");
+
+    auto* ptr = Allocate<TYPE>();
+    new (ptr) TYPE(std::forward<ARGS>(args)...);
+    AddObjectPointer(ptr);
+
     return ptr;
   }
 
+  /// Frees all allocations from the allocator.
+  void Reset() {
+    for (auto ptr : Objects()) {
+      ptr->~T();
+    }
+    auto* block = block_.root;
+    while (block != nullptr) {
+      auto* next = block->next;
+      delete block;
+      block = next;
+    }
+    block_ = {};
+    pointers_ = {};
+  }
+
  private:
   BlockAllocator(const BlockAllocator&) = delete;
   BlockAllocator& operator=(const BlockAllocator&) = delete;
 
-  std::vector<std::unique_ptr<T>> objects_;
+  /// Allocates an instance of TYPE from the current block, or from a newly
+  /// allocated block if the current block is full.
+  template <typename TYPE>
+  TYPE* Allocate() {
+    static_assert(sizeof(TYPE) <= BLOCK_SIZE,
+                  "Cannot construct TYPE with size greater than BLOCK_SIZE");
+    static_assert(alignof(TYPE) <= BLOCK_ALIGNMENT,
+                  "alignof(TYPE) is greater than BLOCK_ALIGNMENT");
+
+    block_.current_offset =
+        utils::RoundUp(alignof(TYPE), block_.current_offset);
+    if (block_.current_offset + sizeof(TYPE) > BLOCK_SIZE) {
+      // Allocate a new block from the heap
+      auto* prev_block = block_.current;
+      block_.current = new Block;
+      if (!block_.current) {
+        return nullptr;  // out of memory
+      }
+      block_.current->next = nullptr;
+      block_.current_offset = 0;
+      if (prev_block) {
+        prev_block->next = block_.current;
+      } else {
+        block_.root = block_.current;
+      }
+    }
+
+    auto* base = &block_.current->data[0];
+    auto* ptr = reinterpret_cast<TYPE*>(base + block_.current_offset);
+    block_.current_offset += sizeof(TYPE);
+    return ptr;
+  }
+
+  /// Adds `ptr` to the linked list of objects owned by this BlockAllocator.
+  /// Once added, `ptr` will be tracked for destruction when the BlockAllocator
+  /// is destructed.
+  void AddObjectPointer(T* ptr) {
+    if (pointers_.current_index >= Pointers::kMax) {
+      auto* prev_pointers = pointers_.current;
+      pointers_.current = Allocate<Pointers>();
+      if (!pointers_.current) {
+        return;  // out of memory
+      }
+      pointers_.current->next = nullptr;
+      pointers_.current_index = 0;
+
+      if (prev_pointers) {
+        prev_pointers->next = pointers_.current;
+      } else {
+        pointers_.root = pointers_.current;
+      }
+    }
+
+    pointers_.current->ptrs[pointers_.current_index++] = ptr;
+  }
+
+  struct {
+    /// The root block of the block linked list
+    Block* root = nullptr;
+    /// The current (end) block of the block linked list.
+    /// New allocations come from this block
+    Block* current = nullptr;
+    /// The byte offset in #current for the next allocation.
+    /// Initialized with BLOCK_SIZE so that the first allocation triggers a
+    /// block allocation.
+    size_t current_offset = BLOCK_SIZE;
+  } block_;
+
+  struct {
+    /// The root Pointers structure of the pointers linked list
+    Pointers* root = nullptr;
+    /// The current (end) Pointers structure of the pointers linked list.
+    /// AddObjectPointer() adds to this structure.
+    Pointers* current = nullptr;
+    /// The array index in #current for the next append.
+    /// Initialized with Pointers::kMax so that the first append triggers a
+    /// allocation of the Pointers structure.
+    size_t current_index = Pointers::kMax;
+  } pointers_;
 };
 
 }  // namespace tint
diff --git a/src/block_allocator_test.cc b/src/block_allocator_test.cc
index e468907..69834da 100644
--- a/src/block_allocator_test.cc
+++ b/src/block_allocator_test.cc
@@ -39,7 +39,7 @@
       FAIL() << "BlockAllocator should be empty";
     }
   }
-  for (int* i : static_cast<const Allocator&>(allocator).Objects()) {
+  for (const int* i : static_cast<const Allocator&>(allocator).Objects()) {
     (void)i;
     if ((true)) {  // Workaround for "error: loop will run at most once"
       FAIL() << "BlockAllocator should be empty";
@@ -67,48 +67,53 @@
 TEST_F(BlockAllocatorTest, MoveConstruct) {
   using Allocator = BlockAllocator<LifetimeCounter>;
 
-  size_t count = 0;
+  for (size_t n :
+       {0, 1, 10, 16, 20, 32, 50, 64, 100, 256, 300, 512, 500, 512}) {
+    size_t count = 0;
+    {
+      Allocator allocator_a;
+      for (size_t i = 0; i < n; i++) {
+        allocator_a.Create(&count);
+      }
+      EXPECT_EQ(count, n);
 
-  {
-    Allocator allocator_a;
-    for (int i = 0; i < 10; i++) {
-      allocator_a.Create(&count);
+      Allocator allocator_b{std::move(allocator_a)};
+      EXPECT_EQ(count, n);
     }
-    EXPECT_EQ(count, 10u);
 
-    Allocator allocator_b{std::move(allocator_a)};
-    EXPECT_EQ(count, 10u);
+    EXPECT_EQ(count, 0u);
   }
-
-  EXPECT_EQ(count, 0u);
 }
 
 TEST_F(BlockAllocatorTest, MoveAssign) {
   using Allocator = BlockAllocator<LifetimeCounter>;
 
-  size_t count_a = 0;
-  size_t count_b = 0;
+  for (size_t n :
+       {0, 1, 10, 16, 20, 32, 50, 64, 100, 256, 300, 512, 500, 512}) {
+    size_t count_a = 0;
+    size_t count_b = 0;
 
-  {
-    Allocator allocator_a;
-    for (int i = 0; i < 10; i++) {
-      allocator_a.Create(&count_a);
+    {
+      Allocator allocator_a;
+      for (size_t i = 0; i < n; i++) {
+        allocator_a.Create(&count_a);
+      }
+      EXPECT_EQ(count_a, n);
+
+      Allocator allocator_b;
+      for (size_t i = 0; i < n; i++) {
+        allocator_b.Create(&count_b);
+      }
+      EXPECT_EQ(count_b, n);
+
+      allocator_b = std::move(allocator_a);
+      EXPECT_EQ(count_a, n);
+      EXPECT_EQ(count_b, 0u);
     }
-    EXPECT_EQ(count_a, 10u);
 
-    Allocator allocator_b;
-    for (int i = 0; i < 10; i++) {
-      allocator_b.Create(&count_b);
-    }
-    EXPECT_EQ(count_b, 10u);
-
-    allocator_b = std::move(allocator_a);
-    EXPECT_EQ(count_a, 10u);
+    EXPECT_EQ(count_a, 0u);
     EXPECT_EQ(count_b, 0u);
   }
-
-  EXPECT_EQ(count_a, 0u);
-  EXPECT_EQ(count_b, 0u);
 }
 
 TEST_F(BlockAllocatorTest, ObjectOrder) {
@@ -130,7 +135,7 @@
   }
   {
     int i = 0;
-    for (int* p : static_cast<const Allocator&>(allocator).Objects()) {
+    for (const int* p : static_cast<const Allocator&>(allocator).Objects()) {
       EXPECT_EQ(*p, i);
       i++;
     }
diff --git a/src/transform/decompose_memory_access.cc b/src/transform/decompose_memory_access.cc
index af20c6d..fecb76f 100644
--- a/src/transform/decompose_memory_access.cc
+++ b/src/transform/decompose_memory_access.cc
@@ -286,8 +286,8 @@
 
 /// Store describes a single storage or uniform buffer write
 struct Store {
-  ast::AssignmentStatement* assignment;  // The AST assignment statement
-  BufferAccess target;                   // The target for the write
+  const ast::AssignmentStatement* assignment;  // The AST assignment statement
+  BufferAccess target;                         // The target for the write
 };
 
 }  // namespace
diff --git a/src/transform/multiplanar_external_texture.cc b/src/transform/multiplanar_external_texture.cc
index 387604b..74ef782 100644
--- a/src/transform/multiplanar_external_texture.cc
+++ b/src/transform/multiplanar_external_texture.cc
@@ -246,14 +246,14 @@
 
   /// Constructs a StatementList containing all the statements making up the
   /// bodies of the textureSampleExternal and textureLoadExternal functions.
-  /// @param callType determines which function body to generate
+  /// @param call_type determines which function body to generate
   /// @returns a statement list that makes of the body of the chosen function
-  ast::StatementList createTexFnExtStatementList(sem::IntrinsicType callType) {
+  ast::StatementList createTexFnExtStatementList(sem::IntrinsicType call_type) {
     using f32 = ProgramBuilder::f32;
-    const ast::CallExpression* single_plane_call;
-    const ast::CallExpression* plane_0_call;
-    const ast::CallExpression* plane_1_call;
-    if (callType == sem::IntrinsicType::kTextureSampleLevel) {
+    const ast::CallExpression* single_plane_call = nullptr;
+    const ast::CallExpression* plane_0_call = nullptr;
+    const ast::CallExpression* plane_1_call = nullptr;
+    if (call_type == sem::IntrinsicType::kTextureSampleLevel) {
       // textureSampleLevel(plane0, smp, coord.xy, 0.0);
       single_plane_call =
           b.Call("textureSampleLevel", "plane0", "smp", "coord", 0.0f);
@@ -263,13 +263,16 @@
       // textureSampleLevel(plane1, smp, coord.xy, 0.0);
       plane_1_call =
           b.Call("textureSampleLevel", "plane1", "smp", "coord", 0.0f);
-    } else if (callType == sem::IntrinsicType::kTextureLoad) {
+    } else if (call_type == sem::IntrinsicType::kTextureLoad) {
       // textureLoad(plane0, coords.xy, 0);
       single_plane_call = b.Call("textureLoad", "plane0", "coord", 0);
       // textureLoad(plane0, coords.xy, 0);
       plane_0_call = b.Call("textureLoad", "plane0", "coord", 0);
       // textureLoad(plane1, coords.xy, 0);
       plane_1_call = b.Call("textureLoad", "plane1", "coord", 0);
+    } else {
+      TINT_ICE(Transform, b.Diagnostics())
+          << "unhandled intrinsic: " << call_type;
     }
 
     return {