[spirv-reader] Refactor bookkeeping for localy defined values

Bug: tint:3
Change-Id: Ibe26b802ae61c6271f8a75e641ac31db36df6000
Reviewed-on: https://dawn-review.googlesource.com/c/tint/+/24124
Reviewed-by: dan sinclair <dsinclair@google.com>
diff --git a/src/reader/spirv/function.cc b/src/reader/spirv/function.cc
index 8b89d80..1fe4614 100644
--- a/src/reader/spirv/function.cc
+++ b/src/reader/spirv/function.cc
@@ -409,7 +409,13 @@
 BlockInfo::BlockInfo(const spvtools::opt::BasicBlock& bb)
     : basic_block(&bb), id(bb.id()) {}
 
-BlockInfo::~BlockInfo() {}
+BlockInfo::~BlockInfo() = default;
+
+DefInfo::DefInfo(const spvtools::opt::Instruction& def_inst,
+                 uint32_t the_block_pos)
+    : inst(def_inst), block_pos(the_block_pos) {}
+
+DefInfo::~DefInfo() = default;
 
 FunctionEmitter::FunctionEmitter(ParserImpl* pi,
                                  const spvtools::opt::Function& function)
@@ -623,7 +629,6 @@
   }
 
   // TODO(dneto): register phis
-  // TODO(dneto): register SSA values which need to be hoisted
   RegisterValuesNeedingNamedOrHoistedDefinition();
 
   if (!EmitFunctionVariables()) {
@@ -2416,7 +2421,8 @@
     const spvtools::opt::Instruction& inst,
     TypedExpression ast_expr) {
   const auto result_id = inst.result_id();
-  if (needs_hoisted_def_.count(result_id) != 0) {
+  const auto* def_info = GetDefInfo(result_id);
+  if (def_info && def_info->requires_hoisted_def) {
     // Emit an assignment of the expression to the hoisted variable.
     AddStatement(std::make_unique<ast::AssignmentStatement>(
         std::make_unique<ast::IdentifierExpression>(namer_.Name(result_id)),
@@ -2430,20 +2436,24 @@
   const auto result_id = inst.result_id();
   // Handle combinatorial instructions.
   auto combinatorial_expr = MaybeEmitCombinatorialValue(inst);
+  const auto* def_info = GetDefInfo(result_id);
   if (combinatorial_expr.expr != nullptr) {
-    if ((needs_hoisted_def_.count(result_id) == 0) &&
-        (needs_named_const_def_.count(result_id) == 0) &&
-        (def_use_mgr_->NumUses(&inst) == 1)) {
-      // If it's used once, and doesn't need a named constant definition,
-      // then defer emitting the expression until it's used. Any supporting
-      // statements have already been emitted.
-      singly_used_values_.insert(
-          std::make_pair(result_id, std::move(combinatorial_expr)));
-      return success();
+    if (def_info == nullptr) {
+      return Fail() << "internal error: result ID %" << result_id
+                    << " is missing a def_info";
     }
-    // Otherwise, generate a const definition for it now and later use
-    // the const's name at the uses of the value.
-    return EmitConstDefOrWriteToHoistedVar(inst, std::move(combinatorial_expr));
+    if (def_info->requires_hoisted_def || def_info->requires_named_const_def ||
+        def_info->num_uses != 1) {
+      // Generate a const definition or an assignment to a hoisted definition
+      // now and later use the const or variable name at the uses of this value.
+      return EmitConstDefOrWriteToHoistedVar(inst,
+                                             std::move(combinatorial_expr));
+    }
+    // It is harmless to defer emitting the expression until it's used.
+    // Any supporting statements have already been emitted.
+    singly_used_values_.insert(
+        std::make_pair(result_id, std::move(combinatorial_expr)));
+    return success();
   }
   if (failed()) {
     return false;
@@ -2906,85 +2916,91 @@
 }
 
 void FunctionEmitter::RegisterValuesNeedingNamedOrHoistedDefinition() {
-  // Maps a result ID to the block position where it is last used.
-  std::unordered_map<uint32_t, uint32_t> id_to_last_use_pos;
-  // List of pairs of (result id, block position of the definition).
-  std::vector<std::pair<uint32_t, uint32_t>> id_def_pos;
-
+  // Create a DefInfo for each value definition in this function.
   for (auto block_id : block_order_) {
     const auto* block_info = GetBlockInfo(block_id);
     const auto block_pos = block_info->pos;
-
     for (const auto& inst : *(block_info->basic_block)) {
       const auto result_id = inst.result_id();
-      if (result_id != 0) {
-        id_def_pos.emplace_back(
-            std::pair<uint32_t, uint32_t>{result_id, block_pos});
+      if ((result_id == 0) || inst.opcode() == SpvOpLabel) {
+        continue;
       }
-      inst.ForEachInId(
-          [&id_to_last_use_pos, block_pos](const uint32_t* id_ptr) {
-            // If the id is not in the map already, this will create
-            // an entry with value 0.
-            auto& pos = id_to_last_use_pos[*id_ptr];
-            // Update the entry.
-            pos = std::max(pos, block_pos);
-          });
+      def_info_[result_id] = std::make_unique<DefInfo>(inst, block_pos);
+    }
+  }
 
-      if (inst.opcode() == SpvOpVectorShuffle) {
-        // We might access the vector operands multiple times. Make sure they
-        // are evaluated only once.
-        for (auto index : std::array<uint32_t, 2>{0, 1}) {
-          auto id = inst.GetSingleWordInOperand(index);
-          if (constant_mgr_->FindDeclaredConstant(id)) {
-            // If it's constant, then avoid making a const definition
-            // in the wrong place; it would be wrong if it didn't
-            // dominate its uses.
-            continue;
-          }
-          // Othewrise, register it.
-          needs_named_const_def_.insert(id);
+  // Mark vector operands of OpVectorShuffle as needing a named definition,
+  // but only if they are defined in this function as well.
+  for (auto& id_def_info_pair : def_info_) {
+    const auto& inst = id_def_info_pair.second->inst;
+    if (inst.opcode() == SpvOpVectorShuffle) {
+      // We might access the vector operands multiple times. Make sure they
+      // are evaluated only once.
+      for (auto index : std::array<uint32_t, 2>{0, 1}) {
+        auto id = inst.GetSingleWordInOperand(index);
+        auto* operand_def = GetDefInfo(id);
+        if (operand_def) {
+          operand_def->requires_named_const_def = true;
         }
       }
     }
   }
 
+  // Scan uses of locally defined IDs, to determine the block position
+  // of its last use.
+  for (auto block_id : block_order_) {
+    const auto* block_info = GetBlockInfo(block_id);
+    const auto block_pos = block_info->pos;
+    for (const auto& inst : *(block_info->basic_block)) {
+      // Update the usage span for IDs used by this instruction.
+      inst.ForEachInId([this, block_pos](const uint32_t* id_ptr) {
+        auto* def_info = GetDefInfo(*id_ptr);
+        if (def_info) {
+          def_info->num_uses++;
+          def_info->last_use_pos = std::max(def_info->last_use_pos, block_pos);
+        }
+      });
+    }
+  }
+
   // For an ID defined in this function, if it is used in a different construct
   // than its definition, then it needs a named constant definition.  Otherwise
   // we might sink an expensive computation into control flow, and hence change
   // performance.
-  for (const auto& id_and_pos : id_def_pos) {
-    const auto id = id_and_pos.first;
-    const auto def_pos = id_and_pos.second;
+  for (auto& id_def_info_pair : def_info_) {
+    const auto def_id = id_def_info_pair.first;
+    auto* def_info = id_def_info_pair.second.get();
+    if (def_info->num_uses == 0) {
+      // There is no need to adjust the location of the declaration.
+      continue;
+    }
+    const auto last_use_pos = def_info->last_use_pos;
 
-    auto last_use_where = id_to_last_use_pos.find(id);
-    if (last_use_where != id_to_last_use_pos.end()) {
-      const auto last_use_pos = last_use_where->second;
-      const auto* const def_in_construct =
-          GetBlockInfo(block_order_[def_pos])->construct;
-      const auto* const construct_with_last_use =
-          GetBlockInfo(block_order_[last_use_pos])->construct;
+    const auto* const def_in_construct =
+        GetBlockInfo(block_order_[def_info->block_pos])->construct;
+    const auto* const construct_with_last_use =
+        GetBlockInfo(block_order_[last_use_pos])->construct;
 
-      // Find the smallest structured construct that encloses the definition
-      // and all its uses.
-      const auto* enclosing_construct = def_in_construct;
-      while (enclosing_construct &&
-             !enclosing_construct->ContainsPos(last_use_pos)) {
-        enclosing_construct = enclosing_construct->parent;
-      }
-      // At worst, we go all the way out to the function construct.
-      assert(enclosing_construct != nullptr);
+    // Find the smallest structured construct that encloses the definition
+    // and all its uses.
+    const auto* enclosing_construct = def_in_construct;
+    while (enclosing_construct &&
+           !enclosing_construct->ContainsPos(last_use_pos)) {
+      enclosing_construct = enclosing_construct->parent;
+    }
+    // At worst, we go all the way out to the function construct.
+    assert(enclosing_construct != nullptr);
 
-      if (def_in_construct != construct_with_last_use) {
-        if (enclosing_construct == def_in_construct) {
-          // We can use a plain 'const' definition.
-          needs_named_const_def_.insert(id);
-        } else {
-          // We need to make a hoisted variable definition.
-          // TODO(dneto): Handle non-storable types, particularly pointers.
-          needs_hoisted_def_.insert(id);
-          auto* hoist_to_block = GetBlockInfo(enclosing_construct->begin_id);
-          hoist_to_block->hoisted_ids.push_back(id);
-        }
+    if (def_in_construct != construct_with_last_use) {
+      if (enclosing_construct == def_in_construct) {
+        // We can use a plain 'const' definition.
+        def_info->requires_named_const_def = true;
+      } else {
+        // We need to make a hoisted variable definition.
+        // TODO(dneto): Handle non-storable types, particularly pointers.
+        def_info->requires_hoisted_def = true;
+        auto* hoist_to_block = GetBlockInfo(enclosing_construct->begin_id);
+        hoist_to_block->hoisted_ids.push_back(def_id);
       }
     }
   }
diff --git a/src/reader/spirv/function.h b/src/reader/spirv/function.h
index 484df30..40e23c2 100644
--- a/src/reader/spirv/function.h
+++ b/src/reader/spirv/function.h
@@ -159,8 +159,8 @@
   std::string flow_guard_name = "";
 
   /// The result IDs that this block is responsible for declaring as a
-  /// hoisted variable.  See the |needs_hoisted_def_| member of
-  /// FunctionEmitter for an explanation.
+  /// hoisted variable.  See the |requires_hoisted_def| member of
+  /// DefInfo for an explanation.
   std::vector<uint32_t> hoisted_ids;
 };
 
@@ -174,6 +174,62 @@
   return o;
 }
 
+/// Bookkeeping info for a SPIR-V ID defined in the function.
+/// This will be valid for result IDs for:
+/// - instructions that are not OpLabel, OpVariable, and OpFunctionParameter
+/// - are defined in a basic block visited in the block-order for the function.
+struct DefInfo {
+  /// Constructor.
+  /// @param def_inst the SPIR-V instruction defining the ID
+  /// @param block_pos the position of the basic block where the ID is defined.
+  DefInfo(const spvtools::opt::Instruction& def_inst, uint32_t block_pos);
+  /// Destructor.
+  ~DefInfo();
+
+  /// The SPIR-V instruction that defines the ID.
+  const spvtools::opt::Instruction& inst;
+  /// The position of the block that defines this ID, in the function block
+  /// order.  See method |FunctionEmitter::ComputeBlockOrderAndPositions|
+  const uint32_t block_pos = 0;
+
+  /// The number of uses of this ID.
+  uint32_t num_uses = 0;
+
+  /// The block position of the last use of this ID, or 0 if it is not used
+  /// at all.  The "last" ordering is determined by the function block order.
+  uint32_t last_use_pos = 0;
+
+  /// True if this ID requires a WGSL 'const' definition, due to context. It
+  /// might get one anyway (so this is *not* an if-and-only-if condition).
+  bool requires_named_const_def = false;
+
+  /// True if this ID must map to a WGSL variable declaration before the
+  /// corresponding position of the ID definition in SPIR-V.  This compensates
+  /// for the difference between dominance and scoping. An SSA definition can
+  /// dominate all its uses, but the construct where it is defined does not
+  /// enclose all the uses, and so if it were declared as a WGSL constant
+  /// definition at the point of its SPIR-V definition, then the WGSL name
+  /// would go out of scope too early. Fix that by creating a variable at the
+  /// top of the smallest construct that encloses both the definition and all
+  /// its uses. Then the original SPIR-V definition maps to a WGSL assignment
+  /// to that variable, and each SPIR-V use becomes a WGSL read from the
+  /// variable.
+  /// TODO(dneto): This works for constants of storable type, but not, for
+  /// example, pointers.
+  bool requires_hoisted_def = false;
+};
+
+inline std::ostream& operator<<(std::ostream& o, const DefInfo& di) {
+  o << "DefInfo{"
+    << " inst.result_id: " << di.inst.result_id()
+    << " block_pos: " << di.block_pos << " num_uses: " << di.num_uses
+    << " last_use_pos: " << di.last_use_pos << " requires_named_const_def: "
+    << (di.requires_named_const_def ? "true" : "false")
+    << " requires_hoisted_def: " << (di.requires_hoisted_def ? "true" : "false")
+    << "}";
+  return o;
+}
+
 /// A FunctionEmitter emits a SPIR-V function onto a Tint AST module.
 class FunctionEmitter {
  public:
@@ -288,12 +344,16 @@
   ///  - When a SPIR-V instruction might use the dynamically computed value
   ///    only once, but the WGSL code might reference it multiple times.
   ///    For example, this occurs for the vector operands of OpVectorShuffle.
-  ///    In this case the definition is added to |needs_named_const_def_|.
+  ///    In this case the definition's |requires_named_const_def| property is
+  ///    set to true.
   ///  - When a definition and at least one of its uses are not in the
   ///    same structured construct.
-  ///    In this case the definition is added to |needs_named_const_def_|.
+  ///    In this case the definition's |requires_named_const_def| property is
+  ///    set to true.
   ///  - When a definition is in a construct that does not enclose all the
-  ///    uses.  In this case the definition is added to |needs_hoisted_def_|.
+  ///    uses.  In this case the definition's |requires_hoisted_def| property
+  ///    is set to true.
+  /// Populates the |def_info_| mapping.
   void RegisterValuesNeedingNamedOrHoistedDefinition();
 
   /// Emits declarations of function variables.
@@ -495,8 +555,20 @@
   /// @returns the block info for the given ID, if it exists, or nullptr
   BlockInfo* GetBlockInfo(uint32_t id) const {
     auto where = block_info_.find(id);
-    if (where == block_info_.end())
+    if (where == block_info_.end()) {
       return nullptr;
+    }
+    return where->second.get();
+  }
+
+  /// Gets the local definition info for a result ID.
+  /// @param id the SPIR-V ID of local definition.
+  /// @returns the definition info for the given ID, if it exists, or nullptr
+  DefInfo* GetDefInfo(uint32_t id) const {
+    auto where = def_info_.find(id);
+    if (where == def_info_.end()) {
+      return nullptr;
+    }
     return where->second.get();
   }
 
@@ -622,21 +694,6 @@
   std::unordered_set<uint32_t> identifier_values_;
   // Mapping from SPIR-V ID that is used at most once, to its AST expression.
   std::unordered_map<uint32_t, TypedExpression> singly_used_values_;
-  // Set of SPIR-V IDs which should get a named const definition.
-  std::unordered_set<uint32_t> needs_named_const_def_;
-  // The SPIR-V IDs that must be declared in WGSL before the corresponding
-  // location in SPIR-V. This compensates for the difference between dominance
-  // and scoping. An SSA definition can dominate all its uses, but the construct
-  // where it is defined does not enclose all the uses, and so if it were
-  // declared as a WGSL constant definition at the point of its SPIR-V
-  // definition, then the WGSL name would go out of scope too early. Fix that by
-  // creating a variable at the top of the smallest construct that encloses both
-  // the definition and all its uses. Then the original SPIR-V definition maps
-  // to a WGSL assignment to that variable, and each SPIR-V use becomes a WGSL
-  // read from the variable.
-  // TODO(dneto): This works for constants of storable type, but not, for
-  // example, pointers.
-  std::unordered_set<uint32_t> needs_hoisted_def_;
 
   // The IDs of basic blocks, in reverse structured post-order (RSPO).
   // This is the output order for the basic blocks.
@@ -645,6 +702,9 @@
   // Mapping from block ID to its bookkeeping info.
   std::unordered_map<uint32_t, std::unique_ptr<BlockInfo>> block_info_;
 
+  // Mapping from a locally-defined result ID to its bookkeeping info.
+  std::unordered_map<uint32_t, std::unique_ptr<DefInfo>> def_info_;
+
   // Structured constructs, where enclosing constructs precede their children.
   ConstructList constructs_;
 };