tint/hlsl: Implement ArrayLengthFromImmediates support

This change enables the HLSL backend to use the immediate data block
(root constants) to store buffer size information, eliminating the need
for a separate uniform buffer when computing array lengths.

Why:
- Reduces descriptor table usage in D3D12 by storing buffer sizes in
  root constants instead of requiring a dedicated uniform buffer
- Provides performance benefit by avoiding an extra buffer binding
- Brings the HLSL backend to parity with the MSL backend, which already
  supports this optimization

Components affected:
- ArrayLengthFromUniformOptions: Added buffer_sizes_offset field to
  support immediate block storage at a specified offset
- PopulateBindingRelatedOptions: Now copies the new
  buffer_sizes_offset field from the input options
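- HLSL raise pipeline: Adds a buffer_sizes array to the immediate data
  config, packed as array<vec4<u32>> to satisfy the 16-byte alignment
  required for array elements in constant buffers.
  PopulateBindingRelatedOptions now runs before PrepareImmediateData so
  that buffer_sizes_offset is known when immediate data is configured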
- Transform selection: Conditionally uses ArrayLengthFromImmediates
  when buffer_sizes_offset is set, otherwise falls back to
  ArrayLengthFromUniform

Bug: 366291600
Change-Id: I42d6dc3e172513d853fe502e60d337c22142be95
Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/273955
Commit-Queue: Shaobo Yan <shaoboyan@microsoft.com>
Reviewed-by: James Price <jrprice@google.com>
diff --git a/src/tint/lang/hlsl/writer/arraylength_test.cc b/src/tint/lang/hlsl/writer/arraylength_test.cc
index 6b4c9ee..8069493 100644
--- a/src/tint/lang/hlsl/writer/arraylength_test.cc
+++ b/src/tint/lang/hlsl/writer/arraylength_test.cc
@@ -288,5 +288,40 @@
 )");
 }
 
+TEST_F(HlslWriterTest, ArrayLength_RobustnessAndArrayLengthFromImmediates) {
+    auto* dst = b.Var("dest", ty.ptr(storage, ty.array<u32>()));
+    dst->SetBindingPoint(0, 1);
+    b.ir.root_block->Append(dst);
+    auto* func = b.Function("main", ty.void_(), core::ir::Function::PipelineStage::kFragment);
+    b.Append(func->Block(), [&] {
+        auto* access = b.Access(ty.ptr(storage, ty.u32()), dst, 0_u);
+        b.Store(access, 123_u);
+        b.Return(func);
+    });
+
+    Options options;
+    options.entry_point_name = "main";
+    options.disable_robustness = false;
+    options.immediate_binding_point = BindingPoint{0, 30};
+    options.array_length_from_uniform.buffer_sizes_offset = 4;  // Non-zero offset
+    options.array_length_from_uniform.bindpoint_to_size_index[{0, 1}] = 0;
+    ASSERT_TRUE(Generate(options)) << err_ << output_.hlsl;
+    EXPECT_EQ(output_.hlsl, R"(struct tint_array_lengths_struct {
+  uint tint_array_length_0_1;
+};
+
+
+RWByteAddressBuffer dest : register(u1);
+cbuffer cbuffer_tint_immediate_data : register(b30) {
+  uint4 tint_immediate_data[2];
+};
+void main() {
+  tint_array_lengths_struct v = {(tint_immediate_data[0u].y / 4u)};
+  dest.Store((0u + (min(0u, (v.tint_array_length_0_1 - 1u)) * 4u)), 123u);
+}
+
+)");
+}
+
 }  // namespace
 }  // namespace tint::hlsl::writer
diff --git a/src/tint/lang/hlsl/writer/common/option_helpers.cc b/src/tint/lang/hlsl/writer/common/option_helpers.cc
index 4479ce3..de4bb9f 100644
--- a/src/tint/lang/hlsl/writer/common/option_helpers.cc
+++ b/src/tint/lang/hlsl/writer/common/option_helpers.cc
@@ -215,6 +215,8 @@
     };
 
     array_length_from_uniform_options.ubo_binding = options.array_length_from_uniform.ubo_binding;
+    array_length_from_uniform_options.buffer_sizes_offset =
+        options.array_length_from_uniform.buffer_sizes_offset;
     array_length_from_uniform_options.bindpoint_to_size_index =
         remap(options.array_length_from_uniform.bindpoint_to_size_index);
 
diff --git a/src/tint/lang/hlsl/writer/common/options.h b/src/tint/lang/hlsl/writer/common/options.h
index fc1ba29..1ba87e0 100644
--- a/src/tint/lang/hlsl/writer/common/options.h
+++ b/src/tint/lang/hlsl/writer/common/options.h
@@ -48,10 +48,13 @@
 constexpr uint32_t kMaxInterStageLocations = 30;
 
 /// Options used to specify a mapping of binding points to indices into a UBO
-/// from which to load buffer sizes.
+/// from which to load buffer sizes, or into the immediate block if buffer_sizes_offset is set.
+/// TODO(crbug.com/366291600): Remove ubo_binding after switching to immediates.
 struct ArrayLengthFromUniformOptions {
     /// The HLSL binding point to use to generate a uniform buffer from which to read buffer sizes.
     BindingPoint ubo_binding;
+    /// The offset of the buffer sizes array in the immediate block. If unset, the UBO is used.
+    std::optional<uint32_t> buffer_sizes_offset{};
     /// The mapping from the storage buffer binding points in WGSL binding-point space to the index
     /// into the uniform buffer where the length of the buffer is stored.
     std::unordered_map<BindingPoint, uint32_t> bindpoint_to_size_index;
@@ -59,7 +62,10 @@
     bool operator==(const ArrayLengthFromUniformOptions& other) const = default;
 
     /// Reflect the fields of this class so that it can be used by tint::ForeachField()
-    TINT_REFLECT(ArrayLengthFromUniformOptions, ubo_binding, bindpoint_to_size_index);
+    TINT_REFLECT(ArrayLengthFromUniformOptions,
+                 ubo_binding,
+                 buffer_sizes_offset,
+                 bindpoint_to_size_index);
 };
 
 struct ArrayOffsetFromUniformOptions {
diff --git a/src/tint/lang/hlsl/writer/raise/raise.cc b/src/tint/lang/hlsl/writer/raise/raise.cc
index ac6819f..422ce7e 100644
--- a/src/tint/lang/hlsl/writer/raise/raise.cc
+++ b/src/tint/lang/hlsl/writer/raise/raise.cc
@@ -27,10 +27,12 @@
 
 #include "src/tint/lang/hlsl/writer/raise/raise.h"
 
+#include <algorithm>
 #include <unordered_set>
 #include <utility>
 
 #include "src/tint/lang/core/ir/module.h"
+#include "src/tint/lang/core/ir/transform/array_length_from_immediate.h"
 #include "src/tint/lang/core/ir/transform/array_length_from_uniform.h"
 #include "src/tint/lang/core/ir/transform/binary_polyfill.h"
 #include "src/tint/lang/core/ir/transform/binding_remapper.h"
@@ -53,6 +55,7 @@
 #include "src/tint/lang/core/ir/transform/value_to_let.h"
 #include "src/tint/lang/core/ir/transform/vectorize_scalar_matrix_constructors.h"
 #include "src/tint/lang/core/ir/transform/zero_init_workgroup_memory.h"
+#include "src/tint/lang/core/type/array.h"
 #include "src/tint/lang/core/type/u32.h"
 #include "src/tint/lang/core/type/vector.h"
 #include "src/tint/lang/hlsl/writer/common/option_helpers.h"
@@ -84,6 +87,19 @@
     RUN_TRANSFORM(core::ir::transform::SubstituteOverrides, module,
                   options.substitute_overrides_config);
 
+    // PopulateBindingRelatedOptions must come before PrepareImmediateData so that
+    // buffer_sizes_offset is available when configuring immediate data.
+    tint::transform::multiplanar::BindingsMap multiplanar_map{};
+    RemapperData remapper_data{};
+    ArrayLengthFromUniformOptions array_length_from_uniform_options{};
+    ArrayOffsetFromUniformOptions array_offset_from_uniform_options{};
+    PopulateBindingRelatedOptions(options, remapper_data, multiplanar_map,
+                                  array_length_from_uniform_options,
+                                  array_offset_from_uniform_options);
+
+    // The number of vec4s used to store buffer sizes that will be set into the immediate block.
+    uint32_t buffer_sizes_array_elements_num = 0;
+
     // PrepareImmediateData must come before any transform that needs internal push constants.
     core::ir::transform::PrepareImmediateDataConfig immediate_data_config;
     if (options.first_index_offset) {
@@ -103,20 +119,31 @@
             options.num_workgroups_start_offset.value(),
             module.symbols.New("tint_num_workgroups_start_offset"), module.Types().vec3u());
     }
+
+    if (array_length_from_uniform_options.buffer_sizes_offset) {
+        // Find the largest index declared in the map, in order to determine the number of
+        // elements needed in the array of buffer sizes. The buffer sizes will be packed into
+        // vec4s to satisfy the 16-byte alignment requirement for array elements in constant
+        // buffers.
+        uint32_t max_index = 0;
+        for (const auto& entry : array_length_from_uniform_options.bindpoint_to_size_index) {
+            max_index = std::max(max_index, entry.second);
+        }
+        buffer_sizes_array_elements_num = (max_index / 4) + 1;
+
+        immediate_data_config.AddInternalImmediateData(
+            array_length_from_uniform_options.buffer_sizes_offset.value(),
+            module.symbols.New("buffer_sizes"),
+            module.Types().array(module.Types().vec4<core::u32>(),
+                                 buffer_sizes_array_elements_num));
+    }
+
     auto immediate_data_layout =
         core::ir::transform::PrepareImmediateData(module, immediate_data_config);
     if (immediate_data_layout != Success) {
         return immediate_data_layout.Failure();
     }
 
-    tint::transform::multiplanar::BindingsMap multiplanar_map{};
-    RemapperData remapper_data{};
-    ArrayLengthFromUniformOptions array_length_from_uniform_options{};
-    ArrayOffsetFromUniformOptions array_offset_from_uniform_options{};
-    PopulateBindingRelatedOptions(options, remapper_data, multiplanar_map,
-                                  array_length_from_uniform_options,
-                                  array_offset_from_uniform_options);
-
     RUN_TRANSFORM(core::ir::transform::BindingRemapper, module, remapper_data);
     RUN_TRANSFORM(core::ir::transform::MultiplanarExternalTexture, module, multiplanar_map);
 
@@ -192,11 +219,27 @@
         RUN_TRANSFORM(raise::ReplaceDefaultOnlySwitch, module);
     }
 
-    // ArrayLengthFromUniform must run after Robustness, which introduces arrayLength calls.
-    RUN_TRANSFORM(core::ir::transform::ArrayLengthFromUniform, module,
-                  BindingPoint{array_length_from_uniform_options.ubo_binding.group,
-                               array_length_from_uniform_options.ubo_binding.binding},
-                  array_length_from_uniform_options.bindpoint_to_size_index);
+    // The array length transform must run after Robustness, which introduces arrayLength calls.
+    // TODO(crbug.com/366291600): Replace ArrayLengthFromUniform with ArrayLengthFromImmediates.
+    if (array_length_from_uniform_options.buffer_sizes_offset) {
+        // Use ArrayLengthFromImmediates when buffer_sizes_offset is provided.
+        TINT_ASSERT(!array_length_from_uniform_options.ubo_binding.group &&
+                    !array_length_from_uniform_options.ubo_binding.binding);
+
+        RUN_TRANSFORM(core::ir::transform::ArrayLengthFromImmediates, module,
+                      immediate_data_layout.Get(),
+                      array_length_from_uniform_options.buffer_sizes_offset.value(),
+                      buffer_sizes_array_elements_num,
+                      array_length_from_uniform_options.bindpoint_to_size_index);
+    } else {
+        // Always fall back to ArrayLengthFromUniform when buffer_sizes_offset is not provided.
+        // This preserves the behavior from before ArrayLengthFromImmediates was introduced,
+        // ensuring that arrayLength() calls are properly handled even without explicit options.
+        RUN_TRANSFORM(core::ir::transform::ArrayLengthFromUniform, module,
+                      BindingPoint{array_length_from_uniform_options.ubo_binding.group,
+                                   array_length_from_uniform_options.ubo_binding.binding},
+                      array_length_from_uniform_options.bindpoint_to_size_index);
+    }
 
     if (!options.disable_workgroup_init) {
         // Must run before ShaderIO as it may introduce a builtin parameter (local_invocation_index)