Commit b49bd50

Refactor reduction loop config spec
stack-info: PR: #128, branch: jansel/stack/24
1 parent fa08371

4 files changed: +51 -67 lines changed

helion/_compiler/device_ir.py

Lines changed: 7 additions & 6 deletions
@@ -331,13 +331,14 @@ def build_rolled_reductions(self) -> None:
                 allow_loop = allow_loop or reduction_info.used_rdim
                 self.rolled_reductions.append(reduction_info)
                 graph_to_info[graph_id] = reduction_info
-            env.config_spec.reduction_loop_specs.append(
-                ReductionLoopSpec(
-                    size_hint=rdim.size_hint(),
-                    # TODO(jansel): we should add support for rolling multiple dims at once
-                    allow_loop=allow_loop and first,
+            if allow_loop and first:
+                # TODO(jansel): we should add support for rolling multiple dims at once
+                env.config_spec.reduction_loops.append(
+                    ReductionLoopSpec(
+                        block_id=rdim.block_size_idx,
+                        size_hint=rdim.size_hint(),
+                    )
                 )
-            )
             first = False
 
     def __enter__(self) -> None:
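
This change inverts the old pattern: instead of appending a ReductionLoopSpec for every reduction dim and carrying an allow_loop flag, the builder now registers a spec only for the first loop-eligible dim, keyed by its block_id. A minimal sketch of that registration rule, using simplified stand-in dataclasses rather than the real Helion types:

from __future__ import annotations

import dataclasses


@dataclasses.dataclass
class ReductionLoopSpec:  # stand-in, not the real Helion class
    block_id: int
    size_hint: int


@dataclasses.dataclass
class ConfigSpec:  # stand-in, not the real Helion class
    reduction_loops: list[ReductionLoopSpec] = dataclasses.field(default_factory=list)


def register_reduction_dims(spec: ConfigSpec, rdims: list[tuple[int, int, bool]]) -> None:
    """rdims holds (block_id, size_hint, allow_loop) per reduction dim, in order."""
    first = True
    for block_id, size_hint, allow_loop in rdims:
        if allow_loop and first:
            # Only the first loop-eligible dim is registered; rolling several
            # dims at once remains a TODO in the upstream code.
            spec.reduction_loops.append(
                ReductionLoopSpec(block_id=block_id, size_hint=size_hint)
            )
        first = False


spec = ConfigSpec()
register_reduction_dims(spec, [(3, 512, True), (4, 64, True)])
assert [s.block_id for s in spec.reduction_loops] == [3]

With this shape, membership in reduction_loops itself expresses loop eligibility, which is exactly what the tile_dispatch and test changes below rely on.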

helion/_compiler/tile_dispatch.py

Lines changed: 7 additions & 10 deletions
@@ -1,6 +1,5 @@
 from __future__ import annotations
 
-import collections
 import functools
 import operator
 from typing import TYPE_CHECKING
@@ -99,18 +98,16 @@ def _add_loop_strategy(
     def _add_reduction_strategies(self, fn: DeviceFunction, config: Config) -> None:
         env = CompileEnvironment.current()
         rdims = [bs.block_size_idx for bs in env.block_sizes if bs.reduction]
-        reduction_loops = collections.deque(config.reduction_loops)
-        for rdim_index, rdim_spec in zip(
-            rdims, env.config_spec.reduction_loop_specs, strict=True
-        ):
-            reduction_loop = reduction_loops.popleft() if rdim_spec.allow_loop else None
+        for block_id in rdims:
+            reduction_loop = env.config_spec.reduction_loops.config_get(
+                config.reduction_loops, block_id, None
+            )
             if reduction_loop is None:
-                strategy: TileStrategy = PersistentReductionStrategy(fn, rdim_index)
+                strategy: TileStrategy = PersistentReductionStrategy(fn, block_id)
             else:
-                strategy = LoopedReductionStrategy(fn, rdim_index, reduction_loop)
+                strategy = LoopedReductionStrategy(fn, block_id, reduction_loop)
             self.strategies.append(strategy)
-            self.block_indices_to_strategy[(rdim_index,)] = strategy
-        assert not reduction_loops
+            self.block_indices_to_strategy[(block_id,)] = strategy
 
     def codegen_grid(self, state: CodegenState, block_indices: list[int]) -> None:
         strategy = self.block_indices_to_strategy[tuple(block_indices)]
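
On the consumer side, the positional protocol (a deque popped once per allow_loop spec, with a trailing assert that it was drained) becomes a keyed lookup. The sketch below is one plausible reading of the config_get call site: the config's value list runs parallel to the registered specs, and a dim with no registered spec falls back to the default of None, which the dispatcher turns into a PersistentReductionStrategy. The helper name and call shape come from the diff; the body is an assumption, not Helion's actual implementation.

from __future__ import annotations

from typing import Sequence, TypeVar

T = TypeVar("T")


class _Spec:
    """Stand-in for a _BlockIdItem: knows which block ids it covers."""

    def __init__(self, block_ids: list[int]) -> None:
        self.block_ids = block_ids


class BlockIdSequence:  # hypothetical simplified version
    def __init__(self, specs: list[_Spec]) -> None:
        self._data = specs

    def config_get(
        self, values: Sequence[T], block_id: int, default: T | None
    ) -> T | None:
        # Config values are parallel to the registered specs, so the value
        # for a block_id sits at the index of the spec that covers it.
        for index, spec in enumerate(self._data):
            if block_id in spec.block_ids and index < len(values):
                return values[index]
        return default


seq = BlockIdSequence([_Spec([3])])
assert seq.config_get([64], 3, None) == 64  # looped: reduction loop of 64
assert seq.config_get([64], 5, None) is None  # unregistered dim: persistent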

helion/autotuner/config_spec.py

Lines changed: 34 additions & 48 deletions
@@ -70,6 +70,11 @@ def _fragment(self, base: ConfigSpec) -> ConfigSpecFragment:
         """Return the fragment used for autotuning for this item."""
         raise NotImplementedError
 
+    def _flat_config(
+        self, base: ConfigSpec, fn: Callable[[ConfigSpecFragment], object]
+    ) -> object:
+        return fn(self._fragment(base))
+
 
 _BlockIdItemT = TypeVar("_BlockIdItemT", bound=_BlockIdItem)
 
@@ -153,7 +158,7 @@ def _flat_config(
         self, base: ConfigSpec, fn: Callable[[ConfigSpecFragment], object]
     ) -> list[object]:
         """Map a flattened version of the config using the given function."""
-        return [fn(spec._fragment(base)) for spec in self._data]
+        return [spec._flat_config(base, fn) for spec in self._data]
 
     def _normalize(
         self, name: str, values: object, *, flatten: bool = False
@@ -219,9 +224,8 @@ class ConfigSpec:
     flatten_loops: BlockIdSequence[FlattenLoopSpec] = dataclasses.field(
         default_factory=BlockIdSequence
    )
-    # TODO(jansel): convert this to a BlockIdSequence[ReductionLoopSpec]
-    reduction_loop_specs: list[ReductionLoopSpec] = dataclasses.field(
-        default_factory=list
+    reduction_loops: BlockIdSequence[ReductionLoopSpec] = dataclasses.field(
+        default_factory=BlockIdSequence
     )
     allow_use_yz_grid: bool | None = None
 
@@ -254,15 +258,12 @@ def normalize(self, config: helion.Config | dict[str, object]) -> None:
             ("flatten_loops", self.flatten_loops, True),
             ("l2_groupings", self.l2_groupings, True),
             ("loop_orders", self.loop_orders, False),
+            ("reduction_loops", self.reduction_loops, True),
         ]:
             config[name] = mapping._normalize(
                 name, config.get(name, ()), flatten=flatten
             )
 
-        config["reduction_loops"] = self.normalize_reduction_loops(
-            config.get("reduction_loops", None)
-        )
-
         for name in ("loop_orders", "l2_groupings", "flatten_loops", "reduction_loops"):
             if not config[name]:
                 config.pop(name)
@@ -278,22 +279,6 @@ def normalize(self, config: helion.Config | dict[str, object]) -> None:
         if invalid_keys := ({*config} - VALID_KEYS):
             raise InvalidConfig(f"Invalid config keys {sorted(invalid_keys)!r}")
 
-    def normalize_reduction_loops(self, reduction_loops: object) -> list[int | None]:
-        assert isinstance(reduction_loops, (list, tuple, type(None), int))
-        loops = [spec for spec in self.reduction_loop_specs if spec.allow_loop]
-        if reduction_loops is None:
-            reduction_loops = [None for _ in loops]
-        elif isinstance(reduction_loops, int):
-            reduction_loops = [reduction_loops]
-        if len(reduction_loops) != len(loops):
-            raise InvalidConfig(
-                f"Invalid number of reduction loops, expected {len(loops)} got {len(reduction_loops)}"
-            )
-        return [
-            spec.normalize(value)
-            for spec, value in zip(loops, reduction_loops, strict=True)
-        ]
-
     def default_config(self) -> helion.Config:
         return self.flat_config(lambda x: x.default())
 
@@ -304,11 +289,7 @@ def flat_config(self, fn: Callable[[ConfigSpecFragment], object]) -> helion.Config:
             "loop_orders": self.loop_orders._flat_config(self, fn),
             "flatten_loops": self.flatten_loops._flat_config(self, fn),
             "l2_groupings": self.l2_groupings._flat_config(self, fn),
-            "reduction_loops": [
-                spec.flat_reduction_loop(fn)
-                for spec in self.reduction_loop_specs
-                if spec.allow_loop
-            ],
+            "reduction_loops": self.reduction_loops._flat_config(self, fn),
             "num_warps": fn(NumWarpsFragment(1, 32, DEFAULT_NUM_WARPS)),
             "num_stages": fn(IntegerFragment(1, 8, DEFAULT_NUM_STAGES)),
             "indexing": fn(
@@ -354,7 +335,7 @@ def _fill_missing(self) -> list[int]:
 
 
 class _PowerOfTwoBlockIdItem(_BlockIdItem):
-    def _normalize(self, name: str, value: object) -> int:
+    def _normalize(self, name: str, value: object) -> int | None:
         try:
             return assert_integer_power_of_two(value)
         except InvalidConfig:
@@ -413,7 +394,7 @@ def update_hint(self, value: int) -> None:
     def _fragment(self, base: ConfigSpec) -> BlockSizeFragment:
         total_ndim = len(base.block_sizes)
         reduction_numel = _product(
-            [next_power_of_2(spec.size_hint) for spec in base.reduction_loop_specs]
+            [next_power_of_2(spec.size_hint) for spec in base.reduction_loops]
         )
         if total_ndim <= 1 and reduction_numel <= 1:
             default = 1024
@@ -443,31 +424,36 @@ def _fill_missing(self) -> bool:
         return False
 
 
-@dataclasses.dataclass
-class ReductionLoopSpec:
-    size_hint: int
-    allow_loop: bool
-
-    def normalize(self, value: int | None) -> int | None:
-        if value is None:
-            return None
-        assert_integer_power_of_two(value)
-        if value < 0 or value >= next_power_of_2(self.size_hint):
-            raise InvalidConfig(
-                f"Invalid reduction loop value {value!r}, expected 0 to {next_power_of_2(self.size_hint)}"
-            )
-        return value
+class ReductionLoopSpec(_PowerOfTwoBlockIdItem):
+    def __init__(
+        self,
+        *,
+        block_id: int,
+        size_hint: int,
+    ) -> None:
+        super().__init__([block_id])
+        self.size_hint = size_hint
 
-    def flat_reduction_loop(self, fn: Callable[[ConfigSpecFragment], object]) -> object:
-        assert self.allow_loop
+    def _flat_config(
+        self, base: ConfigSpec, fn: Callable[[ConfigSpecFragment], object]
+    ) -> int | None:
         low = 8  # TODO(jansel): is smaller needed?
         high = next_power_of_2(self.size_hint)
         default = min(high, 4096)
         value = fn(BlockSizeFragment(low, high, default))
-        if value == high:
+        assert isinstance(value, int)
+        if value >= self.size_hint:
             return None  # max size becomes persistent reduction
         return value
 
+    def _normalize(self, name: str, value: object) -> int | None:
+        if value is None:
+            return None
+        return super()._normalize(name, value)
+
+    def _fill_missing(self) -> None:
+        return None
+
 
 def _product(seq: Sequence[int]) -> int:
     """Return the product of the elements in the sequence."""

test/test_specialize.py

Lines changed: 3 additions & 3 deletions
@@ -157,7 +157,7 @@ def fn(
         x = torch.randn([512, 512], device=DEVICE)
         code, result = code_and_output(fn, (x,), block_size=32)
         torch.testing.assert_close(result, x + 1)
-        self.assertFalse(fn.bind((x,)).config_spec.reduction_loop_specs[0].allow_loop)
+        self.assertEqual(len(fn.bind((x,)).config_spec.reduction_loops), 0)
         self.assertExpectedInline(
             code,
             """\
@@ -214,7 +214,7 @@ def fn(
         x = torch.randn([500, 500], device=DEVICE)
         code, result = code_and_output(fn, (x,), block_size=32)
         torch.testing.assert_close(result, x + 1)
-        self.assertFalse(fn.bind((x,)).config_spec.reduction_loop_specs[0].allow_loop)
+        self.assertEqual(len(fn.bind((x,)).config_spec.reduction_loops), 0)
         self.assertIs(
             fn.bind((x,)),
             fn.bind((torch.zeros_like(x),)),
@@ -278,7 +278,7 @@ def fn(
         x = torch.randn([500, 500], device=DEVICE)
         code, result = code_and_output(fn, (x,), block_size=32)
         torch.testing.assert_close(result, x.sum(-1))
-        self.assertTrue(fn.bind((x,)).config_spec.reduction_loop_specs[0].allow_loop)
+        self.assertEqual(len(fn.bind((x,)).config_spec.reduction_loops), 1)
         self.assertExpectedInline(
             code,
             """\
