Prototyping an hl.atomic op

drisspg · drisspg · commit bf8c8989b35b · 2025-05-22T19:31:22.000-07:00
stack-info: PR: #63, branch: drisspg/stack/5
diff --git a/helion/_compiler/device_ir.py b/helion/_compiler/device_ir.py
@@ -696,6 +696,9 @@ def visit_Attribute(self, node: ast.Attribute) -> object:
                 raise exc.CantReadOnDevice(type_info) from None
         return getattr(self.visit(node.value), node.attr)
 
+    def visit_Expr(self, node: ast.Expr) -> object:
+        return self.visit(node.value)
+
     def visit_Constant(self, node: ast.Constant) -> object:
         return node.value
 
diff --git a/helion/_compiler/indexing_strategy.py b/helion/_compiler/indexing_strategy.py
@@ -277,10 +277,9 @@ def create(
                         mask_values.setdefault(f"({mask}){expand}")
                 output_idx += 1
             else:
-                raise exc.InvalidIndexingType(k)
+                raise exc.InvalidIndexingType(type(k))
         assert len(output_size) == output_idx - first_non_grid_index
         assert len(index_values) == fake_value.ndim
-
         index_expr = []
         for i, idx in enumerate(index_values):
             if fake_value.size(i) != 1:
diff --git a/helion/language/__init__.py b/helion/language/__init__.py
@@ -6,6 +6,7 @@
 from .loops import grid as grid
 from .loops import register_block_size as register_block_size
 from .loops import tile as tile
+from .memory_ops import atomic_add as atomic_add
 from .memory_ops import load as load
 from .memory_ops import store as store
 from .view_ops import subscript as subscript
diff --git a/helion/language/_decorators.py b/helion/language/_decorators.py
@@ -38,6 +38,32 @@ def __call__(self, fn: Callable[..., _T]) -> object: ...
 
 
 class APIFunc(Protocol):
+    """Protocol for Helion API functions that define operations within kernel code.
+
+    This protocol defines the interface for functions decorated with @api. These functions
+    represent operations that can be called in Helion kernel code and are compiled
+    into the final device code.
+
+    Attributes:
+        __qualname__: The qualified name of the function.
+        _helion_api: A literal True marker indicating this is a Helion API function.
+        _is_device_loop: Whether this API function can transition between host and device code.
+            When True, the function can contain both host and device code sections.
+        _is_device_only: Whether this API function is intended for device code only.
+            When True, the function can only be used within device code sections.
+        _tiles_as_sizes: Whether tile indices should be converted to sizes automatically.
+            Used primarily with tiling operations to transform indices to dimensions.
+        _cache_type: Whether to cache the type information for repeated calls.
+        _type_function: A callable that determines the return type of this function
+            during type propagation phase.
+        _codegen: A callable that generates the device code for this function.
+        _fake_fn: A callable that provides a "fake" implementation used during
+            tracing and compilation.
+        _prepare_args: A callable that preprocesses the arguments before they're
+            passed to the actual function implementation.
+        _signature: The function signature for binding and validating arguments.
+    """
+
     __qualname__: str
     _helion_api: Literal[True]
     # a device loop can transition between host and device code
diff --git a/helion/language/memory_ops.py b/helion/language/memory_ops.py
@@ -14,7 +14,7 @@
 
     from .._compiler.inductor_lowering import CodegenState
 
-__all__ = ["load", "store"]
+__all__ = ["atomic_add", "load", "store"]
 
 
 @has_side_effect
@@ -53,6 +53,14 @@ def _(state: CodegenState) -> ast.AST:
 
 @_decorators.api(tiles_as_sizes=True)
 def load(tensor: torch.Tensor, index: list[object]) -> torch.Tensor:
+    """Load a value from a tensor using a list of indices.
+
+    Args:
+        tensor: The tensor to load from
+        index: The indices to use to index into the tensor
+    Returns:
+        torch.Tensor: The loaded value
+    """
     raise exc.NotInsideKernel
 
 
@@ -70,3 +78,85 @@ def _(state: CodegenState) -> ast.AST:
     return state.device_function.indexing_strategy.codegen_load(
         state, tensor, [*subscript]
     )
+
+
+@has_side_effect
+@_decorators.api()
+def atomic_add(
+    target: torch.Tensor,
+    index: list[object],
+    value: torch.Tensor | float,
+    sem: str = "relaxed",
+) -> None:
+    """
+    Atomically add a value to a target tensor.
+
+    Args:
+        target: The tensor to add to
+        index: Indices into target selecting where the value is accumulated
+        value: The value to add
+        sem: Memory ordering: 'relaxed' (default), 'acquire', 'release' or 'acq_rel'
+
+    Returns:
+        None
+    """
+    raise exc.NotInsideKernel
+
+
+@_decorators.prepare_args(atomic_add)
+def _(
+    target: torch.Tensor,
+    index: list[object],
+    value: torch.Tensor | float,
+    sem: str = "relaxed",
+) -> tuple[torch.Tensor, object, torch.Tensor | float | int, str]:
+    from helion._compiler.tile_index_proxy import TileIndexProxy
+
+    valid_sems = {"relaxed", "acquire", "release", "acq_rel"}
+    if sem not in valid_sems:
+        raise ValueError(
+            f"Invalid memory semantic '{sem}'. Must be one of {valid_sems}."
+        )
+
+    index = TileIndexProxy.prepare_index(index)
+    index = TileIndexProxy.tiles_to_sizes(index)
+
+    return (target, index, value, sem)
+
+
+@_decorators.register_fake(atomic_add)
+def _(
+    target: torch.Tensor, index: list[object], value: torch.Tensor, sem: str = "relaxed"
+) -> None:
+    return None
+
+
+@_decorators.codegen(atomic_add)
+def _(state: CodegenState) -> ast.AST:
+    import ast
+
+    from .._compiler.ast_extension import expr_from_string
+
+    target = state.proxy_arg(0)
+    index = state.proxy_arg(1)
+    value = state.proxy_arg(2)
+    sem = expr_from_string(f"'{state.proxy_arg(3)}'")
+
+    assert isinstance(target, torch.Tensor)
+
+    indices = SubscriptIndexing.create(state, target, index)
+    name = state.device_function.tensor_arg(target).name
+
+    value_expr = (
+        state.ast_args[2]
+        if isinstance(value, torch.Tensor)
+        else ast.Constant(value=value)
+    )
+
+    return expr_from_string(
+        f"tl.atomic_add({name} + offset, value, mask=mask, sem=sem)",
+        value=value_expr,
+        offset=indices.index_expr,
+        mask=indices.mask_expr,
+        sem=sem,
+    )
diff --git a/pyproject.toml b/pyproject.toml
@@ -87,4 +87,3 @@ exclude = [
 
 [tool.hatch.metadata]
 allow-direct-references = true
-
diff --git a/test/test_atomic_add.py b/test/test_atomic_add.py

Original file line number	Diff line number	Diff line change
`@@ -87,4 +87,3 @@ exclude = [`
`87`	`87`
`88`	`88`	`[tool.hatch.metadata]`
`89`	`89`	`allow-direct-references = true`
`90`		`-`