Prototyping an hl.atomic_add op

drisspg · drisspg · commit 804cd241f087 · 2025-05-20T11:52:40.000-07:00
stack-info: PR: #63, branch: drisspg/stack/5
diff --git a/helion/_compiler/device_ir.py b/helion/_compiler/device_ir.py
@@ -693,6 +693,15 @@ def visit_Attribute(self, node: ast.Attribute) -> object:
                 raise exc.CantReadOnDevice(type_info) from None
         return getattr(self.visit(node.value), node.attr)
 
+    def visit_Expr(self, node: ast.Expr) -> object:
+        """Trace an expression statement by visiting the expression it wraps.
+
+        A statement that is just an expression, such as a bare call to
+        ``hl.atomic_add(...)``, arrives here as an ``ast.Expr``; the result of
+        visiting its ``value`` is returned unchanged.
+        """
+        return self.visit(node.value)
+
     def visit_Constant(self, node: ast.Constant) -> object:
         return node.value
 
diff --git a/helion/_compiler/indexing_strategy.py b/helion/_compiler/indexing_strategy.py
@@ -254,7 +254,6 @@ def create(
                 raise exc.InvalidIndexingType(k)
         assert len(output_size) == output_idx
         assert len(index_values) == fake_value.ndim
-
         index_expr = []
         for i, idx in enumerate(index_values):
             if fake_value.size(i) != 1:
diff --git a/helion/language/__init__.py b/helion/language/__init__.py
@@ -5,6 +5,7 @@
 from .creation_ops import zeros as zeros
 from .loops import register_block_size as register_block_size
 from .loops import tile as tile
+from .memory_ops import atomic_add as atomic_add
 from .memory_ops import load as load
 from .memory_ops import store as store
 from .view_ops import subscript as subscript
diff --git a/helion/language/_decorators.py b/helion/language/_decorators.py
@@ -38,6 +38,32 @@ def __call__(self, fn: Callable[..., _T]) -> object: ...
 
 
 class APIFunc(Protocol):
+    """Protocol for Helion API functions that define operations within kernel code.
+
+    This protocol defines the interface for functions decorated with @api. These functions
+    represent operations that can be called in Helion kernel code and are compiled
+    into the final device code.
+
+    Attributes:
+        __qualname__: The qualified name of the function.
+        _helion_api: A literal True marker indicating this is a Helion API function.
+        _is_device_loop: Whether this API function can transition between host and device code.
+            When True, the function can contain both host and device code sections.
+        _is_device_only: Whether this API function is intended for device code only.
+            When True, the function can only be used within device code sections.
+        _tiles_as_sizes: Whether tile indices should be converted to sizes automatically.
+            Used primarily with tiling operations to transform indices to dimensions.
+        _cache_type: Whether to cache the type information for repeated calls.
+        _type_function: A callable that determines the return type of this function
+            during type propagation phase.
+        _codegen: A callable that generates the device code for this function.
+        _fake_fn: A callable that provides a "fake" implementation used during
+            tracing and compilation.
+        _prepare_args: A callable that preprocesses the arguments before they're
+            passed to the actual function implementation.
+        _signature: The function signature for binding and validating arguments.
+    """
+
     __qualname__: str
     _helion_api: Literal[True]
     # a device loop can transition between host and device code
diff --git a/helion/language/memory_ops.py b/helion/language/memory_ops.py
@@ -14,7 +14,7 @@
 
     from .._compiler.inductor_lowering import CodegenState
 
-__all__ = ["load", "store"]
+__all__ = ["atomic_add", "load", "store"]
 
 
 @has_side_effect
@@ -53,6 +53,14 @@ def _(state: CodegenState) -> ast.AST:
 
 @_decorators.api(tiles_as_sizes=True)
 def load(tensor: torch.Tensor, index: list[object]) -> torch.Tensor:
+    """Load and return a value from a tensor using a list of indices.
+
+    The body is a placeholder that raises ``exc.NotInsideKernel``.
+
+    Args:
+        tensor: The tensor to load from.
+        index: The indices used to index into ``tensor``.
+    """
     raise exc.NotInsideKernel
 
 
@@ -70,3 +78,60 @@ def _(state: CodegenState) -> ast.AST:
     return state.device_function.indexing_strategy.codegen_load(
         state, tensor, [*subscript]
     )
+
+
+@has_side_effect
+@_decorators.api(tiles_as_sizes=True)
+def atomic_add(target: torch.Tensor, index: list[object], value: torch.Tensor) -> None:
+    """Atomically add ``value`` to ``target`` at ``index``.
+
+    The body is a placeholder that raises ``exc.NotInsideKernel``.
+
+    Args:
+        target: The tensor to add to.
+        index: The indices selecting where in ``target`` to add.
+        value: The value to add; its dtype must equal ``target.dtype``
+            (asserted when the arguments are prepared).
+    """
+    raise exc.NotInsideKernel
+
+
+@_decorators.prepare_args(atomic_add)
+def _(
+    target: torch.Tensor, index: object, value: torch.Tensor
+) -> tuple[torch.Tensor, object, torch.Tensor]:
+    """Check dtypes match and map list/tuple indices through tiles_to_sizes."""
+    from helion._compiler.tile_index_proxy import TileIndexProxy
+
+    assert value.dtype == target.dtype, (
+        f"Expected value dtype {target.dtype}, got {value.dtype}"
+    )
+    if not isinstance(index, (list, tuple)):
+        return (target, index, value)
+    return (target, TileIndexProxy.tiles_to_sizes(index), value)
+
+
+@_decorators.register_fake(atomic_add)
+def _(target: torch.Tensor, index: list[object], value: torch.Tensor) -> None:
+    return None  # matches atomic_add's declared ``-> None`` return type
+
+
+@_decorators.codegen(atomic_add)
+def _(state: CodegenState) -> ast.AST:
+    """Build the ``tl.atomic_add(...)`` expression for an ``atomic_add`` call."""
+    target = state.proxy_arg(0)
+    index = state.proxy_arg(1)
+    value = state.proxy_arg(2)
+    assert isinstance(target, torch.Tensor)
+    assert isinstance(value, torch.Tensor)
+    from .._compiler.ast_extension import expr_from_string
+    # Supplies the ``offset`` and ``mask`` expressions passed to the template below.
+    indices = SubscriptIndexing.create(state, target, index)
+    name = state.device_function.tensor_arg(target).name
+    return expr_from_string(
+        f"tl.atomic_add({name} + offset, value, mask=mask, sem=sem)",
+        value=state.ast_args[2],
+        offset=indices.index_expr,
+        mask=indices.mask_expr,
+        sem=expr_from_string("'relaxed'"),  # ``sem`` is always the literal 'relaxed'
+    )
diff --git a/pyproject.toml b/pyproject.toml
@@ -87,4 +87,3 @@ exclude = [
 
 [tool.hatch.metadata]
 allow-direct-references = true
-
diff --git a/test/test_atomic_add.py b/test/test_atomic_add.py
@@ -0,0 +1,58 @@
+from __future__ import annotations
+
+import torch
+
+import helion
+from helion._testing import code_and_output
+import helion.language as hl
+
+
+@helion.kernel()
+def atomic_add_kernel(x: torch.Tensor, y: torch.Tensor) -> torch.Tensor:
+    """Atomically add each tile of ``y`` into the same tile of ``x``; return ``x``."""
+    for i in hl.tile([x.size(0)]):
+        hl.atomic_add(x, [i], y[i])
+    return x
+
+
+@helion.kernel()
+def atomic_add_overlap_kernel(
+    x: torch.Tensor, y: torch.Tensor, indices: torch.Tensor
+) -> torch.Tensor:
+    """Atomically add ``y[i]`` into ``x[indices[i]]``; target indices may repeat."""
+    for i in hl.tile([y.size(0)]):
+        idx = indices[i]
+        hl.atomic_add(x, [idx], y[i])
+    return x
+
+
+def test_atomic_add():
+    """Check atomic_add results for distinct and for repeated target indices."""
+    # Distinct indices: adding ones into zeros should give ones (CUDA tensors).
+    x = torch.zeros(10, device="cuda")
+    y = torch.ones(10, device="cuda")
+
+    result = atomic_add_kernel(x, y)
+    assert torch.allclose(result, y), f"Expected {y}, got {result}"
+
+    # Repeated indices: each of the 5 slots appears twice in `indices`, so expect 2.
+    x = torch.zeros(5, device="cuda")
+    y = torch.ones(10, device="cuda")
+    indices = torch.tensor([0, 1, 2, 3, 4, 0, 1, 2, 3, 4], device="cuda")
+
+    result = atomic_add_overlap_kernel(x, y, indices)
+    expected = torch.ones(5, device="cuda") * 2
+    assert torch.allclose(result, expected), f"Expected {expected}, got {result}"
+
+
+def test_atomic_add_code():
+    """Test that the generated code for atomic_add_kernel contains "atomic_add"."""
+    args = (torch.zeros(10, device="cuda"), torch.ones(10, device="cuda"))
+    code, _ = code_and_output(atomic_add_kernel, args)
+    assert "atomic_add" in code, f"Expected 'atomic_add' in generated code, got: {code}"
+
+
+if __name__ == "__main__":  # allows running this file directly as a script
+    test_atomic_add()
+    test_atomic_add_code()
+    print("All tests passed!")

Original file line number	Diff line number	Diff line change
`@@ -87,4 +87,3 @@ exclude = [`
`87`	`87`
`88`	`88`	`[tool.hatch.metadata]`
`89`	`89`	`allow-direct-references = true`
`90`		`-`