
Commit 750a34f

Correct units for time printouts
do_bench returns ms rather than sec

stack-info: PR: #133, branch: jansel/stack/29
1 parent 13c47cf commit 750a34f
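
For context on the unit fix: Triton's `do_bench` helper, which these files use for timing, reports its measurement in milliseconds, so the formatted value takes an `ms` suffix with no conversion. Below is a minimal sketch of that pattern (not part of this commit; it assumes `triton.testing.do_bench` is importable and a CUDA device is available):

```python
# Hedged sketch: do_bench times a callable and returns milliseconds,
# so the value can be printed with an "ms" suffix directly.
import torch
from triton.testing import do_bench

x = torch.randn(4096, 4096, device="cuda")
y = torch.randn(4096, 4096, device="cuda")

ms = do_bench(lambda: torch.add(x, y))  # measured runtime in milliseconds
print(f"torch.add time: {ms:.4f}ms")    # no unit conversion needed
```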

11 files changed: +20 additions, -20 deletions

README.md

Lines changed: 4 additions & 4 deletions
@@ -129,11 +129,11 @@ typical autotuning session produces output similar to:
 
 ```
 [0s] Starting DifferentialEvolutionSearch with population=40, generations=20, crossover_rate=0.8
-[20s] Initial population: failed=10 min=0.9677s mid=3.0013s max=22.1430s best=Config(block_sizes=[[64, 32], [32]], loop_orders=[[1, 0]], num_warps=2, num_stages=2, indexing='pointer', l2_grouping=1, use_yz_grid=False)
-[52s] Generation 2: replaced=16 min=0.7731s mid=1.7203s max=3.1227s best=Config(block_sizes=[[32, 128], [16]], loop_orders=[[0, 1]], num_warps=4, num_stages=4, indexing='block_ptr', l2_grouping=16)
-[85s] Generation 3: replaced=19 min=0.6256s mid=1.3916s max=2.7868s best=Config(block_sizes=[[64, 128], [16]], loop_orders=[[0, 1]], num_warps=4, num_stages=4, indexing='block_ptr', l2_grouping=16)
+[20s] Initial population: failed=10 min=0.9677 mid=3.0013 max=22.1430 best=Config(block_sizes=[[64, 32], [32]], loop_orders=[[1, 0]], num_warps=2, num_stages=2, indexing='pointer', l2_grouping=1, use_yz_grid=False)
+[52s] Generation 2: replaced=16 min=0.7731 mid=1.7203 max=3.1227 best=Config(block_sizes=[[32, 128], [16]], loop_orders=[[0, 1]], num_warps=4, num_stages=4, indexing='block_ptr', l2_grouping=16)
+[85s] Generation 3: replaced=19 min=0.6256 mid=1.3916 max=2.7868 best=Config(block_sizes=[[64, 128], [16]], loop_orders=[[0, 1]], num_warps=4, num_stages=4, indexing='block_ptr', l2_grouping=16)
 ...
-[593s] Generation 19: replaced=7 min=0.6072s mid=0.6626s max=0.7496s best=Config(block_sizes=[[64, 128], [16]], loop_orders=[[1, 0]], num_warps=4, num_stages=3, indexing='block_ptr', l2_grouping=32)
+[593s] Generation 19: replaced=7 min=0.6072 mid=0.6626 max=0.7496 best=Config(block_sizes=[[64, 128], [16]], loop_orders=[[1, 0]], num_warps=4, num_stages=3, indexing='block_ptr', l2_grouping=32)
 [593s] Autotuning complete in 593.1s after searching 1520 configs.
 One can hardcode the best config and skip autotuning with:
 @helion.kernel(config=helion.Config(block_sizes=[[64, 128], [16]], loop_orders=[[1, 0]], num_warps=4, num_stages=3, indexing='block_ptr', l2_grouping=32))

examples/add.py

Lines changed: 1 addition & 1 deletion
@@ -32,7 +32,7 @@ def check(m: int, n: int) -> None:
     sec = do_bench(lambda: add(x, y))
     baseline_sec = do_bench(lambda: torch.add(x, y))
     print(
-        f"Helion time: {sec:.4f}s, torch time: {baseline_sec:.4f}, speedup: {baseline_sec / sec:.2f}x"
+        f"Helion time: {sec:.4f}ms, torch time: {baseline_sec:.4f}, speedup: {baseline_sec / sec:.2f}x"
     )

examples/attention.py

Lines changed: 1 addition & 1 deletion
@@ -109,7 +109,7 @@ def test(
     flex_sec = do_bench(lambda: flex_attention(q, k, v))
     helion_sec = do_bench(lambda: attention(q, k, v))
     print(
-        f"Helion time: {helion_sec:.4f}s, flex time: {flex_sec:.4f}, torch time: {spda_sec:.4f}"
+        f"Helion time: {helion_sec:.4f}ms, flex time: {flex_sec:.4f}, torch time: {spda_sec:.4f}"
     )

examples/bmm.py

Lines changed: 1 addition & 1 deletion
@@ -35,7 +35,7 @@ def check(b: int, m: int, k: int, n: int) -> None:
     sec = do_bench(lambda: bmm(x, y))
     baseline_sec = do_bench(lambda: torch.bmm(x, y))
     print(
-        f"Helion time: {sec:.4f}s, torch time: {baseline_sec:.4f}, speedup: {baseline_sec / sec:.2f}x"
+        f"Helion time: {sec:.4f}ms, torch time: {baseline_sec:.4f}, speedup: {baseline_sec / sec:.2f}x"
     )

examples/concatenate.py

Lines changed: 1 addition & 1 deletion
@@ -41,7 +41,7 @@ def main() -> None:
     sec = do_bench(lambda: concat2d_dim1(x, y))
     baseline_sec = do_bench(lambda: torch.cat([x, y], dim=1))
     print(
-        f"Helion time: {sec:.4f}s, torch time: {baseline_sec:.4f}, speedup: {baseline_sec / sec:.2f}x"
+        f"Helion time: {sec:.4f}ms, torch time: {baseline_sec:.4f}, speedup: {baseline_sec / sec:.2f}x"
     )

examples/embedding.py

Lines changed: 1 addition & 1 deletion
@@ -34,7 +34,7 @@ def main() -> None:
     sec = do_bench(lambda: embedding(x, weight))
     baseline_sec = do_bench(lambda: torch.nn.functional.embedding(x, weight))
     print(
-        f"Helion time: {sec:.4f}s, torch time: {baseline_sec:.4f}, speedup: {baseline_sec / sec:.2f}x"
+        f"Helion time: {sec:.4f}ms, torch time: {baseline_sec:.4f}, speedup: {baseline_sec / sec:.2f}x"
     )

examples/long_sum.py

Lines changed: 1 addition & 1 deletion
@@ -95,7 +95,7 @@ def check(m: int, n: int) -> None:
     manual_loop_sec = do_bench(lambda: longsum_manual(x))
     baseline_sec = do_bench(lambda: baseline_sum(x))
     print(
-        f"Helion Naive time: {sec:.4f}s, Helion Looped Time: {loop_sec:.4f}, Helion Manual Loop Time: {manual_loop_sec:.4f} torch time: {baseline_sec:.4f}, speedup: {baseline_sec / sec:.2f}x {baseline_sec / loop_sec:.2f}x {baseline_sec / manual_loop_sec:.2f}x"
+        f"Helion Naive time: {sec:.4f}ms, Helion Looped Time: {loop_sec:.4f}, Helion Manual Loop Time: {manual_loop_sec:.4f} torch time: {baseline_sec:.4f}, speedup: {baseline_sec / sec:.2f}x {baseline_sec / loop_sec:.2f}x {baseline_sec / manual_loop_sec:.2f}x"
     )

examples/matmul.py

Lines changed: 1 addition & 1 deletion
@@ -33,7 +33,7 @@ def check(m: int, k: int, n: int) -> None:
     sec = do_bench(lambda: matmul(x, y))
     baseline_sec = do_bench(lambda: torch.matmul(x, y))
     print(
-        f"Helion time: {sec:.4f}s, torch time: {baseline_sec:.4f}, speedup: {baseline_sec / sec:.2f}x"
+        f"Helion time: {sec:.4f}ms, torch time: {baseline_sec:.4f}, speedup: {baseline_sec / sec:.2f}x"
     )

examples/moe_matmul_ogs.py

Lines changed: 1 addition & 1 deletion
@@ -172,7 +172,7 @@ def check(T: int, K: int, N: int, n_experts: int) -> None:
         lambda: moe_matmul_ogs_reference(A, W, top1_expert_per_token)
     )
     print(
-        f"Helion time: {sec:.4f}s, torch time: {baseline_sec:.4f}s, speed-up: {baseline_sec / sec:.2f}x"
+        f"Helion time: {sec:.4f}ms, torch time: {baseline_sec:.4f}, speed-up: {baseline_sec / sec:.2f}x"
     )

examples/softmax.py

Lines changed: 1 addition & 1 deletion
@@ -64,7 +64,7 @@ def check(m: int, n: int) -> None:
     sec = do_bench(lambda: softmax(x))
     baseline_sec = do_bench(lambda: torch.nn.functional.softmax(x, dim=1))
     print(
-        f"Helion time: {sec:.4f}s, torch time: {baseline_sec:.4f}, speedup: {baseline_sec / sec:.2f}x"
+        f"Helion time: {sec:.4f}ms, torch time: {baseline_sec:.4f}, speedup: {baseline_sec / sec:.2f}x"
    )

helion/autotuner/base_search.py

Lines changed: 7 additions & 7 deletions
@@ -110,7 +110,7 @@ def benchmark_function(self, config: Config, fn: CompiledConfig) -> float:
             )
             t2 = time.perf_counter()
             self.log.debug(
-                lambda: f"result: {res:.4f}s (took {t1 - t0:.1f}s + {t2 - t1:.1f}s)",
+                lambda: f"result: {res:.4f}ms (took {t1 - t0:.1f}s + {t2 - t1:.1f}s)",
             )
             return res
         except OutOfResources:
@@ -336,15 +336,15 @@ def population_statistics(population: list[PopulationMember]) -> str:
         working = [x for x in population if not math.isinf(x.perf)]
         return (
             f"failed={len(population) - len(working)} "
-            f"min={working[0].perf:.4f}s "
-            f"mid={working[len(working) // 2].perf:.4f}s "
-            f"max={working[-1].perf:.4f}s "
+            f"min={working[0].perf:.4f} "
+            f"mid={working[len(working) // 2].perf:.4f} "
+            f"max={working[-1].perf:.4f} "
             f"best={population[0].config!s}"
         )
     return (
-        f"min={population[0].perf:.4f}s "
-        f"mid={population[len(population) // 2].perf:.4f}s "
-        f"max={population[-1].perf:.4f}s "
+        f"min={population[0].perf:.4f} "
+        f"mid={population[len(population) // 2].perf:.4f} "
+        f"max={population[-1].perf:.4f} "
         f"best={population[0].config!s}"
     )
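
Note on the base_search.py change above: only the unit on `res` changes, because `res` comes from `do_bench` (milliseconds) while `t0`/`t1`/`t2` come from `time.perf_counter()` (seconds), so the "(took ...s + ...s)" part correctly keeps its `s` suffix. A small illustrative sketch of that mixed-unit logging pattern (not the actual Helion code; the tensor, the `x @ x` workload, and the variable names are placeholders, and a CUDA device is assumed):

```python
import time

import torch
from triton.testing import do_bench

x = torch.randn(2048, 2048, device="cuda")

t0 = time.perf_counter()          # wall-clock start, in seconds
res = do_bench(lambda: x @ x)     # benchmarked runtime, in milliseconds
t1 = time.perf_counter()          # wall-clock end, in seconds
print(f"result: {res:.4f}ms (took {t1 - t0:.1f}s)")
```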
