mirror of
https://github.com/compiler-explorer/compiler-explorer.git
synced 2025-12-27 10:33:59 -05:00
Helion is a Python-embedded domain-specific language (DSL) for authoring machine learning kernels, designed to compile down to Triton (https://github.com/pytorch/helion). I mostly followed the Triton example; please let me know if there's more I need to do or anything I missed. Infra PR: https://github.com/compiler-explorer/infra/pull/1875
30 lines
693 B
Python
30 lines
693 B
Python
import torch
|
|
import helion
|
|
import helion.language as hl
|
|
|
|
|
|
# Explicit kernel configuration handed to ``helion.kernel``.
# NOTE(review): presumably this pins the kernel to a single configuration
# instead of letting Helion search for one -- confirm against the Helion docs.
@helion.kernel(
    config=helion.Config(
        block_sizes=[],
        indexing='pointer',
        load_eviction_policies=['', ''],
        num_stages=2,
        num_warps=4,
        pid_type='flat',
        range_flattens=[None],
        range_multi_buffers=[None],
        range_num_stages=[0],
        range_unroll_factors=[0],
        range_warp_specializes=[],
    ),
    static_shapes=True,
)
def add(x: torch.Tensor, y: torch.Tensor, scale: float) -> torch.Tensor:
    """Return ``(x + y) * scale``, computed one grid index at a time.

    Args:
        x: First input tensor; the output is allocated with
            ``torch.empty_like(x)`` and the iteration space is ``x.size()``.
        y: Second input tensor, read at the same indices as ``x``
            (assumed to be index-compatible with ``x`` -- not checked here).
        scale: Scalar multiplier applied to each element-wise sum.

    Returns:
        A new tensor holding the scaled sum.
    """
    out = torch.empty_like(x)
    for i in hl.grid(x.size()):
        summed = x[i] + y[i]
        out[i] = summed * scale
    return out
|
|
|
|
|
|
# Example invocation of the kernel defined above.
# Both inputs are created with torch.rand(1024) and then moved with .cuda().
x = torch.rand(1024).cuda()
y = torch.rand(1024).cuda()

# Scalar multiplier passed as the kernel's third argument.
scale = 2.0

result = add(x, y, scale)