Skip to content

Instantly share code, notes, and snippets.

@shunting314
Created June 13, 2025 19:47
Show Gist options
  • Save shunting314/63d7fcee93e49c7700507fcf101e75a4 to your computer and use it in GitHub Desktop.
Save shunting314/63d7fcee93e49c7700507fcf101e75a4 to your computer and use it in GitHub Desktop.
import torch
from triton.testing import do_bench
import functools
from torch._inductor import config
from torch._dynamo.decorators import mark_dynamic
import os
@torch.compile
def f(x: torch.Tensor) -> torch.Tensor:
    """Return the sum of ``x`` over its last dimension.

    The function is wrapped with ``torch.compile``; the script below times
    this compiled callable.

    Args:
        x: Tensor to reduce.

    Returns:
        The result of ``x.sum(dim=-1)``, i.e. ``x`` with its last dimension
        summed out.
    """
    return x.sum(dim=-1)
# Benchmark input size: N rows, each 4096 * 4 = 16384 elements wide.
N = 50
# Shorthand for torch.randn that always passes device="cuda".
C = functools.partial(torch.randn, device="cuda")
x_large = C(N, 4096 * 4)
# Call f once before timing; presumably this keeps torch.compile's one-time
# compilation cost out of the measurement below -- TODO confirm.
f(x_large)
# Time f on the same input with Triton's do_bench helper. The result is
# assumed to be in milliseconds, per the variable name and the note below.
ms = do_bench(lambda: f(x_large))
# Author's recorded timings: 0.037 ms vs. 0.025 ms.
# NOTE(review): the two configurations being compared are not stated in this
# file -- confirm with the author.
print(ms)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment