Skip to content

Instantly share code, notes, and snippets.

@vadimkantorov
vadimkantorov / to_.py
Last active July 25, 2025 12:56
Inplace downcasting in PyTorch
# https://github.com/pytorch/pytorch/issues/158710
# https://github.com/pytorch/pytorch/issues/158698
# https://github.com/pytorch/pytorch/issues/69431
import torch
def to_(tensor1d, dtype, *, split_size = 0, chunks = 0):
# instead of clone() maybe could copy_ into a buffer, clone() does not allow using a buffer
assert tensor1d.ndim == 1
@vadimkantorov
vadimkantorov / base64_torch.py
Last active July 19, 2025 18:54
Base64 decoding in PyTorch
# https://en.wikipedia.org/wiki/Base64
# 00123456 00ABCDEF 00abcdef 00uvwxyz
# 123456AB CDEFabcd efuvwxyz
# this code does not support batches. adapting for e.g. concatenated varlen format is possible, but need to handle/preserve varlen information and paddings in some way
import torch
def base64_encode_padded(input_as_uint8_tensor):
base64_alphabet, base64_pad = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/', '='
encode = torch.tensor(list(map(ord, base64_alphabet)), dtype = torch.uint8, device = input_as_uint8_tensor.device)
@vadimkantorov
vadimkantorov / .nebius.s3cfg
Created July 16, 2025 13:24
s3cmd config .s3cfg for Nebius s3
# Nebius s3
# sudo apt-get install s3cmd # export the vars below or configure access_key / secret_key
# export AWS_ACCESS_KEY_ID=...
# export AWS_SECRET_ACCESS_KEY=...
# https://s3tools.org/usage
# s3cmd -c ~/.nebius.s3cfg ls
[default]
# access_key=...
# secret_key=...
@vadimkantorov
vadimkantorov / sitecustomize.py
Created July 10, 2025 18:03
Install an OOM hook in PyTorch
# PYTHONPATH=. python ...
import os
import torch
# Collects CUDA memory diagnostics for `device` (a memory summary and a memory snapshot) and prints
# the device, an OOM counter, the process id and the alloc / device_alloc / device_free arguments.
# NOTE(review): only the first lines of this function are visible in this excerpt; how the summary,
# the snapshot and snapshot_dump_file_pattern are used afterwards cannot be confirmed from here.
# NOTE(review): the mutable default `info` looks like deliberate persistent state shared across calls — confirm.
def cuda_oom_hook(device, alloc, device_alloc, device_free, info = dict(counter = 0), snapshot_dump_file_pattern = './memory_snapshot_{pid}_{oom_counter}.pt'):
memory_summary = torch.cuda.memory_summary(device = device)
# torch.cuda.memory._snapshot is a private (underscore-prefixed) PyTorch API
memory_snapshot = torch.cuda.memory._snapshot(device = device)
pid = os.getpid()
# NOTE(review): the default `info` dict is created with the key 'counter', but the line below reads
# info['oom_counter'] — with the default argument this raises KeyError unless the key is set elsewhere; verify.
print('device:', device, 'oom#:', info['oom_counter'], 'pid:', pid, 'alloc:', alloc, 'device_alloc:', device_alloc, 'device_free:', device_free)
@vadimkantorov
vadimkantorov / multiprocessing_pool_batched.py
Last active May 27, 2025 09:45
Example of using multiprocessing with explicitly batched inputs
import multiprocessing
import itertools
# toy workload: the integers 0..110
inputs = list(range(111))
# maximum number of items per batch
batchsize = 10
# NOTE(review): presumably the size of the worker pool created further down — not visible in this excerpt
num_workers = 4
# lazy, single-pass iterator yielding tuples of up to `batchsize` consecutive inputs
# (itertools.batched requires Python 3.12+)
batches = itertools.batched(inputs, batchsize)
def reducer(xs):
@vadimkantorov
vadimkantorov / cache_hf_model.sh
Created May 23, 2025 17:26
Downloads and tests import of a HuggingFace model into a meta device (and thus does not use any GPU/CPU memory)
# Usage: bash cache_hf_model.sh Qwen/Qwen3-8B
# export HF_HOME=/my/cache/HF_HOME
# The model id is read from the last command-line argument (sys.argv[-1]) and loaded with device_map="meta".
# "$@" is quoted so that each argument is forwarded to python verbatim, without word splitting or glob expansion.
python -c 'import sys, transformers; transformers.AutoModel.from_pretrained(sys.argv[-1], trust_remote_code=True, device_map="meta")' "$@"
@vadimkantorov
vadimkantorov / minidotenv.py
Created May 22, 2025 18:40
toml can be abused to read some basic key-value pairs as well
def load_dotenv(dotenv_path = '.env'):
# https://snarky.ca/use-toml-for-env-files/
# https://github.com/theskumar/python-dotenv
'''
# such simple key-value files are toml subset and can be read via tomllib without external packages or hacks
a="b"
c="d"
'''
import os, tomllib
os.environ.update(tomllib.load(open(dotenv_path, 'rb')))
@vadimkantorov
vadimkantorov / catfsspec.py
Last active June 12, 2025 11:50
Basic example of using fsspec explaining some quirks on porting from regular Python I/O
import sys, fsspec
# fsspec opens files in binary mode ('rb') by default, so text mode must be requested explicitly with 'rt'
with fsspec.open(sys.argv[1], 'rt') as stream:
    # the context manager is mandatory: the object returned by fsspec.open(...) has no read() method,
    # only the file object yielded by `with` does
    contents = stream.read()
    print(contents)
# echo world > hello.txt
# python catfsspec.py hello.txt
# python catfsspec.py file://hello.txt
# python catfsspec.py s3://mybucket/hello.txt
@vadimkantorov
vadimkantorov / git_private_fork.sh
Last active June 18, 2025 08:53
Create a private fork of verl
# reference: https://gist.github.com/0xjac/85097472043b697ab57ba1b1c7530274
git clone --bare git@github.com:volcengine/verl.git
cd verl.git
# create a bare repo vadimkantorov/verl
git push --mirror git@github.com:vadimkantorov/verl.git
cd .. && rm -rf verl.git
# set up upstream remote
@vadimkantorov
vadimkantorov / tqdm.py
Last active May 13, 2025 13:05
Extremely simplified single-file, 20 LOC version of https://tqdm.github.io/docs/tqdm/ for debugging tqdm bugs like https://github.com/tqdm/tqdm/issues/760 or dropping the full dependency
# Save as tqdm.py in project dir, then `from tqdm import tqdm; from tqdm.auto import tqdm` should pick up this class, if fails use export PYTHONPATH=.
# Test run: python tqdm.py
import os, sys
# Example of a consumer that imports tqdm through submodules — huggingface_hub/hf_api.py:
# from tqdm.auto import tqdm as base_tqdm
# from tqdm.contrib.concurrent import thread_map
# https://tqdm.github.io/docs/shortcuts/#tqdmauto
# Register this very module under the name 'tqdm.auto' so that `from tqdm.auto import tqdm`
# resolves to this file as well (when the file is run as a script, __name__ is '__main__').
sys.modules['tqdm.auto'] = sys.modules[__name__]