Skip to content

Instantly share code, notes, and snippets.

View YouJiacheng's full-sized avatar

You Jiacheng YouJiacheng

  • IIIS, Tsinghua University
  • Beijing, China
View GitHub Profile
@YouJiacheng
YouJiacheng / prompt.md
Created June 3, 2025 04:30 — forked from cloneofsimo/prompt.md
Neel's Paper Quality Analysis Prompt

Credit: How to write ML papers by Neel Nanda

You are a chatbot that gives constructive analysis of the following work. Specifically, you care about the following criteria:

## Core Narrative Quality
- **Clear Claims**: Contains 1-3 specific, concrete claims that fit within a cohesive theme
- **Strong Motivation**: Clearly explains why readers should care ("so what?")
@YouJiacheng
YouJiacheng / cdf.py
Created May 30, 2025 09:09
supports CUDA, no graph breaks
def cdf(t: Tensor, thresholds: list[float]) -> Tensor:
    """Evaluate the empirical CDF of ``t`` at each threshold.

    Args:
        t: Tensor of samples; every element is counted.
        thresholds: Bucket boundaries. ``torch.bucketize`` expects them in
            ascending order.

    Returns:
        A 1-D tensor with ``len(thresholds) + 1`` entries. Entry ``i`` (for
        ``i < len(thresholds)``) is the fraction of elements of ``t`` that
        are ``<= thresholds[i]``; the last entry accounts for every element.
    """
    boundaries = t.new_tensor(thresholds, dtype=torch.float32)
    # For each element x: level == sum(x > v for v in thresholds), i.e. the
    # index of the bucket that x falls into.
    level = torch.bucketize(t, boundaries, out_int32=True)
    # One histogram slot per bucket (one more bucket than thresholds).
    count = t.new_zeros(len(thresholds) + 1, dtype=torch.int32)
    # accumulate=True so that repeated bucket indices add up instead of
    # overwriting each other.
    count.index_put_((level,), count.new_ones(1), accumulate=True)
    # Running total of the histogram, normalised by the sample count.
    return count.cumsum(0) / t.numel()
import time
import unicodedata
import regex as re
regex_pattern = r"(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\r\n\p{L}\p{N}]?\p{L}+|\p{N}| ?[^\s\p{L}\p{N}]+[\r\n]*|\s*[\r\n]+|\s+(?!\S)|\s+"
# --- Helper Functions for Character Properties ---
@YouJiacheng
YouJiacheng / train_gptm.py
Last active May 10, 2025 12:47
improve logging
with open(__file__) as f:
code = f.read() # read the code of this file ASAP, for logging
import copy
from dataclasses import dataclass
from functools import lru_cache
import os
from pathlib import Path
import time
import uuid
@YouJiacheng
YouJiacheng / pyproject.toml
Created May 6, 2025 04:12
code for repro regression, data available in: https://github.com/KellerJordan/modded-nanogpt/
[project]
name = "modded-nanogpt"
version = "0.1.0"
description = "Add your description here"
readme = "README.md"
requires-python = "==3.12.*"
dependencies = [
"numpy>=2.1.3",
"torch",
"pytorch-triton",
from torch._logging._internal import trace_structured # noqa: E402
import torch._inductor.codecache # noqa: E402
import torch._inductor.graph # noqa: E402
def _patched_trace_structured(name, *args, **kwargs):
if name == "inductor_output_code":
match args, kwargs:
case (metadata_fn, *_), _:
filename = metadata_fn().get("filename", "Unknown")
case _, {"metadata_fn": metadata_fn}:
filename = metadata_fn().get("filename", "Unknown")
from functools import partial
import jax
import jax.numpy as jnp
import optax
def poly(x: jnp.ndarray, w: jnp.ndarray):
assert w.shape == (3,)
w = w.astype(jnp.float32)
import os
import sys
with open(sys.argv[0]) as f:
code = f.read() # read the code of this file ASAP, for logging
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"
import time
import uuid
from dataclasses import dataclass
from functools import lru_cache, partial
import os
import sys
with open(sys.argv[0]) as f:
code = f.read() # read the code of this file ASAP, for logging
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"
import time
import uuid
from dataclasses import dataclass
from functools import lru_cache, partial
import os
import sys
from typing import override
with open(sys.argv[0]) as f:
code = f.read() # read the code of this file ASAP, for logging
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"
import contextlib
import time
import uuid