import os
import torch
from einops import rearrange
from transformers import pipeline
from flux.sampling import denoise, get_noise, get_schedule, unpack, prepare
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import torch | |
from torch.nn.attention._flex_attention import _create_block_mask, _create_mask | |
from functools import partial | |
from torch.nn.attention._flex_attention import _flex_attention | |
from triton.testing import do_bench | |
import torch.nn.functional as F | |
from functools import lru_cache | |
torch.set_default_device('cuda') | |
# Example usage |
Ref: Exclusive Q&A: John Carmack’s ‘Different Path’ to Artificial General Intelligence
"So I asked Ilya Sutskever, OpenAI’s chief scientist, for a reading list. He gave me a list of like 40 research papers and said, ‘If you really learn all of these, you’ll know 90% of what matters today.’ And I did. I plowed through all those things and it all started sorting out in my head."
Ref: https://x.com/ID_AA_Carmack/status/1622673143469858816
I rather expected @ilyasut to have made a public post by now after all the discussion of the AI reading list he gave me. A canonical list of references from a leading figure would be appreciated by many. I would be curious myself about what he would add from the last three years.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <stdlib.h> | |
#include <stdbool.h> | |
#include <tgmath.h> | |
#define max(x,y) ((x>y)?x:y) | |
#define half __fp16 | |
void E_(int* data0) { | |
int val0 = data0[0]; | |
data0[0] = (val0+1); | |
} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
[55883.721977] amdgpu: map VA 0x702eae9d2000 - 0x702eae9d3000 in entry 0000000072d2b750 | |
[55883.721996] amdgpu: INC mapping count 1 | |
[55883.722133] kfd kfd: amdgpu: ioctl cmd 0xc0184b0c (#0xc), arg 0x7ffe16172bef | |
[55883.722238] gmc_v11_0_process_interrupt: 6 callbacks suppressed | |
[55883.722250] amdgpu 0000:c3:00.0: amdgpu: [gfxhub] page fault (src_id:0 ring:24 vmid:8 pasid:32774, for process python3 pid 356134 thread python3 pid 356134) | |
[55883.722343] amdgpu 0000:c3:00.0: amdgpu: in page starting at address 0x00000000aabbc000 from client 10 | |
[55883.722391] amdgpu 0000:c3:00.0: amdgpu: GCVM_L2_PROTECTION_FAULT_STATUS:0x00800A30 | |
[55883.722429] amdgpu 0000:c3:00.0: amdgpu: Faulty UTCL2 client ID: CPC (0x5) | |
[55883.722466] amdgpu 0000:c3:00.0: amdgpu: MORE_FAULTS: 0x0 | |
[55883.722497] amdgpu 0000:c3:00.0: amdgpu: WALKER_ERROR: 0x0 |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*- | |
# | |
# TARGET arch is: ['-D__HIP_PLATFORM_AMD__', '-I/opt/rocm/include'] | |
# WORD_SIZE is: 8 | |
# POINTER_SIZE is: 8 | |
# LONGDOUBLE_SIZE is: 16 | |
# | |
import ctypes | |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# tiny@tiny9:~/tinygrad$ python3 examples/benchmark_copies.py | |
# CPU copy 6.18 ms, 16.28 GB/s | |
# GPU copy 4.38 ms, 23.00 GB/s | |
# GPU 6x 1.85 ms, 54.54 GB/s | |
import time | |
def timeit(fxn): | |
tms = [] | |
for _ in range(10): | |
st = time.perf_counter() |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
__kernel void matmul(__global float* data0, const __global float* data1, const __global float* data2) { | |
int gidx0 = get_group_id(1); /* 512 */ | |
int gidx1 = get_group_id(0); /* 512 */ | |
float2 acc0 = (float2)(0.0f,0.0f); | |
float2 acc1 = (float2)(0.0f,0.0f); | |
for (int ridx0 = 0; ridx0 < 512; ++ridx0) { | |
float2 val0 = (float2)(*((__global float2*)(data1+(gidx0*2048)+(ridx0*2)))); | |
float2 val1 = (float2)(*((__global float2*)(data1+(gidx0*2048)+(ridx0*2)+1024))); | |
float2 val2 = (float2)(*((__global float2*)(data2+(gidx1*2)+(ridx0*2048)))); | |
float2 val3 = (float2)(*((__global float2*)(data2+(gidx1*2)+(ridx0*2048)+1024))); |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from typing import Callable, List | |
import numpy as np | |
import torch | |
from torch._dynamo.backends.common import aot_autograd | |
from torch.fx.graph_module import GraphModule | |
# NOTE: requires torch >= 2.1.0 | |
def np2torch(fn: Callable) -> Callable: |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import torch | |
import torch.nn as nn | |
import torch.nn.functional as F | |
from transformers import GPT2Tokenizer, GPT2LMHeadModel | |
ppl_model_name = 'gpt2-xl' if device == 'cuda' else 'gpt2' | |
ppl_tokenizer = GPT2Tokenizer.from_pretrained(ppl_model_name) | |
load_opts = { | |
'device_map': 'auto', |
NewerOlder