This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #include <tgmath.h> | |
| typedef float float4 __attribute__((aligned(16),ext_vector_type(4))); | |
| void r_8_112_28_4_4_3_3_3(float* restrict data0_401408, float* restrict data1_150528, float* restrict data2_864, float* restrict data3_32, float* restrict data4_32, float* restrict data5_32, float* restrict data6_32, int core_id) { | |
| float acc0[16]; | |
| float4 cast0 = (float4){0.0f,0.0f,0.0f,0.0f}; | |
| for (int Lidx3 = 0; Lidx3 < 8; Lidx3++) { | |
| int alu0 = (Lidx3<<2); | |
| float4 val0 = (*((float4*)((data3_32+alu0)))); | |
| float4 val1 = (*((float4*)((data4_32+alu0)))); | |
| float4 val2 = (*((float4*)((data5_32+alu0)))); |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| def k0(): | |
| c0 = UOp(Ops.DEFINE_GLOBAL, dtypes.uint.ptr(1), arg=0) | |
| c2 = c0.index(UOp.const(dtypes.index, 0)) | |
| c3 = c2.load() | |
| c5 = (c3+512) | |
| c6 = c2.store(c5) | |
| ast = c6.sink() | |
| return ast | |
| def k1(): |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Maybe this helps someone when they Google this. | |
| [ 48%] Building CXX object runtime_src/core/tools/xbtracer/CMakeFiles/xrt_trace.dir/src/wrapper/tracer.cpp.o | |
| /usr/bin/ld: libxbtracer_protobuf.a(func.pb.cc.o): undefined reference to symbol '_ZN4absl12lts_2025081412log_internal21CheckOpMessageBuilder7ForVar2Ev' | |
| /usr/bin/ld: /usr/lib/libabsl_log_internal_check_op.so.2508.0.0: error adding symbols: DSO missing from command line | |
| collect2: error: ld returned 1 exit status | |
| XRT master ❯ git diff | |
| diff --git a/src/runtime_src/core/tools/xbtracer/CMakeLists.txt b/src/runtime_src/core/tools/xbtracer/CMakeLists.txt | |
| index 2276159fb..02631afe9 100644 |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| comma@tiny24:/data/openpilot/tinygrad_repo$ python3 openpilot/compile2.py https://github.com/commaai/openpilot/raw/v0.9.7/selfdrive/modeld/models/supercombo.onnx | |
| https://github.com/commaai/openpilot/raw/v0.9.7/selfdrive/modeld/models/supercombo.onnx: 100%|███████████████████████████████████████████| 51.5M/51.5M [00:00<00:00, 88.2MB/s] | |
| cache is out of date, clearing it | |
| /usr/local/pyenv/versions/3.11.4/lib/python3.11/site-packages/pyopencl/__init__.py:528: CompilerWarning: Non-empty compiler output encountered. Set the environment variable PYOPENCL_COMPILER_OUTPUT=1 to see more. | |
| lambda: self._prg.build(options_bytes, devices), | |
| 190 schedule items depend on the input, 462 don't | |
| 7 inputs | |
| 13: rewrite input, image dtype dtypes.imageh((16, 2048, 4)), (View(shape=(1, 16, 32, 64, 2), strides=(0, 8192, 256, 4, 1), offset=0, mask=None, contiguous=False), View(shape=(1, 16, 32, 128), strides=(0, 4096, 128, 1), offset=0, mask=None, contiguous=True)) | |
| 24: rewrite input, image dtype dtypes.imageh((8, 2048, 4)), (View(shap |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| from huggingface_hub import snapshot_download | |
| from tinygrad import nn, Tensor, TinyJit, Device | |
| import time | |
| class Block: | |
| def __init__(self, in_dims, dims, stride=1): | |
| super().__init__() | |
| self.conv1 = nn.Conv2d( | |
| in_dims, dims, kernel_size=3, stride=stride, padding=1, bias=False |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #include <stdlib.h> | |
| #include <stdbool.h> | |
| #include <tgmath.h> | |
| #define max(x,y) ((x>y)?x:y) | |
| #define half __fp16 | |
| void E_(int* data0) { | |
| int val0 = data0[0]; | |
| data0[0] = (val0+1); | |
| } |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| [55883.721977] amdgpu: map VA 0x702eae9d2000 - 0x702eae9d3000 in entry 0000000072d2b750 | |
| [55883.721996] amdgpu: INC mapping count 1 | |
| [55883.722133] kfd kfd: amdgpu: ioctl cmd 0xc0184b0c (#0xc), arg 0x7ffe16172bef | |
| [55883.722238] gmc_v11_0_process_interrupt: 6 callbacks suppressed | |
| [55883.722250] amdgpu 0000:c3:00.0: amdgpu: [gfxhub] page fault (src_id:0 ring:24 vmid:8 pasid:32774, for process python3 pid 356134 thread python3 pid 356134) | |
| [55883.722343] amdgpu 0000:c3:00.0: amdgpu: in page starting at address 0x00000000aabbc000 from client 10 | |
| [55883.722391] amdgpu 0000:c3:00.0: amdgpu: GCVM_L2_PROTECTION_FAULT_STATUS:0x00800A30 | |
| [55883.722429] amdgpu 0000:c3:00.0: amdgpu: Faulty UTCL2 client ID: CPC (0x5) | |
| [55883.722466] amdgpu 0000:c3:00.0: amdgpu: MORE_FAULTS: 0x0 | |
| [55883.722497] amdgpu 0000:c3:00.0: amdgpu: WALKER_ERROR: 0x0 |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # one hit, no loop needed | |
| # this is caused by creating a KFD_IOC_QUEUE_TYPE_COMPUTE_AQL without an EOP buffer | |
| # this causes the MES to page fault | |
| import os, ctypes, pathlib, re, fcntl, functools, mmap, time | |
| import tinygrad.runtime.autogen.kfd as kfd | |
| from tinygrad.helpers import to_mv | |
| from extra.hip_gpu_driver import hip_ioctl | |
| import tinygrad.runtime.autogen.hsa as hsa |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # -*- coding: utf-8 -*- | |
| # | |
| # TARGET arch is: ['-D__HIP_PLATFORM_AMD__', '-I/opt/rocm/include'] | |
| # WORD_SIZE is: 8 | |
| # POINTER_SIZE is: 8 | |
| # LONGDOUBLE_SIZE is: 16 | |
| # | |
| import ctypes | |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # tiny@tiny9:~/tinygrad$ python3 examples/benchmark_copies.py | |
| # CPU copy 6.18 ms, 16.28 GB/s | |
| # GPU copy 4.38 ms, 23.00 GB/s | |
| # GPU 6x 1.85 ms, 54.54 GB/s | |
| import time | |
| def timeit(fxn): | |
| tms = [] | |
| for _ in range(10): | |
| st = time.perf_counter() |
NewerOlder