msaroufim · March 21, 2025 00:34
diff --git a/gistfile1.txt b/gistfile1.txt
 (pt) ➜  examples git:(msaroufim/noheader) ✗ python tensor_base_example.py
 Clearing existing build directory: /home/marksaroufim/pytorch/examples/custom_extension_build
 Created build directory: /home/marksaroufim/pytorch/examples/custom_extension_build
 Compiling TensorBase CUDA extension with no_header=True...
 Using /home/marksaroufim/.cache/torch_extensions/py310_cu124 as PyTorch extensions root...
 Detected CUDA files, patching ldflags
 Emitting ninja build file /home/marksaroufim/.cache/torch_extensions/py310_cu124/tensor_base_example/build.ninja...
 Building extension module tensor_base_example...
 Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N)
 [1/2] /home/marksaroufim/.conda/envs/pt/bin/nvcc --generate-dependencies-with-compile --dependency-output cuda.cuda.o.d -ccbin /home/marksaroufim/.conda/envs/pt/bin/x86_64-conda-linux-gnu-cc -DTORCH_EXTENSION_NAME=tensor_base_example -DTORCH_API_INCLUDE_EXTENSION_H -DPYBIND11_COMPILER_TYPE=\"_gcc\" -DPYBIND11_STDLIB=\"_libstdcpp\" -DPYBIND11_BUILD_ABI=\"_cxxabi1011\" -I/home/marksaroufim/.conda/envs/pt/targets/x86_64-linux/include -isystem /home/marksaroufim/.conda/envs/pt/lib/python3.10/site-packages/torch/include -isystem /home/marksaroufim/.conda/envs/pt/lib/python3.10/site-packages/torch/include/torch/csrc/api/include -isystem /home/marksaroufim/.conda/envs/pt/lib/python3.10/site-packages/torch/include/TH -isystem /home/marksaroufim/.conda/envs/pt/lib/python3.10/site-packages/torch/include/THC -isystem /home/marksaroufim/.conda/envs/pt/include -isystem /home/marksaroufim/.conda/envs/pt/include/python3.10 -D_GLIBCXX_USE_CXX11_ABI=0 -D__CUDA_NO_HALF_OPERATORS__ -D__CUDA_NO_HALF_CONVERSIONS__ -D__CUDA_NO_BFLOAT16_CONVERSIONS__ -D__CUDA_NO_HALF2_OPERATORS__ --expt-relaxed-constexpr -gencode=arch=compute_90,code=sm_90 --compiler-options '-fPIC' -std=c++17 -c /home/marksaroufim/.cache/torch_extensions/py310_cu124/tensor_base_example/cuda.cu -o cuda.cuda.o 
 nvcc warning : incompatible redefinition for option 'compiler-bindir', the last value of this option was used
 nvcc warning : incompatible redefinition for option 'compiler-bindir', the last value of this option was used
 [2/2] /home/marksaroufim/.conda/envs/pt/bin/x86_64-conda-linux-gnu-c++ main.o cuda.cuda.o -shared -L/home/marksaroufim/.conda/envs/pt/lib/python3.10/site-packages/torch/lib -lc10 -lc10_cuda -ltorch_cpu -ltorch_cuda -ltorch -ltorch_python -L/home/marksaroufim/.conda/envs/pt/lib -lcudart -o tensor_base_example.so
 Loading extension module tensor_base_example...
 Extension compiled successfully, end_time=0:00:01.743991!
 Testing on CUDA tensors...
 Traceback (most recent call last):
  File "/home/marksaroufim/pytorch/examples/tensor_base_example.py", line 128, in <module>
    main()
  File "/home/marksaroufim/pytorch/examples/tensor_base_example.py", line 105, in main
    result = module.tensor_base_add(x, y)
 TypeError: tensor_base_add(): incompatible function arguments. The following argument types are supported:
    1. (arg0: at::TensorBase, arg1: at::TensorBase) -> at::TensorBase

 Invoked with: tensor([ 0.3075,  0.2251,  2.0814, -0.1993,  2.1120,  0.4359, -1.2128, -1.3712,
         1.0081, -0.7876, -0.4743,  0.0932, -0.4981,  0.9163,  0.4615,  0.7497,
        -0.1410,  2.7299, -1.6575,  0.3628, -0.6320,  0.7387,  0.1129, -0.7351,
         0.2625, -0.1444, -1.2933, -0.6645,  1.7553,  1.8268,  1.5368,  1.1989,
         1.3375,  0.0319,  0.6104, -0.2583,  0.4208, -0.4763,  1.1738, -0.5811,
        -0.8523,  0.7788, -1.1727, -0.1039,  1.3611, -0.2009,  0.1007, -0.2128,
         1.3701, -0.3160,  0.4778, -0.7499,  0.4355, -1.0649, -0.1144,  1.1037,
         0.1247,  0.5750,  0.3477, -1.5343,  0.0235, -1.7903,  1.0686,  0.3601,
        -0.8557, -0.3729,  0.0528, -0.0330, -1.4052, -2.0251,  1.7193,  1.6655,
        -0.9704, -0.4773,  0.0617, -0.2456, -0.4386, -0.0450, -1.4266, -1.2182,
        -0.7235,  0.0692,  1.1015,  1.6901,  1.1024,  0.8707,  0.0153,  3.0676,
        -0.1598,  0.9846, -0.9006,  1.8185,  0.9716,  1.2846, -0.5822, -1.3630,
         0.9436,  0.2025, -0.9631,  0.5301], device='cuda:0'), tensor([ 0.8495, -1.1602,  0.4871,  0.5259, -1.0449, -0.2025,  0.3765,  0.1866,
         0.8778, -0.1032, -0.0615, -1.4703, -0.7150, -0.6080,  1.4939, -0.5385,
         0.3888,  0.3340, -0.6579,  1.2212,  0.8587, -0.6999, -2.5423, -0.4974,
        -1.4718, -0.8848, -1.6310, -0.0671,  0.4675,  0.5053,  1.0451, -0.1926,
         0.0358,  0.7107, -0.7308,  0.9124,  0.2783,  1.0310, -1.0966, -3.5018,
         0.5132,  0.2854, -0.1475,  0.7848, -0.6384, -0.8235,  0.9152, -0.2428,
        -1.9931,  1.3734, -1.5304,  0.1267,  0.3652,  2.8141, -0.7612,  0.7084,
        -0.6642, -0.1870,  0.9129, -0.0194, -0.1302,  0.9792,  1.6826, -0.7609,
         1.8881,  1.7524, -0.1715,  0.7033,  0.7665, -0.4587,  0.8028, -0.9926,
        -0.5368,  0.0837,  1.2282, -0.4884, -1.1954,  0.1941, -0.5928,  1.4722,
         1.1105, -0.2332,  0.6430,  0.1946, -0.5192,  0.1701, -0.3154,  1.4991,
         0.3135, -0.3949, -1.1011,  0.7934,  0.1645,  0.2413,  0.9921,  0.0275,
        -0.4305,  0.5826, -2.7652, -0.9962], device='cuda:0')
 (pt) ➜  examples git:(msaroufim/noheader) ✗
	(pt) ➜ examples git:(msaroufim/noheader) ✗ python tensor_base_example.py
	Clearing existing build directory: /home/marksaroufim/pytorch/examples/custom_extension_build
	Created build directory: /home/marksaroufim/pytorch/examples/custom_extension_build
	Compiling TensorBase CUDA extension with no_header=True...
	Using /home/marksaroufim/.cache/torch_extensions/py310_cu124 as PyTorch extensions root...
	Detected CUDA files, patching ldflags
	Emitting ninja build file /home/marksaroufim/.cache/torch_extensions/py310_cu124/tensor_base_example/build.ninja...
	Building extension module tensor_base_example...
	Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N)
	[1/2] /home/marksaroufim/.conda/envs/pt/bin/nvcc --generate-dependencies-with-compile --dependency-output cuda.cuda.o.d -ccbin /home/marksaroufim/.conda/envs/pt/bin/x86_64-conda-linux-gnu-cc -DTORCH_EXTENSION_NAME=tensor_base_example -DTORCH_API_INCLUDE_EXTENSION_H -DPYBIND11_COMPILER_TYPE=\"_gcc\" -DPYBIND11_STDLIB=\"_libstdcpp\" -DPYBIND11_BUILD_ABI=\"_cxxabi1011\" -I/home/marksaroufim/.conda/envs/pt/targets/x86_64-linux/include -isystem /home/marksaroufim/.conda/envs/pt/lib/python3.10/site-packages/torch/include -isystem /home/marksaroufim/.conda/envs/pt/lib/python3.10/site-packages/torch/include/torch/csrc/api/include -isystem /home/marksaroufim/.conda/envs/pt/lib/python3.10/site-packages/torch/include/TH -isystem /home/marksaroufim/.conda/envs/pt/lib/python3.10/site-packages/torch/include/THC -isystem /home/marksaroufim/.conda/envs/pt/include -isystem /home/marksaroufim/.conda/envs/pt/include/python3.10 -D_GLIBCXX_USE_CXX11_ABI=0 -D__CUDA_NO_HALF_OPERATORS__ -D__CUDA_NO_HALF_CONVERSIONS__ -D__CUDA_NO_BFLOAT16_CONVERSIONS__ -D__CUDA_NO_HALF2_OPERATORS__ --expt-relaxed-constexpr -gencode=arch=compute_90,code=sm_90 --compiler-options '-fPIC' -std=c++17 -c /home/marksaroufim/.cache/torch_extensions/py310_cu124/tensor_base_example/cuda.cu -o cuda.cuda.o
	nvcc warning : incompatible redefinition for option 'compiler-bindir', the last value of this option was used
	nvcc warning : incompatible redefinition for option 'compiler-bindir', the last value of this option was used
	[2/2] /home/marksaroufim/.conda/envs/pt/bin/x86_64-conda-linux-gnu-c++ main.o cuda.cuda.o -shared -L/home/marksaroufim/.conda/envs/pt/lib/python3.10/site-packages/torch/lib -lc10 -lc10_cuda -ltorch_cpu -ltorch_cuda -ltorch -ltorch_python -L/home/marksaroufim/.conda/envs/pt/lib -lcudart -o tensor_base_example.so
	Loading extension module tensor_base_example...
	Extension compiled successfully, end_time=0:00:01.743991!
	Testing on CUDA tensors...
	Traceback (most recent call last):
	File "/home/marksaroufim/pytorch/examples/tensor_base_example.py", line 128, in <module>
	main()
	File "/home/marksaroufim/pytorch/examples/tensor_base_example.py", line 105, in main
	result = module.tensor_base_add(x, y)
	TypeError: tensor_base_add(): incompatible function arguments. The following argument types are supported:
	1. (arg0: at::TensorBase, arg1: at::TensorBase) -> at::TensorBase

	Invoked with: tensor([ 0.3075, 0.2251, 2.0814, -0.1993, 2.1120, 0.4359, -1.2128, -1.3712,
	1.0081, -0.7876, -0.4743, 0.0932, -0.4981, 0.9163, 0.4615, 0.7497,
	-0.1410, 2.7299, -1.6575, 0.3628, -0.6320, 0.7387, 0.1129, -0.7351,
	0.2625, -0.1444, -1.2933, -0.6645, 1.7553, 1.8268, 1.5368, 1.1989,
	1.3375, 0.0319, 0.6104, -0.2583, 0.4208, -0.4763, 1.1738, -0.5811,
	-0.8523, 0.7788, -1.1727, -0.1039, 1.3611, -0.2009, 0.1007, -0.2128,
	1.3701, -0.3160, 0.4778, -0.7499, 0.4355, -1.0649, -0.1144, 1.1037,
	0.1247, 0.5750, 0.3477, -1.5343, 0.0235, -1.7903, 1.0686, 0.3601,
	-0.8557, -0.3729, 0.0528, -0.0330, -1.4052, -2.0251, 1.7193, 1.6655,
	-0.9704, -0.4773, 0.0617, -0.2456, -0.4386, -0.0450, -1.4266, -1.2182,
	-0.7235, 0.0692, 1.1015, 1.6901, 1.1024, 0.8707, 0.0153, 3.0676,
	-0.1598, 0.9846, -0.9006, 1.8185, 0.9716, 1.2846, -0.5822, -1.3630,
	0.9436, 0.2025, -0.9631, 0.5301], device='cuda:0'), tensor([ 0.8495, -1.1602, 0.4871, 0.5259, -1.0449, -0.2025, 0.3765, 0.1866,
	0.8778, -0.1032, -0.0615, -1.4703, -0.7150, -0.6080, 1.4939, -0.5385,
	0.3888, 0.3340, -0.6579, 1.2212, 0.8587, -0.6999, -2.5423, -0.4974,
	-1.4718, -0.8848, -1.6310, -0.0671, 0.4675, 0.5053, 1.0451, -0.1926,
	0.0358, 0.7107, -0.7308, 0.9124, 0.2783, 1.0310, -1.0966, -3.5018,
	0.5132, 0.2854, -0.1475, 0.7848, -0.6384, -0.8235, 0.9152, -0.2428,
	-1.9931, 1.3734, -1.5304, 0.1267, 0.3652, 2.8141, -0.7612, 0.7084,
	-0.6642, -0.1870, 0.9129, -0.0194, -0.1302, 0.9792, 1.6826, -0.7609,
	1.8881, 1.7524, -0.1715, 0.7033, 0.7665, -0.4587, 0.8028, -0.9926,
	-0.5368, 0.0837, 1.2282, -0.4884, -1.1954, 0.1941, -0.5928, 1.4722,
	1.1105, -0.2332, 0.6430, 0.1946, -0.5192, 0.1701, -0.3154, 1.4991,
	0.3135, -0.3949, -1.1011, 0.7934, 0.1645, 0.2413, 0.9921, 0.0275,
	-0.4305, 0.5826, -2.7652, -0.9962], device='cuda:0')
	(pt) ➜ examples git:(msaroufim/noheader) ✗