Created
November 3, 2013 18:18
-
-
Save ibayer/7293108 to your computer and use it in GitHub Desktop.
trying to call cblas without python function call overhead
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"metadata": { | |
"name": "" | |
}, | |
"nbformat": 3, | |
"nbformat_minor": 0, | |
"worksheets": [ | |
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"# using ubuntu 12.10\n", | |
"# shut up debug symbols: https://groups.google.com/a/continuum.io/forum/#!topic/numba-users/wwC80xyjuu4\n", | |
"# http://arjones6.blogspot.ch/2013/05/passing-multidimensional-numpy-arrays.html" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [], | |
"prompt_number": 25 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"from cffi import FFI\n", | |
"ffi = FFI()\n", | |
"ffi.cdef(\"\"\" // some declarations from the man page\n", | |
" double cblas_ddot (const int N, const double * x, const int incx, const double * y, const int incy);\n", | |
"\"\"\")\n", | |
"C = ffi.verify(\"\"\" // passed to the real C compiler\n", | |
"#include <gsl/gsl_cblas.h>\n", | |
"\"\"\", libraries=['gslcblas']) # or a list of libraries to link with" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [], | |
"prompt_number": 18 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"import numpy as np\n", | |
"import ctypes\n", | |
"from numba import double\n", | |
"\n", | |
"length = 33\n", | |
"x = np.arange(length, dtype=np.float64)\n", | |
"y = x.copy()\n", | |
"\n", | |
"x_p = ffi.cast(\"double *\", x.ctypes.data)\n", | |
"y_p = ffi.cast(\"double *\", y.ctypes.data)\n", | |
"\n", | |
"assert C.cblas_ddot(length,x_p, 1, y_p, 1) == np.dot(x,y)\n", | |
"print 'np.dot'\n", | |
"%timeit np.dot(x,y)\n", | |
"print 'cblas ddot'\n", | |
"%timeit C.cblas_ddot(length,x_p, 1, y_p, 1)" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": [ | |
"np.dot\n", | |
"1000000 loops, best of 3: 642 ns per loop" | |
] | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": [ | |
"\n", | |
"cblas ddot\n", | |
"1000000 loops, best of 3: 364 ns per loop" | |
] | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": [ | |
"\n" | |
] | |
} | |
], | |
"prompt_number": 27 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"from numba import autojit\n", | |
"from numba.decorators import jit, autojit\n", | |
"X = np.arange(300*300).reshape(300,300)\n", | |
"\n", | |
"\n", | |
"def sum2d(arr):\n", | |
" M, N = arr.shape\n", | |
" result = 0.0\n", | |
" for i in range(M):\n", | |
" for j in range(N):\n", | |
" result += arr[i,j]\n", | |
" return result\n", | |
"\n", | |
"sum2d_numba = autojit(sum2d)\n", | |
"\n", | |
"print 'sum'\n", | |
"%timeit sum2d(X)\n", | |
"print 'sum + numba'\n", | |
"%timeit sum2d_numba(X)" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": [ | |
"sum\n", | |
"10 loops, best of 3: 42.2 ms per loop" | |
] | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": [ | |
"\n", | |
"sum + numba\n" | |
] | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": [ | |
"10000 loops, best of 3: 110 \u00b5s per loop\n" | |
] | |
} | |
], | |
"prompt_number": 28 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"def sumdot(x, y, n):\n", | |
" result = 0.0\n", | |
" for i in xrange(n):\n", | |
" result += np.dot(x,y)\n", | |
" return result\n", | |
"\n", | |
"print 'np.dot'\n", | |
"%timeit sumdot(x, y, 1000000)\n", | |
"sumdot_numba = autojit(sumdot)\n", | |
"print 'np.dot + numba'\n", | |
"%timeit sumdot_numba(x,y, 1000000)\n", | |
"\n", | |
"\n", | |
"print 'ddot (cblas/cffi)'\n", | |
"from numba import autojit\n", | |
"\n", | |
"x_p = ffi.cast(\"double *\", x.ctypes.data)\n", | |
"y_p = ffi.cast(\"double *\", y.ctypes.data)\n", | |
"\n", | |
"def sumdot_cffi(x_p, y_p, n):\n", | |
"\n", | |
" length = len(x)\n", | |
" result = 0.0\n", | |
" for i in xrange(n):\n", | |
" result += C.cblas_ddot(length, x_p, 1, y_p, 1)\n", | |
" return result\n", | |
"\n", | |
"assert sumdot_cffi(x_p, y_p, 1000000) == sumdot(x, y, 1000000)\n", | |
"\n", | |
"%timeit sumdot_cffi(x_p, y_p, 1000000)\n", | |
"sumdot_cffi_numba = autojit(sumdot_cffi)\n", | |
"print 'ddot (cblas/cffi) + numba'\n", | |
"%timeit sumdot_cffi(x_p, y_p, 1000000)" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": [ | |
"np.dot\n", | |
"1 loops, best of 3: 902 ms per loop" | |
] | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": [ | |
"\n", | |
"np.dot + numba\n" | |
] | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": [ | |
"1 loops, best of 3: 528 ms per loop\n", | |
"ddot (cblas/cffi)\n", | |
"1 loops, best of 3: 347 ms per loop" | |
] | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": [ | |
"\n", | |
"ddot (cblas/cffi) + numba\n", | |
"1 loops, best of 3: 347 ms per loop" | |
] | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": [ | |
"\n" | |
] | |
} | |
], | |
"prompt_number": 26 | |
} | |
], | |
"metadata": {} | |
} | |
] | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment