Skip to content

Instantly share code, notes, and snippets.

@ibayer
Created November 3, 2013 18:18
Show Gist options
  • Save ibayer/7293108 to your computer and use it in GitHub Desktop.
Save ibayer/7293108 to your computer and use it in GitHub Desktop.
trying to call cblas without python function call overhead
{
"metadata": {
"name": ""
},
"nbformat": 3,
"nbformat_minor": 0,
"worksheets": [
{
"cells": [
{
"cell_type": "code",
"collapsed": false,
"input": [
"# using ubuntu 12.10\n",
"# shut up debug symbols: https://groups.google.com/a/continuum.io/forum/#!topic/numba-users/wwC80xyjuu4\n",
"# http://arjones6.blogspot.ch/2013/05/passing-multidimensional-numpy-arrays.html"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 25
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"from cffi import FFI\n",
"ffi = FFI()\n",
"ffi.cdef(\"\"\" // some declarations from the man page\n",
" double cblas_ddot (const int N, const double * x, const int incx, const double * y, const int incy);\n",
"\"\"\")\n",
"C = ffi.verify(\"\"\" // passed to the real C compiler\n",
"#include <gsl/gsl_cblas.h>\n",
"\"\"\", libraries=['gslcblas']) # or a list of libraries to link with"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 18
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"import numpy as np\n",
"import ctypes\n",
"from numba import double\n",
"\n",
"length = 33\n",
"x = np.arange(length, dtype=np.float64)\n",
"y = x.copy()\n",
"\n",
"x_p = ffi.cast(\"double *\", x.ctypes.data)\n",
"y_p = ffi.cast(\"double *\", y.ctypes.data)\n",
"\n",
"assert C.cblas_ddot(length,x_p, 1, y_p, 1) == np.dot(x,y)\n",
"print 'np.dot'\n",
"%timeit np.dot(x,y)\n",
"print 'cblas ddot'\n",
"%timeit C.cblas_ddot(length,x_p, 1, y_p, 1)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"np.dot\n",
"1000000 loops, best of 3: 642 ns per loop"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"cblas ddot\n",
"1000000 loops, best of 3: 364 ns per loop"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n"
]
}
],
"prompt_number": 27
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"from numba import autojit\n",
"from numba.decorators import jit, autojit\n",
"X = np.arange(300*300).reshape(300,300)\n",
"\n",
"\n",
"def sum2d(arr):\n",
" M, N = arr.shape\n",
" result = 0.0\n",
" for i in range(M):\n",
" for j in range(N):\n",
" result += arr[i,j]\n",
" return result\n",
"\n",
"sum2d_numba = autojit(sum2d)\n",
"\n",
"print 'sum'\n",
"%timeit sum2d(X)\n",
"print 'sum + numba'\n",
"%timeit sum2d_numba(X)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"sum\n",
"10 loops, best of 3: 42.2 ms per loop"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"sum + numba\n"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"10000 loops, best of 3: 110 \u00b5s per loop\n"
]
}
],
"prompt_number": 28
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"def sumdot(x, y, n):\n",
" result = 0.0\n",
" for i in xrange(n):\n",
" result += np.dot(x,y)\n",
" return result\n",
"\n",
"print 'np.dot'\n",
"%timeit sumdot(x, y, 1000000)\n",
"sumdot_numba = autojit(sumdot)\n",
"print 'np.dot + numba'\n",
"%timeit sumdot_numba(x,y, 1000000)\n",
"\n",
"\n",
"print 'ddot (cblas/cffi)'\n",
"from numba import autojit\n",
"\n",
"x_p = ffi.cast(\"double *\", x.ctypes.data)\n",
"y_p = ffi.cast(\"double *\", y.ctypes.data)\n",
"\n",
"def sumdot_cffi(x_p, y_p, n):\n",
"\n",
" length = len(x)\n",
" result = 0.0\n",
" for i in xrange(n):\n",
" result += C.cblas_ddot(length, x_p, 1, y_p, 1)\n",
" return result\n",
"\n",
"assert sumdot_cffi(x_p, y_p, 1000000) == sumdot(x, y, 1000000)\n",
"\n",
"%timeit sumdot_cffi(x_p, y_p, 1000000)\n",
"sumdot_cffi_numba = autojit(sumdot_cffi)\n",
"print 'ddot (cblas/cffi) + numba'\n",
"%timeit sumdot_cffi(x_p, y_p, 1000000)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"np.dot\n",
"1 loops, best of 3: 902 ms per loop"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"np.dot + numba\n"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"1 loops, best of 3: 528 ms per loop\n",
"ddot (cblas/cffi)\n",
"1 loops, best of 3: 347 ms per loop"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"ddot (cblas/cffi) + numba\n",
"1 loops, best of 3: 347 ms per loop"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n"
]
}
],
"prompt_number": 26
}
],
"metadata": {}
}
]
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment