Created
May 14, 2013 12:29
-
-
Save HaveF/5575537 to your computer and use it in GitHub Desktop.
check_blas.py
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
D:\OpenCourses\ufldl\ref\Theano\theano\misc>python check_blas.py | |
Forcing DISTUTILS_USE_SDK=1 | |
WARNING (theano.tensor.blas): Failed to import scipy.linalg.blas. Falling back on slower implementations (DLL load failed: 找不到指定的模块。) | |
Some results that you can compare against. They were 10 executions | |
of gemm in float64 with matrices of shape 2000x2000 (M=N=K=2000). | |
All memory layout was in C order. | |
CPU tested: Xeon E5345(2.33Ghz, 8M L2 cache, 1333Mhz FSB), | |
Xeon E5430(2.66Ghz, 12M L2 cache, 1333Mhz FSB), | |
Xeon E5450(3Ghz, 12M L2 cache, 1333Mhz FSB), | |
Xeon X5560(2.8Ghz, 12M L2 cache, hyper-threads?) | |
Core 2 E8500, Core i7 930(2.8Ghz, hyper-threads enabled), | |
Core i7 950(3.07GHz, hyper-threads enabled) | |
Xeon X5550(2.67GHz, 8M l2 cache?, hyper-threads enabled) | |
Libraries tested: | |
* numpy with ATLAS from distribution (FC9) package (1 thread) | |
* manually compiled numpy and ATLAS with 2 threads | |
* goto 1.26 with 1, 2, 4 and 8 threads | |
* goto2 1.13 compiled with multiple threads enabled | |
Xeon Xeon Xeon Core2 i7 i7 Xeon Xeon | |
lib/nb threads E5345 E5430 E5450 E8500 930 950 X5560 X5550 | |
numpy 1.3.0 blas 775.92s | |
numpy_FC9_atlas/1 39.2s 35.0s 30.7s 29.6s 21.5s 19.60s | |
goto/1 18.7s 16.1s 14.2s 13.7s 16.1s 14.67s | |
numpy_MAN_atlas/2 12.0s 11.6s 10.2s 9.2s 9.0s | |
goto/2 9.5s 8.1s 7.1s 7.3s 8.1s 7.4s | |
goto/4 4.9s 4.4s 3.7s - 4.1s 3.8s | |
goto/8 2.7s 2.4s 2.0s - 4.1s 3.8s | |
openblas/1 14.04s | |
openblas/2 7.16s | |
openblas/4 3.71s | |
openblas/8 3.70s | |
mkl 11.0.083/1 7.97s | |
mkl 10.2.2.025/1 13.7s | |
mkl 10.2.2.025/2 7.6s | |
mkl 10.2.2.025/4 4.0s | |
mkl 10.2.2.025/8 2.0s | |
goto2 1.13/1 14.37s | |
goto2 1.13/2 7.26s | |
goto2 1.13/4 3.70s | |
goto2 1.13/8 1.94s | |
goto2 1.13/16 3.16s | |
Test time in float32 | |
cuda version 5.0 4.2 4.1 4.0 3.2 3.0 # note | |
gpu | |
K20m/ECC 0.07s | |
K20/NOECC 0.07s | |
M2070 0.25s 0.27s 0.32s | |
M2050(Amazon) 0.25s | |
C2075 0.25s | |
C1060 0.46s | |
GTX Titan(D15U-50)0.06s don't work | |
GTX 680 0.12s 0.154s 0.218s | |
GTX 580 0.16s 0.164s 0.203s | |
GTX 480 0.19s 0.192s 0.237s 0.27s | |
GTX 470 0.23s 0.238s 0.297s 0.34s | |
GTX 660 0.20s 0.23s | |
GTX 560 0.30s | |
GTX 650 Ti 0.27s | |
GTX 460 0.37s 0.45s | |
GTX 285 0.452s 0.452s 0.40s # cuda 3.0 seems faster? driver version? | |
GTX 550 Ti 0.57s | |
GT 520 2.68s 3.06s | |
520M 2.44s 3.19s # with bumblebee on Ubuntu 12.04 | |
GT 220 3.80s | |
GT 210 6.35s | |
8500 GT 10.68s | |
Some Theano flags: | |
blas.ldflags= -LC:\Anaconda\MinGW\lib -lopenblas | |
compiledir= R:\compiledir_Windows-7-6.1.7601-SP1-Intel64_Family_6_Model_58_Stepping_9_GenuineIntel-2.7.3-64 | |
floatX= float64 | |
device= cpu | |
Some environment variables: | |
MKL_NUM_THREADS= None | |
OMP_NUM_THREADS= None | |
GOTO_NUM_THREADS= None | |
Numpy config: (used when the Theano flag "blas.ldflags" is empty) | |
lapack_opt_info: | |
libraries = ['mkl_lapack95_lp64', 'mkl_core_dll', 'mkl_intel_lp64_dll', 'mkl_intel_thread_dll'] | |
library_dirs = ['C:\\aroot\\stage\\libs'] | |
define_macros = [('SCIPY_MKL_H', None)] | |
include_dirs = ['C:\\aroot\\stage\\include'] | |
blas_opt_info: | |
libraries = ['mkl_core_dll', 'mkl_intel_lp64_dll', 'mkl_intel_thread_dll'] | |
library_dirs = ['C:\\aroot\\stage\\libs'] | |
define_macros = [('SCIPY_MKL_H', None)] | |
include_dirs = ['C:\\aroot\\stage\\include'] | |
lapack_mkl_info: | |
libraries = ['mkl_lapack95_lp64', 'mkl_core_dll', 'mkl_intel_lp64_dll', 'mkl_intel_thread_dll'] | |
library_dirs = ['C:\\aroot\\stage\\libs'] | |
define_macros = [('SCIPY_MKL_H', None)] | |
include_dirs = ['C:\\aroot\\stage\\include'] | |
blas_mkl_info: | |
libraries = ['mkl_core_dll', 'mkl_intel_lp64_dll', 'mkl_intel_thread_dll'] | |
library_dirs = ['C:\\aroot\\stage\\libs'] | |
define_macros = [('SCIPY_MKL_H', None)] | |
include_dirs = ['C:\\aroot\\stage\\include'] | |
mkl_info: | |
libraries = ['mkl_core_dll', 'mkl_intel_lp64_dll', 'mkl_intel_thread_dll'] | |
library_dirs = ['C:\\aroot\\stage\\libs'] | |
define_macros = [('SCIPY_MKL_H', None)] | |
include_dirs = ['C:\\aroot\\stage\\include'] | |
Numpy dot module: numpy.core._dotblas | |
Numpy location: C:\Anaconda\lib\site-packages\numpy\__init__.pyc | |
Numpy version: 1.7.1 | |
Traceback (most recent call last): | |
File "check_blas.py", line 229, in <module> | |
iters=options.iter, order=options.order) | |
File "check_blas.py", line 71, in execute | |
f = theano.function([], updates=[(c, 0.4 * c + .8 * T.dot(a, b))]) | |
File "D:\OpenCourses\ufldl\ref\Theano\theano\compile\function.py", line 222, in function | |
profile=profile) | |
File "D:\OpenCourses\ufldl\ref\Theano\theano\compile\pfunc.py", line 506, in pfunc | |
on_unused_input=on_unused_input) | |
File "D:\OpenCourses\ufldl\ref\Theano\theano\compile\function_module.py", line 1299, in orig_function | |
defaults) | |
File "D:\OpenCourses\ufldl\ref\Theano\theano\compile\function_module.py", line 1168, in create | |
_fn, _i, _o = self.linker.make_thunk(input_storage=input_storage_lists) | |
File "D:\OpenCourses\ufldl\ref\Theano\theano\gof\link.py", line 382, in make_thunk | |
output_storage = output_storage)[:3] | |
File "D:\OpenCourses\ufldl\ref\Theano\theano\gof\vm.py", line 840, in make_all | |
for node in order] | |
File "D:\OpenCourses\ufldl\ref\Theano\theano\gof\op.py", line 589, in make_thunk | |
output_storage=node_output_storage) | |
File "D:\OpenCourses\ufldl\ref\Theano\theano\gof\cc.py", line 933, in make_thunk | |
keep_lock=keep_lock) | |
File "D:\OpenCourses\ufldl\ref\Theano\theano\gof\cc.py", line 876, in __compile__ | |
keep_lock=keep_lock) | |
File "D:\OpenCourses\ufldl\ref\Theano\theano\gof\cc.py", line 1304, in cthunk_factory | |
key=key, fn=self.compile_cmodule_by_step, keep_lock=keep_lock) | |
File "D:\OpenCourses\ufldl\ref\Theano\theano\gof\cmodule.py", line 992, in module_from_key | |
module = next(compile_steps) | |
File "D:\OpenCourses\ufldl\ref\Theano\theano\gof\cc.py", line 1221, in compile_cmodule_by_step | |
preargs=preargs) | |
File "D:\OpenCourses\ufldl\ref\Theano\theano\gof\cmodule.py", line 1841, in compile_str | |
return dlimport(lib_filename) | |
File "D:\OpenCourses\ufldl\ref\Theano\theano\gof\cmodule.py", line 280, in dlimport | |
rval = __import__(module_name, {}, {}, [module_name]) | |
ImportError: ('DLL load failed: \xd5\xd2\xb2\xbb\xb5\xbd\xd6\xb8\xb6\xa8\xb5\xc4\xc4\xa3\xbf\xe9\xa1\xa3', '[Gemm{inplace}(<TensorType(float64, matrix)>, TensorConstant{0.8}, <TensorType(float64, matrix)>, <TensorType(float64, matrix)>, TensorConstant{0.4})]') |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment