-
-
Save jkjung-avt/a89f2d76ea933ffe5567a117c4cd399e to your computer and use it in GitHub Desktop.
#!/bin/bash
#
# Build tensorflow from source and install it with pip3.
# The script only proceeds when the first line of /etc/nv_tegra_release
# matches "R32 ... 6.1", which it reports to the user as JetPack-4.6.

# Abort on the first unhandled command failure.
set -e

# tensorflow version
version=2.6.0

# Read the release line without letting a missing file abort the script
# (or print a raw "head" error): an empty string simply fails the match below.
release_line=$(head -n 1 /etc/nv_tegra_release 2> /dev/null) || release_line=""
if [[ ! "${release_line}" =~ R32.*6\.1 ]]; then
  echo "ERROR: not JetPack-4.6" >&2
  exit 1
fi
# Choose the CUDA compute capability list (later passed to the tensorflow
# configure step) from the chip id exposed by the tegra_fuse kernel module.
# When the id file is unreadable or holds an unknown id, build for all three.
chip_id_file=/sys/module/tegra_fuse/parameters/tegra_chip_id
chip_id=""
if [[ -r "${chip_id_file}" ]]; then
  chip_id=$(< "${chip_id_file}")
fi
case "${chip_id}" in
  "33")  # Nano and TX1
    cuda_compute=5.3
    ;;
  "24")  # TX2
    cuda_compute=6.2
    ;;
  "25")  # Xavier NX and AGX Xavier
    cuda_compute=7.2
    ;;
  *)     # default
    cuda_compute=5.3,6.2,7.2
    ;;
esac
# The patch is expected under tensorflow/ next to this script.
script_path=$(realpath "$0")
patch_path="$(dirname "${script_path}")/tensorflow/tensorflow-${version}.patch"

# Derive the TensorRT major version from the single-character suffix of
# libnvinfer.so.?  (e.g. ".../libnvinfer.so.8" -> "8").  If nothing matches,
# the glob stays literal and the result is "?".
trt_libs=(/usr/lib/aarch64-linux-gnu/libnvinfer.so.?)
trt_version=${trt_libs[0]##*.}

# Sources are downloaded and built under ~/src.
src_folder="${HOME}/src"
mkdir -p "${src_folder}"

# Query pip once and reuse the listing for both checks.  A failing pip3 yields
# an empty listing, which is then reported as "missing numpy" below.
pip_packages=$(pip3 list) || pip_packages=""

if grep tensorflow <<< "${pip_packages}" > /dev/null; then
  echo "ERROR: tensorflow is installed already" >&2
  exit 1
fi

if ! grep numpy <<< "${pip_packages}" > /dev/null; then
  echo "ERROR: missing numpy" >&2
  exit 1
fi

if ! command -v bazel > /dev/null; then
  echo "ERROR: bazel has not been installed" >&2
  exit 1
fi
echo "** Install requirements"
# Build toolchain: the patterns are quoted so that apt, not the shell, expands
# them (an unquoted llvm-10* would match files in the current directory).
sudo apt install -y 'llvm-10*' 'clang-10*'
# System libraries and tools needed by the Python packages installed below.
sudo apt install -y libhdf5-serial-dev hdf5-tools libhdf5-dev zlib1g-dev \
                    zip libjpeg8-dev liblapack-dev libblas-dev gfortran
# Python packaging basics.
sudo pip3 install -U six wheel setuptools typing_extensions
sudo pip3 install pkgconfig
# NOTE(review): H5PY_SETUP_REQUIRES=0 presumably stops h5py's setup from
# fetching its own build requirements -- confirm against the h5py docs.
sudo env H5PY_SETUP_REQUIRES=0 pip3 install -U h5py==3.1.0
# Pinned Python dependencies for the tensorflow build.
sudo pip3 install -U future==0.18.2 mock==3.0.5 \
                     keras_preprocessing==1.1.2 keras_applications==1.0.8 \
                     gast==0.4.0 futures pybind11
echo "** Download and patch tensorflow-${version}"

# Fail fast: without this check a missing patch file would be silently
# ignored and an unpatched tree would be built.
if [[ ! -f "${patch_path}" ]]; then
  echo "ERROR: patch file not found: ${patch_path}" >&2
  exit 1
fi

# Work inside the source folder; the directory stack is unwound by the popd
# that follows the install step.
pushd "${src_folder}"

tarball="tensorflow-${version}.tar.gz"
if [[ ! -f "${tarball}" ]]; then
  # Download under a temporary name so an interrupted transfer never leaves a
  # truncated tarball that a later run would mistake for a complete one.
  wget "https://github.com/tensorflow/tensorflow/archive/v${version}.tar.gz" \
       -O "${tarball}.part"
  mv "${tarball}.part" "${tarball}"
fi
tar xzvf "${tarball}"
cd "tensorflow-${version}"

# GNU patch exits 0 when every hunk applied, 1 when some hunks could not be
# applied (with -N this includes "already applied"), and 2 on serious trouble.
patch_status=0
patch -N -p1 < "${patch_path}" || patch_status=$?
case "${patch_status}" in
  0)
    ;;
  1)
    echo "tensorflow-${version} source tree appears to be patched already. Continue..."
    ;;
  *)
    echo "ERROR: failed to apply ${patch_path}" >&2
    exit 1
    ;;
esac
echo "** Configure and build tensorflow-${version}"

# Build with clang-10 / libc++ and use /tmp for temporary files.
export TMP=/tmp
export CC=/usr/bin/clang-10
export CXX=/usr/bin/clang++-10
export CXXFLAGS="-stdlib=libc++"

# Answer every ./configure question through environment variables so the step
# runs unattended: CUDA 10.2 + cuDNN 8 + the detected TensorRT version are
# enabled and compiled with clang; all the other TF_NEED_* / TF_ENABLE_*
# switches listed below are turned off.
PYTHON_BIN_PATH=$(command -v python3) \
PYTHON_LIB_PATH=$(python3 -c 'import site; print(site.getsitepackages()[0])') \
TF_CUDA_COMPUTE_CAPABILITIES=${cuda_compute} \
TF_CUDA_VERSION=10.2 \
TF_CUDA_CLANG=1 \
TF_DOWNLOAD_CLANG=0 \
CLANG_CUDA_COMPILER_PATH=/usr/bin/clang-10 \
TF_CUDNN_VERSION=8 \
TF_TENSORRT_VERSION=${trt_version} \
CUDA_TOOLKIT_PATH=/usr/local/cuda \
CUDNN_INSTALL_PATH=/usr/lib/aarch64-linux-gnu \
TENSORRT_INSTALL_PATH=/usr/lib/aarch64-linux-gnu \
TF_NEED_IGNITE=0 \
TF_ENABLE_XLA=0 \
TF_NEED_OPENCL_SYCL=0 \
TF_NEED_COMPUTECPP=0 \
TF_NEED_ROCM=0 \
TF_NEED_CUDA=1 \
TF_NEED_TENSORRT=1 \
TF_NEED_OPENCL=0 \
TF_NEED_MPI=0 \
GCC_HOST_COMPILER_PATH=$(command -v clang-10) \
CC_OPT_FLAGS="-Wno-sign-compare" \
TF_SET_ANDROID_WORKSPACE=0 \
    ./configure

# Cap bazel at a quarter of the host CPUs and half of the host RAM.
bazel build --config=opt \
            --config=cuda \
            --config=noaws \
            --local_cpu_resources=HOST_CPUS*0.25 \
            --local_ram_resources=HOST_RAM*0.5 \
            //tensorflow/tools/pip_package:build_pip_package

# Produce the wheel under wheel/tensorflow_pkg (relative to the source tree).
bazel-bin/tensorflow/tools/pip_package/build_pip_package wheel/tensorflow_pkg
echo "** Install tensorflow-${version}"
# Only the trailing part of the wheel name is a glob; the version is quoted.
sudo pip3 install wheel/tensorflow_pkg/tensorflow-"${version}"-*.whl

# Return to the directory that was current before the earlier pushd.
popd

# Smoke test: import the freshly installed package and report its version and
# CUDA/GPU status, with tensorflow's own logging reduced to errors.
TF_CPP_MIN_LOG_LEVEL=3 python3 - << 'PYTHON_EOF'
import tensorflow as tf
tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)
print('tensorflow version: %s' % tf.__version__)
print('tensorflow.test.is_built_with_cuda(): %s' % tf.test.is_built_with_cuda())
print('tensorflow.test.is_gpu_available(): %s' % tf.test.is_gpu_available(cuda_only=False, min_cuda_compute_capability=None))
PYTHON_EOF

echo "** Build and install tensorflow-${version} successfully"
make[3]: Entering directory '/home/nvidia/src/protobuf-3.9.2/src' | |
../test-driver: line 107: 12829 Segmentation fault (core dumped) "$@" > $log_file 2>&1 | |
FAIL: protobuf-test | |
PASS: protobuf-lazy-descriptor-test | |
PASS: protobuf-lite-test | |
PASS: google/protobuf/compiler/zip_output_unittest.sh | |
PASS: google/protobuf/io/gzip_stream_unittest.sh | |
PASS: protobuf-lite-arena-test | |
PASS: no-warning-test | |
============================================================================ | |
Testsuite summary for Protocol Buffers 3.9.2 | |
============================================================================ | |
# TOTAL: 7 | |
# PASS: 6 | |
# SKIP: 0 | |
# XFAIL: 0 | |
# FAIL: 1 | |
# XPASS: 0 | |
# ERROR: 0 | |
============================================================================ | |
See src/test-suite.log | |
Please report to [email protected] | |
============================================================================ | |
Makefile:7751: recipe for target 'test-suite.log' failed | |
make[3]: *** [test-suite.log] Error 1 | |
make[3]: Leaving directory '/home/nvidia/src/protobuf-3.9.2/src' | |
Makefile:7857: recipe for target 'check-TESTS' failed | |
make[2]: *** [check-TESTS] Error 2 | |
make[2]: Leaving directory '/home/nvidia/src/protobuf-3.9.2/src' | |
Makefile:7972: recipe for target 'check-am' failed | |
make[1]: *** [check-am] Error 2 | |
make[1]: Leaving directory '/home/nvidia/src/protobuf-3.9.2/src' | |
Makefile:1715: recipe for target 'check-recursive' failed | |
make: *** [check-recursive] Error 1 |
diff -Naur a/third_party/nccl/build_defs.bzl.tpl b/third_party/nccl/build_defs.bzl.tpl | |
--- a/third_party/nccl/build_defs.bzl.tpl 2021-08-10 03:10:27.000000000 +0800 | |
+++ b/third_party/nccl/build_defs.bzl.tpl 2021-10-08 10:26:33.536077745 +0800 | |
@@ -43,7 +43,7 @@ | |
# The global functions can not have a lower register count than the | |
# device functions. This is enforced by setting a fixed register count. | |
# https://github.com/NVIDIA/nccl/blob/f93fe9bfd94884cec2ba711897222e0df5569a53/makefiles/common.mk#L48 | |
- maxrregcount = "-maxrregcount=96" | |
+ maxrregcount = "-maxrregcount=80" | |
return cuda_default_copts() + select({ | |
"@local_config_cuda//:is_cuda_compiler_nvcc": [ |
Have you tried downgrading your bazel installation (bazel 3.7.2 for tensorflow 2.6.0)? You have to use the matching version of bazel; otherwise the compilation process will likely fail.
Please refer to https://www.tensorflow.org/install/source
GPU
Version | Python version | Compiler | Build tools | cuDNN | CUDA |
---|---|---|---|---|---|
...... | ...... | ...... | ...... | ... | ... |
tensorflow-2.6.0 | 3.6-3.9 | GCC 7.3.1 | Bazel 3.7.2 | 8.1 | 11.2 |
I am not very familiar with bazel, so I am not sure what the best way is to downgrade my bazel (it was bazel 4.2.0) to 3.7.2.
What I did:
- Downloaded bazel-3.7.2-linux-arm64 from https://github.com/bazelbuild/bazel/releases?expanded=true&page=1&q=3.7 to my robot (Jetson Nano, JetPack 4.6, Python 3.6). This looks like an executable file, but I don't know how to run it (I tried double-clicking it, etc.);
- Downloaded bazel-3.7.2-installer-linux-x86_64.sh from the same site and ran it as: sudo bash bazel-3.7.2-installer-linux-x86_64.sh.
Then run install_tensorflow-2.6.0.sh again. I got below error.
Configuration finished
/usr/local/bin/bazel: line 163: /usr/local/lib/bazel/bin/bazel-real: cannot execute binary file: Exec format error
ERROR: The project you're trying to build requires Bazel 3.7.2 (specified in /home/hiwonder/src/tensorflow-2.6.0/.bazelversion), but it wasn't found in /usr/local/lib/bazel/bin.
And I can see bazel
hiwonder@JetMax:~/tensorflow$ ls -trl /usr/local/lib/bazel/bin
total 46420
-rwxr-xr-x 1 root root 47154615 Jan 1 1980 bazel-real
-rw-r--r-- 1 root root 5305 Jan 1 1980 bazel.fish
-rwxr-xr-x 1 root root 339462 Jan 1 1980 bazel-complete.bash
-rw-r--r-- 1 root root 10238 Jan 1 1980 _bazel
-rwxr-xr-x 1 root root 8678 Jan 1 1980 bazel
I don't know where to go now. Would you please advise?
Hi There,
I am trying to use your valuable sh script to install tensorflow 2.6.0 on my Ubuntu 18.04 machine, but I got the error below:
You have bazel 4.2.0 installed.
Please downgrade your bazel installation to version 3.99.0 or lower to build TensorFlow! To downgrade: download the installer for the old version (from https://github.com/bazelbuild/bazel/releases) then run the installer.
Could you please advise whether you have faced a similar issue before?
Much appreciated.