Last active
October 19, 2023 13:58
-
-
Save jkjung-avt/a89f2d76ea933ffe5567a117c4cd399e to your computer and use it in GitHub Desktop.
Scripts for installing tensorflow-2.6.0 on JetPack-4.6
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
#
# Build tensorflow-${version} from source with bazel and install the
# resulting wheel with pip3.  The script refuses to run unless
# /etc/nv_tegra_release looks like JetPack-4.6 (L4T R32 ... 6.1).
#
# Prerequisites checked below: numpy (pip3), bazel, and the patch file
#   <directory of this script>/tensorflow/tensorflow-<version>.patch
# Sources are downloaded to and built in ${HOME}/src.

set -e

# Print an error message to stderr and abort the script.
die() {
  echo "ERROR: $*" >&2
  exit 1
}

# tensorflow version
version=2.6.0

if [[ ! $(head -n 1 /etc/nv_tegra_release) =~ R32.*6\.1 ]]; then
  die "not JetPack-4.6"
fi

# Select the CUDA compute capability to build for from the Tegra chip id.
case "$(cat /sys/module/tegra_fuse/parameters/tegra_chip_id)" in
  "33")  # Nano and TX1
    cuda_compute=5.3
    ;;
  "24")  # TX2
    cuda_compute=6.2
    ;;
  "25")  # Xavier NX and AGX Xavier
    cuda_compute=7.2
    ;;
  *)     # default
    cuda_compute=5.3,6.2,7.2
    ;;
esac

script_path=$(realpath "$0")
patch_path=$(dirname "$script_path")/tensorflow/tensorflow-${version}.patch
src_folder=${HOME}/src

# The TensorRT major version is the one-character suffix of libnvinfer.so.?
# Expanding the glob into an array (instead of `echo glob | cut`) keeps the
# result sane when the glob matches nothing or more than one file.
trt_libs=(/usr/lib/aarch64-linux-gnu/libnvinfer.so.?)
trt_version=${trt_libs[0]##*.}
if [[ ! $trt_version =~ ^[0-9]+$ ]]; then
  die "cannot determine TensorRT version from /usr/lib/aarch64-linux-gnu/libnvinfer.so.?"
fi

# Fail early, before the long install/download steps, if the patch is missing.
if [[ ! -f $patch_path ]]; then
  die "missing patch file ${patch_path}"
fi

mkdir -p "$src_folder"

if pip3 list | grep tensorflow > /dev/null; then
  die "tensorflow is installed already"
fi

if ! pip3 list | grep numpy > /dev/null; then
  die "missing numpy"
fi

if ! command -v bazel > /dev/null; then
  die "bazel has not been installled"
fi

echo "** Install requirements"
# The patterns are quoted so that apt, not the shell, expands them; unquoted
# they could match files in the current directory.
sudo apt install -y 'llvm-10*' 'clang-10*'
sudo apt install -y libhdf5-serial-dev hdf5-tools libhdf5-dev zlib1g-dev \
  zip libjpeg8-dev liblapack-dev libblas-dev gfortran
sudo pip3 install -U six wheel setuptools typing_extensions
sudo pip3 install pkgconfig
sudo env H5PY_SETUP_REQUIRES=0 pip3 install -U h5py==3.1.0
sudo pip3 install -U future==0.18.2 mock==3.0.5 \
  keras_preprocessing==1.1.2 keras_applications==1.0.8 \
  gast==0.4.0 futures pybind11

echo "** Download and patch tensorflow-${version}"
pushd "$src_folder"
if [[ ! -f tensorflow-${version}.tar.gz ]]; then
  wget "https://github.com/tensorflow/tensorflow/archive/v${version}.tar.gz" \
    -O "tensorflow-${version}.tar.gz"
fi
tar xzvf "tensorflow-${version}.tar.gz"
cd "tensorflow-${version}"

# A reverse dry run succeeds only when the patch is already applied.  In every
# other case apply it for real as a plain command, so that a failing patch
# aborts the script under `set -e` instead of being silently ignored.
if patch -R -p1 -s -f --dry-run < "$patch_path" > /dev/null 2>&1; then
  echo "tensorflow-${version} source tree appears to be patched already. Continue..."
else
  patch -N -p1 < "$patch_path"
fi

echo "** Configure and build tensorflow-${version}"
export TMP=/tmp
export CC=/usr/bin/clang-10
export CXX=/usr/bin/clang++-10
export CXXFLAGS="-stdlib=libc++"
# All answers for ./configure are supplied through the environment.
PYTHON_BIN_PATH=$(command -v python3) \
PYTHON_LIB_PATH=$(python3 -c 'import site; print(site.getsitepackages()[0])') \
TF_CUDA_COMPUTE_CAPABILITIES=${cuda_compute} \
TF_CUDA_VERSION=10.2 \
TF_CUDA_CLANG=1 \
TF_DOWNLOAD_CLANG=0 \
CLANG_CUDA_COMPILER_PATH=/usr/bin/clang-10 \
TF_CUDNN_VERSION=8 \
TF_TENSORRT_VERSION=${trt_version} \
CUDA_TOOLKIT_PATH=/usr/local/cuda \
CUDNN_INSTALL_PATH=/usr/lib/aarch64-linux-gnu \
TENSORRT_INSTALL_PATH=/usr/lib/aarch64-linux-gnu \
TF_NEED_IGNITE=0 \
TF_ENABLE_XLA=0 \
TF_NEED_OPENCL_SYCL=0 \
TF_NEED_COMPUTECPP=0 \
TF_NEED_ROCM=0 \
TF_NEED_CUDA=1 \
TF_NEED_TENSORRT=1 \
TF_NEED_OPENCL=0 \
TF_NEED_MPI=0 \
GCC_HOST_COMPILER_PATH=$(command -v clang-10) \
CC_OPT_FLAGS="-Wno-sign-compare" \
TF_SET_ANDROID_WORKSPACE=0 \
  ./configure

# Cap bazel at a quarter of the host CPUs and half of the host RAM.
bazel build --config=opt \
  --config=cuda \
  --config=noaws \
  --local_cpu_resources=HOST_CPUS*0.25 \
  --local_ram_resources=HOST_RAM*0.5 \
  //tensorflow/tools/pip_package:build_pip_package
bazel-bin/tensorflow/tools/pip_package/build_pip_package wheel/tensorflow_pkg

echo "** Install tensorflow-${version}"
sudo pip3 install wheel/tensorflow_pkg/tensorflow-"${version}"-*.whl
popd

# Smoke test: import the freshly installed package and report CUDA/GPU status.
TF_CPP_MIN_LOG_LEVEL=3 \
python3 -c "import tensorflow as tf; tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR); print('tensorflow version: %s' % tf.__version__); print('tensorflow.test.is_built_with_cuda(): %s' % tf.test.is_built_with_cuda()); print('tensorflow.test.is_gpu_available(): %s' % tf.test.is_gpu_available(cuda_only=False, min_cuda_compute_capability=None))"

echo "** Build and install tensorflow-${version} successfully"
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
make[3]: Entering directory '/home/nvidia/src/protobuf-3.9.2/src' | |
../test-driver: line 107: 12829 Segmentation fault (core dumped) "$@" > $log_file 2>&1 | |
FAIL: protobuf-test | |
PASS: protobuf-lazy-descriptor-test | |
PASS: protobuf-lite-test | |
PASS: google/protobuf/compiler/zip_output_unittest.sh | |
PASS: google/protobuf/io/gzip_stream_unittest.sh | |
PASS: protobuf-lite-arena-test | |
PASS: no-warning-test | |
============================================================================ | |
Testsuite summary for Protocol Buffers 3.9.2 | |
============================================================================ | |
# TOTAL: 7 | |
# PASS: 6 | |
# SKIP: 0 | |
# XFAIL: 0 | |
# FAIL: 1 | |
# XPASS: 0 | |
# ERROR: 0 | |
============================================================================ | |
See src/test-suite.log | |
Please report to [email protected] | |
============================================================================ | |
Makefile:7751: recipe for target 'test-suite.log' failed | |
make[3]: *** [test-suite.log] Error 1 | |
make[3]: Leaving directory '/home/nvidia/src/protobuf-3.9.2/src' | |
Makefile:7857: recipe for target 'check-TESTS' failed | |
make[2]: *** [check-TESTS] Error 2 | |
make[2]: Leaving directory '/home/nvidia/src/protobuf-3.9.2/src' | |
Makefile:7972: recipe for target 'check-am' failed | |
make[1]: *** [check-am] Error 2 | |
make[1]: Leaving directory '/home/nvidia/src/protobuf-3.9.2/src' | |
Makefile:1715: recipe for target 'check-recursive' failed | |
make: *** [check-recursive] Error 1 |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
diff -Naur a/third_party/nccl/build_defs.bzl.tpl b/third_party/nccl/build_defs.bzl.tpl | |
--- a/third_party/nccl/build_defs.bzl.tpl 2021-08-10 03:10:27.000000000 +0800 | |
+++ b/third_party/nccl/build_defs.bzl.tpl 2021-10-08 10:26:33.536077745 +0800 | |
@@ -43,7 +43,7 @@ | |
# The global functions can not have a lower register count than the | |
# device functions. This is enforced by setting a fixed register count. | |
# https://github.com/NVIDIA/nccl/blob/f93fe9bfd94884cec2ba711897222e0df5569a53/makefiles/common.mk#L48 | |
- maxrregcount = "-maxrregcount=96" | |
+ maxrregcount = "-maxrregcount=80" | |
return cuda_default_copts() + select({ | |
"@local_config_cuda//:is_cuda_compiler_nvcc": [ |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
I am not very familiar with bazel, so I am not sure what the best way is to downgrade my bazel (it was bazel 4.2.0) to 3.7.2.
What I did:
Then I ran install_tensorflow-2.6.0.sh again and got the error below.
Configuration finished
/usr/local/bin/bazel: line 163: /usr/local/lib/bazel/bin/bazel-real: cannot execute binary file: Exec format error
ERROR: The project you're trying to build requires Bazel 3.7.2 (specified in /home/hiwonder/src/tensorflow-2.6.0/.bazelversion), but it wasn't found in /usr/local/lib/bazel/bin.
And I can see that the bazel files are there:
hiwonder@JetMax:~/tensorflow$ ls -trl /usr/local/lib/bazel/bin
total 46420
-rwxr-xr-x 1 root root 47154615 Jan 1 1980 bazel-real
-rw-r--r-- 1 root root 5305 Jan 1 1980 bazel.fish
-rwxr-xr-x 1 root root 339462 Jan 1 1980 bazel-complete.bash
-rw-r--r-- 1 root root 10238 Jan 1 1980 _bazel
-rwxr-xr-x 1 root root 8678 Jan 1 1980 bazel
I don't know where to go from here. Would you please advise?