jkjung-avt · October 19, 2023 13:58 · SandyLi2017 · Oct 19, 2023
diff --git a/install_tensorflow-2.6.0.sh b/install_tensorflow-2.6.0.sh
 #!/bin/bash
 #
 # Build tensorflow-2.6.0 from source and install the resulting wheel with
 # pip3.
 #
 # Preconditions checked below (the script exits 1 if any of them fails):
 #   - the first line of /etc/nv_tegra_release matches "R32.*6\.1"
 #     (reported to the user as JetPack-4.6)
 #   - `pip3 list` shows no tensorflow package yet
 #   - `pip3 list` shows numpy
 #   - bazel is on PATH
 #
 # Files used:
 #   <script dir>/tensorflow/tensorflow-2.6.0.patch  patch applied to the sources
 #   ${HOME}/src                                     download and build directory

 # Abort on the first failing command.  Note that in an `a && b` / `a || b`
 # list only a failure of the last command aborts the script.
 set -e

 # tensorflow version
 version=2.6.0

 if [[ ! $(head -n 1 /etc/nv_tegra_release) =~ R32.*6\.1 ]]; then
   echo "ERROR: not JetPack-4.6"
   exit 1
 fi

 # Pick the CUDA compute capability from the Tegra chip id; build for all
 # three known capabilities when the chip id is not recognized.
 case "$(cat /sys/module/tegra_fuse/parameters/tegra_chip_id)" in
   "33" )  # Nano and TX1
     cuda_compute=5.3
     ;;
   "24" )  # TX2
     cuda_compute=6.2
     ;;
   "25" )  # Xavier NX and AGX Xavier
     cuda_compute=7.2
     ;;
   * )     # default
     cuda_compute=5.3,6.2,7.2
     ;;
 esac

 # The patch file lives next to this script, under tensorflow/.
 script_path=$(realpath "$0")
 patch_path="$(dirname "$script_path")/tensorflow/tensorflow-${version}.patch"
 # The glob expands to e.g. ".../libnvinfer.so.8"; the third dot-separated
 # field is the TensorRT major version.
 trt_version=$(echo /usr/lib/aarch64-linux-gnu/libnvinfer.so.? | cut -d '.' -f 3)

 src_folder="${HOME}/src"
 mkdir -p "$src_folder"

 if pip3 list | grep tensorflow > /dev/null; then
   echo "ERROR: tensorflow is installed already"
   exit 1
 fi

 if [[ -z $(pip3 list | grep numpy) ]]; then
   echo "ERROR: missing numpy"
   exit 1
 fi

 if ! command -v bazel > /dev/null; then
   echo "ERROR: bazel has not been installed"
   exit 1
 fi

 echo "** Install requirements"
 # The package patterns are quoted so that apt receives them verbatim instead
 # of the shell expanding them against files in the current directory.
 sudo apt install -y 'llvm-10*' 'clang-10*'
 sudo apt install -y libhdf5-serial-dev hdf5-tools libhdf5-dev zlib1g-dev \
                     zip libjpeg8-dev liblapack-dev libblas-dev gfortran
 sudo pip3 install -U six wheel setuptools typing_extensions
 sudo pip3 install pkgconfig
 sudo env H5PY_SETUP_REQUIRES=0 pip3 install -U h5py==3.1.0
 sudo pip3 install -U future==0.18.2 mock==3.0.5 \
                      keras_preprocessing==1.1.2 keras_applications==1.0.8 \
                      gast==0.4.0 futures pybind11

 echo "** Download and patch tensorflow-${version}"
 pushd "$src_folder"
 # Reuse a previously downloaded tarball if there is one.
 if [[ ! -f "tensorflow-${version}.tar.gz" ]]; then
   wget "https://github.com/tensorflow/tensorflow/archive/v${version}.tar.gz" \
        -O "tensorflow-${version}.tar.gz"
 fi
 tar xzvf "tensorflow-${version}.tar.gz"
 cd "tensorflow-${version}"

 # `patch -N` exits non-zero when it skips a patch that looks already applied.
 # Report that case and carry on; a clean application prints nothing extra.
 patch -N -p1 < "$patch_path" || \
   echo "tensorflow-${version} source tree appears to be patched already.  Continue..."

 echo "** Configure and build tensorflow-${version}"
 export TMP=/tmp
 export CC=/usr/bin/clang-10
 export CXX=/usr/bin/clang++-10
 export CXXFLAGS="-stdlib=libc++"

 # Answer every ./configure question through environment variables so the
 # configuration step runs without prompting.
 PYTHON_BIN_PATH="$(command -v python3)" \
 PYTHON_LIB_PATH="$(python3 -c 'import site; print(site.getsitepackages()[0])')" \
 TF_CUDA_COMPUTE_CAPABILITIES="${cuda_compute}" \
 TF_CUDA_VERSION=10.2 \
 TF_CUDA_CLANG=1 \
 TF_DOWNLOAD_CLANG=0 \
 CLANG_CUDA_COMPILER_PATH=/usr/bin/clang-10 \
 TF_CUDNN_VERSION=8 \
 TF_TENSORRT_VERSION="${trt_version}" \
 CUDA_TOOLKIT_PATH=/usr/local/cuda \
 CUDNN_INSTALL_PATH=/usr/lib/aarch64-linux-gnu \
 TENSORRT_INSTALL_PATH=/usr/lib/aarch64-linux-gnu \
 TF_NEED_IGNITE=0 \
 TF_ENABLE_XLA=0 \
 TF_NEED_OPENCL_SYCL=0 \
 TF_NEED_COMPUTECPP=0 \
 TF_NEED_ROCM=0 \
 TF_NEED_CUDA=1 \
 TF_NEED_TENSORRT=1 \
 TF_NEED_OPENCL=0 \
 TF_NEED_MPI=0 \
 GCC_HOST_COMPILER_PATH="$(command -v clang-10)" \
 CC_OPT_FLAGS="-Wno-sign-compare" \
 TF_SET_ANDROID_WORKSPACE=0 \
   ./configure

 # The resource expressions contain '*', so they are quoted to reach bazel
 # unchanged.
 bazel build --config=opt \
             --config=cuda \
             --config=noaws \
             '--local_cpu_resources=HOST_CPUS*0.25' \
             '--local_ram_resources=HOST_RAM*0.5' \
             //tensorflow/tools/pip_package:build_pip_package
 bazel-bin/tensorflow/tools/pip_package/build_pip_package wheel/tensorflow_pkg

 echo "** Install tensorflow-${version}"
 # The trailing glob is intentionally unquoted: it matches the wheel's
 # python/platform tag suffix.
 sudo pip3 install wheel/tensorflow_pkg/tensorflow-"${version}"-*.whl

 popd

 # Smoke test: import the freshly installed package and print its version
 # and CUDA/GPU status.
 TF_CPP_MIN_LOG_LEVEL=3 \
   python3 -c "import tensorflow as tf; tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR); print('tensorflow version: %s' % tf.__version__); print('tensorflow.test.is_built_with_cuda(): %s' % tf.test.is_built_with_cuda()); print('tensorflow.test.is_gpu_available(): %s' % tf.test.is_gpu_available(cuda_only=False, min_cuda_compute_capability=None))"

 echo "** Build and install tensorflow-${version} successfully"
diff --git a/protobuf-3.9.2-error.log b/protobuf-3.9.2-error.log
 make[3]: Entering directory '/home/nvidia/src/protobuf-3.9.2/src'
 ../test-driver: line 107: 12829 Segmentation fault      (core dumped) "$@" > $log_file 2>&1
 FAIL: protobuf-test
 PASS: protobuf-lazy-descriptor-test
 PASS: protobuf-lite-test
 PASS: google/protobuf/compiler/zip_output_unittest.sh
 PASS: google/protobuf/io/gzip_stream_unittest.sh
 PASS: protobuf-lite-arena-test
 PASS: no-warning-test
 ============================================================================
 Testsuite summary for Protocol Buffers 3.9.2
 ============================================================================
 # TOTAL: 7
 # PASS:  6
 # SKIP:  0
 # XFAIL: 0
 # FAIL:  1
 # XPASS: 0
 # ERROR: 0
 ============================================================================
 See src/test-suite.log
 Please report to protobuf@googlegroups.com
 ============================================================================
 Makefile:7751: recipe for target 'test-suite.log' failed
 make[3]: *** [test-suite.log] Error 1
 make[3]: Leaving directory '/home/nvidia/src/protobuf-3.9.2/src'
 Makefile:7857: recipe for target 'check-TESTS' failed
 make[2]: *** [check-TESTS] Error 2
 make[2]: Leaving directory '/home/nvidia/src/protobuf-3.9.2/src'
 Makefile:7972: recipe for target 'check-am' failed
 make[1]: *** [check-am] Error 2
 make[1]: Leaving directory '/home/nvidia/src/protobuf-3.9.2/src'
 Makefile:1715: recipe for target 'check-recursive' failed
 make: *** [check-recursive] Error 1
diff --git a/tensorflow-2.6.0.patch b/tensorflow-2.6.0.patch
 diff -Naur a/third_party/nccl/build_defs.bzl.tpl b/third_party/nccl/build_defs.bzl.tpl
 --- a/third_party/nccl/build_defs.bzl.tpl	2021-08-10 03:10:27.000000000 +0800
 +++ b/third_party/nccl/build_defs.bzl.tpl	2021-10-08 10:26:33.536077745 +0800
 @@ -43,7 +43,7 @@
     # The global functions can not have a lower register count than the
     # device functions. This is enforced by setting a fixed register count.
     # https://github.com/NVIDIA/nccl/blob/f93fe9bfd94884cec2ba711897222e0df5569a53/makefiles/common.mk#L48
 -    maxrregcount = "-maxrregcount=96"
 +    maxrregcount = "-maxrregcount=80"
 
     return cuda_default_copts() + select({
         "@local_config_cuda//:is_cuda_compiler_nvcc": [
	#!/bin/bash
	#
	# Build tensorflow-2.6.0 from source and install the resulting wheel with
	# pip3.
	#
	# Preconditions checked below (the script exits 1 if any of them fails):
	#   - the first line of /etc/nv_tegra_release matches "R32.*6\.1"
	#     (reported to the user as JetPack-4.6)
	#   - `pip3 list` shows no tensorflow package yet
	#   - `pip3 list` shows numpy
	#   - bazel is on PATH
	#
	# Files used:
	#   <script dir>/tensorflow/tensorflow-2.6.0.patch  patch applied to the sources
	#   ${HOME}/src                                     download and build directory

	# Abort on the first failing command.  Note that in an `a && b` / `a || b`
	# list only a failure of the last command aborts the script.
	set -e

	# tensorflow version
	version=2.6.0

	if [[ ! $(head -n 1 /etc/nv_tegra_release) =~ R32.*6\.1 ]]; then
	  echo "ERROR: not JetPack-4.6"
	  exit 1
	fi

	# Pick the CUDA compute capability from the Tegra chip id; build for all
	# three known capabilities when the chip id is not recognized.
	case "$(cat /sys/module/tegra_fuse/parameters/tegra_chip_id)" in
	  "33" ) # Nano and TX1
	    cuda_compute=5.3
	    ;;
	  "24" ) # TX2
	    cuda_compute=6.2
	    ;;
	  "25" ) # Xavier NX and AGX Xavier
	    cuda_compute=7.2
	    ;;
	  * ) # default
	    cuda_compute=5.3,6.2,7.2
	    ;;
	esac

	# The patch file lives next to this script, under tensorflow/.
	script_path=$(realpath "$0")
	patch_path="$(dirname "$script_path")/tensorflow/tensorflow-${version}.patch"
	# The glob expands to e.g. ".../libnvinfer.so.8"; the third dot-separated
	# field is the TensorRT major version.
	trt_version=$(echo /usr/lib/aarch64-linux-gnu/libnvinfer.so.? | cut -d '.' -f 3)

	src_folder="${HOME}/src"
	mkdir -p "$src_folder"

	if pip3 list | grep tensorflow > /dev/null; then
	  echo "ERROR: tensorflow is installed already"
	  exit 1
	fi

	if [[ -z $(pip3 list | grep numpy) ]]; then
	  echo "ERROR: missing numpy"
	  exit 1
	fi

	if ! command -v bazel > /dev/null; then
	  echo "ERROR: bazel has not been installed"
	  exit 1
	fi

	echo "** Install requirements"
	# The package patterns are quoted so that apt receives them verbatim instead
	# of the shell expanding them against files in the current directory.
	sudo apt install -y 'llvm-10*' 'clang-10*'
	sudo apt install -y libhdf5-serial-dev hdf5-tools libhdf5-dev zlib1g-dev \
	  zip libjpeg8-dev liblapack-dev libblas-dev gfortran
	sudo pip3 install -U six wheel setuptools typing_extensions
	sudo pip3 install pkgconfig
	sudo env H5PY_SETUP_REQUIRES=0 pip3 install -U h5py==3.1.0
	sudo pip3 install -U future==0.18.2 mock==3.0.5 \
	  keras_preprocessing==1.1.2 keras_applications==1.0.8 \
	  gast==0.4.0 futures pybind11

	echo "** Download and patch tensorflow-${version}"
	pushd "$src_folder"
	# Reuse a previously downloaded tarball if there is one.
	if [[ ! -f "tensorflow-${version}.tar.gz" ]]; then
	  wget "https://github.com/tensorflow/tensorflow/archive/v${version}.tar.gz" \
	    -O "tensorflow-${version}.tar.gz"
	fi
	tar xzvf "tensorflow-${version}.tar.gz"
	cd "tensorflow-${version}"

	# `patch -N` exits non-zero when it skips a patch that looks already applied.
	# Report that case and carry on; a clean application prints nothing extra.
	patch -N -p1 < "$patch_path" || \
	  echo "tensorflow-${version} source tree appears to be patched already. Continue..."

	echo "** Configure and build tensorflow-${version}"
	export TMP=/tmp
	export CC=/usr/bin/clang-10
	export CXX=/usr/bin/clang++-10
	export CXXFLAGS="-stdlib=libc++"

	# Answer every ./configure question through environment variables so the
	# configuration step runs without prompting.
	PYTHON_BIN_PATH="$(command -v python3)" \
	PYTHON_LIB_PATH="$(python3 -c 'import site; print(site.getsitepackages()[0])')" \
	TF_CUDA_COMPUTE_CAPABILITIES="${cuda_compute}" \
	TF_CUDA_VERSION=10.2 \
	TF_CUDA_CLANG=1 \
	TF_DOWNLOAD_CLANG=0 \
	CLANG_CUDA_COMPILER_PATH=/usr/bin/clang-10 \
	TF_CUDNN_VERSION=8 \
	TF_TENSORRT_VERSION="${trt_version}" \
	CUDA_TOOLKIT_PATH=/usr/local/cuda \
	CUDNN_INSTALL_PATH=/usr/lib/aarch64-linux-gnu \
	TENSORRT_INSTALL_PATH=/usr/lib/aarch64-linux-gnu \
	TF_NEED_IGNITE=0 \
	TF_ENABLE_XLA=0 \
	TF_NEED_OPENCL_SYCL=0 \
	TF_NEED_COMPUTECPP=0 \
	TF_NEED_ROCM=0 \
	TF_NEED_CUDA=1 \
	TF_NEED_TENSORRT=1 \
	TF_NEED_OPENCL=0 \
	TF_NEED_MPI=0 \
	GCC_HOST_COMPILER_PATH="$(command -v clang-10)" \
	CC_OPT_FLAGS="-Wno-sign-compare" \
	TF_SET_ANDROID_WORKSPACE=0 \
	  ./configure

	# The resource expressions contain '*', so they are quoted to reach bazel
	# unchanged.
	bazel build --config=opt \
	  --config=cuda \
	  --config=noaws \
	  '--local_cpu_resources=HOST_CPUS*0.25' \
	  '--local_ram_resources=HOST_RAM*0.5' \
	  //tensorflow/tools/pip_package:build_pip_package
	bazel-bin/tensorflow/tools/pip_package/build_pip_package wheel/tensorflow_pkg

	echo "** Install tensorflow-${version}"
	# The trailing glob is intentionally unquoted: it matches the wheel's
	# python/platform tag suffix.
	sudo pip3 install wheel/tensorflow_pkg/tensorflow-"${version}"-*.whl

	popd

	# Smoke test: import the freshly installed package and print its version
	# and CUDA/GPU status.
	TF_CPP_MIN_LOG_LEVEL=3 \
	  python3 -c "import tensorflow as tf; tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR); print('tensorflow version: %s' % tf.__version__); print('tensorflow.test.is_built_with_cuda(): %s' % tf.test.is_built_with_cuda()); print('tensorflow.test.is_gpu_available(): %s' % tf.test.is_gpu_available(cuda_only=False, min_cuda_compute_capability=None))"

	echo "** Build and install tensorflow-${version} successfully"
	make[3]: Entering directory '/home/nvidia/src/protobuf-3.9.2/src'
	../test-driver: line 107: 12829 Segmentation fault (core dumped) "$@" > $log_file 2>&1
	FAIL: protobuf-test
	PASS: protobuf-lazy-descriptor-test
	PASS: protobuf-lite-test
	PASS: google/protobuf/compiler/zip_output_unittest.sh
	PASS: google/protobuf/io/gzip_stream_unittest.sh
	PASS: protobuf-lite-arena-test
	PASS: no-warning-test
	============================================================================
	Testsuite summary for Protocol Buffers 3.9.2
	============================================================================
	# TOTAL: 7
	# PASS: 6
	# SKIP: 0
	# XFAIL: 0
	# FAIL: 1
	# XPASS: 0
	# ERROR: 0
	============================================================================
	See src/test-suite.log
	Please report to protobuf@googlegroups.com
	============================================================================
	Makefile:7751: recipe for target 'test-suite.log' failed
	make[3]: *** [test-suite.log] Error 1
	make[3]: Leaving directory '/home/nvidia/src/protobuf-3.9.2/src'
	Makefile:7857: recipe for target 'check-TESTS' failed
	make[2]: *** [check-TESTS] Error 2
	make[2]: Leaving directory '/home/nvidia/src/protobuf-3.9.2/src'
	Makefile:7972: recipe for target 'check-am' failed
	make[1]: *** [check-am] Error 2
	make[1]: Leaving directory '/home/nvidia/src/protobuf-3.9.2/src'
	Makefile:1715: recipe for target 'check-recursive' failed
	make: *** [check-recursive] Error 1
	diff -Naur a/third_party/nccl/build_defs.bzl.tpl b/third_party/nccl/build_defs.bzl.tpl
	--- a/third_party/nccl/build_defs.bzl.tpl 2021-08-10 03:10:27.000000000 +0800
	+++ b/third_party/nccl/build_defs.bzl.tpl 2021-10-08 10:26:33.536077745 +0800
	@@ -43,7 +43,7 @@
	# The global functions can not have a lower register count than the
	# device functions. This is enforced by setting a fixed register count.
	# https://github.com/NVIDIA/nccl/blob/f93fe9bfd94884cec2ba711897222e0df5569a53/makefiles/common.mk#L48
	- maxrregcount = "-maxrregcount=96"
	+ maxrregcount = "-maxrregcount=80"

	return cuda_default_copts() + select({
	"@local_config_cuda//:is_cuda_compiler_nvcc": [