#!/bin/bash

# Installer file name: the first script argument, or the default package
# name when that argument is missing or empty.
installer=${1:-NVIDIA-Linux-x86_64-no-compat32.run}
# Set to 1 by the distribution checks once the running OS is recognised.
distribution_supported=0

# Preconditions: lspci must list at least one device for PCI vendor ID 10de
# (the NVIDIA device the message refers to), and the script must run as root.
nvidia_devices=$(lspci -d 10de:)
if [ -z "$nvidia_devices" ]; then
	echo "make sure there is a nvidia pci device"
	exit -1
fi

if [ "$(id -u)" -ne 0 ]; then
	echo "must be root"
	exit -1
fi

#######################################
# Wait until no process holds the given lock file open (checked with fuser),
# polling once per second. Exits the script if the lock is still held after
# more than 300 polls.
# Arguments: $1 - path of the lock file to wait for
# Outputs:   progress / failure messages on stdout
#######################################
wait_for_lock_file() {
	local lock_file=$1
	local waited=0
	while fuser "$lock_file" >/dev/null 2>&1; do
		echo "Waiting for $lock_file to finish..."
		if [ "$waited" -gt 300 ]; then
			echo "Can not get $lock_file..."
			echo "### end ###"
			exit -1
		fi
		sleep 1
		waited=$((waited + 1))
	done
}

cd /usr/local/qcloud/
# Ubuntu Distribution
if lsb_release -a 2>&1 | grep "Ubuntu" &> /dev/null; then
	# Major release number, e.g. "18" from "Release: 18.04"; blanks left over
	# from the "Release:<tab>" prefix are stripped.
	ubuntu_major_version=$(lsb_release -a | grep -i "release" | cut -d':' -f2 | cut -d'.' -f1 | tr -d '[:blank:]')
	# Major versions greater than 14 are accepted.
	# NOTE(review): the previous comment here and the "unsupported
	# distribution" message both mention Ubuntu 18 - confirm the intended
	# minimum before tightening this check.
	if [[ "$ubuntu_major_version" =~ ^[0-9]+$ ]] && [ "$ubuntu_major_version" -gt 14 ]; then
		distribution_supported=1

		# Let any running dpkg/apt operation finish before using apt-get.
		wait_for_lock_file /var/lib/dpkg/lock
		wait_for_lock_file /var/lib/apt/lists/lock

		# Install the build prerequisites, retrying the whole sequence every
		# 5 seconds until every command succeeds or the retry budget is spent.
		wait_time=0
		while true; do
			# Sum of exit codes: zero only if every command succeeded.
			ret=0
			apt-get update
			ret=$(($ret+$?))
			apt-get -y install build-essential
			ret=$(($ret+$?))
			apt-get -y install dkms
			ret=$(($ret+$?))
			apt-get -y install gcc
			ret=$(($ret+$?))
			apt-get -y install g++
			ret=$(($ret+$?))

			if [ "$ret" == 0 ]; then
				break
			fi
			echo "Waiting for install dkms..."
			if [ "$wait_time" -gt 30 ]; then
				echo "Can not install dkms..."
				echo "### end ###"
				exit -1
			fi
			sleep 5
			wait_time=$(($wait_time+1))
		done

		# If the nouveau driver is loaded: unload it, delete its module file,
		# blacklist it and rebuild the initramfs.
		if lsmod | grep nouveau; then
			rmmod nouveau
			# The glob also removes compressed variants (nouveau.ko.xz,
			# nouveau.ko.zst, ...), matching the RHEL/tlinux branch.
			rm -rf "/lib/modules/$(uname -r)/kernel/drivers/gpu/drm/nouveau/"nouveau.ko*
			echo blacklist nouveau > /etc/modprobe.d/blacklist-nvidia-nouveau.conf
			echo options nouveau modeset=0 >> /etc/modprobe.d/blacklist-nvidia-nouveau.conf
			update-initramfs -u
		fi
	fi
fi

# Both variables are read again by the driver installation step:
#   is_tlinux_kernel4 - 1 when /etc/tlinux-release mentions tkernel4
#   tlinuxversion     - "<major>.<minor>" parsed from /etc/tlinux-release
is_tlinux_kernel4=0
tlinuxversion=""
if [ -f /etc/redhat-release ] || [ -f /etc/tlinux-release ]; then
	if [ -f /etc/tlinux-release ]; then
		# Supported tlinux releases: 2.4, 2.6, 3.x and 4.x.
		tlinuxversion=$(grep -o "[[:digit:]]\{1,\}\.[[:digit:]]\{1,\}" /etc/tlinux-release)
		if [ "$tlinuxversion" == "2.4" ] || \
		   [ "$tlinuxversion" == "2.6" ] || \
		   [ "${tlinuxversion:0:2}" == "3." ] || \
		   [ "${tlinuxversion:0:2}" == "4." ]; then
			distribution_supported=1
			tk4=$(grep tkernel4 /etc/tlinux-release)
			if [ -n "$tk4" ]; then
				# tkernel4: install the devtoolset-8 (gcc 8) toolchain via SCL.
				is_tlinux_kernel4=1
				yum -y install tlinux-release-scl
				yum -y install scl-utils
				yum -y install devtoolset-8-gcc
				yum -y install devtoolset-8-gcc-c++
			else
				# Other kernels: distribution compiler plus the kernel-devel
				# package for the running kernel.
				yum -y install gcc
				yum -y install gcc-c++
				yum -y install "kernel-devel-$(uname -r)"
			fi
			yum -y install dkms
		fi
		# If the nouveau driver is loaded: unload it, delete its module
		# file(s), blacklist it and regenerate the initramfs. This runs for
		# every tlinux release, whether or not it was accepted above.
		if lsmod | grep nouveau; then
			rmmod nouveau
			rm -rf "/lib/modules/$(uname -r)/kernel/drivers/gpu/drm/nouveau/"nouveau.ko*
			echo blacklist nouveau > /etc/modprobe.d/blacklist-nvidia-nouveau.conf
			echo options nouveau modeset=0 >> /etc/modprobe.d/blacklist-nvidia-nouveau.conf
			dracut --force
		fi
	else
		# Only support Centos7.x+
		# The major version is the part before the first dot of the 4th
		# whitespace-separated field of /etc/redhat-release.
		version=$(awk '{print $4}' /etc/redhat-release | awk -F "." '{print $1}')
		# Release strings whose 4th field is not a number are treated as
		# unsupported instead of triggering a "[" integer-expression error.
		if [[ "$version" =~ ^[0-9]+$ ]] && [ "$version" -ge 7 ]; then
			distribution_supported=1
			yum -y install gcc
			yum -y install gcc-c++
			yum -y install "kernel-devel-$(uname -r)"
			yum -y install dkms
		fi
	fi
fi

# Stop here unless one of the distribution checks marked this OS as supported.
[ "$distribution_supported" == "1" ] || {
        echo "unsupported distribution, must be ubuntu18.x centos7.x or tlinux2.4"
        exit -1
}

# Download the installer into /tmp, trying up to 10 times with a one-second
# pause after each failed attempt. Paths built from $installer are quoted so
# a caller-supplied name cannot be word-split or glob-expanded.
wait_time=0
get_driver=0
while [ "$wait_time" -lt 10 ]; do
	if wget "http://mirrors.tencentyun.com/install/GPU/$installer" -O "/tmp/$installer"; then
		get_driver=1
		break
	fi
	wait_time=$((wait_time + 1))
	sleep 1
done
if [ "$get_driver" != "1" ]; then
        echo "download driver file failed, please check the input,url oand network"
        exit -1
fi

# The downloaded .run file is executed directly later on.
chmod u+x "/tmp/$installer"

# Decide whether this is the internal build of TencentOS 4 by looking at the
# /etc/motd banner. Sample banners:
#   internal version on TencentOS4:
#      Welcome to TencentOS Server 4 x86_64 Tencent Edition
#      Version 4.2 20240904
#   external version on TencentOS4:
#      Welcome to TencentOS Server 4 x86_64
#      Version 4.2 20240708
# Only a banner with "tencent" after "x86_64" (case-insensitive) counts as
# internal; grep prints the matched text to stdout.
tlinux4_internal=0
if [[ "$tlinuxversion" == 4.* && -f /etc/motd ]] && grep -ioE 'x86_64.*tencent' /etc/motd; then
	tlinux4_internal=1
fi

# Run the NVIDIA installer.
#
# tlinux 2.6 / 3.2 and internal TencentOS 4: per the original note these
# releases need signed kernel modules, so a prebuilt module tarball is
# downloaded, copied into /lib/modules/<kernel>/extra and loaded, and the
# installer is run with --no-kernel-module.
# Everything else: the installer handles the kernel module itself (with
# --dkms, except on tkernel4 where it runs under the devtoolset-8 toolchain).
if [ "$tlinuxversion" == "2.6" ] || [ "$tlinuxversion" == "3.2" ] || [ "$tlinux4_internal" == "1" ]; then
	ko_url=http://mirrors.tencentyun.com/install/GPU/tlinux_ko/
	kernel_release=$(uname -r)
	# Driver version (three dot-separated numbers) taken from the installer
	# file name; empty when the name does not contain one.
	driver_version=$(echo "$installer" | grep -oP '(?<=NVIDIA-Linux-x86_64-)\d+\.\d+\.\d+')

	# Try the tarball for the exact kernel release first, then fall back to
	# the release string truncated right after its "tlinux4-00NN" component.
	if ! wget -t 10 --timeout=10 "$ko_url/kernel${kernel_release}-nvidia${driver_version}.tar.gz" -O /tmp/nvidia.tar.gz; then
		short_release=$(printf '%s\n' "$kernel_release" | sed 's/\(tlinux4-00[0-9][0-9]\).*/\1/')
		if ! wget -t 10 --timeout=10 "$ko_url/kernel${short_release}-nvidia${driver_version}.tar.gz" -O /tmp/nvidia.tar.gz; then
			echo "download nvidia ko tar failed, please check whether support, curl $ko_url"
			exit -1
		fi
	fi

	mkdir -p /tmp/nvidia_ko
	# Abort on a failed extraction rather than installing whatever happens
	# to be in /tmp/nvidia_ko.
	if ! tar zxf /tmp/nvidia.tar.gz -C /tmp/nvidia_ko/ >/dev/null 2>&1; then
		echo "extract nvidia ko tar failed: /tmp/nvidia.tar.gz"
		exit -1
	fi
	mkdir -p "/lib/modules/${kernel_release}/extra"
	cp -a /tmp/nvidia_ko/* "/lib/modules/${kernel_release}/extra/"
	depmod
	modprobe nvidia
	modprobe nvidia-uvm
	modprobe nvidia-drm
	modprobe nvidia-modeset

	"/tmp/$installer" --ui=none --disable-nouveau --no-install-libglvnd --no-kernel-module --no-cc-version-check -s
else
	if [ "$is_tlinux_kernel4" == "1" ]; then
		# tkernel4 switch gcc, so no --dkms here
		scl enable devtoolset-8 "/tmp/$installer --ui=none --disable-nouveau --no-install-libglvnd --no-cc-version-check -s"
	else
		"/tmp/$installer" --ui=none --disable-nouveau --no-install-libglvnd --dkms --no-cc-version-check -s
	fi
fi

echo "install nvidia driver finished"
# The installer file is no longer needed once it has run.
rm -f "/tmp/$installer"

# If nvidia-smi is on PATH, start nvidia-persistenced in persistence mode;
# otherwise report the missing command and exit 1. A real if/else is used so
# the failure branch cannot also run after the success branch, which the
# "A && B || C" form allows whenever B returns non-zero.
if command -v nvidia-smi >/dev/null 2>&1; then
	nvidia-persistenced --persistence-mode
	echo "nvidia-persistenced starts with persistence mode enabled for all devices"
else
	echo "no nvidia-smi command. exit."
	exit 1
fi

# restart barad
# Only when the barad directory exists and a barad_agent process is running.
if [ -d /usr/local/qcloud/monitor/barad/ ]; then
    # The '[b]arad_agent' pattern cannot match grep's own command line, so
    # the count covers real agent processes only and does not depend on
    # whether the grep process happens to show up in the ps listing.
    if [ "$(ps -ef | grep -c '[b]arad_agent')" -gt 0 ]; then
        /usr/local/qcloud/monitor/barad/admin/stop.sh
        /usr/local/qcloud/monitor/barad/admin/trystart.sh
        echo "barad agent restarted"
    fi
fi
