#!/bin/bash
################################################
# script for NC cPanel autobackup monitoring   #
#                                              #
# Created by Bogdan Kukharskiy                 #
# Namecheap                                    #
################################################
#
# Runs five checks in order and stops at the first one that fails:
#   1. emergency backup partition on /backup and its mount age
#   2. target mountpoint is mounted from the source listed in /etc/fstab
#   3. self-test of /root/bin/nc_cp_backup_with_snapshot.sh (HAPI and rsync)
#   4. freshness of the last snapshot recorded in the backup log
#   5. percentage of failed accounts in the last backup run
#
# Exit status:
#   0 - every check passed ("All OK" is printed)
#   2 - a check failed or could not be evaluated
#   3 - help requested or invalid command line

PATH=/bin:/sbin:/usr/bin:/usr/sbin:/usr/local/bin:/usr/local/sbin

verbose=0
targetstr="/backup"
agetreshold=172800 # 48 hours
ratiotreshold=$(printf "%.2f\n" 0.20)
snapagetresholdoverride=""
PROGNAME=${0##*/}
fstype="nfs"
shorthost=$(hostname -s)
emrgstr="nfs/${shorthost}"
backuplog="/var/log/nc_audit/nc_cp_backup.log"

#green='\033[0;32m'
#yellow='\033[1;33m'
#red='\033[0;31m'
#NC='\033[0m' # No Color

# Print the one-line usage summary to stdout.
print_usage() {
  echo ""
  echo "Usage: $PROGNAME [-v be Verbose] [-t Target mount string to check] [-a critical Age for emergency backup partition mount ] [-r failed/success Ratio] [-s Snapshot age treshold override]"
  echo "Usage: $PROGNAME --help"
}

# Print the full help screen and terminate with status 3.
print_help() {
  print_usage
  echo ""
  echo "This script runs different checks on NC cPanel Autobackup (see TO-17214)"
  echo ""
  echo "-v be Verbose"
  echo "-t Target mount string to check, string. Default - /backup"
  echo "-a critical Age for emergency backup partition mount, seconds. Default - 172800 (48 hours)"
  echo "-r failed/success Ratio, float. Default - 0.20"
  echo "-s Snapshot age treshold override, seconds, Default, for regular and resellers - 302400 (84 hours)"
  echo " for business and premium - 129600 (36 hours)"
  echo "--help Print this help screen"
  echo ""
  exit 3
}

# Report a command-line error ($1) on stderr, show usage and exit 3.
usage_error() {
  echo >&2 "$1"
  print_usage
  exit 3
}

# Abort unless the option in $1 is followed by a value.
# Call as: need_value "$@"
need_value() {
  [[ $# -ge 2 ]] || usage_error "Option $1 requires a value"
}

# Print the message only when -v was given.
vsay() {
  if [[ ${verbose} == 1 ]]; then
    printf '%s\n' "$*"
  fi
}

while [ $# -gt 0 ]; do
  case "$1" in
    --help | -h)
      print_help
      ;;
    -t)
      need_value "$@"
      targetstr=$2
      shift
      ;;
    -a)
      need_value "$@"
      [[ $2 =~ ^[0-9]+$ ]] || usage_error "Option -a expects a whole number of seconds: $2"
      agetreshold=$2
      shift
      ;;
    -r)
      need_value "$@"
      # printf returns non-zero when the value is not a number.
      if ! ratiotreshold=$(printf "%.2f\n" "$2" 2>/dev/null); then
        usage_error "Option -r expects a number: $2"
      fi
      shift
      ;;
    -s)
      need_value "$@"
      [[ $2 =~ ^[0-9]+$ ]] || usage_error "Option -s expects a whole number of seconds: $2"
      snapagetresholdoverride=$2
      shift
      ;;
    -v)
      # Flag without a value: only the common shift below must consume it,
      # otherwise the options that follow -v are silently dropped.
      verbose=1
      ;;
    *)
      usage_error "Unknown argument: $1"
      ;;
  esac
  shift
done

# lets check if emergency partition is mounted
vsay "1. Checking if the emergency partition is mounted"
emrgchkres=$(findmnt -rno SOURCE "/backup" -t "${fstype}" 2>/dev/null)
if [[ -z ${emrgchkres} ]]; then
  # Nothing of type ${fstype} is mounted on /backup, so there is no mount
  # whose age could be checked; check 2 reports a missing target mount.
  vsay "No ${fstype} mount found on /backup, skipping the emergency partition age check"
elif [[ ${emrgchkres} != *"${emrgstr}"* ]]; then
  # NOTE(review): a source that does NOT contain "nfs/<short hostname>" is
  # treated as the emergency partition, as in the original code - confirm
  # that this is the intended sense of the test.
  vsay "Emergency partition is mounted: ${emrgchkres}"
  # Then let's check the age of emergency backup partition.
  # This must run in non-verbose mode too, since it can end with exit 2.
  # Only "age:" lines are read; the last one wins if several mounts match.
  age=$(grep -A2 /backup /proc/self/mountstats | awk '$1 == "age:" { print $2 }' | tail -n 1)
  vsay "1.1. Checking the age of emergency backup partition"
  if [[ ! ${age} =~ ^[0-9]+$ ]]; then
    echo "Cannot determine the age of emergency backup partition ${emrgchkres}"
    exit 2
  fi
  vsay " Age of ${emrgchkres} : ${age} sec ($((age / 3600)) hours)"
  if [[ ${age} -lt ${agetreshold} ]]; then
    vsay "Emergency partition age is OK"
  else
    echo -e "Emergency partition age > $((agetreshold / 3600)) hours ($((age / 3600)) hours)"
    exit 2
  fi
else
  vsay "OK. Emergency partition is not mounted"
fi

# find the target in fstab and compare with what is really mounted
sourcestr="$(grep -F -- "${targetstr}" /etc/fstab | grep -v "#" | awk '{ print $1 }')"
mntchkres=$(findmnt -rno SOURCE "${targetstr}" -t "${fstype}" 2>/dev/null)
mntchkres="${mntchkres%/}" # remove last / if presents
vsay "2. Checking mountpoint ${targetstr} of fstype ${fstype} "
vsay " Source string from /etc/fstab : ${sourcestr}"
vsay " Findmnt results : ${mntchkres}"
# An empty findmnt result means "not mounted"; it must not be accepted just
# because the fstab lookup came back empty as well.
if [[ -n "${mntchkres}" && "${mntchkres}" == "${sourcestr}" ]]; then
  vsay "OK. Mountpoint ${targetstr} is mounted according to fstab"
else
  echo -e "Mountpoint ${targetstr} is not mounted"
  exit 2
fi

# testing connection to HAPI and rsync
vsay "3. Testing connection to HAPI and rsync (with /root/bin/nc_cp_backup_with_snapshot.sh --test)"
tstchkres=$(/root/bin/nc_cp_backup_with_snapshot.sh --test)
if [[ ${tstchkres} == *"Connection to HAPI: test OK"* && ${tstchkres} == *"Connection to rsync: test OK"* ]]; then
  vsay "OK"
  vsay "${tstchkres}"
else
  echo -e "Connection to HAPI or rsync failed"
  echo "${tstchkres}"
  exit 2
fi

# Checking snapshot freshness
vsay "4. Checking snapshot freshness (via log at /var/log/nc_audit/nc_cp_backup.log)"
if [[ -n "${snapagetresholdoverride}" ]]; then
  snapagetreshold=${snapagetresholdoverride}
elif [[ ${shorthost} == *"business"* || ${shorthost} == *"premium"* ]]; then
  snapagetreshold=129600 # 36 hours
else
  snapagetreshold=302400 # 84 hours
fi
# The first three fields of the last "triggered" line are used as timestamp.
rawsnapshotage=$(grep -B 1 "snapshot creation api has been triggered" "${backuplog}" | tail -1 | awk '{print $1" "$2" "$3}')
# An empty string must not reach date: GNU date reads -d "" as the start
# of today, which would make a missing record look like a fresh snapshot.
if [[ -z "${rawsnapshotage//[[:space:]]/}" ]]; then
  echo "Cannot find the last snapshot record in ${backuplog}"
  exit 2
fi
if ! snapshotage=$(date -d"${rawsnapshotage}" +%s 2>/dev/null); then
  echo "Cannot parse snapshot timestamp '${rawsnapshotage}' from ${backuplog}"
  exit 2
fi
snapdiff=$(($(date +%s) - snapshotage))
if [[ ${snapdiff} -lt ${snapagetreshold} ]]; then
  vsay "OK. Snapshot is fresh (created at ${rawsnapshotage})"
else
  echo -e "Snapshot was created more than $((snapagetreshold / 3600)) hours ago ($((snapdiff / 3600)))"
  exit 2
fi

# Checking success/failed ratio (should be less than 0.2%)
vsay "5. Checking the success/failed ratio of backed up accounts"
ratiores=$(grep -B 1 "snapshot creation api has been triggered" "${backuplog}" | tail -2 | grep "failed:")
if [[ -n ${ratiores} ]]; then
  vsay "Failed accounts found. Calculating the ratio"
  totalacc=$(printf '%s\n' "${ratiores}" | awk '{print $11}')
  failedacc=$(printf '%s\n' "${ratiores}" | awk '{print $16}')
  # Ratio is a percentage. A missing or zero total cannot be divided by and
  # must not fall through to the OK branch.
  if ! ratio=$(awk -v f="${failedacc}" -v t="${totalacc}" 'BEGIN { if (t + 0 <= 0) exit 1; print 100 * f / t }'); then
    echo "Cannot calculate failed ratio (failed '${failedacc}' from '${totalacc}')"
    exit 2
  fi
  # awk does the float comparison itself: exit status 0 means ratio > treshold.
  if awk -v r="${ratio}" -v t="${ratiotreshold}" 'BEGIN { exit !(r + 0 > t + 0) }'; then
    echo -e "Failed ratio is more than treshold (${ratio} > ${ratiotreshold}, failed ${failedacc} from ${totalacc}) "
    exit 2
  else
    vsay "Mmm..OK. Failed ratio is not more than treshold (${ratio} < ${ratiotreshold}, failed ${failedacc} from ${totalacc})"
  fi
else
  vsay "OK. No failed accounts. Success ratio - 100% "
fi

echo "All OK"