Linux服务器定时健康检查,发生故障自动微信告警

时间:2023-03-09 08:17:28
Linux服务器定时健康检查,发生故障自动微信告警

此脚本适用于各种Linux环境,可以实现多种监控项目,并可自定义阈值,以实现不同的监控效果
已在原有脚本基础上做了简化,提取了主要功能
目前实现的有:
1、磁盘监控
2、内存监控
3、cpu负荷监控
4、进程数监控
5、iptables 状态检查(若防火墙未开启,启动防火墙)
6、多端口检查
其中任意一项检查失败时,会通过微信提醒对应的失败监控项

脚本如下所示:

#!/bin/bash

source /etc/profile

# Print a bracketed, bold, coloured label followed by a newline.
# Arguments:
#   $1 - ANSI SGR colour code placed before ";1" (bold)
#   $2 - text to show; backslash escapes in it are expanded
Echo_Colour(){
  local sgr_code="$1"
  local label="$2"
  # %b expands backslash escapes the same way `echo -e` does.
  printf '%b\n' "[\033[${sgr_code};1m${label}\033[0m]"
}
# Print one three-column table row framed by "|" characters.
# Arguments:
#   $1 - first column, left-aligned, padded to 12 characters
#   $2 - second column, right-aligned, padded to 15 characters
#   $3 - third column, right-aligned, padded to 10 characters
Print_Format(){
  local first_col="$1"
  local second_col="$2"
  local third_col="$3"
  local row_layout="|%-12s|%15s|%10s|\n"
  # shellcheck disable=SC2059 -- the layout is a fixed string defined above
  printf "$row_layout" "$first_col" "$second_col" "$third_col"
}
# Print the horizontal rule of the report table: three dash segments sized to
# the column widths used by Print_Format, joined and framed by "+".
# Takes no arguments.
Print_Format2(){
  local -a rule_segments=("------------" "---------------" "----------")
  printf "+%-12s+%15s+%10s+\n" "${rule_segments[@]}"
}
# Print a table row (via Print_Format) only when the first number is strictly
# greater than the second.
# Arguments:
#   $1 - left operand (integer or decimal)
#   $2 - right operand (integer or decimal)
#   $3, $4, $5 - the three columns handed to Print_Format
# Outputs: one row on stdout when $1 > $2, otherwise nothing.
#
# The previous version compared bc's output with an empty string, so a row was
# printed only when bc failed; bc prints 1 for a true relation. The comparison
# is now done with awk, and an operand that is empty or not a number yields no
# row instead of a bc error message.
Print_Select(){
  local Num1="$1"
  local Num2="$2"
  local number_re='^-?([0-9]+\.?[0-9]*|\.[0-9]+)$'

  # A metric that could not be collected is not comparable; stay silent.
  [[ "$Num1" =~ $number_re && "$Num2" =~ $number_re ]] || return 0

  # awk handles decimals such as load averages, which shell arithmetic cannot.
  if awk -v a="$Num1" -v b="$Num2" 'BEGIN { exit !(a + 0 > b + 0) }'; then
    Print_Format "$3" "$4" "$5"
  fi
}
# Print either a timestamped progress label or a coloured result.
# Arguments:
#   $1 - status: empty to print a label, "0" for success, "1" for failure
#   $2 - label text (used only when $1 is empty)
#   $3 - result text (used when $1 is "0" or "1")
# Outputs:
#   empty status + label : "[date time]<TAB>label" padded to 45 columns, in
#                          green, without a trailing newline
#   status "0" / "1"     : the result text through Echo_Colour
#   anything else        : "syntax error" through Echo_Colour, then the shell
#                          exits
#
# The previous version assigned a literal "$" to all three variables (the
# positional-parameter digits were missing), so every call fell through to the
# "syntax error" branch and exited. Its two result branches were also
# identical, making the second unreachable.
# NOTE(review): the status values "0"/"1" and the colour codes 32 (green) and
# 31 (red) are reconstructed; they were blank in this copy. Confirm against
# the original script.
Output_(){
  local TrueFalse="$1"
  local PrintVar="$2"
  local PrintResult="$3"

  if [[ -z "$TrueFalse" && -n "$PrintVar" ]]; then
    PrintVar=$(printf "%-45s" "${PrintVar}")
    echo -n -e "\033[32;49;1m[$(date '+%F %T')]\t${PrintVar}\033[39;49;0m"
  elif [[ "$TrueFalse" == "0" ]]; then
    Echo_Colour "32" "$PrintResult"
  elif [[ "$TrueFalse" == "1" ]]; then
    Echo_Colour "31" "$PrintResult"
  else
    Echo_Colour "31" "syntax error"; exit
  fi
}
# Print a coloured pass/fail result depending on whether two values match.
# Arguments:
#   $1, $2 - values compared as strings
#   $3     - text shown when they are equal
#   $4     - text shown when they differ
#   $5     - optional command run after a mismatch (e.g. "exit")
# NOTE(review): the status codes handed to Output_ ("0" on match, "1" on
# mismatch) were blank in this copy and are reconstructed. Confirm.
Output_Select(){
  local Num1="$1"
  local Num2="$2"
  local PrintTrue="$3"
  local PrintFalse="$4"
  local RetrunExit="$5"

  if [ "$Num1" == "$Num2" ]; then
    Output_ "0" "" "$PrintTrue"
  else
    Output_ "1" "" "$PrintFalse"
    # Deliberately unquoted: the value is a command line and may be empty.
    $RetrunExit
  fi
}

# Send the report file named by $TMP to WeCom (enterprise WeChat) as a text
# message.
# Globals:
#   TMP (read)            - path of the report file
#   WEIXIN_CORP_ID (read) - optional override for the corp ID
#   WEIXIN_SECRET (read)  - optional override for the secret
# Outputs: the send request's output is written to /tmp/weixin.log
# Returns: 1 when the report is unreadable or no access token was obtained
weixin(){
  local CropID Secret GURL Gtoken PURL Content payload

  # CropID: enterprise (corp) ID.
  # Secret: credential secret of the management group.
  # NOTE(review): credentials are hard-coded in the script; prefer supplying
  # them through WEIXIN_CORP_ID / WEIXIN_SECRET and rotating the embedded ones.
  CropID="${WEIXIN_CORP_ID:-wx80179d3a3eb67***}"
  Secret="${WEIXIN_SECRET:-ZyqFs4qfUiXcz8plHFbhCWkF3JEjj7vASkZjs8YTRqKxq1yAx-U46foyNXNKz2qw}"

  [[ -n "$TMP" && -r "$TMP" ]] || return 1

  GURL="https://qyapi.weixin.qq.com/cgi-bin/gettoken?corpid=$CropID&corpsecret=$Secret"
  # The access token is the account-wide ticket every API call must carry.
  # It is extracted by key: picking the 4th quote-delimited field breaks as
  # soon as the response lists another key (such as errcode) first.
  Gtoken=$(/usr/bin/curl -s -G "$GURL" \
    | sed -n 's/.*"access_token"[[:space:]]*:[[:space:]]*"\([^"]*\)".*/\1/p')
  if [[ -z "$Gtoken" ]]; then
    echo "weixin: could not obtain access_token" >/tmp/weixin.log
    return 1
  fi
  PURL="https://qyapi.weixin.qq.com/cgi-bin/message/send?access_token=$Gtoken"

  # Escape backslashes and double quotes and turn every line end into a
  # literal \n so the report is a valid JSON string.
  Content=$(sed -e 's/\\/\\\\/g' -e 's/"/\\"/g' "$TMP" \
    | awk '{ printf "%s\\n", $0 }')

  payload="{\"touser\":\"@all\",\"msgtype\":\"text\",\"toparty\":\"1\",\"agentid\":\"1\",\"text\":{\"content\": \"Server Check Monitor:\n$Content\"}, \"safe\":\"0\"}"
  curl -H "Content-type: application/json" -X POST -d "$payload" "$PURL" &>/tmp/weixin.log
}

# Print the free share of a resource as a whole percentage.
# Arguments: $1 - total, $2 - used (both non-negative integers)
# Outputs: (total - used) * 100 / total, or nothing when an input is missing,
#          not an integer, or the total is zero.
_percent_free(){
  local total="$1" used="$2"
  [[ "$total" =~ ^[0-9]+$ && "$used" =~ ^[0-9]+$ ]] || return 0
  (( total > 0 )) || return 0
  echo $(( (total - used) * 100 / total ))
}

# Succeed when the iptables INPUT chain has policy DROP.
_iptables_input_drops(){
  /sbin/iptables -L -n | grep -q "Chain INPUT (policy DROP)"
}

# Collect host metrics into global variables for Send_Warning.
# Globals written:
#   Cpucore, Cpuload, Cpuidle, MemTotal, MemUse, MemFree, SwapTotal, SwapUse,
#   SwapFree, DiskUse1, DiskUse1Free, DiskUse2, DiskUse2Free, eth0Link,
#   eth0Speed, eth0Duplex, eth1Link, eth1Speed, eth1Duplex, Iptables,
#   Iptables_bc
# Globals read: LogFile
# Side effect: when the INPUT policy is not DROP, runs
#   /data/shelltools/web_iptable.sh once and checks again.
# NOTE(review): the factor 100 in the percentages and the Iptables_bc values
# (1 = policy DROP, 0 = not) were blank in this copy and are reconstructed.
server_check(){
  Cpucore=$(grep -c processor /proc/cpuinfo)
  # Second-to-last comma-separated field of the uptime line.
  Cpuload=$(uptime | awk -F, '{print $(NF-1)}' | tr -d " ")
  # Third column from the end of the vmstat data line; presumably CPU idle,
  # which depends on the installed vmstat's column layout. TODO confirm.
  Cpuidle=$(vmstat | awk '/[0-9]+/{print $(NF-2)}')

  MemTotal=$(free -m | awk '/Mem:/{print $2}')
  MemUse=$(free -m | awk '/-\/+/{print $3}')
  if [[ -z "$MemUse" ]]; then
    # Newer free(1) has no "-/+ buffers/cache" row; derive the used amount
    # from the "available" column of the Mem: row instead.
    MemUse=$(free -m | awk '/Mem:/{print $2 - $7}')
  fi
  MemFree=$(_percent_free "$MemTotal" "$MemUse")

  SwapTotal=$(free -m | awk '/Swap:/{print $2}')
  SwapUse=$(free -m | awk '/Swap/{print $3}')
  # Stays empty on hosts without swap (total of zero).
  SwapFree=$(_percent_free "$SwapTotal" "$SwapUse")

  # -P keeps each filesystem on one line even when the device name is long.
  DiskUse1=$(df -hP | awk '/\/$/{print $(NF-3)}')
  DiskUse1Free=$(df -hP | awk '/\/$/{print $(NF-1)}' | tr -d "%")
  if [[ "$DiskUse1Free" =~ ^[0-9]+$ ]]; then
    DiskUse1Free=$(( 100 - DiskUse1Free ))
  else
    DiskUse1Free=""
  fi
  DiskUse2=$(df -hP | awk '/\/data$/{print $(NF-3)}')
  DiskUse2Free=$(df -hP | awk '/\/data$/{print $(NF-1)}' | tr -d "%")
  if [[ "$DiskUse2Free" =~ ^[0-9]+$ ]]; then
    DiskUse2Free=$(( 100 - DiskUse2Free ))
  else
    # No /data mount: leave empty so no alert row is produced for it.
    DiskUse2Free=""
  fi

  eth0Link=$(ethtool eth0 | awk '/Link/{print $NF}')
  eth0Speed=$(ethtool eth0 | awk '/Speed/{print $NF}')
  eth0Duplex=$(ethtool eth0 | awk '/Duplex/{print $NF}')
  eth1Link=$(ethtool eth1 | awk '/Link/{print $NF}')
  eth1Speed=$(ethtool eth1 | awk '/Speed/{print $NF}')
  eth1Duplex=$(ethtool eth1 | awk '/Duplex/{print $NF}')

  if _iptables_input_drops; then
    Iptables=Yes
    Iptables_bc=1
  else
    # Firewall not in place: try to load the rules, then check again.
    /bin/sh /data/shelltools/web_iptable.sh >>"${LogFile:-/dev/null}" 2>&1
    if _iptables_input_drops; then
      Iptables=Yes
      Iptables_bc=1
    else
      Iptables=No
      Iptables_bc=0
    fi
  fi
}

# Count application connections and processes into global variables.
# Globals written:
#   GameOnLineNum    - ESTABLISHED connections whose local address matches
#                      :9200 or :9300
#   GameJavaProNum   - jps lines containing "Server"
#   GameTomcatProNum - jps lines containing "Bootstrap"
#   GameNginxProNum  - processes whose ps line contains "nginx: master process"
Game_Check(){
  GameOnLineNum=$(netstat -ntp | awk '/ESTABLISHED/{print $4}' | grep -P -c ":9200|:9300")
  GameJavaProNum=$(jps | grep -c "Server")
  GameTomcatProNum=$(jps | grep -c Bootstrap)
  GameNginxProNum=$(ps -ef | grep -v grep | grep -c "nginx: master process")
}

# Build the alert table and send it through weixin when any check failed.
# Globals read: Site, IP, LogFile and the metrics set by server_check and
#   Game_Check. Optional threshold overrides: CPU_IDLE_MIN, MEM_FREE_MIN,
#   DISK_FREE_MIN, JAVA_PROC_MIN, TOMCAT_PROC_MIN, NGINX_PROC_MIN, NC_TIMEOUT.
# Globals written: TMP (report file; removed before returning)
# NOTE(review): every numeric threshold was blank in this copy. JAVA_PROC_MIN=5
# follows the article's example ("fewer than 5 Java processes"), and ports
# 80/22 follow the alert texts. The remaining defaults are placeholders;
# set them to the values your hosts need.
Send_Warning(){
  local log="${LogFile:-/dev/null}"
  local cpu_idle_min="${CPU_IDLE_MIN:-20}"
  local mem_free_min="${MEM_FREE_MIN:-20}"
  local disk_free_min="${DISK_FREE_MIN:-20}"
  local java_proc_min="${JAVA_PROC_MIN:-5}"
  local tomcat_proc_min="${TOMCAT_PROC_MIN:-1}"
  local nginx_proc_min="${NGINX_PROC_MIN:-1}"
  local nc_timeout="${NC_TIMEOUT:-3}"
  local header_lines port_rc

  TMP=$(mktemp) || return 1

  {
    echo "${Site}:${IP} ERROR"
    Print_Format2
    Print_Format "Site" "Name" "Warning"
    Print_Format2
  } >>"$TMP" 2>&1
  # Remember the header length so "any alert row present" needs no magic number.
  header_lines=$(wc -l <"$TMP")

  # A row is appended only when the left value exceeds the right one. Error
  # output goes to the log, not the report, so a failed comparison can never
  # be mistaken for an alert row.
  {
    Print_Select "$Cpuload" "$Cpucore" "$Site" "CPU Load" "$Cpuload"
    Print_Select "$cpu_idle_min" "$Cpuidle" "$Site" "CPU Idle" "$Cpuidle%"
    Print_Select "$mem_free_min" "$MemFree" "$Site" "Mem Idle" "$MemFree%"
    Print_Select "$disk_free_min" "$DiskUse1Free" "$Site" "/" "$DiskUse1Free%"
    Print_Select "$disk_free_min" "$DiskUse2Free" "$Site" "/data" "$DiskUse2Free%"
    Print_Select "$java_proc_min" "$GameJavaProNum" "$Site" "JavaPro" "$GameJavaProNum"
    Print_Select "$tomcat_proc_min" "$GameTomcatProNum" "$Site" "TomcatPro" "$GameTomcatProNum"
    Print_Select "$nginx_proc_min" "$GameNginxProNum" "$Site" "NginxPro" "$GameNginxProNum"
    Print_Select "1" "$Iptables_bc" "$Site" "iptables" "$Iptables_bc"
  } >>"$TMP" 2>>"$log"

  # Port probes: a non-zero nc exit status becomes an alert row.
  nc -nvz -w "$nc_timeout" "$IP" 80 >>"$log" 2>&1
  port_rc=$?
  Print_Select "$port_rc" "0" "$Site" "$IP" "Web 80 Port Fail" >>"$TMP" 2>>"$log"
  nc -nvz -w "$nc_timeout" "$IP" 22 >>"$log" 2>&1
  port_rc=$?
  Print_Select "$port_rc" "0" "$Site" "$IP" "SSH 22 Port Fail" >>"$TMP" 2>>"$log"

  if (( $(wc -l <"$TMP") > header_lines )); then
    # Close the table inside the report; it used to go to stdout instead.
    Print_Format2 >>"$TMP"
    cd /tmp/
    weixin
  fi
  #cat $TMP
  rm -f -- "$TMP"
}

# Entry point: collect metrics, then send alerts when the switch is ON.
# Globals written: LogFile, Site, IP, Alarm
# Reads /root/config, expecting a line of the form "alarm:ON" or "alarm:OFF".
main()
{
  LogFile=/root/check.log
  Site=$(hostname)
  # Accept both ifconfig layouts: "inet addr:1.2.3.4" and "inet 1.2.3.4".
  IP=$(ifconfig eth0 | awk '/inet /{ sub(/^addr:/, "", $2); print $2; exit }')
  Alarm=""
  if [[ -r /root/config ]]; then
    Alarm=$(awk -F":" '/alarm/{print $2}' /root/config | sed 's/ //g')
  fi
  server_check >/dev/null
  Game_Check >/dev/null
  # Quoted so a missing or empty switch simply means "no alert".
  if [[ "$Alarm" == "ON" ]]; then
    Send_Warning
  fi
}

main
# To turn WeChat alerting off, write alarm:OFF to the config file:
#echo alarm:OFF > /root/config

auto_Check.sh

如需开启微信告警,请执行:

#echo alarm:ON > /root/config 【此为微信告警开关】

效果图如下(Java进程低于5个、80端口不通,微信告警):

Linux服务器定时健康检查,发生故障自动微信告警