我的收藏

AIX主机监控脚本

上一篇 / 下一篇  2008-03-19 08:42:54 / 个人分类:经典脚本

#!/bin/sh

#
# 该脚本的作用是检查主机的负载情况,对于超出阈值的项目,发送短信告警到相关人员的手机。
# 当前网管系统实际上已经具备这样的功能,但有误告或不告的情况,故以此脚本作为补充。
# 作者:niyl
# 日期:2008-03-05
#

#
# Installation directory and settings file.
# The settings file is loaded first (see check_source); the script exits if
# it cannot be read. It defines the alert thresholds for each resource and
# the Oracle user's environment variables. Any other environment variables
# the script needs can be set there as well.
#
HOME_DIR=/home/niyl/smsalert

SOURCE=$HOME_DIR/env.conf
# check_source: load the settings file named by $SOURCE into the current shell.
# Globals:  SOURCE (read); whatever the settings file defines (written)
# Outputs:  an error message on stderr when the file is not readable
# Exits:    with status 1 when the file is not readable
check_source()
{
    if [ -r "$SOURCE" ]; then
        . "$SOURCE"
    else
        echo "$(basename "$0"): Cannot locate the default setting file." >&2
        exit 1
    fi
}

#
# sendmessage "text": send an alert SMS to every configured recipient.
# Arguments: $1 - the message text
# Globals:   HOME_DIR (read) - directory holding phone.conf and send_msg
# Reads phone.conf (lines starting with '#' and blank lines are skipped) and
# runs ./send_msg once per phone number.
# NOTE(review): only the first whitespace-separated field of each line is
# used as the phone number; confirm phone.conf holds one number per line.
#
sendmessage() {
    # Run in a subshell so the caller's working directory is left untouched.
    (
        cd "$HOME_DIR" || exit 1
        grep -v '^#' phone.conf | while read -r phone_no _extra; do
            [ -n "$phone_no" ] || continue
            # Detach stdin so send_msg can never swallow the rest of the list.
            ./send_msg "$phone_no" "$1" < /dev/null
        done
    )
}

#
# Determine this host's IP address.
# The interface name is taken from column 6 of the first "default" route in
# `netstat -rn`; the address is the second word on line 2 of that interface's
# `ifconfig` output.
# NOTE(review): these column positions match AIX output - confirm before
# running on another platform.
#
EN_NUM=$(netstat -rn | awk '/default/ {print $6; exit}')
HOSTIP=
if [ -n "$EN_NUM" ]; then
    HOSTIP=$(ifconfig "$EN_NUM" | sed -n '2p' | awk '{print $2}')
fi
# Fall back to the host name so alert messages still identify the sender.
[ -n "$HOSTIP" ] || HOSTIP=$(hostname)

#
# report_header: print the report banner to stdout.
# Globals: HOSTIP (read); HOSTNAME and USER (written, as the original did)
# Output:  a starred box with host name, server address, current user and
#          the current time formatted as YYYY-MM-DD HH:MM:SS.
#
report_header()
{
    HOSTNAME=$(hostname)
    USER=$(whoami)
    cat <<EOF
*****************************************************************************************
* Hostname: $HOSTNAME  Server: $HOSTIP
* User: $USER   Time: $(date '+%Y-%m-%d %H:%M:%S')
*           
*                               SYSTEM CHECK REPORT
*                               ===================
*****************************************************************************************
EOF
}

#
# Directory for the script's temporary output files; created when missing.
# Every check below writes here, so failing to create it is fatal.
#
TEMP_DIR=$HOME_DIR/tmp
if [ ! -d "$TEMP_DIR" ]; then
    mkdir -p "$TEMP_DIR" || {
        echo "$(basename "$0"): Cannot create $TEMP_DIR." >&2
        exit 1
    }
fi

#
# Load the environment settings (thresholds etc.) from the settings file.
#
check_source

#
# Print the report banner.
#
report_header

#
# Check CPU usage with vmstat.
# Ten one-second samples are taken; busy% = 100 - average idle%, averaged
# over the samples actually seen and truncated to an integer so it can be
# compared with the integer threshold CPU_VALUE from the settings file.
# NOTE(review): column 16 is assumed to be the idle ("id") column of AIX
# vmstat output - confirm on the target system.
#
echo
echo "---------------------------- Check CPU Useage ----------------------------"
vmstat 1 10 | awk '
    { print }
    $1 ~ /^[0-9]/ { idle += $16; samples++ }
    END {
        # No summary line is printed when vmstat produced no samples.
        if (samples > 0)
            printf "The average usage of cpu is :%d\n", 100 - idle / samples
    }' > "$TEMP_DIR/cpu_info"
cat "$TEMP_DIR/cpu_info"
cpu_used_pct=$(awk -F ':' '/The average usage of cpu is/ {print $2}' "$TEMP_DIR/cpu_info")
case "$cpu_used_pct" in
    '' | *[!0-9]*)
        echo "$(basename "$0"): could not determine CPU usage from vmstat output." >&2
        ;;
    *)
        if [ "$cpu_used_pct" -gt "$CPU_VALUE" ]; then
            echo "LOG-Warnning:$(date '+%Y-%m-%d %H:%M:%S'), The CPU load is up to ${cpu_used_pct}%.Please check!!"
            sendmessage "Host:$HOSTIP The CPU load is up to ${cpu_used_pct}%.Please check!"
        else
            printf '\t\t\t\t The CPU load is OK.\n'
        fi
        ;;
esac

#
# Memory check, including paging (swap) space.
# `svmon -G` is saved and echoed; the "memory" row supplies size and in-use
# pages (columns 2 and 3) and the "pg space" row supplies paging-space size
# and in-use pages (columns 3 and 4). Each percentage is compared with its
# threshold (MEM_VALUE / SWAP_VALUE) from the settings file.
# Missing, non-numeric or zero totals are reported on stderr instead of
# causing an arithmetic error.
#
echo
echo "---------------------------- Check Memory Useage ----------------------------"
svmon -G > "$TEMP_DIR/mem_info"
cat "$TEMP_DIR/mem_info"

TOTAL_MEM=$(awk '$1 == "memory" {print $2; exit}' "$TEMP_DIR/mem_info")
INUSE_MEM=$(awk '$1 == "memory" {print $3; exit}' "$TEMP_DIR/mem_info")
case "$TOTAL_MEM,$INUSE_MEM" in
    ,* | *, | 0,* | *[!0-9,]*)
        echo "$(basename "$0"): could not read memory usage from svmon output." >&2
        ;;
    *)
        MEM_PCT=$((100 * $INUSE_MEM / $TOTAL_MEM))
        echo $MEM_PCT
        if [ "$MEM_PCT" -gt "$MEM_VALUE" ]; then
            echo "LOG-Warnning: $(date '+%Y-%m-%d %H:%M:%S'), The memory load is up to ${MEM_PCT}%.Please check!!"
            sendmessage "Host:$HOSTIP The memory load is up to ${MEM_PCT}%.Please check!"
        else
            printf '\t\t\t\t The Memory load is OK.\n'
        fi
        ;;
esac

TOTAL_SWAP=$(awk '$1 == "pg" && $2 == "space" {print $3; exit}' "$TEMP_DIR/mem_info")
INUSE_SWAP=$(awk '$1 == "pg" && $2 == "space" {print $4; exit}' "$TEMP_DIR/mem_info")
case "$TOTAL_SWAP,$INUSE_SWAP" in
    ,* | *, | 0,* | *[!0-9,]*)
        echo "$(basename "$0"): could not read paging space usage from svmon output." >&2
        ;;
    *)
        SWAP_PCT=$((100 * $INUSE_SWAP / $TOTAL_SWAP))
        if [ "$SWAP_PCT" -gt "$SWAP_VALUE" ]; then
            echo "LOG-Warnning: $(date '+%Y-%m-%d %H:%M:%S'), The swap load is up to ${SWAP_PCT}%.Please check!!"
            sendmessage "Host:$HOSTIP The swap load is up to ${SWAP_PCT}%.Please check!"
        else
            printf '\t\t\t\t The swap load is OK.\n'
        fi
        ;;
esac


#
# Check file system usage.
# `df -k` rows (minus the header and anything mentioning "proc") are stored
# as comma-separated fields; field 4 is taken as %Used and field 7 as the
# mount point, and every file system above DISK_VALUE raises an alert.
# NOTE(review): the field positions match the AIX `df -k` layout - confirm.
# The loop reads from the file rather than from a pipeline so that diskflag
# is set in the current shell under any POSIX shell, not just ksh.
#
echo
echo "---------------------------- Check Disk ----------------------------"
df -k | grep -v proc | grep -v Filesystem |
    awk '{print $1","$2","$3","$4","$5","$6","$7}' > "$TEMP_DIR/disk_info"
df -k
diskflag=0
while IFS=, read -r fs_name fs_blocks fs_free fs_used fs_iused fs_ipct fs_mount; do
    fs_used=${fs_used%\%}
    # Skip rows whose usage is not a plain number (e.g. "-").
    case "$fs_used" in
        '' | *[!0-9]*) continue ;;
    esac
    if [ "$fs_used" -gt "$DISK_VALUE" ]; then
        diskflag=1
        echo "LOG-Warnning: $(date '+%Y-%m-%d %H:%M:%S'), The file system $fs_mount is up to ${fs_used}%.Please check!!"
        sendmessage "Host:$HOSTIP The file system $fs_mount is up to ${fs_used}%.Please check!"
    fi
done < "$TEMP_DIR/disk_info"
if [ "$diskflag" -eq 0 ]; then
    printf '\t\t\t\t All file system is OK.\n'
fi

#
# Check disk I/O load with iostat.
# Only the rows after the "Disks:" header are used: column 1 is the device
# name and column 2 (% tm_act) is truncated to an integer and compared with
# IOLOAD_VALUE.
# The previous `grep -v "0."` filter was a regular expression ("0" followed
# by any character), so it also discarded hdisk0, hdisk10 and busy values
# such as 80.5 or 100.0; rows are now selected by position instead.
# The loop reads from the file so diskflag survives in any POSIX shell.
#
echo
echo "---------------------------- Check iostat ----------------------------"
iostat 1 1 |
    awk '/^Disks:/ {in_disks = 1; next}
         in_disks && NF >= 2 {print $1, int($2)}' > "$TEMP_DIR/iostat_info"
diskflag=0
while read -r hdisk tm_act; do
    case "$tm_act" in
        '' | *[!0-9]*) continue ;;
    esac
    if [ "$tm_act" -gt "$IOLOAD_VALUE" ]; then
        diskflag=1
        echo "LOG-Warnning: $(date '+%Y-%m-%d %H:%M:%S'), The hdisk $hdisk is up to $tm_act.Please check!!"
        sendmessage "Host:$HOSTIP The hdisk $hdisk is up to $tm_act.Please check!"
    fi
done < "$TEMP_DIR/iostat_info"
if [ "$diskflag" -eq 0 ]; then
    printf '\t\t\t\t All disk load is OK.\n'
fi

#
# Check network error counters.
# `netstat -i` is saved and echoed; for every row except the header and the
# loopback interface, columns 6 and 8 are taken as input and output error
# counts and compared with NETERR_VALUE.
# NOTE(review): rows with an empty Address column shift the fields, as they
# did in the original pipeline - confirm against real output.
# The loop reads from the file so netflag survives in any POSIX shell.
#
echo
echo "---------------------------- Check netstat ----------------------------"
netstat -i > "$TEMP_DIR/netstat_info"
cat "$TEMP_DIR/netstat_info"
netflag=0
while read -r if_name if_mtu if_net if_addr if_ipkts ierrs if_opkts oerrs if_rest; do
    # Skip the header row and the loopback interface.
    case "$if_name" in
        Name | lo0*) continue ;;
    esac
    # Skip rows whose counters are not plain numbers.
    case "$ierrs" in
        '' | *[!0-9]*) continue ;;
    esac
    case "$oerrs" in
        '' | *[!0-9]*) continue ;;
    esac
    if [ "$ierrs" -gt "$NETERR_VALUE" ] || [ "$oerrs" -gt "$NETERR_VALUE" ]; then
        netflag=1
        echo "LOG-Warnning: $(date '+%Y-%m-%d %H:%M:%S'), The network has send too many error packages.Please check!!"
        sendmessage "Host:$HOSTIP The network has send too many error packages.Please check!"
    fi
done < "$TEMP_DIR/netstat_info"
if [ "$netflag" -eq 0 ]; then
    printf '\t\t\t\t The network is OK.\n'
fi

#
# Check HACMP cluster status.
# The output of `clstat -o` and `lssrc -g cluster` is saved and echoed. For
# every "Node:" line the node name and its state are extracted; a node whose
# state does not end in "UP" raises an alert.
#
echo
echo "---------------------------- Check HACMP status ----------------------------"
/usr/es/sbin/cluster/clstat -o > "$TEMP_DIR/ha_info"
lssrc -g cluster >> "$TEMP_DIR/ha_info"
echo
cat "$TEMP_DIR/ha_info"
# First awk splits on ':' and keeps "<node> State <state>"; the second
# reduces that to "<node> <state>".
awk -F ':' '/Node:/ {print $2, $3}' "$TEMP_DIR/ha_info" |
    awk '{print $1, $3}' |
    while read -r node node_state; do
        case "$node_state" in
            *UP)
                printf '\t\t\t\t The cluster node %s is OK.\n' "$node"
                ;;
            *)
                echo "$(date '+%Y-%m-%d %H:%M:%S'),LOG-Warnning: The cluster node $node is abnormal.Please check!!"
                sendmessage "Host:$HOSTIP The cluster node $node is abnormal.Please check!"
                ;;
        esac
    done

#
# Check the system error report.
# `errpt -s` takes a start time in mmddhhmmyy form. The start must be today
# at 00:00: using the current time asks only for entries logged after "now",
# so nothing would ever be reported.
#
echo
echo "---------------------------- Check System Err ----------------------------"
TIME=$(date +%m%d0000%y)
# Count output lines with awk to avoid the padded number some wc versions print.
errnum=$(errpt -s "$TIME" | awk 'END {print NR}')
if [ "$errnum" -eq 0 ]; then
    printf '\t\t\t\t There was no errpt today.\n'
else
    echo
    echo "LOG-Warnning: $(date '+%Y-%m-%d %H:%M:%S'),There was errpt alert.Please check!"
    sendmessage "Host:$HOSTIP There was errpt alert.Please check!"
fi

#
# Remove the temporary files created by the checks above.
# Full paths are used instead of `cd` + relative names, so a failed `cd` can
# never delete same-named files in another directory; ${TEMP_DIR:?} aborts
# if the variable is unexpectedly empty.
#
for tmp_name in cpu_info disk_info ha_info iostat_info netstat_info swap_info mem_info; do
    rm -f "${TEMP_DIR:?}/$tmp_name"
done


TAG:

引用 删除 jameshd   /   2008-07-08 14:25:13
非常感谢!
一亩三分西瓜地 引用 删除 BTxigua   /   2008-07-08 13:56:06
在检查errpt 部分的脚本有点问题。
#
# Check system error report.
#
echo
echo "---------------------------- Check System Err ----------------------------"
TIME=$(date +%m%d%H%M%y)
errnum=$(errpt -s $TIME |wc -l)
if [ "$errnum" -eq 0 ] ; then
    echo "\t\t\t\t There was no errpt today."
else
    echo
    echo "LOG-Warnning: `date +%Y'-'%m'-'%d' '%H':'%M':'%S`,There was errpt alert.Please check!"
    sendmessage "Host:$HOSTIP There was errpt alert.Please check!"
fi

这里的TIME=$(date +%m%d%H%M%y)
不应该这样设置,这样的结果是永远都没有errpt错误日志,因为取的是当前时间之后的报错,当然不可能存在。
所以应该修改为:
TIME=$(date +%m%d'0000'%y)
一亩三分西瓜地 引用 删除 BTxigua   /   2008-07-08 13:53:46
是的。我的配置如下:
#more env.conf
# 定义cpu阀值
#
CPU_VALUE=80;export CPU_VALUE

#
# 定义磁盘空间阀值
#
DISK_VALUE=90;export DISK_VALUE

#
# 定义表空间阀值
#
TS_VALUE=90;export TS_VALUE

#
# 定义交换阀值
#
SWAP_VALUE=60;export SWAP_VALUE   

#                              
# 定义内存阀值                 
#                              
MEM_VALUE=95;export MEM_VALUE

#
# 定义磁盘IO繁忙率阀值
#
IOLOAD_VALUE=80;export IOLOAD_VALUE

#
# 定义网络错误包数量阀值
#
NETERR_VALUE=10000;export NETERR_VALUE

# 设置ORACLE的环境变量
# ORACLE_BASE= ;export ORACLE_BASE
# ORACLE_HOME= ;export ORACLE_HOME
# ORACLE_SID= ;export ORACLE_SID
# PATH= ;export PATH

# CLASSPATH= ;export CLASSPATH
# SHLIB_PATH= ;export SHLIB_PATH
引用 删除 jameshd   /   2008-07-08 10:49:53
env.conf
是env的环境变量吗?
 

评分:0

我来说两句

显示全部

:loveliness: :handshake :victory: :funk: :time: :kiss: :call: :hug: :lol :'( :Q :L ;P :$ :P :o :@ :D :( :)

Open Toolbar