歡迎來到Linux教程網
Linux教程網
Linux教程網
Linux教程網
Linux教程網 >> Linux基礎 >> 關於Linux >> AIX主機監控腳本

AIX主機監控腳本

日期:2017/3/1 14:50:41   編輯:關於Linux
#!/bin/sh
#
# AIX host monitoring script.
#
# Monitors the host (CPU, memory, I/O, network, HACMP, error report) and the
# Oracle database (background processes, tablespaces, jobs, sessions, user
# sync tables, alert log) and writes the findings to a time-stamped log file
# in $HOME.
#
# Settings are read from $SOURCE.  The script uses these names, which are
# expected to come from that file or from the environment:
#   CPU_VALUE, DISK_VALUE, TS_VALUE  integer percentage thresholds (required)
#   ORACLE_BASE                      base directory of the alert log
#   ORA_INSTANCE                     instance name        (default: ora92)
#   DB_LOGIN                         sqlplus logon string (default: dxh/dxh)
#   SYNC_LOGIN                       sqlplus logon string for the sync check
#                                    (default: ccmdxh/ccm@ccmdxh)
#
# NOTE(review): the logon strings are passed on the sqlplus command line and
# are therefore visible in `ps` output; consider OS authentication or a
# wallet instead of clear-text defaults.

# Settings file; loaded by check_source.
SOURCE=$HOME/config/config

# Row of asterisks that frames every section title in the report.
STARS='*****************************************'

# Print the current time in the format used by all log messages.
now()
{
  date '+%Y-%m-%d %H:%M:%S'
}

# Succeed when $1 is a non-empty string of decimal digits.
is_int()
{
  case $1 in
    '' | *[!0-9]*) return 1 ;;
    *) return 0 ;;
  esac
}

# Append two blank lines and a framed section title ($1, including its
# surrounding spaces) to the log file.
banner()
{
  echo >>"$LOG_FILE"
  echo >>"$LOG_FILE"
  echo "${STARS}$1${STARS}" >>"$LOG_FILE"
}

# Load the settings file; report an error and exit 1 when it is unreadable.
check_source()
{
  if [ -r "$SOURCE" ]; then
    . "$SOURCE"
  else
    echo "$(basename "$0"): Cannot locate the default setting file." >&2
    exit 1
  fi
}

# Fill in optional settings and make sure the thresholds are usable.
# A missing or non-numeric threshold would make every numeric test fail and
# fall through to the "OK" branch, so that is treated as a fatal error.
apply_settings()
{
  : "${ORA_INSTANCE:=ora92}"
  : "${DB_LOGIN:=dxh/dxh}"
  : "${SYNC_LOGIN:=ccmdxh/ccm@ccmdxh}"

  for threshold in CPU_VALUE DISK_VALUE TS_VALUE; do
    eval "threshold_value=\${$threshold:-}"
    is_int "$threshold_value" || {
      echo "$(basename "$0"): $threshold must be set to an integer in $SOURCE." >&2
      exit 1
    }
  done
}

# Print the report header (host name, address, user, time) on stdout.
report_header()
{
  # The second line of `ifconfig -a` is taken to carry the primary address.
  HOSTIP=$(ifconfig -a | sed -n '2p' | awk '{print $2}')
  HOSTNAME=$(hostname)
  # `who am i` prints nothing without a terminal (e.g. under cron), so fall
  # back to the login name from the environment.
  report_user=$(who am i | cut -d " " -f1)
  [ -n "$report_user" ] || report_user=${LOGNAME:-$USER}
  cat <<EOF
Hostname: $HOSTNAME Server: $HOSTIP
User: $report_user Time: $(now)

SYSTEM CHECK REPORT
===================

EOF
}

# Move the logs of earlier runs (log<timestamp>) into $LOG_PATH/niyl.
archive_old_logs()
{
  cd "$LOG_PATH" || {
    echo "$(basename "$0"): Cannot change to $LOG_PATH." >&2
    exit 1
  }
  for old_log in log[0-9]*; do
    # An unmatched glob stays literal; skip it.
    [ -f "$old_log" ] || continue
    [ -d "$LOG_PATH/niyl" ] || mkdir -p "$LOG_PATH/niyl" || return 1
    mv "$old_log" "$LOG_PATH/niyl/" >/dev/null 2>&1
  done
}

# Sample vmstat ten times, log the samples and compare the average CPU usage
# with CPU_VALUE.  The samples are kept in $TEMP_PATH/cpu_info for
# check_memory.
check_cpu()
{
  echo "${STARS} Check CPU ${STARS}" >>"$LOG_FILE"
  # Field 16 is read as the idle percentage (AIX vmstat layout).  The average
  # is rounded to an integer because the shell can only compare integers.
  vmstat 1 10 | awk '
    { print }
    $1 ~ /^[0-9]/ { idle += $16; samples++ }
    END {
      if (samples > 0)
        printf "The average usage of cpu is :%d\n", 100 - idle / samples + 0.5
      else
        print "The average usage of cpu is :"
    }' >"$TEMP_PATH/cpu_info"

  cat "$TEMP_PATH/cpu_info" >>"$LOG_FILE"

  cpu_used_pct=$(awk -F ':' '/The average usage of cpu is/ {print $2}' "$TEMP_PATH/cpu_info")
  if is_int "$cpu_used_pct"; then
    if [ "$cpu_used_pct" -gt "$CPU_VALUE" ]; then
      echo "LOG-Warnning:$(now), The CPU usage is up to $cpu_used_pct%. Please check the system." >>"$LOG_FILE"
    else
      echo " The CPU load is OK!!" >>"$LOG_FILE"
    fi
  else
    echo "LOG-Warnning:$(now), Cannot determine the CPU usage. Please check the system." >>"$LOG_FILE"
  fi
}

# Sum the page-in/page-out columns (fields 6 and 7) of the vmstat samples
# collected by check_cpu and report whether paging is low.
check_memory()
{
  banner " check memory useage "
  awk '
    { print }
    $1 ~ /^[0-9]/ { totalpi += $6; totalpo += $7 }
    END {
      if (totalpi < 10 && totalpo < 10)
        print " The memory usage is OK!!"
      else
        print "The memory pagein and pageout is to high,Please check the usage of the memory!"
    }' "$TEMP_PATH/cpu_info" >>"$LOG_FILE"
}

# Log `df -k` and warn about every file system whose usage (field 4, with
# the mount point in field 7) exceeds DISK_VALUE.
check_disk()
{
  banner " check disk space "
  df -k >>"$LOG_FILE"
  df -k | awk '!/proc/ && !/Filesystem/ {print $4, $7}' >"$TEMP_PATH/disk_info"

  while read -r used_pct mount_point; do
    used_pct=${used_pct%\%}
    # Skip entries without a numeric usage figure (e.g. "-").
    is_int "$used_pct" || continue
    if [ "$used_pct" -gt "$DISK_VALUE" ]; then
      echo "LOG-Warnning: $(now), $mount_point is not have enough space ,please check." >>"$LOG_FILE"
    else
      echo " The space of disk $mount_point is OK!!" >>"$LOG_FILE"
    fi
  done <"$TEMP_PATH/disk_info"
}

# Log three iostat samples.
check_iostat()
{
  banner " check iostat "
  iostat 1 3 >>"$LOG_FILE"
}

# Log the network interface statistics.
check_netstat()
{
  banner " check netstat "
  netstat -i >>"$LOG_FILE"
}

# Log the running Oracle processes and verify that the six main background
# processes of instance $ORA_INSTANCE are present.
check_oracle_processes()
{
  banner " check oracle process "
  ps -ef | grep ora_ | grep -v grep >"$TEMP_PATH/ora_ps_info"
  cat "$TEMP_PATH/ora_ps_info" >>"$LOG_FILE"
  # The process name is the last field of each ps line.
  awk '{print $NF}' "$TEMP_PATH/ora_ps_info" >"$TEMP_PATH/ora_process_info"

  COUNT=0
  for proc in ckpt dbw0 reco lgwr pmon smon; do
    proc_name=ora_${proc}_${ORA_INSTANCE}
    if grep -x "$proc_name" "$TEMP_PATH/ora_process_info" >/dev/null 2>&1; then
      COUNT=$((COUNT + 1))
    else
      # The ckpt message has no blank before "!"; the texts are kept exactly
      # as they were for anything that parses the log.
      case $proc in
        ckpt) tail_mark='!' ;;
        *) tail_mark=' !' ;;
      esac
      echo "LOG-Warnning: $(now),The Process $proc_name was terminated$tail_mark" >>"$LOG_FILE"
    fi
  done

  if [ "$COUNT" -eq 6 ]; then
    echo >>"$LOG_FILE"
    echo " The main six Oracle processes is OK !!" >>"$LOG_FILE"
  fi
}

# Query the usage of every tablespace and warn about each ONLINE tablespace
# whose used percentage has reached TS_VALUE.
check_oracle_tablespace()
{
  banner " check oracle tablespace "
  sqlplus -s "$DB_LOGIN" <<'EOF' >"$TEMP_PATH/ts_info"
set pagesize 100
set linesize 100
col status for a10
col tablespace_name for a20
col contents for a10
col "size(M)" for a15
col used for a15
col pct for a10
select d.status, d.tablespace_name,
TO_CHAR(NVL(a.bytes / 1024 /1024, 0),'99G999G990') "size(M)",
TO_CHAR(NVL(a.bytes - NVL(f.bytes, 0),0)/1024/1024, '99G999G990D00') used,
TO_CHAR(NVL((a.bytes - NVL(f.bytes, 0)) / a.bytes * 100, 0), '990D00')||'%' pct
FROM sys.dba_tablespaces d,
(select tablespace_name, sum(bytes) bytes from dba_data_files group by tablespace_name) a,
(select tablespace_name, sum(bytes) bytes from dba_free_space group by tablespace_name) f
WHERE d.tablespace_name = a.tablespace_name(+)
AND d.tablespace_name = f.tablespace_name(+)
order by tablespace_name ;
exit
EOF

  cat "$TEMP_PATH/ts_info" >>"$LOG_FILE"
  # Columns: status, name, size, used, pct.
  awk '/ONLINE/ {print $2, $5}' "$TEMP_PATH/ts_info" | while read -r ts_name ts_pct; do
    # Keep the integer part only: cut at the decimal mark or the "%" sign.
    ts_used_pct=${ts_pct%%[.,%]*}
    is_int "$ts_used_pct" || continue
    if [ "$ts_used_pct" -ge "$TS_VALUE" ]; then
      echo "LOG-Warnning: $(now),表空間$ts_name 的剩余空間緊張,請盡快清理表空間!" >>"$LOG_FILE"
    else
      echo " The tablespace of $ts_name is OK!!" >>"$LOG_FILE"
    fi
  done
}

# Log the job list from dba_jobs and check every job for failures, the
# broken flag and its schedule.
check_oracle_jobs()
{
  banner " check oracle job "
  sqlplus -s "$DB_LOGIN" <<'EOF' >>"$LOG_FILE"
col job for 999
col last_date for a20
col next_date for a20
col what for a40
set linesize 120

select job,what,
to_char(last_date,'yyyy-mm-dd hh24:mi:ss') last_date,
to_char(next_date,'yyyy-mm-dd hh24:mi:ss') next_date,
failures
from dba_jobs
order by job;
EOF

  # Rows are tagged with 'XXX' so they can be told apart from headings.
  sqlplus -s "$DB_LOGIN" <<'EOF' >"$TEMP_PATH/job_info"
col flag for a5
col rou for 99999
select 'XXX' flag,job,failures,broken,round(next_date-sysdate,2)*100 rou from dba_jobs order by job;
EOF

  awk '/XXX/ {print $2, $3, $4, $5}' "$TEMP_PATH/job_info" |
    while read -r jobnum failure broken round; do
      job_ok=no
      if [ "$failure" = 0 ] && [ "$broken" = "N" ]; then
        # Jobs 3 and 4 are allowed a "rou" value up to 100; every other job
        # must be at 0.  NOTE(review): the reason for this special case is
        # not visible in the script - confirm it with the job owners.
        case $jobnum in
          3 | 4)
            if [ "$round" -le 100 ] 2>/dev/null; then job_ok=yes; fi
            ;;
          *)
            if [ "$round" -eq 0 ] 2>/dev/null; then job_ok=yes; fi
            ;;
        esac
      fi

      if [ "$job_ok" = yes ]; then
        echo " The Job $jobnum is OK!!" >>"$LOG_FILE"
      else
        echo "LOG-Warnning: $(now),The Job $jobnum was terminated !" >>"$LOG_FILE"
      fi
    done
}

# Log the session count and the number of successful rows in two
# application tables.
check_oracle_sessions()
{
  banner " check oracle session "
  # The delimiter is quoted so that the shell does not expand "$session".
  sqlplus -s "$DB_LOGIN" <<'EOF' >>"$LOG_FILE"
select 'The Total sessions number is '||count(*)||'.' from v$session ;
select 'table mt: ' ,count(*) from t_dxh_mt where msgresult='SUCCESS';
select 'table detect:' ,count(*) from t_dxh_opendetect where msgresult='SUCCESS';
exit
EOF
}

# Count the rows of the two user-sync tables; a count above 2000 is reported
# as an abnormal sync.
check_oracle_sync()
{
  banner "oracle 同步數據檢查信息輸出"
  sqlplus -s "$SYNC_LOGIN" <<'EOF' >"$TEMP_PATH/jiya_info"
select 'NUM_P630' flag,count(*) from T_DXH_USERINFO ;
select 'NUM_p570' flag,count(*) from T_DXH_USERINFO2 ;
EOF

  cat "$TEMP_PATH/jiya_info" >>"$LOG_FILE"
  awk '/NUM_/ && !/COUNT/ {print $1, $2}' "$TEMP_PATH/jiya_info" |
    while read -r zhuji user_num; do
      if is_int "$user_num" && [ "$user_num" -le 2000 ]; then
        echo " The node $zhuji users sync is OK!! " >>"$LOG_FILE"
      else
        echo "LOG-Warnning: The node $zhuji users sync terminated abnormally.Please check !!" >>"$LOG_FILE"
      fi
    done
}

# Log the lines from the tail of the alert log that are neither time stamps
# nor routine "Thread"/"Current" messages.
check_oracle_alert_log()
{
  banner " check oracle alert log "
  alert_log=$ORACLE_BASE/admin/$ORA_INSTANCE/bdump/alert_$ORA_INSTANCE.log
  if [ -r "$alert_log" ]; then
    tail -n 300 "$alert_log" |
      grep -v -e Thread -e Current -e "$(date +'%a %h')" -e ':[0-9][0-9]:' >>"$LOG_FILE"
  else
    echo "LOG-Warnning: $(now),Cannot read the alert log $alert_log." >>"$LOG_FILE"
  fi
}

# Log the newest entries of the system error report and warn when an entry
# carries today's date.
check_system_errors()
{
  banner " check system err "
  errpt | head -10 >>"$LOG_FILE"
  # errpt time stamps are mmddhhmmyy: match month+day AND year so that the
  # same calendar day of an earlier year does not count as "today".
  month_day=$(date +%m%d)
  year=$(date +%y)
  if errpt | awk '{print $2}' |
    grep "^${month_day}[0-9][0-9][0-9][0-9]${year}\$" >/dev/null 2>&1; then
    echo "LOG-Warnning: $(now),The system has found a error today.Please check the error report." >>"$LOG_FILE"
  else
    echo >>"$LOG_FILE"
    echo " There is no system error report today.System is OK!!" >>"$LOG_FILE"
  fi
}

# Log the HACMP cluster status and report every node that is not UP.
check_hacmp()
{
  banner " check HACMP status "
  /usr/es/sbin/cluster/clstat -o >"$TEMP_PATH/ha_info"
  lssrc -g cluster >>"$TEMP_PATH/ha_info"
  cat "$TEMP_PATH/ha_info" >>"$LOG_FILE"
  echo >>"$LOG_FILE"

  # "Node: <name> State: <state>" is reduced to "<name> <state>".
  grep "Node:" "$TEMP_PATH/ha_info" |
    awk -F ':' '{print $2, $3}' |
    awk '{print $1, $3}' |
    while read -r node_name node_state; do
      node="${node_name}'s"
      case $node_state in
        *UP)
          echo " The node $node is OK!!" >>"$LOG_FILE"
          ;;
        *)
          echo "$(now),LOG-Warnning: The node $node status is DOWN ,it was terminated ." >>"$LOG_FILE"
          ;;
      esac
    done
}

# Entry point: prepare the log and temp locations, load the settings and run
# every check in report order.
main()
{
  : "${HOME:?HOME must be set}"

  # The log file and the temp directory live in the user's home directory.
  LOG_PATH=$HOME
  LOG_FILE=$LOG_PATH/log$(date +%Y%m%d%H%M%S)
  TEMP_PATH=$LOG_PATH/temp

  archive_old_logs

  # Create the temp directory on first use.
  [ -d "$TEMP_PATH" ] || mkdir "$TEMP_PATH" || exit 1

  check_source
  apply_settings

  report_header >>"$LOG_FILE"

  check_cpu
  check_memory
  check_disk
  check_iostat
  check_netstat
  check_oracle_processes
  check_oracle_tablespace
  check_oracle_jobs
  check_oracle_sessions
  check_oracle_sync
  check_oracle_alert_log
  check_system_errors
  check_hacmp
}

main "$@"
Copyright © Linux教程網 All Rights Reserved