#! /sbin/sh
##################################################################
# This script should not be executed after boot because it
# may report a system reboot and cause confusion.
#
# availmon always collects data irrespective of whether
# the data is sent outside or not.
#
# availmon's data is always logged into the system support
# database.  Please refer to the man page of ESP
# for more information.
#
# This script identifies any controlled shutdowns,
# panics or other unscheduled shutdowns.
##################################################################

# Script identity and fixed availmon / ESP locations.
COMMAND=$0
AMRVERSIONNUM=2.1
AVAILDIR=/var/adm/avail
SAVEDIR=$AVAILDIR/.save
USRETCDIR=/usr/etc
PLATFORM=`uname -m`

# Placeholder written into report fields that have no value.
NOSTR="NULL"

# Query tool, base SQL statement and database name used to read the
# AVAILMON tool options.
DBQUERY="/usr/sbin/espquery"
SQLSTMT="select option_default from tool where tool_name='AVAILMON'"
SSDB=ssdb

# Flag value that is written into the event report; later code adds to it.
FLAG=0

SSLOGGER=/usr/sbin/esplogger
AMCONVERT=/usr/etc/amconvert

##################################################################
# Binaries and their location
##################################################################

AMTIME1970=$USRETCDIR/amtime1970
AMSYSLOG=$USRETCDIR/amsyslog
AMTICKERD=$USRETCDIR/eventmond
TICKFILE=$SAVEDIR/lasttick

##################################################################
# Get crash directory
#
# Every word of /etc/config/savecore.options that names an existing
# directory is a candidate; the last such word wins.  When the file
# is missing, empty, or names no directory, /var/adm/crash is used.
##################################################################

CRASHDIR=""
OPTIONS=`cat /etc/config/savecore.options 2>/dev/null`
if [ "$OPTIONS" != "" ] ; then
    for DIR in $OPTIONS ; do
        if [ -d "$DIR" ] ; then
            CRASHDIR=$DIR
        fi
    done
fi

if [ "$CRASHDIR" = "" ] ; then
    CRASHDIR=/var/adm/crash
fi

################################################################
# For old systems with old savecore script that creates crashlog
# instead of analysis.N files.
################################################################

CRASHLOGFILE=$CRASHDIR/crashlog

##################################################################
# Get the SYSLOG file and related variables
##################################################################

# First, get the configfile: honour a "-f <file>" given in
# /etc/config/syslogd.options when that file exists, otherwise fall
# back to /etc/syslog.conf.

SYSLOGDCONF=""
SYSLOGDOPTS="df:m:p:"
OPTIONS=`cat /etc/config/syslogd.options 2>/dev/null`
if [ "$OPTIONS" != "" ] ; then
    while getopts $SYSLOGDOPTS OPTNAME $OPTIONS 2>/dev/null
    do
        if [ "$OPTNAME" = "f" ] ; then
            if [ -f "$OPTARG" ] ; then
                SYSLOGDCONF="$OPTARG"
                break
            fi
        fi
    done
fi

if [ "$SYSLOGDCONF" = "" ] ; then
    SYSLOGDCONF=/etc/syslog.conf
fi

# Take the last field of the non-comment "*.crit" lines and keep the
# first one in sort order.  Anything that is not an existing regular
# file is replaced by /var/adm/SYSLOG.
if [ -f "$SYSLOGDCONF" ] ; then
    # The error redirect sits on the command that reads the file so an
    # unreadable config stays quiet; the dot in the selector is escaped
    # so it only matches a literal ".".
    SYSLOGFILE=`grep -v '^#' "$SYSLOGDCONF" 2>/dev/null | grep '\*\.crit' | awk '{print $NF}' | sort -u | head -1`
    if [ "z$SYSLOGFILE" = "z" -o ! -f "$SYSLOGFILE" ] ; then
        SYSLOGFILE=/var/adm/SYSLOG
    fi
else
    SYSLOGFILE=/var/adm/SYSLOG
fi

# Use SYSLOGFILE to generate its rotated version
# (same directory, file name prefixed with "o").

OSYSLOGFILE=`dirname "$SYSLOGFILE"`/o`basename "$SYSLOGFILE"`
LASTSYSLOG=$SAVEDIR/lastsyslog
AMSYSLOGFILE=$CRASHDIR/syslog

##################################################################
# Other variables/files
##################################################################

# BOUND stays -1 unless the bounds check in the main section detects
# that the crash directory's bounds file has changed.
BOUND=-1
PREVSTARTFILE=$SAVEDIR/prevstart
SCRATCHFILE=$AVAILDIR/init.scratch
EVENTFILE=$SAVEDIR/event
SUEVENTFILE=$SAVEDIR/suevent
EXTRASUEVENT=0

# Per-run report files handed to the ESP logger; the process id keeps
# the names distinct between runs.
TMPSUREPORT=$CRASHDIR/suavailreport.$$
TMPREPORT=$CRASHDIR/availreport.$$

# Configuration-change tracking: exit status of configmon and the
# hardware / software change flags written into the report.
CONFIGCHANGEFILE=$SAVEDIR/configchange
CM_STATUS=0
HWCHANGED=0
SWCHANGED=0

##################################################################
# Checks for HINV / VERSIONS Changes on the machine
#
# Reads:   CM_STATUS, DBQUERY, SSDB, CONFIGCHANGEFILE
# Sets:    SWCHANGED=1 and/or HWCHANGED=1 when the newest cm_event
#          row has a different time than the one remembered in
#          CONFIGCHANGEFILE and its type carries the matching bits.
# Writes:  the newest cm_event time to CONFIGCHANGEFILE.
# Returns: 0 without doing anything when configmon failed
#          (CM_STATUS > 0) or the query tool is not present.
##################################################################

checkconfigchange()
{

    # Check the exit status of configmon. If it failed, there is no
    # need to check for configuration changes...

    if [ "$CM_STATUS" -gt 0 ] ; then
        return 0
    fi

    if [ -f "$DBQUERY" ] ; then
        # Newest cm_event row of the local, active system; fields are
        # '|'-separated with the time in field 2 and the type in field 3.
        LASTCONFIGCHANGE=`$DBQUERY -t -s "select cm_event.time,cm_event.type from system_info,cm_event where cm_event.sys_id=system_info.sys_id and system_info.local=1 and system_info.active=1 order by cm_event.time" $SSDB 2>/dev/null | tail -1 2>/dev/null`
        LASTCONFIGCHANGE=${LASTCONFIGCHANGE:='|0|0|'}
        LASTCHANGEDATE=`echo $LASTCONFIGCHANGE | cut -d'|' -f2`
        LASTCHANGETYPE=`echo $LASTCONFIGCHANGE | cut -d'|' -f3`
    else
        return 0
    fi

    # A missing or non-numeric type cannot be decoded; treat it as
    # "no change bits set" instead of feeding it to test/expr.
    case "$LASTCHANGETYPE" in
        ''|*[!0-9]*)
            LASTCHANGETYPE=0
            ;;
    esac

    if [ -f "$CONFIGCHANGEFILE" ] ; then
        PREVCHANGEDATE=`cat $CONFIGCHANGEFILE`
        if [ "$PREVCHANGEDATE" != "$LASTCHANGEDATE" ] ; then
            # Walk the bit weights 128, 64, ... 1.  TMPHINVCOUNT is the
            # 1-based position of the weight: positions 3 and 4
            # (weights 32, 16) flag a software change, positions 5 and
            # 6 (weights 8, 4) flag a hardware change.
            TMPTYPE=128
            TMPHINVCOUNT=1
            # Stop once every weight has been tried.  Without the
            # TMPTYPE bound a type of 256 or more leaves a remainder
            # that can never reach 0 and the loop would spin forever.
            while [ "$TMPTYPE" -gt 0 ] && [ "$LASTCHANGETYPE" -gt 0 ]
            do
                if [ "$LASTCHANGETYPE" -ge "$TMPTYPE" ] ; then
                    LASTCHANGETYPE=`expr $LASTCHANGETYPE - $TMPTYPE`
                    case $TMPHINVCOUNT in
                        3 | 4)
                            SWCHANGED=1
                            ;;
                        5 | 6)
                            HWCHANGED=1
                            ;;
                    esac
                fi
                TMPTYPE=`expr $TMPTYPE / 2`
                TMPHINVCOUNT=`expr $TMPHINVCOUNT + 1`
            done
        fi
    fi

    # Remember the newest change time for the next boot.
    echo "$LASTCHANGEDATE" > $CONFIGCHANGEFILE

}

##################################################################
# Runs Configuration Monitor to collect Configuration data
#
# Reads:  SSLOGGER  (logger binary; defaults to /usr/sbin/esplogger)
#         CONFIGMON (configmon binary; defaults to /usr/sbin/configmon)
# Sets:   CM_STATUS to the exit status of "configmon -u".
#         CONFIGNUM / CONFIGMAXLOOPS are scratch loop variables.
##################################################################

run_configmon()
{
    CONFIGNUM=0
    CONFIGMAXLOOPS=5

    # When the window system is configured on, poll every 3 seconds
    # until an Xsgi process shows up, giving up after
    # CONFIGMAXLOOPS + 1 polls.  Any number of Xsgi processes counts
    # as "running"; requiring exactly one would make a machine with
    # several servers always wait for the full timeout.
    if chkconfig windowsystem ; then
        while true; do
            sleep 3
            if ps -eo "comm" | grep Xsgi > /dev/null ; then
                break
            fi
            if test $CONFIGNUM -eq $CONFIGMAXLOOPS ; then
                break
            else
                CONFIGNUM=`expr $CONFIGNUM + 1`
            fi
        done
    fi

    #
    # It's hard to get complete config information so close to system
    # start-up. We'll sleep for one minute, just to try to make the odds
    # better
    #
    sleep 60

    ${CONFIGMON:-/usr/sbin/configmon} -u > /dev/null
    CM_STATUS=$?
    if [ $CM_STATUS -gt 0 ] ; then
        ${SSLOGGER:-/usr/sbin/esplogger} -s 0x00200107 -p syslog.warning \
            -m "ConfigMon UPDATE FAILED"
    fi
}

##################################################################
# Checks for a single-user shutdown
#
# Reads:  SUEVENTFILE ("code|time" record), AMTIME1970
# Sets:   EXTRASUEVENT=1 plus SUEVENTCODE / SUEVENTTIME when the
#         recorded single-user event is not newer than the boot
#         time; EXTRASUEVENT=0 otherwise.
##################################################################

checksingleuser()
{

    EXTRASUEVENT=0
    if [ -f "$SUEVENTFILE" ] ; then
        #
        # Check boot-time. If boot-time is >= single-user event
        # time, then there is another event that we need to
        # capture.
        #
        BOOTTIME=`$AMTIME1970 -i | cut -f2 -d'|'`
        SUEVENTTIME=`cat "$SUEVENTFILE" | cut -f2 -d'|'`

        # Either value can come back empty (empty event file, or the
        # time helper failing); comparing an empty operand is an
        # error in test, so only compare when both are present.
        if [ -n "$BOOTTIME" ] && [ -n "$SUEVENTTIME" ] ; then
            if [ "$BOOTTIME" -ge "$SUEVENTTIME" ] ; then
                EXTRASUEVENT=1
                SUEVENTCODE=`cat "$SUEVENTFILE" | cut -f1 -d'|'`
            fi
        fi
    fi
}

##################################################################
# Start main function
#
# Order of work:
#   1. run configmon
#   2. verify the installation (directory and helper binaries)
#   3. load saved state (previous start, last tick, crash bounds)
#   4. read / import the availmon options from SSDB
#   5. start the ticker and record the shutdown-reason option
#   6. unless this is the first start, classify the previous
#      shutdown and log it (plus an optional single-user event)
#   7. save state for the next boot and remove scratch files
##################################################################

#
# Startup Configuration Monitor to gather configuration information
#

run_configmon

#
# Initialization
# Check for proper installation and presence of all required
# files.
#

if [ ! -d $AVAILDIR ] ; then
    echo "\n$COMMAND: Error: Cannot find directory $AVAILDIR"
    echo "Please try re-installing OS\n"
    logger -t availmon -p err -i cannot find $AVAILDIR
    # Fatal: report failure explicitly instead of inheriting logger's status.
    exit 1
elif [ ! -x $AMTICKERD -o ! -x $AMSYSLOG -o ! -x $AMTIME1970 -o ! -x "$DBQUERY" ] ; then
    echo "\n$COMMAND: Error: Not all required executable files are present"
    echo "Please try re-installing OS\n"
    logger -t availmon -p err -i Executable files missing
    # Fatal: report failure explicitly instead of inheriting logger's status.
    exit 1
else
    # Startup-code
    FIRSTSTART=0

    SINCE1970=`$AMTIME1970`

    # we dont need install.sh script anymore
    if [ -f $AVAILDIR/install.sh ] ; then
        rm -f $AVAILDIR/install.sh
    fi

    if [ ! -d $SAVEDIR ] ; then
        #
        # We will consider this case as the first time start of availmon
        # after "clean" installation
        #

        # Create .save directory if first time install
        #
        FIRSTSTART=1
        mkdir $SAVEDIR
        if [ $? -ne 0 ] ; then
            echo "$COMMAND: Error: unable to create $SAVEDIR directory"
            exit 1
        fi
    fi

    #
    # Set PREVSTART first. All availmon tools need it
    #

    if [ -f "$PREVSTARTFILE" ] ; then
        PREVSTART=`cat $PREVSTARTFILE | cut -d'|' -f2`
    else
        # No saved value yet: take it from the last line of the old
        # availlog when there is one, otherwise use -1.
        if [ -f "$AVAILDIR/availlog" ] ; then
            PREVSTART=`cat $AVAILDIR/availlog | tail -1 | cut -d'|' -f2`
        else
            PREVSTART=-1
        fi
        echo "$PREVSTART" > $PREVSTARTFILE
    fi

    # we dont need availlog anymore
    rm -f $AVAILDIR/availlog

    #
    # get the lasttick value
    #

    if [ -f "$TICKFILE" ] ; then
        LASTTICK=`cat $TICKFILE 2>/dev/null`
    else
        # Move a tick file left in the old location into the save
        # directory; -1 means no tick was ever recorded.
        if [ -f "$AVAILDIR/lasttick" ] ; then
            LASTTICK=`cat $AVAILDIR/lasttick 2>/dev/null`
            rm -f $AVAILDIR/lasttick
        else
            LASTTICK=-1
        fi
        echo "$LASTTICK" > $TICKFILE
    fi

    #
    # Check Bounds
    #
    # When the crash directory's bounds file differs from the saved
    # copy, BOUND takes the saved value and the per-crash file names
    # are derived from it.
    #

    if [ -f $CRASHDIR/bounds ] ; then
        if [ -f $SAVEDIR/bounds ] ; then
            diff $SAVEDIR/bounds $CRASHDIR/bounds > /dev/null
            if [ $? -ne 0 ] ; then
                BOUND=`cat $SAVEDIR/bounds`
                cp $CRASHDIR/bounds $SAVEDIR/bounds
                SUMMARYFILE=$CRASHDIR/summary.$BOUND
                FRUFILE=$CRASHDIR/fru.$BOUND
                AMSYSLOGFILE=$CRASHDIR/syslog.$BOUND
            fi
        else
            cp $CRASHDIR/bounds $SAVEDIR/bounds
        fi
    else
        echo "0" > $SAVEDIR/bounds
    fi

    ##################################################################
    # Get the configuration parameters from SSDB and convert if needed
    ##################################################################

    SSDBCONFIGURED=`$DBQUERY -t -s "select count(*) from tool where tool_name='AVAILMON'" $SSDB 2>/dev/null | cut -d'|' -f2 2>/dev/null`

    # A failed query yields an empty (or garbled) count.  Left alone,
    # the numeric test below errors out and control falls into the
    # "already configured" branch, which deletes the old config
    # directory.  Treat an unusable count as "not configured" so the
    # old configuration is only removed after a successful import.
    case "$SSDBCONFIGURED" in
        ''|*[!0-9]*)
            SSDBCONFIGURED=0
            ;;
    esac

    if [ "$SSDBCONFIGURED" -eq "0" ] ; then
        # There is no availmon configuration in the esp database
        if [ -d $AVAILDIR/config ] ; then
            # But there is a config directory
            # so we assume that it contain valid availmon configuration
            # we will try to import configuration from this directory
            $AMCONVERT 2>/dev/null

            if [ "$?" -eq "0" ] ; then
                # we successfully "configured" availmon
                # let's remove old config directory
                rm -rf $AVAILDIR/config
                rm -rf $SAVEDIR/autoemail
                rm -rf $SAVEDIR/autoemail.list
            else
                # amconvert failed
                echo "\n$COMMAND: Error: amconvert failed. You might need to execute amconfig to setup availmon configuration"
                logger -t availmon -p err -i amconvert failed. You might need to execute amconfig to setup availmon configuration
            fi
        else
            # there is no config directory either
            echo "\n$COMMAND: Error: No availmon configuration has been found. You might need to execute amconfig to setup availmon configuration"
            logger -t availmon -p err -i "No availmon configuration has been found. You might need to execute amconfig to setup availmon configuration"
        fi
    else
        # esp database already contains configuration
        # we are not going to change it
        # we need to remove "config directory" if any
        rm -rf $AVAILDIR/config
        rm -rf $SAVEDIR/autoemail
        rm -rf $SAVEDIR/autoemail.list
    fi

    STATUSINTERVAL=`$DBQUERY -t -s "$SQLSTMT and tool_option='statusinterval'" $SSDB 2>/dev/null | cut -d'|' -f2 2>/dev/null`
    TICKERD=`$DBQUERY -t -s "$SQLSTMT and tool_option='tickerd'" $SSDB 2>/dev/null | cut -d'|' -f2 2>/dev/null`
    TICKDURATION=`$DBQUERY -t -s "$SQLSTMT and tool_option='tickduration'" $SSDB 2>/dev/null | cut -d'|' -f2 2>/dev/null`
    SHUTDOWNRSN=`$DBQUERY -t -s "$SQLSTMT and tool_option='shutdownreason'" $SSDB 2>/dev/null | cut -d'|' -f2 2>/dev/null`

    # Defaults for options the database did not return.
    STATUSINTERVAL=${STATUSINTERVAL:=0}
    STATUSINTERVALHRS=`expr $STATUSINTERVAL \* 24`
    TICKERD=${TICKERD:=0}
    SHUTDOWNRSN=${SHUTDOWNRSN:=0}
    TICKDURATION=${TICKDURATION:=300}
    LASTTICK=${LASTTICK:=-1}

    #
    # start ticker if needed
    #

    if [ "$TICKERD" -eq 1 ] ; then
        $AMTICKERD -a on -j $TICKDURATION -e $STATUSINTERVALHRS >/dev/null 2>&1
    else
        rm -f $TICKFILE
    fi

    #
    # Set shutdown reason
    #

    echo "$SHUTDOWNRSN" > $SAVEDIR/shutdownreason

    if [ $FIRSTSTART -eq 0 ] ; then

        #
        # Identify EVENT
        #

        checksingleuser

        SUMMARYLINE="";
        if [ "$BOUND" -ge 0 ] ; then
            # The crash bounds changed since the last boot.
            if [ -f $SUMMARYFILE ] ; then
                # Pick the event code from the panic string and, when
                # present, the FRU analyzer output.
                if grep 'PANIC STRING' $SUMMARYFILE | grep NMI > /dev/null ; then
                    EVENTCODE=2097156
                elif [ -f $FRUFILE ] ; then
                    cat $FRUFILE >> $SUMMARYFILE
                    if grep -i 'FRU ANALY' $FRUFILE > /dev/null ; then
                        if grep 'FRU ANALYZER' $FRUFILE | grep -i 'No error' > /dev/null ; then
                            EVENTCODE=2097157
                        elif grep -i 'SOFTWARE' $FRUFILE > /dev/null ; then
                            EVENTCODE=2097157
                        elif grep 'Inconclusive hardware error state' $FRUFILE > /dev/null ; then
                            EVENTCODE=2097168
                        else
                            EVENTCODE=2097167
                        fi
                    else
                        EVENTCODE=2097168
                    fi
                else
                    EVENTCODE=2097168
                fi
                EVENTTIME=`awk '/CRASH TIME/ {print $3}' $SUMMARYFILE`
                SUMMARYLINE=`grep 'PANIC STRING' $SUMMARYFILE`
                if [ $EVENTCODE -eq 2097167 ] ; then
                    SUMMARYLINE="`tail +2 $SUMMARYFILE`"
                fi
                DIAGFILE=""
                if [ -f $CRASHDIR/analysis.$BOUND ] ; then
                    DIAGFILE=$CRASHDIR/analysis.$BOUND
                    DIAGTYPE="ICRASH"
                fi

                # A summary longer than 250 characters is written to a
                # file and the report carries the file name instead;
                # FLAG=1 marks that substitution.
                NOCHARS=`echo $SUMMARYLINE | wc -c`
                if [ "$NOCHARS" -gt 250 ] ; then
                    echo "$SUMMARYLINE" > $CRASHDIR/availsummary.$BOUND
                    SUMMARYLINE="$CRASHDIR/availsummary.$BOUND"
                    FLAG=1;
                fi

            elif [ -f $CRASHLOGFILE.$BOUND ] ; then
                EVENTDATE=`awk '/crash time/ {print $3, $4, $5, $6, $7}' $CRASHLOGFILE.$BOUND`
                EVENTTIME=`$AMTIME1970 -t $EVENTDATE`
                EVENTCODE=2097168
                DIAGTYPE=ICRASH
                DIAGFILE=$CRASHLOGFILE.$BOUND
            else
                # No summary and no crashlog: use one minute before
                # this start as the event time.
                EVENTTIME=`expr $SINCE1970 - 60`
                EVENTCODE=2097168
                DIAGFILE="$NOSTR"
                DIAGTYPE="$NOSTR"
            fi

            if [ ! -f $AMSYSLOGFILE ] ; then
                $AMSYSLOG -S $SYSLOGFILE -O $OSYSLOGFILE >> $AMSYSLOGFILE
            fi

        elif [ -f $EVENTFILE ] ; then
            # An event record ("code|time") was saved before this boot.
            EVENTCODE=`cat $EVENTFILE | cut -d'|' -f1`
            EVENTTIME=`cat $EVENTFILE | cut -d'|' -f2`
            AMSYSLOGFILE=""
        else
            # Neither a new crash nor a saved event record.
            DIAGTYPE="$NOSTR"
            DIAGFILE="$NOSTR"
            SUMMARYLINE="$NOSTR"
            $AMSYSLOG -S $SYSLOGFILE -O $OSYSLOGFILE > $AMSYSLOGFILE
            EVENTCODE=2097166
            EVENTTIME=-1
        fi

        #
        # At this point, we have determined all possible events.
        #
        # LASTTICK will point to the last time eventmond wrote to file
        # SUEVENTCODE & SUEVENTTIME will point to single user event
        # EVENTCODE & EVENTTIME will point to multi-user event
        #
        # Now is the time to log the event to SSDB
        #

        if [ "$EXTRASUEVENT" -eq 1 ] ; then
            echo -n "$SUEVENTTIME,$LASTTICK,$PREVSTART" >> $TMPSUREPORT
            LASTTICK=-1
            if [ "$EVENTTIME" -eq -1 ] ; then
                SUSTARTTIME=`expr $SINCE1970 - 60`
            else
                SUSTARTTIME=$EVENTTIME
            fi
            # The main event below is reported relative to the
            # single-user start time.
            PREVSTART=$SUSTARTTIME

            echo -n ",$SUSTARTTIME,$STATUSINTERVAL,$BOUND,$NOSTR,$NOSTR" >> $TMPSUREPORT
            TMPFLAG=`expr $FLAG + 2`
            FLAG=`expr $FLAG + 4`
            echo -n ",$NOSTR,$NOSTR,$NOSTR,1,$TMPFLAG,$NOSTR,$NOSTR" >> $TMPSUREPORT
            $SSLOGGER -s $SUEVENTCODE -f $TMPSUREPORT 2>/dev/null
            RETCODE=$?
            if [ $RETCODE != 0 ] ; then
                logger -t availmon -p err -i "Unable to log availmon event ($SUEVENTCODE) to ESP ($RETCODE)"
            fi
        fi

        checkconfigchange

        echo -n "$EVENTTIME,$LASTTICK,$PREVSTART">> $TMPREPORT
        echo -n ",$SINCE1970,$STATUSINTERVAL,$BOUND," >> $TMPREPORT
        echo -n "'$DIAGTYPE','$DIAGFILE','$AMSYSLOGFILE'," >> $TMPREPORT
        echo -n "$HWCHANGED,$SWCHANGED,1,$FLAG,$NOSTR," >> $TMPREPORT
        # Escape %, backslash and quote characters in the summary and
        # wrap it in single quotes before appending it to the report.
        echo $SUMMARYLINE | sed -e "s/%/%%/g" -e "s/\\\\/\\\\\\\\/g" -e "s/\'/\\\'/g" -e 's/\"/\\\"/g' -e "s/^/\'/" -e "s/$/\'/" >> $TMPREPORT
        $SSLOGGER -s $EVENTCODE -f $TMPREPORT 2>/dev/null
        RETCODE=$?
        if [ $RETCODE != 0 ] ; then
            logger -t availmon -p err -i "Unable to log availmon event ($EVENTCODE) to ESP ($RETCODE)"
        fi
    fi

    # Save state for the next boot and drop the per-run files.
    tail -1 $SYSLOGFILE > $LASTSYSLOG
    echo $SINCE1970 > $PREVSTARTFILE
    rm -f $SCRATCHFILE $TMPREPORT $TMPSUREPORT $SUMMARYFILE $EVENTFILE $SUEVENTFILE
fi