1
0
Files
irix-657m-src/eoe/cmd/sss/availmon/scripts/amstart
2022-09-29 17:59:04 +03:00

541 lines
16 KiB
Bash

#! /sbin/sh
##################################################################
# This script should not be executed after boot because it
# may report a system reboot and cause confusion.
#
# availmon always collects data irrespective of whether
# the data is sent outside or not.
#
# availmon's data is always logged into system support
# database. Please refer to the man page of ESP
# for more information.
#
# This script identifies any Controlled shutdowns,
# panics or other unscheduled shutdowns.
##################################################################
# Script name as invoked, version string, and fixed directories.
COMMAND="$0"
AMRVERSIONNUM="2.1"
AVAILDIR="/var/adm/avail"
SAVEDIR="$AVAILDIR/.save"
USRETCDIR="/usr/etc"
PLATFORM="`uname -m`"
# Placeholder written into report fields that have no value.
NOSTR=NULL
# Database query tool, the base SQL statement that the option
# lookups extend, and the database name handed to the query tool.
DBQUERY="/usr/sbin/espquery"
SQLSTMT="select option_default from tool where tool_name='AVAILMON'"
SSDB="ssdb"
FLAG="0"
SSLOGGER="/usr/sbin/esplogger"
AMCONVERT="/usr/etc/amconvert"
##################################################################
# Helper binaries (under $USRETCDIR) and the tick file
##################################################################
AMTIME1970="$USRETCDIR/amtime1970"
AMSYSLOG="$USRETCDIR/amsyslog"
AMTICKERD="$USRETCDIR/eventmond"
TICKFILE="$SAVEDIR/lasttick"
##################################################################
# Get crash directory
##################################################################
# Scan every word of savecore.options; the last word that names an
# existing directory wins.  An empty or missing options file simply
# gives the loop nothing to iterate over.
CRASHDIR=
OPTIONS=`cat /etc/config/savecore.options 2>/dev/null`
for DIR in $OPTIONS ; do
    if [ -d "$DIR" ] ; then
        CRASHDIR=$DIR
    fi
done
# Nothing usable in the options file: use the stock location.
CRASHDIR=${CRASHDIR:-/var/adm/crash}
################################################################
# Older savecore scripts write crashlog files rather than
# analysis.N files; remember where such a file would be.
################################################################
CRASHLOGFILE=$CRASHDIR/crashlog
##################################################################
# Get the SYSLOG file and related variables
##################################################################
# Step 1: find the syslogd configuration file.  If syslogd.options
# passes -f with an existing file, that file is used; otherwise the
# default /etc/syslog.conf is assumed.
SYSLOGDCONF=""
SYSLOGDOPTS="df:m:p:"
OPTIONS=`cat /etc/config/syslogd.options 2>/dev/null`
if [ "z$OPTIONS" != "z" ] ; then
    while getopts $SYSLOGDOPTS OPTNAME $OPTIONS 2>/dev/null ; do
        case "$OPTNAME" in
        f)
            if [ -f "$OPTARG" ] ; then
                SYSLOGDCONF="$OPTARG"
                break
            fi
            ;;
        esac
    done
fi
SYSLOGDCONF=${SYSLOGDCONF:-/etc/syslog.conf}
# Step 2: take the last field of the uncommented "*.crit" lines,
# sort them uniquely and keep the first one as the SYSLOG file.
SYSLOGFILE=""
if [ -f "$SYSLOGDCONF" ] ; then
    SYSLOGFILE=`cat $SYSLOGDCONF | grep -v "^#" | grep "\*.crit" | awk '{print $NF}' | sort -u | head -1 2>/dev/null`
fi
# No config file, no match, or the match is not a regular file:
# fall back to the default log.
if [ "z$SYSLOGFILE" = "z" -o ! -f "$SYSLOGFILE" ] ; then
    SYSLOGFILE=/var/adm/SYSLOG
fi
# The rotated log sits in the same directory with an "o" prefix
OSYSLOGFILE=`dirname $SYSLOGFILE`/o`basename $SYSLOGFILE`
LASTSYSLOG=$SAVEDIR/lastsyslog
AMSYSLOGFILE=$CRASHDIR/syslog
##################################################################
# Other variables/files
##################################################################
# State files kept under $SAVEDIR / $AVAILDIR
PREVSTARTFILE="$SAVEDIR/prevstart"
EVENTFILE="$SAVEDIR/event"
SUEVENTFILE="$SAVEDIR/suevent"
CONFIGCHANGEFILE="$SAVEDIR/configchange"
SCRATCHFILE="$AVAILDIR/init.scratch"
# Per-run report files, made unique with this shell's PID
TMPSUREPORT="$CRASHDIR/suavailreport.$$"
TMPREPORT="$CRASHDIR/availreport.$$"
# Counters and flags; -1 for BOUND means no crash bound was found
BOUND="-1"
EXTRASUEVENT="0"
CM_STATUS="0"
HWCHANGED="0"
SWCHANGED="0"
##################################################################
# Checks for HINV / VERSIONS Changes on the machine
#
# Reads:   CM_STATUS, DBQUERY, SSDB, CONFIGCHANGEFILE
# Sets:    HWCHANGED / SWCHANGED to 1 when the newest cm_event
#          differs from the one recorded in $CONFIGCHANGEFILE and
#          its type bits say so (they are never reset to 0 here).
# Writes:  the newest change time to $CONFIGCHANGEFILE.
# Returns: 0 when the check is skipped (configmon failed or the
#          query tool is not installed).
##################################################################
checkconfigchange()
{
    # Check the exit status of configmon. If it failed, there is no
    # need to check for configuration changes...
    if [ $CM_STATUS -gt 0 ] ; then
        return 0
    fi
    if [ ! -f "$DBQUERY" ] ; then
        return 0
    fi

    # Newest cm_event row for the local, active system.  The row is
    # parsed as "|time|type|"; an empty result is treated as "|0|0|".
    LASTCONFIGCHANGE=`$DBQUERY -t -s "select cm_event.time,cm_event.type from system_info,cm_event where cm_event.sys_id=system_info.sys_id and system_info.local=1 and system_info.active=1 order by cm_event.time" $SSDB 2>/dev/null | tail -1 2>/dev/null`
    LASTCONFIGCHANGE=${LASTCONFIGCHANGE:='|0|0|'}
    LASTCHANGEDATE=`echo $LASTCONFIGCHANGE | cut -d'|' -f2`
    LASTCHANGETYPE=`echo $LASTCONFIGCHANGE | cut -d'|' -f3`

    if [ -f "$CONFIGCHANGEFILE" ] ; then
        PREVCHANGEDATE=`cat $CONFIGCHANGEFILE`
        if [ "$PREVCHANGEDATE" != "$LASTCHANGEDATE" ] ; then
            # A missing or non-numeric type would make the numeric
            # tests below fail; treat it as "no bits set".
            case "$LASTCHANGETYPE" in
            '' | *[!0-9]*)
                LASTCHANGETYPE=0
                ;;
            esac
            # Only the low eight bits are decoded.  Without this mask
            # a value above 255 is never reduced to 0 and the loop
            # below would spin forever once TMPTYPE reaches 0.
            LASTCHANGETYPE=`expr $LASTCHANGETYPE % 256`

            # Walk the bits from 128 down to 1.  TMPHINVCOUNT is the
            # 1-based position of the bit being examined:
            #   positions 3,4 (values 32,16) -> software changed
            #   positions 5,6 (values  8, 4) -> hardware changed
            TMPTYPE=128
            TMPHINVCOUNT=1
            while [ "$LASTCHANGETYPE" -gt 0 -a "$TMPTYPE" -gt 0 ]
            do
                if [ "$LASTCHANGETYPE" -ge "$TMPTYPE" ] ; then
                    LASTCHANGETYPE=`expr $LASTCHANGETYPE - $TMPTYPE`
                    case $TMPHINVCOUNT in
                    3 | 4)
                        SWCHANGED=1
                        ;;
                    5 | 6)
                        HWCHANGED=1
                        ;;
                    esac
                fi
                TMPTYPE=`expr $TMPTYPE / 2`
                TMPHINVCOUNT=`expr $TMPHINVCOUNT + 1`
            done
        fi
    fi
    # Remember the newest change so the next boot compares against it.
    echo "$LASTCHANGEDATE" > $CONFIGCHANGEFILE
}
##################################################################
# Runs Configuration Monitor to collect Configuration data
#
# Sets CM_STATUS to the exit status of "configmon -u" and reports
# a failure through esplogger.
##################################################################
run_configmon()
{
    CONFIGNUM=0
    CONFIGMAXLOOPS=5

    # With the window system configured on, poll every 3 seconds
    # until exactly one Xsgi process shows up in the ps listing,
    # giving up once the retry counter reaches $CONFIGMAXLOOPS.
    if chkconfig windowsystem ; then
        while : ; do
            sleep 3
            if [ `ps -eo "comm" | grep Xsgi | wc -l` -eq 1 ] ; then
                break
            fi
            if [ $CONFIGNUM -eq $CONFIGMAXLOOPS ] ; then
                break
            fi
            CONFIGNUM=`expr $CONFIGNUM + 1`
        done
    fi

    #
    # Configuration data is hard to collect completely this early
    # in start-up, so wait one more minute to improve the odds.
    #
    sleep 60

    /usr/sbin/configmon -u > /dev/null
    CM_STATUS=$?
    if [ $CM_STATUS -le 0 ] ; then
        return 0
    fi
    /usr/sbin/esplogger -s 0x00200107 -p syslog.warning \
        -m "ConfigMon UPDATE FAILED" ;
}
##################################################################
# Checks for a single-user shutdown
#
# Sets EXTRASUEVENT to 1 (and SUEVENTCODE / SUEVENTTIME from
# $SUEVENTFILE) when a single-user event was recorded at or before
# the boot time reported by "$AMTIME1970 -i"; otherwise leaves
# EXTRASUEVENT at 0.
##################################################################
checksingleuser()
{
    EXTRASUEVENT=0
    if [ ! -f $SUEVENTFILE ] ; then
        return 0
    fi
    #
    # The event file holds "code|time".  A boot time that is not
    # earlier than the recorded single-user event time means there
    # is one more event to capture.
    #
    BOOTTIME=`$AMTIME1970 -i | cut -d'|' -f2`
    SUEVENTTIME=`cut -d'|' -f2 $SUEVENTFILE`
    if [ $BOOTTIME -ge $SUEVENTTIME ] ; then
        EXTRASUEVENT=1
        SUEVENTCODE=`cut -d'|' -f1 $SUEVENTFILE`
    fi
}
##################################################################
# Start main function
#
# Sequence: run configmon; verify the installation; on a first
# start create $SAVEDIR; load PREVSTART and LASTTICK; compare the
# crash bounds; load (or import) the availmon configuration; start
# the ticker if it is enabled; then, unless this is the first
# start, work out which event preceded this boot and log it
# through $SSLOGGER.
##################################################################
#
# Startup Configuration Monitor to gather configuration information
#
run_configmon
#
# Initialization
# Check for proper installation and presence of all required
# files.  Both failure paths exit non-zero so a caller can tell
# that availmon did not start.
#
if [ ! -d $AVAILDIR ] ; then
    echo "\n$COMMAND: Error: Cannot find directory $AVAILDIR"
    echo "Please try re-installing OS\n"
    logger -t availmon -p err -i cannot find $AVAILDIR
    exit 1
elif [ ! -x $AMTICKERD -o ! -x $AMSYSLOG -o ! -x $AMTIME1970 -o ! -x "$DBQUERY" ] ; then
    echo "\n$COMMAND: Error: Not all required executable files are present"
    echo "Please try re-installing OS\n"
    logger -t availmon -p err -i Executable files missing
    exit 1
else
    # Startup-code
    FIRSTSTART=0
    SINCE1970=`$AMTIME1970`
    # The install.sh script is no longer needed
    if [ -f $AVAILDIR/install.sh ] ; then
        rm -f $AVAILDIR/install.sh
    fi
    if [ ! -d $SAVEDIR ] ; then
        #
        # We will consider this case as the first time start of availmon
        # after "clean" installation
        #
        # Create .save directory if first time install
        #
        FIRSTSTART=1
        mkdir $SAVEDIR
        if [ $? -ne 0 ] ; then
            echo "$COMMAND: Error: unable to create $SAVEDIR directory"
            exit 1
        fi
    fi
    #
    # Set PREVSTART first. All availmon tools need it.
    # Preference order: the saved prevstart file, then the last
    # line of the old availlog, then -1.
    #
    if [ -f "$PREVSTARTFILE" ] ; then
        PREVSTART=`cat $PREVSTARTFILE | cut -d'|' -f2`
    else
        if [ -f "$AVAILDIR/availlog" ] ; then
            PREVSTART=`cat $AVAILDIR/availlog | tail -1 | cut -d'|' -f2`
        else
            PREVSTART=-1
        fi
        echo "$PREVSTART" > $PREVSTARTFILE
    fi
    # The availlog file is no longer needed
    rm -f $AVAILDIR/availlog
    #
    # get the lasttick value (migrating it from $AVAILDIR if the
    # file is still in its old location)
    #
    if [ -f "$TICKFILE" ] ; then
        LASTTICK=`cat $TICKFILE 2>/dev/null`
    else
        if [ -f "$AVAILDIR/lasttick" ] ; then
            LASTTICK=`cat $AVAILDIR/lasttick 2>/dev/null`
            rm -f $AVAILDIR/lasttick
        else
            LASTTICK=-1
        fi
        echo "$LASTTICK" > $TICKFILE
    fi
    #
    # Check Bounds
    # A bounds file in $CRASHDIR that differs from the saved copy
    # means new crash files were written; the saved (old) value is
    # the index of those files.
    #
    if [ -f $CRASHDIR/bounds ] ; then
        if [ -f $SAVEDIR/bounds ] ; then
            diff $SAVEDIR/bounds $CRASHDIR/bounds > /dev/null
            if [ $? -ne 0 ] ; then
                BOUND=`cat $SAVEDIR/bounds`
                cp $CRASHDIR/bounds $SAVEDIR/bounds
                SUMMARYFILE=$CRASHDIR/summary.$BOUND
                FRUFILE=$CRASHDIR/fru.$BOUND
                AMSYSLOGFILE=$CRASHDIR/syslog.$BOUND
            fi
        else
            cp $CRASHDIR/bounds $SAVEDIR/bounds
        fi
    else
        echo "0" > $SAVEDIR/bounds
    fi
    ##################################################################
    # Get the configuration parameters from SSDB and convert if needed
    ##################################################################
    SSDBCONFIGURED=`$DBQUERY -t -s "select count(*) from tool where tool_name='AVAILMON'" $SSDB 2>/dev/null | cut -d'|' -f2 2>/dev/null`
    # A failed query yields an empty (or non-numeric) count.  How
    # "-eq" treats such an operand differs between shells, and when
    # the test errors out control falls into the branch below that
    # deletes $AVAILDIR/config.  Treat it explicitly as "not
    # configured" so the old configuration is never removed on the
    # strength of a failed query.
    case "$SSDBCONFIGURED" in
    '' | *[!0-9]*)
        SSDBCONFIGURED=0
        ;;
    esac
    if [ "$SSDBCONFIGURED" -eq "0" ] ; then
        # There is no availmon configuration in the esp database
        if [ -d $AVAILDIR/config ] ; then
            # But there is a config directory, so we assume that it
            # contains a valid availmon configuration and try to
            # import the configuration from this directory
            $AMCONVERT 2>/dev/null
            if [ "$?" -eq "0" ] ; then
                # we successfully "configured" availmon
                # let's remove old config directory
                rm -rf $AVAILDIR/config
                rm -rf $SAVEDIR/autoemail
                rm -rf $SAVEDIR/autoemail.list
            else
                # amconvert failed
                echo "\n$COMMAND: Error: amconvert failed. You might need to execute amconfig to setup availmon configuration"
                logger -t availmon -p err -i amconvert failed. You might need to execute amconfig to setup availmon configuration
            fi
        else
            # there is no config directory either
            echo "\n$COMMAND: Error: No availmon configuration has been found. You might need to execute amconfig to setup availmon configuration"
            logger -t availmon -p err -i "No availmon configuration has been found. You might need to execute amconfig to setup availmon configuration"
        fi
    else
        # esp database already contains configuration
        # we are not going to change it
        # we need to remove "config directory" if any
        rm -rf $AVAILDIR/config
        rm -rf $SAVEDIR/autoemail
        rm -rf $SAVEDIR/autoemail.list
    fi
    STATUSINTERVAL=`$DBQUERY -t -s "$SQLSTMT and tool_option='statusinterval'" $SSDB 2>/dev/null | cut -d'|' -f2 2>/dev/null`
    TICKERD=`$DBQUERY -t -s "$SQLSTMT and tool_option='tickerd'" $SSDB 2>/dev/null | cut -d'|' -f2 2>/dev/null`
    TICKDURATION=`$DBQUERY -t -s "$SQLSTMT and tool_option='tickduration'" $SSDB 2>/dev/null | cut -d'|' -f2 2>/dev/null`
    SHUTDOWNRSN=`$DBQUERY -t -s "$SQLSTMT and tool_option='shutdownreason'" $SSDB 2>/dev/null | cut -d'|' -f2 2>/dev/null`
    # Defaults for options the database did not return
    STATUSINTERVAL=${STATUSINTERVAL:=0}
    STATUSINTERVALHRS=`expr $STATUSINTERVAL \* 24`
    TICKERD=${TICKERD:=0}
    SHUTDOWNRSN=${SHUTDOWNRSN:=0}
    TICKDURATION=${TICKDURATION:=300}
    LASTTICK=${LASTTICK:=-1}
    #
    # start ticker if needed
    #
    if [ "$TICKERD" -eq 1 ] ; then
        $AMTICKERD -a on -j $TICKDURATION -e $STATUSINTERVALHRS >/dev/null 2>&1
    else
        rm -f $TICKFILE
    fi
    #
    # Set shutdown reason
    #
    echo "$SHUTDOWNRSN" > $SAVEDIR/shutdownreason
    if [ $FIRSTSTART -eq 0 ] ; then
        #
        # Identify EVENT
        #
        checksingleuser
        SUMMARYLINE="";
        if [ $BOUND -ge 0 ] ; then
            # New crash files exist: classify the crash from them.
            if [ -f $SUMMARYFILE ] ; then
                if grep 'PANIC STRING' $SUMMARYFILE | grep NMI > /dev/null ; then
                    EVENTCODE=2097156
                elif [ -f $FRUFILE ] ; then
                    cat $FRUFILE >> $SUMMARYFILE
                    if grep -i 'FRU ANALY' $FRUFILE > /dev/null ; then
                        if grep 'FRU ANALYZER' $FRUFILE | grep -i 'No error' > /dev/null ; then
                            EVENTCODE=2097157
                        elif grep -i 'SOFTWARE' $FRUFILE > /dev/null ; then
                            EVENTCODE=2097157
                        elif grep 'Inconclusive hardware error state' $FRUFILE > /dev/null ; then
                            EVENTCODE=2097168
                        else
                            EVENTCODE=2097167
                        fi
                    else
                        EVENTCODE=2097168
                    fi
                else
                    EVENTCODE=2097168
                fi
                # NOTE(review): EVENTTIME is empty if the summary has
                # no CRASH TIME line; it is later used in a numeric
                # test - confirm the summary format guarantees it.
                EVENTTIME=`awk '/CRASH TIME/ {print $3}' $SUMMARYFILE`
                SUMMARYLINE=`grep 'PANIC STRING' $SUMMARYFILE`
                if [ $EVENTCODE -eq 2097167 ] ; then
                    SUMMARYLINE="`tail +2 $SUMMARYFILE`"
                fi
                DIAGFILE=""
                if [ -f $CRASHDIR/analysis.$BOUND ] ; then
                    DIAGFILE=$CRASHDIR/analysis.$BOUND
                    DIAGTYPE="ICRASH"
                fi
                # A summary longer than 250 characters is stored in
                # a file and the report carries the file name
                # instead; FLAG=1 accompanies that substitution.
                NOCHARS=`echo $SUMMARYLINE | wc -c`
                if [ "$NOCHARS" -gt 250 ] ; then
                    echo "$SUMMARYLINE" > $CRASHDIR/availsummary.$BOUND
                    SUMMARYLINE="$CRASHDIR/availsummary.$BOUND"
                    FLAG=1;
                fi
            elif [ -f $CRASHLOGFILE.$BOUND ] ; then
                # Old-style crashlog written by an old savecore
                EVENTDATE=`awk '/crash time/ {print $3, $4, $5, $6, $7}' $CRASHLOGFILE.$BOUND`
                EVENTTIME=`$AMTIME1970 -t $EVENTDATE`
                EVENTCODE=2097168
                DIAGTYPE=ICRASH
                DIAGFILE=$CRASHLOGFILE.$BOUND
            else
                # Bounds changed but no crash files were found:
                # assume the event happened a minute before now.
                EVENTTIME=`expr $SINCE1970 - 60`
                EVENTCODE=2097168
                DIAGFILE="$NOSTR"
                DIAGTYPE="$NOSTR"
            fi
            if [ ! -f $AMSYSLOGFILE ] ; then
                $AMSYSLOG -S $SYSLOGFILE -O $OSYSLOGFILE >> $AMSYSLOGFILE
            fi
        elif [ -f $EVENTFILE ] ; then
            # An event file is present: it holds "code|time".
            EVENTCODE=`cat $EVENTFILE | cut -d'|' -f1`
            EVENTTIME=`cat $EVENTFILE | cut -d'|' -f2`
            AMSYSLOGFILE=""
        else
            # Neither crash files nor an event file were found.
            DIAGTYPE="$NOSTR"
            DIAGFILE="$NOSTR"
            SUMMARYLINE="$NOSTR"
            $AMSYSLOG -S $SYSLOGFILE -O $OSYSLOGFILE > $AMSYSLOGFILE
            EVENTCODE=2097166
            EVENTTIME=-1
        fi
        #
        # At this point, we have determined all possible events.
        #
        # LASTTICK will point to the last time eventmond wrote to file
        # SUEVENTCODE & SUEVENTTIME will point to single user event
        # EVENTCODE & EVENTTIME will point to multi-user event
        #
        # Now is the time to log the event to SSDB
        #
        if [ "$EXTRASUEVENT" -eq 1 ] ; then
            # The first write truncates: the report name is only
            # unique per PID, so a file left behind by an earlier
            # boot must not be prepended to this report.
            echo -n "$SUEVENTTIME,$LASTTICK,$PREVSTART" > $TMPSUREPORT
            LASTTICK=-1
            if [ "$EVENTTIME" -eq -1 ] ; then
                SUSTARTTIME=`expr $SINCE1970 - 60`
            else
                SUSTARTTIME=$EVENTTIME
            fi
            PREVSTART=$SUSTARTTIME
            echo -n ",$SUSTARTTIME,$STATUSINTERVAL,$BOUND,$NOSTR,$NOSTR" >> $TMPSUREPORT
            TMPFLAG=`expr $FLAG + 2`
            FLAG=`expr $FLAG + 4`
            echo -n ",$NOSTR,$NOSTR,$NOSTR,1,$TMPFLAG,$NOSTR,$NOSTR" >> $TMPSUREPORT
            $SSLOGGER -s $SUEVENTCODE -f $TMPSUREPORT 2>/dev/null
            RETCODE=$?
            if [ $RETCODE != 0 ] ; then
                logger -t availmon -p err -i "Unable to log availmon event ($SUEVENTCODE) to ESP ($RETCODE)"
            fi
        fi
        checkconfigchange
        # Same truncate-then-append scheme for the main report.
        echo -n "$EVENTTIME,$LASTTICK,$PREVSTART" > $TMPREPORT
        echo -n ",$SINCE1970,$STATUSINTERVAL,$BOUND," >> $TMPREPORT
        echo -n "'$DIAGTYPE','$DIAGFILE','$AMSYSLOGFILE'," >> $TMPREPORT
        echo -n "$HWCHANGED,$SWCHANGED,1,$FLAG,$NOSTR," >> $TMPREPORT
        # SUMMARYLINE is deliberately unquoted so a multi-line
        # summary collapses onto one line; sed then escapes it and
        # wraps it in single quotes.
        echo $SUMMARYLINE | sed -e "s/%/%%/g" -e "s/\\\\/\\\\\\\\/g" -e "s/\'/\\\'/g" -e 's/\"/\\\"/g' -e "s/^/\'/" -e "s/$/\'/" >> $TMPREPORT
        $SSLOGGER -s $EVENTCODE -f $TMPREPORT 2>/dev/null
        RETCODE=$?
        if [ $RETCODE != 0 ] ; then
            logger -t availmon -p err -i "Unable to log availmon event ($EVENTCODE) to ESP ($RETCODE)"
        fi
    fi
    # Record state for the next boot and remove the per-boot files.
    tail -1 $SYSLOGFILE > $LASTSYSLOG
    echo $SINCE1970 > $PREVSTARTFILE
    rm -f $SCRATCHFILE $TMPREPORT $TMPSUREPORT $SUMMARYFILE $EVENTFILE $SUEVENTFILE
fi