#! /usr/bin/ksh 
# ===========================================================
#  arraymond                                                =
#                                                           =
#  HP disk array monitor daemon script                      =
#                                                           =
#  syntax: arraymond                                        =
#                                                           =
#  calls: shell script $DAEMON_DIR/gstat2430d               =
#         shell script $DAEMON_DIR/gstat3586a               =
#         shell script $DAEMON_DIR/arraymail                =
#                                                           =
#  uses files:  $MISC_DIR/monitor.lock                      =
#               $MISC_DIR/hparray.devs                      =
#               $MISC_DIR/amond.mesg                        =
#               $MISC_DIR/monitor.mesg                      =
#               $MISC_DIR/arraymon.dest                     =
#                                                           =
#  utility programs                                         =
#               $DAEMON_DIR/aml                             =
#                                                           =
#  system utilities                                         =
#               $CMNDS_DIR/cat                              =
#               $CMNDS_DIR/cp                               =
#               $CMNDS_DIR/echo                             =
#               $CMNDS_DIR/expr                             =
#               $CMNDS_DIR/grep                             =
#               $CMNDS_DIR/mktemp                           =
#               $CMNDS_DIR/ll                               =
#               $CMNDS_DIR/ls                               =
#               $CMNDS_DIR/ps                               =
#               $CMNDS_DIR/rm                               =
#               $CMNDS_DIR/sleep                            =
#               $CMNDS_DIR/touch                            =
#               $CMNDS_DIR/wc                               =
#               $SYSCMDS_DIR/fuser                          =
#                                                           =
#                                                           =
#  (c)Copyright 1993, 1995 Hewlett-Packard Company          =
#     All rights reserved.                                  =
#                                                           =
# ===========================================================

hdr="@(#) $Header: arraymond,v 78.4 96/04/12 15:51:27 ssa Exp $"

# Shared definitions.  The directory variables, message identifiers and
# TRUE/FALSE used below are not set in this file, so they presumably come
# from this header -- confirm against arraymon.hdr.
.  /usr/lbin/hpC2400/arraymon.hdr

# Node name handed to aml (-n) for every message, and the flag passed as
# the first argument to $NOTIFY by the main loop.
   host_name=$(${CMNDS_DIR}/uname -n)
   m_send=$FALSE

# arraymond takes no arguments.  If any are given, have aml build the
# $MonUse message into $DmsgFile, show it on the console and give up.
   if (( $# != 0 ))
   then
      ${CMNDS_DIR}/rm $DmsgFile 2> /dev/null
      ${DAEMON_DIR}/aml -s $MonSet -m $MonUse -n $host_name -f $DmsgFile

      if [[ -s $DmsgFile ]]
      then
         ${CMNDS_DIR}/cat $DmsgFile > /dev/console 2> /dev/null 
         ${CMNDS_DIR}/rm $DmsgFile 2> /dev/null 
      fi
      exit 1 
   fi

#------------------------------------------------------------
# make sure no other instance of this monitor daemon exists
#------------------------------------------------------------

   # Count the processes that fuser reports for the daemon script.  The
   # count is taken from a temp file rather than a pipeline or command
   # substitution around fuser, so that no extra shell children exist while
   # fuser runs.  More than one word is treated as "another instance is
   # already running" (presumably the first word is this process itself).
   #
   # awk sums NF over all lines and always prints a number, so empty or
   # multi-line fuser output can no longer produce a malformed test below.
   wordcnt=0
   fusrlst=$(${CMNDS_DIR}/mktemp -d ${TMP_DIR} -c)

   if [[ -n $fusrlst ]]
   then
      ${SYSCMDS_DIR}/fuser ${DAEMON_DIR}/arraymond > "$fusrlst" 2> /dev/null
      wordcnt=$(${CMNDS_DIR}/awk ' { n += NF } END { print n + 0 } ' "$fusrlst")
      ${CMNDS_DIR}/rm "$fusrlst" 2> /dev/null
   fi

   # ${wordcnt:-0}: if awk itself could not be run, fall back to "no other
   # instance" exactly as the original did, but without a test(1) error.
   if [ "${wordcnt:-0}" -gt 1 ]
   then 
      # Report the exclusivity failure ($NoExcl) on the console and quit.
      ${CMNDS_DIR}/rm $DmsgFile 2> /dev/null
      ${DAEMON_DIR}/aml -s $MonSet -m $NoExcl -n $host_name -f $DmsgFile

      if [[ -s $DmsgFile ]]
      then
         ${CMNDS_DIR}/cat $DmsgFile > /dev/console 2> /dev/null
         ${CMNDS_DIR}/rm $DmsgFile 2> /dev/null
      fi
      exit 1
   fi 

#------------------------------------------------------------------
# if arraymon.dest doesn't exist, create one with default entries
#------------------------------------------------------------------

  # Seed the notification destination file when it is missing or empty:
  # the console first, then root on this host.
  [[ -s ${MISC_DIR}/arraymon.dest ]] ||
  {
    print "/dev/console" >> ${MISC_DIR}/arraymon.dest 2> /dev/null
    print "root@$host_name" >> ${MISC_DIR}/arraymon.dest 2> /dev/null
    ${CMNDS_DIR}/chmod 555 ${MISC_DIR}/arraymon.dest 2> /dev/null
  }

#------------------------------------------------------------------
# make sure the daemon's working copy of the device list exists
# (created empty and world read/write when missing or zero length)
#------------------------------------------------------------------

  [[ -s $ARRAY_LIST ]] ||
  {
    ${CMNDS_DIR}/touch $ARRAY_LIST 2> /dev/null
    ${CMNDS_DIR}/chmod 666 $ARRAY_LIST 2> /dev/null
  }
  
#------------------------------------------------------------
# we do not want to check immediately when we start.
#------------------------------------------------------------

   ${CMNDS_DIR}/sleep $SLEEP_SECS

#------------------------------------------------------------
# state shared by make_list and the main loop
#
# list_date:       date stamp of $DEVS_LIST at the last successful
#                  copy; "NODATE" forces a fresh copy
# arr_file_absent: how many consecutive times array devices 
#                  file has been absent
# lock_error, open_fail, lock_missng:
#                  consecutive-failure counters kept by make_list
#------------------------------------------------------------

   list_date="NODATE"
   arr_file_absent=0
   lock_error=0 open_fail=0 lock_missng=0

############################################################
#-----------------------------------------------------------
# function make_list compares the current date for the 
# $DEVS_LIST file with the previously saved date for the 
# $DEVS_LIST file. A mis-compare of the dates causes
# make_list to make a new copy of $ARRAY_LIST from 
# $DEVS_LIST. list_date is a global variable that has 
# the most recent date of $DEVS_LIST. make_list returns the 
# status of the attempted file lock.
#
# LOCKED_SLEEP_SECS: how long to sleep when we find the 
#                    array devices semaphore file locked
#                    (should be short)
#
# dmon_locked: how many consecutive times array devices 
#              semaphore file has been locked
#-----------------------------------------------------------

function make_list
{
#------------------------------------------------------------
# make_list PREV_DATE
#
# Refresh $ARRAY_LIST from $DEVS_LIST when the date stamp of
# $DEVS_LIST differs from PREV_DATE.
#
# Arguments: $1 - date stamp saved from the previous copy
#                 (the caller passes the global list_date)
# Globals:   list_date    - set to the new stamp after a good copy,
#                           or to "NODATE" when the copy fails
#            dmon_locked, lock_error, open_fail, lock_missng
#                         - consecutive-failure counters
#            MAX_DM_COUNT, TEST_LOCK, DAEMON_LOCK, NOTIFY, ... are
#            read but not set here (presumably from arraymon.hdr)
# Returns:   0 - list unchanged, or copied successfully
#            1 - lock not obtained, or the copy failed
#
# None of the names below are declared with typeset, so they are
# all global, exactly as in the original.
#------------------------------------------------------------

# status words read back from $TEST_LOCK
FILE_LOCK="OK"          # the daemon now holds the lock
LOCK_FAIL="NOLOCK"      # somebody else holds the lock
LOCK_ERR="ERREXIT"      # checker reported an error (handled by the final else)
OPEN_FAIL="NOPEN"       # lock file missing or could not be opened

MIN_LOCK_COUNT=5        # not referenced anywhere in this file
MAX_LOCK_COUNT=9
LOCKED_SLEEP_SECS=30
lock_status=0

# NOTE(review): this is the literal string FALSE, while the main loop
# passes $FALSE to $NOTIFY -- confirm which form arraymail expects.
send_mail=FALSE

prev_list_date=$1
cur_list_date=$(${CMNDS_DIR}/ll $DEVS_LIST | ${UXTOOL_DIR}/awk ' {print $6 $7 $8} ')

# Quote the right-hand side so the stamp is compared as a literal string
# and never as a pattern.
if [[ $prev_list_date != "$cur_list_date" ]]
then
  loop_on_lock=$TRUE

#------------------------------------------------------------
# is the array devices file locked?
#
# the program $TEST_LOCK checks to see if the array devices
# lock file ($DAEMON_LOCK) is locked. If it is, then the lock
# signifies that someone (probably the arrayscan utility) is 
# working on the $DEVS_LIST file of array devices; if the 
# array devices file is in use, go back to sleep 
#
# the arrayscan utility should only lock the array devices
# semaphore file for a very short time -- once it has been found
# locked MAX_LOCK_COUNT times in a row, notify the system admin 
#------------------------------------------------------------

  while (( loop_on_lock ))
  do

    # Clear the previous answer first so that a failed read cannot be
    # mistaken for a fresh "OK".
    lock_result=""

    # Run the lock checker as a co-process.  The stderr redirection has to
    # come before "|&": "|&" terminates the command, so a redirection
    # written after it is a separate empty command and has no effect on
    # the co-process.
    ${TEST_LOCK} $DAEMON_LOCK 2> /dev/null |&
    read -p lock_result

    if [[ $lock_result = $FILE_LOCK ]]
    then

#------------------------------------------------------------
# daemon has the lock -- proceed                       
#------------------------------------------------------------

       dmon_locked=0
       loop_on_lock=$FALSE
       lock_error=0
       lock_missng=0
       open_fail=0

       ${CMNDS_DIR}/cp $DEVS_LIST $ARRAY_LIST 2> /dev/null

#------------------------------------------------------------
# if the copy fails for any reason, treat it like a lock
# failure and try again the next time
#------------------------------------------------------------

       if [ $? -eq 0 ]
       then
         list_date=$cur_list_date
       else
         list_date="NODATE"
         print -p "unlock"
         return 1
       fi

#------------------------------------------------------------
# release the lock on the $DAEMON_LOCK file
#------------------------------------------------------------

       print -p "unlock"

    elif [[ $lock_result = $LOCK_FAIL ]]

#------------------------------------------------------------
# daemon did not get the lock, which assumes that the  
# arrayscan utility is modifying the $DEVS_LIST
#
# go back to sleep, but only for a short while; 
# arrayscan utility should only have the file locked for 
# a very short time     
#
# on the MAX_LOCK_COUNT-th consecutive failure build the
# message for the system admin; it is sent by the common
# notification step at the bottom of the loop
#------------------------------------------------------------

    then
      ((dmon_locked = dmon_locked + 1))
      if [ $dmon_locked -eq $MAX_LOCK_COUNT ]
      then
        ${DAEMON_DIR}/aml -s $MonSet -m $LockFail -L $DAEMON_LOCK -n $host_name -f $msgFile
      fi

      if [ $dmon_locked -lt $MAX_LOCK_COUNT ]
      then
        ${CMNDS_DIR}/sleep $LOCKED_SLEEP_SECS
      else
        loop_on_lock=$FALSE
      fi

#------------------------------------------------------------
# lock file is missing or the open of $DAEMON_LOCK has failed
#
# give up for this pass; the caller tries again after its
# normal sleep
#
# after MAX_DM_COUNT times: if the file exists but cannot be
# opened, notify the system admin; if it is missing, run
# $SCAN_FOR_ARRAYS and start counting again
#------------------------------------------------------------

    elif [[ $lock_result = $OPEN_FAIL ]]
    then
      loop_on_lock=$FALSE

      ${CMNDS_DIR}/ls $DAEMON_LOCK > /dev/null 2> /dev/null

      if [ $? -eq 0 ]
      then
        (( open_fail = open_fail + 1 ))
        if [ $open_fail -eq $MAX_DM_COUNT ]
        then
          ${DAEMON_DIR}/aml -s $MonSet -m $OpenFail -L $DAEMON_LOCK -n $host_name -f $msgFile
        fi

      else

        (( lock_missng = lock_missng + 1 ))
        if [ $lock_missng -eq $MAX_DM_COUNT ]
        then
          ${SCAN_FOR_ARRAYS} > /dev/null 2>&1
          lock_missng=0
        fi
      fi

#------------------------------------------------------------
# everything else: $LOCK_ERR (no filename was passed to
# checklock), a null value (probably an abnormal exit), or a
# status word this script does not recognise
#
# an unrecognised word used to match no branch at all, which
# left loop_on_lock set and made this loop spin without ever
# sleeping; it is now counted as a lock error
#
# give up for this pass; after MAX_DM_COUNT times notify
# system admin
#------------------------------------------------------------

    else
      loop_on_lock=$FALSE

      (( lock_error = lock_error + 1 ))
      if [ $lock_error -eq $MAX_DM_COUNT ]
      then
        ${DAEMON_DIR}/aml -s $MonSet -m $LockError -L $DAEMON_LOCK -n $host_name -f $msgFile
      fi
    fi

#------------------------------------------------------------
# common notification step: send whatever message any branch
# above has built, then discard it
#------------------------------------------------------------

    if [[ -s $msgFile ]]
    then
      $NOTIFY $send_mail $msgFile $(cat $NOTIFY_WHOM) > /dev/null 2>&1
      ${CMNDS_DIR}/rm $msgFile 2> /dev/null
    fi

  done

  if [[ $lock_result != $FILE_LOCK ]]
  then
    lock_status=1
  fi

fi

return $lock_status 

} # end make_list

#------------------------------------------------------------
# DEVS_LIST: a file of devices maintained by the configuration utility
#
# ARRAY_LIST: a copy of DEVS_LIST used by and maintained by arraymond
#
# format of ARRAY_LIST entries: XX YYYYYY /dev/rdsk/c#t#d#  0  0  0
#
# entry has: vendor, product ID, array file name, array status, program status, misc status
#
#        vendor:          Manufacturing Vendor
#
#        product ID:      Product Identification Number
#         
#        array file name: the device special file for this array
#         
#        array status:    how many times in a row an anomaly 
#                         has been detected on this array
#        program status:  how many times in a row a command 
#                         has failed (dld, dsp or arraydsp)
#        misc status:     how many times in a row the C2430D
#                         array has had "critical" sync status 
#                         -or-
#                         how many times in a row the arraydsp
#                         command has timed-out attempting to 
#                         get status from the array
#
# NOTIFY_WHOM: a file containing information necessary for
#        error notification
#
# first line: destination for screen messages
# rest: destinations for mail messages
#------------------------------------------------------------


#------------------------------------------------------------
# =====  MAIN  =====
#------------------------------------------------------------

#------------------------------------------------------------
# make sure there are no old copies of message archive files
# or message files
#------------------------------------------------------------

   # Remove message archive (*.arc) and message (*.mesg) files left in
   # $MISC_DIR.  "$file" is quoted so that a name containing whitespace or
   # glob characters is removed as one file instead of being split and
   # re-expanded.  When a pattern matches nothing it is passed to rm
   # literally; rm fails and the error is discarded, as before.
   for file in "$MISC_DIR/"*.arc "$MISC_DIR/"*.mesg
   do
      ${CMNDS_DIR}/rm "$file" 2> /dev/null
   done

   # rescan: set to $TRUE when a C3586A status check returns 4; once the
   # pass is finished "arraydsp -R" is run and the flag is cleared.
   rescan=$FALSE

   # "test 1" is always true: the daemon loops until it is killed.
   while test 1  
   do

#------------------------------------------------------------
# if the $DEVS_LIST file is present but has 0 length,
# the daemon will not sound an alarm; thus, the arrayscan     
# utility has the burden of placing entries in the file  
#
# if arrayscan forgets to put any entries into the 
# $DEVS_LIST, the daemon will patiently loop, waiting forever         
#
# if the file is absent, however, notify system admin 
# but quit after 3 notices                                
#
# NOTE(review): the code below builds the $NoDlist message only
# on the pass where arr_file_absent equals $MIN_DM_COUNT; the
# "3 notices" wording above could not be confirmed from this file.
#------------------------------------------------------------

     if [[ -s $DEVS_LIST ]]
     then

#------------------------------------------------------------
#      array devices file is present and has nonzero length
#------------------------------------------------------------
       arr_file_absent=0

       # Refresh $ARRAY_LIST if $DEVS_LIST has changed; a non-zero return
       # (no lock, or copy failed) skips the status checks for this pass.
       make_list $list_date
   
       if [ $? = 0 ]
       then

#------------------------------------------------------------
#      check the status of the arrays
#      checking routine may update file  
#
#      problem?  if $ARRAY_LIST format is bad, set list_date
#                to "NODATE" to force a new copy from $DEVS_LIST
#                on the next pass.
#------------------------------------------------------------

         # The new list is built in a temp file, one line per entry read
         # from $ARRAY_LIST on file descriptor 3.
         update_file=$(${CMNDS_DIR}/mktemp -d ${TMP_DIR} -c)
         while read -u3 dev_entry
         do

            # vendor and product ID (fields 1 and 2) joined, e.g. HPC2430D
            arr_prod=$(echo $dev_entry | ${UXTOOL_DIR}/awk '{ print $1$2 }')  

            # For both known products the status script is given the entry's
            # fields as arguments and its stdout is captured:
            #   exit 0     - the captured text replaces the entry
            #   exit 2     - keep the old entry, force a fresh copy next pass
            #   exit 4     - (C3586A only) keep the old entry, request a rescan
            #   other      - keep the old entry
            # Entries for any other product are copied through unchanged.
            case $arr_prod in
               "HPC2430D") updated_entry=$($GET_2430_STATUS $dev_entry)
                           c2430_stat=$?

                           if [ $c2430_stat -eq 2 ]
                           then
                             list_date="NODATE"
                           fi   

                           if [ $c2430_stat -eq 0 ]
                           then
                             ${CMNDS_DIR}/echo $updated_entry >> $update_file
                           else
                             ${CMNDS_DIR}/echo $dev_entry >> $update_file
                           fi;;

               "HPC3586A") updated_entry=$($GET_3586_STATUS $dev_entry)
                           c3586_stat=$?

                           if [ $c3586_stat -eq 2 ]
                           then
                             list_date="NODATE"

                           elif [ $c3586_stat -eq 4 ]
                           then
                             rescan=$TRUE
                           fi

                           if [ $c3586_stat -eq 0 ]
                           then
                             ${CMNDS_DIR}/echo $updated_entry >> $update_file
                           else
                             ${CMNDS_DIR}/echo $dev_entry >> $update_file
                           fi;;

                        *) ${CMNDS_DIR}/echo $dev_entry >> $update_file;;
            esac

         done 3< $ARRAY_LIST 

         # Replace the working list with the rebuilt one and drop the temp file.
         ${CMNDS_DIR}/cp $update_file $ARRAY_LIST 2> /dev/null
         ${CMNDS_DIR}/rm $update_file 2> /dev/null

         if [ $rescan -eq $TRUE ]
         then
           ${ARM_DIR}/arraydsp -R > /dev/null 2>&1
           rescan=$FALSE
         fi

         # Send any message left in $msgFile to the destinations listed in
         # $NOTIFY_WHOM, then discard it.
         if [[ -s $msgFile ]]
         then
           $NOTIFY $m_send $msgFile `cat $NOTIFY_WHOM` 2> /dev/null
           ${CMNDS_DIR}/rm $msgFile 2> /dev/null
         fi

       fi

     else

#------------------------------------------------------------
#      array devices file is absent or has zero length
#        -  zero length is OK                           
#        -  absent is an error condition  
#
#      if the problem persists, run arrayscan
#      if arrayscan fails, notify the system admin
#------------------------------------------------------------

       # ls succeeds for an existing (zero-length) file and fails when the
       # file is absent; only its exit status is used.
       ${CMNDS_DIR}/ls $DEVS_LIST > /dev/null 2>&1

       if [ $? -ne 0 ]
       then
         ((arr_file_absent = arr_file_absent + 1))

         # Acts only on the pass where the count reaches $MIN_DM_COUNT:
         # run $SCAN_FOR_ARRAYS, then check for the file again.
         if [ $arr_file_absent -eq $MIN_DM_COUNT ]
         then
           ${SCAN_FOR_ARRAYS} > /dev/null 2>&1
           ${CMNDS_DIR}/ls $DEVS_LIST > /dev/null 2>&1

           if [ $? -ne 0 ]
	   then
             ${DAEMON_DIR}/aml -s $MonSet -m $NoDlist -L $DEVS_LIST -n $host_name -f $msgFile
           else
             arr_file_absent=0
	   fi

         fi
       else
          arr_file_absent=0
       fi

       if [[ -s $msgFile ]]
       then
         $NOTIFY $m_send $msgFile `cat $NOTIFY_WHOM` 2> /dev/null
         ${CMNDS_DIR}/rm $msgFile 2> /dev/null
       fi

     fi

     # Wait $SLEEP_SECS before the next pass.
     ${CMNDS_DIR}/sleep $SLEEP_SECS

   done

#------------------------------------------------------------
# end of arraymond
#------------------------------------------------------------
