#!/bin/sh
# cluster   This shell script takes care of starting and stopping
#       the cluster daemons
# description:  starts and stops the cluster daemons
# chkconfig: 2345 99 01
#

# Command search path: the standard system directories plus /opt/cluster/bin.
PATH='/usr/bin:/sbin:/bin:/usr/sbin:/opt/cluster/bin'
export PATH

# Daemon program names, each followed by its option string.
HEARTBEAT='hb'
HEARTBEAT_OPTIONS=''

SVCMGR='svcmgr'
SVCMGR_OPTIONS=''

QUORUMD='quorumd'
QUORUMD_OPTIONS=''

POWERD='powerd'
POWERD_OPTIONS=''

# Helper commands and their options.
STOPCLUSTER='stopcluster'
STOPCLUSTER_OPTIONS=''        # used when the stop message goes to $SVCMGR
STOPQUORUMD_OPTIONS='-p'      # used when the stop message goes to $QUORUMD
POWERSWITCH='pswitch'
POWERSWITCH_OPTIONS='status'
CLULOG='clulog'

# daemon functions
# getpid NAME
#
# Print the PID(s) recorded or found for the daemon NAME on stdout; an empty
# line is printed when nothing is found or NAME is empty.  Three sources are
# tried in order and the first one that yields a value wins:
#   1. the first line of /var/run/NAME.pid
#   2. pidof NAME
#   3. the first "ps ax" line whose 5th column is NAME, (NAME), [NAME]
#      or NAME:
#
# The result is not validated: a stale pid file still yields the PID stored
# in it, so callers have to check for themselves that the process is alive.
# The pid file is located from the argument, not from the caller's $base.
getpid()
{
    pid=''

    # Without a name there is nothing meaningful to look up.
    if [ -z "$1" ]
    then
        echo
        return 0
    fi

    if [ -f "/var/run/$1.pid" ]
    then
        pid=$(head -n 1 "/var/run/$1.pid")
    fi
    if [ -z "$pid" ]
    then
        pid=$(pidof "$1")
    fi
    if [ -z "$pid" ]
    then
        # The name is handed to awk as ARGV[1]; ARGC=1 stops awk from
        # treating it as an input file, so it reads the ps output on stdin.
        pid=$(ps ax | awk 'BEGIN { prog=ARGV[1]; ARGC=1 }
                   { if ((prog == $5) || (("(" prog ")") == $5) ||
                     (("[" prog "]") == $5) ||
                   ((prog ":") == $5)) { print $1 ; exit 0 } }' "$1")
    fi

    # Deliberately unquoted: collapses stray whitespace around the PID(s).
    echo $pid
}

# startdaemon PROGRAM [ARG...]
#
# Run PROGRAM with the given arguments unless getpid reports a PID for it
# that ps confirms is alive.  Prints " already running.", " done." or
# " failed." to finish the progress line the caller started.
#
# The outcome is only reported through that message; the return status
# does not tell the caller whether the daemon started.
# Sets the globals $base and $pid, and disables core dumps (ulimit -c 0)
# in the calling shell before launching PROGRAM.
startdaemon()
{
    base=$(basename "$1")

    # check if it is already running
    # ($pid stays unquoted: it may hold more than one PID)
    pid=$(getpid "$base")
    if [ -n "$pid" ] && ps h $pid >/dev/null 2>&1
    then
        echo " already running."
        return 0
    fi

    # don't dump core
    ulimit -c 0

    # "$@" keeps every argument exactly as the caller passed it.
    if "$@"
    then
        echo " done."
    else
        echo " failed."
    fi
}

# stopdaemon PROGRAM
#
# Terminate the daemon PROGRAM and finish the caller's progress line with
# " not running.", " done." or " failed.".
#
# If getpid yields a PID and ps confirms the process exists, it is sent
# SIGTERM.  The process is then re-checked immediately, after 1 second and
# after 3 more seconds; if it is still there it is sent SIGKILL, followed
# by a 2 second pause.  A final ps check decides the message, and on success
# /var/run/<basename>.pid is removed.
# Sets the globals $base, $pid and $RC.
stopdaemon()
{
    base=$(basename $1)
    pid=$(getpid $base)

    # No PID at all: nothing to stop.
    if [ -z "$pid" ]
    then
        echo " not running."
        return
    fi

    if ps h $pid >/dev/null 2>&1
    then
        kill -TERM $pid

        # Each step first checks whether the process is gone; numeric steps
        # are grace periods in seconds, the last step is the forced kill.
        for step in 1 3 KILL
        do
            ps h $pid >/dev/null 2>&1 || break
            case $step in
            KILL)
                kill -KILL $pid
                sleep 2
                ;;
            *)
                sleep $step
                ;;
            esac
        done
    fi

    # RC is ps's verdict: 0 means the process is still there.
    ps h $pid >/dev/null 2>&1
    RC=$?
    if [ $RC -eq 0 ]
    then
        echo " failed."
    else
        echo " done."
        rm -f /var/run/$base.pid >/dev/null 2>&1
    fi
}

# daemonstatus PROGRAM
#
# Print a one-line status for the daemon PROGRAM and return:
#   0  a PID was found and ps confirms the process exists
#   1  a PID was found, the process is gone, /var/run/<basename>.pid exists
#   2  a PID was found, the process is gone, no pid file exists
#   3  no PID could be determined at all
# Sets the globals $base and $pid.
daemonstatus()
{
    base=$(basename $1)
    pid=$(getpid $base)

    # getpid came back empty-handed.
    if [ -z "$pid" ]
    then
        echo "$base is stopped."
        return 3
    fi

    # PID known and the process is alive.
    if ps h $pid >/dev/null 2>&1
    then
        echo "$base (pid $pid) is running."
        return 0
    fi

    # PID known but the process is gone; a leftover pid file is reported.
    if [ -f /var/run/${base}.pid ]
    then
        echo "$base dead but pid file exists."
        return 1
    fi

    echo "$base is stopped."
    return 2
}

# Command dispatcher: $1 selects the action (start, stop, reload, status or
# probe); anything else prints the usage message and exits 1.
# RETVAL is the exit status used by the branches that fall through to the
# final "exit"; no branch below changes it.
RETVAL=0
case "$1" in
start)
    echo "-------- Starting Cluster Daemons ---------------------"
    # NOTE(review): presumably writes a message to the cluster log; the
    # meaning of the -s/-l/-n options is not visible here - confirm.
    $CLULOG -s 6 -l 6 -n cluster "____ Performing cluster start ____"

    # Only $QUORUMD is launched by this script.
    # NOTE(review): presumably it brings up the remaining daemons - confirm.
    echo -n "Starting $QUORUMD: "
    startdaemon $QUORUMD $QUORUMD_OPTIONS

    # If this lock file doesn't exist, init won't even try to run
    # the shutdown script for this service on RedHat systems!
    # On non-RedHat systems /var/lock/subsys may not exist, hence the
    # discarded output.
    touch /var/lock/subsys/cluster >/dev/null 2>&1

    echo "-------- Completed Cluster Startup ---------------------"
    ;;

# The stop sequence primarily consists of sending a message to the highest
# level cluster service, which does its own shutdown and then sends a message
# to the layer it depends on.  This is necessary as you can't just send
# kill signals to the quorumd specifically, because in that case it wouldn't
# shut down cleanly by marking its state as DOWN on the disk partition - this
# would result in the other node shooting this node when the timestamp
# stops updating.
# Here's the order in which stop messages get sent:
# SM -> quorumd -> powerd
# On the off chance that any of the daemons in the chain of shutdown
# messaging are not running, skip ahead to the next daemon or direct kill.
stop)
    echo "-------- Stopping Cluster Daemons ---------------------"
    $CLULOG -s 6 -l 6 -n cluster "____ Initiating cluster stop ____"
    # pidof's exit status is kept as the "is it running" flag: 0 = running.
    pidof $SVCMGR >/dev/null 2>&1; SMRUNNING="$?"
    if [ $SMRUNNING = 0 ]; then
        echo -n "Sending stop message to $SVCMGR: "
        $STOPCLUSTER $STOPCLUSTER_OPTIONS 
    
        # Handle oddball case of SM running but quorumd isn't.
        pidof $QUORUMD >/dev/null 2>&1; QUORUMD_RUNNING="$?"
        if [ $QUORUMD_RUNNING != 0 ]; then
            echo -n "Shutting down $POWERD: "
            stopdaemon $POWERD
        fi
    else 
        # XXX - if SM isn't running it's not safe to just tell quorumd to
        # stop as this could result in services running on both members.
        # In this case call SM with a new (not yet implemented) option which
        # tells it to stop all services and return status when done.  If that
        # is successful, it will send a termination message to quorumd.  If
        # the new invocation of SM fails to stop services then a hard reboot
        # is warranted.
        pidof $QUORUMD >/dev/null 2>&1; QUORUMD_RUNNING="$?"
        if [ $QUORUMD_RUNNING = 0 ]; then
            echo -n "Sending stop message to $QUORUMD: "
            $STOPCLUSTER $STOPQUORUMD_OPTIONS
        else
            echo -n "Shutting down $POWERD: "
            stopdaemon $POWERD
        fi
    fi

    # Wait around for Quorum daemon to go away.  Then you know that the
    # on-disk state has been marked as cleanly down.
    # The loop polls every 10 seconds and has no timeout: it blocks for as
    # long as pidof keeps finding $QUORUMD.
    while : ; do
       pidof $QUORUMD >/dev/null 2>&1; QUORUMD_RUNNING="$?"
       if [ $QUORUMD_RUNNING != 0 ]; then
       break
       fi
          echo "Waiting for Quorum Daemon to exit."
       sleep 10
    done

    # make sure it really is dead
    # (stopdaemon prints " not running." for daemons that are already gone)
    for process in $QUORUMD $HEARTBEAT $SVCMGR $POWERD 
    do
    	echo -n "Stopping $process: "
		stopdaemon $process
    done

    # $HEARTBEAT was already handled by the loop above; this second attempt
    # only runs if pidof still finds it.
    pidof $HEARTBEAT >/dev/null 2>&1; HEARTBEAT_RUNNING="$?"
    if [ $HEARTBEAT_RUNNING = 0 ]; then
        echo -n "Shutting down $HEARTBEAT: "
        stopdaemon $HEARTBEAT
        echo
    fi

    # remove the lock file, so init will allow the start script to run
    rm -f /var/lock/subsys/cluster >/dev/null 2>&1

    $CLULOG -s 6 -l 6 -n cluster "____ Completed cluster stop ____"
    echo "-------- Completed Cluster Stop ---------------------"
    ;;

# Send SIGHUP to every daemon by name.  The result of killall is ignored
# and the script always exits 0.
reload)
    for process in $HEARTBEAT $SVCMGR $QUORUMD $POWERD 
    do
        killall -HUP $process
    done
    exit 0
    ;;

# Don't allow restart, because the stop merely initiates the shutdown
# sequence.  So this command would end up attempting to start while the
# daemons may not have been stopped.
#restart)
#    $0 stop
#    $0 start
#    ;;

# Print one status line per daemon.  The return codes of daemonstatus are
# discarded; the script always exits 0.
status)
    for process in $HEARTBEAT $SVCMGR $QUORUMD $POWERD 
    do
	daemonstatus $process
    done
    exit 0
    ;;

# Does nothing and exits 0.  Not listed in the usage message below.
probe)
    exit 0;
    ;;

# Unknown or missing action.
*)
    echo "Usage: $0 {start|stop|status|reload}"
    exit 1
esac

# Reached by start and stop only; the other branches exit on their own.
exit $RETVAL
