#!/bin/sh
#
# Description:	Manages a PostgreSQL Server as an OCF High-Availability
#		resource under Heartbeat/LinuxHA control
#
#
# Author:	Serge Dubrouski (sergeyfd@gmail.com)
# Copyright 2006 Serge Dubrouski <sergeyfd@gmail.com>
# License:	GNU General Public License (GPL)
#
# OCF parameters:
#  OCF_RESKEY_pgctl  - Path to pg_ctl. Default /usr/bin/pg_ctl
#  OCF_RESKEY_start_opt - Startup options, options passed to postgres with -o
#  OCF_RESKEY_ctl_opt - Additional options for pg_ctl (-w, -W etc...)
#  OCF_RESKEY_psql   - Path to psql. Default is /usr/bin/psql
#  OCF_RESKEY_pgdata - PGDATA directory. Default is /var/lib/pgsql/data
#  OCF_RESKEY_pgdba  - userID that manages DB. Default is postgres
#  OCF_RESKEY_pghost - Host/IP Address where PostgreSQL is listening
#  OCF_RESKEY_pgport - Port where PostgreSQL is listening
#  OCF_RESKEY_pgdb   - database to monitor. Default is template1
#  OCF_RESKEY_logfile - Path to PostgreSQL log file. Default is /dev/null
#  OCF_RESKEY_stop_escalate - Stop waiting time. Default is 30
###############################################################################
# Initialization:

# Load the OCF shell function library. OCF_FUNCTIONS_DIR is honoured when the
# caller already set it; otherwise it is derived from OCF_ROOT. Both
# expansions are quoted so a directory name containing whitespace still
# resolves to a single path.
: "${OCF_FUNCTIONS_DIR=${OCF_ROOT}/resource.d/heartbeat}"
. "${OCF_FUNCTIONS_DIR}/.ocf-shellfuncs"

# Clear the locale overrides inherited from the caller.
# NOTE(review): presumably so that tool output is not localized - confirm.
unset LC_ALL; export LC_ALL
unset LANGUAGE; export LANGUAGE

#
# usage: print a short help text on standard output.
# Returns OCF_ERR_ARGS.
#
usage() {
    # The here-document body is written flush-left on purpose: it is emitted
    # verbatim, so every line of the help text starts in the same column
    # regardless of whether this file is indented with tabs or spaces.
    cat <<EOF
usage: $0 start|stop|status|monitor|meta-data|validate-all|methods

$0 manages a PostgreSQL Server as an HA resource.

The 'start' operation starts the PostgreSQL server.
The 'stop' operation stops the PostgreSQL server.
The 'status' operation reports whether the PostgreSQL is up.
The 'monitor' operation reports whether the PostgreSQL is running.
The 'validate-all' operation reports whether the parameters are valid.
The 'methods' operation reports on the methods $0 supports.
EOF
    return $OCF_ERR_ARGS
}

#
# meta_data: print the OCF resource-agent metadata (XML) on standard output.
# The delimiter is quoted so the XML is emitted literally, without any
# parameter or command expansion.
#
meta_data() {
    cat <<'END'
<?xml version="1.0"?>
<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
<resource-agent name="pgsql">
<version>1.0</version>

<longdesc lang="en">
Resource script for PostgreSQL. It manages a PostgreSQL as an HA resource.
</longdesc>
<shortdesc lang="en">Manages a PostgreSQL database instance</shortdesc>

<parameters>
<parameter name="pgctl" unique="0" required="0">
<longdesc lang="en">
Path to pg_ctl command.
</longdesc>
<shortdesc lang="en">pgctl</shortdesc>
<content type="string" default="/usr/bin/pg_ctl" />
</parameter>
<parameter name="start_opt" unique="0" required="0">
<longdesc lang="en">
Start options (-o start_opt in pg_ctl). "-i -p 5432" for example.
</longdesc>
<shortdesc lang="en">start_opt</shortdesc>
<content type="string" default="" />
</parameter>
<parameter name="ctl_opt" unique="0" required="0">
<longdesc lang="en">
Additional pg_ctl options (-w, -W etc..). Default is ""
</longdesc>
<shortdesc lang="en">ctl_opt</shortdesc>
<content type="string" default="" />
</parameter>
<parameter name="psql" unique="0" required="0">
<longdesc lang="en">
Path to psql command.
</longdesc>
<shortdesc lang="en">psql</shortdesc>
<content type="string" default="/usr/bin/psql" />
</parameter>
<parameter name="pgdata" unique="0" required="0">
<longdesc lang="en">
Path to PostgreSQL data directory.
</longdesc>
<shortdesc lang="en">pgdata</shortdesc>
<content type="string" default="/var/lib/pgsql/data" />
</parameter>
<parameter name="pgdba" unique="0" required="0">
<longdesc lang="en">
User that owns PostgreSQL.
</longdesc>
<shortdesc lang="en">pgdba</shortdesc>
<content type="string" default="postgres" />
</parameter>
<parameter name="pghost" unique="0" required="0">
<longdesc lang="en">
Hostname/IP address where PostgreSQL is listening
</longdesc>
<shortdesc lang="en">pghost</shortdesc>
<content type="string" default="" />
</parameter>
<parameter name="pgport" unique="0" required="0">
<longdesc lang="en">
Port where PostgreSQL is listening
</longdesc>
<shortdesc lang="en">pgport</shortdesc>
<content type="integer" default="5432" />
</parameter>
<parameter name="pgdb" unique="0" required="0">
<longdesc lang="en">
Database that will be used for monitoring.
</longdesc>
<shortdesc lang="en">pgdb</shortdesc>
<content type="string" default="template1" />
</parameter>
<parameter name="logfile" unique="0" required="0">
<longdesc lang="en">
Path to PostgreSQL server log output file.
</longdesc>
<shortdesc lang="en">logfile</shortdesc>
<content type="string" default="/dev/null" />
</parameter>
<parameter name="stop_escalate" unique="0" required="0">
<longdesc lang="en">
Number of retries (using -m fast) before resorting to -m immediate
</longdesc>
<shortdesc lang="en">stop escalation</shortdesc>
<content type="integer" default="30" />
</parameter>
</parameters>

<actions>
<action name="start" timeout="120" />
<action name="stop" timeout="120" />
<action name="status" timeout="60" />
<action name="monitor" depth="0" timeout="30" interval="30"/>
<action name="meta-data" timeout="5" />
<action name="validate-all" timeout="5" />
<action name="methods" timeout="5" />
</actions>
</resource-agent>
END
}


#
# runasowner: run the given command as the resource owner.
#   $*  - the command; all arguments are joined into one string and handed
#         to "su -c".
# Returns the exit status of su.
#
runasowner() {
    # The account name is quoted so that an empty OCF_RESKEY_pgdba stays in
    # the argument list (and makes su fail) instead of disappearing, which
    # would leave su with only "-c <command>" and no user at all.
    su "$OCF_RESKEY_pgdba" -c "$*"
}

#
# pgsql_methods: list the operations this agent supports, one per line,
# on standard output.
#
pgsql_methods() {
    # printf emits exactly one name per line with no trailing whitespace and
    # does not depend on tab indentation surviving in this file.
    printf '%s\n' \
        start \
        stop \
        status \
        monitor \
        methods \
        meta-data \
        validate-all
}


#
# pgsql_start: start PostgreSQL and wait until it answers the monitor query.
#
# Globals read:    OCF_RESKEY_pgctl, OCF_RESKEY_ctl_opt, OCF_RESKEY_pgdata,
#                  OCF_RESKEY_logfile, OCF_RESKEY_start_opt, PIDFILE,
#                  BACKUPLABEL
# Globals written: output (captured pg_ctl output)
# Returns: OCF_SUCCESS if the server was already running or once
#          pgsql_monitor succeeds; OCF_ERR_GENERIC if pg_ctl is not
#          executable, the log file check fails, or pg_ctl exits non-zero.
# Note: the wait loop at the end has no time limit of its own.
#
pgsql_start() {
    if pgsql_status
    then
        # Only the first line of the pid file is the PID.
        ocf_log info "PostgreSQL is already running. PID=$(head -n 1 "$PIDFILE")"
        return $OCF_SUCCESS
    fi

    if [ ! -x "$OCF_RESKEY_pgctl" ]
    then
        ocf_log err "$OCF_RESKEY_pgctl not found!"
        return $OCF_ERR_GENERIC
    fi

    # pgsql_status just reported "not running", so a postmaster.pid that is
    # still present is stale: remove it.
    rm -f "$PIDFILE"

    # Remove backup_label if it exists
    if [ -f "$BACKUPLABEL" ]
    then
        ocf_log info "Removing $BACKUPLABEL. The previous backup might be failed."
        rm -f "$BACKUPLABEL"
    fi

    # Check if we need to create a log file
    if ! check_log_file "$OCF_RESKEY_logfile"
    then
        ocf_log err "PostgreSQL can't write to the log file: $OCF_RESKEY_logfile"
        return $OCF_ERR_GENERIC
    fi

    # The whole command line is ONE double-quoted word; the single quotes are
    # part of that string so the start options reach pg_ctl as a single -o
    # argument, with their internal spacing intact.
    output=`runasowner "$OCF_RESKEY_pgctl $OCF_RESKEY_ctl_opt -D $OCF_RESKEY_pgdata -l $OCF_RESKEY_logfile -o '$OCF_RESKEY_start_opt' start" 2>&1`
    if [ $? -ne 0 ]
    then
        ocf_log err "Can't start PostgreSQL."
        ocf_log info "pg_ctl command output: $output"
        return $OCF_ERR_GENERIC
    fi

    # Probably started.....
    ocf_log info "PostgreSQL start command sent."

    # Poll once per second; failures while waiting are logged at "warn".
    until pgsql_monitor warn
    do
        sleep 1
        ocf_log debug "PostgreSQL still hasn't started yet. Waiting..."
    done
    ocf_log info "PostgreSQL is started."

    return $OCF_SUCCESS
}

#
# pgsql_stop: stop PostgreSQL, escalating from "-m fast" to "-m immediate".
#
# Globals read:    OCF_RESKEY_pgctl, OCF_RESKEY_pgdata,
#                  OCF_RESKEY_stop_escalate, PIDFILE
# Globals written: output (captured pg_ctl output), count, rc
# Returns: OCF_SUCCESS, either immediately when the server is already down
#          or after pgsql_monitor reports OCF_NOT_RUNNING.
# Note: the final wait loop has no time limit of its own.
#
pgsql_stop() {
    if ! pgsql_status
    then
        #Already stopped
        return $OCF_SUCCESS
    fi

    # Stop PostgreSQL do not wait for clients to disconnect
    output=`runasowner "$OCF_RESKEY_pgctl -D $OCF_RESKEY_pgdata stop -m fast" 2>&1`

    # Give the fast shutdown up to OCF_RESKEY_stop_escalate one-second polls.
    count=0
    while [ "$count" -lt "$OCF_RESKEY_stop_escalate" ]
    do
        if ! pgsql_status
        then
            #PostgreSQL stopped
            break
        fi
        count=$((count + 1))
        sleep 1
    done

    if pgsql_status
    then
        #PostgreSQL is still up. Use another shutdown mode.
        ocf_log info "PostgreSQL failed to stop after ${OCF_RESKEY_stop_escalate}s using -m fast. Trying -m immediate..."
        ocf_log info "pg_ctl command output: $output"
        output=`runasowner "$OCF_RESKEY_pgctl -D $OCF_RESKEY_pgdata stop -m immediate" 2>&1`
        ocf_log info "pg_ctl command output: $output"
    fi

    # Wait until the monitor itself reports the server as not running.
    while :
    do
        pgsql_monitor
        rc=$?
        if [ $rc -eq $OCF_NOT_RUNNING ]
        then
            # Leave before the debug message below so it is not logged once
            # the server is already down.
            break
        fi
        sleep 1
        ocf_log debug "PostgreSQL still hasn't stopped yet. Waiting..."
    done

    # Remove postmaster.pid if it exists
    rm -f "$PIDFILE"

    return $OCF_SUCCESS
}

#
# pgsql_status: is PostgreSQL up?
#
# The server counts as up when PIDFILE exists, its first line is a PID that
# accepts signal 0, and that same PID is among the processes fuser reports
# for OCF_RESKEY_pgdata.
#
# Globals read:    PIDFILE, OCF_RESKEY_pgdata
# Globals written: PID, holder
# Returns: 0 when up, 1 otherwise.
#
pgsql_status() {
    if [ ! -f "$PIDFILE" ]
    then
        # No PID file
        return 1
    fi

    PID=`head -n 1 "$PIDFILE"`
    case "$PID" in
        ''|*[!0-9]*)
            # Empty or not a plain process id: nothing we could check.
            return 1
            ;;
    esac

    kill -0 "$PID" >/dev/null 2>&1 || return 1

    # Compare PIDs as whole words. Only fuser's standard output is read (the
    # path and access flags go to standard error), and any non-digit suffix
    # is stripped, so "123" can no longer match "1234" or digits that happen
    # to occur in the data directory path.
    for holder in `fuser "$OCF_RESKEY_pgdata" 2>/dev/null`
    do
        if [ "${holder%%[!0-9]*}" = "$PID" ]
        then
            return 0
        fi
    done

    return 1
}

#
# pgsql_monitor: check that PostgreSQL is up and answers a trivial query.
#   $1 - log level for the "isn't running" and connection-error messages
#        (default: err)
#
# Globals written: loglevel, output, rc
# Returns: OCF_NOT_RUNNING when pgsql_status fails, OCF_ERR_GENERIC when the
#          psql query exits non-zero, OCF_SUCCESS otherwise.
#
pgsql_monitor() {
    # Set the log level of the error message
    loglevel=err
    if [ -n "$1" ]
    then
        loglevel=$1
    fi

    pgsql_status || {
        ocf_log info "PostgreSQL is down"
        return $OCF_NOT_RUNNING
    }

    # "-h <host>" is only inserted when a host is configured.
    output=`runasowner "$OCF_RESKEY_psql ${OCF_RESKEY_pghost:+-h $OCF_RESKEY_pghost }-p $OCF_RESKEY_pgport -U $OCF_RESKEY_pgdba $OCF_RESKEY_pgdb -c 'select now();'" 2>&1`
    rc=$?

    if [ $rc -eq 0 ]
    then
        return $OCF_SUCCESS
    fi

    ocf_log $loglevel "PostgreSQL $OCF_RESKEY_pgdb isn't running"
    # Add a more specific message for the exit codes handled here.
    case $rc in
        1) ocf_log err "Fatal error(out of memory or file not found, etc.) occurred while executing the psql command." ;;
        2) ocf_log $loglevel "Connection error(connection to the server went bad and the session was not interactive) occurred while executing the psql command." ;;
        3) ocf_log err "Script error(the variable ON_ERROR_STOP was set) occurred while executing the psql command." ;;
    esac
    ocf_log info "psql command output: $output"

    return $OCF_ERR_GENERIC
}

#
# pgsql_validate_all: validate the most critical parameters.
# Passes $SH, $OCF_RESKEY_pgctl and $OCF_RESKEY_psql to have_binary, in that
# order, stopping at the first one that is rejected.
# Returns: OCF_SUCCESS when all three pass, OCF_ERR_INSTALLED otherwise.
#
pgsql_validate_all() {
    if have_binary $SH &&
       have_binary $OCF_RESKEY_pgctl &&
       have_binary $OCF_RESKEY_psql
    then
        return $OCF_SUCCESS
    fi

    return $OCF_ERR_INSTALLED
}


#
# check_log_file: make sure the log file exists and the owner can write it.
#   $1 - path of the log file
#
# If nothing exists at $1 the file is created and handed to
# OCF_RESKEY_pgdba (group: field 4 of that user's passwd entry).
# Returns: 0 when OCF_RESKEY_pgdba can write to $1, 1 otherwise.
#
check_log_file() {
    # Test with -e, not -f: an existing non-regular file such as /dev/null
    # must be left alone rather than touched and re-owned.
    if [ ! -e "$1" ]
    then
        # Change ownership only of a file we actually managed to create.
        if touch "$1" > /dev/null 2>&1
        then
            chown "$OCF_RESKEY_pgdba:$(getent passwd "$OCF_RESKEY_pgdba" | cut -d ":" -f 4)" "$1"
        fi
    fi

    #Check if $OCF_RESKEY_pgdba can write to the log file
    if runasowner "test -w '$1'"
    then
        return 0
    fi

    return 1
}

#
#   'main' starts here...
#


# Exactly one argument, the action name, is expected. Anything else is a
# usage error, so exit with OCF_ERR_ARGS - the same code usage() returns -
# rather than with the generic error code.
if [ $# -ne 1 ]
then
    usage
    exit $OCF_ERR_ARGS
fi

# Defaults for parameters the caller did not set. The "=" form only assigns
# when the variable is unset; a parameter that is set but empty stays empty.
# Each expansion is double-quoted so its value is not word-split or
# glob-expanded when handed to ":".
: "${OCF_RESKEY_pgctl=/usr/bin/pg_ctl}"
: "${OCF_RESKEY_psql=/usr/bin/psql}"
: "${OCF_RESKEY_pgdata=/var/lib/pgsql/data}"
: "${OCF_RESKEY_pgdba=postgres}"
: "${OCF_RESKEY_pgport=5432}"
# The start options default to the configured port (set just above).
: "${OCF_RESKEY_start_opt=-p $OCF_RESKEY_pgport}"
: "${OCF_RESKEY_pgdb=template1}"
: "${OCF_RESKEY_logfile=/dev/null}"
: "${OCF_RESKEY_stop_escalate=30}"

# Files inside the data directory that the start/stop/status code refers to.
PIDFILE=${OCF_RESKEY_pgdata}/postmaster.pid
BACKUPLABEL=${OCF_RESKEY_pgdata}/backup_label

# Actions handled before the validation and user checks further down:
# each one runs its function and exits right away.
case "$1" in
    methods)
        pgsql_methods
        exit $?
        ;;

    meta-data)
        meta_data
        exit $OCF_SUCCESS
        ;;

    validate-all)
        pgsql_validate_all
        exit $?
        ;;
esac

# If validation fails, every action ends here: stop reports success,
# monitor and status report "not running", and anything else reports
# OCF_ERR_INSTALLED.
pgsql_validate_all || case "$1" in
    stop)
        exit $OCF_SUCCESS
        ;;
    monitor|status)
        exit $OCF_NOT_RUNNING
        ;;
    *)
        exit $OCF_ERR_INSTALLED
        ;;
esac

# Name of the user running this script.
US=`id -u -n`

# Only root or the database owner may go on. The operands are quoted and the
# two comparisons are separate tests, so an empty value on either side is
# compared as an empty string instead of making "[" fail with a syntax
# error (which would silently skip this check).
if [ "$US" != root ] && [ "$US" != "$OCF_RESKEY_pgdba" ]
then
    ocf_log err "$0 must be run as root or $OCF_RESKEY_pgdba"
    exit $OCF_ERR_GENERIC
fi

# Dispatch the requested action. Every branch exits; an action that is not
# listed ends with OCF_ERR_UNIMPLEMENTED.
case "$1" in
    status)
        if ! pgsql_status
        then
            ocf_log info "PostgreSQL is down"
            exit $OCF_NOT_RUNNING
        fi
        ocf_log info "PostgreSQL is up"
        exit $OCF_SUCCESS
        ;;

    monitor)
        pgsql_monitor
        exit $?
        ;;

    start)
        pgsql_start
        exit $?
        ;;

    stop)
        pgsql_stop
        exit $?
        ;;

    *)
        exit $OCF_ERR_UNIMPLEMENTED
        ;;
esac
