#!/bin/sh
#
#             OCF Resource Agent for managing CTDB
#
# Copyright (c) 2009-2010 Novell Inc., Tim Serong
#                    All Rights Reserved.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of version 2 of the GNU General Public License as
# published by the Free Software Foundation.
#
# This program is distributed in the hope that it would be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
#
# Further, this software is distributed without any warranty that it is
# free of the rightful claim of any third person regarding infringement
# or the like.  Any license provided herein, whether implied or
# otherwise, applies only to this software file.  Patent licenses, if
# any, provided herein do not apply to combinations of this program with
# other software, or any other product whatsoever.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write the Free Software Foundation,
# Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA.
#
# TODO:
# - Verify timeouts are sane
# - Monitor differentiate between error and not running?
# - Do we need to verify globally unique setting?
# - Should set CTDB_NODES to ${HA_RSCTMP}/ctdb (generated based on
#   current nodes)
# - Be more clever about monitor op:
#   ctdb --socket=/tmp/ctdb.socket status
#	Number of nodes:2
#	pnn:0 192.168.101.14   DISABLED (THIS NODE)
#	pnn:1 192.168.101.15   DISABLED
#	Generation:665993634
#	Size:2
#	hash:0 lmaster:0
#	hash:1 lmaster:1
#	Recovery mode:NORMAL (0)
#	Recovery master:1
#   ^ if this says pnn:0...DISABLED|UNHEALTHY, there is a problem,
#     e.g. ctdb socket not specified in smb.conf.
# - Lots of "No public addresses file found. Nothing to do for
#   10.interfaces" junk in ctdb log file.  Can we fix/suppress this?
# - Look at enabling set_ctdb_variables() if necessary.
# - Probably possible for sysconfig file to not be restored if
#   CTDB dies unexpectedly.
#
#######################################################################
# Initialization:

# Locate the OCF shell function library and read it into this shell.
# An OCF_FUNCTIONS_DIR that is already set in the environment (even to an
# empty string) is respected; otherwise it is derived from OCF_ROOT.
# NOTE(review): ocf_log and the OCF_* return codes used throughout this file
# are not defined here - presumably they come from this library.
if [ -z "${OCF_FUNCTIONS_DIR+set}" ]; then
	OCF_FUNCTIONS_DIR=${OCF_ROOT}/resource.d/heartbeat
fi
. ${OCF_FUNCTIONS_DIR}/.ocf-shellfuncs

#######################################################################
# Default parameter values:

# Every optional parameter that is unset or empty in the environment falls
# back to its stock value.
OCF_RESKEY_ctdb_config_dir=${OCF_RESKEY_ctdb_config_dir:-/etc/ctdb}
OCF_RESKEY_ctdb_binary=${OCF_RESKEY_ctdb_binary:-/usr/bin/ctdb}
OCF_RESKEY_ctdbd_binary=${OCF_RESKEY_ctdbd_binary:-/usr/sbin/ctdbd}
OCF_RESKEY_ctdb_socket=${OCF_RESKEY_ctdb_socket:-/var/lib/ctdb/ctdb.socket}
OCF_RESKEY_ctdb_dbdir=${OCF_RESKEY_ctdb_dbdir:-/var/lib/ctdb}
OCF_RESKEY_ctdb_logfile=${OCF_RESKEY_ctdb_logfile:-/var/log/ctdb/log.ctdb}
OCF_RESKEY_ctdb_debuglevel=${OCF_RESKEY_ctdb_debuglevel:-2}
OCF_RESKEY_smb_conf=${OCF_RESKEY_smb_conf:-/etc/samba/smb.conf}

#######################################################################

# Print the OCF resource-agent metadata (XML) for this agent on stdout:
# the agent description, its ten parameters and the five supported actions
# with their advertised timeouts.
# The here-document delimiter is unquoted, but the text contains nothing the
# shell would expand, so it is emitted verbatim.
# NOTE: the default="..." values below are literals; they currently match the
# fallbacks assigned in the "Default parameter values" section and have to be
# kept in sync with them by hand.
meta_data() {
	cat <<END
<?xml version="1.0"?>
<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
<resource-agent name="CTDB" version="1.0">
<version>1.0</version>

<longdesc lang="en">
This resource agent manages CTDB, allowing one to use Clustered Samba
in a Linux-HA/Pacemaker cluster.  You need a shared filesystem
(e.g. OCFS2) on which CTDB lock and Samba state will be stored.
Configure shares in smb.conf on all nodes, and create /etc/ctdb/nodes
containing a list of private IP addresses of each node in the cluster.
Configure this RA as a clone, and it will take care of the rest.
For more information see http://linux-ha.org/wiki/CTDB_(resource_agent)
</longdesc>
<shortdesc lang="en">CTDB Resource Agent</shortdesc>

<parameters>

<parameter name="ctdb_recovery_lock" unique="1" required="1">
<longdesc lang="en">
The location of a shared lock file, common across all nodes.
This must be on shared storage, e.g.: /shared-fs/samba/ctdb.lock
</longdesc>
<shortdesc lang="en">CTDB shared lock file</shortdesc>
<content type="string" default="" />
</parameter>

<parameter name="smb_private_dir" unique="1" required="1">
<longdesc lang="en">
The directory for smbd to use for storing such files as
smbpasswd and secrets.tdb.  This must be on shared storage,
e.g.: /shared-fs/samba/private
</longdesc>
<shortdesc lang="en">Samba private dir</shortdesc>
<content type="string" default="" />
</parameter>

<parameter name="ctdb_config_dir" unique="0" required="0">
<longdesc lang="en">
The directory containing various CTDB configuration files.
The "nodes" and "notify.sh" scripts are expected to be
in this directory, as is the "events.d" subdirectory.
</longdesc>
<shortdesc lang="en">CTDB config file directory</shortdesc>
<content type="string" default="/etc/ctdb" />
</parameter>

<parameter name="ctdb_binary" unique="0" required="0">
<longdesc lang="en">
Full path to the CTDB binary.
</longdesc>
<shortdesc lang="en">CTDB binary path</shortdesc>
<content type="string" default="/usr/bin/ctdb" />
</parameter>

<parameter name="ctdbd_binary" unique="0" required="0">
<longdesc lang="en">
Full path to the CTDB cluster daemon binary.
</longdesc>
<shortdesc lang="en">CTDB Daemon binary path</shortdesc>
<content type="string" default="/usr/sbin/ctdbd" />
</parameter>

<parameter name="ctdb_socket" unique="0" required="0">
<longdesc lang="en">
Full path to the domain socket that ctdbd will create, used for
local clients to attach and communicate with the ctdb daemon.
</longdesc>
<shortdesc lang="en">CTDB socket location</shortdesc>
<content type="string" default="/var/lib/ctdb/ctdb.socket" />
</parameter>

<parameter name="ctdb_dbdir" unique="0" required="0">
<longdesc lang="en">
The directory to put the local CTDB database files in.
Persistent database files will be put in ctdb_dbdir/persistent.
</longdesc>
<shortdesc lang="en">CTDB database directory</shortdesc>
<content type="string" default="/var/lib/ctdb" />
</parameter>

<parameter name="ctdb_logfile" unique="0" required="0">
<longdesc lang="en">
Full path to log file. To log to syslog instead, use the
value "syslog".
</longdesc>
<shortdesc lang="en">CTDB log file location</shortdesc>
<content type="string" default="/var/log/ctdb/log.ctdb" />
</parameter>

<parameter name="ctdb_debuglevel" unique="0" required="0">
<longdesc lang="en">
What debug level to run at (0-10). Higher means more verbose.
</longdesc>
<shortdesc lang="en">CTDB debug level</shortdesc>
<content type="integer" default="2" />
</parameter>

<parameter name="smb_conf" unique="0" required="0">
<longdesc lang="en">
Path to default samba config file.
</longdesc>
<shortdesc lang="en">Path to smb.conf</shortdesc>
<content type="string" default="/etc/samba/smb.conf" />
</parameter>

</parameters>

<actions>
<action name="start"        timeout="90" />
<action name="stop"         timeout="100" />
<action name="monitor"      timeout="20" interval="10" depth="0" />
<action name="meta-data"    timeout="5" />
<action name="validate-all"   timeout="30" />
</actions>
</resource-agent>
END
}

#######################################################################

# Figure out the path to the CTDB sysconfig file (same logic as
# loadconfig() from /etc/ctdb/functions).  The first existing candidate
# wins; CTDB_SYSCONFIG is left untouched when none exists, which
# ctdb_validate reports as an installation error.
if [ -f /etc/sysconfig/ctdb ]; then
	CTDB_SYSCONFIG=/etc/sysconfig/ctdb
elif [ -f /etc/default/ctdb ]; then
	CTDB_SYSCONFIG=/etc/default/ctdb
elif [ -f "$OCF_RESKEY_ctdb_config_dir/ctdb" ]; then
	# Quoted so a config dir containing whitespace does not break the test
	CTDB_SYSCONFIG=$OCF_RESKEY_ctdb_config_dir/ctdb
fi

# Where the original sysconfig file is parked while the generated one is
# in place (one backup per resource instance)
CTDB_SYSCONFIG_BACKUP=${HA_RSCTMP}/ctdb-${OCF_RESOURCE_INSTANCE}

# Push every shell variable named CTDB_SET_<name> to the running daemon via
# "ctdb setvar <name> <value>".
# This function has no effect (currently no way to set CTDB_SET_*)
# but remains here in case we need it in future.
# Returns: $OCF_SUCCESS if every setvar call succeeded (or there was
#          nothing to set), $OCF_ERR_GENERIC if any of them failed.
set_ctdb_variables() {
	# The consumer of a pipeline runs in a subshell in most shells, so a
	# status variable assigned inside a piped "while" loop would be lost.
	# Run the loop in an explicit subshell and hand the accumulated status
	# back through its exit code, which becomes the pipeline's (and thus
	# the function's) return value.
	set | grep '^CTDB_SET_' | cut -d_ -f3- | (
		rv=$OCF_SUCCESS
		while IFS= read -r v; do
			# Split on the first '=' only, so values may contain '='
			varname=${v%%=*}
			value=${v#*=}
			"$OCF_RESKEY_ctdb_binary" --socket="$OCF_RESKEY_ctdb_socket" setvar "$varname" "$value" || rv=$OCF_ERR_GENERIC
		done
		exit $rv
	)
}


# Add necessary settings to /etc/samba/smb.conf.  In a perfect world,
# we'd be able to generate a new, temporary, smb.conf file somewhere,
# something like:
#     include = /etc/samba/smb.conf
#     [global]
#       clustering = yes
#       # ...etc...
# Unfortunately, we can't do this, because there's no way to tell the
# smb init script where the temporary config is, so we just edit
# the default config file.
#
# Existing lines for the settings we manage (and our own marker comments)
# are dropped, then a marked section is appended after the [global] header.
# Returns: 0 on success; non-zero if smb.conf is unreadable or the new file
#          could not be written or moved into place, in which case the
#          existing smb.conf is left as it was.
init_smb_conf() {
	[ -r "$OCF_RESKEY_smb_conf" ] || return 1

	# Only replace smb.conf once the new contents were written successfully;
	# the pipeline status is that of sed, i.e. of the writer.
	if grep -Eiv \
		'^[[:space:]]*(# CTDB-RA:|passdb backend|clustering|idmap backend|private dir|ctdbd socket)' \
		"$OCF_RESKEY_smb_conf" | sed "/^[[:space:]]*\[global\]/ a\\
\t# CTDB-RA: Begin auto-generated section (do not change below)\n\
\tpassdb backend = tdbsam\n\
\tclustering = yes\n\
\tidmap backend = tdb2\n\
\tprivate dir = $OCF_RESKEY_smb_private_dir\n\
\tctdbd socket = $OCF_RESKEY_ctdb_socket\n\
\t# CTDB-RA: End auto-generated section (do not change above)" > "$OCF_RESKEY_smb_conf.$$"; then
		mv -f "$OCF_RESKEY_smb_conf.$$" "$OCF_RESKEY_smb_conf" && return 0
	fi

	# Don't leave a half-written temp file next to smb.conf
	rm -f "$OCF_RESKEY_smb_conf.$$"
	return 1
}


# Get rid of that section we added: everything from the "# CTDB-RA: Begin"
# marker line through the "# CTDB-RA: End" marker line is deleted.
# Returns: 0 on success; non-zero if smb.conf could not be read or the
#          cleaned copy could not be moved into place (smb.conf is then
#          left as it was, and no empty file is created in its place).
cleanup_smb_conf() {
	if sed '/# CTDB-RA: Begin/,/# CTDB-RA: End/d' "$OCF_RESKEY_smb_conf" > "$OCF_RESKEY_smb_conf.$$"; then
		mv -f "$OCF_RESKEY_smb_conf.$$" "$OCF_RESKEY_smb_conf" && return 0
	fi

	# sed or mv failed: discard the temp file rather than installing it
	rm -f "$OCF_RESKEY_smb_conf.$$"
	return 1
}


# Save current CTDB config file and generate a new, minimal version
# that is just enough to get Samba running.
# The original is copied to $CTDB_SYSCONFIG_BACKUP first; if that copy
# fails, a warning is logged and $CTDB_SYSCONFIG is not modified.  A file
# that already carries our "Auto-generated" marker is left alone so the
# backup is never overwritten with generated content.
save_ctdb_sysconfig() {
	# If one of our auto-generated config files is already present, return immediately
	grep -qa '# CTDB-RA: Auto-generated' "$CTDB_SYSCONFIG" && return

	# Otherwise, backup...
	if cp -p "$CTDB_SYSCONFIG" "$CTDB_SYSCONFIG_BACKUP"; then
		ocf_log info "Saved $CTDB_SYSCONFIG to $CTDB_SYSCONFIG_BACKUP, generating new runtime $CTDB_SYSCONFIG"
		# ...and generate
		cat >"$CTDB_SYSCONFIG" <<EOF
# CTDB-RA: Auto-generated by ${0}, backup is at $CTDB_SYSCONFIG_BACKUP
CTDB_MONITOR_FREE_MEMORY=100
CTDB_SAMBA_SKIP_SHARE_CHECK=yes
CTDB_MANAGES_SAMBA=yes
CTDB_MANAGES_WINBIND=yes
CTDB_SERVICE_SMB=smb
CTDB_SERVICE_NMB=nmb
CTDB_SERVICE_WINBIND=winbind
EOF
	else
		ocf_log warn "Unable to backup $CTDB_SYSCONFIG to $CTDB_SYSCONFIG_BACKUP - not making any changes"
	fi
}


# Put the saved sysconfig file back in place, if a backup exists.
# The backup is removed only after it has been copied back successfully;
# when the copy fails a warning is logged and the backup is kept, so the
# original configuration is not lost and a later call can retry.
# Returns: 0 if there was nothing to restore or the restore succeeded,
#          1 if the copy failed.
restore_ctdb_sysconfig() {
	[ -f "$CTDB_SYSCONFIG_BACKUP" ] || return 0

	ocf_log info "Restoring $CTDB_SYSCONFIG_BACKUP to $CTDB_SYSCONFIG"
	# cp handles destination being a symlink (as opposed to mv)
	if cp -p "$CTDB_SYSCONFIG_BACKUP" "$CTDB_SYSCONFIG"; then
		rm -f "$CTDB_SYSCONFIG_BACKUP"
	else
		ocf_log warn "Unable to restore $CTDB_SYSCONFIG_BACKUP to $CTDB_SYSCONFIG"
		return 1
	fi
}


# Write the usage summary (supported actions plus a note about the expected
# OCF environment) to stdout.
ctdb_usage() {
	printf '%s\n' \
		"usage: $0 {start|stop|monitor|validate-all|meta-data}" \
		"" \
		"Expects to have a fully populated OCF RA-compliant environment set."
}


# Start ctdbd on this node and wait for it to settle.
#
# Sequence: return early if ctdb already answers a ping, validate the
# configuration, refuse to start if a persistent database cannot be dumped,
# patch smb.conf, swap in the generated sysconfig, launch ctdbd, then poll
# "ctdb status" once a second (at most 30 times) until this node is no
# longer reported as UNHEALTHY.
#
# Returns: $OCF_SUCCESS if already running; the status of
#          set_ctdb_variables once the node is no longer unhealthy; the
#          ctdb_validate status if validation fails; $OCF_ERR_GENERIC for
#          every other failure (ctdbd is stopped again where it may have
#          been started).
ctdb_start() {
	# Do nothing if already running
	ctdb_monitor && return $OCF_SUCCESS

	# Make sure config is adequate
	ctdb_validate
	rv=$?
	[ $rv -ne 0 ] && return $rv

	# Die if databases are corrupted
	persistent_db_dir="${OCF_RESKEY_ctdb_dbdir}/persistent"
	mkdir -p "$persistent_db_dir" 2>/dev/null
	for pdbase in "$persistent_db_dir"/*.tdb.[0-9]; do
		# A glob without matches stays a literal pattern; skip it
		[ -e "$pdbase" ] || continue
		/usr/bin/tdbdump "$pdbase" >/dev/null 2>/dev/null || {
			ocf_log err "Persistent database $pdbase is corrupted!  CTDB will not start."
			return $OCF_ERR_GENERIC
		}
	done

	# Add necessary configuration to smb.conf
	if ! init_smb_conf; then
		ocf_log err "Failed to update $OCF_RESKEY_smb_conf."
		return $OCF_ERR_GENERIC
	fi

	# Save sysconfig (we're going to generate a minimal one
	# in place of what's there)
	save_ctdb_sysconfig

	# Use logfile by default, or syslog if asked for
	# ("=" rather than "==": this script runs under /bin/sh)
	log_option="--logfile=$OCF_RESKEY_ctdb_logfile"
	[ "$OCF_RESKEY_ctdb_logfile" = "syslog" ] && log_option="--syslog"

	# Start her up
	"$OCF_RESKEY_ctdbd_binary" \
		--reclock="$OCF_RESKEY_ctdb_recovery_lock" \
		--nlist="$OCF_RESKEY_ctdb_config_dir/nodes" \
		--socket="$OCF_RESKEY_ctdb_socket" \
		--dbdir="$OCF_RESKEY_ctdb_dbdir" \
		--dbdir-persistent="$OCF_RESKEY_ctdb_dbdir/persistent" \
		--event-script-dir="$OCF_RESKEY_ctdb_config_dir/events.d" \
		--notification-script="$OCF_RESKEY_ctdb_config_dir/notify.sh" \
		--transport=tcp \
		--start-as-disabled \
		"$log_option" \
		-d "$OCF_RESKEY_ctdb_debuglevel"
	if [ $? -ne 0 ]; then
		# restore sysconfig & cleanup smb.conf
		restore_ctdb_sysconfig
		cleanup_smb_conf

		ocf_log err "Failed to execute $OCF_RESKEY_ctdbd_binary."
		return $OCF_ERR_GENERIC
	fi

	# Wait a bit for CTDB to stabilize
	# (can be broken if, e.g. ctdb socket wrong
	# or not specified in smb.conf).
	# Counted loop instead of {1..30}: brace expansion is not POSIX and
	# would make the loop run only once under a strict /bin/sh.
	attempt=0
	while [ $attempt -lt 30 ]; do
		attempt=$((attempt + 1))
		# Initial sleep is intentional (ctdb init script
		# has sleep after ctdbd start, but before invoking
		# ctdb to talk to it)
		sleep 1
		status=$("$OCF_RESKEY_ctdb_binary" --socket="$OCF_RESKEY_ctdb_socket" status 2>/dev/null)
		if [ $? -ne 0 ]; then
			# CTDB will be running, kill it before returning
			ctdb_stop
			ocf_log err "Can't invoke $OCF_RESKEY_ctdb_binary --socket=$OCF_RESKEY_ctdb_socket status"
			return $OCF_ERR_GENERIC
		fi
		# $status is deliberately unquoted: runs of whitespace in the
		# status output collapse to single spaces before matching
		if ! echo $status | grep -qs 'UNHEALTHY (THIS'; then
			# Status does not say this node is unhealthy,
			# so we're good to go - set up any extra
			# variables and (hopefully) return success
			set_ctdb_variables
			return $?
		fi
	done

	# ctdbd will (or can) actually still be running at this point, so kill it
	ctdb_stop

	ocf_log err "Timeout waiting for CTDB to stabilize"
	return $OCF_ERR_GENERIC
}


# Stop ctdbd: request a clean shutdown through the ctdb client, wait for the
# daemon process to disappear, and escalate to SIGKILL (daemon and running
# event scripts) once it has lingered for more than 10 one-second waits.
# Afterwards the saved sysconfig is restored and smb.conf is cleaned up.
# Returns: $OCF_SUCCESS if no daemon was found, or if the shutdown request
#          itself succeeded; $OCF_ERR_GENERIC if the shutdown request failed
#          (even though the daemon is gone by then).
ctdb_stop() {
	# Do nothing if already stopped (signal 0 only probes for a match)
	if ! pkill -0 -f $OCF_RESKEY_ctdbd_binary; then
		return $OCF_SUCCESS
	fi

	# Tell it to die nicely, remembering how the request went
	$OCF_RESKEY_ctdb_binary --socket=$OCF_RESKEY_ctdb_socket shutdown >/dev/null 2>&1
	shutdown_rc=$?

	# No more Mr. Nice Guy
	waited=0
	while pkill -0 -f $OCF_RESKEY_ctdbd_binary ; do
		sleep 1
		waited=$(($waited + 1))
		if [ $waited -gt 10 ]; then
			ocf_log info "killing ctdbd "
			pkill -9 -f $OCF_RESKEY_ctdbd_binary
			pkill -9 -f ${OCF_RESKEY_ctdb_config_dir}/events.d/
		fi
	done

	# Restore saved sysconfig & cleanup smb.conf
	restore_ctdb_sysconfig
	cleanup_smb_conf

	# Be paranoid about return codes
	if [ $shutdown_rc -eq $OCF_SUCCESS ]; then
		return $OCF_SUCCESS
	fi
	return $OCF_ERR_GENERIC
}


# Probe the daemon with "ctdb ping" on the configured socket; all output of
# the probe is discarded.
# Returns: $OCF_SUCCESS if the ping succeeds, $OCF_NOT_RUNNING otherwise.
ctdb_monitor() {
	if $OCF_RESKEY_ctdb_binary --socket=$OCF_RESKEY_ctdb_socket ping > /dev/null 2>&1; then
		return $OCF_SUCCESS
	fi
	return $OCF_NOT_RUNNING
}


# Check that the environment is good enough to start CTDB.
# Verifies, in this order: a sysconfig file was found, smb.conf exists, the
# two required parameters are set, and both the recovery lock's directory
# and the Samba private directory accept a new file (probed by creating and
# removing a file named after this shell's PID).  An existing
# public_addresses file only produces a warning.
# Returns: $OCF_SUCCESS, $OCF_ERR_INSTALLED (missing config files) or
#          $OCF_ERR_ARGS (missing parameters / unusable directories).
ctdb_validate() {
	[ -n "$CTDB_SYSCONFIG" ] || {
		ocf_log err "Can't find CTDB config file (expecting /etc/sysconfig/ctdb, /etc/default/ctdb or similar)"
		return $OCF_ERR_INSTALLED
	}

	[ -f "$OCF_RESKEY_smb_conf" ] || {
		ocf_log err "Samba config file '$OCF_RESKEY_smb_conf' does not exist."
		return $OCF_ERR_INSTALLED
	}

	# Not fatal, just worth pointing out
	[ ! -f "${OCF_RESKEY_ctdb_config_dir}/public_addresses" ] ||
		ocf_log warn "CTDB file '${OCF_RESKEY_ctdb_config_dir}/public_addresses' exists - CTDB will try to manage IP failover!"

	[ -n "$OCF_RESKEY_ctdb_recovery_lock" ] || {
		ocf_log err "ctdb_recovery_lock not specified."
		return $OCF_ERR_ARGS
	}

	[ -n "$OCF_RESKEY_smb_private_dir" ] || {
		ocf_log err "smb_private_dir not specified."
		return $OCF_ERR_ARGS
	}

	# Probe the directory that will hold the recovery lock
	reclock_parent=$(dirname "$OCF_RESKEY_ctdb_recovery_lock")
	touch "$reclock_parent/$$" 2>/dev/null || {
		ocf_log err "Directory for lock file '$OCF_RESKEY_ctdb_recovery_lock' does not exist, or is not writable."
		return $OCF_ERR_ARGS
	}
	rm "$reclock_parent/$$"

	# Same probe for the Samba private directory
	touch "$OCF_RESKEY_smb_private_dir/$$" 2>/dev/null || {
		ocf_log err "Directory for smbd private files '$OCF_RESKEY_smb_private_dir' does not exist, or is not writable."
		return $OCF_ERR_ARGS
	}
	rm "$OCF_RESKEY_smb_private_dir/$$"

	return $OCF_SUCCESS
}


# Dispatch on the requested action.  meta-data, usage and help (and any
# unknown action) print their text and exit right away; the real actions
# record their status, which is logged at debug level and then used as the
# exit code.
# NOTE(review): __OCF_ACTION is not assigned in this file - presumably it is
# set by the sourced OCF shell functions; confirm.
case $__OCF_ACTION in
	meta-data)
		meta_data
		exit $OCF_SUCCESS
		;;
	usage|help)
		ctdb_usage
		exit $OCF_SUCCESS
		;;
	start)
		ctdb_start
		rc=$?
		;;
	stop)
		ctdb_stop
		rc=$?
		;;
	monitor)
		ctdb_monitor
		rc=$?
		;;
	validate-all)
		ctdb_validate
		rc=$?
		;;
	*)
		ctdb_usage
		exit $OCF_ERR_UNIMPLEMENTED
		;;
esac
ocf_log debug "${OCF_RESOURCE_INSTANCE} $__OCF_ACTION : $rc"
exit $rc
