/*
 * Copyright 1998-2001, University of Notre Dame.
 * Authors: Jeffrey M. Squyres, Arun Rodrigues, and Brian Barrett with
 *          Kinis L. Meyer, M. D. McNally, and Andrew Lumsdaine
 * 
 * This file is part of the Notre Dame LAM implementation of MPI.
 * 
 * You should have received a copy of the License Agreement for the Notre
 * Dame LAM implementation of MPI along with the software; see the file
 * LICENSE.  If not, contact Office of Research, University of Notre
 * Dame, Notre Dame, IN 46556.
 * 
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted subject to the conditions specified in the
 * LICENSE file.
 * 
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 * 
 * Additional copyrights may follow.
 * 
 *	Ohio Trollius
 *	Copyright 1997 The Ohio State University
 *	RBD/NJN
 *
 *	$Id: mpirun.c,v 6.43.2.1 2001/10/07 17:14:27 bbarrett Exp $
 *
 *	Function:	- start an MPI application
 */

#include "lam_config.h"
#include "sfh.h"

#include <sys/types.h>
#include <sys/signal.h>
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

#include "all_list.h"
#include "all_opt.h"
#include "app_mgmt.h"
#include "app_schema.h"
#include "args.h"
#include "events.h"
#include "freq.h"
#include "kio.h"
#include "laminternal.h"
#include "ndi.h"
#include "net.h"
#include "portable.h"
#include "priority.h"
#include "preq.h"
#include "terror.h"
#include "typical.h"
#if LAM_WANT_IMPI
#include <impi.h>
#endif
#include <debug.h>


/*
 * PTY support is currently the default (PTY_IS_DEFAULT is 1): ptys are
 * used unless -npty is given.  It was once considered too "iffy" to be
 * the default; set this to 0 to require an explicit -pty instead.
 */
#define PTY_IS_DEFAULT 1


/*
 * private functions 
 */
static int pwait(int4 nwait, int *childstat);
static int set_mode(void);
static int set_stdio(void);
static void reset_stdio(void);
static void sigint_handler(int sig);
static LIST *build_app(int argc, char **argv, char **env);
static char *locate_aschema(char *filename);
/* len is a plain int to match the definition and str_append() */
static void node_ranges(char **buf, int *len, int4 initnode, int4 initindex);
static int get_mpi_world(int4 world_n, struct _gps *world, 
			 struct _gps *mpiworld);
static void error_cleanup(void);
static void str_append(char **src, int *len, char *suffix);
#if LAM_WANT_IMPI
static int send_impi_params(struct _gps *);
#endif


/*
 * external variables
 */
extern struct kio_t	_kio;			/* kernel I/O block */
extern struct fclient	_ufd[FUMAX];		/* user file desc. */


/*
 * private variables
 */
static struct _gps	*world;			/* GPS array filled by asc_run() */
static struct _gps	*mpiworld;		/* GPS array filled by get_mpi_world() */
static int		world_n;		/* # entries in world / mpiworld */
static int		fl_fdesc;		/* -f: don't set up application stdio */
static int		fl_nprocs;		/* -c / -np was given */
static int		fl_verbose;		/* -v: verbose mode */
static int		fl_wait = 1;		/* wait for completion (cleared by -nw) */
static int		fl_catch = 1;		/* catch signals (cleared by -nsigs) */
static int		fl_impi_client = 0;	/* -client was given */
static int		fl_impi_server = 0;	/* -server was given */
static int4		nprocs;			/* # procs from -c / -np, else -1 */
static int4		rtf;			/* runtime flags passed to asc_run() */
static char		*wrkdir = NULL;		/* -wd working dir, NULL if not given */
static char		smallbuf[128];		/* sprintf scratch buffer */
static OPT		*ad;			/* argument descriptor */
/* fprintf format for schema syntax errors; takes the line number */
static char		*syntaxerr =
	"mpirun: application schema syntax error, line %d\n";


/*
 *	main
 *
 *	Function:	- parse the command line, launch the MPI application
 *			  and, unless -nw was given, wait for it to finish
 *	Accepts:	- command line argument count
 *			- command line argument vector
 *			- process environment vector
 *	Returns:	- the process normally leaves through exit(),
 *			  kexit() or lamfail() rather than by returning
 */
int
main(int argc, char **argv, char **envp)
{
	LIST *app;
	int status;
	int errno_save;
	char **env = 0;
#if LAM_WANT_IMPI
	char *impi_server = 0;
	int impi_argc = 0;
	char **impi_argv = 0;
	LIST *impi_server_app;
	struct _gps *impi_server_gps = 0;	/* stays 0 unless -server */
	int impi_server_n = 0;			/* # impi server procs */
#endif
/*
 * Ensure that we are not root.
 */
	if (getuid() == 0 || geteuid() == 0) {
		show_help(NULL, "deny-root", NULL);
		exit(EACCES);
	}
/*
 * Parse the command line.
 */
	ad = ao_init();
	if (ad == 0) {
		perror("mpirun (ao_init)");
		exit(errno);
	}
	ao_setopt1(ad, "fhtvDO", 0, 0, 0);
	ao_setopt(ad, "client", 0, 2, 0);
	ao_setopt(ad, "server", 0, 1, 0);
	ao_setopt(ad, "s", 0, 1, 0);
	ao_setopt(ad, "c", 0, 1, AOINT);
	ao_setopt(ad, "np", "c", 1, AOINT);
	ao_setopt(ad, "c2c", 0, 0, 0);
	ao_setopt(ad, "lamd", "c2c", 0, 0);
	ao_setopt(ad, "ger", 0, 0, 0);
	ao_setopt(ad, "nger", "ger", 0, 0);
	ao_setopt(ad, "w", 0, 0, 0);
	ao_setopt(ad, "nw", "w", 0, 0);
	ao_setopt(ad, "toff", 0, 0, 0);
	ao_setopt(ad, "ton", "toff", 0, 0);
	ao_setopt(ad, "nsigs", 0, 0, 0);
	ao_setopt(ad, "x", 0, 1, 0);
	ao_setopt(ad, "nx", 0, 0, 0);
	ao_setopt(ad, "wd", "D", 1, 0);
	ao_setopt(ad, "pty", 0, 0, 0);
#if PTY_IS_DEFAULT
	ao_setopt(ad, "npty", "pty", 0, 0);
#endif

	if (asc_compat(&argc, &argv)) {
		errno_save = errno;
		sfh_argv_free(argv);
		ao_free(ad);
		perror("mpirun");
		exit(errno_save);
	}

	if (ao_parse(ad, &argc, argv))	{
		errno_save = errno;
		show_help("mpirun", "usage", NULL);
		sfh_argv_free(argv);
		ao_free(ad);
		exit(errno_save);
	}
/*
 * Check for help request.
 */
	if (ao_taken(ad, "h")) {
		show_help("mpirun", "usage", NULL);
		sfh_argv_free(argv);
		ao_free(ad);
		exit(0);
	}
/*
 * Set the operating mode.
 */
	if (set_mode()) {
		errno_save = errno;
		sfh_argv_free(argv);
		ao_free(ad);
		exit(errno_save);
	}
/*
 * Attach to kernel.
 */
	if (kinit(PRCMD)) {
		errno_save = errno;
		show_help(NULL, "no-lamd", "mpirun", NULL);
		sfh_argv_free(argv);
		ao_free(ad);
		exit(errno_save);
	}
/*
 * Set job identifier to be inherited by the application.
 */
	_kio.ki_jobid.jid_node = getnodeid();
	_kio.ki_jobid.jid_pid = getpid();
/*
 * Get the job environment.
 */
	if (asc_environment(!ao_taken(ad, "nx"),
			    ao_taken(ad, "x") ? 
			    ao_param(ad, "x", 0, 0) : 0, &env)) {
		sfh_argv_free(argv);
		ao_free(ad);
		lamfail("mpirun");
	}
/*
 * Build the application and allocate the world GPS array.
 */
	if ((app = build_app(argc, argv, env)) == 0) {
		kexit(errno);
	}
#if 0
/* 
 * Why is this bad here?  Causes a *single* duplicate free...! 
 */
	sfh_argv_free(argv);
#endif

	world_n = al_count(app);
	world = (struct _gps *) malloc(world_n * sizeof(struct _gps));
	mpiworld = (struct _gps *) malloc(world_n * sizeof(struct _gps));
	if (world == 0 || mpiworld == 0)
		lamfail("mpirun");
/*
 * Pre-qualify the GER and set buffer limitations on each node.
 */
	if (ao_taken(ad, "lamd") && !ao_taken(ad, "nger")) {
		if (asc_ger(app, TRUE)) {
			errno_save = errno;
			ao_free(ad);
			kexit(errno_save);
		}
	}
/*
 * Set application stdio.
 */
	if (set_stdio()) {
		lamfail("mpirun (set_stdio)");
	}
/*
 * Run the application.
 */
	if (asc_run(app, 0, rtf, fl_verbose, TRUE, world)) {
		errno_save = errno;
		ao_free(ad);
		error_cleanup();
		reset_stdio();
		kexit(errno_save);
	}
/*
 * Set signal handler to terminate the application.
 */
	if (fl_wait) {
		_lam_signal(SIGINT, sigint_handler);
	}

	reset_stdio();
/*
 * Read pids and indices from MPI processes.
 */
	if (get_mpi_world(world_n, world, mpiworld)) {
		errno_save = errno;
		ao_free(ad);
		app_doom(world_n, mpiworld, SIGUDIE);
		app_doom(world_n, world, -15);
		error_cleanup();
		kexit(errno_save);
	}
/*
 * Send process table to all processes.  errno is captured and reported
 * before any cleanup call gets a chance to overwrite it.
 */
	if (app_sendprocs(world_n, mpiworld)) {
		errno_save = errno;
		terror("mpirun");
		ao_free(ad);
		app_doom(world_n, mpiworld, SIGUDIE);
		app_doom(world_n, world, -15);
		error_cleanup();
		kexit(errno_save);
	}
#if LAM_WANT_IMPI
/*
 * If this is an IMPI application, send the IMPI params to rank 0 so
 * that it can spawn the impid properly.
 */
	if (fl_impi_client && send_impi_params(mpiworld) != 0) {
		errno_save = errno;
		terror("mpirun (send_impi_params)");
		ao_free(ad);
		app_doom(world_n, mpiworld, SIGUDIE);
		app_doom(world_n, world, -15);
		error_cleanup();
		kexit(errno_save);
	}
/*
 * Start up the IMPI server; all the MPI ranks are now somewhere in
 * MPI_Init, and probably blocking, waiting for the IMPI server to
 * come up.  This flag will only be set if LAM_WANT_IMPI_SERVER, so we
 * don't have to check for this here.
 */
	if (fl_impi_server) {
		impi_server = sfh_path_env_findv(LAM_IMPI_SERVER, R_OK,
						 envp, 0);

		if (impi_server == NULL) {
			show_help("mpirun", "cant-find-impi-server", NULL);
			ao_free(ad);
			app_doom(world_n, mpiworld, SIGUDIE);
			app_doom(world_n, world, -15);
			error_cleanup();
			kexit(1);
		}

/*
 * Launch the impi server via the lamd on the localhost.  This allows
 * the impi-server to be cleaned up via lamclean.
 */
		impi_argc = 0;
		impi_argv = 0;
		sfh_argv_add(&impi_argc, &impi_argv, "bogus-ignored-arg");
		sfh_argv_add(&impi_argc, &impi_argv, "h");
		sfh_argv_add(&impi_argc, &impi_argv, impi_server);

		if ((impi_server_app = build_app(impi_argc, impi_argv,
						 env)) == 0) {
			errno_save = errno;
			ao_free(ad);
			app_doom(world_n, mpiworld, SIGUDIE);
			app_doom(world_n, world, -15);
			error_cleanup();
			kexit(errno_save);
		}
/*
 * Size the GPS array from the server application itself, the same way
 * world is sized from app above.
 */
		impi_server_n = al_count(impi_server_app);
		impi_server_gps = (struct _gps *)
			malloc(impi_server_n * sizeof(struct _gps));
		if (impi_server_gps == 0) {
			errno_save = errno;
			terror("mpirun (malloc)");
			ao_free(ad);
			app_doom(world_n, mpiworld, SIGUDIE);
			app_doom(world_n, world, -15);
			error_cleanup();
			kexit(errno_save);
		}
/*
 * Run the server application (not the user application a second time).
 */
		rtf &= ~(RTF_MPI | RTF_WAIT);
		if (asc_run(impi_server_app, 0, rtf, fl_verbose, TRUE,
			    impi_server_gps)) {
			errno_save = errno;
			ao_free(ad);
			app_doom(world_n, mpiworld, SIGUDIE);
			app_doom(world_n, world, -15);
			error_cleanup();
			kexit(errno_save);
		}
	}
#endif
/*
 * If needed, wait for the application to terminate.
 */
	status = 0;

	if (fl_wait) {
		if (pwait(world_n, &status)) {
			app_doom(world_n, mpiworld, SIGUDIE);
			app_doom(world_n, world, -15);
#if LAM_WANT_IMPI
/*
 * Only doom the server if one was actually started.
 */
			if (impi_server_gps != 0) {
				app_doom(impi_server_n, impi_server_gps, -15);
			}
#endif
			error_cleanup();
		}

		microsleep(500000);		/* 1/2 sec */
	}

	ao_free(ad);
	kexit(status);
	return(0);
}

/*
 *	build_app
 *
 *	Function:	- build application from command line specification
 *			- either parses an application schema file or
 *			  reconstructs a one-line schema from the options
 *			  and parses that, then schedules the result
 *	Accepts:	- command line argument count
 *			- command line argument vector
 *			- environment handed to the schema parser
 *	Returns:	- scheduled application descriptor or 0
 *			  (errno is left describing the failure)
 */
static LIST *
build_app(int argc, char **argv, char **env)
{
	LIST		*app;			/* application */
	LIST		*app_sched;		/* scheduled application */
	int4		nodeid;			/* target node */
	int4		index;			/* for getnode() */
	int4		nflags;			/* for getnode() */
	int		lineno = 0;		/* line number */
	int		argtailc;		/* argument tail count */
	char		**argtailv;		/* argument tail vector */
	char		*aschema;		/* application schema */
	char		*qarg;			/* quoted argument */
	char		*fmtbuf = 0;		/* formatting buffer */
	int		fmtbuf_len = 512;	/* its current capacity */
	int		errno_save;		/* errno across cleanup */

	if (nid_parse(&argc, argv) || (errno = (argc > 1) ? 0 : EUSAGE)) {
		show_help("mpirun", "usage", NULL);
		return(0);
	}
/*
 * Application schema or command line?
 */
	nid_get(&index, &nodeid, &nflags);

	if ((index < 0) && !fl_nprocs) {
/*
 * Parse the app. schema.
 */
		if ((argc != 2) || ao_taken(ad, "s")) {
			show_help("mpirun", "usage", NULL);
			kexit(EUSAGE);
		}

		aschema = locate_aschema(argv[1]);
		if (aschema == 0) {
			fprintf(stderr, "mpirun (locate_aschema): %s: ",
				argv[1]);
			terror("");
			return(0);
		}

		app = asc_parse(aschema, &lineno, env);
	}
	else {
/*
 * Parse the "command line" application specification.
 * Reconstruct its relevant parts from the parsed information.
 * It was parsed to distinguish between the appl/cmdline cases.
 *
 * Format: [<nodes>] program [-s <srcnode>] [-c #] [-- args]
 *
 * The formatting buffer is only needed on this path; str_append()
 * grows it on demand.
 */
		fmtbuf = malloc(fmtbuf_len);
		if (fmtbuf == 0) {
			terror("mpirun (malloc)");
			return(0);
		}
		fmtbuf[0] = 0;

		if (fl_nprocs) {
			sprintf(smallbuf, "-c %d ", nprocs);
			str_append(&fmtbuf, &fmtbuf_len, smallbuf);
		}

		if (ao_taken(ad, "s")) {
			str_append(&fmtbuf, &fmtbuf_len, "-s ");
			str_append(&fmtbuf, &fmtbuf_len,
				   ao_param(ad, "s", 0, 0));
			str_append(&fmtbuf, &fmtbuf_len, " ");
		}

		if (index == 0) {
			if (nodeid == LOCAL) 
				nodeid = getnodeid();
			node_ranges(&fmtbuf, &fmtbuf_len, nodeid, index);
		}

		for (--argc, ++argv; argc > 0; --argc, ++argv) {
			qarg = sfh_argv_quote(*argv, "\\'\"");
			if (qarg == 0) {
				terror("mpirun (sfh_argv_quote)");
				goto fail;
			}

			str_append(&fmtbuf, &fmtbuf_len, qarg);
			str_append(&fmtbuf, &fmtbuf_len, " ");
			free(qarg);
		}

		ao_tail(ad, &argtailc, &argtailv);

		if (argtailc > 0) 
			str_append(&fmtbuf, &fmtbuf_len, "-- ");

		for ( ; argtailc > 0; --argtailc, ++argtailv) {
			qarg = sfh_argv_quote(*argtailv, "\\'\"");
			if (qarg == 0) {
				terror("mpirun (sfh_argv_quote)");
				goto fail;
			}

			str_append(&fmtbuf, &fmtbuf_len, qarg);
			str_append(&fmtbuf, &fmtbuf_len, " ");
			free(qarg);
		}
/*
 * Parse the command line.  Line numbers are meaningless for a
 * synthesized one-line schema, so suppress the syntax error form.
 */
		app = asc_bufparse(fmtbuf, strlen(fmtbuf), &lineno, env);
		lineno = 0;
	}
/*
 * Release the buffer without disturbing the parser's errno, which is
 * reported just below.
 */
	errno_save = errno;
	free(fmtbuf);
	fmtbuf = 0;
	errno = errno_save;

	if (app == 0) {
		if (lineno > 0) {
			fprintf(stderr, syntaxerr, lineno);
			errno = EUSAGE;
		} else {
			fprintf(stderr, "mpirun: cannot parse: ");
			terror("");
		}

		return(0);
	}
/*
 * Set working dir.
 */
	if (wrkdir) {
		if (asc_workdir(app, wrkdir)) {
			terror("mpirun (asc_workdir)");
			errno_save = errno;
			asc_free(app);
			errno = errno_save;
			return(0);
		}
	}
/*
 * Schedule the application.  The unscheduled form is no longer needed
 * either way; keep asc_schedule()'s errno intact for the report.
 */
	app_sched = asc_schedule(app);
	errno_save = errno;
	asc_free(app);
	errno = errno_save;

	if (app_sched == 0) {
		terror("mpirun (asc_schedule)");
		return(0);
	}

	return(app_sched);
/*
 * Error exit for the command line path: drop the formatting buffer
 * while preserving errno for the caller.
 */
fail:
	errno_save = errno;
	free(fmtbuf);
	errno = errno_save;
	return(0);
}

/*
 *	pwait
 *
 *	Function:	- collect the exit of each launched process
 *			- stops at the first wait failure or non-zero
 *			  exit status
 *	Accepts:	- # of processes to wait for
 *			- (out) errno of a failed wait, or the status of
 *			  the first process that exited abnormally, else 0
 *	Returns:	- 0 or LAMERROR
 */
static int
pwait(int4 nwait, int *childstat)
{
	int4		src_node;		/* node the child ran on */
	int		child_pid;		/* child's process ID */
	int		exit_status;		/* child's exit status */
	char		pid_str[32];
	char		node_str[32];
	char		status_str[32];

	*childstat = 0;

	while (nwait > 0) {
		if (rpwait(&src_node, &child_pid, &exit_status) != 0) {
			*childstat = errno;
			terror("mpirun (rpwait)");
			return(LAMERROR);
		}

		if (exit_status != 0) {
/*
 * show_help() takes strings only, so format the numbers first.
 */
			snprintf(pid_str, sizeof(pid_str), "%d", child_pid);
			snprintf(node_str, sizeof(node_str), "%d", src_node);
			snprintf(status_str, sizeof(status_str), "%d",
				 exit_status);
			show_help("mpirun", "abnormal-exit", pid_str,
				  node_str, status_str, NULL);
			*childstat = exit_status;
			errno = EINTR;
			return(LAMERROR);
		}

		--nwait;
	}

	return(0);
}

/*
 *	set_mode
 *
 *	Function:	- set the operating mode
 *			- translates the parsed options into the fl_*
 *			  flags, nprocs, wrkdir and the runtime flags
 *	Returns:	- 0, or LAMERROR when -server is given without
 *			  -client (IMPI server builds only)
 */
static int
set_mode(void)
{
/*
 * flags
 */
	fl_fdesc = ao_taken(ad, "f");
	fl_verbose = ao_taken(ad, "v");
/*
 * The registered pair is -w / -nw; there is no "n" option.
 */
	if (ao_taken(ad, "w"))
		fl_wait = 1;
	if (ao_taken(ad, "nw"))
		fl_wait = 0;
	if (ao_taken(ad, "nsigs"))
		fl_catch = 0;
/*
 * followed options
 */
	nprocs = -1;
	if (ao_taken(ad, "c")) {
		ao_intparam(ad, "c", 0, 0, &nprocs);
		fl_nprocs = 1;
	} else if (ao_taken(ad, "np")) {
		ao_intparam(ad, "np", 0, 0, &nprocs);
		fl_nprocs = 1;
	}

	if (ao_taken(ad, "wd")) {
		wrkdir = ao_param(ad, "wd", 0, 0);
	}
/*
 * runtime flags
 */
	rtf = RTF_MPIRUN;

	if (fl_wait)
		rtf |= RTF_WAIT;
	if (fl_catch)
		rtf |= RTF_MPISIGS;
	if (ao_taken(ad, "D"))
		rtf |= RTF_APPWD;
	if (ao_taken(ad, "O"))
		rtf |= RTF_HOMOG;
	if (!ao_taken(ad, "lamd"))
		rtf |= RTF_MPIC2C;
	if (ao_taken(ad, "ger")
			|| (ao_taken(ad, "lamd") && !ao_taken(ad, "nger")))
		rtf |= RTF_MPIGER;
	if (ao_taken(ad, "t") || ao_taken(ad, "ton") || ao_taken(ad, "toff"))
		rtf |= RTF_TRACE;
	if (!ao_taken(ad, "toff"))
		rtf |= RTF_TRSWITCH;
	if (isatty(1))
		rtf |= RTF_TTYOUT;
	if (!fl_fdesc)
		rtf |= RTF_IO;
#if LAM_HAVE_PTYS
#if PTY_IS_DEFAULT
	if (!ao_taken(ad, "npty"))
		rtf |= RTF_PTYS;
#else
	if (ao_taken(ad, "pty"))
		rtf |= RTF_PTYS;
#endif
#else
	/* For those systems that do not have pty support */
	if (ao_taken(ad, "pty"))
	  fprintf(stderr, "Your system does not appear to include pty support.  -pty ignored.\n");
	rtf &= ~RTF_PTYS;
#endif

	fl_impi_client = ao_taken(ad, "client");
	fl_impi_server = ao_taken(ad, "server");
	if (fl_impi_client || fl_impi_server) {
#if !LAM_WANT_IMPI
	  show_help("mpirun", "no-impi-support", NULL);
#else
#if !LAM_WANT_IMPI_SERVER
	  /* main() relies on this flag being clear when server support
	     is not compiled in, so drop the request after warning */
	  if (fl_impi_server) {
	    show_help("mpirun", "no-impi-server", NULL);
	    fl_impi_server = 0;
	  }
#else
	  if (fl_impi_server && !fl_impi_client) {
	    show_help("mpirun", "server-without-client", NULL);
	    return LAMERROR;
	  }
#endif
	  rtf |= RTF_IMPI;
	  
	  /* This seems to be overkill, but we'll leave it here on the
             off chance that we had a really good reason to have it
             here! */

	  if (ao_taken(ad, "O")) {
	    fprintf(stderr, "-O conflicts with -client (IMPI jobs cannot be guaranteed to be homogoneous)\n");
	    fprintf(stderr, "-O disabled.\n");
	    rtf &= ~RTF_HOMOG;
	  }
#endif
	}
	
	return(0);
}

/*
 *	set_stdio
 *
 *	Function:	- set up application stdio
 *			- does nothing when -f was given or when the
 *			  platform has no file descriptor passing
 *	Returns:	- 0 or LAMERROR
 */
static int
set_stdio(void)
{
	char		sockname[LAM_PATH_MAX];	/* fd server socket name */

	if (!fl_fdesc) {
#if (LAM_HAVE_BSD43_FD_PASSING || LAM_HAVE_BSD44_FD_PASSING || LAM_HAVE_SYSV_FD_PASSING)
/*
 * Pass stdin, stdout and stderr to filed.
 */
		if (lam_mktmpid((int) getpid(), sockname, sizeof(sockname))
				|| lam_lfopenfd(sockname)) {
			return(LAMERROR);
		}
/*
 * Point the LAM file descriptors at the passed ones.  lam_lfopenfd()
 * takes care of the case when stdin is a tty.
 */
		_kio.ki_stdin = _ufd[0].fu_tfd;
		_kio.ki_stdout = _ufd[1].fu_tfd;
		_kio.ki_stderr = _ufd[2].fu_tfd;
#endif
	}

	return(0);
}

/*
 *	reset_stdio
 *
 *	Function:	- reset stdio so rfatexit will clean it up
 *	Returns:	- 0 or LAMERROR
 */
static void
reset_stdio(void)
{
	_kio.ki_stdin = 0;
	_kio.ki_stdout = 1;
	_kio.ki_stderr = 2;
}

/*
 *	locate_aschema
 *
 *	Function:	- locate an application schema
 *			- searches $LAMAPPLDIR (when set), then "",
 *			  $LAMHOME/etc, $TROLLIUSHOME/etc and the system
 *			  configuration directory, for a readable file
 *	Accepts:	- filename
 *	Returns:	- full pathname or NULL
 */
static char *
locate_aschema(char *filename)
{
	char		*fixed_dirs[] = {	/* always searched, in order */
		"",
		"$LAMHOME/etc",
		"$TROLLIUSHOME/etc",
		LAM_SYSCONFDIR
	};
	int		ndirs = (int) (sizeof(fixed_dirs) / sizeof(fixed_dirs[0]));
	int		dirc = 0;		/* search path argc */
	char		**dirv = 0;		/* search path argv */
	char		*userdir;		/* $LAMAPPLDIR, if any */
	char		*found;			/* result of the search */
	int		i;

	userdir = getenv("LAMAPPLDIR");
	if (userdir != NULL) {
		argvadd(&dirc, &dirv, userdir);
	}

	for (i = 0; i < ndirs; ++i) {
		argvadd(&dirc, &dirv, fixed_dirs[i]);
	}

	found = sfh_path_find(filename, dirv, R_OK);
	argvfree(dirv);
	return(found);
}

/*
 *	node_ranges
 *
 *	Function:	- append the list of target nodes to the buffer
 *			- consecutive nodes are compacted to ranges
 *			- walks nid_get() until it wraps back to the
 *			  initial index
 *	Accepts:	- formatting buffer (grown through str_append())
 *			- formatting buffer length
 *			- initial node
 *			- initial index
 */
static void
node_ranges(char **buf, int *len, int4 initnode, int4 initindex)
{
	int4		cur;			/* node just fetched */
	int4		idx;			/* for getnode() */
	int4		nflags;			/* for getnode() */
	int4		expected;		/* node that would extend the run */
	int4		run_start = initnode;	/* first node of current run */
	int		run_open = 0;		/* run still needs its "-N" */

	sprintf(smallbuf, "n%d", initnode);
	str_append(buf, len, smallbuf);
	expected = initnode + 1;
/*
 * Loop over the nodes.
 */
	for (nid_get(&idx, &cur, &nflags); idx != initindex;
			nid_get(&idx, &cur, &nflags)) {
/*
 * The run continues: just remember that it must be closed later.
 */
		if (cur == expected) {
			++expected;
			run_open = 1;
			continue;
		}
/*
 * A "hole": close a run of length >= 2 with its upper bound, or simply
 * list the new node after a run of length 1.
 */
		if (expected > run_start + 1) {
			sprintf(smallbuf, "-%d,%d", expected - 1, cur);
		} else {
			sprintf(smallbuf, ",%d", cur);
		}
		str_append(buf, len, smallbuf);

		run_start = cur;
		expected = cur + 1;
		run_open = 0;
	}
/*
 * If we were still in a range, flush it out.
 */
	if (run_open) {
		sprintf(smallbuf, "-%d", expected - 1);
		str_append(buf, len, smallbuf);
	}

	str_append(buf, len, " ");
}

/*
 *	sigint_handler
 *
 *	Function:	- dooms application started by mpirun
 *			- the first SIGINT forks a child that attaches to
 *			  the kernel and dooms the world; a second SIGINT
 *			  exits immediately
 *	Accepts:	- signal
 */
static void
sigint_handler(int sig)
{
	static int	ncaught = 0;		/* # SIGINTs seen so far */
	int		child;

	if (sig != SIGINT)
		return;
/*
 * Second time around just exit.
 */
	++ncaught;
	if (ncaught > 1) {
		exit(1);
	}
/*
 * Both the parent and a failed fork simply return to the interrupted
 * code; only the child carries on.
 */
	child = fork();
	if (child != 0) {
		return;
	}

	if (kinit(PRCMD)) {
		exit(errno);
	}

	app_doom(world_n, world, -15);
	kexit(0);
}

/*
 *	get_mpi_world
 *
 *	Function:	- get MPI world
 *			- starts from a copy of the initial world with all
 *			  pids cleared, then receives one message per
 *			  process carrying its rank, pid and index
 *	Accepts:	- size of world
 *			- initial process world
 *			- MPI process world (out)
 *	Returns:	- 0 or LAMERROR
 */
static int
get_mpi_world(int4 world_n, struct _gps *world, struct _gps *mpiworld)
{
    struct nmsg		init_msg;
    int			nseen;
    int			rank;
    int			k;
    char		node_str[32];

    memcpy(mpiworld, world, world_n * sizeof(struct _gps));
    k = 0;
    while (k < world_n) {
	mpiworld[k].gps_pid = 0;
	++k;
    }

    LAM_ZERO_ME(init_msg);
    init_msg.nh_flags = DINT4DATA;
    init_msg.nh_length = 0;
    init_msg.nh_event = (-getpid()) & 0xBFFFFFFF;

    for (nseen = 0; nseen < world_n; ++nseen) {
	init_msg.nh_type = 3;
	if (nrecv(&init_msg) != 0) {
	    terror("mpirun");
	    return(LAMERROR);
	}
/*
 * A type 1 message is reported as a process that did not initialize.
 */
	if (init_msg.nh_type == 1) {
	    snprintf(node_str, sizeof(node_str), "%d", init_msg.nh_node);
	    show_help("mpirun", "no-init", node_str, NULL);
	    return(LAMERROR);
	}
/*
 * Set the MPI process pid and index, rejecting out-of-range ranks.
 */
	rank = init_msg.nh_data[0];
	if (!(rank >= 0 && rank < world_n)) {
	    errno = EIMPOSSIBLE;
	    terror("mpirun");
	    return(LAMERROR);
	}

	mpiworld[rank].gps_pid = init_msg.nh_data[1];
	mpiworld[rank].gps_idx = init_msg.nh_data[2];
    }

    return(0);
}

/*
 *	error_cleanup
 *
 *	Function:	- try to clean up init and wait messages
 *			- this is not foolproof but better than nothing
 */
static void
error_cleanup(void)
{
    struct nmsg		msg;
/*
 * Wait a little while.
 */
    sleep(1);

    LAM_ZERO_ME(msg);
    msg.nh_event = (-getpid()) & 0xBFFFFFFF;
    msg.nh_length = 0;
    msg.nh_flags = DINT4DATA;
/*
 * Loop trying to receive init messages and wait messages.
 */
    while (1) {
	msg.nh_type = 3;
	if (ntry_recv(&msg))
	    break;
    }
}


/*
 *	str_append
 *
 *	Function:	- safely append to a heap-allocated string
 *			- the buffer is doubled until the result,
 *			  including its terminating NUL, fits
 *			- exits the process if the buffer cannot be grown
 *	Accepts:	- address of the string (may be moved by realloc)
 *			- address of the buffer capacity in bytes (updated)
 *			- NUL-terminated suffix to append
 */
static void
str_append(char **src, int *len, char *suffix)
{
  size_t cur_len = strlen(*src);
  size_t add_len = strlen(suffix);
  size_t needed = cur_len + add_len + 1;	/* +1: terminating NUL */
  size_t cap = (*len > 0) ? (size_t) *len : 0;

  if (needed > cap) {
    /* Start from 1 so that a zero capacity cannot loop forever */
    size_t new_cap = (cap > 0) ? cap : 1;
    char *grown;
    int errno_save;

    while (new_cap < needed)
      new_cap *= 2;

    /* Keep the old pointer until realloc is known to have succeeded */
    grown = realloc(*src, new_cap);
    if (grown == 0) {
      errno_save = (errno != 0) ? errno : ENOMEM;
      perror("mpirun (realloc)");
      exit(errno_save);
    }

    *src = grown;
    *len = (int) new_cap;
  }

  /* Lengths are already known, so copy the suffix and its NUL directly */
  memcpy(*src + cur_len, suffix, add_len + 1);
}


#if LAM_WANT_IMPI
/*
 *	send_impi_params
 *
 *	Function:	- send the two -client parameters to rank 0 as
 *			  one "<server>\n<rank>\n" string
 *	Accepts:	- MPI world GPS array (only entry 0 is used)
 *	Returns:	- 0 or LAMERROR (errno describes the failure)
 */
static int
send_impi_params(struct _gps *world)
{
  char               *rank_string;
  char               *server_string;
  struct nmsg        msg;
  int                size;
  int                errno_save;
  char               *msg_string;

  /* 
   * Get the command line parameters for IMPI client rank and 
   * the servers IP name/number and port
   */
  
  server_string = ao_param(ad, "client", 0, 1);
  if (server_string == NULL) {
    errno = EINVAL;
    return (LAMERROR);
  }
  rank_string = ao_param(ad, "client", 0, 0);
  if (rank_string == NULL) {
    errno = EINVAL;
    return (LAMERROR);
  }

  /* The message length stays at both strings plus 4; calloc makes
     sure the spare byte after the NUL is not sent uninitialized */

  size = strlen(server_string) + strlen(rank_string) + 4;
  msg_string = calloc(1, size);
  if (msg_string == NULL) {
    return (LAMERROR);
  }
  snprintf(msg_string, size, "%s\n%s\n", server_string, rank_string);

  /* Send it to rank 0 */

  LAM_ZERO_ME(msg);
  msg.nh_node = world[0].gps_node;
  msg.nh_event = (-world[0].gps_pid) & 0xBFFFFFFF;
  msg.nh_type = 4;
  msg.nh_length = size;
  msg.nh_flags = 0;
  msg.nh_msg = (char *) msg_string;

  /* nsend() is assumed to return non-zero on failure, like the
     nrecv() call in get_mpi_world() -- TODO confirm */

  if (nsend(&msg)) {
    errno_save = errno;
    free(msg_string);
    errno = errno_save;
    return (LAMERROR);
  }

  free(msg_string);

  return 0;
}
#endif
