/* Revision-control identification string (expanded Id keyword) for this
   file.  It is not referenced anywhere else in this file. */
static char dqs_job_exit_rcsid[]="$Id: dqs_job_exit.c,v 1.1.1.1 1998/08/18 14:39:11 green Exp $";

/*----------------------------------------------------
 * dqs_job_exit.c Tom Green Sun Dec 12 16:16:56 1993
 *
 * Copyright 1993
 *
 * SUPER COMPUTER COMPUTATIONS RESEARCH INSTITUTE
 *            FLORIDA STATE UNIVERSITY
 *
 *
 * SCRI representatives make no claims about the
 * suitability of this software for any purpose.
 * It is provided "as is" without express or
 * implied warranty.
 *
 * $Log: dqs_job_exit.c,v $
 * Revision 1.1.1.1  1998/08/18 14:39:11  green
 * DQS 3.2.0.5 WIP Import
 *
 * Revision 1.2  1997/04/10 15:35:35  green
 * committed nrl's last_user initialization patches
 *
 * Revision 3.22  1997/04/02 18:06:11  nrl
 * last_user structure was not being initialized. This fix
 * creates the last_user structure when needed.
 *
 * Revision 3.21  1996/08/02 01:46:56  nrl
 * moved the dqs_clear_subordinated call
 * to delay clearing the subordinated state until all master
 * queue jobs are done.
 *
 * Revision 3.20  1996/03/22  04:20:34  nrl
 * Added error cataloguing number to all routines
 *
 * Revision 3.19  1996/03/17  00:57:35  nrl
 * merge in qsub prevalidation scheme and consumable restoration
 *
 * Revision 3.18  1996/03/14  03:16:15  nrl
 * merge in subordinate queues and consumable resource changes
 *
 * Revision 3.17  1996/01/19  20:58:46  nrl
 * merged SCRI code and new job and queue structure changes
 *
 * Revision 3.16  1995/02/22  14:29:27  nrl
 * added "FREE" macro to make sure all freed pointers are NULL,
 * replaced all calls to free( ) with FREE.
 *
 * Revision 3.15  1995/02/06  16:31:51  nrl
 * Added "account" info to rusage structure and to the "acc_file"
 * for analysis by user.
 *
 * Revision 3.14  1995/01/30  15:21:57  nrl
 * added "tid" verification between execd and qmaster to prevent
 * "ghost" jobs from persisting in visible queue. Changed ERROR messages
 * which were for information only to DEBUG messages.
 *
 * Revision 3.13  1995/01/27  14:09:40  nrl
 * Changed Suspend on completion to continue beyond an unsuspend operation
 * so that jobs can be "stepped" thru the queues. Increased timeout
 * retries for connect to make the system more tolerant of network delays.
 *
 * Revision 3.12  1995/01/24  21:04:52  nrl
 * made changes to plug memory leaks and to complete the suspend
 * on completion function.
 *
 * Revision 3.11  1995/01/17  16:32:08  nrl
 * completed mailer feature
 *
 * Revision 3.10  1994/06/23  20:32:23  green
 * more Solaris fun
 *
 * Revision 3.9  1994/06/23  20:01:40  green
 * Solaris porting mods...
 *
 * Revision 3.8  1994/06/07  12:37:59  green
 * moved some format descriptors out of dqs_ck_to_do_list.c and
 * dqs_job_exit.c and into def.h
 *
 * added dqs_read_stats() to dqs_ck_to_do_list.c
 *
 * Revision 3.7  1994/03/28  14:51:29  green
 * modified dqs_job_exit.c:dqs_read_rusage()
 *
 * Revision 3.6  1994/03/26  22:22:56  green
 * yanked "qty.eq.??" out of dqs_job_exit.c:dqs_build_complex_str()
 *
 * modified dqs_c_qconf.c:dqs_c_qconf() to show "none" on NULL complex
 * strings passed in on qmon_OPT
 *
 * Revision 3.5  1994/03/26  12:24:10  green
 * bug in dqs_ck_to_do_list.c:dqs_ck_to_do_list() allowed garbage in
 * "lp2" to be forwarded to dqs_build_complex_str();
 *
 * dqs_job_exit.c:dqs_build_complex_str() modified - changed line 460
 * from strcat() to strcpy()
 *
 * added additional error logging to dqs_sec.c:dqs_qmaster() and
 * dqs_sec.c:dqs_trusted_host() to aid in tracking of bogus
 * configurations.
 *
 * Revision 3.4  1994/03/25  20:57:29  green
 * dqs_job_exit.c:dqs_build_complex_str() now returns "none" if there
 * are no complexes.
 *
 * Revision 3.3  1994/03/24  19:22:10  green
 * modified dqs_build_complex_str() to preclude buffer overruns.
 *
 * Revision 3.2  1994/03/24  19:17:37  green
 * patched dqs_job_exit.c:dqs_build_qcomplex_str() - this routine still
 * needs more work...
 *
 * modified dqs_ck_to_do_list.c accordingly.
 *
 * Revision 3.1  1994/03/17  18:36:25  green
 * added support for "STAT_FILE"
 *
 * fixed memory leak in dqs_job_exit.c
 *
 * Revision 3.0  1994/03/07  04:13:56  green
 * 3.0 freeze
 *
 * Revision 1.1.1.1  1994/02/01  17:57:42  green
 * DQS 3.0 ALPHA
 *
 *
 *--------------------------------------------------*/


#include "h.h"
#include "def.h"
#include "dqs.h"
#include "struct.h"
#include "func.h"
#include "globals.h"
#include "dqs_errno.h"

/************************************************************************/
void dqs_job_exit(request_list)
     dqs_list_type *request_list;
     
     /*
       dqs_job_exit - reap a finished job (or one "sub-job" of it) that
       is described by request_list->rusage.
       
       The rusage record supplies dqs_job_name, qname, owner and the
       "master" flag that are read below.
       
       Steps, in order:
         1. Locate the job and the queue named in the rusage record;
            log an error and return if either lookup fails.
         2. If the queue has a positive last_user_delay, store the owner
            and "now + delay" in queue->last_user, creating that list
            element first when it is missing.
         3. If rusage->master is set, call dqs_clear_granted_resources()
            on the job; otherwise remove entries matching rusage->qname
            from the queue's job_list and write the queue to disk.
         4. Remove rusage->qname from the job's granted destination
            list, decrement the queue's qty_active (never below zero),
            swap RUNNING for SUSPENDED when SUSPEND_ON_COMP is valid for
            the queue state, and call dqs_clear_subordinated() once
            qty_active reaches zero.
         5. Restore consumables.  If the granted destination list is now
            empty, unlink the job and exec files, drop the job from
            Job_hash, Job_head and the queue's job_list, log the
            accounting record and write the queue to disk.  Otherwise
            only write the queue to disk.
       
       Returns: nothing.
     */
     
{
  
  string         str;   /* NOTE(review): not referenced below; kept in case a debug macro expects it -- confirm and remove */
  dqs_job_type   *job;
  dqs_queue_type *queue;
  dqs_list_type  t_last_user;   /* zeroed template used to create queue->last_user */
  
  DENTER((DQS_EVENT,"dqs_job_exit"));
  
  DPRINTF((DQS_EVENT,"******qmaster reaping \"%s\"",request_list->rusage->dqs_job_name));
  job=dqs_locate_job(request_list->rusage->dqs_job_name);
  
  if (!job)
    {
      ERROR((DQS_EVENT,"DQS_ERROR_0259 error: could not locate job \"%s\"",
             request_list->rusage->dqs_job_name));
      DEXITE;
      return;
    }
  else
    {
      DPRINTF((DQS_EVENT,"dqs_job_name \"%s\"   job_name \"%s\"",
               job->dqs_job_name,job->job_name));
      DPRINTF((DQS_EVENT,"=================granted_destin_identifier_list============================"));
      dqs_showlist(job->granted_destin_identifier_list,DQS_DEBUG|DQS_STR0|DQS_INT0,4);
      DPRINTF((DQS_EVENT,"==========================================================================="));
    }
  
  queue=dqs_locate_queue(request_list->rusage->qname);
  if (!queue)
    {
      ERROR((DQS_EVENT,"DQS_ERROR_0260 error: could not locate queue \"%s\"",
             request_list->rusage->qname));
      DEXITE;
      return;
    }
  else
    {
      DPRINTF((DQS_EVENT,"queue_name1 \"%s\"",queue->qname));
      
    }
  
  
  /* Remember who used the queue last and until when that record counts. */
  if(queue->last_user_delay >0){
    if(!queue->last_user){
      DPRINTF((DQS_EVENT,"recreating last_user" ));
      bzero(&t_last_user,sizeof(t_last_user));
      queue->last_user= dqs_insert(DQS_STR0,TAIL,queue->last_user,&t_last_user);
    }
    queue->last_user->int0=dqs_get_gmt();
    queue->last_user->int0 = queue->last_user->int0 + queue->last_user_delay;
    queue->last_user->user=dqs_string_insert(queue->last_user->user,
                                             request_list->rusage->owner);
  }
  
  
  /*-------------------------------------------------*/
  if (request_list->rusage->master)
    { /* if they ain't started yet, they ain't gonna */
      dqs_clear_granted_resources(job);
    }
  else
    {
      /* Same lookup as above, repeated; the failure branch is kept as a
         safeguard. */
      queue=dqs_locate_queue(request_list->rusage->qname);
      if (!queue)
        {
          ERROR((DQS_EVENT,"DQS_ERROR_0261 error: could not locate queue \"%s\"",
                 request_list->rusage->qname));
          /* Balance the DENTER above, as every other early return does. */
          DEXITE;
          return;
        }
      else
        {
          DPRINTF((DQS_EVENT,"queue_name2 \"%s\"",queue->qname));
          /* NOTE(review): entries are matched on rusage->qname here,
             while the final cleanup below removes by dqs_job_name --
             confirm which key job_list entries actually carry. */
          while (dqs_locate_str0(queue->job_list,request_list->rusage->qname))
            queue->job_list=dqs_del_str0(queue->job_list,request_list->rusage->qname);
          dqs_write_queue_to_disk(queue);
        }
    }
  
  /* Dump the granted list before and after this queue is removed from it. */
  DPRINTF((DQS_EVENT,"=================granted_destin_identifier_list============================"));
  dqs_showlist(job->granted_destin_identifier_list,DQS_DEBUG|DQS_STR0,4);
  DPRINTF((DQS_EVENT,"==========================================================================="));
  job->granted_destin_identifier_list=dqs_del_str0(job->granted_destin_identifier_list,request_list->rusage->qname);
  DPRINTF((DQS_EVENT,"=================granted_destin_identifier_list============================"));
  dqs_showlist(job->granted_destin_identifier_list,DQS_DEBUG|DQS_STR0,4);
  DPRINTF((DQS_EVENT,"==========================================================================="));
  
  if (queue)
    {
      if (queue->qty_active>0)
        queue->qty_active=queue->qty_active-1;
      if(VALID(SUSPEND_ON_COMP,queue->state) ){
        CLEARBIT(RUNNING,queue->state);
        SETBIT(SUSPENDED,queue->state);
      }
      /* Subordination is only cleared once nothing is active any more. */
      if(queue->qty_active==0) dqs_clear_subordinated(queue);
    }
  dqs_restore_consumable(job);
  if (!job->granted_destin_identifier_list) /* last "sub-job" reaped */
    {
      DPRINTF((DQS_EVENT,"all granted_destins have been reaped"));
      DPRINTF((DQS_EVENT,"***unlinking \"%s\"",job->job_file));
      unlink(job->job_file);
      unlink(job->exec_file);
      dqs_showlist(Job_head,DQS_STR0|DQS_STDERR,4);
      dqs_hash_del(request_list->rusage->dqs_job_name,Job_hash); 
      Job_head=dqs_del_str0(Job_head,request_list->rusage->dqs_job_name);
      queue->job_list=dqs_del_str0(queue->job_list,request_list->rusage->dqs_job_name);
      dqs_showlist(Job_head,DQS_STR0|DQS_STDERR,4);
      dqs_log_dusage(request_list->rusage,queue);
      
      dqs_write_queue_to_disk(queue);
    }
  else
    {
      /* Despite the message text, it is the queue that is written here. */
      ERROR((DQS_EVENT,"DQS_ERROR_0262 WRITING JOB TO DISK"));
      dqs_write_queue_to_disk(queue);
    }
  
  DEXIT;
  return;
  
}

/*******************************************************************************************/
void dqs_log_dusage(d,q)
     dqs_rusage_type *d;
     dqs_queue_type *q;
     
     /*
       dqs_log_dusage - append one accounting record to ACT_FILE.
       
       d - the rusage record handed to dqs_write_rusage()
       q - the queue whose first complex_list entry (if any) is looked
           up and rendered by dqs_build_qcomplex_str() for the record
       
       ACT_FILE is created first when stat() cannot find it.  The stream
       is closed on every path once it has been opened; a failure
       reported by fclose() is treated as a write error because buffered
       data may only hit the file at that point.
       
       Returns: nothing; failures are reported through ERROR.
     */
     
{
  
  struct stat      buf;
  dqs_list_type    *listel_ptr=NULL;
  char *           qcomplex_str;
  FILE             *f;
  int write_result;
  
  DENTER((DQS_EVENT,"dqs_log_dusage"));
  
  if (stat(ACT_FILE,&buf)) {
    INFO((DQS_EVENT,"DQS_ERROR_0263 %s not found -- making",ACT_FILE));
    close(creat(ACT_FILE,0755));
  }
  
  f=fopen(ACT_FILE,"a");
  if (!f)
    {
      ERROR((DQS_EVENT,"DQS_ERROR_0264 error: opening %s for writing",ACT_FILE));
      DEXITE;
      return;
    }
  
  if (q->complex_list)
    listel_ptr=dqs_locate_complex(q->complex_list->str0);
  qcomplex_str=dqs_build_qcomplex_str(listel_ptr);
  
  write_result = dqs_write_rusage (f, d, qcomplex_str);
  FREE(qcomplex_str);
  
  /* Close before looking at the result so that no error path can leak
     the stream.  -2 is kept as-is because it has its own diagnostic. */
  if (fclose(f) == EOF && write_result != -2)
    write_result = EOF;
  
  if (write_result == -2)
    {
      /* The file should be open... */
      ERROR((DQS_EVENT,"DQS_ERROR_0266 INTERNAL ERROR"));
      DEXITE;
      return;
    }
  else if (write_result < 0)
    {
      /* fprintf() signals failure with any negative value, not only EOF. */
      ERROR((DQS_EVENT,"DQS_ERROR_0265 error: writing to %s",ACT_FILE));
      DEXITE;
      return;
    }
  
  DEXIT;
  return;
  
}

/*******************************************************************************************/
void dqs_clear_granted_resources(job)
     dqs_job_type *job; 
     
     /*
       dqs_clear_granted_resources - clears granted resources which have not been activated
     */
     
{
  
  dqs_list_type  *lp;
  dqs_queue_type *qp;
  
  DENTER((DQS_EVENT,"dqs_clear_granted_resources"));
  
 one_more_time:
  lp=job->granted_destin_identifier_list;
  while (lp)
    {
      if (lp->int0==GRANTED)
	{
	  qp=dqs_locate_queue(lp->str0);
	  if (qp)
	    {
	      qp->job_list=dqs_del_str0(qp->job_list,job->dqs_job_name);
	      if (qp->qty_active>0)
		{
		  qp->qty_active--;
		  ERROR((DQS_EVENT,"DQS_ERROR_0267 %d",qp->qty_active));
		}
	      dqs_write_queue_to_disk(qp);
	    }
	  else
	    {
	      ERROR((DQS_EVENT,"DQS_ERROR_0268 the queue \"%s\" cannot be located",lp->str0));
	    }
	  lp->str0=dqs_string_insert(lp->str0,"NUKE_ME");
	  job->granted_destin_identifier_list=dqs_del_str0(job->granted_destin_identifier_list,"NUKE_ME");
	  goto one_more_time;
	}
      lp=lp->next;
    }
  
  DEXIT;
  return;
  
}

/*******************************************************************************************/
int dqs_write_rusage(f, d, qcomplex_str)
     FILE *f; 
     dqs_rusage_type *d;
     char *qcomplex_str;
     
     /*
       dqs_write_rusage - write rusage info to file.
       Returns: -2 if f is NULL
       number of characters written (> 0 indicates success)
       EOF if failure
     */
     
{
  int fprintf_count;
  int nuke_qcomplex_str=FALSE;
  
  DENTER((DQS_EVENT,"dqs_write_rusage"));
  
  if (f == NULL)
    return (-2);
  
  if (!d->qname)
    d->qname=dqs_string_insert(NULL,"UNKNOWN");
  if (!d->hostname)
    d->hostname=dqs_string_insert(NULL,"UNKNOWN");
  if (!d->group)
    d->group=dqs_string_insert(NULL,"UNKNOWN");
  if (!d->owner)
    d->owner=dqs_string_insert(NULL,"UNKNOWN");
  if (!d->job_name)
    d->job_name=dqs_string_insert(NULL,"UNKNOWN");
  if (!d->dqs_job_name)
    d->dqs_job_name=dqs_string_insert(NULL,"UNKNOWN");
  if (!d->account)
    d->account=dqs_string_insert(NULL,"UNKNOWN");
  
  if (!qcomplex_str)
    {
      qcomplex_str=dqs_string_insert(NULL,"UNKNOWN");
      nuke_qcomplex_str=TRUE;
    }
  
  fprintf_count = fprintf(f,ACTFILE_FPRINTF_FORMAT,ACTFILE_FPRINTF_VARS);
  
  if (nuke_qcomplex_str)
    dqs_free(qcomplex_str);
  
  return (fprintf_count);
}

/*******************************************************************************************/
int dqs_read_rusage(f, d, qcomplex_str)
     FILE *f; 
     dqs_rusage_type *d;
     char *qcomplex_str;
     
     /*
       dqs_read_rusage - read rusage info from file.
       Returns: -3 if f is NULL
       -2 if unexpected EOF reached
       -1 if the number of items read was incorrect
       0 otherwise (i.e. success)
       1 if expected EOF reached
     */
     
{
  int fscanf_count;
  
  DENTER((DQS_EVENT,"dqs_read_rusage"));
  
  if (f == NULL)
    return (-3);
  
  fscanf_count = fscanf(f,ACTFILE_FSCANF_FORMAT,ACTFILE_FSCANF_VARS);
  if (fscanf_count == EOF)
    {
      /* Expected end of file. */
      return (1);
    }
  else if (fscanf_count != ACTFILE_FSCANF_COUNT)
    {
      /* Bad news.  File must be malformed. */
      fscanf_count = fscanf(f,ACTFILE_FSCANF_FORMAT,ACTFILE_FSCANF_VARS);
      if (fscanf_count != EOF)
        return (-1);
      else
        return (-2);
    }
  
  return (0);
}

/*******************************************************************************************/
char *dqs_build_qcomplex_str(qcomplex)
     dqs_list_type    *qcomplex;
     
     /*
       dqs_build_qcomplex_str - construct the string equilavent of a resource list
       Returns: "none" if listel_ptr is NULL
       otherwise, the string equilavent of a resource list
     */
     
{
  
  char *qcomplex_str=NULL;
  string temp_str;
  dqs_list_type *qc_chain;
  
  DENTER((DQS_EVENT,"dqs_build_qcomplex_str"));
  
  qcomplex_str=(char *)dqs_malloc(2*MAX_STRING_SIZE * sizeof (char));
  
  if (qcomplex == NULL)
    {
      strcpy (qcomplex_str, "none");
      DEXIT;
      return (qcomplex_str);
    }
  
  /*  sprintf (qcomplex_str,"qty.eq.%ld",qcomplex->int0); */
  
  qc_chain = qcomplex->chain;
  
  while ((qc_chain) && (strlen(qcomplex_str)<MAX_STRING_SIZE-1))
    {
      /* The resource name. */
      strcat (qcomplex_str, qc_chain->str0);
      
      if (qc_chain->int1 != 0)
	/* There's an op and field. */
	{
	  switch (qc_chain->int1)
	    {
	    case EQ_OP:
	      strcpy(temp_str,".eq.");
	      break;
	    case GE_OP:
	      strcpy(temp_str,".ge.");
	      break;
	    case GT_OP:
	      strcpy(temp_str,".gt.");
	      break;
	    case LT_OP:
	      strcpy(temp_str,".lt.");
	      break;
	    case LE_OP:
	      strcpy(temp_str,".le.");
	      break;
	    case NE_OP:
	      strcpy(temp_str,".ne.");
	      break;
	    default:
	      ERROR((DQS_EVENT,"DQS_ERROR_0269 INTERNAL ERROR"));
	      DEXITE;
	      return (NULL);
	      break;
	    }
	  
	  strcat(qcomplex_str,temp_str);
	  
	  if (qc_chain->int0)
	    {
	      sprintf (temp_str,"%ld",qc_chain->int0);
	      strcat (qcomplex_str,temp_str);
	    }
	  else if (qc_chain->str1 != NULL)
	    {
	      strcat(qcomplex_str, qc_chain->str1);
	    }
	  
	}
      qc_chain = qc_chain->next;
      if (qc_chain)
	strcat (qcomplex_str, ",");
      
    }
  
  DEXIT;
  return (qcomplex_str);
}

