/* foremost.c
 *
 * This is a work of the US Government. In accordance with 17 USC 105, 
 * copyright protection is not available for any work of the US Government.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 *
 */

#include "foremost.h"


/* Print the program's help text (FOREMOST_USAGE_STRING) on stdout. */
void usage() {
  printf (FOREMOST_USAGE_STRING);
}


/* Signal handling.

   When the user sends SIGINT (CTRL-C) or SIGTERM (kill) we want a chance
   to finish the current operation and close the audit file rather than
   stopping on the spot.

   catch_alarm is the handler. It records the signal number in the global
   signal_caught. digBuffer and digImageFile look at signal_caught at the
   top of each pass through their loops and call clean_up once it is set.

   NOTE(review): the handler writes through stdio, which is not
   async-signal-safe; kept as is to preserve the existing output.
 */

void catch_alarm (int signum) {

  /* Remember which signal arrived; the digging loops poll this value. */
  signal_caught = signum;

  /* Install this handler again for the same signal. */
  signal(signum,catch_alarm);

#ifdef __DEBUG
  fprintf(stderr,"\nCaught signal: %s.\n",(char*) strsignal(signum));
#endif

  fputs ("\nKill signal detected. Cleaning up...\n", stderr);
}


int extractSearchSpecData(struct SearchSpecLine *s,char **tokenarray, int modeQuick) {

  /* Process a normal line with 3-4 tokens on it
     token[0] = suffix
     token[1] = case sensitive
     token[2] = size to snarf
     token[3] = begintag
     token[4] = endtag (optional)
     token[5] = search for footer from back of buffer flag and other options (whew!)
  */
  
  /* Allocate the memory for these lines.... */
 
  s->suffix = malloc(MAX_SUFFIX_LENGTH*sizeof(char));
  s->begin  = malloc(MAX_STRING_LENGTH*sizeof(char));
  s->end    = malloc(MAX_STRING_LENGTH*sizeof(char));    
  
  if (!strncasecmp(tokenarray[0],
                   FOREMOST_NOEXTENSION_SUFFIX,
                   strlen(FOREMOST_NOEXTENSION_SUFFIX))) {
    s->suffix[0] = FOREMOST_NOEXTENSION;
    s->suffix[1] = 0;
  }
  else {

    /* Assign the current line to the SearchSpec object */
    memcpy(s->suffix,tokenarray[0],MAX_SUFFIX_LENGTH);
  }
  
  /* Check for case sensitivity */
  s->casesensitive = (!strncasecmp(tokenarray[1],"y",1) || 
		      !strncasecmp(tokenarray[1],"yes",3));
  
  s->length = atoi(tokenarray[2]);
  
  /* Determine which search type we want to use for this needle */
  s->searchtype = SEARCHTYPE_FORWARD;
  if (!strncasecmp(tokenarray[5],"REVERSE",strlen("REVERSE"))) {
    s->searchtype = SEARCHTYPE_REVERSE;
  }
  else if (!strncasecmp(tokenarray[5],"NEXT",strlen("NEXT"))) {
    s->searchtype = SEARCHTYPE_FORWARD_NEXT;
  }
  // this is the default, but just if someone wants to provide this value just to be sure
  else if (!strncasecmp(tokenarray[5],"FORWARD",strlen("FORWARD"))) {
    s->searchtype = SEARCHTYPE_FORWARD;
  }

  /* Done determining searchtype */
  
  /* We copy the tokens and translate them from the file format.
     The translate() function does the translation and returns
     the length of the argument being translated */
  
  s->beginlength = translate(tokenarray[3]);
  memcpy(s->begin,tokenarray[3],s->beginlength);
  s->endlength   = translate(tokenarray[4]);
  memcpy(s->end,tokenarray[4],s->endlength);  
  
  
  /* If we're not in quick mode, initialize the Boyer-Moore string search
     table for this needle */
  if(modeQuick == FALSE){
    /*If we aren't doing a quick search 
      to initialize the Boyer-Moore jump table*/
    init_bm_table(s->begin,s->begin_bm_table,s->beginlength, s->casesensitive,s->searchtype);
    init_bm_table(s->end,s->end_bm_table,s->endlength,s->casesensitive,s->searchtype);
  }
  return FOREMOST_OK;
}



/* Parse one line of the configuration file.
 *
 * buffer is modified in place (a trailing CR/LF pair is reduced to LF,
 * and strtok() cuts the line into tokens). Three kinds of lines are
 * recognized:
 *
 *   - blank lines and lines whose first token starts with '#': ignored;
 *   - "wildcard <c>": sets the global wildcard character;
 *   - a search specification with NUM_SEARCH_SPEC_ELEMENTS tokens, of
 *     which the last two may be omitted. It is stored in
 *     state->SearchSpec[state->specLines] and specLines is incremented.
 *
 * lineNumber is only used in diagnostics.
 *
 * Returns FOREMOST_OK, FOREMOST_ERROR_NO_SEARCH_SPEC when the line has
 * too few tokens, or the error reported by extractSearchSpecData (in
 * which case the specification is not counted).
 */
int processSearchSpecLine(struct foremostState *state, char *buffer, 
			  int lineNumber) {

  /* extractSearchSpecData copies what it needs out of the tokens, so the
     pointer table can live on the stack and needs no cleanup. */
  char *tokenarray[NUM_SEARCH_SPEC_ELEMENTS];
  /* Writable stand-ins for omitted tokens: translate() edits its
     argument in place, so a string literal is not handed to it. */
  char blank[2][1] = { { '\0' }, { '\0' } };
  char *buf;
  char *token;
  int i = 0, err = 0, missing;
  size_t len = strlen(buffer);
  size_t wildcardLength;

  /* Any line that ends with a CTRL-M (0x0d) has been processed
     by a DOS editor. We will chop the CTRL-M to ignore it.
     Lines shorter than two characters cannot end in CR/LF. */
  if (len >= 2 && buffer[len-2] == 0x0d && buffer[len-1] == 0x0a){
    buffer[len-2] = buffer[len-1];
    buffer[len-1] = buffer[len];
  }

  buf = (char*) skipWhiteSpace(buffer);
  token = strtok(buf," \t\n");

  /* Any line that starts with a '#' is a comment and can be skipped */
  if(token == NULL || token[0] == '#'){  
    return FOREMOST_OK;
  }
    
  /* Check for the wildcard (length 9 includes the terminator, so this
     is an exact, case-insensitive match) */
  if (!strncasecmp(token,"wildcard",9)) {
    if ((token = strtok(NULL," \t\n")) != NULL) {
      translate(token);
    } else {
      fprintf (stdout,"Warning: Empty wildcard in configuration file line %d. Ignoring.\n",
	       lineNumber);
      return FOREMOST_OK;
    }

    wildcardLength = strlen(token);
    if (wildcardLength > 1) {
      fprintf(stderr,"Warning: Wildcard can only be one character,"
	      " but you specified %d characters.\n"
	      "         Using the first character, \"%c\", as the wildcard.\n",
	      (int) wildcardLength,token[0]);
    }

    wildcard = token[0];
    return FOREMOST_OK;
  }
    
  /* Collect up to NUM_SEARCH_SPEC_ELEMENTS tokens */
  while (token && (i < NUM_SEARCH_SPEC_ELEMENTS)){
    tokenarray[i] = token;
    i++;
    token = strtok(NULL," \t\n");
  }
  
  /* The last two tokens are optional; anything shorter is an error */
  missing = NUM_SEARCH_SPEC_ELEMENTS - i;
  if (missing < 0 || missing > 2) {
    fprintf(stderr, 
	    "\nERROR: In line %d of the configuration file, I was expecting %d tokens,\n"
	    "       but instead found only %d.\n", 
	    lineNumber,NUM_SEARCH_SPEC_ELEMENTS,i);
    return FOREMOST_ERROR_NO_SEARCH_SPEC;
  }
  if (missing == 2) {
    tokenarray[NUM_SEARCH_SPEC_ELEMENTS-2] = blank[0];
  }
  if (missing >= 1) {
    tokenarray[NUM_SEARCH_SPEC_ELEMENTS-1] = blank[1];
  }

  err = extractSearchSpecData(&(state->SearchSpec[state->specLines]),
			      tokenarray,state->modeQuick);
  if (err) {
    fprintf(stderr,
	    "\nERROR: Unknown error on line %d of the configuration file.\n"
	    ,lineNumber);
    /* Do not count a specification that could not be built */
    return err;
  }

  state->specLines++; 
  return FOREMOST_OK;
}



int readSearchSpecFile(struct foremostState *state) {

  int lineNumber=0, status;
  FILE *f;
  
  /* The buffer holds one line of the input file at a time
     The length should be more than enough for the whitespace on one line */
  char* buffer = malloc(MAX_SUFFIX_LENGTH  * sizeof(char) +
			(2 * MAX_STRING_LENGTH) * sizeof(char) +
			16 * sizeof(char) + 
			256 * sizeof(char) + 1);

  f = fopen(state->conffile,"r");  
  if (f == NULL) {
    fprintf (stderr,
	     "ERROR: Couldn't open configuration file: %s -- %s\n", 
	     state->conffile,strerror(errno));
    free(buffer);
    return FOREMOST_ERROR_FILE_OPEN;
  }

  while(fgets(buffer,MAX_SUFFIX_LENGTH+2*MAX_STRING_LENGTH+16+256,f)){
    
    lineNumber++;

    if (state->specLines > MAX_FILE_TYPES) {
      fprintf(stderr,"Your conf file contains too many file types.\n");
      fprintf(stderr,"This version was compiled with MAX_FILE_TYPES == %d.\n",
              MAX_FILE_TYPES);
      fprintf(stderr,"Increase MAX_FILE_TYPES, recompile, and try again.\n");
      free(buffer);
      return FOREMOST_ERROR_TOO_MANY_TYPES;
    }

    if ((status = processSearchSpecLine(state,buffer,lineNumber)) != FOREMOST_OK) {
      free(buffer);
      return status;
    }
  }

  /* We add an empty object to the end of the list so that we don't
     have to worry about hitting a NULL pointer later on. */
  state->SearchSpec[state->specLines].suffix = NULL;
  state->SearchSpec[state->specLines].casesensitive = 0;
  state->SearchSpec[state->specLines].length = 0;
  state->SearchSpec[state->specLines].begin = NULL;
  state->SearchSpec[state->specLines].beginlength = 0;
  state->SearchSpec[state->specLines].end = NULL;
  state->SearchSpec[state->specLines].endlength = 0;
  
  fclose(f);
  free(buffer);
  return FOREMOST_OK;
}


  

/* Install catch_alarm as the handler for SIGINT and SIGTERM. If either
   signal was being ignored before the call, it is set back to ignored.
   Everywhere except Win32, SIGWINCH is additionally routed to
   setttywidth. */
void registerSignalHandlers() {
    static const int stopSignals[] = { SIGINT, SIGTERM };
    unsigned int k;

    for (k = 0; k < sizeof(stopSignals) / sizeof(stopSignals[0]); k++) {
      /* signal() hands back the previous disposition; respect an
         inherited "ignore" by putting it back. */
      if (signal (stopSignals[k], catch_alarm) == SIG_IGN) {
        signal (stopSignals[k], SIG_IGN);
      }
    }

#ifndef __WIN32
    /* Note: I haven't found a way to get notified of 
       console resize events in Win32.  Right now the statusbar
       will be too long or too short if the user decides to resize 
       their console window while foremost runs.. */

    signal(SIGWINCH, setttywidth);
#endif 
}


/* Set up the state variable by allocating memory and loading
 * default values. Copies the command line arguments too.
 *
 * argv  - NULL-terminated argument vector of the program; its elements
 *         are joined with single spaces into state->invocation
 *         (truncated to fit MAX_STRING_LENGTH bytes).
 * state - structure to initialize. Every string member receives a
 *         zero-filled buffer of MAX_STRING_LENGTH bytes and SearchSpec
 *         receives room for MAX_FILE_TYPES+1 entries.
 *
 * Also resets the globals wildcard and signal_caught and installs the
 * signal handlers. Terminates the program with exit(-1) if memory
 * cannot be allocated.
 */
void initializeState(char **argv, struct foremostState *state) {
  
  char **arg;
  
  /* Allocate memory for the state. calloc leaves every buffer holding
     a valid empty string. */
  state->imagefile        = calloc(MAX_STRING_LENGTH, sizeof(char));
  state->inputFileList    = calloc(MAX_STRING_LENGTH, sizeof(char));
  state->conffile         = calloc(MAX_STRING_LENGTH, sizeof(char));
  state->outputdirectory  = calloc(MAX_STRING_LENGTH, sizeof(char));
  state->invocation       = calloc(MAX_STRING_LENGTH, sizeof(char));

  /* Let the compiler compute the element size instead of adding up the
     members by hand. */
  state->SearchSpec       = calloc(MAX_FILE_TYPES+1, sizeof(*state->SearchSpec));

  if (state->imagefile == NULL || state->inputFileList == NULL ||
      state->conffile == NULL || state->outputdirectory == NULL ||
      state->invocation == NULL || state->SearchSpec == NULL) {
    fprintf (stderr, "ERROR: Out of memory while initializing.\n");
    exit (-1);
  }

  state->specLines    = 0;
  state->fileswritten = 0;
  state->skip         = 0;
  state->modeVerbose      = FALSE;
  state->modeQuick        = FALSE;
  state->modeNoSuffix     = FALSE;
  state->useInputFileList = FALSE;
  state->auditFile        = NULL;


  /* Load the default values; always leave room for the terminator */
  strncpy(state->outputdirectory,FOREMOST_DEFAULT_OUTPUT_DIR,
	  MAX_STRING_LENGTH-1);
  state->outputdirectory[MAX_STRING_LENGTH-1] = '\0';
  strncpy(state->conffile,FOREMOST_DEFAULT_CONFIG_FILE,
	  MAX_STRING_LENGTH-1);
  state->conffile[MAX_STRING_LENGTH-1] = '\0';
  wildcard = FOREMOST_DEFAULT_WILDCARD;
  signal_caught = 0;
  
  /* And now copy the invocation string into the state. strncat writes
     a terminator after the characters it appends, so one byte of the
     remaining space is held back for it. */
  for (arg = argv; *arg != NULL; ++arg) {
    strncat(state->invocation,  
	    *arg, 
	    MAX_STRING_LENGTH-strlen(state->invocation)-1);
    strncat(state->invocation,
	    " ",
	    MAX_STRING_LENGTH-strlen(state->invocation)-1);
  }

  registerSignalHandlers();
}


/* Apply the command-line options to the state.
 *
 *   -V        print the copyright text and exit
 *   -h        print the usage text and exit
 *   -v        verbose mode
 *   -q        quick mode
 *   -n        do not put extensions on extracted files
 *   -s <n>    skip the first n bytes of each input file
 *   -c <file> configuration file
 *   -o <dir>  output directory
 *   -i <file> read the list of input files from <file>
 *
 * Any other option prints the usage text and parsing continues.
 * On return, the global optind (set by getopt) indexes the first
 * non-option argument.
 */
void processCommandLineArgs(int argc, char **argv, 
			    struct foremostState *state) {

  /* getopt returns an int; storing it in a plain char would make the
     comparison with -1 fail on platforms where char is unsigned. */
  int opt;

  while ((opt = getopt(argc, argv, "hvVqnf:c:o:s:i:")) != -1) {
    switch (opt) {
	
    case 'V':
      fprintf (stdout,FOREMOST_COPYRIGHT_STRING);
      exit (1);
      
    case 'h':
      usage();
      exit (1);
      
    case 'v':
      state->modeVerbose = TRUE;
      fprintf (stdout,"Verbose mode on\n");
      break;
      
    case 'q':
      state->modeQuick = TRUE;
      fprintf (stdout,"Quick mode on\n");
      break;
      
    case 's':
      state->skip = strtoull(optarg,NULL,10);
      /* Cast so that the argument matches %lld whatever type skip has */
      fprintf (stdout,"Skipping the first %lld bytes of each infile\n",
	       (long long) state->skip);
      break;
      
    case 'c':
      /* strncpy does not terminate the copy when the source fills the
	 buffer, so the terminator is written explicitly. */
      strncpy(state->conffile,optarg,MAX_STRING_LENGTH-1);
      state->conffile[MAX_STRING_LENGTH-1] = '\0';
      break;
      
    case 'o':
      /* We don't check if this directory exists just yet.
	 If necessary, we'll try to create it later on. */
      strncpy(state->outputdirectory,optarg,MAX_STRING_LENGTH-1);
      state->outputdirectory[MAX_STRING_LENGTH-1] = '\0';
      break;
      
    case 'i':
      /* The list name is not copied: the member is pointed at the
	 argument itself. */
      state->useInputFileList = TRUE;
      state->inputFileList = optarg;  
      break;	  
      
    case 'n':
      state->modeNoSuffix = TRUE;
      fprintf (stdout,"Extracting files without putting extensions on the filenames\n");
      break;

    default:
      usage();
    }
  }
}


/* Run realpath() on the path stored in `path`, a buffer of
   MAX_STRING_LENGTH bytes, and store the outcome back into it
   (truncated to fit if necessary, always NUL-terminated). */
static void resolvePathInPlace(char *path) {

  /* realpath() may write up to PATH_MAX bytes, so the scratch buffer
     must not be smaller than that. 4096 is a fallback for environments
     where <limits.h> does not define PATH_MAX. */
#ifdef PATH_MAX
  char resolved[(PATH_MAX > MAX_STRING_LENGTH) ? PATH_MAX : MAX_STRING_LENGTH];
#else
  char resolved[(4096 > MAX_STRING_LENGTH) ? 4096 : MAX_STRING_LENGTH];
#endif

  /* Seed the scratch buffer with the path as given, so that it holds a
     valid string even if realpath() fails without writing anything. */
  strncpy(resolved,path,sizeof(resolved)-1);
  resolved[sizeof(resolved)-1] = '\0';

  if (realpath(path,resolved) == NULL) {
    /* The path may simply not exist yet (the output directory can be
       created later on). Whatever the scratch buffer holds now is
       used, as before. NOTE(review): some C libraries leave the partly
       resolved path there on failure - confirm for the target
       platforms. */
    resolved[sizeof(resolved)-1] = '\0';
  }

  strncpy(path,resolved,MAX_STRING_LENGTH-1);
  path[MAX_STRING_LENGTH-1] = '\0';
}


/* Convert all of the filenames used to their full path values.
   Rewrites state->outputdirectory and state->conffile in place. */
void convertFileNames(struct foremostState *state) {

  resolvePathInPlace(state->outputdirectory);
  resolvePathInPlace(state->conffile);
}



/* Assumes that everything has been set up correctly, this function
 * does the real work of the program, calling the digging functions
 * on each of the image files we need to process.
 *
 * argc  - unused.
 * argv  - NULL-terminated list of image file names; consulted only when
 *         state->useInputFileList is false.
 * state - program state. In batch mode (useInputFileList set) the file
 *         names are read, one per line, from state->inputFileList into
 *         the state->imagefile buffer; otherwise state->imagefile is
 *         pointed at each argv entry in turn.
 *
 * digImageFile is called once per name and any non-zero result is
 * passed to handleError. If the batch list cannot be opened, the audit
 * file is closed and the program exits with -1.
 */
void digAllFiles(int argc, char **argv, struct foremostState *state) {

  int i = 0, j = 0; 
  size_t len;
  FILE* listoffiles = NULL;

  (void) argc;

  /* Now we finally start reading the image files */
  if (state->useInputFileList) {
    fprintf(stdout, "Using batch mode: reading list of files from %s instead of command line",state->inputFileList);
    listoffiles = fopen(state->inputFileList,"r");
    if (listoffiles == NULL){
      fprintf(stderr, "Couldn't open file: %s -- %s\n", 
	      (*(state->inputFileList)=='\0')?"<blank>":state->inputFileList,
	      strerror(errno));
      closeFile(state->auditFile);
      exit(-1);
    }

    /* Loop on the result of fgets itself: this stops cleanly at end of
       file and cannot spin forever on a persistent read error. */
    while (fgets(state->imagefile,MAX_STRING_LENGTH,listoffiles) != NULL) {
      j++;

      /* Chop the trailing newline, if there is one */
      len = strlen(state->imagefile);
      if (len > 0 && state->imagefile[len-1] == '\n') {
	state->imagefile[len-1] = '\x00';
      }
      
      if ((i = digImageFile(state))) {
	handleError(state,i);
      }
    }

    if (ferror(listoffiles)) {
      fprintf(stderr,
	      "Error reading line %d of %s. Stopping.\n", 
	      j+1,state->inputFileList);
    }
    closeFile(listoffiles);
  }
  else{
    for (; *argv != NULL; ++argv) {
      
      state->imagefile = *argv;
      
      if ((i = digImageFile(state))) {
	handleError(state,i);
      }		
    }
  }
}



/* Program entry point: set up the state, parse the command line, load
   the configuration file and dig through every image file named on the
   command line or in the batch list. Returns 0 once processing is over;
   set-up failures terminate the program through exit(-1). */
int main (int argc, char **argv){

  struct foremostState state;
  int nothingToDig;

  /* Sanity check first: quick mode only works when SIZE_OF_BUFFER is a
     whole multiple of FOREMOST_BLOCK_SIZE. This can only go wrong if
     somebody has edited foremost.h. */
  if (ldiv(SIZE_OF_BUFFER,FOREMOST_BLOCK_SIZE).rem != 0) {
    fprintf (stderr, FOREMOST_SIZEOFBUFFER_PANIC_STRING);
    exit (-1);
  }
  
#ifndef __GLIBC__
  setProgramName(argv[0]);
#endif

  printf (FOREMOST_BANNER_STRING);

  initializeState(argv,&state);
  processCommandLineArgs(argc,argv,&state);
  convertFileNames(&state);

  /* The options have been consumed; whatever remains on the command
     line names image files and is dealt with further down.

     Warn about one option combination that rarely makes sense. */
  if (state.modeQuick && (state.skip > 0)) {
    fputs ("\n\nWARNING: Quick mode is enabled while skipping bytes\n"
	   "         This may produce unusual or empty output.\n\n", stdout);
  }

  if (state.modeVerbose) {
    printf ("Output directory: %s\n", state.outputdirectory);
    printf ("Configuration file: %s\n", state.conffile);
  }

  /* Load the user's configuration file. handleError() is not used
     here because readSearchSpecFile prints its own error messages. */
  if (readSearchSpecFile(&state)) {
    exit(-1);
  }

  /* Set-up is complete. Find out how wide the terminal is... */
  setttywidth();

  /* ...and step over the options to reach the image file names. */
  argv += optind;
  nothingToDig = (*argv == NULL) && !state.useInputFileList;

  if (nothingToDig) {
    fputs ("\nERROR: No image files specified.\n\n", stdout);
    usage();
  } else {
    /* Open the audit file and prepare it for use. According to the
       original note this also checks that the output directory is
       empty. */
    if (openAuditFile(&state)) {
      fputs ("Aborting.\n\n", stderr);
      exit (-1);
    }
    digAllFiles(argc,argv,&state);
    closeFile(state.auditFile);
  }
  
  fputs ("\nForemost is done.\n", stdout);
  
  /* Nothing is free()d here: the process is about to end and the
     system reclaims all of its memory. */

  return 0;
}
