/*******************************************************
                        PFTOOLS
 *******************************************************
  Oct 3, 2011 threads.h
 *******************************************************
 (C) 2011 Swiss Institute of Bioinformatics
     Thierry Schuepbach (thierry.schuepbach@isb-sib.ch)
 *******************************************************/

/*
 * Thread entry point: heuristic scoring, SSE 4.1 flavour.
 *
 * _Data must point to a struct ThreadData. The fields read here are prf,
 * FASTA, TransposeMatch.i, Array, start, stop and either SequenceFileName
 * (plain file I/O) or SequenceFileMap (when __USE_MMAP__ is defined).
 *
 * For every sequence index i in [start, stop) the sequence is read,
 * translated with prf->Alphabet_Mapping and scored with
 * TransposeHeuristic_sse41(); the score is stored in Scores[i], where Scores
 * is the address of the UnsignedScores member of the first Array element.
 *
 * Returns (void*) 1 if the sequence buffer cannot be allocated or the
 * sequence file cannot be opened; otherwise ends through pthread_exit(0).
 */
static void *thread_heuristic_sse41(void * _Data)
{
  struct ThreadData * const Data              = (struct ThreadData*) _Data;
  const struct Profile * const restrict prf   = Data->prf;
  const FASTAStructure * const restrict FASTA = Data->FASTA;
  const int * const restrict TransposeMatch   = Data->TransposeMatch.i;
  unsigned int * const restrict Scores        = &(Data->Array->UnsignedScores);
  Sequence SeqData;

  /* Allocate memory to hold sequence */
  SeqData.Data.Memory = malloc(FASTA->MaxSequenceSize*sizeof(unsigned char));
  if (SeqData.Data.Memory == NULL) {
    fputs("Thread Cannot allocate memory for sequence.\n", stderr);
    return (void*) 1;
  }

  /* Open sequence file, each thread uses its own stream */
#ifndef __USE_MMAP__
  FILE* inSequence = fopen(Data->SequenceFileName, "r");
  if (inSequence == NULL) {
    /* Do not hand a NULL stream to the reader or to fclose */
    fputs("Thread cannot open sequence file.\n", stderr);
    free(SeqData.Data.Memory);
    return (void*) 1;
  }
#else
  const char * const restrict SequenceFileMap = Data->SequenceFileMap;
#endif
  const size_t Start = Data->start;
  const size_t Stop  = Data->stop;

  /* LOOPS ON SEQUENCES */
  for (size_t i=Start; i<Stop; ++i) {
#ifndef __USE_MMAP__
    PFSequence * PFSeq = ReadSequenceIndex(&SeqData, i, inSequence, FASTA->DataPtr);
#else
    PFSequence * PFSeq = MMAPReadSequenceIndex(&SeqData, i, SequenceFileMap, FASTA->DataPtr, 0
#ifdef MMAP_DEBUG
    , Data->threadId, 0, *(Data->maplength)
#endif
    );
#endif
    /* Translate sequence into profile alphabet indices */
    PFSeq = TranslateSequenceToIndex(PFSeq, prf->Alphabet_Mapping);

    Scores[i] = TransposeHeuristic_sse41(TransposeMatch, prf->Alphabet_Length, prf->Length, PFSeq);
  }

  /* close sequence file */
#ifndef __USE_MMAP__
  fclose(inSequence);
#endif

  /* Free Memory */
  free(SeqData.Data.Memory);

  pthread_exit(0);
}


/*
 * Thread entry point: heuristic scoring, SSE2 flavour.
 *
 * _Data must point to a struct ThreadData. The fields read here are prf,
 * FASTA, TransposeMatch.f, Array, start, stop and either SequenceFileName
 * (plain file I/O) or SequenceFileMap (when __USE_MMAP__ is defined).
 *
 * For every sequence index i in [start, stop) the sequence is read,
 * translated with prf->Alphabet_Mapping and scored with
 * TransposeHeuristic_sse2(); the score is stored in Scores[i], where Scores
 * is the address of the UnsignedScores member of the first Array element.
 *
 * Returns (void*) 1 if the sequence buffer cannot be allocated or the
 * sequence file cannot be opened; otherwise ends through pthread_exit(0).
 */
static void *thread_heuristic_sse2(void * _Data)
{
  struct ThreadData * const Data              = (struct ThreadData*) _Data;
  const struct Profile * const restrict prf   = Data->prf;
  const FASTAStructure * const restrict FASTA = Data->FASTA;
  const float * const restrict TransposeMatch = Data->TransposeMatch.f;
  unsigned int * const restrict Scores        = &(Data->Array->UnsignedScores);
  Sequence SeqData;

  /* Allocate memory to hold sequence */
  SeqData.Data.Memory = malloc(FASTA->MaxSequenceSize*sizeof(unsigned char));
  if (SeqData.Data.Memory == NULL) {
    fputs("Thread Cannot allocate memory for sequence.\n", stderr);
    return (void*) 1;
  }

  /* Open sequence file, each thread uses its own stream */
#ifndef __USE_MMAP__
  FILE* inSequence = fopen(Data->SequenceFileName, "r");
  if (inSequence == NULL) {
    /* Do not hand a NULL stream to the reader or to fclose */
    fputs("Thread cannot open sequence file.\n", stderr);
    free(SeqData.Data.Memory);
    return (void*) 1;
  }
#else
  const char * const restrict SequenceFileMap = Data->SequenceFileMap;
#endif
  const size_t Start = Data->start;
  const size_t Stop  = Data->stop;

  /* LOOPS ON SEQUENCES */
  for (size_t i=Start; i<Stop; ++i) {
#ifndef __USE_MMAP__
    PFSequence * PFSeq = ReadSequenceIndex(&SeqData, i, inSequence, FASTA->DataPtr);
#else
    PFSequence * PFSeq = MMAPReadSequenceIndex(&SeqData, i, SequenceFileMap, FASTA->DataPtr, 0
#ifdef MMAP_DEBUG
    , Data->threadId, 0, *(Data->maplength)
#endif
    );
#endif
    /* Translate sequence into profile alphabet indices */
    PFSeq = TranslateSequenceToIndex(PFSeq, prf->Alphabet_Mapping);

    Scores[i] = TransposeHeuristic_sse2(TransposeMatch, prf->Alphabet_Length, prf->Length, PFSeq);
  }

  /* close sequence file */
#ifndef __USE_MMAP__
  fclose(inSequence);
#endif

  /* Free Memory */
  free(SeqData.Data.Memory);

  pthread_exit(0);
}

static void *thread_xali1( void * _Data)
{
  Sequence SeqData;
  const struct Profile * const restrict prf   = ((struct ThreadData*) _Data)->prf;
  const FASTAStructure * const restrict FASTA = ((struct ThreadData*) _Data)->FASTA;
  const unsigned int * const restrict SeqID   = &(((struct ThreadData*) _Data)->Array[0].ToDoID);
  int * const restrict Scores                 = ((struct ThreadData*) _Data)->FilterScores;
  PFSequence * PFSeq;

  /* Allocate memory to hold sequence */
  SeqData.Data.Memory = malloc(FASTA->MaxSequenceSize*sizeof(unsigned char));
  if (SeqData.Data.Memory == NULL) {
    fputs("Thread Cannot allocate memory for sequence.\n", stderr);
    return (void*) 1;
  }
  /* Allocate work aligned memory for xali1 */
  int * Work = _mm_malloc((1+prf->Length)*4*sizeof(int)+63,64);
  if (Work == NULL) return (void*) 1;
  
  /* Open sequence file*/
#ifndef __USE_MMAP__
  FILE* inSequence = fopen(((struct ThreadData*) _Data)->SequenceFileName, "r");
#else
  const char * const restrict SequenceFileMap = ((struct ThreadData*) _Data)->SequenceFileMap;
#endif
  
  size_t Start  = ((struct ThreadData*) _Data)->start;
  size_t Stop   = ((struct ThreadData*) _Data)->stop;
  
  const _Bool RealFilterScore = (((struct ThreadData*) _Data)->counter > 0 ) ? true : false;

  /* Do we need to compute the cutoff */
  if (NormalizedToRawFunction == &N2R_3) {   
    /* LOOPS ON SEQUENCES */
    for (size_t i=Start; i<Stop; ++i) {
#ifndef __USE_MMAP__
      PFSeq = ReadSequenceIndex(&SeqData, SeqID[i], inSequence, FASTA->DataPtr);
#else
      PFSequence * PFSeq = MMAPReadSequenceIndex(&SeqData, SeqID[i], SequenceFileMap, FASTA->DataPtr, 0
#ifdef MMAP_DEBUG
      , ((struct ThreadData*) _Data)->threadId, 0, *(((struct ThreadData*) _Data)->maplength)
#endif
      );
#endif
      /* Translate first sequence */
      PFSeq = TranslateSequenceToIndex(PFSeq, prf->Alphabet_Mapping);
      const float RAVE = ComputeAverageFrequencies(PFSeq, Average);
      const int CutOff = NormalizedToRawFunction(prf->CutOffData.Values[prf->Level].RCUT[prf->Mode], PFSeq->Length, RAVE);
      Scores[i] = xali1_ptr(prf, PFSeq->ProfileIndex, Work, 0, PFSeq->Length, CutOff, RealFilterScore);
    }
  } 
  else {
    const int CutOff = prf->CutOffData.Values[prf->Level].ICUT;
    /* LOOPS ON SEQUENCES */
    for (size_t i=Start; i<Stop; ++i) {
#ifndef __USE_MMAP__
      PFSeq = ReadSequenceIndex(&SeqData, SeqID[i], inSequence, FASTA->DataPtr);
#else
      PFSequence * PFSeq = MMAPReadSequenceIndex(&SeqData, SeqID[i], SequenceFileMap, FASTA->DataPtr, 0
#ifdef MMAP_DEBUG
      , ((struct ThreadData*) _Data)->threadId, 0, *(((struct ThreadData*) _Data)->maplength)
#endif
      );
#endif
      /* Translate first sequence */
      PFSeq = TranslateSequenceToIndex(PFSeq, prf->Alphabet_Mapping);

      Scores[i] = xali1_ptr(prf, PFSeq->ProfileIndex, Work, 0, PFSeq->Length, CutOff, RealFilterScore);
    }
  } 
  /* close sequence file */
#ifndef __USE_MMAP__
  fclose(inSequence);
#endif
  
  /* Free Memory */
  free(SeqData.Data.Memory);
  _mm_free(Work);
  
  pthread_exit(0);;
}

static void *thread_xaliPT( void * _Data)
{ 
  Sequence SeqData;
  const struct Profile * const restrict prf   = ((struct ThreadData*) _Data)->prf;
  const FASTAStructure * const restrict FASTA = ((struct ThreadData*) _Data)->FASTA; 
  const unsigned int * const restrict SeqID   = &(((struct ThreadData*) _Data)->Array->ToDoID);
  char * restrict Sequences                   = ((struct ThreadData*) _Data)->Sequences;
  PFSequence * PFSeq;

  /* Allocate memory to hold sequence */
  SeqData.Data.Memory = (void*) malloc(FASTA->MaxSequenceSize*sizeof(unsigned char));
  if (SeqData.Data.Memory == NULL) {
    fputs("Thread cannot allocate memory for sequence.\n", stderr);
    return (void*) 1;
  }
  /* Allocate work aligned memory for xali1 */

  union lScores * const restrict iop   = _mm_malloc((1+prf->Length)*sizeof(union lScores), 16);
  union Positions * const restrict iom = _mm_malloc((1+prf->Length)*sizeof(union Positions), 16);
  union Positions * const restrict ioi = _mm_malloc((1+prf->Length)*sizeof(union Positions), 16);
  struct Alignment * const restrict alignment = _mm_malloc(NALI*sizeof(struct Alignment),16);
  _Bool * const restrict Lock = _mm_malloc(FASTA->MaxSequenceSize*sizeof(_Bool), 16);
  if ( iop == NULL || iom == NULL || ioi == NULL || alignment == NULL || Lock == NULL) return (void*) 1;

  /* Open sequence file */
#ifndef __USE_MMAP__
  FILE* inSequence = fopen(((struct ThreadData*) _Data)->SequenceFileName, "r");
#else
  const char * const restrict SequenceFileMap = ((struct ThreadData*) _Data)->SequenceFileMap;
#endif

  size_t Start = ((struct ThreadData*) _Data)->start;
  size_t Stop  = ((struct ThreadData*) _Data)->stop;

  unsigned int AlignedSeqCounter = 0;
  // Allocate on the stack for maximum NALI alignment
  char ** const AlignedSequences = (char **) alloca((NALI+1)*sizeof(char *));
  
  if (NormalizedToRawFunction != &N2R_3) {   
    register const int CutOff = prf->CutOffData.Values[prf->Level].ICUT;
  
    /* LOOPS ON SEQUENCES */
    for (size_t i=Start; i<Stop; ++i) {
#ifndef __USE_MMAP__
      PFSeq = ReadSequenceIndex(&SeqData, (size_t) SeqID[i], inSequence, FASTA->DataPtr);
#else
      PFSeq = MMAPReadSequenceIndex(&SeqData, (size_t) SeqID[i], SequenceFileMap, FASTA->DataPtr, 0
#ifdef MMAP_DEBUG
      , ((struct ThreadData*) _Data)->threadId, 0, *(((struct ThreadData*) _Data)->maplength)
#endif
      );
#endif
      /* Translate first sequence */
      PFSeq = TranslateSequenceToIndex(PFSeq, prf->Alphabet_Mapping);

      /* Clear Lock */
      memset(Lock, 0, FASTA->MaxSequenceSize*sizeof(_Bool));
      
      // It seems we must have sequence starting from 1 here
      const int nali = xalip_ptr(prf, PFSeq->ProfileIndex, iop, iom, ioi, 1, PFSeq->Length, alignment,
			         Lock, prf->DisjointData.NDIP[0], prf->DisjointData.NDIP[1], false, 
			         CutOff, NALI); 
			    
      if (nali <= 0) {
	fprintf(stderr,"Thread %lu : Internal error xalip reported no possible alignment for sequence %lu(%u) (nali=%i)!\n%s\n",
		((struct ThreadData*) _Data)->threadId, i, SeqID[i], nali, SeqData.Data.Header);
	exit(1);          
      }
      
      // Clear memory to hold sequences, NOT NEEDED in fact
      //memset(&Sequences, 0, nali*3*(1+prf->Length)*sizeof(char));
      
      // Alignement is not filled from start !!!
      for ( int j=1; j<=nali; j++) {
      
	/* Remove lock for aligned sequence generation */
	memset(Lock, 0, FASTA->MaxSequenceSize*sizeof(_Bool));
	
	if (xalit_ptr(prf, prf->DisjointData.NDIP[0], prf->DisjointData.NDIP[1], 1, PFSeq->Length, &(PFSeq->ProfileIndex[0]),
		      &Sequences[j*(prf->Length+1)*3], iop, &alignment[j], Lock) < 0 ) {
	  fputs("Internal error within xalit!\n", stderr);
	  exit(1);
	}
	AlignedSequences[j-1] = &Sequences[j*(prf->Length+1)*3 + 1];
	++AlignedSeqCounter;
      }
      pthread_mutex_lock(&PrintLock);
      PrintFunction(prf, (const char ** const) AlignedSequences, &alignment[1], SeqData.Data.Header, PFSeq->Length, 0.0f, nali);
      pthread_mutex_unlock(&PrintLock);
    }
  } 
  else {
    /* LOOPS ON SEQUENCES */
    for (size_t i=Start; i<Stop; ++i) {
#ifndef __USE_MMAP__
      PFSeq = ReadSequenceIndex(&SeqData, (size_t) SeqID[i], inSequence, FASTA->DataPtr);
#else
      PFSeq = MMAPReadSequenceIndex(&SeqData, (size_t) SeqID[i], SequenceFileMap, FASTA->DataPtr, 0
#ifdef MMAP_DEBUG
      , ((struct ThreadData*) _Data)->threadId, 0, *(((struct ThreadData*) _Data)->maplength)
#endif
      );
#endif
      /* Translate first sequence */
      PFSeq = TranslateSequenceToIndex(PFSeq, prf->Alphabet_Mapping);
      
      const float RAVE = ComputeAverageFrequencies(PFSeq, Average);
      const int CutOff = NormalizedToRawFunction(prf->CutOffData.Values[prf->Level].RCUT[prf->Mode], PFSeq->Length, RAVE);

      /* Clear Lock */
      memset(Lock, 0, FASTA->MaxSequenceSize*sizeof(_Bool));
      
      // It seems we must have sequence starting from 1 here
      const int nali = xalip_ptr(prf, PFSeq->ProfileIndex, iop, iom, ioi, 1, PFSeq->Length, alignment,
			    Lock, prf->DisjointData.NDIP[0], prf->DisjointData.NDIP[1], false, 
			    CutOff, NALI); 
			    
      if (nali <= 0) {
	fprintf(stderr,"Thread %lu : Internal error xalip reported no possible alignment for sequence %lu(%u) (nali=%i)!\n%s\n",
		((struct ThreadData*) _Data)->threadId, i, SeqID[i], nali, SeqData.Data.Header);
	exit(1);          
      }

      // Clear memory to hold sequences, NOT NEEDED in fact
      //memset(&Sequences, 0, nali*3*(1+prf->Length)*sizeof(char));

      // Alignement is not filled from start !!!
      for ( int j=1; j<=nali; j++) {
      
	/* Remove lock for aligned sequence generation */
	memset(Lock, 0, FASTA->MaxSequenceSize*sizeof(_Bool));
	
  //        fprintf(stdout,"%s\n", SeqData.Data.Header); fflush(stdout);
	if (xalit_ptr(prf, prf->DisjointData.NDIP[0], prf->DisjointData.NDIP[1], 1, PFSeq->Length, &(PFSeq->ProfileIndex[0]),
		      &Sequences[j*(prf->Length+1)*3], iop, &alignment[j], Lock) < 0 ) {
	  fputs("Internal error within xalit!\n", stderr);
	  exit(1);
	}
	AlignedSequences[j-1] = &Sequences[j*(prf->Length+1)*3 + 1];
	++AlignedSeqCounter;
      }
      pthread_mutex_lock(&PrintLock);
      PrintFunction(prf, (const char ** const) AlignedSequences, &alignment[1], SeqData.Data.Header, PFSeq->Length, RAVE, nali);
      pthread_mutex_unlock(&PrintLock);
    }
  }

  /* Set the number of aligned sequences */
  ((struct ThreadData*) _Data)->counter = AlignedSeqCounter; 

  /* close sequence file */
#ifndef __USE_MMAP__
  fclose(inSequence);
#endif
  /* Free Memory */
  free(SeqData.Data.Memory);
  _mm_free(iop);
  _mm_free(iom);
  _mm_free(ioi);
  _mm_free(alignment);
  _mm_free(Lock); 

  pthread_exit(0);;
}
