#define READ_BUFFER_INC 10000	/* Number of characters by which the buffer holding a
				   sequence body grows whenever it fills up */
#define N_SEQS_INC 100		/* Number of sequence slots added whenever the array of
				   sequences has to grow */
#define MAX_HEADER_LENGTH 300	/* Size in bytes of every header buffer; header lines
				   longer than this are truncated  */
#define MAX_STRIP_LENGTH 100	/* Intended upper bound on the strip (alphabet) length.
				   NOTE(review): not referenced in the code visible here -
				   confirm whether it is still needed  */

#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>
#include "my_types.h"
#include "data_interface.h"
#include "misc_functions.h"
#include "rand.h"


/* File-local helpers; their definitions appear further down in this file. */

/* Skips input up to the next '>' and stores that header line in p_c_seq. */
static int move_to_next_seq_start(FILE *fp, cSeqStruct *p_c_seq);
/* Reads the non-whitespace characters up to the next '>' (or end of input) into p_c_seq->body. */
static int read_seq_into_buffer(FILE *fp, cSeqStruct *p_c_seq);
/* Prints c when index i lies in [lb, up], a blank otherwise. */
static void print_char_or_space(char c, int i, int lb, int up);
/* Comparator handed to qsort() when the strip is sorted. */
static int char_compare(const char *i, const char *j);
/* Letter code at a position of a gapped sequence: -1 for a gap, -2 when out of range. */
static int retrieveSeqChar(gappedSeq seq, int pos);
/* Decodes a motif configuration number into one gap length per PWM. */
static void retrieveGapSetFromMotifConfig(const motif_Struct motif, const int motifConfig, int gaps[]);

/* Running data set identifier: alloc_gappedSeqVec() stamps every new set with the
   current value and then increments it, so other data can be associated with the set. */
static unsigned int DATA_SET_ID=0;

/* Letter code that marks gap positions inside gappedSeq bodies; defined in another
   translation unit. */
extern const iLetter GAP_CHAR;

cSeqVec read_seqs(char *input_file_name)	{
  FILE *fp;
  cSeqVec c_seqs;
  int n_seqs=0, n_max_seqs=0;

  if ( (fp = fopen(input_file_name, "r")) == NULL )
  {
    printf( "File %s does not exist!!!\n", input_file_name);
    exit(-1);
  }
  
  c_seqs.entry = NULL;
  while (!feof(fp)) {
    if (n_max_seqs <= n_seqs)
      assert( c_seqs.entry = realloc(c_seqs.entry, (n_max_seqs+=N_SEQS_INC)*sizeof(cSeqStruct)) );
    if (move_to_next_seq_start(fp, &(c_seqs.entry[n_seqs])) == 0)
      break;
    if (read_seq_into_buffer(fp, &(c_seqs.entry[n_seqs])) == 0) {
      printf( "Sequence %d has 0 length!!!\n", n_seqs+1);
	  continue;
    }
   // assert( c_seqs.entry[n_seqs].body.entry = realloc(c_seqs.entry[n_seqs].body.entry,
					      //c_seqs.entry[n_seqs].body.len * sizeof(char)) );
     n_seqs++;
  }
  fclose(fp);
  c_seqs.len = n_seqs;
  return c_seqs;
}


    
int move_to_next_seq_start(FILE *fp, cSeqStruct *p_c_seq)
/*
  Skips input until the next '>' and stores the header line that starts there.

  On success p_c_seq->header points to a freshly allocated buffer of
  MAX_HEADER_LENGTH bytes holding '>' plus the rest of the line (without the
  newline, truncated to MAX_HEADER_LENGTH-1 characters) and padded with blanks;
  the buffer is NOT NUL-terminated. The number of stored characters is written
  to p_c_seq->header_len and returned.

  Returns 0 when no further '>' exists; p_c_seq->header is then NULL.
*/
{
  int c;	/* int, not char: must be able to hold EOF as a distinct value */
  int h_len = 0;

  p_c_seq->header = NULL;
  while ((c = getc(fp)) != EOF && c != '>')
    ;
  if (c == EOF)
    return 0;

  /* Allocate only once a record is known to exist, and outside assert() so
     the allocation survives NDEBUG builds. */
  p_c_seq->header = malloc(MAX_HEADER_LENGTH * sizeof(char));
  if (p_c_seq->header == NULL) {
    fprintf(stderr, "move_to_next_seq_start: out of memory\n");
    abort();
  }

  p_c_seq->header[h_len++] = '>';
  /* Stop at EOF before storing, so that a final header line without a
     trailing newline does not get the EOF marker appended to it. */
  while ((c = getc(fp)) != EOF && c != '\n')
    if (h_len < MAX_HEADER_LENGTH-1)
      p_c_seq->header[h_len++] = (char) c;

  memset(p_c_seq->header + h_len, ' ', MAX_HEADER_LENGTH - h_len);
  return (p_c_seq->header_len = h_len);
}
    
int read_seq_into_buffer(FILE *fp, cSeqStruct *p_c_seq)
/*
  Reads characters up to (not including) the next '>' or the end of input and
  stores every non-whitespace one in p_c_seq->body.entry; any !isspace
  character is allowed to pass through. The buffer grows READ_BUFFER_INC
  characters at a time and is not NUL-terminated.

  A terminating '>' is pushed back so that the next header can be found.
  Returns the number of characters stored, which is also written to
  p_c_seq->body.len; body.entry stays NULL when nothing was stored.
*/
{
  int capacity = 0, seqlen = 0;
  int c;	/* int, not char: EOF must stay distinct and isspace() needs a non-negative value */

  p_c_seq->body.entry = NULL;
  while ((c = getc(fp)) != EOF && c != '>') {
    if (isspace(c))
      continue;
    /* Grow as soon as the buffer is full. The previous test
       (seqlen > capacity, with capacity starting at -1) allowed a write one
       element past the end at every growth boundary. */
    if (seqlen >= capacity) {
      void *grown = realloc(p_c_seq->body.entry,
                            (capacity + READ_BUFFER_INC) * sizeof(char));
      if (grown == NULL) {
        fprintf(stderr, "read_seq_into_buffer: out of memory\n");
        abort();
      }
      p_c_seq->body.entry = grown;
      capacity += READ_BUFFER_INC;
    }
    p_c_seq->body.entry[seqlen++] = (char) c;
  }
  if (c == '>')
    ungetc(c, fp);
  return (p_c_seq->body.len = seqlen);
}


iSeqVec filter_sequences(cSeqVec c_seqs, char i_strip[], int report_gaps)
/*
  Filtering so that only the letters in i_strip survive (case-insensitively).
  Each surviving letter is replaced by its index in the lower-cased, sorted
  copy of i_strip, which is returned in the "strip" field of the result
  (heap-allocated).

  If report_gaps is true, gap_len[k] of a sequence holds the number of dropped
  characters immediately BEFORE kept letter k (0 when there are none) and
  num_gaps counts the non-zero entries. A run of dropped characters at the very
  start of a sequence is therefore recorded in gap_len[0]; chunk_decomp()
  asserts that this entry is 0. Dropped characters after the last kept letter
  are not recorded. If report_gaps is false, gap_len is left NULL.

  Every header is copied in full (MAX_HEADER_LENGTH bytes).
  Aborts when memory runs out.
*/
{
  const int max_gap = (unsigned short) -1;	/* largest value a gap_len slot can hold */
  int seq, i, j, last_i, lstrip, iLet, gap;
  iSeqVec f_seqs;
  iSeqStruct *p_i_seq;
  cSeqStruct *p_c_seq;
  unsigned char dic[256] = {0};	/* translating dictionary; on the stack so it cannot leak */
  char *strip;

  lstrip = strlen(i_strip);
  strip = malloc((lstrip+1) * sizeof(char));
  if (strip == NULL) {
    fprintf(stderr, "filter_sequences: out of memory\n");
    abort();
  }
  strcpy(strip, i_strip);
  for (i = 0; i < lstrip; i++)
    strip[i] = (char) tolower((unsigned char) strip[i]);
  qsort(strip, (size_t) lstrip, sizeof (char),
        (int (*)(const void *, const void *)) char_compare);
  for (i = 0; i < lstrip; i++)  /* reserving 0 for non-translation and assuming lstrip < 256 */
    dic[(unsigned char) strip[i]] = i + 1;

  f_seqs.len = c_seqs.len;
  f_seqs.strip = strip;
  f_seqs.report_gaps = report_gaps;
  /* calloc(0, ...) may return NULL, so always request at least one element. */
  f_seqs.entry = calloc(c_seqs.len != 0 ? c_seqs.len : 1, sizeof(iSeqStruct));
  if (f_seqs.entry == NULL) {
    fprintf(stderr, "filter_sequences: out of memory\n");
    abort();
  }

  for (seq = 0; seq < c_seqs.len; seq++) {
    p_c_seq = c_seqs.entry+seq;	/* aesthetics */
    p_i_seq = f_seqs.entry+seq;	/* same */

    if (report_gaps) {
      p_i_seq->num_gaps = 0;
      p_i_seq->gap_len = calloc(p_c_seq->body.len != 0 ? p_c_seq->body.len : 1,
                                sizeof(unsigned short));
      if (p_i_seq->gap_len == NULL) {
        fprintf(stderr, "filter_sequences: out of memory\n");
        abort();
      }
    }

			/* allocate and copy the header */
    p_i_seq->header = malloc(MAX_HEADER_LENGTH * sizeof(char));
    if (p_i_seq->header == NULL) {
      fprintf(stderr, "filter_sequences: out of memory\n");
      abort();
    }
    memcpy(p_i_seq->header, p_c_seq->header, MAX_HEADER_LENGTH * sizeof(char));
    p_i_seq->header_len = p_c_seq->header_len;

			/* allocate and "translate" body minding gaps if so requested */
    p_i_seq->body = alloc_iLetterVec(p_c_seq->body.len);
    last_i = -1;
    for (i = 0, j = 0; i < p_c_seq->body.len; i++) {
      iLet = dic[(unsigned char) tolower((unsigned char) p_c_seq->body.entry[i])];
      if (iLet == 0)
        continue;	/* not in the strip: dropped */
      p_i_seq->body.entry[j++] = iLet-1;
      if (report_gaps) {
        gap = i-last_i-1;
        if (gap > 0) {
          p_i_seq->num_gaps++;
          /* gap_len is unsigned short: clamp instead of wrapping, so that a
             counted gap can never be stored as 0 */
          p_i_seq->gap_len[j - 1] = (unsigned short) (gap > max_gap ? max_gap : gap);
        }
        last_i = i;
      }
    }
    realloc_iLetterVec(&(p_i_seq->body), j);
  }
  return f_seqs;
}


double *nucl_frequency(iSeqVec seqs, int please_print, FILE *outfile)
/*
  Counts how often each letter of the strip occurs in seqs and returns the
  result as a heap array with one entry per strip letter; the caller must
  free() it.

  If please_print is non-zero the counts are converted to relative frequencies
  and a summary is written to outfile.
  NOTE(review): when please_print is zero the returned array holds raw counts,
  not frequencies. This long-standing behaviour is kept - confirm what callers
  expect.
*/
{
  int seq, i, lstrip, total=0;
  double *freqs;

  lstrip = strlen(seqs.strip);
  /* Allocation kept out of assert() so that it survives NDEBUG builds;
     at least one element is requested because calloc(0, ...) may return NULL. */
  freqs = calloc(lstrip != 0 ? lstrip : 1, sizeof(double));
  if (freqs == NULL) {
    fprintf(stderr, "nucl_frequency: out of memory\n");
    abort();
  }

  for (seq = 0; seq < seqs.len; seq++) {
    total += seqs.entry[seq].body.len;
    for (i = 0; i < seqs.entry[seq].body.len; i++)
      freqs[seqs.entry[seq].body.entry[i]]++;
  }

  if (please_print) {
    fprintf(outfile, "\nThere are %d nucl in %d sequences, distributed as follows:\n", total,
             seqs.len);
    for (i = 0; i < lstrip; i++) {
      if (total > 0)	/* no letters at all: keep the zeros rather than produce 0/0 */
        freqs[i] /= total;
      fprintf(outfile, "%c - %3.2f , ", seqs.strip[i], freqs[i]);
    }
    fprintf(outfile, "\n\n");
  }
  return freqs;
}


void print_a_line(iSeqStruct i_seq, char *strip, int at, int len, int padding, int header)
     /*
       Prints the "len" letters of the sequence "i_seq" starting at position "at"
       (translated according to "strip"), preceded and followed by "padding"
       flanking letters - useful for printing alignments. The three parts are
       separated by three blanks; positions outside the sequence are printed as
       blanks.
       Note that no "\n" are used.
       If "header" > 0 then the first "header" characters of the header (at most
       MAX_HEADER_LENGTH) are printed first, followed by the 1-based position.
       IMPORTANT: gaps are completely ignored though they really shouldn't be
     */
{
  const int last = i_seq.body.len - 1;
  int seg_from[3], seg_to[3];
  int i, s;

  if (header > 0) {
    for (i = 0; i < MIN2(header,MAX_HEADER_LENGTH); i++)
      printf("%c", i_seq.header[i]);
    printf(":%4d)", at+1);
  }

  /* left flank, site, right flank */
  seg_from[0] = at-padding;  seg_to[0] = at;
  seg_from[1] = at;          seg_to[1] = at+len;
  seg_from[2] = at+len;      seg_to[2] = at+len+padding;

  for (s = 0; s < 3; s++) {
    for (i = seg_from[s]; i < seg_to[s]; i++) {
      /* Look the letter up only for positions inside the sequence: indexing
         body.entry with an out-of-range position is an out-of-bounds read. */
      char c = (i >= 0 && i <= last) ? strip[i_seq.body.entry[i]] : ' ';
      print_char_or_space(c, i, 0, last);
    }
    if (s < 2)
      printf("   ");
  }

  return;
}

void print_char_or_space(char c, int i, int lb, int up)
     /*
       Writes "c" to stdout when the index "i" satisfies lb <= i <= up;
       otherwise writes a single blank so that columns stay aligned.
     */
{
    const int outside = (i < lb) || (i > up);

    printf("%c", outside ? ' ' : c);
}

int char_compare(const char *i, const char *j)
{
	/* Ordering used when sorting the strip: negative, zero or positive
	   according to the difference of the two characters. */
	const int lhs = *i;
	const int rhs = *j;

	return lhs - rhs;
}



/*
 * Function chunk_decomp decomposes a sequence into sequence chunks w.r.t. gaps.
 *
 * For every sequence of i_seqs a gappedSeq is built whose body holds the kept
 * letters at their original relative positions, with GAP_CHAR filling the
 * positions of the characters that were dropped between them. chunkOffsets[k]
 * and chunkLens[k] give the start and length of the k-th run of letters.
 * When i_seqs.report_gaps is 0 the body is a plain copy and forms one chunk.
 *
 * Headers are copied with a terminating NUL appended after header_len bytes.
 * "outfile" is not used. Aborts when memory runs out.
 */
gappedSeqVec chunk_decomp(iSeqVec i_seqs, FILE * outfile)
{
	int i, j;
	gappedSeqVec seqs;

	(void) outfile;

	seqs = alloc_gappedSeqVec(i_seqs.len);
	for(i = 0; i < seqs.len; i++)
	{
		const iSeqStruct *src = &i_seqs.entry[i];
		gappedSeq *dst = &seqs.entry[i];

		/* One extra byte for a NUL, so the header is also usable as a C
		   string (the source buffer carries no terminator). */
		dst->header_len = src->header_len;
		dst->header = malloc((size_t) dst->header_len + 1);
		if (dst->header == NULL) {
			fprintf(stderr, "chunk_decomp: out of memory\n");
			abort();
		}
		for(j = 0; j < dst->header_len; j++)
			dst->header[j] = src->header[j];
		dst->header[dst->header_len] = '\0';

		if(i_seqs.report_gaps == 0) {//gaps are omitted
			dst->numChunks = 1;
			dst->body = alloc_iLetterVec(src->body.len);
			for(j = 0; j < src->body.len; j++)
				dst->body.entry[j] = src->body.entry[j];
			dst->chunkOffsets = calloc(1, sizeof (int));
			dst->chunkLens = calloc(1, sizeof (int));
			if (dst->chunkOffsets == NULL || dst->chunkLens == NULL) {
				fprintf(stderr, "chunk_decomp: out of memory\n");
				abort();
			}
			dst->chunkOffsets[0] = 0;
			dst->chunkLens[0] = dst->body.len;
		}
		else	{
			int numChunks = src->num_gaps + 1;
			int totalLength = src->body.len;
			int pos = 0, l = 0, chunkIdx;

			dst->numChunks = numChunks;
			dst->chunkLens = calloc(numChunks, sizeof (int));
			dst->chunkOffsets = calloc(numChunks, sizeof (int));
			if (dst->chunkOffsets == NULL || dst->chunkLens == NULL) {
				fprintf(stderr, "chunk_decomp: out of memory\n");
				abort();
			}

			/* a gap in front of the first letter is not supported */
			if (src->body.len > 0)
				assert(src->gap_len[0] == 0);
			for (j = 1; j < src->body.len; j++)
				totalLength += src->gap_len[j];

			dst->body = alloc_iLetterVec(totalLength);
			/* element-wise fill: memset() would only be right for a 1-byte iLetter */
			for (j = 0; j < totalLength; j++)
				dst->body.entry[j] = GAP_CHAR;

			//assign the entries
			for (chunkIdx = 0; chunkIdx < numChunks; chunkIdx++)	{
				dst->chunkOffsets[chunkIdx] = pos;
				/* guard: without it a sequence with no letters left would be
				   read and written out of bounds by the do-while */
				if (l < src->body.len) {
					do {
						dst->body.entry[pos++] = src->body.entry[l++];
					} while ((l < src->body.len) && (src->gap_len[l] == 0));
				}
				dst->chunkLens[chunkIdx] = pos - dst->chunkOffsets[chunkIdx];
				if (l < src->body.len)
					pos += src->gap_len[l];	/* jump over the gap before the next chunk */
			}

			assert(pos == totalLength);
		}
	}
	seqs.strip = allocAndStrCpy(i_seqs.strip);
	return(seqs);
}


gappedSeqVec getChunkDS(char *fileName, char *strip, FILE *outfile)
     // Returns a gappedSeqVec filtered out of the file
{
  cSeqVec cSV;
  iSeqVec iSV;
  gappedSeqVec ds;

  cSV = read_seqs(fileName);
  iSV = filter_sequences(cSV, strip, TRUE);
  if (outfile != NULL) {
    fprintf(outfile, "\nIn File %s:", fileName);
    nucl_frequency(iSV, 1, outfile);
  }
  ds = chunk_decomp(iSV, NULL);
  free_c_seqs(cSV);
  free_i_seqs(iSV);
  return ds;
}


iLetterVec readFile2iLetterVec(char *fileName, char *strip, FILE *outfile)
     // Reads the input set from fileName and returns it as a iLetterVec
{
  cSeqVec cSV;
  iSeqVec iSet;
  int iSeq, N=0, j;
  iLetterVec iLV;

  cSV = read_seqs(fileName);
  iSet = filter_sequences(cSV, strip, 0); // filter training set (0 = ignore gaps)
  for (iSeq = 0; iSeq < iSet.len; iSeq++)
    N += iSet.entry[iSeq].body.len;
  iLV = alloc_iLetterVec(N);
  N = 0;
  for (iSeq = 0; iSeq < iSet.len; iSeq++)
    for (j = 0; j < iSet.entry[iSeq].body.len; j++)
      iLV.entry[N++] = iSet.entry[iSeq].body.entry[j];

  if (outfile != NULL) {
    fprintf(outfile, "\nIn Training file %s:", fileName);
    nucl_frequency(iSet, 1, outfile);
  }

  free_c_seqs(cSV);
  free_i_seqs(iSet);

  return iLV;
}

/*
unsigned int chunkSeqVecTotalLen(chunksSeqVec set)
     // Returns the total number of non-gap residues in set
{
  int iSeq;
  unsigned int tot=0;

  for (iSeq = 0; iSeq < set.len; iSeq++)
    tot += set.entry[iSeq].total_len;
  return tot;
}


chunksSeqVec iLetterVec2chunksSeqVec(iLetterVec block, chunksSeqVec moldSet)
     // Massage block into a chunksSeqVec in the mold of moldSet
{
  int iSeq, iChunk, n=0, i;
  chunksSeqVec set;
  chunksSeq *seq;
  chunkStruct *chunk;

  if (block.len != chunkSeqVecTotalLen(moldSet))
    errorMessage("In iLetterVec2chunksSeqVec: the lengths of the inputs differ");
  set = alloc_chunksSeqVec(moldSet.len);
  set.strip = allocAndStrCpy(moldSet.strip);
  for (iSeq = 0; iSeq < set.len; iSeq++) {
    seq = set.entry+iSeq;       // readability
    *seq = moldSet.entry[iSeq]; // just being lazy
    seq->header = allocAndStrCpy(moldSet.entry[iSeq].header);
    assert( seq->chunks = (void *) malloc(seq->num_chunks * sizeof(chunkStruct)) );
    for (iChunk = 0; iChunk < seq->num_chunks; iChunk++) {
      chunk = seq->chunks + iChunk;
      *chunk = moldSet.entry[iSeq].chunks[iChunk];
      assert( chunk->body.entry = (void *) malloc(chunk->body.len * sizeof(iLetter)) );
      for (i = 0; i < chunk->body.len; i++)
	chunk->body.entry[i] = block.entry[n++];
    }
  }
  return set;
}
*/

//sums the chunk lengths of every sequence in the set, i.e. the number of
//residues that are not gaps
unsigned int gappedSeqVecTotalLen(gappedSeqVec set)	{
	unsigned int total = 0;
	int seqIdx = 0;

	while (seqIdx < set.len)	{
		const gappedSeq *seq = set.entry + seqIdx;
		int chunkIdx = 0;

		while (chunkIdx < seq->numChunks)	{
			total += seq->chunkLens[chunkIdx];
			chunkIdx++;
		}
		seqIdx++;
	}
	return total;
}

gappedSeqVec iLetterVec2gappedSeqVec(iLetterVec block, gappedSeqVec moldSet)	{
	// Massage block into a gappedSeqVec in the mold of moldSet: the new set
	// copies moldSet's strip, headers and chunk layout, and its chunks are
	// filled, in order, with consecutive letters of block. Positions outside
	// the chunks hold GAP_CHAR.
	// block.len must equal gappedSeqVecTotalLen(moldSet); otherwise
	// errorMessage() is called.
	int n = 0, iSeq;
	gappedSeqVec set;

	if (block.len != gappedSeqVecTotalLen(moldSet))
		errorMessage("In iLetterVec2gappedSeqVec: the lengths of the inputs differ");

	set = alloc_gappedSeqVec(moldSet.len);
	set.strip = allocAndStrCpy(moldSet.strip);
	for (iSeq = 0; iSeq < set.len; iSeq++)	{
		const gappedSeq *mold = moldSet.entry + iSeq;
		gappedSeq *seq = set.entry + iSeq;
		int chunkIdx, i;

		// Copy the header by its recorded length: headers produced by
		// chunk_decomp() are sized by header_len rather than terminated, so a
		// string copy could run past the buffer. header_len itself used to be
		// left at 0. A NUL is appended for string-based consumers.
		seq->header_len = mold->header_len;
		seq->header = malloc((size_t) mold->header_len + 1);
		if (seq->header == NULL) {
			fprintf(stderr, "iLetterVec2gappedSeqVec: out of memory\n");
			abort();
		}
		if (mold->header_len > 0)
			memcpy(seq->header, mold->header, mold->header_len);
		seq->header[mold->header_len] = '\0';

		seq->numChunks = mold->numChunks;
		seq->chunkLens = calloc(mold->numChunks != 0 ? mold->numChunks : 1, sizeof(int));
		seq->chunkOffsets = calloc(mold->numChunks != 0 ? mold->numChunks : 1, sizeof(int));
		if (seq->chunkLens == NULL || seq->chunkOffsets == NULL) {
			fprintf(stderr, "iLetterVec2gappedSeqVec: out of memory\n");
			abort();
		}
		if (mold->numChunks > 0) {
			memcpy(seq->chunkLens, mold->chunkLens, mold->numChunks * sizeof(int));
			memcpy(seq->chunkOffsets, mold->chunkOffsets, mold->numChunks * sizeof(int));
		}

		seq->body = alloc_iLetterVec(mold->body.len);
		// element-wise fill: memset() would only be right for a 1-byte iLetter
		for (i = 0; i < mold->body.len; i++)
			seq->body.entry[i] = GAP_CHAR;

		for (chunkIdx = 0; chunkIdx < seq->numChunks; chunkIdx++)	{
			int absPos = seq->chunkOffsets[chunkIdx];
			int pos;
			for (pos = 0; pos < seq->chunkLens[chunkIdx]; pos++)	{
				seq->body.entry[absPos++] = block.entry[n++];
			}
		}
	}
	return set;
}

/* Added by Anand */
iLetterVec gappedSeq2iLetterVec(gappedSeq seq)
     // Returns a newly allocated iLetterVec that is an element-by-element copy
     // of the whole body of seq
{
	iLetterVec copy = alloc_iLetterVec(seq.body.len);
	int idx = seq.body.len;

	while (idx-- > 0)
		copy.entry[idx] = seq.body.entry[idx];
	return copy;
}

/* Added by Anand */
void iLetterVec2gappedSeq(iLetterVec block, gappedSeq *cSeq)
     // Overwrites the body of cSeq, element by element, with the contents of
     // block; errorMessage() is called first when the two lengths differ
{
	const int seqLen = cSeq->body.len;
	int idx;

	if (block.len != seqLen)
		errorMessage("In iLetterVec2gappedSeq: the lengths of the inputs differ");

	for (idx = 0; idx < seqLen; idx++)	{
		cSeq->body.entry[idx] = block.entry[idx];
	}
}

/* Added by Anand */
gappedSeqVec alloc_gappedSeqVec(int len)
     // Returns an allocated chunksSeqVec
{
  gappedSeqVec gsVec;

  assert( gsVec.entry = (gappedSeq *) calloc(len, sizeof(gappedSeq)) );
  gsVec.len = len;
  gsVec.id = DATA_SET_ID++;

  return gsVec;
}

/*
iLetterVec chunkSeq2iLetterVec(chunksSeq cSeq)
     // Returns an allocated iLetterVec with all the sequence letters
{
  iLetterVec seqV;
  int iChunk, n=0, i;

  seqV = alloc_iLetterVec(cSeq.total_len);
  for (iChunk = 0; iChunk < cSeq.num_chunks; iChunk++)
    for (i = 0; i < cSeq.chunks[iChunk].body.len; i++)
      seqV.entry[n++] = cSeq.chunks[iChunk].body.entry[i];

  return seqV;
}


void iLetterVec2chunks(iLetterVec block, chunksSeq *cSeq)
     // Uses block to fill the chunks of cSeq
{
  int iChunk, n=0, i;

  if (block.len != cSeq->total_len)
    errorMessage("In iLetterVec2chunks: the lengths of the inputs differ");
  for (iChunk = 0; iChunk < cSeq->num_chunks; iChunk++)
    for (i = 0; i < cSeq->chunks[iChunk].body.len; i++)
      cSeq->chunks[iChunk].body.entry[i] = block.entry[n++];
}
*/
/*
chunksSeqVec alloc_chunksSeqVec(int len)
     // Returns an allocated chunksSeqVec
{
  chunksSeqVec csVec;

  assert( csVec.entry = (chunksSeq *) calloc(len, sizeof(chunksSeq)) );
  csVec.len = len;
  csVec.id = DATA_SET_ID++;

  return csVec;
}
*/

iLetterVec alloc_iLetterVec(int len)
     // Returns an iLetterVec with "len" zero-initialised letters.
     // Aborts when memory runs out.
{
  iLetterVec vec;

  // allocation kept out of assert() so that it survives NDEBUG builds;
  // calloc(0, ...) may return NULL, so request at least one element
  vec.entry = calloc(len != 0 ? len : 1, sizeof(iLetter));
  if (vec.entry == NULL) {
    fprintf(stderr, "alloc_iLetterVec: out of memory\n");
    abort();
  }
  vec.len = len;
  return vec;
}


void realloc_iLetterVec(iLetterVec *vec, int len)
     // Resizes the storage of *vec to "len" letters through my_realloc() and
     // records the new length
{
  void *resized = (void *) my_realloc(vec->entry, len*sizeof(iLetter));

  vec->len = len;
  vec->entry = resized;
}


void free_c_seqs(cSeqVec c_seqs)
     // Releases the body and header of every sequence, then the entry array
{
  cSeqStruct *rec = c_seqs.entry;
  int remaining;

  for (remaining = c_seqs.len; remaining > 0; remaining--, rec++) {
    free(rec->body.entry);
    free(rec->header);
  }
  free(c_seqs.entry);
}


void free_i_seqs(iSeqVec i_seqs)
     // Releases the header, body and gap table of every sequence, then the
     // entry array. The strip of the set is not touched.
{
	iSeqStruct *rec = i_seqs.entry;
	int remaining;

	for (remaining = i_seqs.len; remaining > 0; remaining--, rec++) {
		/* free(NULL) does nothing, so unset members need no special case */
		free(rec->header);
		free(rec->body.entry);
		free(rec->gap_len);
	}
	free(i_seqs.entry);
}


/* Added by Anand */
void free_gappedSeqVec(gappedSeqVec seqs)	{
	// Releases every sequence of the set, then the strip and the entry array
	int idx = 0;

	while (idx < seqs.len)	{
		free_gappedSeq(seqs.entry[idx]);
		idx++;
	}
	free(seqs.strip);
	free(seqs.entry);
}

/* Added by Anand */
void free_gappedSeq(gappedSeq seq)	{
	// Releases the storage owned by one gapped sequence: header, chunk
	// tables and body
	free(seq.header);
	free(seq.chunkOffsets);
	free(seq.chunkLens);
	FREE_ATOMS_VEC(seq.body);
}

/*
void free_chunksSeqVec(chunksSeqVec seqs)	{
	int i;
	
	for(i = 0; i < seqs.len; i++){
		free_chunksSeq(seqs.entry[i]);
	}
	free(seqs.entry);
	free(seqs.strip);
}


void free_chunksSeq(chunksSeq seq)	{
	int i;

	for(i = 0; i < seq.num_chunks; i++)
	  FREE_ATOMS_VEC(seq.chunks[i].body);
	free(seq.header);
	free(seq.chunks);
}
*/

void permuteChunk(iLetterVec chunk, int *perm)
     //  Rearranges chunk in place: afterwards position k holds the letter that
     //  was at position perm[k]-1 (perm holds 1-based indices)
{
  const int n = chunk.len;
  iLetterVec snapshot = alloc_iLetterVec(n);
  int k;

  // keep the original order, since chunk is overwritten below
  for (k = 0; k < n; k++)
    snapshot.entry[k] = chunk.entry[k];

  k = 0;
  while (k < n) {
    chunk.entry[k] = snapshot.entry[perm[k] - 1]; // perm has values from 1 to N, not 0 to N-1
    k++;
  }

  free(snapshot.entry);
}

void permuteAllSeqs(gappedSeqVec set)
     // Permute all the sequences in the set:
     //  each sequence is permuted separately and chunk boundaries are ignored.
     // The permutation comes from rand_perm_prealloc() and covers the whole
     // body of a sequence.
     // NOTE(review): the body also holds the GAP_CHAR positions, so gap markers
     // are shuffled together with the letters while chunkOffsets/chunkLens are
     // left unchanged - confirm this is intended for sets that contain gaps.
{
  int iSeq, *perm=NULL, maxPerm = 0, seqLen;
  iLetterVec seqV;
  gappedSeq cSeq;

  for (iSeq = 0; iSeq < set.len; iSeq++) {
    cSeq = set.entry[iSeq];	// shallow copy: shares the body storage with the set
    seqLen = cSeq.body.len;
    if (seqLen > maxPerm) {
      // allocation kept out of assert() so that it survives NDEBUG builds
      void *grown = realloc(perm, seqLen*sizeof(int));
      if (grown == NULL) {
        fprintf(stderr, "permuteAllSeqs: out of memory\n");
        abort();
      }
      perm = grown;
      maxPerm = seqLen;
    }
    rand_perm_prealloc(perm, seqLen);
    seqV = gappedSeq2iLetterVec(cSeq);
    permuteChunk(seqV, perm);
    iLetterVec2gappedSeq(seqV, &cSeq);
    FREE_ATOMS_VEC(seqV);
  }
  free(perm);
}
/*
void permuteAllSeqs(chunksSeqVec set)
     // Permute all the sequences in the set: 
     //  each sequence is permuted separately and chunk boundaries are ignored
{
  int iSeq, *perm=NULL, maxPerm = 0, seqLen;
  iLetterVec seqV;
  chunksSeq cSeq;

  for (iSeq = 0; iSeq < set.len; iSeq++) {
    cSeq = set.entry[iSeq];
    seqLen = cSeq.total_len;
    if (seqLen > maxPerm) {
      maxPerm = seqLen;
      assert( perm = realloc(perm, maxPerm*sizeof(int)) );
    }
    rand_perm_prealloc(perm, seqLen);
    seqV = chunkSeq2iLetterVec(cSeq);
    permuteChunk(seqV, perm);
    iLetterVec2chunks(seqV, &cSeq);
    free(seqV.entry);
  }
  free(perm);
}
*/


#define NUM_FLANK_CHARS 15  // number of characters to be printed on each side of the site

void printSite(gappedSeqVec *set, int siteSeqID, int chunkID, int leftChunkOffset, Boolean siteRev, double siteScore, motif_Struct motif, int motifConfig, int motifSpan, FILE *output)
     /*
       Print the given site to the given output file using the format:
          NUM_FLANK_CHARS site NUM_FLANK_CHARS
       followed by the score, the 1-based sequence number and position, the
       strand and the sequence header.
       The site starts leftChunkOffset positions into chunk chunkID of sequence
       siteSeqID and covers motifSpan positions. Positions matched by a PWM are
       printed in capitals, the gaps between PWMs (decoded from motifConfig)
       and the flanks in small letters; gap positions of the sequence print as
       '-' and positions outside it as ' '.
       Sites that are RC are printed that way
       Note that the function assume the strip is "ACGT"!
     */
{
	int pos, i, iBuf = 0, *buffer;

	const char *printChar = " -acgt";	/* indexed by letter code + 2 */
	const char *printHitChar = " -ACGT";

	if (strcmp(printChar+2, set->strip) != 0)
	  errorMessage("The set strip is not 'acgt' as I expect");

	int bufferLen = 2*NUM_FLANK_CHARS + motifSpan;
	/* allocation kept out of assert() so that it survives NDEBUG builds */
	buffer = malloc(bufferLen * sizeof(int));
	if (buffer == NULL) {
		fprintf(stderr, "printSite: out of memory\n");
		abort();
	}

	gappedSeq *seq = set->entry+siteSeqID;
	int globalStart = seq->chunkOffsets[chunkID] + leftChunkOffset;

	for (pos = globalStart - NUM_FLANK_CHARS; pos < globalStart + motifSpan + NUM_FLANK_CHARS; pos++)
	  buffer[iBuf++] = retrieveSeqChar(*seq, pos);

	if (siteRev) {
		/* reverse-complement in place: swap the two ends while mapping the
		   letter codes 0..3 to 3..0; the special codes (-1, -2) are kept */
		int lo, hi;
		for (lo = 0, hi = bufferLen - 1; lo <= hi; lo++, hi--) {
			int left = buffer[lo], right = buffer[hi];
			buffer[lo] = (0 <= right && right <= 3) ? (3 - right) : right;
			buffer[hi] = (0 <= left && left <= 3) ? (3 - left) : left;
		}
	}

	int nPWMs = motif.nPWMs;
	int gaps[nPWMs > 0 ? nPWMs : 1];	/* never a zero-length array */
	retrieveGapSetFromMotifConfig(motif, motifConfig, gaps);
	if (nPWMs > 0)
		gaps[nPWMs - 1] = 0;	//sentinel

	for (iBuf = 0; iBuf < NUM_FLANK_CHARS; iBuf++)
	  fputc(printChar[buffer[iBuf]+2], output);
	fputc(' ', output);
	//print each pwm hit in caps letters, and each portion that isn't in the pwm in small letters
	int pwmIdx;
	for (pwmIdx = 0; pwmIdx < nPWMs; pwmIdx++)	{
		for (i = 0; i < motif.pwms[pwmIdx].width; i++, iBuf++)	{
			assert(iBuf < bufferLen);	//the motif must fit into the span that was buffered
			assert(0 <= buffer[iBuf] && buffer[iBuf] <= 3);	//must have a valid character at the pwm hits
			fputc(printHitChar[buffer[iBuf]+2], output);
		}
		for (i = 0; i < gaps[pwmIdx]; i++, iBuf++)	{
			assert(iBuf < bufferLen);
			fputc(printChar[buffer[iBuf]+2], output);
		}
	}
	fputc(' ', output);
	assert(iBuf == NUM_FLANK_CHARS + motifSpan);
	for (; iBuf < bufferLen; iBuf++)
		fputc(printChar[buffer[iBuf]+2], output);

	fprintf(output, "  Score: %.3g\t Seq/ loc: %3d/%5d ", siteScore, siteSeqID+1, globalStart+1);
	if(siteRev)
	  fprintf(output, "reversed strand");
	else
	  fprintf(output, "forward strand");
	fprintf(output, "\t'");
	for (i = 0; i < seq->header_len; i++)
	  fputc(seq->header[i], output);
	fprintf(output, "'\n");

	free(buffer);
}


/* Added by Anand */
//decode motifConfig as a mixed-radix number, last gap first: the gap that
//follows PWM k takes one of (maxNextGap - minNextGap + 1) values; the decoded
//gap lengths are written to gaps[0 .. nPWMs-2]
void retrieveGapSetFromMotifConfig(const motif_Struct motif, const int motifConfig, int gaps[])	{
	int remaining = motifConfig;
	int pwmIdx = motif.nPWMs - 1;

	while (pwmIdx-- > 0)	{	//pwmIdx runs from nPWMs-2 down to 0
		int span = motif.pwms[pwmIdx].maxNextGap - motif.pwms[pwmIdx].minNextGap + 1;

		gaps[pwmIdx] = motif.pwms[pwmIdx].minNextGap + (remaining % span);
		remaining /= span;
	}
}


int retrieveSeqChar(gappedSeq seq, int pos)
     /*
       Looks up the letter code stored at (global) position pos of seq.
       Returns:
         the code itself when it is at most 3,
         -1 when the position holds a gap (the code must then equal GAP_CHAR),
         -2 when pos lies outside the body of seq.
       Kept deliberately simple, since it is only used for printing.
     */
{
	int code;

	if (!(pos >= 0 && pos < seq.body.len))
		return -2;

	code = seq.body.entry[pos];
	if (code <= 3)
		return code;

	assert(code == GAP_CHAR);
	return -1;
}

/*
//Added by Anand, but later realized that they're not needed
chunksSeqVec gappedSeqVec2chunksSeqVec(gappedSeqVec seqVec)	{
	chunksSeqVec cSeqVec;
	cSeqVec.len = seqVec.len;
	cSeqVec.entry = calloc(seqVec.len, sizeof(chunksSeq));
	assert(cSeqVec.entry != NULL);
	int seqIdx = 0;
	for (seqIdx = 0; seqIdx < seqVec.len; seqIdx++) {
		cSeqVec.entry[seqIdx] = gappedSeq2chunksSeq(seqVec.entry[seqIdx]);
	}
	cSeqVec.id = seqVec.id;
	cSeqVec.strip = allocAndStrCpy(seqVec.strip);
	
	return cSeqVec;
}

chunksSeq gappedSeq2chunksSeq(gappedSeq seq)	{
	chunksSeq cSeq;
	
	cSeq.chunks = calloc(seq.numChunks, sizeof(chunkStruct));
	cSeq.num_chunks = seq.numChunks;
	cSeq.total_len = seq.body.len;
	cSeq.header_len = seq.header_len;
	cSeq.header = calloc(seq.header_len, sizeof(char));
	memcpy(cSeq.header, seq.header, seq.header_len * sizeof(char));
	
	int chunkIdx;
	for (chunkIdx = 0; chunkIdx < seq.numChunks; chunkIdx++)	{
		cSeq.chunks[chunkIdx].startPos = seq.chunkOffsets[chunkIdx];
		cSeq.chunks[chunkIdx].body = alloc_iLetterVec(seq.chunkLens[chunkIdx]);
		memcpy(cSeq.chunks[chunkIdx].body.entry, seq.body.entry + seq.chunkOffsets[chunkIdx], seq.chunkLens[chunkIdx] * sizeof(iLetter));
	}
	
	return cSeq;
}


gappedSeqVec chunksSeqVec2gappedSeqVec(chunksSeqVec cSeqVec)	{
	gappedSeqVec seqVec;
	seqVec.len = cSeqVec.len;
	seqVec.entry = calloc(cSeqVec.len, sizeof(gappedSeq));
	assert(seqVec.entry != NULL);
	int seqIdx = 0;
	for (seqIdx = 0; seqIdx < cSeqVec.len; seqIdx++)	{
		seqVec.entry[seqIdx] = chunksSeq2gappedSeq(cSeqVec.entry[seqIdx]);
	}
	seqVec.id = cSeqVec.id;
	seqVec.strip = allocAndStrCpy(cSeqVec.strip);	//dummy
	
	return seqVec;
}

gappedSeq chunksSeq2gappedSeq(chunksSeq cSeq)	{
	gappedSeq seq;
	
	seq.numChunks = cSeq.num_chunks;
	seq.body = alloc_iLetterVec(cSeq.total_len);
	memset(seq.body.entry, GAP_CHAR, cSeq.total_len * sizeof(iLetter));
	seq.header_len = cSeq.header_len;
	seq.header = calloc(cSeq.header_len, sizeof(char));
	memcpy(seq.header, cSeq.header, cSeq.header_len * sizeof(char));
	seq.chunkOffsets = calloc(cSeq.num_chunks, sizeof(int));
	seq.chunkLens = calloc(cSeq.num_chunks, sizeof(int));
	
	int chunkIdx;
	for (chunkIdx = 0; chunkIdx < cSeq.num_chunks; chunkIdx++)	{
		seq.chunkOffsets[chunkIdx] = cSeq.chunks[chunkIdx].startPos;
		seq.chunkLens[chunkIdx] = cSeq.chunks[chunkIdx].body.len;
		memcpy(seq.body.entry + cSeq.chunks[chunkIdx].startPos, cSeq.chunks[chunkIdx].body.entry, cSeq.chunks[chunkIdx].body.len * sizeof(iLetter));
	}
	return seq;
}
*/
