#ifndef __MOTIF_SCAN_H__
#define __MOTIF_SCAN_H__

#include "my_types.h"

// Properties
// One motif hit (site): where it starts, on which strand, its score, and the
// shape of the optimal hit.
typedef struct {
	int seqID;	// which sequence the hit belongs to
	int chunkID;	// ungapped chunk of the sequence where the motif match begins
	int leftChunkOffset;	// offset in the ungapped chunk where the motif match begins
	Boolean rev;	// is the hit on the reverse complement?
	double score;	// motif hit score
	int motifConfig;	// set of gaps between the pwms in the optimal motif hit (NOTE(review): how the set is encoded in an int is not visible in this header)
	int motifSpan;	// span of the motif in the optimal motif hit
} siteStruct;

// A vector of siteStruct elements.
typedef struct {
	siteStruct* entry;	// the elements (NOTE(review): who allocates/frees this array is not visible in this header)
	int len;	// length of the vector; presumably the number of valid elements in entry -- confirm
} siteStructVec;
	
/*
typedef struct {
  int seqID;    // which sequence
  int chunkID;  // which chunk
  int leftPos;  // left most occupied position within the chunk
  Boolean rev;  // reverse complement?
  double score; // site score
} siteStruct;      // Site specific information

typedef struct {
  siteStruct * entry;
  int len;
} siteStructVec; // a vector of siteStruct elements
*/

// Selected ("slctd") sites: the top K sites and the sites at or above a
// threshold T, each stored together with the K / T that was requested.
typedef struct {
  siteStructVec topK; // holds the top K sites
  int K;     // the requested K (though fewer might have been found)
  siteStructVec aboveT; // holds the sites >= T (presumably compared by siteStruct.score -- confirm)
  double T;  // the given threshold
} slctdSitesStruct;


// The following structure should be attached to the augmented DS structure.
// It collects the sites of a whole data set ("ds"): all sites, the selected
// sites across the set, and the same two views broken down per sequence.
typedef struct {
  siteStructVec allSites; // all ds sites lumped together (should be in descending order)
  slctdSitesStruct slctdSites; // selected sites across the entire set
  struct {
    siteStructVec * entry; // one site vector per sequence
    int len;               // length of entry; presumably the number of sequences -- confirm
  } perSeqSites;     // each sequence has its own vector (should be in descending order)
  struct {
    slctdSitesStruct * entry; // one selected-sites record per sequence
    int len;                  // length of entry; presumably the number of sequences -- confirm
  } perSeqSlctdSites; // good sites in each sequence
} dsSitesStruct;



// Takes a set of gapped sequences and a motif; results go through dsSites.
// NOTE(review): judging by the name, this scores the sites of the whole data
// set and stores them in *dsSites -- which fields are filled is not visible in
// this header; confirm in the implementation.
void get_dsSitesScores(gappedSeqVec set, motif_Struct motif, dsSitesStruct *dsSites);

//int get_chunkSites(iLetterVec chunk, motif_Struct motif, int scanDirection, int seqID, int chunkID, siteStruct * chunkSites, Boolean onlyCountGTT, double T);
// Motif scan over a single letter vector `seq`, identified by seqID and chunkID.
// NOTE(review): everything below is inferred from the parameter names -- confirm:
//   - seqSites is presumably the output array the found sites are written to;
//   - the int return value is presumably the number of sites found (or counted);
//   - onlyCountGTT together with T looks like a switch for "only count the
//     sites >= T instead of storing them";
//   - scanDirection presumably takes the same values as set_scanDirection().
int get_chunkSites(const iLetterVec seq, const motif_Struct motif, const int scanDirection, const int seqID, const int chunkID,
		siteStruct *seqSites, const Boolean onlyCountGTT, const double T);
		
//int get_gappedSeqMotifScores(const gappedSeq seq, const motif_Struct motif, const int scanDirection, const int seqID, 
//		siteStruct *seqSites, const Boolean onlyCountGTT, const double T);

// Top-K site search over the whole data set `set` for `motif`; K is the
// requested number of sites.
// NOTE(review): presumably fills dsSites->slctdSites.topK and returns the
// number of sites actually found (which may be fewer than K) -- confirm.
int findDsTopK(gappedSeqVec set, motif_Struct motif, int K, dsSitesStruct *dsSites);

// Per-sequence variant of the top-K site search.
// NOTE(review): Ks is presumably an array with one requested K per sequence
// in `set`, and the results presumably land in dsSites->perSeqSlctdSites --
// confirm both in the implementation.
void findPerSeqTopK(gappedSeqVec set, motif_Struct motif, int *Ks, dsSitesStruct *dsSites);

// Threshold-based site search over the whole data set `set` for `motif`;
// T is the threshold.
// NOTE(review): "GTT" presumably means "greater than T" (slctdSitesStruct
// describes aboveT as "sites >= T"). Presumably fills
// dsSites->slctdSites.aboveT and returns the number of such sites -- confirm.
int findDsSitesGTT(gappedSeqVec set, motif_Struct motif, double T, dsSitesStruct *dsSites);

// Per-sequence variant of the threshold-based site search; a single threshold
// T is passed for all sequences.
// NOTE(review): results presumably land in dsSites->perSeqSlctdSites -- confirm.
void findPerSeqSitesGTT(gappedSeqVec set, motif_Struct motif, double T, dsSitesStruct *dsSites);

// NOTE(review): presumably loads the training sequence for the null
// (background) model from the file named fname -- the expected file format
// and the behavior on I/O errors are not visible in this header.
void setNullTrainSeqFromFile(char *fname);

// NOTE(review): presumably sets up the null (background) model from trainData.
// `order` and `pseudoCount` look like the model order and the pseudo-count
// used when estimating it; the meaning and valid values of scanMode are not
// visible in this header -- confirm all of these in the implementation.
void setNullTrainSeq(iLetterVec trainData, int order, double pseudoCount, int scanMode);

// Setter for the avg2StrandsNull option.
// NOTE(review): presumably controls whether the null model is averaged over
// the two strands -- confirm in the implementation.
void set_avg2StrandsNull(Boolean value);

// Setter for the scan direction option.
// NOTE(review): the valid values and their meaning are not visible in this header.
void set_scanDirection(int value);

// Setter for the overlap threshold option.
// NOTE(review): units and valid range (fraction vs. number of positions) are
// not visible in this header -- confirm in the implementation.
void set_overlapThreshold(double value);

// Comparison function for two siteStruct elements.
// NOTE(review): the ordering key and sign convention are not visible here;
// the "descending order" remarks on dsSitesStruct suggest ordering by score --
// confirm. The parameters are typed siteStruct pointers rather than
// const void *, so if this is handed to qsort() the call site needs a cast.
int siteStructCompare(const siteStruct *e1, const siteStruct *e2);

// NOTE(review): presumably releases the memory held by the vectors inside
// *dsSites; whether dsSites itself is freed is not visible in this header.
void free_dsSitesStruct(dsSitesStruct *dsSites);

// Setter for the "print top K" flag.
// NOTE(review): presumably consulted by printSites() when it decides whether
// the top K sites should be printed -- confirm.
void set_printTopK(Boolean value);

// Setter for the "print GTT" flag.
// NOTE(review): presumably consulted by printSites() when it decides whether
// the GTT sites should be printed -- confirm.
void set_printGTT(Boolean value);

// This function decides if the top K or GTT sites should be printed.
// NOTE(review): FILE needs <stdio.h>, which this header does not include
// directly -- presumably it comes in via my_types.h; confirm.
void printSites(gappedSeqVec *set, dsSitesStruct *dsSites, motif_Struct *motif, char *setName, FILE *output);

// This function prints the top K sites from the given set of sequences to the
// given output.
// NOTE(review): setName is presumably a label for the set used in the printed
// output -- confirm.
void printTopKSites(gappedSeqVec *set, dsSitesStruct *dsSites, motif_Struct *motif, char *setName, FILE *output);

// This function prints the GTT sites from the given set of sequences to the
// given output.
// NOTE(review): "GTT" presumably means the sites >= the threshold T (see
// slctdSitesStruct.aboveT) -- confirm.
void printGTTSites(gappedSeqVec *set, dsSitesStruct *dsSites, motif_Struct *motif, char *setName, FILE *output);
// Same as printGTTSites() but does it one sequence at a time, so it is much
// more memory efficient.
// NOTE(review): GTTflag presumably selects between printing GTT sites and
// top K sites -- confirm in the implementation.
void printPerSeqSites(gappedSeqVec *set, dsSitesStruct *dsSites, motif_Struct *motif, char *setName, FILE *output, Boolean GTTflag);

// Specify what "K" should be when printing the top K sites.
// NOTE(review): K is passed as a string (strK); how it is parsed and which
// values are accepted is not visible in this header.
void set_printSitesTopK(char *strK);

#endif
