// Properties
// One scored site: where it is located (sequence, chunk, position within the
// chunk), whether it is the reverse complement, and the score it received.
// NOTE(review): whether the IDs/position are 0- or 1-based is not visible
// in this header -- confirm against the implementation.
typedef struct {
  int seqID;    // which sequence
  int chunkID;  // which chunk
  int leftPos;  // left most occupied position within the chunk
  Boolean rev;  // reverse complement?
  double score; // site score
} siteStruct;      // Site specific information

// A pointer to siteStruct elements paired with an integer length.
typedef struct {
  siteStruct * entry; // the elements
  int len;            // presumably the number of elements in entry -- confirm (not capacity?)
} siteStructVec; // a vector of siteStruct elements


// Sites selected by two criteria, each stored next to the parameter that was
// requested: the top K sites, and the sites scoring >= T.
// NOTE(review): since fewer than K sites may be found, topK.len presumably
// holds the number actually found -- confirm.
typedef struct {
  siteStructVec topK; // holds the top K sites
  int K;     // The requested K (though fewer might have been found)
  siteStructVec aboveT; // holds the sites >= T
  double T;  // The given threshold
} slctdSitesStruct;


// Site bookkeeping for a whole set of sequences: all sites pooled together,
// the sites of each sequence separately, and the selected (top K / >= T)
// subsets of both.
// The following structure should be attached to the augmented DS structure
// NOTE(review): "descending order" below presumably means descending score -- confirm.
typedef struct {
  siteStructVec allSites; // all ds sites lumped together (should be in descending order)
  slctdSitesStruct slctdSites; // selected sites across the entire set
  struct {
    siteStructVec * entry; // one siteStructVec per sequence
    int len;               // presumably the number of sequences -- confirm
  } perSeqSites;     // each sequence has its own vector (should be in descending order)
  struct {
    slctdSitesStruct * entry; // one slctdSitesStruct per sequence
    int len;                  // presumably the number of sequences -- confirm
  } perSeqSlctdSites; // good sites in each sequence
} dsSitesStruct;



// Presumably scores the sites of every sequence in `set` against `pwm` and
// stores them in *dsSites -- implementation not visible here, confirm.
// Returns nothing; any result is delivered through dsSites.
void get_dsSitesScores(chunksSeqVec set, PWM_Struct pwm, dsSitesStruct *dsSites);

// Presumably scans one chunk with `pwm` and writes the sites found into
// chunkSites, tagging each with the given seqID and chunkID -- confirm.
// NOTE(review): the int return is presumably the number of sites written, and
// the caller presumably must supply enough storage in chunkSites -- neither is
// visible in this header; verify against the implementation.
int get_chunkSites(iLetterVec chunk, PWM_Struct pwm, int scanDirection, int seqID, int chunkID, siteStruct * chunkSites);

// Presumably returns the number of sites in `chunk` whose score under `pwm`
// is >= T ("GTT" appears to correspond to the ">= T" selection used by
// slctdSitesStruct) -- confirm against the implementation.
int get_nChunkSitesGTT(iLetterVec chunk, PWM_Struct pwm, int scanDirection, double T);

// Presumably finds the top K sites across the entire set and records them in
// *dsSites (see slctdSitesStruct.topK) -- confirm.
// NOTE(review): the int return is presumably the number of sites actually
// found, which may be fewer than K -- verify.
int findDsTopK(chunksSeqVec set, PWM_Struct pwm, int K, dsSitesStruct *dsSites);

// Presumably the per-sequence counterpart of findDsTopK: finds the top sites
// within each sequence and records them in *dsSites -- confirm.
// NOTE(review): Ks is presumably an array with one requested K per sequence;
// its required length is not visible here -- verify against the caller.
void findPerSeqTopK(chunksSeqVec set, PWM_Struct pwm, int *Ks, dsSitesStruct *dsSites);

// Presumably finds the sites scoring >= T across the entire set and records
// them in *dsSites (see slctdSitesStruct.aboveT) -- confirm.
// NOTE(review): the int return is presumably the number of such sites -- verify.
int findDsSitesGTT(chunksSeqVec set, PWM_Struct pwm, double T, dsSitesStruct *dsSites);

// Presumably the per-sequence counterpart of findDsSitesGTT: finds the sites
// scoring >= T within each sequence and records them in *dsSites -- confirm.
void findPerSeqSitesGTT(chunksSeqVec set, PWM_Struct pwm, double T, dsSitesStruct *dsSites);

// NOTE(review): judging by the names only, this presumably configures a null
// (background) model from trainData, with `order` as the model order and
// pseudoCount as a smoothing count -- none of this is visible in this header;
// confirm against the implementation.
void setNullTrainSeq(iLetterVec trainData, int order, double pseudoCount);

// Setter for the scan direction. The accepted values and where the setting is
// stored are not visible here.
// NOTE(review): presumably the same quantity as the scanDirection argument of
// get_chunkSites / get_nChunkSitesGTT -- confirm.
void set_scanDirection(int value);

// Setter for the overlap threshold. Its units, valid range and how it is used
// are not visible here -- TODO confirm against the implementation.
void set_overlapThreshold(double value);

// Compares two sites and returns an int.
// NOTE(review): the ordering criterion and sign convention are not visible
// here; presumably by score, consistent with the "descending order" noted on
// dsSitesStruct -- confirm.
// The parameters are typed as const siteStruct *, not const void *, so this
// does not have the exact comparator type expected by qsort()/bsearch().
int siteStructCompare(const siteStruct *e1, const siteStruct *e2);

// Presumably releases the memory owned by *dsSites -- confirm.
// NOTE(review): whether the dsSitesStruct object itself is freed, or only the
// vectors it points to, is not visible in this header.
void free_dsSitesStruct(dsSitesStruct *dsSites);

// Setter for the "print top K" flag.
// NOTE(review): presumably consulted by printSites when choosing what to
// print -- confirm.
void set_printTopK(Boolean value);

// Setter for the "print GTT" flag.
// NOTE(review): presumably consulted by printSites when choosing what to
// print -- confirm.
void set_printGTT(Boolean value);

// this function decides if the top K or GTT sites should be printed
// NOTE(review): presumably driven by the flags set via set_printTopK /
// set_printGTT, writing to `output` -- confirm against the implementation.
void printSites(chunksSeqVec *set, dsSitesStruct *dsSites, PWM_Struct *pwm, char *setName, FILE *output);

// this function prints the top K sites from the given set of sequences to the given output
// NOTE(review): the output format and the role of setName (presumably a label
// for the set) are not visible here -- confirm.
void printTopKSites(chunksSeqVec *set, dsSitesStruct *dsSites, PWM_Struct *pwm, char *setName, FILE *output);

// this function prints the GTT sites from the given set of sequences to the given output
// NOTE(review): "GTT" presumably denotes sites scoring >= the threshold T;
// the output format and the role of setName are not visible here -- confirm.
void printGTTSites(chunksSeqVec *set, dsSitesStruct *dsSites, PWM_Struct *pwm, char *setName, FILE *output);
// Same but does it one sequence at a time so much more memory efficient
// NOTE(review): GTTflag presumably selects between printing the GTT sites and
// the top K sites -- which value means which is not visible here; confirm.
void printPerSeqSites(chunksSeqVec *set, dsSitesStruct *dsSites, PWM_Struct *pwm, char *setName, FILE *output, Boolean GTTflag);

// specify what "K" should be when printing the top K sites
// K is passed as a string (strK).
// NOTE(review): the accepted format and how invalid input is handled are not
// visible here -- confirm against the implementation.
void set_printSitesTopK(char *strK);
