/*
  Manage the tests that gauge the difference between two sets
*/


#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <limits.h>
#include <float.h>
#include <assert.h>
#include <math.h>
#include "my_types.h"
#include "misc_functions.h"
#include "data_interface.h"
#include "markov.h"
#include "pwms.h"
#include "sitePrtctdBtstrp.h"
#include "mc_sets.h"
#include "motif_scan.h"
#include "tests.h"
#include "diff_tests.h"


// Pseudocount used by get_MC_stats: it is added both to the MC counts and to numRandomSets
// when the MC p-values are estimated.
#define MC_PSEUDO 0.1 // the pseudocount added for MC tests to avoid a p-value of 0
extern FILE *outputFile; // defined elsewhere; printTests writes every result here as well as to stdout

// What the generic "MC" test name expands to in set_feature_test: the set-wide MC test,
// the per-sequence MC test, or both. Selected through set_MC_scope.
typedef enum {SetWide=0, PerSeq, SetAndPerSeq} FeaturesScope;
static FeaturesScope MC_scope; // which type of MC stats

static char *MWsigEval;   // How the MW test will be evaluated (handed as is to MW_test; see set_MWsigEval)

static int numRandomSets; // used in the MC test: number of random set pairs generated by get_MC_stats
static int K;             // in top K selected sites tests (set by set_feature_test or set_Kvalue)
static double Perc;       // in top % selected sites tests; multiplied by a number of sites to obtain a K

  // Note that the features have to be in the same order as their names!!!
  // set_feature_test assigns the result of findStringOnce(name, featureNames) directly to a
  // Features variable, so (presumably) the position of a name in featureNamesD must equal
  // the value of the matching enumerator. N_FEATURES is the number of features, not a feature.
typedef enum {FreqScoresGTT=0, QualityScoresGTT, TopK, TopPercent, HandT, HandMW, MC_FreqMeanGTT, N_FEATURES} Features; 
char *featureNamesD[] = {"freqScoresGTT", "qualityScoresGTT", "topK", "topPercent", "HandT", "HandMW", "setWideMCqualityAndFreq"};
stringVec featureNames = {N_FEATURES, featureNamesD};
  // Note that the features have to be in the same order as their names!!!


  // Note that the tests have to be in the same order as their names!!!
  // The same positional convention applies: set_feature_test converts a test name through
  // findStringOnce(name, testNames), and get_MC_stats uses testNamesD[test] as the label
  // stored in a result. N_TESTS is the number of tests, not a test.
typedef enum {Hypergeometric=0, MannWhitney, T_Test, SetWideMC, PerSeqMC, UniformSum, GammaLogProd, MC, N_TESTS} Tests;  // MC is not a test per se but it's convenient below
char *testNamesD[] = {"Hypergeometric", "MannWhitney", "t-test", "setWideMC", "perSeqMC", "uniformSum", "gammaLogProd", "MC"};
stringVec testNames = {N_TESTS, testNamesD};
  // Note that the tests have to be in the same order as their names!!!


// Features grouped by kind. initialize_test_matrix marks every (feature, test) pair formed
// from a feature list and its matching test list below as an available combination.
#define N_QUALITY_FEATURES 3
#define N_QUANTITY_FEATURES 1
#define QUALITY_FEATURES QualityScoresGTT, TopK, TopPercent
static Features qualityFeatures[N_QUALITY_FEATURES]={QUALITY_FEATURES};
static Features quantityFeatures[N_QUANTITY_FEATURES]={FreqScoresGTT};

// Tests that may be applied to the quality features and to the quantity feature respectively.
// The array lengths must agree with the N_*_TESTS counts.
#define N_QUALITY_TESTS 5
#define N_QUANTITY_TESTS 4
static Tests qualityFeaturesTests[N_QUALITY_TESTS]={MannWhitney, T_Test, SetWideMC, PerSeqMC, MC};
static Tests quantityFeaturesTests[N_QUANTITY_TESTS]={Hypergeometric, SetWideMC, PerSeqMC, MC};

// The (feature, test) pairs that the MC code iterates over (get_MC_stats, compute_2DS_diffs,
// MCtestsRqstd): every feature in mcFeatures crossed with every test in mcTests.
#define N_MC_FEATURES 4
#define N_MC_TESTS 2
static Features mcFeatures[N_MC_FEATURES]={FreqScoresGTT, QUALITY_FEATURES};
static Tests mcTests[N_MC_TESTS] = {SetWideMC, PerSeqMC};

// Combination "features" (each combines the results of two other tests) and the tests
// that can be used to combine them.
#define N_COMB_FEATURES 3
#define N_COMB_TESTS 2
static Features combFeatures[N_COMB_FEATURES]={HandT, HandMW, MC_FreqMeanGTT};
static Tests combTests[N_COMB_TESTS]={UniformSum, GammaLogProd};


// All three matrices are indexed as [feature][test] and are reset by initialize_test_matrix.
static Boolean availableTests[N_FEATURES][N_TESTS]; // TRUE iff the combination is permissible
static Boolean rqstdTests[N_FEATURES][N_TESTS];     // TRUE iff the combination was requested (set_feature_test)
static testStruct tests[N_FEATURES][N_TESTS];       // results; an entry whose .test is NULL is skipped by printTests


         // internal methods
// Site selection applied to both sets: scores >= threshold, top K, and top fraction Perc
static void find2dsSitesGTT(analyzedSetType *set1, analyzedSetType *set2, PWM_Struct pwm);
static void find2dsSitesTopK(analyzedSetType *set1, analyzedSetType *set2, PWM_Struct pwm, int K);
static void find2dsSitesTopPercent(analyzedSetType *set1, analyzedSetType *set2, PWM_Struct pwm, double Perc);
// MC machinery: get_MC_stats fills the MC entries of tests[][] using compute_2DS_diffs,
// which subtracts the per-set statistics produced by compute_DS_stats
static void get_MC_stats(analyzedSetType *set1, analyzedSetType *set2, PWM_Struct pwm);
static void compute_2DS_diffs(analyzedSetType *set1, analyzedSetType *set2, PWM_Struct pwm, double statsDiffs[N_FEATURES][N_TESTS]) ;
static void compute_DS_stats(analyzedSetType *set, PWM_Struct pwm, double stats[N_FEATURES][N_TESTS]);
// Helpers that copy or average the score field of the entries of a siteStructVec
static void siteStructVecs2realVecs(siteStructVec sv1, realVec *rv1, siteStructVec sv2, realVec *rv2);
static void siteStructVec2realVec(siteStructVec sv, realVec *rv);
static double compMeanSiteStructVec(siteStructVec sv);




// --------- initialization and output --------------

void initialize_test_matrix()
     // A relatively cheap way to figure out permissble combinations of features and tests.
     // An alternative is for each test to hold the relevant features or vice versa.
{
  int i, j;

  for (i = 0; i < N_FEATURES; i++)
    for (j = 0; j < N_TESTS; j++) {
      availableTests[i][j] = FALSE;
      rqstdTests[i][j] = FALSE;
      tests[i][j].test = NULL;
    }

  // {MannWhitney, T_Test, MC} qualityFeaturesTests
  for (i = 0; i < N_QUALITY_FEATURES; i++)
    for (j = 0; j < N_QUALITY_TESTS; j++)
      availableTests[qualityFeatures[i]][qualityFeaturesTests[j]] = TRUE;

  // {Hypergeometric, MC} quantityFeaturesTests
  for (i = 0; i < N_QUANTITY_FEATURES; i++)
    for (j = 0; j < N_QUANTITY_TESTS; j++)
      availableTests[quantityFeatures[i]][quantityFeaturesTests[j]] = TRUE;

  // combintaion tests are a bit messy (make sure they are legit after parsing the user command)
  for (i = 0; i < N_COMB_FEATURES; i++)
    for (j = 0; j < N_COMB_TESTS; j++)
      availableTests[combFeatures[i]][combTests[j]] = TRUE;
}


void printTests(testStruct *tests, char *pwmHeader)
     // Prints, to stdout and to outputFile, every entry of the input tests matrix whose
     // ".test" field is not NULL. The matrix is addressed as a flattened, row-major
     // [N_FEATURES][N_TESTS] array. A feature's descriptive header is printed once, just
     // before the first of its tests that is printed; pwmHeader is appended to each result line.
{
  // header printed for a feature if at least one of its tests is printed
  static const char *const featureTitles[N_FEATURES] = {
    [FreqScoresGTT]    = "frequency of scores >= T",
    [QualityScoresGTT] = "quality of scores >= T",
    [TopK]             = "quality of top K scores",
    [TopPercent]       = "quality of top % scores",
    [HandT]            = "combined score of hypergeometric and t-test of scores >= T",
    [HandMW]           = "combined score of hypergeometric and Mann-Whitney of scores >= T",
    [MC_FreqMeanGTT]   = "combined score of MC tests of frequency and quality of scores >= T"
  };
  int iFeature, iTest;

  for (iFeature = 0; iFeature < N_FEATURES; iFeature++) {
    const testStruct *featureRow = tests + iFeature*N_TESTS;
    Boolean titlePrinted = FALSE;

    for (iTest = 0; iTest < N_TESTS; iTest++) {
      const testStruct *cur = &featureRow[iTest];

      if (cur->test == NULL) // this combination was not run
        continue;

      if (!titlePrinted) {
        printf("\nThe p-value of the difference between the %s under the alternative hypothesis that\n", featureTitles[iFeature]);
        fprintf(outputFile, "\nThe p-value of the difference between the %s under the alternative hypothesis that\n", featureTitles[iFeature]);
        titlePrinted = TRUE;
      }
      printf("set 1 (2) is better = %.3g ( %.3g ) [%s, %s]\n", cur->pGT, cur->pLT, cur->test, pwmHeader);
      fprintf(outputFile, "set 1 (2) is better = %.3g ( %.3g ) [%s, %s]\n", cur->pGT, cur->pLT, cur->test, pwmHeader);
    }
  }
}


// --------- Executing the requested tests --------------


testStruct *run_tests(analyzedSetType *set1, analyzedSetType *set2, PWM_Struct pwm)
     /*
       Executes the requested battery of tests to find differences between sites of pwm
       in the two input sets.

       Only the combinations flagged in rqstdTests are run; each result is stored in the
       file-static tests[feature][test] matrix. Entries of combinations that are not
       requested are not written by this function.

       Returns a pointer to that static matrix viewed as a flat array of
       N_FEATURES*N_TESTS entries (the layout printTests expects). The storage is reused
       by the next call, so the caller must not free it.

       The order of the blocks matters: the combination tests (HandT, HandMW) read the
       results that the Hypergeometric, t-test and Mann-Whitney blocks stored just before.
     */
{
  realVec realV1={0, NULL}, realV2={0, NULL}; // scratch score vectors, re-filled before every MW / t-test

  free_dsSitesStruct(&(set1->dsSites)); // clear all previous sites data assuming new pwm
  free_dsSitesStruct(&(set2->dsSites));

  if (MCtestsRqstd()) // MC needs to be done in advance so all the stats can be accumulated at once
    get_MC_stats(set1, set2, pwm);

  if (rqstdTests[FreqScoresGTT][Hypergeometric]) { // Hypergeometric test of difference in # of scores >= T
    find2dsSitesGTT(set1, set2, pwm);
    // arguments: # selected sites in set 1, # sites in set 1, # sites in set 2, # selected sites in both sets
    tests[FreqScoresGTT][Hypergeometric] = 
      hypergeometric_test(set1->dsSites.slctdSites.aboveT.len, set1->dsSites.allSites.len, set2->dsSites.allSites.len,
			  set1->dsSites.slctdSites.aboveT.len + set2->dsSites.slctdSites.aboveT.len);
  }

  if (rqstdTests[QualityScoresGTT][MannWhitney]) { // Mann Whitney test of difference in ranks of scores >= T
    find2dsSitesGTT(set1, set2, pwm);
    siteStructVecs2realVecs(set1->dsSites.slctdSites.aboveT, &realV1, set2->dsSites.slctdSites.aboveT, &realV2);
    tests[QualityScoresGTT][MannWhitney] =  MW_test(realV1, realV2, MWsigEval);
  }

  if (rqstdTests[QualityScoresGTT][T_Test]) {      // t-test of difference in means of scores >= T
    find2dsSitesGTT(set1, set2, pwm);
    siteStructVecs2realVecs(set1->dsSites.slctdSites.aboveT, &realV1, set2->dsSites.slctdSites.aboveT, &realV2);
    tests[QualityScoresGTT][T_Test] = t_test(realV1, realV2);
  }

  // The four combination tests below rely on set_feature_test having also requested the
  // two underlying tests, so that their results are already stored in tests[][].
  if (rqstdTests[HandT][UniformSum]) { // uniform test of combining the hyper and t-tests of scores >= T
    tests[HandT][UniformSum] = unifSum2tests(tests[FreqScoresGTT][Hypergeometric], tests[QualityScoresGTT][T_Test]);
  }

  if (rqstdTests[HandT][GammaLogProd]) { // log prod test of combining the hyper and t-tests of scores >= T
    tests[HandT][GammaLogProd] = logProdGamma2tests(tests[FreqScoresGTT][Hypergeometric], tests[QualityScoresGTT][T_Test]);
  }

  if (rqstdTests[HandMW][UniformSum]) { // uniform test of combining the hyper and MW-tests of scores >= T
    tests[HandMW][UniformSum] = unifSum2tests(tests[FreqScoresGTT][Hypergeometric], tests[QualityScoresGTT][MannWhitney]);
  }

  if (rqstdTests[HandMW][GammaLogProd]) { // log prod test of combining the hyper and MW-tests of scores >= T
    tests[HandMW][GammaLogProd] = logProdGamma2tests(tests[FreqScoresGTT][Hypergeometric], tests[QualityScoresGTT][MannWhitney]);
  }

  if (rqstdTests[TopK][MannWhitney]) { // Mann Whitney test of difference in ranks of top K scores
    find2dsSitesTopK(set1, set2, pwm, K);
    siteStructVecs2realVecs(set1->dsSites.slctdSites.topK, &realV1, set2->dsSites.slctdSites.topK, &realV2);
    tests[TopK][MannWhitney] =  MW_test(realV1, realV2, MWsigEval);
  }

  if (rqstdTests[TopK][T_Test]) { // t-test on the top K scores
    find2dsSitesTopK(set1, set2, pwm, K);
    siteStructVecs2realVecs(set1->dsSites.slctdSites.topK, &realV1, set2->dsSites.slctdSites.topK, &realV2);
    tests[TopK][T_Test] = t_test(realV1, realV2);
  }

  // The top % selection is read back from the same slctdSites.topK field as the top K selection
  if (rqstdTests[TopPercent][MannWhitney]) { // Mann Whitney test of difference in ranks of top % scores
    find2dsSitesTopPercent(set1, set2, pwm, Perc);
    siteStructVecs2realVecs(set1->dsSites.slctdSites.topK, &realV1, set2->dsSites.slctdSites.topK, &realV2);
    tests[TopPercent][MannWhitney] = MW_test(realV1, realV2, MWsigEval);
  }

  if (rqstdTests[TopPercent][T_Test]) { // t-test on the top % scores
    find2dsSitesTopPercent(set1, set2, pwm, Perc);
    siteStructVecs2realVecs(set1->dsSites.slctdSites.topK, &realV1, set2->dsSites.slctdSites.topK, &realV2);
    tests[TopPercent][T_Test] = t_test(realV1, realV2);
  }
  FREE_ATOMS_VEC(realV1); // release the scratch vectors (macro defined elsewhere)
  FREE_ATOMS_VEC(realV2);

  return (testStruct *) tests;
}


void find2dsSitesGTT(analyzedSetType *set1, analyzedSetType *set2, PWM_Struct pwm)
     // Calls findDsSitesGTT, with the site threshold stored in pwm, on set1 and then on
     // set2, handing each set's own dsSites structure to the callee
{
  analyzedSetType *bothSets[2];
  int iSet;

  bothSets[0] = set1;
  bothSets[1] = set2;
  for (iSet = 0; iSet < 2; iSet++)
    findDsSitesGTT(bothSets[iSet]->data, pwm, *(pwm.siteThreshold), &(bothSets[iSet]->dsSites));
}


void find2dsSitesTopK(analyzedSetType *set1, analyzedSetType *set2, PWM_Struct pwm, int K)
     // Calls findDsTopK with the same K on set1 and then on set2, handing each set's own
     // dsSites structure to the callee. The parameter K hides the file-level K.
{
  analyzedSetType *bothSets[2];
  int iSet;

  bothSets[0] = set1;
  bothSets[1] = set2;
  for (iSet = 0; iSet < 2; iSet++)
    findDsTopK(bothSets[iSet]->data, pwm, K, &(bothSets[iSet]->dsSites));
}


void find2dsSitesTopPercent(analyzedSetType *set1, analyzedSetType *set2, PWM_Struct pwm, double Perc)
     // Calls findDsTopK on set1 and then on set2, where the number of sites to select in a
     // set is the fraction Perc of the set's allSites.len, rounded to the nearest integer.
     // The parameter Perc hides the file-level Perc.
     // NOTE(review): allSites.len is read before findDsTopK is called, so this presumably
     // relies on allSites having been filled by an earlier call -- confirm.
{
  analyzedSetType *bothSets[2];
  int iSet;

  bothSets[0] = set1;
  bothSets[1] = set2;
  for (iSet = 0; iSet < 2; iSet++)
    findDsTopK(bothSets[iSet]->data, pwm, round(Perc*bothSets[iSet]->dsSites.allSites.len), &(bothSets[iSet]->dsSites));
}


// --------- MC tests --------------


void get_MC_stats(analyzedSetType *set1, analyzedSetType *set2, PWM_Struct pwm)
     /*
       Runs all the requested MC tests at once and stores their results in tests[][].

       The observed difference of each requested statistic between set1 and set2 is
       compared with the same difference computed on numRandomSets pairs of sets
       generated by gen_mc_2ds. For every requested (MC feature, MC test) pair:
         pLT = (# random diffs <= observed diff + MC_PSEUDO) / (numRandomSets + MC_PSEUDO)
         pGT = (# random diffs >= observed diff + MC_PSEUDO) / (numRandomSets + MC_PSEUDO)
       The MC combination tests are computed last, from the set-wide MC results.
     */
{
  // all these [N_FEATURES][N_TESTS] matrices are slightly wasteful: only MC entries are used
  double obsDiffs[N_FEATURES][N_TESTS];   // differences observed on the input sets
  double randDiffs[N_FEATURES][N_TESTS];  // differences of the current random pair
  long nRandLE[N_FEATURES][N_TESTS];      // # random diffs <= observed diff
  long nRandGE[N_FEATURES][N_TESTS];      // # random diffs >= observed diff
  chunksSeqVec randData1, randData2;
  analyzedSetType *randSet1, *randSet2;
  int iFeature, iTest, iRun;

  memset(nRandLE, 0, sizeof nRandLE);
  memset(nRandGE, 0, sizeof nRandGE);
  compute_2DS_diffs(set1, set2, pwm, obsDiffs);

  for (iRun = 0; iRun < numRandomSets; iRun++) {
    gen_mc_2ds(set1->data, set2->data, set1->randomGenerator, set2->randomGenerator, pwm, &randData1, &randData2);
    randSet1 = alloc_analyzedSetType(randData1);
    randSet2 = alloc_analyzedSetType(randData2);
    compute_2DS_diffs(randSet1, randSet2, pwm, randDiffs);

    // update relevant <= and >= counts
    for (iFeature = 0; iFeature < N_MC_FEATURES; iFeature++) {
      const Features feature = mcFeatures[iFeature];

      for (iTest = 0; iTest < N_MC_TESTS; iTest++) {
        const Tests test = mcTests[iTest];

        if (!rqstdTests[feature][test])
          continue;
        if (obsDiffs[feature][test] >= randDiffs[feature][test])
          nRandLE[feature][test]++;
        if (obsDiffs[feature][test] <= randDiffs[feature][test])
          nRandGE[feature][test]++;
      }
    }

    free_analyzedSetType(randSet1);
    free_analyzedSetType(randSet2);
  }

  // summarize the tests
  for (iFeature = 0; iFeature < N_MC_FEATURES; iFeature++) {
    const Features feature = mcFeatures[iFeature];

    for (iTest = 0; iTest < N_MC_TESTS; iTest++) {
      const Tests test = mcTests[iTest];
      testStruct *result = &tests[feature][test];

      if (!rqstdTests[feature][test])
        continue;
      result->pLT = ((double) nRandLE[feature][test] + MC_PSEUDO) / (numRandomSets+MC_PSEUDO);
      result->pGT = ((double) nRandGE[feature][test] + MC_PSEUDO) / (numRandomSets+MC_PSEUDO);
      result->stat = obsDiffs[feature][test];
      result->test = testNamesD[test];
    }
  }

  // combination tests
  if (rqstdTests[MC_FreqMeanGTT][UniformSum]) {
    tests[MC_FreqMeanGTT][UniformSum] =
      unifSum2tests(tests[FreqScoresGTT][SetWideMC], tests[QualityScoresGTT][SetWideMC]);
  }
  if (rqstdTests[MC_FreqMeanGTT][GammaLogProd]) {
    tests[MC_FreqMeanGTT][GammaLogProd] =
      logProdGamma2tests(tests[FreqScoresGTT][SetWideMC], tests[QualityScoresGTT][SetWideMC]);
  }
}


void compute_2DS_diffs(analyzedSetType *set1, analyzedSetType *set2, PWM_Struct pwm, double statsDiffs[N_FEATURES][N_TESTS]) 
     /*
       Computes the MC statistics of set1 and of set2 (compute_DS_stats) and writes, for
       every requested (MC feature, MC test) pair, the value of set1 minus the value of
       set2 into statsDiffs. Entries of pairs that are not requested are left untouched.
     */
{
  double firstStats[N_FEATURES][N_TESTS];
  double secondStats[N_FEATURES][N_TESTS];
  int iFeature, iTest;

  compute_DS_stats(set1, pwm, firstStats);
  compute_DS_stats(set2, pwm, secondStats);

  for (iFeature = 0; iFeature < N_MC_FEATURES; iFeature++) {
    const Features feature = mcFeatures[iFeature];

    for (iTest = 0; iTest < N_MC_TESTS; iTest++) {
      const Tests test = mcTests[iTest];

      if (!rqstdTests[feature][test])
        continue;
      statsDiffs[feature][test] = firstStats[feature][test] - secondStats[feature][test];
    }
  }
}


static int *alloc_perSeqKs(int nSeqs)
     // Allocates one int per sequence (at least one, so that an empty set does not yield a
     // legitimate NULL). Terminates the program if the allocation fails, whether or not
     // NDEBUG is defined. The caller frees the returned array.
{
  int *Ks = malloc((nSeqs > 0 ? (size_t) nSeqs : 1) * sizeof *Ks);

  if (Ks == NULL) {
    fprintf(stderr, "compute_DS_stats: failed to allocate %d per-sequence K values\n", nSeqs);
    abort();
  }
  return Ks;
}


void compute_DS_stats(analyzedSetType *set, PWM_Struct pwm, double stats[N_FEATURES][N_TESTS])
     /*
       Returns the values of all the statistics that are requested in association
       with the MC tests.

       Only the entries stats[feature][SetWideMC] and stats[feature][PerSeqMC] whose
       combination is flagged in rqstdTests are written; all other entries are left
       untouched. The site selections are stored in set->dsSites as a side effect.

       Set-wide statistics are computed over all the sites of the set; per-sequence
       statistics are the average over the set's sequences of the per-sequence value.

       The allocation of the per-sequence K array used to sit inside assert(), which
       removes the allocation altogether when NDEBUG is defined; it is now done by
       alloc_perSeqKs.
     */
{
  int iSeq, *Ks, nSeqs;
  double mean;

  // set wide stats
  if (rqstdTests[FreqScoresGTT][SetWideMC]) {
    findDsSitesGTT(set->data, pwm, *(pwm.siteThreshold), &(set->dsSites));
    // fraction of sites that were selected; DBL_MIN keeps the denominator non-zero
    stats[FreqScoresGTT][SetWideMC] = ((double) set->dsSites.slctdSites.aboveT.len) / (set->dsSites.allSites.len + DBL_MIN);
  }

  if (rqstdTests[QualityScoresGTT][SetWideMC]) {
    findDsSitesGTT(set->data, pwm, *(pwm.siteThreshold), &(set->dsSites));
    stats[QualityScoresGTT][SetWideMC] = compMeanSiteStructVec(set->dsSites.slctdSites.aboveT);
  }

  if (rqstdTests[TopK][SetWideMC]) {
    findDsTopK(set->data, pwm, K, &(set->dsSites));
    stats[TopK][SetWideMC] = compMeanSiteStructVec(set->dsSites.slctdSites.topK);
  }

  if (rqstdTests[TopPercent][SetWideMC]) {
    findDsTopK(set->data, pwm, round(Perc*set->dsSites.allSites.len), &(set->dsSites));
    stats[TopPercent][SetWideMC] = compMeanSiteStructVec(set->dsSites.slctdSites.topK);
  }

  // per sequence stats
  nSeqs = set->data.len;   // readability
  if (rqstdTests[FreqScoresGTT][PerSeqMC]) {
    findPerSeqSitesGTT(set->data, pwm, *(pwm.siteThreshold), &(set->dsSites));
    for (mean = 0, iSeq = 0; iSeq < nSeqs; iSeq++)
      mean += ((double) set->dsSites.perSeqSlctdSites.entry[iSeq].aboveT.len)
        / (set->dsSites.perSeqSites.entry[iSeq].len + DBL_MIN);
    stats[FreqScoresGTT][PerSeqMC] = mean / nSeqs;
  }

  if (rqstdTests[QualityScoresGTT][PerSeqMC]) {
    findPerSeqSitesGTT(set->data, pwm, *(pwm.siteThreshold), &(set->dsSites));
    for (mean = 0, iSeq = 0; iSeq < nSeqs; iSeq++)
      mean += compMeanSiteStructVec(set->dsSites.perSeqSlctdSites.entry[iSeq].aboveT);
    stats[QualityScoresGTT][PerSeqMC] = mean / nSeqs;
  }

  if (rqstdTests[TopK][PerSeqMC]) {
    Ks = alloc_perSeqKs(nSeqs);
    for (iSeq = 0; iSeq < nSeqs; iSeq++) // the same K for every sequence
      Ks[iSeq] = K;
    findPerSeqTopK(set->data, pwm, Ks, &(set->dsSites));
    for (mean = 0, iSeq = 0; iSeq < nSeqs; iSeq++)
      mean += compMeanSiteStructVec(set->dsSites.perSeqSlctdSites.entry[iSeq].topK);
    stats[TopK][PerSeqMC] = mean / nSeqs;
    free(Ks);
  }

  if (rqstdTests[TopPercent][PerSeqMC]) {
    Ks = alloc_perSeqKs(nSeqs);
    for (iSeq = 0; iSeq < nSeqs; iSeq++) // K is the fraction Perc of each sequence's sites
      Ks[iSeq] = round(Perc * set->dsSites.perSeqSites.entry[iSeq].len);
    findPerSeqTopK(set->data, pwm, Ks, &(set->dsSites));
    for (mean = 0, iSeq = 0; iSeq < nSeqs; iSeq++)
      mean += compMeanSiteStructVec(set->dsSites.perSeqSlctdSites.entry[iSeq].topK);
    stats[TopPercent][PerSeqMC] = mean / nSeqs;
    free(Ks);
  }
}


// ----------- misc ---------------


void siteStructVecs2realVecs(siteStructVec sv1, realVec *rv1, siteStructVec sv2, realVec *rv2)
     // Converts two siteStructVecs at once: rv1 receives the scores of sv1 and then rv2
     // receives the scores of sv2 (see siteStructVec2realVec)
{
  siteStructVec sources[2];
  realVec *targets[2];
  int iVec;

  sources[0] = sv1;  targets[0] = rv1;
  sources[1] = sv2;  targets[1] = rv2;
  for (iVec = 0; iVec < 2; iVec++)
    siteStructVec2realVec(sources[iVec], targets[iVec]);
}


void siteStructVec2realVec(siteStructVec sv, realVec *rv)
     // Replaces *rv with a newly allocated realVec holding the score of every entry of sv.
     // The previous content of *rv is freed only when its len is positive.
{
  realVec scores;
  int iEntry = 0;

  if (rv->len > 0)
    free(rv->entry);

  scores = alloc_realVec(sv.len);
  while (iEntry < sv.len) {
    scores.entry[iEntry] = sv.entry[iEntry].score;
    iEntry++;
  }
  *rv = scores;
}


double compMeanSiteStructVec(siteStructVec sv)
     // Returns the arithmetic mean of the scores of sv's entries, or 0 when sv has no entries
{
  double total = 0;
  int iEntry;

  if (!(sv.len > 0)) // nothing to average
    return 0;

  for (iEntry = 0; iEntry < sv.len; iEntry++)
    total += sv.entry[iEntry].score;
  return total / sv.len;
}


// ----------- Setting user input -----------------


int set_feature_test(char **testRecord, int nParms)
     /*
       Parse and record a test request originally of the form: feature <parameter> [ tests ].
       The parameter is not always given.
     */
{
  Features feature;
  Tests test;
  int i, testsStart = 1;

  feature = findStringOnce(testRecord[0], featureNames);
  switch (feature) // some fetures require special settings
    {
    case TopK :       // catch following K
      K = my_atol(testRecord[1]);
      testsStart++;
      checkRangeI(K, 0, INT_MAX, "K");
      break;
    case TopPercent : // catch following %
      Perc = my_atod(testRecord[1]);
      testsStart++;
      checkRangeD(Perc, 0, 1, "TopPercent");
      break;
    case HandT : // make sure that both H & t-tests are selected
      rqstdTests[FreqScoresGTT][Hypergeometric] = TRUE;
      rqstdTests[QualityScoresGTT][T_Test] = TRUE;
      break;
    case HandMW : // make sure that both H & MW tests are selected
      rqstdTests[FreqScoresGTT][Hypergeometric] = TRUE;
      rqstdTests[QualityScoresGTT][MannWhitney] = TRUE;
      break;
    case MC_FreqMeanGTT : // make sure that both FreqScoresGTT & MeanScoresGTT are SetWideMC-tested
      rqstdTests[FreqScoresGTT][SetWideMC] = TRUE;
      rqstdTests[QualityScoresGTT][SetWideMC] = TRUE;
      break;
    }
  if (nParms <= testsStart)
    ERROR(("Feature %s is not followed by any test!", testRecord[0]))
  for (i = testsStart; i < nParms; i++) {
    test = findStringOnce(testRecord[i], testNames);
    if (availableTests[feature][test]) {
      rqstdTests[feature][test] = TRUE;
      switch (test) // properly set all non-standard tests
	{
	case MC :  // MC is a special "test" which is made of 1-2 tests -- can you do something more elegant?
	  if (MC_scope == SetWide || MC_scope == SetAndPerSeq)
	    rqstdTests[feature][SetWideMC] = TRUE;
	  if (MC_scope == PerSeq || MC_scope == SetAndPerSeq)
	    rqstdTests[feature][PerSeqMC] = TRUE;
	  break;
	}
    }
    else
      ERROR(("Feature %s cannot be tested with Test %s", testRecord[0], testRecord[i]))
  }
}


void set_numRandomSets(char *value)
     // Records the number of random sets used by the MC tests, as parsed from value by
     // my_atol, and then passes it to checkRangeI with the bounds 0 and INT_MAX
{
  const int requested = my_atol(value);

  numRandomSets = requested;
  checkRangeI(numRandomSets, 0, INT_MAX, "-numRandomSets");
}


void set_MC_scope(char *value) 
{
  if (strcmp(value, "setWide") == 0)
    MC_scope = SetWide;
  else if (strcmp(value, "perSeq") == 0)
    MC_scope = PerSeq;
  else if (strcmp(value, "setAndPerSeq") == 0)
    MC_scope = SetAndPerSeq;
  else
    ERROR(("Value %s of MCstatScope is undefined", value));
}


void set_Kvalue(char *value)
     // Records K for the top K tests, as parsed from value by my_atol (no range check here)
{
  K = my_atol(value);
}


void set_PercValue(char *value)
     // Records Perc for the top % tests, as parsed from value by my_atod (no range check here)
{
  Perc = my_atod(value);
}


void set_MWsigEval(char *value)
     // Records how the Mann-Whitney test is to be evaluated. value is looked up among the
     // known method names; when the lookup yields 0 (the position of "NULL") a NULL
     // pointer is recorded, otherwise the pointer value itself is kept (not a copy).
{
  char *knownMethods[] = {"NULL", "exact", "normal"};
  stringVec methodVec = {3, knownMethods};

  // the lookup also serves to check that the method is recognized
  MWsigEval = (findStringOnce(value, methodVec) == 0) ? NULL : value;
}


Boolean MCtestsRqstd()
     // TRUE iff at least one (MC feature, MC test) combination was requested
{
  int iPair;

  // walk the mcFeatures x mcTests grid as a single sequence, one feature after the other
  for (iPair = 0; iPair < N_MC_FEATURES * N_MC_TESTS; iPair++) {
    if (rqstdTests[mcFeatures[iPair / N_MC_TESTS]][mcTests[iPair % N_MC_TESTS]])
      return TRUE;
  }
  return FALSE;
}


Boolean GTTscoresRqstd()
     // TRUE iff some test of the FreqScoresGTT or of the QualityScoresGTT feature was requested
{
  int iTest = 0;

  while (iTest < N_TESTS) {
    if (rqstdTests[FreqScoresGTT][iTest] || rqstdTests[QualityScoresGTT][iTest])
      return TRUE;
    iTest++;
  }
  return FALSE;
}


// ---------------  memory management ------------------


analyzedSetType *alloc_analyzedSetType(chunksSeqVec dataSet)
		// Allocate a site-augmented data set "around" the given input set initializing the necessary values.
		// The new structure is zero-filled, its data field is set to dataSet (not a deep copy)
		// and a zero-filled RandomDsCreatorType is attached as its randomGenerator.
		// Release the result with free_analyzedSetType, which also frees dataSet.
		// The program is terminated if memory cannot be allocated. The allocations used to sit
		// inside assert(), which removes them altogether when NDEBUG is defined.
{
	analyzedSetType *analyzedSet;

	analyzedSet = calloc(1, sizeof *analyzedSet);
	if (analyzedSet == NULL) {
		fprintf(stderr, "alloc_analyzedSetType: failed to allocate the analyzed set\n");
		abort();
	}
	analyzedSet->data = dataSet;

	analyzedSet->randomGenerator = calloc(1, sizeof(RandomDsCreatorType));
	if (analyzedSet->randomGenerator == NULL) {
		fprintf(stderr, "alloc_analyzedSetType: failed to allocate the random generator\n");
		abort();
	}

	return analyzedSet;
}


void free_analyzedSetType(analyzedSetType *set)
		// Frees the allocated arrays across all sub-structures: the random generator, the
		// sites data, the sequence data and finally the structure itself.
		// A NULL set is accepted and ignored, like free(NULL).
{
	if (set == NULL)
		return;

	free_RandomDsCreatorType(set->randomGenerator);
	free_dsSitesStruct(&(set->dsSites));
	free_chunksSeqVec(set->data);
	free(set);
}
