/*
  Generate MC images of the input sets using either a Markov model or bootstrap.
  Bootstrap can protect sites. This is particularly important for longer PWMs
  that could easily be chopped if the block size is small relative to the width.
*/


#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <limits.h>
#include <float.h>
#include <assert.h>
#include "my_types.h"
#include "misc_functions.h"
#include "data_interface.h"
#include "markov.h"
#include "pwms.h"
#include "sitePrtctdBtstrp.h"
#include "sequenceResample.h"
#include "mc_sets.h"



  // --> Note that the models below have to be in the same order as their names!!!
  // The enum value of each model is used as its index into MCmodelNamesD, and
  // N_MCmodelTypes (the last enumerator) doubles as the number of names.
  // set_mcModel() stores the result of looking a name up in MCmodelNames
  // directly into a MCmodelType, so any mismatch in order selects the wrong model.
typedef enum {Bootstrap=0, Markov, WholeSeqResampRep, WholeSeqResampNoRep, N_MCmodelTypes} MCmodelType; 
static char *MCmodelNamesD[] = {"bootstrap", "markov", "wholeSeqResampRep", "wholeSeqResampNoRep"};
  // presumably stringVec is {length, array of strings} -- confirm against my_types.h
static stringVec MCmodelNames = {N_MCmodelTypes, MCmodelNamesD};
  // <-- Note that the above models have to be in the same order as their names!!!


// Order of the Markov model; overwritten by set_mcModel() when the Markov model
// is selected and read by alloc_RandomDsCreator() when training the model.
static int markovOrder=2;
// Pseudocount handed to the Markov model training; set_mcModel() overwrites it
// only when a second Markov parameter is supplied.
static double pseudoCount = 1;

static MCmodelType mc_model; // which MC model are we using (assigned in set_mcModel())



void gen_mc_2ds(chunksSeqVec moldSet1, chunksSeqVec moldSet2, RandomDsCreatorType *rdc1, 
		RandomDsCreatorType *rdc2, PWM_Struct pwm, chunksSeqVec *rSet1, chunksSeqVec *rSet2)
     // Produce one MC image for each of the two input datasets and store them
     // in *rSet1 and *rSet2.
     //   moldSet1, moldSet2 : the datasets to be imitated
     //   rdc1, rdc2         : per-dataset random creators (block models only)
     //   pwm                : forwarded to gen_mc_ds (block models only); it is
     //                        only used if bootProtectsSites is set
     // For the whole-sequence resampling models both mold sets are handed to the
     // resampler together, and rdc1, rdc2 and pwm are not touched.
{
  // Block models (bootstrap / Markov): each set is generated on its own.
  if (blockMCmodel()) {
    *rSet1 = gen_mc_ds(moldSet1, rdc1, pwm);
    *rSet2 = gen_mc_ds(moldSet2, rdc2, pwm);
    return;
  }

  // Anything that is neither a block model nor WholeSeqResampRep is handled
  // by the no-repetition resampler, which fills both outputs in one call.
  if (mc_model != WholeSeqResampRep) {
    wholeSeqResampNoRep(moldSet1, moldSet2, rSet1, rSet2);
    return;
  }

  // WholeSeqResampRep: one call per output set, sized like the matching mold set.
  *rSet1 = wholeSeqResampRep(moldSet1, moldSet2, moldSet1.len);
  *rSet2 = wholeSeqResampRep(moldSet1, moldSet2, moldSet2.len);
}

chunksSeqVec gen_mc_ds(chunksSeqVec moldSet, RandomDsCreatorType *rdc, PWM_Struct pwm)
     // Build an MC image of a single dataset.
     //   moldSet : dataset whose layout the image copies
     //   rdc     : random dataset creator used to draw the letters
     //   pwm     : only used if bootProtectsSites is set
     // Returns the newly built set.
{
  // One flat random block as long as the whole mold set ...
  int nLetters = chunkSeqVecTotalLen(moldSet);
  iLetterVec flatBlock = gen_MCblock(rdc, pwm, nLetters);

  // ... reshaped into an MC set image of moldSet.
  chunksSeqVec image = iLetterVec2chunksSeqVec(flatBlock, moldSet);

  // The flat block is only an intermediate buffer.
  FREE_ATOMS_VEC(flatBlock);

  return image;
}


iLetterVec gen_MCblock(RandomDsCreatorType *rdc, PWM_Struct pwm, int len)
     // Draw one random block of length len from the creator rdc.
     //   pwm is only used if bootProtectsSites is set
     // Every model other than Bootstrap is served by the Markov generator.
{
  if (mc_model != Bootstrap)
    return genMarkovBlock(rdc->markovModel, len);

  // Bootstrap: (re)build the block database when it is still empty or was
  // built for a different PWM than the one passed in now.
  if (rdc->bootBlocks.len == 0 || rdc->bootBlocks.pwmID != pwm.id)
    gen_bootBlocksDB(rdc->trainData, pwm, &(rdc->bootBlocks));

  return gen_ranBootBlock(rdc->trainData, rdc->bootBlocks, len);
}


int set_mcModel(char *mc_modelName, char **parameters, int nParms)
     /*
       Selects the MC model used to generate random sets and parses its
       parameters.

         mc_modelName : model name, looked up in MCmodelNames
         parameters   : array of nParms parameter strings
                          bootstrap: #(nominal chunk size) ['protectSites']
                          markov   : #(order) [#(pseudocount)]
                        the other models read no parameters here

       Returns 0 when the bootstrap model was selected and 1 otherwise.

       NOTE(review): the code after each errorMessage() call reads
       parameters[0] unconditionally, so errorMessage() is presumably expected
       not to return -- confirm.
     */
{
  mc_model = findStringOnce(mc_modelName, MCmodelNames);

  if (mc_model == Bootstrap) {
    int nominalBlockSize;
    Boolean protectSites;
    // Accepted forms: one parameter, or two with the second being "protectSites".
    int argsOk = (nParms == 1
		  || (nParms == 2 && strcmp(parameters[1], "protectSites") == 0));

    if (!argsOk)
      errorMessage("The MC bootstrap model parameters are #(nominal chunk size) ['protectSites']");
    nominalBlockSize = my_atol(parameters[0]);
    checkRangeI(nominalBlockSize, 0, INT_MAX, "MCmodel bootstrap nominal chunk size");
    protectSites = (nParms == 2) ? TRUE : FALSE;
    set_bootParms(protectSites, nominalBlockSize);
    return 0;
  }

  if (mc_model == Markov) {
    if (nParms <= 0 || nParms > 2)
      errorMessage("The MC Markov model parameters are #(order) [#(pseudocount)]");
    markovOrder = my_atol(parameters[0]);
    checkRangeI(markovOrder, 0, INT_MAX, "MCmodel markov order");
    // The pseudocount is optional; the file-level default stays otherwise.
    if (nParms == 2) {
      pseudoCount = my_atod(parameters[1]);
      checkRangeD(pseudoCount, 0, DBL_MAX, "MCmodel markov pseudo count");
    }
  }

  return 1; // the only value that matters is Bootstrap = 0
}


Boolean blockMCmodel()
     // TRUE iff the currently selected MC model is a block model,
     // i.e. Bootstrap or Markov; FALSE for every other value of mc_model.
{
  switch (mc_model)
    {
    case Bootstrap :
    case Markov :
      return TRUE;
    default :
      return FALSE;
    }
}


RandomDsCreatorType *alloc_RandomDsCreator(iLetterVec trainData)
     // Returns a pointer to a newly allocated, zero-initialized creator
     // structure (for block methods) that refers to trainData.
     //   - Markov model: the Markov model is trained right away from trainData
     //     using the file-level markovOrder and pseudoCount.
     //   - any other model: bootBlocks.pwmID is set to -1 so that the bootstrap
     //     blocks are (re)built on the fly by gen_MCblock.
     // Aborts the program if the structure cannot be allocated.
{
  RandomDsCreatorType *randDsCreator;

  // The allocation must not live inside assert(): with NDEBUG defined the
  // whole assert expression is removed and the pointer would be left
  // uninitialized before being dereferenced below.
  randDsCreator = calloc(1, sizeof *randDsCreator);
  if (randDsCreator == NULL) {
    fprintf(stderr, "alloc_RandomDsCreator: out of memory\n");
    abort(); // same outcome as the failed assert, but also in NDEBUG builds
  }

  randDsCreator->trainData = trainData;
  if (mc_model == Markov)
    randDsCreator->markovModel = trainMarkovModel(randDsCreator->trainData, markovOrder, pseudoCount);
  else
    randDsCreator->bootBlocks.pwmID = -1; // make sure these will be recalculated on the fly if Bootstrap

  return randDsCreator;
}


void free_RandomDsCreatorType(RandomDsCreatorType *rdc)
     // Releases the arrays held by *rdc: the training data, the Markov model
     // and the bootstrap blocks.  The structure rdc points to is not freed
     // here.
     // NOTE(review): both the Markov model and the bootstrap blocks are
     // released regardless of which MC model is active; this presumably relies
     // on the zero-filled state left by alloc_RandomDsCreator -- confirm.
{
  FREE_ATOMS_VEC(rdc->trainData);     // training sequence
  freeMarkovModel(rdc->markovModel);  // Markov model
  free_bootBlocks(&rdc->bootBlocks);  // bootstrap block database
}
