#include "SVMClassifier.hpp"


namespace mgx
{

    /**
     * Copy every field of one libsvm parameter structure into another.
     *
     * weight_label and weight are assigned as-is; they are pointers (initialize()
     * sets them to NULL), so after the call both structures refer to the same
     * arrays - this is a shallow copy.
     *
     * @param parms1 source parameters (received by value)
     * @param parms2 destination; every listed field is overwritten
     * @return always true
     */
    bool copyParms(svm_parameter parms1, svm_parameter& parms2)
    {
      // which machine and which kernel
      parms2.svm_type    = parms1.svm_type;
      parms2.kernel_type = parms1.kernel_type;

      // kernel coefficients
      parms2.degree = parms1.degree;
      parms2.gamma  = parms1.gamma;
      parms2.coef0  = parms1.coef0;

      // cost / tolerance settings
      parms2.C   = parms1.C;
      parms2.nu  = parms1.nu;
      parms2.p   = parms1.p;
      parms2.eps = parms1.eps;

      // solver switches
      parms2.cache_size  = parms1.cache_size;
      parms2.shrinking   = parms1.shrinking;
      parms2.probability = parms1.probability;

      // per-class weights (pointers are shared, not duplicated)
      parms2.nr_weight    = parms1.nr_weight;
      parms2.weight_label = parms1.weight_label;
      parms2.weight       = parms1.weight;

      return true;
    }



    std::vector<std::set<QString> > SVMClassifier::featureSubsetCreation(std::set<QString> allFeatures)
    {
      uint l = allFeatures.size();

      std::vector<std::set<QString> > featureVectorOneOutNew;

      for(uint i = 0; i < l; i++){
        std::set<QString> newEntry;
        for(uint j = 0; j < l; j++){
          std::set<QString>::iterator it = allFeatures.begin();
          for(uint k = 0; k < j; k++) it++;
          if(j != i){
            newEntry.insert(*it);
            qDebug() << "e " << *it;
          }
        }
        //std::cout << " end " << std::endl;
        featureVectorOneOutNew.push_back(newEntry);
      }
      // also add the full list?

      return featureVectorOneOutNew;

    }

    std::vector<std::set<QString> > SVMClassifier::featureEnlargedSetCreation(std::set<QString> allCurrentFeatures, std::set<QString> allFeatures)
    {
      uint l = allFeatures.size();

      std::vector<std::set<QString> > featureVectorOneOutNew;

      for(uint i = 0; i < l; i++){
        std::set<QString> newEntry;

        // copy current features
        forall(QString p, allCurrentFeatures){
          newEntry.insert(p);
        }

        std::set<QString>::iterator it = allFeatures.begin();
        for(uint j = 0; j < i; j++) it++;

        if(newEntry.find(*it) == newEntry.end()){
          newEntry.insert(*it);
          featureVectorOneOutNew.push_back(newEntry);
        }

        //std::cout << " end " << std::endl;
        
      }
      // also add the full list?

      return featureVectorOneOutNew;

    }

    /**
     * Score every "one feature removed" variant of a feature set and report the
     * best one.
     *
     * The candidates come from featureSubsetCreation(). Each one is turned into
     * an svm_problem from allData and scored either by a full C/gamma grid
     * search or by a single cross validation with a private copy of param.
     *
     * @param currentFeatureSet   feature set to shrink
     * @param allFeatures         unused here
     * @param fullParameterSearch true: optimizeSVMParameter(), false: doCrossValidation()
     * @param bestCV              out: best score found (0.0 when no candidate scored above 0)
     * @param bestFeatureSet      out: candidate with that score (empty when none scored above 0)
     */
    void SVMClassifier::attemptFeatureRem(std::set<QString> currentFeatureSet, std::set<QString> allFeatures, bool fullParameterSearch,
        double& bestCV, std::set<QString>& bestFeatureSet)
    {
      // one candidate per removable feature
      std::vector<std::set<QString> > oneOutFeatures = featureSubsetCreation(currentFeatureSet);
      std::cout << "one outs  " << oneOutFeatures.size() << std::endl;

      // One pre-sized slot per candidate. Every thread only assigns to its own
      // element, whereas inserting into a std::map from several threads at once
      // is a data race.
      std::vector<double> cvValues(oneOutFeatures.size(), 0.0);

      #pragma omp parallel for
      for(uint i = 0; i<oneOutFeatures.size(); i++){
        std::cout << "l " << oneOutFeatures.size() << std::endl;

        // everything below is private to the iteration
        svm_problem probTest;
        svm_node *x_spaceTest = NULL;
        svm_parameter parmsTest;
        copyParms(param, parmsTest);

        // generateDataSVM() assigns the shared member modelFeatures, so the
        // calls must not overlap between threads.
        #pragma omp critical(svmFeatureRemGenerate)
        {
          generateDataSVM(allData, probTest, oneOutFeatures[i], x_spaceTest);
        }

        std::cout << "now cv  " << std::endl;
        if(fullParameterSearch){
          cvValues[i] = optimizeSVMParameter(probTest, parmsTest);
        } else {
          cvValues[i] = doCrossValidation(probTest, parmsTest, nr_fold);
        }

        // generateDataSVM() allocated the label array, the row table and one
        // node array per row; none of it is needed once the score is known.
        for(int r = 0; r < probTest.l; r++) free(probTest.x[r]);
        free(probTest.x);
        free(probTest.y);
      }

      // sequential reduction: first candidate with the strictly highest score wins
      double bestOneOutCV = 0.0;
      std::set<QString> bestOneOutFeatureSet;
      for(uint i = 0; i<oneOutFeatures.size(); i++){
        if(bestOneOutCV < cvValues[i]){
          bestOneOutCV = cvValues[i];
          bestOneOutFeatureSet = oneOutFeatures[i];
        }
      }
      bestCV = bestOneOutCV;
      bestFeatureSet = bestOneOutFeatureSet;
    }

    /**
     * Greedy backward elimination of features.
     *
     * Starts from the full feature set, scores it by cross validation and then
     * repeatedly asks attemptFeatureRem() for the best set with one feature
     * less. A smaller set is accepted as long as its score is not more than
     * featureThreshold below the current score. The search stops when no
     * acceptable smaller set exists or when only one feature is left.
     *
     * The result is also stored in modelFeatures.
     *
     * @param allFeatures         features to start from
     * @param fullParameterSearch forwarded to attemptFeatureRem()
     * @return the reduced feature set
     */
    std::set<QString> SVMClassifier::featureReduction(std::set<QString> allFeatures, bool fullParameterSearch)
    {
      std::cout << "feature Reduction ----------------" << std::endl;

      std::set<QString> currentFeatureSet = allFeatures;
      std::cout << "all feat  " << allFeatures.size() << std::endl;

      // baseline: cross validation score of the complete feature set
      svm_problem probAll;
      svm_node *x_spaceAll = NULL;
      generateDataSVM(allData, probAll, allFeatures, x_spaceAll);

      // print the whole message; dereferencing the pointer would only print
      // its first character
      const char *error_msg = svm_check_parameter(&probAll, &param);
      if(error_msg) std::cout<<"ERROR: \n" << error_msg << std::endl;

      double currentCV = doCrossValidation(probAll, param, nr_fold);

      // generateDataSVM() allocated the label array, the row table and one node
      // array per row; the baseline problem is not used again.
      for(int r = 0; r < probAll.l; r++) free(probAll.x[r]);
      free(probAll.x);
      free(probAll.y);

      // Checking the size at the top of every round guarantees the last
      // remaining feature is never removed (a flag carried over from an earlier
      // round could otherwise trigger one more removal).
      while(currentFeatureSet.size() > 1){
        double bestCV = 0.0;
        std::set<QString> bestFeatureSet;
        attemptFeatureRem(currentFeatureSet, allFeatures, fullParameterSearch, bestCV, bestFeatureSet);

        std::cout << "prev best  " << currentCV << " / new best " << bestCV << std::endl;
        if(!(bestCV > currentCV - featureThreshold)) break; // nothing acceptable

        std::cout << "new round " << featureThreshold << std::endl;
        currentFeatureSet = bestFeatureSet;
        currentCV = bestCV;
      }

      modelFeatures = currentFeatureSet;
      return currentFeatureSet;
    }

    /**
     * Score every "one feature added" variant of a feature set and report the
     * best one.
     *
     * The candidates come from featureEnlargedSetCreation(). Each one is turned
     * into an svm_problem from allData and scored either by a full C/gamma grid
     * search or by a single cross validation with a private copy of param.
     *
     * @param currentFeatureSet   feature set to extend
     * @param allFeatures         pool of features that may be added
     * @param fullParameterSearch true: optimizeSVMParameter(), false: doCrossValidation()
     * @param bestCV              out: best score found (0.0 when no candidate scored above 0)
     * @param bestFeatureSet      out: candidate with that score (empty when none scored above 0)
     */
    void SVMClassifier::attemptFeatureAdd(std::set<QString> currentFeatureSet, std::set<QString> allFeatures, bool fullParameterSearch,
        double& bestCV, std::set<QString>& bestFeatureSet)
    {
      // one candidate per feature that is not part of the current set yet
      std::vector<std::set<QString> > oneOutFeatures = featureEnlargedSetCreation(currentFeatureSet, allFeatures);
      std::cout << "one outs  " << oneOutFeatures.size() << std::endl;

      // One pre-sized slot per candidate. Every thread only assigns to its own
      // element, whereas inserting into a std::map from several threads at once
      // is a data race.
      std::vector<double> cvValues(oneOutFeatures.size(), 0.0);

      #pragma omp parallel for
      for(uint i = 0; i<oneOutFeatures.size(); i++){
        // everything below is private to the iteration
        svm_problem probTest;
        svm_node *x_spaceTest = NULL;
        svm_parameter parmsTest;
        std::cout << "copy " << i << std::endl;
        copyParms(param, parmsTest);
        std::cout << "gen " << i << std::endl;

        // generateDataSVM() assigns the shared member modelFeatures, so the
        // calls must not overlap between threads.
        #pragma omp critical(svmFeatureAddGenerate)
        {
          generateDataSVM(allData, probTest, oneOutFeatures[i], x_spaceTest);
        }

        std::cout << "parm " << i << std::endl;
        if(fullParameterSearch){
          cvValues[i] = optimizeSVMParameter(probTest, parmsTest);
        } else {
          cvValues[i] = doCrossValidation(probTest, parmsTest, nr_fold);
        }
        std::cout << "done  " << i << std::endl;

        // generateDataSVM() allocated the label array, the row table and one
        // node array per row; none of it is needed once the score is known.
        for(int r = 0; r < probTest.l; r++) free(probTest.x[r]);
        free(probTest.x);
        free(probTest.y);
      }

      // sequential reduction: first candidate with the strictly highest score wins
      double bestOneOutCV = 0.0;
      std::set<QString> bestOneOutFeatureSet;
      for(uint i = 0; i<oneOutFeatures.size(); i++){
        if(bestOneOutCV < cvValues[i]){
          bestOneOutCV = cvValues[i];
          bestOneOutFeatureSet = oneOutFeatures[i];
        }
      }
      bestCV = bestOneOutCV;
      bestFeatureSet = bestOneOutFeatureSet;
    }



    /**
     * Greedy forward selection of features.
     *
     * Begins with an empty selection and a score of 0. Every round asks
     * attemptFeatureAdd() for the best selection with one additional feature and
     * adopts it when its score is strictly higher than the current one. The
     * search ends when a round brings no improvement or when the score exceeds
     * 0.9999.
     *
     * The result is also stored in modelFeatures.
     *
     * @param allFeatures         pool of candidate features
     * @param fullParameterSearch forwarded to attemptFeatureAdd()
     * @return the selected feature set
     */
    std::set<QString> SVMClassifier::featureInclusion(std::set<QString> allFeatures, bool fullParameterSearch)
    {
      std::cout << "feature Inclusion ----------------" << std::endl;
      std::cout << "all feat  " << allFeatures.size() << std::endl;

      std::set<QString> selection;
      double selectionCV = 0.0;

      for(bool keepGoing = true; keepGoing; ){
        std::cout << "new loop " << std::endl;

        double candidateCV = 0.0;
        std::set<QString> candidate;
        attemptFeatureAdd(selection, allFeatures, fullParameterSearch, candidateCV, candidate);

        std::cout << "prev best  " << selectionCV << " / new best " << candidateCV << std::endl;

        keepGoing = candidateCV > selectionCV;
        if(keepGoing){
          // strictly better: adopt the candidate
          std::cout << "new round " << featureThreshold << std::endl;
          selection = candidate;
          selectionCV = candidateCV;
          // a (near) perfect score cannot be improved any further
          keepGoing = !(selectionCV > 0.9999);
        }

        std::cout << "end loop " << std::endl;
      }

      modelFeatures = selection;
      return selection;
    }


    std::set<QString> SVMClassifier::geneticFeatureSelection(std::set<QString> allFeatures)
    {
      std::cout << "genetic feature Selection ----------------" << std::endl;

      // optimize C/gamma for full feature set, obtain CV
      std::set<QString> currentFeatureSet;
      std::cout << "all feat  " << allFeatures.size() << std::endl;
      // CV!

      double currentCV = 0.0;

      //bool loopAgain = false;
      //double eps = 0.01; // max worse cv when using fewer features.

      // select a random subset of the features
      std::map<double, QString> randomSelector;
      forall(QString f, allFeatures){
      	randomSelector[std::rand()] = f;
      }

      std::map<double, QString>::iterator it = randomSelector.begin();
      while(currentFeatureSet.size() < 5 and it != randomSelector.end()){
        it++;
        currentFeatureSet.insert((*it).second);
      }

      int loopsNoChange = 0;

      do {
        // try to add a feature
        double bestCV = 0.0;
        std::set<QString> bestFeatureSet;
        attemptFeatureAdd(currentFeatureSet, allFeatures, false, bestCV, bestFeatureSet);
        if(bestCV > currentCV){ // found sth better
          currentFeatureSet = bestFeatureSet;
          currentCV = bestCV;
          loopsNoChange = 0;
        } else {
          loopsNoChange++;
        }
          forall(QString q, currentFeatureSet){
            qDebug() << q;
          }

        // try to remove a feature
        bestCV = 0.0;
        //std::set<QString> bestFeatureSet;
        attemptFeatureRem(currentFeatureSet, allFeatures, false, bestCV, bestFeatureSet);
        if(bestCV > currentCV){ // found sth better
          currentFeatureSet = bestFeatureSet;
          currentCV = bestCV;
          loopsNoChange = 0;
        } else {
          loopsNoChange++;
        }
          forall(QString q, currentFeatureSet){
            qDebug() << q;
          }
        // try to adjust C/gamma
        svm_problem probTest;
        svm_node *x_spaceTest;
        generateDataSVM(allData, probTest, currentFeatureSet, x_spaceTest);

        optimizeSVMParameter(probTest, param, 3, param.C/2., param.C*2.,
    	param.gamma/2., param.gamma*2.);

        std::cout << "param  " << param.C << "/" << param.gamma << std::endl;

      } while(loopsNoChange < 10); // loop until no improvement

      modelFeatures = currentFeatureSet;
      return currentFeatureSet;

    }


    /**
     * Set up the classifier's libsvm parameters.
     *
     * Unknown svmType strings select C_SVC, unknown kernelType strings select
     * RBF. All remaining libsvm parameters receive fixed values.
     *
     * @param svmType    "nu_svc", "one_class", "epsilon_svr", "nu_svr"; anything else: C_SVC
     * @param kernelType "precomputed", "linear", "polynomial", "sigmoid"; anything else: RBF
     * @param pGamma     stored as param.gamma
     * @param pC         stored as param.C
     * @param featureTh  stored as featureThreshold
     * @param kFold      stored as nr_fold
     * @return always true
     */
    bool SVMClassifier::initialize(QString svmType, QString kernelType, double pGamma, double pC, double featureTh, int kFold)
    {
      featureThreshold = featureTh;
      nr_fold = kFold;

      // machine type, C_SVC unless one of the known names matches
      param.svm_type = C_SVC;
      if(svmType == "nu_svc")           param.svm_type = NU_SVC;
      else if(svmType == "one_class")   param.svm_type = ONE_CLASS;
      else if(svmType == "epsilon_svr") param.svm_type = EPSILON_SVR;
      else if(svmType == "nu_svr")      param.svm_type = NU_SVR;

      // kernel, RBF unless one of the known names matches
      param.kernel_type = RBF;
      if(kernelType == "precomputed")     param.kernel_type = PRECOMPUTED;
      else if(kernelType == "linear")     param.kernel_type = LINEAR;
      else if(kernelType == "polynomial") param.kernel_type = POLY;
      else if(kernelType == "sigmoid")    param.kernel_type = SIGMOID;

      // caller supplied values
      param.C = pC;
      param.gamma = pGamma;

      // fixed values
      param.degree = 3;
      param.coef0 = 0;
      param.nu = 0.5;
      param.p = 0.1;
      param.eps = 1e-3;
      param.cache_size = 100;
      param.shrinking = 1;
      param.probability = 1;

      // no per-class weights
      param.nr_weight = 0;
      param.weight_label = NULL;
      param.weight = NULL;

      return true;
    }



    /**
     * Grid search for C and gamma over the default ranges.
     *
     * Forwards to the full overload with optGridSize steps, C starting at 1 and
     * bounded by 10000, gamma starting at 0.01 and bounded by 5.
     *
     * @param svmProb  problem to cross validate
     * @param svmParam parameters; C and gamma are overwritten by the search
     * @return the value returned by the full overload
     */
    double SVMClassifier::optimizeSVMParameter(svm_problem& svmProb, svm_parameter& svmParam)
    {
      const double cLow = 1, cHigh = 10000;
      const double gammaLow = 0.01, gammaHigh = 5;

      return optimizeSVMParameter(svmProb, svmParam, optGridSize, cLow, cHigh, gammaLow, gammaHigh);
    }
    /**
     * Grid search for the C/gamma pair with the best cross validation score.
     *
     * C runs geometrically from cStart up to (excluding) cStop, gamma likewise
     * from gammaStart up to (excluding) gammaStop; the step factors are chosen
     * so that each range is covered in about gridSize steps. Every pair is
     * scored with doCrossValidation() using nr_fold folds.
     *
     * On return svmParam.C / svmParam.gamma hold the best pair. When no pair
     * scored above 0 (for example because a range is empty) the values svmParam
     * had on entry are restored instead of being replaced by placeholders.
     *
     * @param svmProb    problem to cross validate
     * @param svmParam   parameters; C and gamma are modified
     * @param gridSize   number of steps per axis; values below 1 are treated as 1
     * @param cStart     first C value
     * @param cStop      exclusive upper bound for C
     * @param gammaStart first gamma value
     * @param gammaStop  exclusive upper bound for gamma
     * @return the best cross validation score, 0 when nothing scored above 0
     */
    double SVMClassifier::optimizeSVMParameter(svm_problem& svmProb, svm_parameter& svmParam, int gridSize, double cStart, double cStop,
        double gammaStart, double gammaStop)
    {
      // A non-positive grid size would yield a step factor that never reaches
      // the upper bound (negative) or is infinite (zero).
      if(gridSize < 1) gridSize = 1;

      const double cFactor = std::pow(cStop/cStart, 1/(double)gridSize);
      const double gammaFactor = std::pow(gammaStop/gammaStart, 1/(double)gridSize);

      // Start from the caller's values so they survive a search that finds nothing.
      double Optimized_C = svmParam.C;
      double Optimized_gamma = svmParam.gamma;
      double max_cv = 0;

      for(double c = cStart; c < cStop; c*= cFactor){
        for(double gamma = gammaStart; gamma < gammaStop; gamma*= gammaFactor){
          svmParam.C = c;
          svmParam.gamma = gamma;
          const double cross_validation = doCrossValidation(svmProb, svmParam, nr_fold);
          if(cross_validation > max_cv){
            max_cv = cross_validation;
            Optimized_C = c;
            Optimized_gamma = gamma;
          }
        }
      }

      // either the best pair or, if none was found, the values from entry
      svmParam.C = Optimized_C;
      svmParam.gamma = Optimized_gamma;

      std::cout << "Final C/Gamma//Cross validation "<< svmParam.C << " / "<< svmParam.gamma <<" // "<< max_cv << std::endl;
      if(max_cv < 0.6){
        std::cout << "Warning: the cross validation accuracy is very low. Consider re-training!" << std::endl;
      }
      return max_cv;
    }


    /**
     * Run libsvm's k-fold cross validation and summarise the outcome.
     *
     * For EPSILON_SVR and NU_SVR the mean squared error and the squared
     * correlation coefficient are printed and 0 is returned. For every other
     * svm type the fraction of samples whose predicted value equals the stored
     * label is returned.
     *
     * @param svmProb  problem to validate
     * @param svmParam parameters handed to svm_cross_validation()
     * @param nrFold   number of folds
     * @return accuracy in the classification case, 0 in the regression case
     */
    double SVMClassifier::doCrossValidation(svm_problem& svmProb, svm_parameter& svmParam, int nrFold)
    {
      const int sampleCount = svmProb.l;

      // one predicted value per sample, filled in by libsvm
      double *predicted = Malloc(double, sampleCount);
      svm_cross_validation(&svmProb, &svmParam, nrFold, predicted);

      double accuracy = 0.;
      const bool isRegression = (svmParam.svm_type == EPSILON_SVR || svmParam.svm_type == NU_SVR);

      if(isRegression){
        double squaredError = 0;
        double sumPred = 0, sumTruth = 0, sumPredSq = 0, sumTruthSq = 0, sumCross = 0;
        for(int k = 0; k < sampleCount; ++k){
          const double truth = svmProb.y[k];
          const double guess = predicted[k];
          squaredError += (guess-truth)*(guess-truth);
          sumPred += guess;
          sumTruth += truth;
          sumPredSq += guess*guess;
          sumTruthSq += truth*truth;
          sumCross += guess*truth;
        }
        std::cout<<"Cross Validation Mean squared error = " << squaredError/sampleCount << std::endl;
        std::cout<<"Cross Validation Squared correlation coefficient = " <<  ((sampleCount*sumCross-sumPred*sumTruth)*(sampleCount*sumCross-sumPred*sumTruth))/
          ((sampleCount*sumPredSq-sumPred*sumPred)*(sampleCount*sumTruthSq-sumTruth*sumTruth)) << std::endl;
      } else {
        // classification: count exact label matches
        int hits = 0;
        for(int k = 0; k < sampleCount; ++k){
          if(predicted[k] == svmProb.y[k]) ++hits;
        }
        accuracy = (double)hits/double(sampleCount);
      }

      free(predicted);
      return accuracy;
    }

    /**
     * Release the buffers held by the member problem and parameter set.
     *
     * Every pointer is reset to NULL after it has been freed, so calling this
     * function again does not free the same memory twice. free() accepts NULL,
     * hence no explicit checks are needed.
     */
    void SVMClassifier::freeMemory()
    {
      free(prob.y);
      prob.y = NULL;

      free(prob.x);
      prob.x = NULL;

      free(x_space);
      x_space = NULL;

      // param.C is used as the "parameters are in use" indicator
      if(param.C){
        svm_destroy_param(&param);
        // NOTE(review): libsvm's svm_destroy_param() frees weight_label and
        // weight without resetting them - confirm against the bundled libsvm.
        param.weight_label = NULL;
        param.weight = NULL;
      }
    }

  /**
   * Convert file data into a libsvm problem restricted to a set of features.
   *
   * For every row of fd.data one svm_node array is allocated that holds the
   * values of the selected features (in the order of fd.measures, indexed
   * 0,1,2,...) followed by the index -1 terminator. svmProb.y receives the row
   * labels from fd.dataLabels and svmProb.l the number of rows.
   *
   * Side effect: modelFeatures is set to fileFeatures.
   *
   * All buffers are allocated with Malloc; svmProb.y, svmProb.x and every
   * svmProb.x[i] have to be released with free() by the caller.
   *
   * @param fd           source data (measures, data, dataLabels)
   * @param svmProb      out: problem to fill
   * @param fileFeatures names of the features to include
   * @param x_spaceProb  passed by value, so the caller's pointer is not updated
   * @return always true
   */
  bool SVMClassifier::generateDataSVM(FileData& fd, svm_problem& svmProb, std::set<QString>& fileFeatures, svm_node *x_spaceProb)
  {
    modelFeatures = fileFeatures;

    // Columns of fd.data that belong to a requested feature. Determined once,
    // as the selection is the same for every row.
    std::vector<size_t> selectedColumns;
    const size_t totalMeasureNr = fd.measures.size();
    for(size_t j = 0; j < totalMeasureNr; j++){
      if(fileFeatures.find(fd.measures[j]) != fileFeatures.end())
        selectedColumns.push_back(j);
    }

    // The problem size follows the rows that are actually written below, so the
    // tables can neither overflow nor contain unset entries.
    const size_t dataEntries = fd.data.size();
    svmProb.l = static_cast<int>(dataEntries);
    svmProb.y = Malloc(double, svmProb.l);
    svmProb.x = Malloc(struct svm_node *, svmProb.l);

    // selected values plus one slot for the terminator
    const size_t rowLength = selectedColumns.size() + 1;

    for(size_t i = 0; i < dataEntries; i++){
      const std::vector<double>& v = fd.data[i];
      x_spaceProb = Malloc(struct svm_node, rowLength);

      svmProb.x[i] = x_spaceProb;
      svmProb.y[i] = fd.dataLabels[i];

      for(size_t k = 0; k < selectedColumns.size(); k++){
        const size_t column = selectedColumns[k];
        x_spaceProb[k].index = static_cast<int>(k);
        // rows shorter than the measure list contribute 0 for the missing values
        x_spaceProb[k].value = column < v.size() ? v[column] : 0.0;
      }
      // the terminator goes directly behind the last value
      x_spaceProb[selectedColumns.size()].index = -1;
    }

    return true;
  }



  /**
   * Read a training file into a FileData object.
   *
   * Expected layout: the first line holds the number of features, followed by
   * one feature name per line, followed by one sample per line in the form
   * "label idx:value idx:value ... idx:-1". The last field of a sample line is
   * treated as terminator and not stored.
   *
   * The file is read in a single pass. Empty lines and lines that consist of
   * a label only are skipped entirely, so fd.data and fd.dataLabels always have
   * the same length; fd.size is set to the number of stored samples.
   *
   * @param fd       out: replaced by the content of the file
   * @param filename file to read
   * @return false when the file cannot be opened, true otherwise
   */
  bool FileHandlerSVM::readTrainingFile(FileData& fd, QString filename)
  {
    // overwrite fd
    FileData fdNew;
    fd = fdNew;

    QFile file(filename);
    if(!file.open(QIODevice::ReadOnly))
      return false;

    QTextStream in(&file);

    // header: feature count, then the feature names
    const int featureCount = in.readLine().toInt();
    std::cout<<"size of features\t" << featureCount << std::endl;
    for(int k = 0; k < featureCount && !in.atEnd(); k++){
      fd.measures.push_back(in.readLine());
    }
    forall(const QString &name, fd.measures)
      qDebug() << "feature train" << name << "\n";

    std::cout << "Train Read File Values: " << fd.measures.size() << std::endl;

    // samples
    bool ok;
    while(!in.atEnd()){
      QString line = in.readLine();
      if(!line.size()) continue;

      QStringList fields = line.split(" ");
      // a label without any further field carries no sample
      if(fields.size() <= 1) continue;

      const int label = fields[0].toInt(&ok);

      std::vector<double> newEntry;
      // the last field is the terminator, hence size() - 1
      for(int p = 1; p < fields.size() - 1; p++){
        QStringList val = fields[p].split(":");
        if(val.size() > 1){
          newEntry.push_back(val[1].toDouble(&ok));
        } else {
          // keep the column layout intact for a token without "idx:value" form
          qDebug() << "malformed field" << fields[p];
          newEntry.push_back(0.0);
        }
      }

      // label and values are stored together so both containers stay aligned
      fd.labels.insert(label);
      fd.dataLabels.push_back(label);
      fd.data.push_back(newEntry);
    }
    file.close();

    fd.size = fd.data.size();
    std::cout << "Train rowsize: " << fd.size << std::endl;

    return true;
  }

  /**
   * Oversample the under-represented labels until all labels are equally
   * frequent.
   *
   * For every label that has fewer samples than the most frequent label,
   * randomly chosen (std::rand) copies of its existing samples are appended to
   * fd.data / fd.dataLabels. fd.size is updated to the new number of samples.
   *
   * @param fd data set to balance in place
   * @return always true
   */
  bool FileHandlerSVM::balanceTrainingFile(FileData& fd)
  {
    typedef std::vector<std::vector<double> > Rows;
    typedef std::map<int, Rows> RowsByLabel;

    // group the existing samples by label
    RowsByLabel labelEntries;
    const size_t dataEntries = fd.data.size();
    for(size_t i = 0; i < dataEntries; i++){
      labelEntries[fd.dataLabels[i]].push_back(fd.data[i]);
    }

    // size of the largest group
    size_t maxCount = 0;
    for(RowsByLabel::const_iterator it = labelEntries.begin(); it != labelEntries.end(); ++it){
      if(it->second.size() > maxCount) maxCount = it->second.size();
    }

    // Fill every smaller group up with random duplicates. The duplicates are
    // drawn from the grouped copies, which are not affected by appending to fd.
    for(RowsByLabel::const_iterator it = labelEntries.begin(); it != labelEntries.end(); ++it){
      const Rows& rows = it->second; // never empty: a group only exists for labels that occur
      for(size_t missing = maxCount - rows.size(); missing > 0; missing--){
        const int randIdx = std::rand() % static_cast<int>(rows.size());
        fd.dataLabels.push_back(it->first);
        fd.data.push_back(rows[randIdx]);
      }
    }

    // keep the sample counter in step with the enlarged data
    fd.size = fd.data.size();
    return true;
  }

  /**
   * Enlarge the data set with noisy copies of the existing samples.
   *
   * For each original sample, `repeats` copies are appended. Every value of a
   * copy is multiplied by 1 + noiseFactor * k/100 with k drawn from
   * std::rand() % 200 - 100, i.e. k lies in [-100, 99]. The copies keep the
   * label of their original. fd.size is updated to the new number of samples.
   *
   * @param fd          data set to augment in place
   * @param repeats     number of noisy copies per original sample
   * @param noiseFactor maximal relative deviation of a value
   * @return always true
   */
  bool FileHandlerSVM::augmentTrainingFile(FileData& fd, int repeats, double noiseFactor)
  {
    // only the samples present on entry serve as originals
    const size_t dataEntries = fd.data.size();

    for(size_t i = 0; i < dataEntries; i++){
      for(int r = 0; r < repeats; r++){
        // the copy is completed before fd.data grows, so the original is never
        // read while the container reallocates
        std::vector<double> newEntry;
        newEntry.reserve(fd.data[i].size());
        for(size_t k = 0; k < fd.data[i].size(); k++){
          const double randomFac = (std::rand() % 200 - 100) * noiseFactor/100. + 1.;
          newEntry.push_back(fd.data[i][k] * randomFac);
        }
        fd.data.push_back(newEntry);
        fd.dataLabels.push_back(fd.dataLabels[i]);
      }
    }

    // keep the sample counter in step with the enlarged data
    fd.size = fd.data.size();
    return true;
  }

  /**
   * Put the samples of a data set into random order.
   *
   * A permutation of the row indices is produced with std::random_shuffle and
   * applied to fd.data and fd.dataLabels alike, so values and labels stay
   * paired. measures, labels and size are carried over unchanged.
   *
   * @param fd data set to shuffle in place
   */
  void FileHandlerSVM::shuffleTrainingFile(FileData& fd)
  {
    const size_t sampleCount = fd.data.size();

    // identity permutation, then shuffled
    std::vector<int> order;
    order.reserve(sampleCount);
    for(size_t k = 0; k < sampleCount; ++k){
      order.push_back(static_cast<int>(k));
    }
    std::random_shuffle(order.begin(), order.end());

    FileData shuffled;
    shuffled.size = fd.size;
    shuffled.labels = fd.labels;
    shuffled.measures = fd.measures;

    for(std::vector<int>::const_iterator pos = order.begin(); pos != order.end(); ++pos){
      shuffled.data.push_back(fd.data[*pos]);
      shuffled.dataLabels.push_back(fd.dataLabels[*pos]);
    }

    fd = shuffled;
  }

  /**
   * Cap the number of samples per label.
   *
   * Samples are visited in their current order; for each label only the first
   * maxEntries samples are kept. measures and labels are carried over, and
   * fd.size is set to the number of samples that remain.
   *
   * @param fd         data set to limit in place
   * @param maxEntries maximal number of samples kept per label
   */
  void FileHandlerSVM::limitTrainingFile(FileData& fd, int maxEntries)
  {
    const size_t dataEntries = fd.data.size();

    FileData fdNew;
    fdNew.measures = fd.measures;
    fdNew.labels = fd.labels;

    // number of samples kept so far, per label
    std::map<int, int> nrParentEntries;

    for(size_t i = 0; i < dataEntries; i++){
      int& kept = nrParentEntries[fd.dataLabels[i]];
      if(kept >= maxEntries) continue;
      fdNew.data.push_back(fd.data[i]);
      fdNew.dataLabels.push_back(fd.dataLabels[i]);
      kept++;
    }

    // the counter has to describe the reduced data, not the original one
    fdNew.size = fdNew.data.size();
    fd = fdNew;
  }


  /**
   * Build a FileData object from the measures of a set of cells.
   *
   * fd is reset, fd.measures receives fileFeatures, and for every label of
   * allLabels that is >= 1 one sample is appended: its values are read from
   * cc.measures[feature][label] when parentLabels[label] is non-zero, otherwise
   * the sample has no values. The sample's class is parentLabels[label].
   * parentLabels[label] is inserted into fd.labels for every label, including
   * those that are skipped.
   *
   * fd.size is set to the number of samples that were actually stored.
   *
   * @param fd           out: replaced by the generated data
   * @param allLabels    cell labels to export
   * @param fileFeatures names of the measures to export, in column order
   * @param cc           source of the measure values
   * @param parentLabels class of each cell label
   * @return always true
   */
  bool FileHandlerSVM::createFileData(FileData& fd, std::set<int>& allLabels, QStringList& fileFeatures,
    CellCluster& cc, IntFloatAttr& parentLabels)
  {
    FileData fdNew;
    fd = fdNew;

    forall(const QString &name, fileFeatures){
      fd.measures.push_back(name);
    }

    // now go through all selected cells and collect the values
    forall(int label, allLabels){
      std::vector<double> currentDataPoint;
      fd.labels.insert(parentLabels[label]);
      if(label < 1) continue;

      // values are only collected for cells that have a class assigned
      if(parentLabels[label]){
        forall(const QString &name, fileFeatures){
          double val = cc.measures[name][label];
          currentDataPoint.push_back(val);
        }
      }
      fd.data.push_back(currentDataPoint);
      fd.dataLabels.push_back(parentLabels[label]);
    }

    // labels below 1 are skipped above, so count what was stored instead of
    // what was requested
    fd.size = fd.data.size();

    return true;
  }

  /**
   * Write a data set to a training file, either as a new file or appended to
   * an existing one.
   *
   * ".txt" is appended to the file name when it is missing. A new file starts
   * with the number of measures followed by one measure name per line. Every
   * sample is written as "label 0:v0 1:v1 ... n:-1".
   *
   * In append mode the header of the existing file is read (and logged) first,
   * then the samples are appended; no header is written.
   *
   * @param fd       data to write
   * @param filename target file
   * @param append   true: add the samples to an existing file
   * @return false when the name is empty or the file cannot be opened,
   *         true otherwise
   */
  bool FileHandlerSVM::writeTrainingFile(FileData& fd, QString filename, bool append)
  {
    if(filename.isEmpty())
        return false;
    if(!filename.endsWith(".txt", Qt::CaseInsensitive))
        filename += ".txt";
    QFile file(filename);

    if(!append){
      if(!file.open(QIODevice::WriteOnly)){
        std::cout<<"Cannot Create a new file";
        return false; // nothing can be written to a file that is not open
      }
    } else {
      if(!file.open(QIODevice::ReadOnly)){
        std::cout<<"Cannot append to the file";
        return false;
      }
      {
        // grab the measure names from the existing file; the stream is gone
        // before the file is closed and reopened
        std::set<QString> fileFeatures;
        QTextStream in(&file);
        int size = in.readLine().toInt();
        while(size > 0 && !in.atEnd()){
          fileFeatures.insert(in.readLine());
          size--;
        }
        forall(const QString &name, fileFeatures){
          qDebug() << "\n Append feature " << name << "\n";
        }
      }
      file.close();
      if(!file.open(QIODevice::WriteOnly | QIODevice::Append)){
        std::cout<<"Cannot append to the file";
        return false;
      }
    }

    QTextStream out(&file);

    // a new file starts with the measure count and the measure names
    if(!append){
      out << fd.measures.size() << endl;
      forall(const QString &name, fd.measures){
        out << name << endl;
      }
    }

    // one line per sample; stop at the shorter container so no row is read
    // past the end of fd.data
    for(size_t i = 0; i < fd.dataLabels.size() && i < fd.data.size(); i++){
      out << fd.dataLabels[i] <<" ";
      for(size_t j = 0; j< fd.data[i].size(); j++){
        out << j << ":"<< fd.data[i][j] <<" ";
      }
      // terminator field
      out << fd.data[i].size() << ":"<< "-1";
      out << endl;
    }
    file.close();

    return true;
  }

}