#include "paraminbfgs.h"

// TODO(review): verify orientation of invhess[i][j] vs invhess[j][i] in
// ComputeDirectionVector/bfgsUpdate (the matrix is symmetric in exact
// arithmetic, so either orientation gives the same result here).

// ********************************************************
// functions for class ParaminBFGS
// ********************************************************

ParaminBFGS::ParaminBFGS(NetInterface* netInt) : ParaminSearch(netInt) {
  type = OPTBFGS;
  lineS = new Armijo();              // used to perform the Armijo line search
  grad = new NetGradient(numvar);    // used to compute the gradient
  deltax.resize(numvar, 0.0);        // x_i - x_{i-1}
  h.resize(numvar, 0.0);             // the line search direction vector
  gim1.resize(numvar, 0.0);          // stores the previous gradient
  invhess.AddRows(numvar, numvar, 0.0);

  // default values for the optimisation parameters
  shannonScaling = 0;
  bfgs_constant = 1;
  armijoproblem = 0;
  to_print = 0;
  errortol = 0.01;
  xtol = 0.000001;
  maxrounds = 5;
  initial_difficultgrad = 1;
  maxiterations = 200;
}

ParaminBFGS::~ParaminBFGS() {
  delete grad;
  delete lineS;
}

// Read the optinfo settings for BFGS from the input file, stopping at the
// next section keyword.  Unknown options are a fatal error; options that
// belong to other paramin optimisers are read and discarded with a warning.
void ParaminBFGS::read(CommentStream& infile, char* text) {
  int i = 0;
  double temp;
  while (!infile.eof() && strcasecmp(text, "seed") && strcasecmp(text, "[hooke]") && strcasecmp(text, "[simann]")) {
    infile >> ws;
    if (strcasecmp(text, "shannonscaling") == 0) {
      infile >> shannonScaling;
    } else if (strcasecmp(text, "difficultgrad") == 0) {
      infile >> initial_difficultgrad;
    } else if (strcasecmp(text, "bfgspar") == 0) {
      infile >> bfgs_constant;
    } else if ((strcasecmp(text, "maxiterations") == 0) || (strcasecmp(text, "maxiter") == 0) || (strcasecmp(text, "bfgsiter") == 0)) {
      infile >> maxiterations;
    } else if ((strcasecmp(text, "eps") == 0) || (strcasecmp(text, "bfgseps") == 0) || (strcasecmp(text, "errortol") == 0)) {
      infile >> errortol;
    } else if (strcasecmp(text, "xtol") == 0) {
      infile >> xtol;
    } else if ((strcasecmp(text, "maxrounds") == 0) || (strcasecmp(text, "bfgsrounds") == 0)) {
      infile >> maxrounds;
    } else if (strcasecmp(text, "printing") == 0) {
      infile >> to_print;
    } else if (strcasecmp(text, "sigma") == 0) {
      infile >> temp;
      lineS->setSigma(temp);
    } else if (strcasecmp(text, "beta") == 0) {
      infile >> temp;
      lineS->setBeta(temp);
    } else if ((strcasecmp(text, "gradacc") == 0) || (strcasecmp(text, "gradstep") == 0)
        || (strcasecmp(text, "st") == 0) || (strcasecmp(text, "step") == 0)
        || (strcasecmp(text, "scale") == 0)) {
      cout << "BFGS - " << text << " is not used in paramin" << endl;
      // consume and discard the option's value, otherwise it would be read
      // as the next keyword and trigger the unknown-option error below
      infile >> temp;
    } else {
      cerr << "Error while reading optinfo for bfgs - unknown option " << text << endl;
      exit(EXIT_FAILURE);
    }
    i++;
    infile >> text;
  }

  if (i == 0)
    cerr << "Warning - no optinfo give for bfgs in file - using default parameters" << endl;
}

// Perform one BFGS iteration: line search along h, then update x, the
// gradient, and (optionally) the inverse Hessian approximation.  Sets
// bfgsFail to classify the outcome (0 converged, <0 continue, >0 failed).
void ParaminBFGS::iteration() {
  double alpha;
  doLineseek();

  // JMB - changed to check if a double is very close to zero
  alpha = lineS->getAlpha();
  if (isZero(alpha)) {
    // line search made no progress - retry with a more careful gradient,
    // give up (bfgsFail = 6) if it happens twice in a row
    armijoproblem++;
    difficultgrad++;
    bfgsFail = -2;
    if (armijoproblem > 1)
      bfgsFail = 6;
  }

  if (alpha < 0.0) {
    // negative alpha indicates wrong derivative - Hessian wrong
    bfgsFail = 1;
  } else {
    UpdateXandGrad();
    ComputeGradient();
    normgrad = grad->getNormGrad();
    gi = grad->getGradient();
    if (difficultgrad >= 1)
      diaghess = grad->getDiagonalHessian();
    error = normgrad / (1.0 + fabs(bestf));
    if (bfgsFail != 6 && bfgsFail != -2) {
      if (bfgs_constant == 1) {
        int update = bfgsUpdate();
        if (update == 5)
          bfgsFail = update;
      }
    }
    normx = sqrt(normx);   // bfgsUpdate accumulated the squared norm
  }

  // decide whether to stop, keep going, or report a failure
  if (bfgsFail <= 0) {
    if (error <= errortol) {
      bfgsFail = 0;                // converged
    } else if ((normdeltax < xtol) && (bfgsFail != -2)) {
      bfgsFail = 2;                // x stopped moving
    } else
      bfgsFail = -1;               // continue iterating
  }
}

// Compute the gradient at bestx; a netError return from the gradient
// computation is fatal (the network communication has failed).
void ParaminBFGS::ComputeGradient() {
  int i = grad->computeGradient(net, bestx, bestf, difficultgrad);
  difficultgrad = grad->getDifficultGrad();
  if (i == net->netError()) {
    cerr << "Error in BFGS - did not receive gradient data\n";
    this->printX(net->unscaleX(bestx));
    net->stopNetComm();
    exit(EXIT_FAILURE);
  }
}

// Choose the initial step length s (bounded by the distance to the box
// constraints) and run the Armijo line search along h.
void ParaminBFGS::doLineseek() {
  s = -1e10;
  double upp = GetS(1);   // largest feasible step along h
  if (upp <= 0.0) {
    cerr << "Warning in BFGS - upperbound for alpha is negative\n";
    upp = 1.0;
  }
  s = upp;
  if (shannonScaling == 1) {
    if (iters == 0 || upp < 1.0)
      s = upp;
    else
      s = 1.0;
  }
  lineS->doArmijo(bestx, bestf, dery, h, net, min(s, 1.0));
}

// Rescale h (and the directional derivative dery) so that the step has the
// same length as the previous accepted step (Shannon scaling).
void ParaminBFGS::ScaleDirectionVector() {
  int i;
  dery *= normdeltax / normh;
  for (i = 0; i < numvar; i++)
    h[i] *= normdeltax / normh;
}

// h = -invhess * gi, the quasi-Newton search direction.
void ParaminBFGS::ComputeDirectionVector() {
  int i, j;
  for (i = 0; i < numvar; i++) {
    h[i] = 0;
    for (j = 0; j < numvar; j++)
      h[i] -= invhess[i][j] * gi[j];
  }
}

// Reset invhess to a (diagonal) starting approximation: the inverse of the
// diagonal Hessian estimate when available, otherwise the identity.
void ParaminBFGS::DefineHessian() {
  int i, j;
  for (i = 0; i < numvar; i++) {
    // start with the identity matrix
    for (j = 0; j < numvar; j++)
      invhess[i][j] = 0.0;

    // set diagonal from the gradient's Hessian estimate when we have one
    if ((difficultgrad >= 1) && (bfgs_constant == 1)) {
      if (diaghess[i] > 0)
        invhess[i][i] = 1 / diaghess[i];
      else
        invhess[i][i] = 1.0;
    } else
      invhess[i][i] = 1.0;

    // defensive check - the assignments above always give a positive value
    if (invhess[i][i] < 0.0) {
      cerr << "Error in BFGS - negative inverse hessian\n";
      break;
    }
  }
}

// Accept the line-search result: record deltax, move bestx/bestf to the
// new point and remember the old gradient in gim1.
void ParaminBFGS::UpdateXandGrad() {
  int i;
  normdeltax = 0.0;
  DoubleVector temp(lineS->getBestX());
  for (i = 0; i < numvar; i++)
    deltax[i] = temp[i] - bestx[i];
  bestx = temp;
  bestf = lineS->getBestF();
  gim1 = gi;
  normdeltax = (deltax * deltax);   // squared norm of the step
}

// Apply the BFGS update to the inverse Hessian approximation.  Also
// accumulates |bestx|^2 into normx (square-rooted by the caller).
// Returns 5 on an instability (curvature condition deltaxg <= 0 violated),
// -999 otherwise.
int ParaminBFGS::bfgsUpdate() {
  int i, j;

  /* prepare the BFGS update */
  double deltaxg = 0.0;    // deltax . deltag (curvature)
  double deltaghg = 0.0;   // deltag . invhess . deltag
  DoubleVector hg(numvar);
  DoubleVector deltag(numvar);
  normx = 0.0;
  for (i = 0; i < numvar; i++) {
    hg[i] = 0.0;
    deltag[i] = gi[i] - gim1[i];
    deltaxg += deltax[i] * deltag[i];
    normx += bestx[i] * bestx[i];
  }
  if (deltaxg <= 0.0)
    cerr << "Warning in BFGS - instability error\n";

  for (i = 0; i < numvar; i++) {
    for (j = 0; j < numvar; j++)
      hg[i] += invhess[i][j] * deltag[j];
    deltaghg += deltag[i] * hg[i];
  }

  /* do the BFGS update (standard inverse-Hessian formula) */
  for (i = 0; i < numvar; i++) {
    for (j = 0; j < numvar; j++) {
      invhess[i][j] += (deltax[i] * deltax[j] / deltaxg)
        - (hg[i] * hg[j] / deltaghg)
        + deltaghg * (deltax[i] / deltaxg - hg[i] / deltaghg)
                   * (deltax[j] / deltaxg - hg[j] / deltaghg);
    }
  }

  if (deltaxg <= 0)
    return 5;
  else
    return -999;
}

// Euclidean norm of the current best point.
double ParaminBFGS::norm() {
  double normx = bestx * bestx;   // local, shadows the member on purpose
  return sqrt(normx);
}

// Write the current gradient to the file "gradient".
void ParaminBFGS::printGradient() {
  ofstream outputfile;
  outputfile.open("gradient");
  if (!outputfile) {
    cerr << "Error in BFGS - could not print gradient data\n";
    printX(gi);
    net->stopNetComm();
    exit(EXIT_FAILURE);
  }
  printX(outputfile, gi);
  outputfile.close();
}

// Write the inverse Hessian approximation to the file "hessian".
void ParaminBFGS::printInverseHessian() {
  ofstream outputfile;
  outputfile.open("hessian");
  if (!outputfile) {
    cerr << "Error in BFGS - could not print hessian\n";
    exit(EXIT_FAILURE);
  }
  invhess.Print(outputfile);
  outputfile.close();
}

// Feasible step-length interval along h within the box constraints:
// get == 0 returns the lower limit, get == 1 the upper limit.
double ParaminBFGS::GetS(int get) {
  double b = 1.e69;    // running upper limit
  double a = -1.e69;   // running lower limit
  DoubleVector alpha_l(numvar);
  DoubleVector alpha_u(numvar);
  int i;
  for (i = 0; i < numvar; i++) {
    if (h[i] > 0) {
      alpha_l[i] = (lowerbound[i] - bestx[i]) / h[i];
      alpha_u[i] = (upperbound[i] - bestx[i]) / h[i];
    } else if (h[i] < 0) {
      alpha_u[i] = (lowerbound[i] - bestx[i]) / h[i];
      alpha_l[i] = (upperbound[i] - bestx[i]) / h[i];
    } else {
      // h[i] == 0: this coordinate does not constrain the step
      alpha_u[i] = b;
      alpha_l[i] = a;
    }
    a = max(a, alpha_l[i]);
    b = min(b, alpha_u[i]);
  }

  if (get == 0)
    return a;
  else if (get == 1)
    return b;
  else {
    cerr << "Error in BFGS - unrecognised return value\n";
    exit(EXIT_FAILURE);
  }
}

// For the first iteration of a round, define the parameters and the
// Hessian matrix, the gradient and the direction vector.
void ParaminBFGS::SetInitialValues() {
  armijoproblem = 0;
  if (difficultgrad != initial_difficultgrad) {
    difficultgrad = initial_difficultgrad;
    grad->initializeDiagonalHessian();
    ComputeGradient();
    normgrad = grad->getNormGrad();
    if (difficultgrad >= 1)
      diaghess = grad->getDiagonalHessian();
    gi = grad->getGradient();
    normx = norm();
  }
  DefineHessian();
}

// Prepare for the next iteration: compute the search direction h, its norm
// and the directional derivative dery, and check that h is a descent
// direction (dery <= 0).
void ParaminBFGS::UpdateValues() {
  bfgsFail = 0;
  int i;
  normdeltax = 1.0;
  ComputeDirectionVector();
  dery = 0.0;
  normh = 0.0;
  for (i = 0; i < numvar; i++) {
    normh += h[i] * h[i];
    dery += gi[i] * h[i];
  }

  // if scaling of the direction vector is to be done, then do it here
  if (iters <= numvar && shannonScaling == 1)
    ScaleDirectionVector();

  if (dery > 0) {
    cerr << "Error in BFGS - the derivative is positive\n";
    bfgsFail = 4;
  }
  error = normgrad / (1.0 + fabs(bestf));
}

// Main optimisation driver: run up to maxrounds rounds of BFGS iterations,
// restarting the Hessian approximation each round, until the tolerance
// criterion is satisfied or the x's stop moving.
void ParaminBFGS::OptimiseLikelihood() {
  int rounds = 0;
  int numrounds = 0;
  bestx = net->getInitialX();
  bestf = net->getScore();
  difficultgrad = -1;

  for (rounds = 0; rounds < maxrounds; rounds++) {
    bfgsFail = -1;
    iters = 0;
    SetInitialValues();
    while ((iters < maxiterations) && (bfgsFail < 0)) {
      this->UpdateValues();
      this->iteration();
      net->setInitialScore(bestx, bestf);
      iters++;
    }

    if ((iters == maxiterations) && (bfgsFail != 0))
      cout << "During BFGS - Quit this round because have reached maxiterations." << endl;

    if (bfgsFail == 0) {
      // converged - remember the round and make this the last one
      numrounds = rounds;
      rounds = maxrounds - 1;
    }
    if (bfgsFail == 1)
      cerr << "BFGS - failed because alpha < 0" << endl;
    if (bfgsFail == 2)
      cerr << "BFGS - failed because normdeltax < xtol" << endl;
    if (bfgsFail == 4)
      cerr << "BFGS - failed because dery > 0" << endl;
    if (bfgsFail == 5)
      cerr << "BFGS - failed because of instability error" << endl;
    if (bfgsFail == 6)
      cerr << "BFGS - failed because could not get alpha from linesearch, alpha == 0" << endl;
    if (bfgsFail == -2)
      cerr << "BFGS - Could not get alpha from linesearch but will continue and increase difficultgrad" << endl;
    if (rounds < (maxrounds - 1))
      cout << "Starting a new round. Still have " << maxrounds - rounds - 1 << " to go." << endl;
  }

  if ((rounds >= maxrounds) && (bfgsFail != 0)) {
    cout << "\nBFGS optimisation completed after " << rounds << " rounds (max " << maxrounds
      << ") and " << iters << " iterations (max " << maxiterations
      << ")\nThe model terminated because the maximum number of rounds was reached\n";
  } else if (bfgsFail == 0) {
    cout << "\nStopping BFGS \n\nThe optimisation stopped after " << numrounds
      << " rounds (max " << maxrounds << ") and " << iters << " iterations (max " << maxiterations
      << ")\nThe optimisation stopped because an optimum was found for this run\n";
    converge = 1;
  }

  score = bestf;
  if (to_print) {
    this->printGradient();
    this->printInverseHessian();
  }
}

// Write a summary of the run to the output file at the given precision.
void ParaminBFGS::Print(ofstream& outfile, int prec) {
  outfile << "; BFGS algorithm ran for " << iters
    << " function evaluations\n; and stopped when the likelihood value was "
    << setprecision(prec) << score;
  if (converge == -1)
    outfile << "\n; because an error occured during the optimisation\n";
  else if (converge == 1)
    outfile << "\n; because the convergence criteria were met\n";
  else
    outfile << "\n; because the maximum number of function evaluations was reached\n";
}