#include "paraminbfgs.h"
// Check compute direction vector, how use invhess[i][j], i, j OK???
// ********************************************************
// functions for class ParaminBFGS
// ********************************************************
ParaminBFGS::ParaminBFGS(NetInterface* netInt) : ParaminSearch(netInt) {
type = OPTBFGS;
// Vector temp(numvar);
lineS = new Armijo(); // use lineS to do linesearch
grad = new NetGradient(numvar); // use grad to compute gradient
// not sure I need to resize here, need to check...
deltax.resize(numvar, 0.0); // xi-xim1
h.resize(numvar, 0.0); // the line search vector
gim1.resize(numvar, 0.0); // store previous gradient
invhess.AddRows(numvar, numvar, 0.0);
/*invhess = new double*[numvar];
int i;
for (i = 0; i < numvar; i++)
invhess[i] = new double[numvar];
*/
// xopt.resize(numvar, 1);
// iter = 0;
// default values
shannonScaling = 0;
bfgs_constant = 1;
armijoproblem = 0;
to_print = 0;
errortol = 0.01;
xtol = 0.000001;
maxrounds = 5;
initial_difficultgrad = 1;
// If want to set default value separately
maxiterations = 200;
// converged = 0;
}
ParaminBFGS::~ParaminBFGS() {
int i;
/*
for (i = 0; i < numvar; i++)
delete[] invhess[i];
delete[] invhess;
*/
delete grad;
delete lineS;
}
void ParaminBFGS::read(CommentStream& infile, char* text) {
int i = 0;
double temp;
while (!infile.eof() && strcasecmp(text, "seed") && strcasecmp(text, "[hooke]") && strcasecmp(text, "[simann]")) {
infile >> ws;
if (strcasecmp(text, "shannonscaling") == 0) {
infile >> shannonScaling;
} else if (strcasecmp(text, "difficultgrad") == 0) {
infile >> initial_difficultgrad;
} else if (strcasecmp(text, "bfgspar") == 0) {
infile >> bfgs_constant;
} else if ((strcasecmp(text, "maxiterations") == 0) || (strcasecmp(text, "maxiter") == 0) || (strcasecmp(text, "bfgsiter") == 0)) {
infile >> maxiterations;
} else if ((strcasecmp(text, "eps") == 0) || (strcasecmp(text, "bfgseps") == 0) || (strcasecmp(text, "errortol") == 0)) {
infile >> errortol;
} else if (strcasecmp(text, "xtol") == 0) {
infile >> xtol;
} else if ((strcasecmp(text, "maxrounds") == 0) || (strcasecmp(text, "bfgsrounds") == 0)) {
infile >> maxrounds;
} else if (strcasecmp(text, "printing") == 0) {
infile >> to_print;
} else if (strcasecmp(text, "sigma") == 0) {
infile >> temp;
lineS->setSigma(temp);
} else if (strcasecmp(text, "beta") == 0) {
infile >> temp;
lineS->setBeta(temp);
} else if ((strcasecmp(text, "gradacc") == 0) || (strcasecmp(text, "gradstep") == 0) || (strcasecmp(text, "st") == 0) || (strcasecmp(text, "step") == 0) || (strcasecmp(text, "scale") == 0)) {
cout << "BFGS - " << text << " is not used in paramin" << endl;
infile >> ws;
} else {
cerr << "Error while reading optinfo for bfgs - unknown option " << text << endl;
exit(EXIT_FAILURE);
}
i++;
infile >> text;
}
if (i == 0)
cerr << "Warning - no optinfo give for bfgs in file - using default parameters" << endl;
}
void ParaminBFGS::iteration() {
double alpha;
doLineseek();
//JMB - changed to check if a double is very close to zero
alpha = lineS->getAlpha();
if (isZero(alpha)) {
armijoproblem++;
difficultgrad++;
bfgsFail = -2;
if (armijoproblem > 1)
bfgsFail = 6;
}
if (alpha < 0.0) {
// negative alpha indicates wrong derivative - Hessian wrong
bfgsFail = 1;
} else {
UpdateXandGrad();
// prevy = y;
ComputeGradient();
normgrad = grad->getNormGrad();
//cout << "got normgrad: " << normgrad << endl;
gi = grad->getGradient();
if (difficultgrad >= 1)
diaghess = grad->getDiagonalHessian();
error = normgrad / (1.0 + fabs(bestf));
if (bfgsFail != 6 && bfgsFail != -2) {
if (bfgs_constant == 1) {
int update = bfgsUpdate();
if (update == 5)
bfgsFail = update;
}
}
normx = sqrt(normx);
// relchng = (prevy - y) / (1.0 + ABSOFNUM(prevy));
}
if (bfgsFail <= 0) {
if (error <= errortol) {
bfgsFail = 0;
} else if ((normdeltax < xtol) && (bfgsFail != -2)) {
bfgsFail = 2;
} else
bfgsFail = -1;
}
}
void ParaminBFGS::ComputeGradient() {
int i;
i = grad->computeGradient(net, bestx, bestf, difficultgrad);
int tmp = grad->getDifficultGrad();
difficultgrad = tmp;
if (i == net->netError()) {
cerr << "Error in BFGS - did not receive gradient data\n";
this->printX(net->unscaleX(bestx));
net->stopNetComm();
exit(EXIT_FAILURE);
}
}
void ParaminBFGS::doLineseek() {
s = -1e10;
double low = GetS(0);
double upp = GetS(1);
if (upp <= 0.0) {
cerr << "Warning in BFGS - upperbound for alpha is negative\n";
upp = 1.0;
}
s = upp;
if (shannonScaling == 1) {
if (iters == 0 || upp < 1.0)
s = upp;
else
s = 1.0;
}
lineS->doArmijo(bestx, bestf, dery, h, net, min(s, 1.0));
}
void ParaminBFGS::ScaleDirectionVector() {
int i;
dery *= normdeltax / normh;
for (i = 0; i < numvar; i++)
h[i] *= normdeltax / normh;
}
void ParaminBFGS::ComputeDirectionVector() {
int i, j;
for (i = 0; i < numvar; i++) {
h[i] = 0;
for (j = 0; j < numvar; j++)
h[i] -= invhess[i][j] * gi[j];
}
}
void ParaminBFGS::DefineHessian() {
int i, j;
for (i = 0; i < numvar; i++) {
// start with the identity matrix
for (j = 0; j < numvar; j++)
invhess[i][j] = 0.0;
// set diagonal only if didn't get from grad
if ((difficultgrad >= 1) && (bfgs_constant == 1)) {
if (diaghess[i] > 0)
invhess[i][i] = 1 / diaghess[i];
else
invhess[i][i] = 1.0;
} else
invhess[i][i] = 1.0;
if (invhess[i][i] < 0.0) {
cerr << "Error in BFGS - negative inverse hessian\n";
break;
}
}
}
void ParaminBFGS::UpdateXandGrad() {
int i;
double xi;
normdeltax = 0.0;
DoubleVector temp(lineS->getBestX());
for (i = 0; i < numvar; i++)
deltax[i] = temp[i] - bestx[i];
bestx = temp;
bestf = lineS->getBestF();
// temp = gi;
// gim1 = temp;
gim1 = gi;
// must check if this is still behaving correctly with double vector..
normdeltax = (deltax * deltax);
}
int ParaminBFGS::bfgsUpdate() {
int i, j;
/* prepare the BFGS update */
double deltaxg = 0.0;
double deltaghg = 0.0;
DoubleVector hg(numvar);
DoubleVector deltag(numvar);
normx = 0.0;
for (i = 0; i < numvar; i++) {
hg[i] = 0.0;
deltag[i] = gi[i] - gim1[i];
deltaxg += deltax[i] * deltag[i];
normx += bestx[i] * bestx[i];
}
if (deltaxg <= 0.0)
cerr << "Warning in BFGS - instability error\n";
// Must check that invhess is used correctly [i][j] or [j][i]
for (i = 0; i < numvar; i++) {
for (j = 0; j < numvar; j++)
hg[i] += invhess[i][j] * deltag[j];
deltaghg += deltag[i] * hg[i];
}
/* do the BFGS update */
for (i = 0; i < numvar; i++) {
for (j = 0; j < numvar; j++) {
invhess[i][j] += (deltax[i] * deltax[j] / deltaxg) - (hg[i] * hg[j] /
deltaghg) + deltaghg * (deltax[i] / deltaxg - hg[i] / deltaghg) *
(deltax[j] / deltaxg - hg[j] / deltaghg);
}
}
if (deltaxg <= 0)
return 5;
else
return -999;
}
double ParaminBFGS::norm() {
int i;
double normx;
normx = 0.0;
/*
for (i = 0; i < numvar; i++)
normx += bestx[i] * bestx[i];
*/
normx = bestx*bestx;
normx = sqrt(normx);
return normx;
}
void ParaminBFGS::printGradient() {
int i;
ofstream outputfile;
outputfile.open("gradient");
if (!outputfile) {
cerr << "Error in BFGS - could not print gradient data\n";
printX(gi);
net->stopNetComm();
exit(EXIT_FAILURE);
}
printX(outputfile, gi);
outputfile.close();
}
void ParaminBFGS::printInverseHessian() {
int i, j;
ofstream outputfile;
outputfile.open("hessian");
if (!outputfile) {
cerr << "Error in BFGS - could not print hessian\n";
exit(EXIT_FAILURE);
}
invhess.Print(outputfile);
/*
for (i = 0; i < numvar; i++) {
for (j = 0; j < numvar; j++)
outputfile << invhess[i][j] << " ";
outputfile << endl;
}
*/
outputfile.close();
}
double ParaminBFGS::GetS(int get) {
double b = 1.e69;
double a = -1.e69;
DoubleVector alpha_l(numvar);
DoubleVector alpha_u(numvar);
int i;
for (i = 0; i < numvar; i++) {
if (h[i] > 0) {
alpha_l[i] = (lowerbound[i] - bestx[i]) / h[i];
alpha_u[i] = (upperbound[i] - bestx[i]) / h[i];
} else if (h[i] < 0) {
alpha_u[i] = (lowerbound[i] - bestx[i]) / h[i];
alpha_l[i] = (upperbound[i] - bestx[i]) / h[i];
} else {
alpha_u[i] = b;
alpha_l[i] = a;
}
a = max(a, alpha_l[i]);
b = min(b, alpha_u[i]);
}
if (get == 0)
return a;
else if (get == 1)
return b;
else {
cerr << "Error in BFGS - unrecognised return value\n";
exit(EXIT_FAILURE);
}
}
void ParaminBFGS::SetInitialValues() {
// For the first iteration, define the parameters
// and the Hessian matrix, the gradient and the direction vector
armijoproblem = 0;
if (difficultgrad != initial_difficultgrad) {
difficultgrad = initial_difficultgrad;
grad->initializeDiagonalHessian();
ComputeGradient();
normgrad = grad->getNormGrad();
if (difficultgrad >= 1)
diaghess = grad->getDiagonalHessian();
gi = grad->getGradient();
normx = norm();
}
DefineHessian();
}
void ParaminBFGS::UpdateValues() {
bfgsFail = 0;
int i;
normdeltax = 1.0;
ComputeDirectionVector();
dery = 0.0;
normh = 0.0;
for (i = 0; i < numvar; i++) {
normh += h[i] * h[i];
dery += gi[i] * h[i];
}
// if scaling of direction vector is to be done, then do it here.
if (iters <= numvar && shannonScaling == 1)
ScaleDirectionVector();
if (dery > 0) {
cerr << "Error in BFGS - the derivative is positive\n";
bfgsFail = 4;
}
error = normgrad / (1.0 + fabs(bestf));
}
void ParaminBFGS::OptimiseLikelihood() {
int rounds = 0;
int numrounds = 0;
bestx = net->getInitialX();
bestf = net->getScore();
difficultgrad = -1;
// Loop over BFGS iterations
// continue until tolerance criterion is satisfied
// or give up because x's do not move -- only valid if function does not
// change either and must do one iteration
for (rounds = 0; rounds < maxrounds; rounds++) {
bfgsFail = -1;
iters = 0;
SetInitialValues();
while ((iters < maxiterations) && (bfgsFail < 0)) {
this->UpdateValues();
this->iteration();
net->setInitialScore(bestx, bestf);
iters++;
}
if ((iters == maxiterations) && (bfgsFail != 0))
cout << "During BFGS - Quit this round because have reached maxiterations." << endl;
if (bfgsFail == 0) {
numrounds = rounds;
rounds = maxrounds - 1;
}
if (bfgsFail == 1)
cerr << "BFGS - failed because alpha < 0" << endl;;
if (bfgsFail == 2)
cerr << "BFGS - failed because normdeltax < xtol" << endl;
if (bfgsFail == 4)
cerr << "BFGS - failed because dery > 0" << endl;
if (bfgsFail == 5)
cerr << "BFGS - failed because of instability error" << endl;
if (bfgsFail == 6)
cerr << "BFGS - failed because could not get alpha from linesearch, alpha == 0" << endl;
if (bfgsFail == -2)
cerr << "BFGS - Could not get alpha from linesearch but will continue and increase difficultgrad" << endl;
if (rounds < (maxrounds - 1))
cout << "Starting a new round. Still have " << maxrounds - rounds - 1 << " to go." << endl;
}
if ((rounds >= maxrounds) && (bfgsFail != 0)) {
cout << "\nBFGS optimisation completed after " << rounds << " rounds (max " << maxrounds << ") and " << iters << " iterations (max " << maxiterations << ")\nThe model terminated because the maximum number of rounds was reached\n";
} else if (bfgsFail == 0) {
cout << "\nStopping BFGS \n\nThe optimisation stopped after " << numrounds << " rounds (max " << maxrounds << ") and " << iters << " iterations (max " << maxiterations << ")\nThe optimisation stopped because an optimum was found for this run\n";
converge = 1;
}
score = bestf;
if (to_print) {
this->printGradient();
this->printInverseHessian();
}
}
void ParaminBFGS::Print(ofstream& outfile, int prec) {
outfile << "; BFGS algorithm ran for " << iters
<< " function evaluations\n; and stopped when the likelihood value was "
<< setprecision(prec) << score;
if (converge == -1)
outfile << "\n; because an error occured during the optimisation\n";
else if (converge == 1)
outfile << "\n; because the convergence criteria were met\n";
else
outfile << "\n; because the maximum number of function evaluations was reached\n";
}