[mareframe] Annotation of /trunk/paramin-beta/paraminbfgs.cc

Annotation of /trunk/paramin-beta/paraminbfgs.cc

1 :	agomez	1	#include "paraminbfgs.h"
2 :			// Check compute direction vector, how use invhess[i][j], i, j OK???
3 :
4 :			// ********************************************************
5 :			// functions for class ParaminBFGS
6 :			// ********************************************************
7 :			ParaminBFGS::ParaminBFGS(NetInterface* netInt) : ParaminSearch(netInt) {
8 :			type = OPTBFGS;
9 :			// Vector temp(numvar);
10 :			lineS = new Armijo(); // use lineS to do linesearch
11 :			grad = new NetGradient(numvar); // use grad to compute gradient
12 :			// not sure I need to resize here, need to check...
13 :			deltax.resize(numvar, 0.0); // xi-xim1
14 :			h.resize(numvar, 0.0); // the line search vector
15 :			gim1.resize(numvar, 0.0); // store previous gradient
16 :			invhess.AddRows(numvar, numvar, 0.0);
17 :			/invhess = new double[numvar];
18 :			int i;
19 :			for (i = 0; i < numvar; i++)
20 :			invhess[i] = new double[numvar];
21 :			*/
22 :			// xopt.resize(numvar, 1);
23 :			// iter = 0;
24 :			// default values
25 :			shannonScaling = 0;
26 :			bfgs_constant = 1;
27 :			armijoproblem = 0;
28 :			to_print = 0;
29 :			errortol = 0.01;
30 :			xtol = 0.000001;
31 :			maxrounds = 5;
32 :			initial_difficultgrad = 1;
33 :			// If want to set default value separately
34 :			maxiterations = 200;
35 :			// converged = 0;
36 :			}
37 :
38 :			ParaminBFGS::~ParaminBFGS() {
39 :			int i;
40 :			/*
41 :			for (i = 0; i < numvar; i++)
42 :			delete[] invhess[i];
43 :			delete[] invhess;
44 :			*/
45 :			delete grad;
46 :			delete lineS;
47 :			}
48 :
49 :			void ParaminBFGS::read(CommentStream& infile, char* text) {
50 :			int i = 0;
51 :			double temp;
52 :
53 :			while (!infile.eof() && strcasecmp(text, "seed") && strcasecmp(text, "[hooke]") && strcasecmp(text, "[simann]")) {
54 :			infile >> ws;
55 :			if (strcasecmp(text, "shannonscaling") == 0) {
56 :			infile >> shannonScaling;
57 :
58 :			} else if (strcasecmp(text, "difficultgrad") == 0) {
59 :			infile >> initial_difficultgrad;
60 :
61 :			} else if (strcasecmp(text, "bfgspar") == 0) {
62 :			infile >> bfgs_constant;
63 :
64 :			} else if ((strcasecmp(text, "maxiterations") == 0) \|\| (strcasecmp(text, "maxiter") == 0) \|\| (strcasecmp(text, "bfgsiter") == 0)) {
65 :			infile >> maxiterations;
66 :
67 :			} else if ((strcasecmp(text, "eps") == 0) \|\| (strcasecmp(text, "bfgseps") == 0) \|\| (strcasecmp(text, "errortol") == 0)) {
68 :			infile >> errortol;
69 :
70 :			} else if (strcasecmp(text, "xtol") == 0) {
71 :			infile >> xtol;
72 :
73 :			} else if ((strcasecmp(text, "maxrounds") == 0) \|\| (strcasecmp(text, "bfgsrounds") == 0)) {
74 :			infile >> maxrounds;
75 :
76 :			} else if (strcasecmp(text, "printing") == 0) {
77 :			infile >> to_print;
78 :
79 :			} else if (strcasecmp(text, "sigma") == 0) {
80 :			infile >> temp;
81 :			lineS->setSigma(temp);
82 :
83 :			} else if (strcasecmp(text, "beta") == 0) {
84 :			infile >> temp;
85 :			lineS->setBeta(temp);
86 :
87 :			} else if ((strcasecmp(text, "gradacc") == 0) \|\| (strcasecmp(text, "gradstep") == 0) \|\| (strcasecmp(text, "st") == 0) \|\| (strcasecmp(text, "step") == 0) \|\| (strcasecmp(text, "scale") == 0)) {
88 :			cout << "BFGS - " << text << " is not used in paramin" << endl;
89 :			infile >> ws;
90 :
91 :			} else {
92 :			cerr << "Error while reading optinfo for bfgs - unknown option " << text << endl;
93 :			exit(EXIT_FAILURE);
94 :			}
95 :			i++;
96 :			infile >> text;
97 :			}
98 :
99 :			if (i == 0)
100 :			cerr << "Warning - no optinfo give for bfgs in file - using default parameters" << endl;
101 :			}
102 :
103 :			void ParaminBFGS::iteration() {
104 :			double alpha;
105 :			doLineseek();
106 :
107 :			//JMB - changed to check if a double is very close to zero
108 :			alpha = lineS->getAlpha();
109 :			if (isZero(alpha)) {
110 :			armijoproblem++;
111 :			difficultgrad++;
112 :			bfgsFail = -2;
113 :			if (armijoproblem > 1)
114 :			bfgsFail = 6;
115 :			}
116 :
117 :			if (alpha < 0.0) {
118 :			// negative alpha indicates wrong derivative - Hessian wrong
119 :			bfgsFail = 1;
120 :			} else {
121 :			UpdateXandGrad();
122 :			// prevy = y;
123 :			ComputeGradient();
124 :
125 :			normgrad = grad->getNormGrad();
126 :			//cout << "got normgrad: " << normgrad << endl;
127 :			gi = grad->getGradient();
128 :
129 :			if (difficultgrad >= 1)
130 :			diaghess = grad->getDiagonalHessian();
131 :
132 :			error = normgrad / (1.0 + fabs(bestf));
133 :			if (bfgsFail != 6 && bfgsFail != -2) {
134 :			if (bfgs_constant == 1) {
135 :			int update = bfgsUpdate();
136 :			if (update == 5)
137 :			bfgsFail = update;
138 :			}
139 :			}
140 :			normx = sqrt(normx);
141 :			// relchng = (prevy - y) / (1.0 + ABSOFNUM(prevy));
142 :			}
143 :
144 :			if (bfgsFail <= 0) {
145 :			if (error <= errortol) {
146 :			bfgsFail = 0;
147 :			} else if ((normdeltax < xtol) && (bfgsFail != -2)) {
148 :			bfgsFail = 2;
149 :			} else
150 :			bfgsFail = -1;
151 :			}
152 :			}
153 :
154 :			void ParaminBFGS::ComputeGradient() {
155 :			int i;
156 :
157 :			i = grad->computeGradient(net, bestx, bestf, difficultgrad);
158 :			int tmp = grad->getDifficultGrad();
159 :			difficultgrad = tmp;
160 :			if (i == net->netError()) {
161 :			cerr << "Error in BFGS - did not receive gradient data\n";
162 :			this->printX(net->unscaleX(bestx));
163 :			net->stopNetComm();
164 :			exit(EXIT_FAILURE);
165 :			}
166 :			}
167 :
168 :			void ParaminBFGS::doLineseek() {
169 :			s = -1e10;
170 :			double low = GetS(0);
171 :			double upp = GetS(1);
172 :			if (upp <= 0.0) {
173 :			cerr << "Warning in BFGS - upperbound for alpha is negative\n";
174 :			upp = 1.0;
175 :			}
176 :			s = upp;
177 :			if (shannonScaling == 1) {
178 :			if (iters == 0 \|\| upp < 1.0)
179 :			s = upp;
180 :			else
181 :			s = 1.0;
182 :			}
183 :			lineS->doArmijo(bestx, bestf, dery, h, net, min(s, 1.0));
184 :			}
185 :
186 :			void ParaminBFGS::ScaleDirectionVector() {
187 :			int i;
188 :			dery *= normdeltax / normh;
189 :			for (i = 0; i < numvar; i++)
190 :			h[i] *= normdeltax / normh;
191 :			}
192 :
193 :			void ParaminBFGS::ComputeDirectionVector() {
194 :			int i, j;
195 :			for (i = 0; i < numvar; i++) {
196 :			h[i] = 0;
197 :			for (j = 0; j < numvar; j++)
198 :			h[i] -= invhess[i][j] * gi[j];
199 :			}
200 :			}
201 :
202 :			void ParaminBFGS::DefineHessian() {
203 :			int i, j;
204 :			for (i = 0; i < numvar; i++) {
205 :			// start with the identity matrix
206 :			for (j = 0; j < numvar; j++)
207 :			invhess[i][j] = 0.0;
208 :
209 :			// set diagonal only if didn't get from grad
210 :			if ((difficultgrad >= 1) && (bfgs_constant == 1)) {
211 :			if (diaghess[i] > 0)
212 :			invhess[i][i] = 1 / diaghess[i];
213 :			else
214 :			invhess[i][i] = 1.0;
215 :
216 :			} else
217 :			invhess[i][i] = 1.0;
218 :
219 :			if (invhess[i][i] < 0.0) {
220 :			cerr << "Error in BFGS - negative inverse hessian\n";
221 :			break;
222 :			}
223 :			}
224 :			}
225 :
226 :			void ParaminBFGS::UpdateXandGrad() {
227 :			int i;
228 :			double xi;
229 :			normdeltax = 0.0;
230 :			DoubleVector temp(lineS->getBestX());
231 :			for (i = 0; i < numvar; i++)
232 :			deltax[i] = temp[i] - bestx[i];
233 :			bestx = temp;
234 :			bestf = lineS->getBestF();
235 :			// temp = gi;
236 :			// gim1 = temp;
237 :			gim1 = gi;
238 :			// must check if this is still behaving correctly with double vector..
239 :			normdeltax = (deltax * deltax);
240 :			}
241 :
242 :			int ParaminBFGS::bfgsUpdate() {
243 :			int i, j;
244 :
245 :			/* prepare the BFGS update */
246 :			double deltaxg = 0.0;
247 :			double deltaghg = 0.0;
248 :			DoubleVector hg(numvar);
249 :			DoubleVector deltag(numvar);
250 :			normx = 0.0;
251 :
252 :			for (i = 0; i < numvar; i++) {
253 :			hg[i] = 0.0;
254 :			deltag[i] = gi[i] - gim1[i];
255 :			deltaxg += deltax[i] * deltag[i];
256 :			normx += bestx[i] * bestx[i];
257 :			}
258 :
259 :			if (deltaxg <= 0.0)
260 :			cerr << "Warning in BFGS - instability error\n";
261 :			// Must check that invhess is used correctly [i][j] or [j][i]
262 :			for (i = 0; i < numvar; i++) {
263 :			for (j = 0; j < numvar; j++)
264 :			hg[i] += invhess[i][j] * deltag[j];
265 :
266 :			deltaghg += deltag[i] * hg[i];
267 :			}
268 :
269 :			/* do the BFGS update */
270 :			for (i = 0; i < numvar; i++) {
271 :			for (j = 0; j < numvar; j++) {
272 :			invhess[i][j] += (deltax[i] * deltax[j] / deltaxg) - (hg[i] * hg[j] /
273 :			deltaghg) + deltaghg * (deltax[i] / deltaxg - hg[i] / deltaghg) *
274 :			(deltax[j] / deltaxg - hg[j] / deltaghg);
275 :			}
276 :			}
277 :
278 :			if (deltaxg <= 0)
279 :			return 5;
280 :			else
281 :			return -999;
282 :			}
283 :
284 :			double ParaminBFGS::norm() {
285 :			int i;
286 :			double normx;
287 :			normx = 0.0;
288 :			/*
289 :			for (i = 0; i < numvar; i++)
290 :			normx += bestx[i] * bestx[i];
291 :			*/
292 :			normx = bestx*bestx;
293 :			normx = sqrt(normx);
294 :			return normx;
295 :			}
296 :
297 :			void ParaminBFGS::printGradient() {
298 :			int i;
299 :			ofstream outputfile;
300 :			outputfile.open("gradient");
301 :			if (!outputfile) {
302 :			cerr << "Error in BFGS - could not print gradient data\n";
303 :			printX(gi);
304 :			net->stopNetComm();
305 :			exit(EXIT_FAILURE);
306 :			}
307 :			printX(outputfile, gi);
308 :
309 :			outputfile.close();
310 :			}
311 :
312 :			void ParaminBFGS::printInverseHessian() {
313 :			int i, j;
314 :			ofstream outputfile;
315 :			outputfile.open("hessian");
316 :			if (!outputfile) {
317 :			cerr << "Error in BFGS - could not print hessian\n";
318 :			exit(EXIT_FAILURE);
319 :			}
320 :			invhess.Print(outputfile);
321 :			/*
322 :			for (i = 0; i < numvar; i++) {
323 :			for (j = 0; j < numvar; j++)
324 :			outputfile << invhess[i][j] << " ";
325 :			outputfile << endl;
326 :			}
327 :			*/
328 :			outputfile.close();
329 :			}
330 :
331 :			double ParaminBFGS::GetS(int get) {
332 :			double b = 1.e69;
333 :			double a = -1.e69;
334 :
335 :			DoubleVector alpha_l(numvar);
336 :			DoubleVector alpha_u(numvar);
337 :
338 :			int i;
339 :			for (i = 0; i < numvar; i++) {
340 :			if (h[i] > 0) {
341 :			alpha_l[i] = (lowerbound[i] - bestx[i]) / h[i];
342 :			alpha_u[i] = (upperbound[i] - bestx[i]) / h[i];
343 :			} else if (h[i] < 0) {
344 :			alpha_u[i] = (lowerbound[i] - bestx[i]) / h[i];
345 :			alpha_l[i] = (upperbound[i] - bestx[i]) / h[i];
346 :			} else {
347 :			alpha_u[i] = b;
348 :			alpha_l[i] = a;
349 :			}
350 :			a = max(a, alpha_l[i]);
351 :			b = min(b, alpha_u[i]);
352 :			}
353 :
354 :			if (get == 0)
355 :			return a;
356 :			else if (get == 1)
357 :			return b;
358 :			else {
359 :			cerr << "Error in BFGS - unrecognised return value\n";
360 :			exit(EXIT_FAILURE);
361 :			}
362 :			}
363 :
364 :
365 :			void ParaminBFGS::SetInitialValues() {
366 :			// For the first iteration, define the parameters
367 :			// and the Hessian matrix, the gradient and the direction vector
368 :			armijoproblem = 0;
369 :			if (difficultgrad != initial_difficultgrad) {
370 :			difficultgrad = initial_difficultgrad;
371 :			grad->initializeDiagonalHessian();
372 :			ComputeGradient();
373 :			normgrad = grad->getNormGrad();
374 :
375 :			if (difficultgrad >= 1)
376 :			diaghess = grad->getDiagonalHessian();
377 :
378 :			gi = grad->getGradient();
379 :			normx = norm();
380 :			}
381 :			DefineHessian();
382 :			}
383 :
384 :			void ParaminBFGS::UpdateValues() {
385 :			bfgsFail = 0;
386 :			int i;
387 :
388 :			normdeltax = 1.0;
389 :			ComputeDirectionVector();
390 :			dery = 0.0;
391 :			normh = 0.0;
392 :			for (i = 0; i < numvar; i++) {
393 :			normh += h[i] * h[i];
394 :			dery += gi[i] * h[i];
395 :			}
396 :
397 :			// if scaling of direction vector is to be done, then do it here.
398 :			if (iters <= numvar && shannonScaling == 1)
399 :			ScaleDirectionVector();
400 :			if (dery > 0) {
401 :			cerr << "Error in BFGS - the derivative is positive\n";
402 :			bfgsFail = 4;
403 :			}
404 :			error = normgrad / (1.0 + fabs(bestf));
405 :			}
406 :
407 :			void ParaminBFGS::OptimiseLikelihood() {
408 :			int rounds = 0;
409 :			int numrounds = 0;
410 :
411 :			bestx = net->getInitialX();
412 :			bestf = net->getScore();
413 :			difficultgrad = -1;
414 :
415 :			// Loop over BFGS iterations
416 :			// continue until tolerance criterion is satisfied
417 :			// or give up because x's do not move -- only valid if function does not
418 :			// change either and must do one iteration
419 :
420 :			for (rounds = 0; rounds < maxrounds; rounds++) {
421 :			bfgsFail = -1;
422 :			iters = 0;
423 :			SetInitialValues();
424 :			while ((iters < maxiterations) && (bfgsFail < 0)) {
425 :			this->UpdateValues();
426 :			this->iteration();
427 :
428 :			net->setInitialScore(bestx, bestf);
429 :			iters++;
430 :			}
431 :
432 :			if ((iters == maxiterations) && (bfgsFail != 0))
433 :			cout << "During BFGS - Quit this round because have reached maxiterations." << endl;
434 :
435 :			if (bfgsFail == 0) {
436 :			numrounds = rounds;
437 :			rounds = maxrounds - 1;
438 :			}
439 :			if (bfgsFail == 1)
440 :			cerr << "BFGS - failed because alpha < 0" << endl;;
441 :			if (bfgsFail == 2)
442 :			cerr << "BFGS - failed because normdeltax < xtol" << endl;
443 :			if (bfgsFail == 4)
444 :			cerr << "BFGS - failed because dery > 0" << endl;
445 :			if (bfgsFail == 5)
446 :			cerr << "BFGS - failed because of instability error" << endl;
447 :			if (bfgsFail == 6)
448 :			cerr << "BFGS - failed because could not get alpha from linesearch, alpha == 0" << endl;
449 :			if (bfgsFail == -2)
450 :			cerr << "BFGS - Could not get alpha from linesearch but will continue and increase difficultgrad" << endl;
451 :
452 :			if (rounds < (maxrounds - 1))
453 :			cout << "Starting a new round. Still have " << maxrounds - rounds - 1 << " to go." << endl;
454 :			}
455 :
456 :			if ((rounds >= maxrounds) && (bfgsFail != 0)) {
457 :			cout << "\nBFGS optimisation completed after " << rounds << " rounds (max " << maxrounds << ") and " << iters << " iterations (max " << maxiterations << ")\nThe model terminated because the maximum number of rounds was reached\n";
458 :			} else if (bfgsFail == 0) {
459 :			cout << "\nStopping BFGS \n\nThe optimisation stopped after " << numrounds << " rounds (max " << maxrounds << ") and " << iters << " iterations (max " << maxiterations << ")\nThe optimisation stopped because an optimum was found for this run\n";
460 :			converge = 1;
461 :			}
462 :			score = bestf;
463 :			if (to_print) {
464 :			this->printGradient();
465 :			this->printInverseHessian();
466 :			}
467 :			}
468 :			void ParaminBFGS::Print(ofstream& outfile, int prec) {
469 :			outfile << "; BFGS algorithm ran for " << iters
470 :			<< " function evaluations\n; and stopped when the likelihood value was "
471 :			<< setprecision(prec) << score;
472 :			if (converge == -1)
473 :			outfile << "\n; because an error occured during the optimisation\n";
474 :			else if (converge == 1)
475 :			outfile << "\n; because the convergence criteria were met\n";
476 :			else
477 :			outfile << "\n; because the maximum number of function evaluations was reached\n";
478 :			}

root@forge.cesga.es	ViewVC Help
Powered by ViewVC 1.0.0