1 : |
agomez |
1 |
#include "errorhandler.h" |
2 : |
|
|
#include "optinfo.h" |
3 : |
|
|
#include "mathfunc.h" |
4 : |
|
|
#include "doublematrix.h" |
5 : |
|
|
#include "ecosystem.h" |
6 : |
|
|
#include "gadget.h" |
7 : |
|
|
#include "global.h" |
8 : |
|
|
|
9 : |
|
|
/* JMB this has been modified to work with the gadget object structure */
|
10 : |
|
|
/* This means that the function has been replaced by a call to ecosystem */
|
11 : |
|
|
/* object, and we can use the vector objects that have been defined */
|
12 : |
|
|
|
13 : |
|
|
extern Ecosystem* EcoSystem;
|
14 : |
|
|
|
15 : |
|
|
/* calculate the smallest eigenvalue of a matrix */
|
16 : |
|
|
double OptInfoBFGS::getSmallestEigenValue(DoubleMatrix M) {
|
17 : |
|
|
|
18 : |
|
|
double eigen, temp, phi, norm;
|
19 : |
|
|
int i, j, k;
|
20 : |
|
|
int nvars = M.Nrow();
|
21 : |
|
|
DoubleMatrix L(nvars, nvars, 0.0);
|
22 : |
|
|
DoubleVector xo(nvars, 1.0);
|
23 : |
|
|
|
24 : |
|
|
// calculate the Cholesky factor of the matrix
|
25 : |
|
|
for (k = 0; k < nvars; k++) {
|
26 : |
|
|
L[k][k] = M[k][k];
|
27 : |
|
|
for (j = 0; j < k - 1; j++)
|
28 : |
|
|
L[k][k] -= (L[k][j] * L[k][j]);
|
29 : |
|
|
for (i = k + 1; i < nvars; i++) {
|
30 : |
|
|
L[i][k] = M[i][k];
|
31 : |
|
|
for (j = 0; j < k - 1; j++)
|
32 : |
|
|
L[i][k] -= (L[i][j] * L[k][j]);
|
33 : |
|
|
|
34 : |
|
|
if (isZero(L[k][k])) {
|
35 : |
|
|
handle.logMessage(LOGINFO, "Error in BFGS - divide by zero when calculating smallest eigen value");
|
36 : |
|
|
return 0.0;
|
37 : |
|
|
} else
|
38 : |
|
|
L[i][k] /= L[k][k];
|
39 : |
|
|
}
|
40 : |
|
|
}
|
41 : |
|
|
|
42 : |
|
|
temp = (double)nvars;
|
43 : |
|
|
eigen = 0.0;
|
44 : |
|
|
for (k = 0; k < nvars; k++) {
|
45 : |
|
|
for (i = 0; i < nvars; i++) {
|
46 : |
|
|
for (j = 0; j < i - 1; j++)
|
47 : |
|
|
xo[i] -= (L[i][j] * xo[j]);
|
48 : |
|
|
xo[i] /= L[i][i]; //JMB we've already checked that L[i][i] is not zero
|
49 : |
|
|
}
|
50 : |
|
|
for (i = nvars - 1; i >= 0; i--) {
|
51 : |
|
|
for (j = nvars - 1; j > i + 1; j--)
|
52 : |
|
|
xo[i] -= (L[j][i] * xo[j]);
|
53 : |
|
|
xo[i] /= L[i][i]; //JMB we've already checked that L[i][i] is not zero
|
54 : |
|
|
}
|
55 : |
|
|
|
56 : |
|
|
phi = 0.0;
|
57 : |
|
|
norm = 0.0;
|
58 : |
|
|
for (i = 0; i < nvars; i++) {
|
59 : |
|
|
phi += xo[i];
|
60 : |
|
|
norm += (xo[i] * xo[i]);
|
61 : |
|
|
}
|
62 : |
|
|
|
63 : |
|
|
if (isZero(norm) || isZero(temp) || isZero(phi)) {
|
64 : |
|
|
handle.logMessage(LOGINFO, "Error in BFGS - divide by zero when calculating smallest eigen value");
|
65 : |
|
|
return 0.0;
|
66 : |
|
|
}
|
67 : |
|
|
|
68 : |
|
|
for (i = 0; i < nvars; i++)
|
69 : |
|
|
xo[i] /= norm;
|
70 : |
|
|
eigen = phi / temp;
|
71 : |
|
|
temp = phi;
|
72 : |
|
|
}
|
73 : |
|
|
|
74 : |
|
|
if (isZero(eigen)) {
|
75 : |
|
|
handle.logMessage(LOGINFO, "Error in BFGS - divide by zero when calculating smallest eigen value");
|
76 : |
|
|
return 0.0;
|
77 : |
|
|
}
|
78 : |
|
|
return 1.0 / eigen;
|
79 : |
|
|
}
|
80 : |
|
|
|
81 : |
|
|
/* calculate the gradient of a function at a given point */
|
82 : |
|
|
/* based on the forward difference gradient approximation (A5.6.3 FDGRAD) */
|
83 : |
|
|
/* Numerical Methods for Unconstrained Optimization and Nonlinear Equations */
|
84 : |
|
|
/* by J E Dennis and Robert B Schnabel, published by SIAM, 1996 */
|
85 : |
|
|
void OptInfoBFGS::gradient(DoubleVector& point, double pointvalue, DoubleVector& newgrad) {
|
86 : |
|
|
|
87 : |
|
|
double ftmp, tmpacc;
|
88 : |
|
|
int i, j;
|
89 : |
|
|
int nvars = point.Size();
|
90 : |
|
|
DoubleVector gtmp(point);
|
91 : |
|
|
|
92 : |
|
|
for (i = 0; i < nvars; i++) {
|
93 : |
|
|
for (j = 0; j < nvars; j++)
|
94 : |
|
|
gtmp[j] = point[j];
|
95 : |
|
|
|
96 : |
|
|
//JMB the scaled parameter values should aways be positive
|
97 : |
|
|
if (point[i] < 0.0)
|
98 : |
|
|
handle.logMessage(LOGINFO, "Error in BFGS - negative parameter when calculating the gradient", point[i]);
|
99 : |
|
|
|
100 : |
|
|
tmpacc = gradacc * max(point[i], 1.0);
|
101 : |
|
|
gtmp[i] += tmpacc;
|
102 : |
|
|
ftmp = EcoSystem->SimulateAndUpdate(gtmp);
|
103 : |
|
|
newgrad[i] = (ftmp - pointvalue) / tmpacc;
|
104 : |
|
|
}
|
105 : |
|
|
}
|
106 : |
|
|
|
107 : |
|
|
// Main BFGS quasi-Newton driver: minimises the likelihood score returned by
// EcoSystem->SimulateAndUpdate().  Maintains an estimate of the inverse
// Hessian (invhess), searches along the quasi-Newton direction with an
// Armijo backtracking line search, and applies the BFGS update after each
// accepted step.  Results are stored via the converge/score/iters members.
void OptInfoBFGS::OptimiseLikelihood() {

  double hy, yBy, temphy, tempyby, normgrad;
  double searchgrad, newf, tmpf, betan;
  int i, j, resetgrad, offset, armijo;

  handle.logMessage(LOGINFO, "\nStarting BFGS optimisation algorithm\n");
  int nvars = EcoSystem->numOptVariables();
  DoubleVector x(nvars);        // current point (scaled parameter space)
  DoubleVector trialx(nvars);   // trial point for the line search
  DoubleVector bestx(nvars);    // best point found so far (scaled)
  DoubleVector init(nvars);     // initial values used to unscale parameters
  DoubleVector h(nvars, 0.0);   // accepted step (betan * search)
  DoubleVector y(nvars, 0.0);   // change in gradient across the step
  DoubleVector By(nvars, 0.0);  // invhess * y, used in the BFGS update
  DoubleVector grad(nvars, 0.0);
  DoubleVector oldgrad(nvars, 0.0);
  DoubleVector search(nvars, 0.0);  // quasi-Newton search direction
  DoubleMatrix invhess(nvars, nvars, 0.0);

  EcoSystem->scaleVariables(); //JMB need to scale variables
  EcoSystem->getOptScaledValues(x);
  EcoSystem->getOptInitialValues(init);

  for (i = 0; i < nvars; i++) {
    trialx[i] = x[i];
    bestx[i] = x[i];
  }

  newf = EcoSystem->SimulateAndUpdate(trialx);
  if (newf != newf) { // check for NaN (NaN is the only value that compares unequal to itself)
    handle.logMessage(LOGINFO, "Error starting BFGS optimisation with f(x) = infinity");
    converge = -1;
    iters = 1;
    return;
  }

  this->gradient(trialx, newf, grad);
  tmpf = newf;
  offset = EcoSystem->getFuncEval(); // number of function evaluations done before loop
  sigma = -sigma; //JMB change sign of sigma (and consequently searchgrad)
  resetgrad = 0;
  // start with the identity as the inverse Hessian estimate
  for (i = 0; i < nvars; i++) {
    oldgrad[i] = grad[i];
    invhess[i][i] = 1.0;
  }

  while (1) {
    iters = EcoSystem->getFuncEval() - offset;
    if (isZero(newf)) {
      handle.logMessage(LOGINFO, "Error in BFGS optimisation after", iters, "function evaluations, f(x) = 0");
      converge = -1;
      return;
    }

    // terminate the algorithm if too many function evaluations occur
    if (iters > bfgsiter) {
      handle.logMessage(LOGINFO, "\nStopping BFGS optimisation algorithm\n");
      handle.logMessage(LOGINFO, "The optimisation stopped after", iters, "function evaluations");
      handle.logMessage(LOGINFO, "The optimisation stopped because the maximum number of function evaluations");
      handle.logMessage(LOGINFO, "was reached and NOT because an optimum was found for this run");

      // re-evaluate at the best point so the stored model state matches it
      score = EcoSystem->SimulateAndUpdate(bestx);
      handle.logMessage(LOGINFO, "\nBFGS finished with a likelihood score of", score);
      return;
    }

    if (resetgrad) {
      // terminate the algorithm if the gradient accuracy required has got too small
      if (gradacc < gradeps) {
        handle.logMessage(LOGINFO, "\nStopping BFGS optimisation algorithm\n");
        handle.logMessage(LOGINFO, "The optimisation stopped after", iters, "function evaluations");
        handle.logMessage(LOGINFO, "The optimisation stopped because the accuracy required for the gradient");
        handle.logMessage(LOGINFO, "calculation is too small and NOT because an optimum was found for this run");

        converge = 2;
        score = EcoSystem->SimulateAndUpdate(bestx);
        handle.logMessage(LOGINFO, "\nBFGS finished with a likelihood score of", score);
        return;
      }

      resetgrad = 0;
      // make the step size when calculating the gradient smaller
      gradacc *= gradstep;
      handle.logMessage(LOGINFO, "Warning in BFGS - resetting search algorithm after", iters, "function evaluations");

      // restart with the identity inverse Hessian estimate
      for (i = 0; i < nvars; i++) {
        for (j = 0; j < nvars; j++)
          invhess[i][j] = 0.0;
        invhess[i][i] = 1.0;
      }
    }

    // search direction: search = -invhess * grad
    for (i = 0; i < nvars; i++) {
      search[i] = 0.0;
      for (j = 0; j < nvars; j++)
        search[i] -= (invhess[i][j] * grad[j]);
    }

    // do armijo calculation
    // searchgrad = sigma * grad.search; sigma was negated above so this is
    // positive for a descent direction
    searchgrad = 0.0;
    for (i = 0; i < nvars; i++)
      searchgrad += (grad[i] * search[i]);
    searchgrad *= sigma;

    // backtracking line search: shrink betan by beta until the Armijo
    // sufficient-decrease condition holds or the step becomes negligible
    armijo = 0;
    betan = step;
    if (searchgrad > verysmall) {
      while ((armijo == 0) && (betan > rathersmall)) {
        for (i = 0; i < nvars; i++)
          trialx[i] = x[i] + (betan * search[i]);

        tmpf = EcoSystem->SimulateAndUpdate(trialx);
        if ((newf > tmpf) && ((newf - tmpf) > (betan * searchgrad)))
          armijo = 1;
        else
          betan *= beta;
      }
    }

    if (armijo) {
      // accepted step - gradient at the new trial point
      this->gradient(trialx, tmpf, grad);
    } else {
      // line search failed - tighten the gradient accuracy and restart
      resetgrad = 1;
      this->gradient(x, newf, grad);
      continue;
    }

    // move to the new point and record the quantities needed for the
    // BFGS update (step h, gradient change y) and the gradient norm
    normgrad = 0.0;
    for (i = 0; i < nvars; i++) {
      h[i] = betan * search[i];
      x[i] += h[i];
      y[i] = grad[i] - oldgrad[i];
      oldgrad[i] = grad[i];
      normgrad += grad[i] * grad[i];
    }
    normgrad = sqrt(normgrad);
    newf = EcoSystem->SimulateAndUpdate(x);
    // NOTE(review): bestx is updated unconditionally here, so it tracks the
    // current point rather than the lowest score seen - confirm intended
    for (i = 0; i < nvars; i++) {
      bestx[i] = x[i];
      trialx[i] = x[i] * init[i];  // unscale for storage/reporting
    }

    iters = EcoSystem->getFuncEval() - offset;
    EcoSystem->storeVariables(newf, trialx);
    handle.logMessage(LOGINFO, "\nNew optimum found after", iters, "function evaluations");
    handle.logMessage(LOGINFO, "The likelihood score is", newf, "at the point");
    EcoSystem->writeBestValues();

    // terminate the algorithm if the convergence criteria has been met
    if ((normgrad / (1.0 + newf)) < bfgseps) {
      handle.logMessage(LOGINFO, "\nStopping BFGS optimisation algorithm\n");
      handle.logMessage(LOGINFO, "The optimisation stopped after", iters, "function evaluations");
      handle.logMessage(LOGINFO, "The optimisation stopped because an optimum was found for this run");

      converge = 1;
      score = EcoSystem->SimulateAndUpdate(bestx);
      tmpf = this->getSmallestEigenValue(invhess);
      if (!isZero(tmpf))
        handle.logMessage(LOGINFO, "The smallest eigenvalue of the inverse Hessian matrix is", tmpf);
      handle.logMessage(LOGINFO, "\nBFGS finished with a likelihood score of", score);
      return;
    }

    // update the estimate for the inverse hessian matrix
    hy = 0.0;
    yBy = 0.0;
    for (i = 0; i < nvars; i++) {
      hy += h[i] * y[i];
      By[i] = 0.0;
      for (j = 0; j < nvars; j++)
        By[i] += (invhess[i][j] * y[j]);
      yBy += y[i] * By[i];
    }

    if ((isZero(hy)) || (isZero(yBy))) {
      // degenerate update (h.y or y'By effectively zero) - reset instead
      resetgrad = 1;
    } else {
      // BFGS inverse-Hessian update
      temphy = 1.0 / hy;
      tempyby = 1.0 / yBy;
      for (i = 0; i < nvars; i++)
        for (j = 0; j < nvars; j++)
          invhess[i][j] += (h[i] * h[j] * temphy) - (By[i] * By[j] * tempyby)
            + yBy * (h[i] * temphy - By[i] * tempyby) * (h[j] * temphy - By[j] * tempyby);
    }
  }
}
|
294 : |
ulcessvp |
11 |
|
295 : |
ulcessvp |
12 |
/* bfgs isn't optimized with OpenMP, these methods only call the sequential method */
#ifdef _OPENMP
//#ifdef SPECULATIVE
// OpenMP entry point - BFGS has no parallel implementation, so this simply
// delegates to the sequential algorithm
void OptInfoBFGS::OptimiseLikelihoodOMP() {
  OptimiseLikelihood();
}
// reproducible (REP) entry point - likewise delegates to the sequential algorithm
void OptInfoBFGS::OptimiseLikelihoodREP() {
  OptimiseLikelihood();
}
#endif
|