#include "surveydistribution.h" #include "readfunc.h" #include "readword.h" #include "readaggregation.h" #include "errorhandler.h" #include "mathfunc.h" #include "areatime.h" #include "stock.h" #include "suitfuncptrvector.h" #include "gadget.h" #include "global.h" SurveyDistribution::SurveyDistribution(CommentStream& infile, const AreaClass* const Area, const TimeClass* const TimeInfo, Keeper* const keeper, double weight, const char* name) : Likelihood(SURVEYDISTRIBUTIONLIKELIHOOD, weight, name), alptr(0) { int i, j; char text[MaxStrLength]; strncpy(text, "", MaxStrLength); int numarea = 0, numage = 0, numlen = 0; char datafilename[MaxStrLength]; char aggfilename[MaxStrLength]; strncpy(datafilename, "", MaxStrLength); strncpy(aggfilename, "", MaxStrLength); ifstream datafile; CommentStream subdata(datafile); readWordAndValue(infile, "datafile", datafilename); timeindex = 0; fittype = new char[MaxStrLength]; strncpy(fittype, "", MaxStrLength); liketype = new char[MaxStrLength]; strncpy(liketype, "", MaxStrLength); //read in area aggregation from file readWordAndValue(infile, "areaaggfile", aggfilename); datafile.open(aggfilename, ios::in); handle.checkIfFailure(datafile, aggfilename); handle.Open(aggfilename); numarea = readAggregation(subdata, areas, areaindex); handle.Close(); datafile.close(); datafile.clear(); //Must change from outer areas to inner areas. for (i = 0; i < areas.Nrow(); i++) for (j = 0; j < areas.Ncol(i); j++) areas[i][j] = Area->getInnerArea(areas[i][j]); //read in length aggregation from file readWordAndValue(infile, "lenaggfile", aggfilename); datafile.open(aggfilename, ios::in); handle.checkIfFailure(datafile, aggfilename); handle.Open(aggfilename); numlen = readLengthAggregation(subdata, lengths, lenindex); handle.Close(); datafile.close(); datafile.clear(); LgrpDiv = new LengthGroupDivision(lengths); if (LgrpDiv->Error()) handle.logMessage(LOGFAIL, "Error in surveydistribution - failed to create length group"); //read in age aggregation from file readWordAndValue(infile, "ageaggfile", aggfilename); datafile.open(aggfilename, ios::in); handle.checkIfFailure(datafile, aggfilename); handle.Open(aggfilename); numage = readAggregation(subdata, ages, ageindex); handle.Close(); datafile.close(); datafile.clear(); //read in the stocknames i = 0; infile >> text >> ws; if (strcasecmp(text, "stocknames") != 0) handle.logFileUnexpected(LOGFAIL, "stocknames", text); infile >> text; while (!infile.eof() && (strcasecmp(text, "fittype") != 0)) { infile >> ws; stocknames.resize(new char[strlen(text) + 1]); strcpy(stocknames[i++], text); infile >> text; } if (stocknames.Size() == 0) handle.logFileMessage(LOGFAIL, "\nError in surveydistribution - failed to read stocks"); handle.logMessage(LOGMESSAGE, "Read stock data - number of stocks", stocknames.Size()); infile >> fittype >> ws; fitnumber = 0; if (strcasecmp(fittype, "linearfit") == 0) { fitnumber = 1; } else if (strcasecmp(fittype, "powerfit") == 0) { fitnumber = 2; } else handle.logFileMessage(LOGFAIL, "\nError in surveydistribution - unrecognised fittype", fittype); parameters.resize(2, keeper); infile >> text >> ws; if (strcasecmp(text, "parameters") != 0) handle.logFileUnexpected(LOGFAIL, "parameters", text); parameters.read(infile, TimeInfo, keeper); suit.resize(LgrpDiv->numLengthGroups(), 0.0); infile >> text >> ws; if ((strcasecmp(text, "function") == 0)) { //read suitability function infile >> text >> ws; SuitFuncPtrVector tempsuitfunc; tempsuitfunc.readSuitFunction(infile, text, TimeInfo, keeper); suitfunction = 
    if (suitfunction->usesPredLength())
      suitfunction->setPredLength(0.0);

    for (i = 0; i < LgrpDiv->numLengthGroups(); i++) {
      if (suitfunction->usesPreyLength())
        suitfunction->setPreyLength(LgrpDiv->meanLength(i));

      suit[i] = suitfunction->calculate();
    }

  } else if (strcasecmp(text, "suitfile") == 0) {
    //read values from file
    for (i = 0; i < LgrpDiv->numLengthGroups(); i++)
      infile >> suit[i];

  } else {
    handle.logFileMessage(LOGFAIL, "\nError in surveydistribution - unrecognised suitability", text);
  }

  //JMB - changed to make the reading of epsilon optional
  infile >> ws;
  char c = infile.peek();
  if ((c == 'e') || (c == 'E'))
    readWordAndVariable(infile, "epsilon", epsilon);
  else
    epsilon = 1.0;

  if (epsilon < verysmall) {
    handle.logFileMessage(LOGWARN, "epsilon should be a positive number - set to default value 1");
    epsilon = 1.0;
  }

  readWordAndValue(infile, "likelihoodtype", liketype);
  likenumber = 0;
  if (strcasecmp(liketype, "pearson") == 0)
    likenumber = 1;
  else if (strcasecmp(liketype, "multinomial") == 0)
    likenumber = 2;
  else if (strcasecmp(liketype, "gamma") == 0)
    likenumber = 3;
  else if (strcasecmp(liketype, "log") == 0)
    likenumber = 4;
  else
    handle.logFileMessage(LOGFAIL, "\nError in surveydistribution - unrecognised likelihoodtype", liketype);

  //read the survey distribution data from the datafile
  datafile.open(datafilename, ios::in);
  handle.checkIfFailure(datafile, datafilename);
  handle.Open(datafilename);
  readDistributionData(subdata, TimeInfo, numarea, numage, numlen);
  handle.Close();
  datafile.close();
  datafile.clear();

  //prepare for next likelihood component
  infile >> ws;
  if (!infile.eof()) {
    infile >> text >> ws;
    if (strcasecmp(text, "[component]") != 0)
      handle.logFileUnexpected(LOGFAIL, "[component]", text);
  }
}

void SurveyDistribution::readDistributionData(CommentStream& infile,
  const TimeClass* TimeInfo, int numarea, int numage, int numlen) {

  double tmpnumber;
  char tmparea[MaxStrLength], tmpage[MaxStrLength], tmplen[MaxStrLength];
  strncpy(tmparea, "", MaxStrLength);
  strncpy(tmpage, "", MaxStrLength);
  strncpy(tmplen, "", MaxStrLength);
  int keepdata, timeid, areaid, ageid, lenid;
  int i, year, step, count, reject;

  //Check the number of columns in the inputfile
  if (countColumns(infile) != 6)
    handle.logFileMessage(LOGFAIL, "wrong number of columns in inputfile - should be 6");

  year = step = count = reject = 0;
  while (!infile.eof()) {
    keepdata = 1;
    infile >> year >> step >> tmparea >> tmpage >> tmplen >> tmpnumber >> ws;

    //crude check to see if something has gone wrong and avoid infinite loops
    if (strlen(tmparea) == 0)
      handle.logFileMessage(LOGFAIL, "failed to read data from file");

    //if tmparea is in areaindex keep data, else don't keep the data
    areaid = -1;
    for (i = 0; i < areaindex.Size(); i++)
      if (strcasecmp(areaindex[i], tmparea) == 0)
        areaid = i;

    if (areaid == -1)
      keepdata = 0;

    //if tmpage is in ageindex keep data, else don't keep the data
    ageid = -1;
    for (i = 0; i < ageindex.Size(); i++)
      if (strcasecmp(ageindex[i], tmpage) == 0)
        ageid = i;

    if (ageid == -1)
      keepdata = 0;

    //if tmplen is in lenindex keep data, else don't keep the data
    lenid = -1;
    for (i = 0; i < lenindex.Size(); i++)
      if (strcasecmp(lenindex[i], tmplen) == 0)
        lenid = i;

    if (lenid == -1)
      keepdata = 0;

    //check if the year and step are in the simulation
    timeid = -1;
    if ((TimeInfo->isWithinPeriod(year, step)) && (keepdata == 1)) {
      for (i = 0; i < Years.Size(); i++)
        if ((Years[i] == year) && (Steps[i] == step))
          timeid = i;

      if (timeid == -1) {
        Years.resize(1, year);
        Steps.resize(1, step);
        timeid = (Years.Size() - 1);
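        //store a new [time][area] entry - obsDistribution and modelDistribution each hold
        //an age x length matrix for every timestep and area combination found in the data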
        obsDistribution.resize();
        modelDistribution.resize();
        likelihoodValues.AddRows(1, numarea, 0.0);
        for (i = 0; i < numarea; i++) {
          obsDistribution[timeid].resize(new DoubleMatrix(numage, numlen, 0.0));
          modelDistribution[timeid].resize(new DoubleMatrix(numage, numlen, 0.0));
        }
      }

    } else
      keepdata = 0;

    if (keepdata == 1) {
      //survey distribution data is required, so store it
      count++;
      (*obsDistribution[timeid][areaid])[ageid][lenid] = tmpnumber;
    } else
      reject++;  //count number of rejected data points read from file
  }

  AAT.addActions(Years, Steps, TimeInfo);
  if (count == 0)
    handle.logMessage(LOGWARN, "Warning in surveydistribution - found no data in the data file for", this->getName());
  if (Steps.Size() > 0) {
    //JMB - to be comparable, this should only take place on the same step in each year
    step = Steps[0];
    timeid = 0;
    for (i = 1; i < Steps.Size(); i++)
      if (Steps[i] != step)
        timeid++;

    if (timeid != 0)
      handle.logMessage(LOGWARN, "Warning in surveydistribution - differing timesteps for", this->getName());
  }
  if (reject != 0)
    handle.logMessage(LOGMESSAGE, "Discarded invalid surveydistribution data - number of invalid entries", reject);
  handle.logMessage(LOGMESSAGE, "Read surveydistribution data file - number of entries", count);
}

SurveyDistribution::~SurveyDistribution() {
  int i, j;
  for (i = 0; i < stocknames.Size(); i++)
    delete[] stocknames[i];
  for (i = 0; i < areaindex.Size(); i++)
    delete[] areaindex[i];
  for (i = 0; i < ageindex.Size(); i++)
    delete[] ageindex[i];
  for (i = 0; i < lenindex.Size(); i++)
    delete[] lenindex[i];

  if (suitfunction != NULL) {
    delete suitfunction;
    suitfunction = NULL;
  }
  for (i = 0; i < obsDistribution.Nrow(); i++) {
    for (j = 0; j < obsDistribution.Ncol(i); j++) {
      delete obsDistribution[i][j];
      delete modelDistribution[i][j];
    }
  }

  if (aggregator != 0)
    delete aggregator;
  delete LgrpDiv;
  delete[] fittype;
  delete[] liketype;
}

void SurveyDistribution::Reset(const Keeper* const keeper) {
  Likelihood::Reset(keeper);
  if (isZero(weight))
    handle.logMessage(LOGWARN, "Warning in surveydistribution - zero weight for", this->getName());

  int i, j;
  for (i = 0; i < modelDistribution.Nrow(); i++)
    for (j = 0; j < modelDistribution.Ncol(i); j++)
      (*modelDistribution[i][j]).setToZero();

  if (handle.getLogLevel() >= LOGMESSAGE)
    handle.logMessage(LOGMESSAGE, "Reset surveydistribution component", this->getName());
}

void SurveyDistribution::Print(ofstream& outfile) const {
  int i;
  outfile << "\nSurvey Distribution " << this->getName() << " - likelihood value " << likelihood
    << "\n\tFunction " << liketype << "\n\tStock names:";
  for (i = 0; i < stocknames.Size(); i++)
    outfile << sep << stocknames[i];
  outfile << endl;
  aggregator->Print(outfile);
  outfile.flush();
}

void SurveyDistribution::printLikelihood(ofstream& outfile, const TimeClass* const TimeInfo) {

  if (!AAT.atCurrentTime(TimeInfo))
    return;

  int i, area, age, len;
  timeindex = -1;
  for (i = 0; i < Years.Size(); i++)
    if ((Years[i] == TimeInfo->getYear()) && (Steps[i] == TimeInfo->getStep()))
      timeindex = i;
  if (timeindex == -1)
    handle.logMessage(LOGFAIL, "Error in surveydistribution - invalid timestep");

  for (area = 0; area < modelDistribution.Ncol(timeindex); area++) {
    for (age = 0; age < modelDistribution[timeindex][area]->Nrow(); age++) {
      for (len = 0; len < modelDistribution[timeindex][area]->Ncol(age); len++) {
        outfile << setw(lowwidth) << Years[timeindex] << sep << setw(lowwidth)
          << Steps[timeindex] << sep << setw(printwidth) << areaindex[area] << sep
          << setw(printwidth) << ageindex[age] << sep << setw(printwidth)
          << lenindex[len] << sep
          << setprecision(largeprecision) << setw(largewidth);

        //JMB crude filter to remove the 'silly' values from the output
        if ((*modelDistribution[timeindex][area])[age][len] < rathersmall)
          outfile << 0 << endl;
        else
          outfile << (*modelDistribution[timeindex][area])[age][len] << endl;

      }
    }
  }
}

void SurveyDistribution::setFleetsAndStocks(FleetPtrVector& Fleets, StockPtrVector& Stocks) {

  int i, j, k, found, minage, maxage;
  StockPtrVector stocks;

  for (i = 0; i < stocknames.Size(); i++) {
    found = 0;
    for (j = 0; j < Stocks.Size(); j++) {
      if (strcasecmp(stocknames[i], Stocks[j]->getName()) == 0) {
        found++;
        stocks.resize(Stocks[j]);
      }
    }
    if (found == 0)
      handle.logMessage(LOGFAIL, "Error in surveydistribution - failed to match stock", stocknames[i]);
  }
  for (i = 0; i < stocks.Size(); i++)
    for (j = 0; j < stocks.Size(); j++)
      if ((strcasecmp(stocks[i]->getName(), stocks[j]->getName()) == 0) && (i != j))
        handle.logMessage(LOGFAIL, "Error in surveydistribution - repeated stock", stocks[i]->getName());

  //check areas, ages and lengths
  if (handle.getLogLevel() >= LOGWARN) {
    for (j = 0; j < areas.Nrow(); j++) {
      found = 0;
      for (i = 0; i < stocks.Size(); i++)
        for (k = 0; k < areas.Ncol(j); k++)
          if (stocks[i]->isInArea(areas[j][k]))
            found++;
      if (found == 0)
        handle.logMessage(LOGWARN, "Warning in surveydistribution - stock not defined on all areas");
    }

    minage = 9999;
    maxage = 0;
    for (i = 0; i < ages.Nrow(); i++) {
      for (j = 0; j < ages.Ncol(i); j++) {
        minage = min(ages[i][j], minage);
        maxage = max(ages[i][j], maxage);
      }
    }

    found = 0;
    for (i = 0; i < stocks.Size(); i++)
      if (minage >= stocks[i]->minAge())
        found++;
    if (found == 0)
      handle.logMessage(LOGWARN, "Warning in surveydistribution - minimum age less than stock age");

    found = 0;
    for (i = 0; i < stocks.Size(); i++)
      if (maxage <= stocks[i]->maxAge())
        found++;
    if (found == 0)
      handle.logMessage(LOGWARN, "Warning in surveydistribution - maximum age greater than stock age");

    found = 0;
    for (i = 0; i < stocks.Size(); i++)
      if (LgrpDiv->maxLength(0) > stocks[i]->getLengthGroupDiv()->minLength())
        found++;
    if (found == 0)
      handle.logMessage(LOGWARN, "Warning in surveydistribution - minimum length group less than stock length");

    found = 0;
    for (i = 0; i < stocks.Size(); i++)
      if (LgrpDiv->minLength(LgrpDiv->numLengthGroups()) < stocks[i]->getLengthGroupDiv()->maxLength())
        found++;
    if (found == 0)
      handle.logMessage(LOGWARN, "Warning in surveydistribution - maximum length group greater than stock length");
  }

  aggregator = new StockAggregator(stocks, LgrpDiv, areas, ages);
}

void SurveyDistribution::calcIndex(const TimeClass* const TimeInfo) {
  //written by kgf 13/10 98
  int area, age, len;
  if (suitfunction != NULL) {
    suitfunction->updateConstants(TimeInfo);
    if ((timeindex == 0) || (suitfunction->didChange(TimeInfo))) {
      if (suitfunction->usesPredLength())
        suitfunction->setPredLength(0.0);

      for (len = 0; len < LgrpDiv->numLengthGroups(); len++) {
        if (suitfunction->usesPreyLength())
          suitfunction->setPreyLength(LgrpDiv->meanLength(len));

        suit[len] = suitfunction->calculate();
      }
    }
  }

  parameters.Update(TimeInfo);
  switch (fitnumber) {
    case 1:
      //linearfit: index = parameters[0] * suit[len] * (N + parameters[1])
      for (area = 0; area < areas.Nrow(); area++)
        for (age = (*alptr)[area].minAge(); age <= (*alptr)[area].maxAge(); age++)
          for (len = (*alptr)[area].minLength(age); len < (*alptr)[area].maxLength(age); len++)
            (*modelDistribution[timeindex][area])[age][len] =
              parameters[0] * suit[len] * (((*alptr)[area][age][len]).N + parameters[1]);
      break;
    case 2:
      //powerfit: index = parameters[0] * suit[len] * N ^ parameters[1]
      for (area = 0; area < areas.Nrow(); area++)
        for (age = (*alptr)[area].minAge(); age <= (*alptr)[area].maxAge(); age++)
          for (len = (*alptr)[area].minLength(age); len < (*alptr)[area].maxLength(age); len++)
            (*modelDistribution[timeindex][area])[age][len] =
              parameters[0] * suit[len] * pow(((*alptr)[area][age][len]).N, parameters[1]);
      break;
    default:
      handle.logMessage(LOGWARN, "Warning in surveydistribution - unrecognised fittype", fittype);
      break;
  }
}

void SurveyDistribution::addLikelihood(const TimeClass* const TimeInfo) {

  if ((!(AAT.atCurrentTime(TimeInfo))) || (isZero(weight)))
    return;

  int i;
  timeindex = -1;
  for (i = 0; i < Years.Size(); i++)
    if ((Years[i] == TimeInfo->getYear()) && (Steps[i] == TimeInfo->getStep()))
      timeindex = i;
  if (timeindex == -1)
    handle.logMessage(LOGFAIL, "Error in surveydistribution - invalid timestep");

  double l = 0.0;
  aggregator->Sum();
  if (handle.getLogLevel() >= LOGMESSAGE)
    handle.logMessage(LOGMESSAGE, "Calculating likelihood score for surveydistribution component", this->getName());

  alptr = &aggregator->getSum();
  this->calcIndex(TimeInfo);
  switch (likenumber) {
    case 1:
      l = calcLikPearson();
      break;
    case 2:
      l = calcLikMultinomial();
      break;
    case 3:
      l = calcLikGamma();
      break;
    case 4:
      l = calcLikLog();
      break;
    default:
      handle.logMessage(LOGWARN, "Warning in surveydistribution - unrecognised likelihoodtype", liketype);
      break;
  }

  if (handle.getLogLevel() >= LOGMESSAGE)
    handle.logMessage(LOGMESSAGE, "The likelihood score for this component on this timestep is", l);
  likelihood += l;
}

double SurveyDistribution::calcLikMultinomial() {
  //written by kgf 30/10 98
  //Multinomial formula from H J Skaug
  //for each area the score is log(sum(model + epsilon)) - sum(obs * log(model + epsilon)) / sum(obs)
  double temp, total, obstotal, modtotal;
  int area, age, len;

  total = 0.0;
  for (area = 0; area < areas.Nrow(); area++) {
    temp = 0.0;
    obstotal = 0.0;
    modtotal = 0.0;
    for (age = 0; age < (*obsDistribution[timeindex][area]).Nrow(); age++) {
      for (len = 0; len < (*obsDistribution[timeindex][area]).Ncol(age); len++) {
        temp -= (*obsDistribution[timeindex][area])[age][len] *
          log(((*modelDistribution[timeindex][area])[age][len]) + epsilon);
        obstotal += (*obsDistribution[timeindex][area])[age][len];
        modtotal += ((*modelDistribution[timeindex][area])[age][len] + epsilon);
      }
    }
    if ((modtotal < verysmall) && (!(isZero(obstotal)))) {
      likelihoodValues[timeindex][area] = 0.0;
    } else {
      temp /= obstotal;
      temp += log(modtotal);
      likelihoodValues[timeindex][area] = temp;
    }
    total += likelihoodValues[timeindex][area];
  }
  return total;
}

double SurveyDistribution::calcLikPearson() {
  //written by kgf 13/10 98
  //for each area the score is sum((model - obs)^2 / (model + epsilon))
  double temp, total, diff;
  int area, age, len;

  total = 0.0;
  for (area = 0; area < areas.Nrow(); area++) {
    temp = 0.0;
    for (age = 0; age < (*obsDistribution[timeindex][area]).Nrow(); age++) {
      for (len = 0; len < (*obsDistribution[timeindex][area]).Ncol(age); len++) {
        diff = ((*modelDistribution[timeindex][area])[age][len] - (*obsDistribution[timeindex][area])[age][len]);
        diff *= diff;
        diff /= ((*modelDistribution[timeindex][area])[age][len] + epsilon);
        temp += diff;
      }
    }
    likelihoodValues[timeindex][area] = temp;
    total += likelihoodValues[timeindex][area];
  }
  return total;
}

double SurveyDistribution::calcLikGamma() {
  //written by kgf 24/5 00
  //The gamma likelihood function as described by
  //Hans J Skaug 15/3 00, at present without internal weighting.
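  //for each area the score is the sum over all age-length cells of
  //obs / (model + epsilon) + log(model + epsilon)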
  double total, temp;
  int area, age, len;

  total = 0.0;
  for (area = 0; area < areas.Nrow(); area++) {
    temp = 0.0;
    for (age = 0; age < (*obsDistribution[timeindex][area]).Nrow(); age++)
      for (len = 0; len < (*obsDistribution[timeindex][area]).Ncol(age); len++)
        temp += (((*obsDistribution[timeindex][area])[age][len] /
          ((*modelDistribution[timeindex][area])[age][len] + epsilon)) +
          log((*modelDistribution[timeindex][area])[age][len] + epsilon));

    likelihoodValues[timeindex][area] = temp;
    total += likelihoodValues[timeindex][area];
  }
  return total;
}

double SurveyDistribution::calcLikLog() {
  //corrected by kgf 27/11 98 to sum first and then take the logarithm
  double total, obstotal, modtotal;
  int area, age, len;

  total = 0.0;
  for (area = 0; area < areas.Nrow(); area++) {
    obstotal = 0.0;
    modtotal = 0.0;
    for (age = 0; age < (*obsDistribution[timeindex][area]).Nrow(); age++) {
      for (len = 0; len < (*obsDistribution[timeindex][area]).Ncol(age); len++) {
        modtotal += (*modelDistribution[timeindex][area])[age][len];
        obstotal += (*obsDistribution[timeindex][area])[age][len];
      }
    }
    if (!(isZero(modtotal)))
      likelihoodValues[timeindex][area] = (log(obstotal / modtotal) * log(obstotal / modtotal));
    else
      likelihoodValues[timeindex][area] = verybig;
    total += likelihoodValues[timeindex][area];
  }
  return total;
}

void SurveyDistribution::printSummary(ofstream& outfile) {
  int year, area;

  for (year = 0; year < likelihoodValues.Nrow(); year++) {
    for (area = 0; area < likelihoodValues.Ncol(year); area++) {
      outfile << setw(lowwidth) << Years[year] << sep << setw(lowwidth)
        << Steps[year] << sep << setw(printwidth) << areaindex[area] << sep
        << setw(largewidth) << this->getName() << sep << setprecision(smallprecision)
        << setw(smallwidth) << weight << sep << setprecision(largeprecision)
        << setw(largewidth) << likelihoodValues[year][area] << endl;
    }
  }
  outfile.flush();
}