Log In | Get Help   
Home My Page Projects Code Snippets Project Openings Mareframe
Summary Activity Forums Tracker Lists Tasks Docs Surveys News SCM Files
[mareframe] View of /trunk/gadget/surveydistribution.cc
[mareframe] / trunk / gadget / surveydistribution.cc Repository:
ViewVC logotype

View of /trunk/gadget/surveydistribution.cc

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1 - (download) (annotate)
Mon Feb 10 17:09:07 2014 UTC (10 years, 4 months ago) by agomez
File size: 21426 byte(s)
Initial version based on Gadget 2.2.00
#include "surveydistribution.h"
#include "readfunc.h"
#include "readword.h"
#include "readaggregation.h"
#include "errorhandler.h"
#include "mathfunc.h"
#include "areatime.h"
#include "stock.h"
#include "suitfuncptrvector.h"
#include "gadget.h"
#include "global.h"

//Constructs a survey distribution likelihood component by parsing its
//definition from infile.  In order, the constructor reads:
//  - the name of the data file (opened and parsed last, see below)
//  - the area, length and age aggregation files
//  - the list of stock names, terminated by the keyword "fittype"
//  - the fit type ("linearfit" or "powerfit") and its two parameters
//  - the suitability, either as a "function" or as a list of values ("suitfile")
//  - an optional epsilon value (defaults to 1)
//  - the likelihood type ("pearson", "multinomial", "gamma" or "log")
//and finally reads the observed data via readDistributionData().
//Problems with the input are reported through the global error handler.
SurveyDistribution::SurveyDistribution(CommentStream& infile, const AreaClass* const Area,
  const TimeClass* const TimeInfo, Keeper* const keeper, double weight, const char* name)
  : Likelihood(SURVEYDISTRIBUTIONLIKELIHOOD, weight, name), alptr(0) {

  //these pointers are tested against null in the destructor and in calcIndex(),
  //but are only assigned on some paths (the "function" suitability branch and
  //setFleetsAndStocks() respectively), so make sure they start out as null
  suitfunction = NULL;
  aggregator = 0;

  int i, j;
  char text[MaxStrLength];
  strncpy(text, "", MaxStrLength);
  int numarea = 0, numage = 0, numlen = 0;

  char datafilename[MaxStrLength];
  char aggfilename[MaxStrLength];
  strncpy(datafilename, "", MaxStrLength);
  strncpy(aggfilename, "", MaxStrLength);
  ifstream datafile;
  CommentStream subdata(datafile);
  readWordAndValue(infile, "datafile", datafilename);

  timeindex = 0;
  fittype = new char[MaxStrLength];
  strncpy(fittype, "", MaxStrLength);
  liketype = new char[MaxStrLength];
  strncpy(liketype, "", MaxStrLength);

  //read in area aggregation from file
  readWordAndValue(infile, "areaaggfile", aggfilename);
  datafile.open(aggfilename, ios::in);
  handle.checkIfFailure(datafile, aggfilename);
  handle.Open(aggfilename);
  numarea = readAggregation(subdata, areas, areaindex);
  handle.Close();
  datafile.close();
  datafile.clear();

  //Must change from outer areas to inner areas.
  for (i = 0; i < areas.Nrow(); i++)
    for (j = 0; j < areas.Ncol(i); j++)
      areas[i][j] = Area->getInnerArea(areas[i][j]);

  //read in length aggregation from file
  readWordAndValue(infile, "lenaggfile", aggfilename);
  datafile.open(aggfilename, ios::in);
  handle.checkIfFailure(datafile, aggfilename);
  handle.Open(aggfilename);
  numlen = readLengthAggregation(subdata, lengths, lenindex);
  handle.Close();
  datafile.close();
  datafile.clear();

  LgrpDiv = new LengthGroupDivision(lengths);
  if (LgrpDiv->Error())
    handle.logMessage(LOGFAIL, "Error in surveydistribution - failed to create length group");

  //read in age aggregation from file
  readWordAndValue(infile, "ageaggfile", aggfilename);
  datafile.open(aggfilename, ios::in);
  handle.checkIfFailure(datafile, aggfilename);
  handle.Open(aggfilename);
  numage = readAggregation(subdata, ages, ageindex);
  handle.Close();
  datafile.close();
  datafile.clear();

  //read in the stocknames - every word up to the keyword "fittype" is a stock name
  i = 0;
  infile >> text >> ws;
  if (strcasecmp(text, "stocknames") != 0)
    handle.logFileUnexpected(LOGFAIL, "stocknames", text);
  infile >> text;
  while (!infile.eof() && (strcasecmp(text, "fittype") != 0)) {
    infile >> ws;
    stocknames.resize(new char[strlen(text) + 1]);
    strcpy(stocknames[i++], text);
    infile >> text;
  }
  if (stocknames.Size() == 0)
    handle.logFileMessage(LOGFAIL, "\nError in surveydistribution - failed to read stocks");
  handle.logMessage(LOGMESSAGE, "Read stock data - number of stocks", stocknames.Size());

  //the keyword "fittype" has been consumed by the loop above, so read its value
  infile >> fittype >> ws;
  fitnumber = 0;
  if (strcasecmp(fittype, "linearfit") == 0) {
    fitnumber = 1;
  } else if (strcasecmp(fittype, "powerfit") == 0) {
    fitnumber = 2;
  } else
    handle.logFileMessage(LOGFAIL, "\nError in surveydistribution - unrecognised fittype", fittype);

  //both fit types use exactly two parameters, see calcIndex()
  parameters.resize(2, keeper);
  infile >> text >> ws;
  if (strcasecmp(text, "parameters") != 0)
    handle.logFileUnexpected(LOGFAIL, "parameters", text);
  parameters.read(infile, TimeInfo, keeper);

  //one suitability value for each length group
  suit.resize(LgrpDiv->numLengthGroups(), 0.0);
  infile >> text >> ws;
  if ((strcasecmp(text, "function") == 0)) {
    //read suitability function
    infile >> text >> ws;
    SuitFuncPtrVector tempsuitfunc;
    tempsuitfunc.readSuitFunction(infile, text, TimeInfo, keeper);
    suitfunction = tempsuitfunc[0];
    if (suitfunction->usesPredLength())
      suitfunction->setPredLength(0.0);

    for (i = 0; i < LgrpDiv->numLengthGroups(); i++) {
      if (suitfunction->usesPreyLength())
        suitfunction->setPreyLength(LgrpDiv->meanLength(i));

      suit[i] = suitfunction->calculate();
    }

  } else if (strcasecmp(text, "suitfile") == 0) {
    //read values from file - suitfunction stays null in this case
    for (i = 0; i < LgrpDiv->numLengthGroups(); i++)
      infile >> suit[i];

  } else {
    handle.logFileMessage(LOGFAIL, "\nError in surveydistribution - unrecognised suitability", text);
  }

  //JMB - changed to make the reading of epsilon optional
  //peek at the next character to decide whether the epsilon keyword is present
  infile >> ws;
  char c = infile.peek();
  if ((c == 'e') || (c == 'E'))
    readWordAndVariable(infile, "epsilon", epsilon);
  else
    epsilon = 1.0;

  if (epsilon < verysmall) {
    handle.logFileMessage(LOGWARN, "epsilon should be a positive integer - set to default value 1");
    epsilon = 1.0;
  }

  readWordAndValue(infile, "likelihoodtype", liketype);
  likenumber = 0;
  if (strcasecmp(liketype, "pearson") == 0)
    likenumber = 1;
  else if (strcasecmp(liketype, "multinomial") == 0)
    likenumber = 2;
  else if (strcasecmp(liketype, "gamma") == 0)
    likenumber = 3;
  else if (strcasecmp(liketype, "log") == 0)
    likenumber = 4;
  else
    handle.logFileMessage(LOGFAIL, "\nError in surveydistribution - unrecognised likelihoodtype", liketype);

  //read the survey distribution data from the datafile
  datafile.open(datafilename, ios::in);
  handle.checkIfFailure(datafile, datafilename);
  handle.Open(datafilename);
  readDistributionData(subdata, TimeInfo, numarea, numage, numlen);
  handle.Close();
  datafile.close();
  datafile.clear();

  //prepare for next likelihood component
  infile >> ws;
  if (!infile.eof()) {
    infile >> text >> ws;
    if (strcasecmp(text, "[component]") != 0)
      handle.logFileUnexpected(LOGFAIL, "[component]", text);
  }
}

//Reads the observed survey distribution data from infile.  Each row is
//expected to hold six columns: year, step, area label, age label, length
//label and the observed number.  A row is stored only if its area, age and
//length labels match the aggregation labels read earlier and its year/step
//is within the simulation period; all other rows are counted as rejected.
//numarea, numage and numlen give the dimensions of the matrices that are
//allocated for each new timestep found in the data.
void SurveyDistribution::readDistributionData(CommentStream& infile,
  const TimeClass* TimeInfo, int numarea, int numage, int numlen) {

  double tmpnumber;
  char tmparea[MaxStrLength], tmpage[MaxStrLength], tmplen[MaxStrLength];
  strncpy(tmparea, "", MaxStrLength);
  strncpy(tmpage, "", MaxStrLength);
  strncpy(tmplen, "", MaxStrLength);
  int keepdata, timeid, areaid, ageid, lenid;
  int i, year, step, count, reject;

  //Check the number of columns in the inputfile
  if (countColumns(infile) != 6)
    handle.logFileMessage(LOGFAIL, "wrong number of columns in inputfile - should be 6");

  year = step = count = reject = 0;
  while (!infile.eof()) {
    keepdata = 1;
    //clear the area label before every read so that the check below detects
    //a failed read on any row, not just the first one - otherwise the label
    //left over from the previous row would hide the failure
    tmparea[0] = '\0';
    infile >> year >> step >> tmparea >> tmpage >> tmplen >> tmpnumber >> ws;

    //crude check to see if something has gone wrong and avoid infinite loops
    if (strlen(tmparea) == 0)
      handle.logFileMessage(LOGFAIL, "failed to read data from file");

    //if tmparea is in areaindex keep data, else dont keep the data
    areaid = -1;
    for (i = 0; i < areaindex.Size(); i++)
      if (strcasecmp(areaindex[i], tmparea) == 0)
        areaid = i;

    if (areaid == -1)
      keepdata = 0;

    //if tmpage is in ageindex keep data, else dont keep the data
    ageid = -1;
    for (i = 0; i < ageindex.Size(); i++)
      if (strcasecmp(ageindex[i], tmpage) == 0)
        ageid = i;

    if (ageid == -1)
      keepdata = 0;

    //if tmplen is in lenindex keep data, else dont keep the data
    lenid = -1;
    for (i = 0; i < lenindex.Size(); i++)
      if (strcasecmp(lenindex[i], tmplen) == 0)
        lenid = i;

    if (lenid == -1)
      keepdata = 0;

    //check if the year and step are in the simulation
    timeid = -1;
    if ((TimeInfo->isWithinPeriod(year, step)) && (keepdata == 1)) {
      for (i = 0; i < Years.Size(); i++)
        if ((Years[i] == year) && (Steps[i] == step))
          timeid = i;

      if (timeid == -1) {
        //first data for this timestep - add a new row to each of the stores,
        //with one (numage x numlen) matrix per area
        Years.resize(1, year);
        Steps.resize(1, step);
        timeid = (Years.Size() - 1);

        obsDistribution.resize();
        modelDistribution.resize();
        likelihoodValues.AddRows(1, numarea, 0.0);
        for (i = 0; i < numarea; i++) {
          obsDistribution[timeid].resize(new DoubleMatrix(numage, numlen, 0.0));
          modelDistribution[timeid].resize(new DoubleMatrix(numage, numlen, 0.0));
        }
      }

    } else
      keepdata = 0;


    if (keepdata == 1) {
      //survey distribution data is required, so store it
      count++;
      (*obsDistribution[timeid][areaid])[ageid][lenid] = tmpnumber;
    } else
      reject++;  //count number of rejected data points read from file
  }

  AAT.addActions(Years, Steps, TimeInfo);
  if (count == 0)
    handle.logMessage(LOGWARN, "Warning in surveydistribution - found no data in the data file for", this->getName());

  if (Steps.Size() > 0) {
    //JMB - to be comparable, this should only take place on the same step in each year
    //timeid is reused here to count the entries whose step differs from the first one
    step = Steps[0];
    timeid = 0;
    for (i = 1; i < Steps.Size(); i++)
      if (Steps[i] != step)
        timeid++;

    if (timeid != 0)
      handle.logMessage(LOGWARN, "Warning in surveydistribution - differing timesteps for", this->getName());
  }

  if (reject != 0)
    handle.logMessage(LOGMESSAGE, "Discarded invalid surveydistribution data - number of invalid entries", reject);
  handle.logMessage(LOGMESSAGE, "Read surveydistribution data file - number of entries", count);
}

//Destructor - releases the label strings, the suitability function, the
//observed and modelled distribution matrices, the aggregator, the length
//group division and the fit/likelihood type names.
SurveyDistribution::~SurveyDistribution() {
  int row, col;

  //label strings allocated while reading the input files
  for (row = 0; row < stocknames.Size(); row++)
    delete[] stocknames[row];
  for (row = 0; row < areaindex.Size(); row++)
    delete[] areaindex[row];
  for (row = 0; row < ageindex.Size(); row++)
    delete[] ageindex[row];
  for (row = 0; row < lenindex.Size(); row++)
    delete[] lenindex[row];

  //deleting a null pointer is a no-op, so no explicit test is needed
  delete suitfunction;
  suitfunction = NULL;

  //the modelled matrices are walked using the dimensions of the observed ones
  for (row = 0; row < obsDistribution.Nrow(); row++) {
    for (col = 0; col < obsDistribution.Ncol(row); col++) {
      delete obsDistribution[row][col];
      delete modelDistribution[row][col];
    }
  }

  delete aggregator;
  delete LgrpDiv;
  delete[] fittype;
  delete[] liketype;
}

//Resets the component: calls the base class reset, warns if the weight is
//zero and sets every modelled distribution matrix back to zero.
void SurveyDistribution::Reset(const Keeper* const keeper) {
  Likelihood::Reset(keeper);
  if (isZero(weight))
    handle.logMessage(LOGWARN, "Warning in surveydistribution - zero weight for", this->getName());

  int timeid, areaid;
  for (timeid = 0; timeid < modelDistribution.Nrow(); timeid++) {
    for (areaid = 0; areaid < modelDistribution.Ncol(timeid); areaid++) {
      modelDistribution[timeid][areaid]->setToZero();
    }
  }

  if (handle.getLogLevel() >= LOGMESSAGE)
    handle.logMessage(LOGMESSAGE, "Reset surveydistribution component", this->getName());
}

//Writes a short description of this component to outfile: its name, current
//likelihood value, likelihood type and stock names, followed by the output
//of the aggregator.
void SurveyDistribution::Print(ofstream& outfile) const {
  outfile << "\nSurvey Distribution " << this->getName();
  outfile << " - likelihood value " << likelihood;
  outfile << "\n\tFunction " << liketype;
  outfile << "\n\tStock names:";

  int stockid;
  for (stockid = 0; stockid < stocknames.Size(); stockid++) {
    outfile << sep << stocknames[stockid];
  }
  outfile << endl;

  aggregator->Print(outfile);
  outfile.flush();
}

//Writes the modelled distribution for the current timestep to outfile, one
//line per area/age/length combination.  Nothing is written unless the
//current timestep is one of the timesteps registered for this component.
void SurveyDistribution::printLikelihood(ofstream& outfile, const TimeClass* const TimeInfo) {

  if (!AAT.atCurrentTime(TimeInfo))
    return;

  //find the index of the current timestep in the stored data
  int t;
  timeindex = -1;
  for (t = 0; t < Years.Size(); t++) {
    if ((Years[t] == TimeInfo->getYear()) && (Steps[t] == TimeInfo->getStep()))
      timeindex = t;
  }
  if (timeindex == -1)
    handle.logMessage(LOGFAIL, "Error in surveydistribution - invalid timestep");

  int areaid, ageid, lenid;
  double modelvalue;
  for (areaid = 0; areaid < modelDistribution.Ncol(timeindex); areaid++) {
    for (ageid = 0; ageid < modelDistribution[timeindex][areaid]->Nrow(); ageid++) {
      for (lenid = 0; lenid < modelDistribution[timeindex][areaid]->Ncol(ageid); lenid++) {
        //the labels identifying this entry
        outfile << setw(lowwidth) << Years[timeindex] << sep;
        outfile << setw(lowwidth) << Steps[timeindex] << sep;
        outfile << setw(printwidth) << areaindex[areaid] << sep;
        outfile << setw(printwidth) << ageindex[ageid] << sep;
        outfile << setw(printwidth) << lenindex[lenid] << sep;
        outfile << setprecision(largeprecision) << setw(largewidth);

        //JMB crude filter to remove the 'silly' values from the output
        modelvalue = (*modelDistribution[timeindex][areaid])[ageid][lenid];
        if (modelvalue < rathersmall)
          outfile << 0 << endl;
        else
          outfile << modelvalue << endl;
      }
    }
  }
}

//Matches the stock names read from the input file against the stocks in the
//model, checks that the aggregated areas, ages and lengths are compatible
//with those stocks (warnings only, and only if the log level is at least
//LOGWARN) and creates the aggregator used to sum up the modelled population.
//The Fleets argument is not used by this component.
void SurveyDistribution::setFleetsAndStocks(FleetPtrVector& Fleets, StockPtrVector& Stocks) {

  int i, j, k, found, minage, maxage;
  StockPtrVector stocks;

  //collect the stocks whose names were listed for this component
  for (i = 0; i < stocknames.Size(); i++) {
    found = 0;
    for (j = 0; j < Stocks.Size(); j++) {
      if (strcasecmp(stocknames[i], Stocks[j]->getName()) == 0) {
        found++;
        stocks.resize(Stocks[j]);
      }
    }
    if (found == 0)
      handle.logMessage(LOGFAIL, "Error in surveydistribution - failed to match stock", stocknames[i]);
  }

  //a stock must not be listed more than once
  for (i = 0; i < stocks.Size(); i++)
    for (j = 0; j < stocks.Size(); j++)
      if ((strcasecmp(stocks[i]->getName(), stocks[j]->getName()) == 0) && (i != j))
        handle.logMessage(LOGFAIL, "Error in surveydistribution - repeated stock", stocks[i]->getName());

  //check areas, ages and lengths
  if (handle.getLogLevel() >= LOGWARN) {
    //every aggregated area should contain at least one stock
    for (j = 0; j < areas.Nrow(); j++) {
      found = 0;
      for (i = 0; i < stocks.Size(); i++)
        for (k = 0; k < areas.Ncol(j); k++)
          if (stocks[i]->isInArea(areas[j][k]))
            found++;
      if (found == 0)
        handle.logMessage(LOGWARN, "Warning in surveydistribution - stock not defined on all areas");
    }

    //find the range of ages covered by the age aggregation
    minage = 9999;
    maxage = 0;
    for (i = 0; i < ages.Nrow(); i++) {
      for (j = 0; j < ages.Ncol(i); j++) {
        minage = min(ages[i][j], minage);
        maxage = max(ages[i][j], maxage);
      }
    }

    found = 0;
    for (i = 0; i < stocks.Size(); i++)
      if (minage >= stocks[i]->minAge())
        found++;
    if (found == 0)
      handle.logMessage(LOGWARN, "Warning in surveydistribution - minimum age less than stock age");

    found = 0;
    for (i = 0; i < stocks.Size(); i++)
      if (maxage <= stocks[i]->maxAge())
        found++;
    if (found == 0)
      handle.logMessage(LOGWARN, "Warning in surveydistribution - maximum age greater than stock age");

    //the first length group should overlap the length range of at least one stock
    found = 0;
    for (i = 0; i < stocks.Size(); i++)
      if (LgrpDiv->maxLength(0) > stocks[i]->getLengthGroupDiv()->minLength())
        found++;
    if (found == 0)
      handle.logMessage(LOGWARN, "Warning in surveydistribution - minimum length group less than stock length");

    //the last length group should overlap the length range of at least one stock
    //note that the last valid length group index is numLengthGroups() - 1
    found = 0;
    for (i = 0; i < stocks.Size(); i++)
      if (LgrpDiv->minLength(LgrpDiv->numLengthGroups() - 1) < stocks[i]->getLengthGroupDiv()->maxLength())
        found++;
    if (found == 0)
      handle.logMessage(LOGWARN, "Warning in surveydistribution - maximum length group greater than stock length");
  }

  aggregator = new StockAggregator(stocks, LgrpDiv, areas, ages);
}

void SurveyDistribution::calcIndex(const TimeClass* const TimeInfo) {
  //written by kgf 13/10 98

  int area, age, len;
  if (suitfunction != NULL) {
    suitfunction->updateConstants(TimeInfo);
    if ((timeindex == 0) || (suitfunction->didChange(TimeInfo))) {
      if (suitfunction->usesPredLength())
        suitfunction->setPredLength(0.0);

      for (len = 0; len < LgrpDiv->numLengthGroups(); len++) {
        if (suitfunction->usesPreyLength())
          suitfunction->setPreyLength(LgrpDiv->meanLength(len));

        suit[len] = suitfunction->calculate();
      }
    }
  }

  parameters.Update(TimeInfo);
  switch (fitnumber) {
    case 1:
      for (area = 0; area < areas.Nrow(); area++)
        for (age = (*alptr)[area].minAge(); age <= (*alptr)[area].maxAge(); age++)
          for (len = (*alptr)[area].minLength(age); len < (*alptr)[area].maxLength(age); len++)
            (*modelDistribution[timeindex][area])[age][len] = parameters[0] * suit[len] * (((*alptr)[area][age][len]).N + parameters[1]);
      break;
    case 2:
      for (area = 0; area < areas.Nrow(); area++)
        for (age = (*alptr)[area].minAge(); age <= (*alptr)[area].maxAge(); age++)
          for (len = (*alptr)[area].minLength(age); len < (*alptr)[area].maxLength(age); len++)
            (*modelDistribution[timeindex][area])[age][len] = parameters[0] * suit[len] * pow(((*alptr)[area][age][len]).N, parameters[1]);
      break;
    default:
      handle.logMessage(LOGWARN, "Warning in surveydistribution - unrecognised fittype", fittype);
      break;
  }
}

//Adds the likelihood score for the current timestep to the running total.
//Does nothing if the current timestep has no data for this component or if
//the weight of the component is zero.
void SurveyDistribution::addLikelihood(const TimeClass* const TimeInfo) {

  if (!(AAT.atCurrentTime(TimeInfo)))
    return;
  if (isZero(weight))
    return;

  //find the index of the current timestep in the stored data
  int t;
  timeindex = -1;
  for (t = 0; t < Years.Size(); t++) {
    if ((Years[t] == TimeInfo->getYear()) && (Steps[t] == TimeInfo->getStep()))
      timeindex = t;
  }
  if (timeindex == -1)
    handle.logMessage(LOGFAIL, "Error in surveydistribution - invalid timestep");

  double score = 0.0;
  aggregator->Sum();
  if (handle.getLogLevel() >= LOGMESSAGE)
    handle.logMessage(LOGMESSAGE, "Calculating likelihood score for surveydistribution component", this->getName());

  //calculate the modelled distribution from the aggregated population
  alptr = &aggregator->getSum();
  this->calcIndex(TimeInfo);

  //compare modelled and observed data using the selected likelihood function
  if (likenumber == 1)
    score = calcLikPearson();
  else if (likenumber == 2)
    score = calcLikMultinomial();
  else if (likenumber == 3)
    score = calcLikGamma();
  else if (likenumber == 4)
    score = calcLikLog();
  else
    handle.logMessage(LOGWARN, "Warning in surveydistribution - unrecognised likelihoodtype", liketype);

  if (handle.getLogLevel() >= LOGMESSAGE)
    handle.logMessage(LOGMESSAGE, "The likelihood score for this component on this timestep is", score);
  likelihood += score;
}

//Calculates the multinomial likelihood score for the current timestep.
//For each area the score is
//  -sum(obs * log(model + epsilon)) / sum(obs) + log(sum(model + epsilon))
//The score for each area is stored in likelihoodValues and the sum over all
//the areas is returned.  An area with no observations, or with a negligible
//modelled total, contributes zero to the score.
double SurveyDistribution::calcLikMultinomial() {
  //written by kgf 30/10 98
  //Multinomial formula from H J Skaug

  double temp, total, obstotal, modtotal;
  int area, age, len;

  total = 0.0;
  for (area = 0; area < areas.Nrow(); area++) {
    temp = 0.0;
    obstotal = 0.0;
    modtotal = 0.0;
    for (age = 0; age < (*obsDistribution[timeindex][area]).Nrow(); age++) {
      for (len = 0; len < (*obsDistribution[timeindex][area]).Ncol(age); len++) {
        temp -= (*obsDistribution[timeindex][area])[age][len] *
                 log(((*modelDistribution[timeindex][area])[age][len]) + epsilon);
        obstotal += (*obsDistribution[timeindex][area])[age][len];
        modtotal += ((*modelDistribution[timeindex][area])[age][len] + epsilon);
      }
    }

    //guard against dividing by a zero observed total, which would give a
    //NaN score for this area and hence for the whole component
    if ((isZero(obstotal)) || (modtotal < verysmall)) {
      likelihoodValues[timeindex][area] = 0.0;
    } else {
      temp /= obstotal;
      temp += log(modtotal);
      likelihoodValues[timeindex][area] = temp;
    }
    total += likelihoodValues[timeindex][area];
  }
  return total;
}

double SurveyDistribution::calcLikPearson() {
  //written by kgf 13/10 98

  double temp, total, diff;
  int area, age, len;

  total = 0.0;
  for (area = 0; area < areas.Nrow(); area++) {
    temp = 0.0;
    for (age = 0; age < (*obsDistribution[timeindex][area]).Nrow(); age++) {
      for (len = 0; len < (*obsDistribution[timeindex][area]).Ncol(age); len++) {
        diff = ((*modelDistribution[timeindex][area])[age][len] - (*obsDistribution[timeindex][area])[age][len]);
        diff *= diff;
        diff /= ((*modelDistribution[timeindex][area])[age][len] + epsilon);
        temp += diff;
      }
    }
    likelihoodValues[timeindex][area] = temp;
    total += likelihoodValues[timeindex][area];
  }
  return total;
}

double SurveyDistribution::calcLikGamma() {
  //written by kgf 24/5 00
  //The gamma likelihood function as described by
  //Hans J Skaug 15/3 00, at present without internal weighting.

  double total, temp;
  int area, age, len;

  total = 0.0;
  for (area = 0; area < areas.Nrow(); area++) {
    temp = 0.0;
    for (age = 0; age < (*obsDistribution[timeindex][area]).Nrow(); age++)
      for (len = 0; len < (*obsDistribution[timeindex][area]).Ncol(age); len++)
        temp += (((*obsDistribution[timeindex][area])[age][len] /
                 ((*modelDistribution[timeindex][area])[age][len] + epsilon)) +
                 log((*modelDistribution[timeindex][area])[age][len] + epsilon));

    likelihoodValues[timeindex][area] = temp;
    total += likelihoodValues[timeindex][area];
  }
  return total;
}

double SurveyDistribution::calcLikLog() {
  //corrected by kgf 27/11 98 to sum first and then take the logarithm
  double total, obstotal, modtotal;
  int area, age, len;

  total = 0.0;
  for (area = 0; area < areas.Nrow(); area++) {
    obstotal = 0.0;
    modtotal = 0.0;
    for (age = 0; age < (*obsDistribution[timeindex][area]).Nrow(); age++) {
      for (len = 0; len < (*obsDistribution[timeindex][area]).Ncol(age); len++) {
        modtotal += (*modelDistribution[timeindex][area])[age][len];
        obstotal += (*obsDistribution[timeindex][area])[age][len];
      }
    }

    if (!(isZero(modtotal)))
      likelihoodValues[timeindex][area] = (log(obstotal / modtotal) * log(obstotal / modtotal));
    else
      likelihoodValues[timeindex][area] = verybig;

    total += likelihoodValues[timeindex][area];
  }
  return total;
}

//Writes a summary of the likelihood scores to outfile, with one line for
//each timestep and area giving the year, step, area label, component name,
//weight and likelihood score.
void SurveyDistribution::printSummary(ofstream& outfile) {
  int timeid, areaid;

  for (timeid = 0; timeid < likelihoodValues.Nrow(); timeid++) {
    for (areaid = 0; areaid < likelihoodValues.Ncol(timeid); areaid++) {
      outfile << setw(lowwidth) << Years[timeid] << sep;
      outfile << setw(lowwidth) << Steps[timeid] << sep;
      outfile << setw(printwidth) << areaindex[areaid] << sep;
      outfile << setw(largewidth) << this->getName() << sep;
      outfile << setprecision(smallprecision) << setw(smallwidth) << weight << sep;
      outfile << setprecision(largeprecision) << setw(largewidth)
        << likelihoodValues[timeid][areaid] << endl;
    }
  }
  outfile.flush();
}

root@forge.cesga.es
ViewVC Help
Powered by ViewVC 1.0.0  

Powered By FusionForge