Please wait. This can take some minutes ...
Many resources are needed to download a project. Please understand that we have to compensate our server costs. Thank you in advance.
Project price only 1 $
You can buy this project and download/modify it how often you want.
weka.classifiers.timeseries.WekaForecaster Maven / Gradle / Ivy
/*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
/*
* WekaForecaster.java
* Copyright (C) 2010-2016 University of Waikato, Hamilton, New Zealand
*/
package weka.classifiers.timeseries;
import weka.classifiers.AbstractClassifier;
import weka.classifiers.Classifier;
import weka.classifiers.evaluation.NumericPrediction;
import weka.classifiers.functions.LinearRegression;
import weka.classifiers.timeseries.core.*;
import weka.filters.supervised.attribute.TSLagMaker;
import weka.core.Attribute;
import weka.core.DenseInstance;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.Option;
import weka.core.OptionHandler;
import weka.core.Utils;
import weka.core.logging.Logger;
import weka.filters.Filter;
import weka.filters.unsupervised.attribute.Remove;
import weka.filters.unsupervised.attribute.RemoveType;
import java.io.PrintStream;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Enumeration;
import java.util.List;
import java.util.Map;
import java.util.Vector;
/**
* Class that implements time series forecasting using a Weka regression scheme.
* Makes use of the TSLagMaker class to handle all lagged attribute creation,
* periodic attributes etc.
*
* @author Mark Hall (mhall{[at]}pentaho{[dot]}com)
* @version $Revision: 52593 $
*/
public class WekaForecaster extends AbstractForecaster implements TSLagUser,
ConfidenceIntervalForecaster, OverlayForecaster, IncrementallyPrimeable,
OptionHandler, Serializable {
/** For serialization */
private static final long serialVersionUID = 5562710925011828590L;
/** The format of the original incoming instances (set by buildForecaster()) */
protected Instances m_originalHeader;
/**
 * A temporary header used when updating base learners that implement
 * PrimingDataLearner
 */
protected Instances m_tempHeader;
/**
 * A copy of the input data provided to primeForecaster(). Transient, and
 * cleared by reset().
 */
protected transient Instances m_primedInput;
/** The format of the transformed data (set by buildForecaster()) */
protected Instances m_transformedHeader;
/**
 * The base regression scheme to use. The field default is LinearRegression;
 * note that setOptions() installs SMOreg instead when no -W option is given.
 */
protected Classifier m_forecaster = new LinearRegression();
/**
 * The individual forecasters for each target. Only assigned by
 * buildForecaster(), which adds one entry per field to forecast.
 */
protected List m_singleTargetForecasters;
/** True if the forecaster has been built (cleared again by reset()) */
protected boolean m_modelBuilt = false;
/** True if an artificial time index has been added to the data */
protected boolean m_useArtificialTimeIndex = false;
/**
 * The estimator used for calculating confidence limits. Only created by
 * buildForecaster() when m_calculateConfLimitsSteps is greater than zero.
 */
protected ErrorBasedConfidenceIntervalEstimator m_confidenceLimitEstimator;
/**
 * Number of steps ahead to calculate confidence limits for (0 = don't
 * calculate confidence limits)
 */
protected int m_calculateConfLimitsSteps = 0;
/** Confidence level to compute confidence limits at */
protected double m_confidenceLevel = 0.95;
/**
 * For removing any date attributes (TSLagMaker will remap date timestamps to
 * numeric)
 */
protected RemoveType m_dateRemover;
/**
 * Holds a list of training instance indexes that contained missing target
 * values that were replaced via interpolation
 */
protected List m_missingTargetList;
/**
 * Holds a list of training instance indexes that contained missing date
 * values (if a date time stamp is being used)
 */
protected List m_missingTimeStampList;
/**
 * Companion to m_missingTimeStampList: re-created by reset() and handed to
 * Utils.replaceMissing() in buildForecaster() as its last argument.
 * NOTE(review): presumably describes the rows whose time stamp was missing -
 * confirm against Utils.replaceMissing().
 */
protected List m_missingTimeStampRows;
/**
 * Logging object
 */
protected Logger m_log;
/**
 * The lag maker to use. Declared without an access modifier, so it is
 * package-private rather than protected like the other fields.
 */
TSLagMaker m_lagMaker = new TSLagMaker();
// used by the incremental method when detecting missing values in
// targets/date; all transient, so none of this survives serialization
private transient Instance m_previousPrimeInstance = null;
private transient Instances m_missingBuffer = null;
private transient boolean m_hadLeadingMissingPrime = false;
private transient boolean m_first = false;
private transient boolean m_atLeastOneNonMissingTimeStamp = false;
/**
 * Command line entry point. A fresh forecaster is created and passed, along
 * with the raw arguments, to runForecaster(). Anything that goes wrong is
 * reported by printing the stack trace; no exception escapes this method.
 *
 * @param args general and scheme-specific command line arguments
 */
public static void main(String[] args) {
  try {
    final WekaForecaster forecaster = new WekaForecaster();
    forecaster.runForecaster(forecaster, args);
  } catch (Exception failure) {
    failure.printStackTrace();
  }
}
/**
 * Tells whether the configured base learner implements BaseModelSerializer,
 * i.e. whether saveBaseModel()/loadBaseModel() will do any work.
 *
 * @return true if the base learner is a BaseModelSerializer, false otherwise
 */
public boolean baseModelHasSerializer() {
  final Classifier base = m_forecaster;
  return (base instanceof BaseModelSerializer);
}
/**
 * Asks the wrapped classifier of every per-target forecaster to serialize
 * itself. Nothing happens unless baseModelHasSerializer() is true. The
 * classifier for target i receives the path filepath + ".base" + i.
 *
 * @param filepath the path prefix of the file(s) to save the base model to
 * @throws Exception propagated from the wrapped classifier's serializeModel()
 */
public void saveBaseModel(String filepath) throws Exception {
  if (!baseModelHasSerializer()) {
    return;
  }
  for (int target = 0; target < m_singleTargetForecasters.size(); target++) {
    BaseModelSerializer serializer = (BaseModelSerializer)
      m_singleTargetForecasters.get(target).getWrappedClassifier();
    serializer.serializeModel(filepath + ".base" + target);
  }
}
/**
 * Asks the wrapped classifier of every per-target forecaster to load its
 * serialized model. Nothing happens unless baseModelHasSerializer() is true.
 * The classifier for target i receives the path filepath + ".base" + i.
 *
 * @param filepath the path prefix of the file(s) to load the base model from
 * @throws Exception propagated from the wrapped classifier's
 *           loadSerializedModel()
 */
public void loadBaseModel(String filepath) throws Exception {
  if (!baseModelHasSerializer()) {
    return;
  }
  for (int target = 0; target < m_singleTargetForecasters.size(); target++) {
    BaseModelSerializer serializer = (BaseModelSerializer)
      m_singleTargetForecasters.get(target).getWrappedClassifier();
    serializer.loadSerializedModel(filepath + ".base" + target);
  }
}
/**
 * Asks the wrapped classifier of every per-target forecaster to serialize
 * its state. Nothing happens unless usesState() is true. The classifier for
 * target i receives the path filepath + ".state" + i.
 *
 * @param filepath the path prefix of the file(s) to save the model state to
 * @throws Exception propagated from the wrapped classifier's serializeState()
 */
public void serializeState(String filepath) throws Exception {
  if (!usesState()) {
    return;
  }
  for (int target = 0; target < m_singleTargetForecasters.size(); target++) {
    StateDependentPredictor predictor = (StateDependentPredictor)
      m_singleTargetForecasters.get(target).getWrappedClassifier();
    predictor.serializeState(filepath + ".state" + target);
  }
}
/**
 * Asks the wrapped classifier of every per-target forecaster to load its
 * serialized state. Nothing happens unless usesState() is true. The
 * classifier for target i receives the path filepath + ".state" + i.
 *
 * @param filepath the path prefix of the file(s) to load the model state from
 * @throws Exception propagated from the wrapped classifier's
 *           loadSerializedState()
 */
public void loadSerializedState(String filepath) throws Exception {
  if (!usesState()) {
    return;
  }
  for (int target = 0; target < m_singleTargetForecasters.size(); target++) {
    StateDependentPredictor predictor = (StateDependentPredictor)
      m_singleTargetForecasters.get(target).getWrappedClassifier();
    predictor.loadSerializedState(filepath + ".state" + target);
  }
}
/**
 * Tells whether the configured base learner implements
 * StateDependentPredictor. All state-related methods of this class are
 * no-ops when this returns false.
 *
 * @return true if the base learner is a StateDependentPredictor, false
 *         otherwise
 */
public boolean usesState() {
  final Classifier base = m_forecaster;
  return (base instanceof StateDependentPredictor);
}
/**
 * Clears the previous state of the wrapped classifier of every per-target
 * forecaster. Nothing happens unless usesState() is true.
 */
public void clearPreviousState() {
  if (!usesState()) {
    return;
  }
  for (int target = 0; target < m_singleTargetForecasters.size(); target++) {
    StateDependentPredictor predictor = (StateDependentPredictor)
      m_singleTargetForecasters.get(target).getWrappedClassifier();
    predictor.clearPreviousState();
  }
}
/**
 * Hands a previously captured state to the wrapped classifier of every
 * per-target forecaster. Nothing happens unless usesState() is true.
 *
 * @param previousState the states to install; element i is given to the
 *          forecaster for target i, so the list needs at least as many
 *          elements as there are per-target forecasters
 */
public void setPreviousState(List previousState) {
  if (!usesState()) {
    return;
  }
  for (int target = 0; target < m_singleTargetForecasters.size(); target++) {
    StateDependentPredictor predictor = (StateDependentPredictor)
      m_singleTargetForecasters.get(target).getWrappedClassifier();
    predictor.setPreviousState(previousState.get(target));
  }
}
/**
 * Collects the previous state of the wrapped classifier of every per-target
 * forecaster, in target order.
 *
 * @return the collected states (one per target), or an empty list when
 *         usesState() is false
 */
public List getPreviousState() {
  List state = new ArrayList<>();
  if (!usesState()) {
    return state;
  }
  for (int target = 0; target < m_singleTargetForecasters.size(); target++) {
    StateDependentPredictor predictor = (StateDependentPredictor)
      m_singleTargetForecasters.get(target).getWrappedClassifier();
    // appending is the same as inserting at index 'target' here, because the
    // list holds exactly 'target' elements at this point
    state.add(predictor.getPreviousState());
  }
  return state;
}
/**
 * Produces a short label for the underlying algorithm: the forecaster
 * specification (scheme name plus options) with a fixed set of Weka package
 * prefixes stripped. Every occurrence of a prefix is removed, wherever it
 * appears in the specification.
 *
 * @return a short description of this forecaster, or an empty string if no
 *         base forecaster is set
 */
@Override
public String getAlgorithmName() {
  if (m_forecaster == null) {
    return "";
  }
  // removed one after the other, in exactly this order
  final String[] packagePrefixes = { "weka.classifiers.", "functions.",
    "bayes.", "rules.", "trees.", "meta.", "lazy.", "supportVector." };
  String shortName = getForecasterSpec();
  for (String prefix : packagePrefixes) {
    shortName = shortName.replace(prefix, "");
  }
  return shortName;
}
/**
 * Returns the TSLagMaker in use. Lag creation, periodic attributes and
 * related settings are all configured through this object.
 *
 * @return the TSLagMaker that we are using.
 */
@Override
public TSLagMaker getTSLagMaker() {
  return this.m_lagMaker;
}
/**
 * Replaces the TSLagMaker in use. Lag creation, periodic attributes and
 * related settings are all configured through this object.
 *
 * @param lagMaker the TSLagMaker to use.
 */
@Override
public void setTSLagMaker(TSLagMaker lagMaker) {
  this.m_lagMaker = lagMaker;
}
/**
 * Returns an enumeration describing the available options. Each entry names
 * the option exactly as setOptions() looks it up (no leading dash) and
 * declares 0 arguments for flags and 1 for options that take a value.
 *
 * @return an enumeration of all the available options.
 */
@Override
public Enumeration listOptions() {
  Vector<Option> newVector = new Vector<Option>();
  newVector.add(new Option("\tSet the fields to forecast.", "F", 1,
    "-F "));
  newVector.add(new Option("\tSet the fields to be considered "
    + "as overlay data.", "overlay", 1,
    "-overlay "));
  newVector.add(new Option("\tSet the minimum lag length to generate."
    + "\n\t(default = 1)", "L", 1, "-L "));
  newVector.add(new Option("\tSet the maximum lag length to generate."
    + "\n\t(default = 12)", "M", 1, "-M "));
  newVector.add(new Option("\tRemove leading instances where the values "
    + "of lagged variables are unknown", "trim-leading", 0, "-trim-leading"));
  newVector.add(new Option("\tFine tune selection of lags within min and "
    + "max by specifying" + " ranges", "R", 1, "-R "));
  newVector.add(new Option("\tAverage consecutive long lags.", "A", 0, "-A"));
  newVector.add(new Option("\tAverage those lags longer than this number of "
    + "time steps.\n\tUse in conjuction with -A.\n\t" + "(default = 2)", "B",
    1, "-B "));
  newVector.add(new Option("\tAverage this many consecutive long lags.\n\t"
    + "Use in conjuction with -B (default = 2)", "C", 1, "-C "));
  newVector.add(new Option("\tDon't adjust for trends.", "Z", 0, "-Z"));
  // the next two are flags (parsed with Utils.getFlag), hence 0 arguments
  newVector.add(new Option("\tDon't include time lag products",
    "no-time-lag-products", 0, "-no-time-lag-products"));
  newVector.add(new Option("\tDon't include time powers of time",
    "no-powers-of-time", 0, "-no-powers-of-time"));
  newVector.add(new Option("\tSpecify the name of the timestamp field", "G",
    1, "-G "));
  newVector.add(new Option("\tAdjust for variance.", "V", 0, "-V"));
  newVector.add(new Option("\tSpecify the primary periodic field, "
    + "\n\tif one exists already in the data "
    + "(e.g. day, month, quarter etc.\n\tIf there is more than "
    + "one such field, choose the one with the finest granularity.\n\t"
    + "This field must be " + "cyclic and declared as nominal.", "periodic",
    1, "-periodic "));
  newVector.add(new Option(
    "\tCalculate confidence limits for predictions\n\t"
      + "(based on errors) for up to, and including, "
      + "the specified\n\tnumber of time steps "
      + "into the future\n\t(default = 0 (don't compute conf. levels)).",
    "conf", 1, "-conf "));
  newVector.add(new Option(
    "\tConfidence level for computing confidence limits.\n\t"
      + "Use in conjunction with -conf.\n\t(default = 0.95).", "P", 1,
    "-P "));
  newVector.add(new Option("\tSpecify the base regression scheme to use.\n\t"
    + "Supply a fully qualified name, along with options, enclosed in\n\t"
    + "quotes (e.g. \"weka.classifiers.functions.SMOreg -R 0.5\")."
    + "\n\t(default = weka.classifiers.functions.SMOreg)", "W", 1, "-W"));
  newVector.add(new Option(
    "\tAdd an AM/PM indicator (requires a date timestamp)", "am-pm", 0,
    "-am-pm"));
  // name matches the flag that setOptions()/getOptions() actually use
  newVector.add(new Option("\tAdd a day of the week field (requres a date"
    + " timestamp)", "dayofweek", 0, "-dayofweek"));
  newVector.add(new Option("\tAdd a day of the month field (requres a date"
    + " timestamp)", "dayofmonth", 0, "-dayofmonth"));
  newVector.add(new Option(
    "\tAdd a number of days in the month field (requres a date"
      + " timestamp)", "numdaysinmonth", 0, "-numdaysinmonth"));
  newVector.add(new Option(
    "\tAdd a weekend indicator (requires a date timestamp)", "weekend", 0,
    "-weekend"));
  newVector.add(new Option("\tAdd a month field (requires a date timestamp)",
    "month", 0, "-month"));
  newVector.add(new Option("\tAdd a quarter of the year field ("
    + "requires a date timestamp)", "quarter", 0, "-quarter"));
  newVector.add(new Option("\tAdd a custom date-derived boolean field ("
    + "requires a date timestamp).\n\tFormat: \"fieldName="
    + "Test Test|Test Test| ...\n\twhere "
    + "Test = OPERATORyear:month:week-of-yr:week-of-month:"
    + "day-of-yr:day-of-month:day-of-week:hour:min:second\n\te.g. "
    + "XmasHoliday=>*:dec:*:*:*:24:*:*:*:* <*:jan:*:*:*:3:*:*:*:*\n\t"
    + "Legal OPERATORs are =,>,<,>=,<=. For = operator only\n\t"
    + "one Test is needed rather than a pair.\n\tThis option may"
    + " be specified more than once on the command line\n\t"
    + "in order to define multiple variables.", "custom", 1, "-custom"));
  newVector.add(new Option(
    "\tAdd a comma-separated 'skip' list of dates that should not\n\t"
      + "be considered as a time step. Days of the week,\n\t"
      + "months of the year, 'weekend', integers (indicating day of year\n\t"
      + ", hour of day etc.) or specific dates are all valid entries.\n\t"
      + "E.g sat,sun,27-08-2011,28-08-2011", "skip", 1, "-skip"));
  return newVector.elements();
}
/**
 * Gets the current settings of this Forecaster. The flags written here use
 * the same names that setOptions() parses, so the returned array can be fed
 * back into setOptions().
 *
 * @return an array of strings suitable for passing to setOptions
 */
@Override
public String[] getOptions() {
  List<String> options = new ArrayList<String>();
  options.add("-F");
  options.add(getFieldsToForecast());
  // must be "-overlay": that is the only name setOptions() looks for
  String overlayFields = getOverlayFields();
  if (overlayFields != null && overlayFields.length() > 0) {
    options.add("-overlay");
    options.add(overlayFields);
  }
  if (m_lagMaker.getRemoveLeadingInstancesWithUnknownLagValues()) {
    options.add("-trim-leading");
  }
  options.add("-L");
  options.add("" + m_lagMaker.getMinLag());
  options.add("-M");
  options.add("" + m_lagMaker.getMaxLag());
  if (m_lagMaker.getLagRange().length() > 0) {
    options.add("-R");
    options.add(m_lagMaker.getLagRange());
  }
  if (m_lagMaker.getAverageConsecutiveLongLags()) {
    options.add("-A");
  }
  // -B is documented as "use in conjunction with -A", so it is written
  // whether or not -A is set; otherwise the value is lost on a round trip
  options.add("-B");
  options.add("" + m_lagMaker.getAverageLagsAfter());
  options.add("-C");
  options.add("" + m_lagMaker.getNumConsecutiveLongLagsToAverage());
  if (!m_lagMaker.getAdjustForTrends()) {
    options.add("-Z");
  }
  if (!m_lagMaker.getIncludeTimeLagProducts()) {
    // hyphens throughout, matching the flag read by setOptions()
    options.add("-no-time-lag-products");
  }
  if (!m_lagMaker.getIncludePowersOfTime()) {
    options.add("-no-powers-of-time");
  }
  if (m_lagMaker.getAdjustForVariance()) {
    options.add("-V");
  }
  if (m_lagMaker.getTimeStampField() != null
    && m_lagMaker.getTimeStampField().length() > 0) {
    options.add("-G");
    options.add(m_lagMaker.getTimeStampField());
  }
  if (m_lagMaker.getAddAMIndicator()) {
    options.add("-am-pm");
  }
  if (m_lagMaker.getAddDayOfWeek()) {
    options.add("-dayofweek");
  }
  if (m_lagMaker.getAddDayOfMonth()) {
    options.add("-dayofmonth");
  }
  if (m_lagMaker.getAddWeekendIndicator()) {
    options.add("-weekend");
  }
  if (m_lagMaker.getAddMonthOfYear()) {
    options.add("-month");
  }
  if (m_lagMaker.getAddNumDaysInMonth()) {
    options.add("-numdaysinmonth");
  }
  if (m_lagMaker.getAddQuarterOfYear()) {
    options.add("-quarter");
  }
  // one "-custom" entry per custom periodic; only toString() of the tests
  // is needed, so the element type is left as a wildcard
  Map<String, ? extends List<?>> customPeriodics =
    m_lagMaker.getCustomPeriodics();
  if (customPeriodics != null && !customPeriodics.isEmpty()) {
    for (List<?> tests : customPeriodics.values()) {
      options.add("-custom");
      StringBuilder tempBuff = new StringBuilder();
      tempBuff.append("\"");
      for (int i = 0; i < tests.size(); i++) {
        if (i > 0) {
          tempBuff.append("|");
        }
        tempBuff.append(tests.get(i).toString());
      }
      // closed after the loop so that an empty test list still yields
      // balanced quotes
      tempBuff.append("\"");
      options.add(tempBuff.toString());
    }
  }
  if (m_lagMaker.getSkipEntries() != null
    && m_lagMaker.getSkipEntries().length() > 0) {
    options.add("-skip");
    options.add(m_lagMaker.getSkipEntries());
  }
  if (m_lagMaker.getPrimaryPeriodicFieldName() != null
    && m_lagMaker.getPrimaryPeriodicFieldName().length() > 0) {
    options.add("-periodic");
    options.add(m_lagMaker.getPrimaryPeriodicFieldName());
  }
  options.add("-conf");
  options.add("" + getCalculateConfIntervalsForForecasts());
  options.add("-P");
  options.add("" + getConfidenceLevel());
  options.add("-W");
  options.add(getForecasterSpec());
  return options.toArray(new String[options.size()]);
}
/**
 * Set the options for the forecaster. Options that take a value are only
 * applied when present; boolean flags are always applied (absent = false).
 * When no -W option is given the base scheme becomes
 * weka.classifiers.functions.SMOreg.
 *
 * @param options an array of options
 * @throws Exception if -F is missing, if a numeric option is out of range,
 *           or if the base classifier specification is invalid
 */
@Override
public void setOptions(String[] options) throws Exception {
  String fieldsToForecast = Utils.getOption('F', options);
  if (fieldsToForecast.length() == 0) {
    throw new Exception(
      "Must specify the name of at least one field to forecast!");
  }
  setFieldsToForecast(fieldsToForecast);
  String overlayFields = Utils.getOption("overlay", options);
  if (overlayFields.length() > 0) {
    setOverlayFields(overlayFields);
  }
  m_lagMaker.setRemoveLeadingInstancesWithUnknownLagValues(Utils.getFlag(
    "trim-leading", options));
  String minL = Utils.getOption('L', options);
  if (minL.length() > 0) {
    int mL = Integer.parseInt(minL);
    // validate first so that a rejected value never reaches the lag maker
    if (mL < 1) {
      throw new Exception("Minimum lag can't be less than 1!");
    }
    m_lagMaker.setMinLag(mL);
  }
  String maxL = Utils.getOption('M', options);
  if (maxL.length() > 0) {
    int mL = Integer.parseInt(maxL);
    m_lagMaker.setMaxLag(mL);
  }
  if (m_lagMaker.getMaxLag() < m_lagMaker.getMinLag()) {
    throw new Exception(
      "Can't have the maximum lag set lower than the minimum lag!");
  }
  String lagRange = Utils.getOption('R', options);
  if (lagRange.length() > 0) {
    m_lagMaker.setLagRange(lagRange);
  }
  boolean avLongLags = Utils.getFlag('A', options);
  m_lagMaker.setAverageConsecutiveLongLags(avLongLags);
  String avLongerThan = Utils.getOption('B', options);
  if (avLongerThan.length() > 0) {
    int avL = Integer.parseInt(avLongerThan);
    if (avL < m_lagMaker.getMinLag() || avL > m_lagMaker.getMaxLag()) {
      throw new Exception("Average consecutive long lags value can't "
        + "be less than the minimum lag or greater than the "
        + "maximum lag!");
    }
    m_lagMaker.setAverageLagsAfter(avL);
  }
  String consecutiveLongLagS = Utils.getOption('C', options);
  if (consecutiveLongLagS.length() > 0) {
    int consecutive = Integer.parseInt(consecutiveLongLagS);
    // upper bound is relative to the "average lags after" setting
    int maxConsecutive =
      m_lagMaker.getMaxLag() - m_lagMaker.getAverageLagsAfter();
    if (consecutive < 1 || consecutive > maxConsecutive) {
      // report the bound that is actually enforced
      throw new Exception("Number of consecutive long lags to average "
        + "must be greater than 0 and no greater than " + maxConsecutive);
    }
    m_lagMaker.setNumConsecutiveLongLagsToAverage(consecutive);
  }
  boolean dontAdjTrends = Utils.getFlag('Z', options);
  m_lagMaker.setAdjustForTrends(!dontAdjTrends);
  boolean noTimeLagProds = Utils.getFlag("no-time-lag-products", options);
  m_lagMaker.setIncludeTimeLagProducts(!noTimeLagProds);
  boolean noPowersOfTime = Utils.getFlag("no-powers-of-time", options);
  m_lagMaker.setIncludePowersOfTime(!noPowersOfTime);
  boolean adjVariance = Utils.getFlag("V", options);
  m_lagMaker.setAdjustForVariance(adjVariance);
  String timeStampF = Utils.getOption('G', options);
  if (timeStampF.length() > 0) {
    m_lagMaker.setTimeStampField(timeStampF);
  }
  m_lagMaker.setAddAMIndicator(Utils.getFlag("am-pm", options));
  m_lagMaker.setAddDayOfWeek(Utils.getFlag("dayofweek", options));
  m_lagMaker.setAddWeekendIndicator(Utils.getFlag("weekend", options));
  m_lagMaker.setAddMonthOfYear(Utils.getFlag("month", options));
  m_lagMaker.setAddQuarterOfYear(Utils.getFlag("quarter", options));
  m_lagMaker.setAddDayOfMonth(Utils.getFlag("dayofmonth", options));
  m_lagMaker.setAddNumDaysInMonth(Utils.getFlag("numdaysinmonth", options));
  // custom date-derived periodic fields; -custom may be given several times,
  // so keep asking until no further occurrence is returned
  String customPeriodic = Utils.getOption("custom", options);
  while (customPeriodic.length() > 0) {
    m_lagMaker.addCustomPeriodic(customPeriodic);
    customPeriodic = Utils.getOption("custom", options);
  }
  String primaryPeriodicN = Utils.getOption("periodic", options);
  if (primaryPeriodicN.length() > 0) {
    m_lagMaker.setPrimaryPeriodicFieldName(primaryPeriodicN);
  }
  String skipString = Utils.getOption("skip", options);
  if (skipString.length() > 0) {
    m_lagMaker.setSkipEntries(skipString);
  }
  String confSteps = Utils.getOption("conf", options);
  if (confSteps.length() > 0) {
    int numSteps = Integer.parseInt(confSteps);
    if (numSteps < 0) {
      throw new Exception("Number of steps must be >= 0");
    }
    setCalculateConfIntervalsForForecasts(numSteps);
  }
  String confLevel = Utils.getOption('P', options);
  if (confLevel.length() > 0) {
    double cL = Double.parseDouble(confLevel);
    // written as a negated range test so that NaN is rejected as well
    if (!(cL >= 0 && cL <= 1)) {
      throw new Exception("Confidence level must be between 0 and 1.");
    }
    setConfidenceLevel(cL);
  }
  String baseClassifierS = Utils.getOption('W', options);
  if (baseClassifierS.length() == 0) {
    baseClassifierS = "weka.classifiers.functions.SMOreg";
  }
  String[] classifierSpec = Utils.splitOptions(baseClassifierS);
  if (classifierSpec.length == 0) {
    throw new Exception("Invalid classifier specification.");
  }
  // first token is the scheme name; blank it so only options remain
  String classifierName = classifierSpec[0];
  classifierSpec[0] = "";
  setBaseForecaster(AbstractClassifier
    .forName(classifierName, classifierSpec));
}
/**
* Set the name of the time stamp field
*
* @param name the name of the time stamp attribute
*/
/*
* public void setTimeStampField(String name) {
* m_lagMaker.setTimeStampField(name); }
*/
/**
* Get the name of the time stamp attribute
*
* @return the name of the time stamp attribute or an empty string if none has
* been specified/is in use
*/
/*
* public String getTimeStampField() { return m_lagMaker.getTimeStampField();
* }
*/
/**
* Set whether to include an AM binary indicator attribute.
*
* @param am true if a binary AM indicator attribute is to be generated. Only
* has an effect if a date time stamp is in use.
*/
/*
* public void setAddAMIndicator(boolean am) {
* m_lagMaker.setAddAMIndicator(am); }
*/
/**
* Returns true if an AM binary indicator is to be generated.
*
* @return true if an AM binary indicator is to be generated.
*/
/*
* public boolean getAddAMIndicator() { return m_lagMaker.getAddAMIndicator();
* }
*/
/**
* Set whether to include a day of the week attribute
*
* @param am true if a day of the week attribute is to be generated. Only has
* an effect if a date time stamp is in use.
*/
/*
* public void setAddDayOfWeek(boolean d) { m_lagMaker.setAddDayOfWeek(d); }
*/
/**
* Returns true if a day of the week attribute is to be generated.
*
* @return true if a day of the week attribute is to be generated.
*/
/*
* public boolean getAddDayOfWeek() { return m_lagMaker.getAddDayOfWeek(); }
*/
/**
* Set whether to include a weekend indicator attribute.
*
* @param am true if a binary weekend indicator attribute is to be generated.
* Only has an effect if a date time stamp is in use.
*/
/*
* public void setAddWeekendIndicator(boolean w) {
* m_lagMaker.setAddWeekendIndicator(w); }
*/
/**
* Returns true if a weekend binary indicator attribute is to be generated.
*
* @return true if a weekend binary indicator attribute is to be generated.
*/
/*
* public boolean getAddWeekendIndicator() { return
* m_lagMaker.getAddWeekendIndicator(); }
*/
/**
* Set whether to include a month of the year attribute.
*
* @param am true if a month of the year attribute is to be generated. Only
* has an effect if a date time stamp is in use.
*/
/*
* public void setAddMonthOfYear(boolean m) { m_lagMaker.setAddMonthOfYear(m);
* }
*/
/**
* Returns true if a month of the year attribute is to be generated.
*
* @return true if a month of the year attribute is to be generated.
*/
/*
* public boolean getAddMonthOfYear() { return m_lagMaker.getAddMonthOfYear();
* }
*/
/**
* Set whether to include a quarter of the year attribute.
*
* @param am true if a quarter of the year attribute is to be generated. Only
* has an effect if a date time stamp is in use.
*/
/*
* public void setAddQuarterOfYear(boolean q) {
* m_lagMaker.setAddQuarterOfYear(q); }
*/
/**
* Return true if a quarter of the year attribute is to be generated.
*
* @return if a quarter of the year attribute is to be generated.
*/
/*
* public boolean getAddQuarterOfYear() { return
* m_lagMaker.getAddQuarterOfYear(); }
*/
/**
* Set the name of the field to be considered the primary periodic field (if
* any). This field is one which is not a date-based attribute but is periodic
* and cyclic and declared as nominal. Each distinct value can only be
* succeeded by a single value (so that it is possible to set the appropriate
* values in successive future instances). Any secondary, higher-grained
* periodic fields will automatically be detected once a primary field is
* specified.
*
*
* @param p the name of a primary periodic field (if any)
*/
/*
* public void setPrimaryPeriodicFieldName(String p) { //m_primaryPeriodicName
* = p; m_lagMaker.setPrimaryPeriodicFieldName(p); }
*/
/**
* Get the name of the primary periodic field (if set).
*
* @return the name of the primary periodic field or an empty string if none
* has been set/exists.
*/
/*
* public String getPrimaryPeriodicFieldName() { return
* m_lagMaker.getPrimaryPeriodicFieldName(); }
*/
/**
 * Builds the specification of the underlying Weka classifier: its class
 * name, followed by a space and its joined options when the classifier is
 * an OptionHandler.
 *
 * @return the scheme name and options of the underlying Weka classifier
 */
protected String getForecasterSpec() {
  final Classifier base = getBaseForecaster();
  final String schemeName = base.getClass().getName();
  if (!(base instanceof OptionHandler)) {
    return schemeName;
  }
  final String[] schemeOptions = ((OptionHandler) base).getOptions();
  return schemeName + " " + Utils.joinOptions(schemeOptions);
}
/**
 * Registers a custom date-derived periodic attribute by forwarding its
 * definition to the lag maker.
 *
 * @param customPeriodic the string definition of the custom date derived
 *          periodic attribute to add
 */
public void addCustomPeriodic(String customPeriodic) {
  this.m_lagMaker.addCustomPeriodic(customPeriodic);
}
/**
 * Asks the lag maker to clear its list of custom date-derived periodic
 * attributes.
 */
public void clearCustomPeriodics() {
  this.m_lagMaker.clearCustomPeriodics();
}
/**
 * Sets the names of the fields/attributes in the data to forecast. The
 * superclass processes the supplied string first; the resulting
 * m_fieldsToForecast is then passed to the lag maker as the fields to lag.
 *
 * @param fieldsToForecast a list of names of fields to forecast
 * @throws Exception if a field(s) can't be found, or if multiple fields are
 *           specified and this forecaster can't predict multiple fields.
 */
@Override
public void setFieldsToForecast(String fieldsToForecast) throws Exception {
  // the superclass call must come first: it fills in m_fieldsToForecast
  super.setFieldsToForecast(fieldsToForecast);
  this.m_lagMaker.setFieldsToLag(this.m_fieldsToForecast);
}
/**
 * Get a comma-separated list of the fields that are considered to be overlay
 * fields.
 *
 * @return the overlay field names joined with commas (no trailing comma), or
 *         an empty string if the lag maker has no overlay fields (null or
 *         empty list)
 */
@Override
public String getOverlayFields() {
  List<String> overlayF = m_lagMaker.getOverlayFields();
  // an empty list previously ended in substring(0, -1) and threw
  // StringIndexOutOfBoundsException
  if (overlayF == null || overlayF.isEmpty()) {
    return "";
  }
  StringBuilder list = new StringBuilder();
  boolean first = true;
  for (String f : overlayF) {
    if (!first) {
      list.append(',');
    }
    list.append(f);
    first = false;
  }
  return list.toString();
}
/**
 * Sets the fields to consider as overlay fields. A null argument is passed
 * to the lag maker as null; anything else is first converted to a list with
 * AbstractForecaster.stringToList().
 *
 * @param overlayFields a comma-separated list of fieldnames
 * @throws Exception if there is a problem setting the overlay fields
 */
@Override
public void setOverlayFields(String overlayFields) throws Exception {
  if (overlayFields != null) {
    m_lagMaker.setOverlayFields(AbstractForecaster
      .stringToList(overlayFields));
    return;
  }
  m_lagMaker.setOverlayFields(null);
}
/**
 * Returns how many steps ahead confidence intervals will be computed for.
 *
 * @return the number of steps for which confidence intervals will be
 *         computed.
 */
@Override
public int getCalculateConfIntervalsForForecasts() {
  return this.m_calculateConfLimitsSteps;
}
/**
 * Sets how many steps ahead confidence intervals are computed for. For
 * example, 5 means bounds are computed for the 1-step-ahead through
 * 5-step-ahead predictions. The value is stored as given; no range check is
 * made here.
 *
 * @param steps the number of steps for which to compute confidence intervals
 *          for.
 */
@Override
public void setCalculateConfIntervalsForForecasts(int steps) {
  this.m_calculateConfLimitsSteps = steps;
}
/**
 * Tells whether this forecaster computes confidence limits for some or all
 * of its future forecasts, i.e. whether
 * getCalculateConfIntervalsForForecasts() is greater than zero.
 *
 * @return true if confidence limits will be produced for some or all of its
 *         future forecasts.
 */
@Override
public boolean isProducingConfidenceIntervals() {
  final int steps = getCalculateConfIntervalsForForecasts();
  return steps > 0;
}
/**
 * Returns the confidence level used when computing confidence intervals.
 *
 * @return the confidence level.
 */
@Override
public double getConfidenceLevel() {
  return this.m_confidenceLevel;
}
/**
 * Sets the confidence level for confidence intervals. The value is stored
 * as given; no range check is made here.
 *
 * @param confLevel the confidence level to use.
 */
@Override
public void setConfidenceLevel(double confLevel) {
  this.m_confidenceLevel = confLevel;
}
/**
 * Returns the base Weka regression scheme used to make forecasts.
 *
 * @return the base Weka regression scheme
 */
public Classifier getBaseForecaster() {
  return this.m_forecaster;
}
/**
 * Sets the base Weka regression scheme. buildForecaster() makes a copy of
 * this scheme for every field to forecast.
 *
 * @param f the base Weka regression scheme to use for forecasting.
 */
public void setBaseForecaster(Classifier f) {
  this.m_forecaster = f;
}
/**
 * Tells whether overlay data is in use, i.e. whether the lag maker holds a
 * non-null, non-empty list of overlay fields. When it is, overlay data is
 * expected to be supplied for future time steps when making a forecast.
 *
 * @return true if overlay data is expected.
 */
@Override
public boolean isUsingOverlayData() {
  return m_lagMaker.getOverlayFields() != null
    && m_lagMaker.getOverlayFields().size() > 0;
}
/**
 * Resets the forecaster: marks the model as not built, resets the lag maker,
 * drops the date remover, the primed input and the confidence limit
 * estimator, and starts fresh, empty lists for the missing-value bookkeeping.
 */
@Override
public void reset() {
  this.m_modelBuilt = false;
  this.m_lagMaker.reset();

  // everything derived from a previous build or priming is discarded
  this.m_dateRemover = null;
  this.m_primedInput = null;
  this.m_confidenceLimitEstimator = null;

  // bookkeeping filled in again by the next buildForecaster() call
  this.m_missingTargetList = new ArrayList<>();
  this.m_missingTimeStampList = new ArrayList<>();
  this.m_missingTimeStampRows = new ArrayList<>();
}
/**
 * Builds a new forecasting model from the supplied training data. The
 * instances are assumed to be sorted in ascending order of time and equally
 * spaced in time.
 * <p>
 * Steps, in order: reset this forecaster; remember the original header;
 * replace missing values on a copy of the data; transform the copy with the
 * lag maker; strip date attributes; train one SingleTargetForecaster (each
 * on its own copy of the base scheme) per field to forecast; and finally,
 * if requested, compute confidence offsets.
 *
 * @param insts the training instances.
 * @param progress an optional varargs parameter supplying progress objects to
 *          report/log to
 * @throws Exception if the model can't be constructed for some reason.
 */
@Override
public void buildForecaster(Instances insts, PrintStream... progress)
  throws Exception {
  reset();
  m_originalHeader = new Instances(insts, 0);

  // all further work happens on a copy of the supplied instances
  Instances working = new Instances(insts);
  working =
    weka.classifiers.timeseries.core.Utils.replaceMissing(working,
      m_fieldsToForecast, m_lagMaker.getTimeStampField(), false,
      m_lagMaker.getPeriodicity(), m_lagMaker.getSkipEntries(),
      m_missingTargetList, m_missingTimeStampList, m_missingTimeStampRows);

  for (PrintStream out : progress) {
    out.println("Transforming input data...");
  }
  Instances transformed = m_lagMaker.getTransformedData(working);

  // drop attributes of type date from the transformed data
  m_dateRemover = new RemoveType();
  m_dateRemover.setOptions(new String[] { "-T", "date" });
  m_dateRemover.setInputFormat(transformed);
  transformed = Filter.useFilter(transformed, m_dateRemover);
  m_transformedHeader = new Instances(transformed, 0);

  // one forecaster per target, each wrapping its own copy of the base scheme
  m_singleTargetForecasters = new ArrayList<>();
  for (int target = 0; target < m_fieldsToForecast.size(); target++) {
    SingleTargetForecaster targetForecaster = new SingleTargetForecaster();
    Classifier baseCopy = AbstractClassifier.makeCopy(m_forecaster);
    targetForecaster.setClassifier(baseCopy);
    targetForecaster.buildForecaster(transformed,
      m_fieldsToForecast.get(target));
    m_singleTargetForecasters.add(targetForecaster);
  }

  // flagged as built before the confidence step, which receives 'this'
  m_modelBuilt = true;

  if (m_calculateConfLimitsSteps > 0) {
    for (PrintStream out : progress) {
      out.println("Computing confidence intervals...");
    }
    // -1 indicates not using an artificial time index
    int artificialTimeStart = -1;
    if (m_lagMaker.isUsingAnArtificialTimeIndex()) {
      artificialTimeStart = 1;
    }
    ErrorBasedConfidenceIntervalEstimator estimator =
      new ErrorBasedConfidenceIntervalEstimator();
    estimator.calculateConfidenceOffsets(this, working,
      m_lagMaker.getMaxLag(), artificialTimeStart,
      m_calculateConfLimitsSteps, m_confidenceLevel, progress);
    m_confidenceLimitEstimator = estimator;
  }
}
/**
 * Returns a textual description of the built forecaster: the attribute names
 * of the transformed training data, reports on inserted time stamps,
 * interpolated targets and missing time stamps (when any were recorded), and
 * the description of each single-target forecaster.
 * <p>
 * Note that the missing-target and missing-time-stamp lists are sorted in
 * place before being printed.
 *
 * @return a description of the forecaster, or a short notice if it has not
 *         been built yet
 */
@Override
public String toString() {
  if (!m_modelBuilt) {
    return "Forecaster has not been built yet!";
  }

  StringBuilder result = new StringBuilder();
  result.append("Transformed training data:\n\n");
  for (int i = 0; i < m_transformedHeader.numAttributes(); i++) {
    result.append(" " + m_transformedHeader.attribute(i).name())
      .append("\n");
  }

  if (m_missingTimeStampRows != null && m_missingTimeStampRows.size() > 0) {
    result
      .append("\n--------------------------------------------------------\n"
        + "Instances were inserted in the training data for the\n"
        + "following time-stamps (target values set by interpolation):\n\n");
    for (int i = 0; i < m_missingTimeStampRows.size(); i++) {
      if (i == 0) {
        result.append(" " + m_missingTimeStampRows.get(i));
      } else {
        result.append(", " + m_missingTimeStampRows.get(i));
      }
    }
    result
      .append("\n--------------------------------------------------------\n");
  }

  if (m_missingTargetList != null && m_missingTargetList.size() > 0) {
    // sorting puts equal entries next to each other so that duplicates can
    // be skipped below
    Collections.sort(m_missingTargetList);
    result.append("\n---------------------------------------------------\n"
      + "The following training instances had missing values\n"
      + "imputed via interpolation. Check source data as\n"
      + "this may affect forecasting performance:\n\n");
    for (int i = 0; i < m_missingTargetList.size(); i++) {
      if (i == 0) {
        result.append(" " + m_missingTargetList.get(i));
      } else if (!m_missingTargetList.get(i).equals(
        m_missingTargetList.get(i - 1))) {
        result.append("," + m_missingTargetList.get(i));
      }
    }
    result.append("\n---------------------------------------------------\n");
  }

  if (m_missingTimeStampList != null && m_missingTimeStampList.size() > 0) {
    Collections.sort(m_missingTimeStampList);
    result
      .append("\n--------------------------------------------------------\n"
        + "The following training instances had missing time stamps:\n\n");
    for (int i = 0; i < m_missingTimeStampList.size(); i++) {
      if (i == 0) {
        result.append(" " + m_missingTimeStampList.get(i));
      } else {
        result.append("," + m_missingTimeStampList.get(i));
      }
    }
    result
      .append("\n-------------------------------------------------------\n");
  }

  for (int i = 0; i < m_singleTargetForecasters.size(); i++) {
    result.append("\n" + m_singleTargetForecasters.get(i)).append("\n");
  }

  return result.toString();
}
/**
 * Pushes a single instance through the lag maker.
 *
 * @param source the instance to process
 * @param incrementArtificialTime handed on unchanged to the lag maker's
 *          processInstance()
 * @param setAnyPeriodic handed on unchanged to the lag maker's
 *          processInstance()
 * @return the instance returned by the lag maker
 * @throws Exception if the lag maker fails to process the instance
 */
protected Instance applyFilters(Instance source,
  boolean incrementArtificialTime, boolean setAnyPeriodic) throws Exception {
  return m_lagMaker.processInstance(source, incrementArtificialTime,
    setAnyPeriodic);
}
/**
 * Primes the forecaster with historical data up to and including the current
 * time point so that a forecast can be produced. The instances are assumed to
 * be sorted in ascending order of time and equally spaced in time.
 * <p>
 * All incremental priming state is re-initialised, the lag histories are
 * cleared, and every supplied instance is then fed through
 * {@link #primeForecasterIncremental(Instance)}.
 *
 * @param insts the instances to prime the model with
 * @throws Exception if the model can't be primed for some reason.
 */
@Override
public void primeForecaster(Instances insts) throws Exception {
  m_primedInput = new Instances(insts);
  m_missingBuffer = new Instances(insts, 0);

  // start of a new priming batch: forget all incremental state
  m_previousPrimeInstance = null;
  m_hadLeadingMissingPrime = false;
  m_atLeastOneNonMissingTimeStamp = false;
  m_first = true;

  m_lagMaker.clearLagHistories();

  // base learners that update on priming data but do not use lagged data
  // are reset and later updated with the untransformed priming instances
  Classifier firstBase =
    m_singleTargetForecasters.get(0).getWrappedClassifier();
  if (firstBase instanceof PrimingDataLearner
    && !(firstBase instanceof TSLagUser)) {
    m_tempHeader = new Instances(insts, 0);
    for (int target = 0; target < m_fieldsToForecast.size(); target++) {
      PrimingDataLearner learner =
        (PrimingDataLearner) m_singleTargetForecasters.get(target)
          .getWrappedClassifier();
      learner.reset();
    }
  }

  for (int row = 0; row < m_primedInput.numInstances(); row++) {
    primeForecasterIncremental(m_primedInput.instance(row));
    m_first = false;
  }
}
/**
 * Update the priming information incrementally, i.e. one instance at a time.
 * To indicate the start of a new batch of priming data an empty set of
 * instances must be passed to TSForecaster.primeForecaster() before the first
 * call to primeForecasterIncremental().
 * <p>
 * Instances with a missing target value are held back in a buffer. The
 * buffer is interpolated and flushed into the lag maker as soon as an
 * instance arrives that is not buffered itself. A missing time stamp is
 * filled in by advancing the previous instance's time stamp, when one is
 * available.
 *
 * @param inst the instance to prime with.
 * @throws Exception if priming has not been initialised, if time stamps are
 *           not strictly ascending, or if something else goes wrong.
 */
@Override
public void primeForecasterIncremental(Instance inst) throws Exception {
  if (m_primedInput == null) {
    throw new Exception("WekaForecaster hasn't been initialized with "
      + "a call to primeForecaster()!!");
  }

  // Base learners that update on priming data without using lagged data are
  // updated directly with the untransformed target values
  Classifier firstBase =
    m_singleTargetForecasters.get(0).getWrappedClassifier();
  if (firstBase instanceof PrimingDataLearner
    && !(firstBase instanceof TSLagUser)) {
    for (int target = 0; target < m_fieldsToForecast.size(); target++) {
      PrimingDataLearner learner =
        (PrimingDataLearner) m_singleTargetForecasters.get(target)
          .getWrappedClassifier();
      Instance rawCopy = (Instance) inst.copy();
      rawCopy.setDataset(m_tempHeader);
      m_tempHeader.setClass(m_tempHeader.attribute(m_fieldsToForecast
        .get(target)));
      learner.updateForecaster(rawCopy.classValue());
    }
  }

  // true when a genuine (non-artificial) time stamp field is in play
  final boolean realTimeStampInUse =
    !m_lagMaker.isUsingAnArtificialTimeIndex()
      && m_lagMaker.getAdjustForTrends()
      && m_lagMaker.getTimeStampField() != null
      && m_lagMaker.getTimeStampField().length() > 0;

  // if there is a previous row, check that time values are increasing
  if (realTimeStampInUse && !m_first && m_previousPrimeInstance != null) {
    Attribute timeAttr =
      inst.dataset().attribute(m_lagMaker.getTimeStampField());
    if (!m_previousPrimeInstance.isMissing(timeAttr)) {
      double previous = m_previousPrimeInstance.value(timeAttr);
      double current = inst.value(timeAttr);
      if (current <= previous) {
        throw new Exception("Priming instances do not appear to be in "
          + "ascending order of the time stamp field ("
          + m_lagMaker.getTimeStampField() + ")! " + m_previousPrimeInstance
          + " : " + inst);
      }
    }
  }

  boolean wasBuffered = false;

  if (!inst.hasMissingValue()) {
    // a complete row: its time stamp (if a real one is used) is valid
    if (realTimeStampInUse) {
      m_atLeastOneNonMissingTimeStamp = true;
    }
  } else {
    // is any of the targets missing?
    boolean allTargetsPresent = true;
    for (String target : m_fieldsToForecast) {
      if (inst.isMissing(inst.dataset().attribute(target))) {
        allTargetsPresent = false;
        break;
      }
    }

    // is the time stamp missing?
    boolean onlyTimeMissing = false;
    if (realTimeStampInUse) {
      Attribute timeAttr =
        inst.dataset().attribute(m_lagMaker.getTimeStampField());
      if (!inst.isMissing(timeAttr)) {
        m_atLeastOneNonMissingTimeStamp = true;
      } else {
        onlyTimeMissing = allTargetsPresent;

        // with a previous non-missing time stamp available, derive this
        // one by advancing it by the lag maker's delta (this covers
        // trailing missing time stamps)
        if (m_previousPrimeInstance != null
          && !m_previousPrimeInstance.isMissing(timeAttr)) {
          double newValue = m_previousPrimeInstance.value(timeAttr);
          newValue = m_lagMaker.advanceSuppliedTimeValue(newValue);
          inst.setValue(timeAttr, newValue);
        }
      }
    }

    if (!allTargetsPresent) {
      if (m_first) {
        // leading row of the batch with a missing target
        m_hadLeadingMissingPrime = !onlyTimeMissing;
      }
      // hold the row back until it can be interpolated
      m_missingBuffer.add(inst);
      wasBuffered = true;
    }
  }

  m_previousPrimeInstance = inst;

  if (!wasBuffered) {
    if (m_missingBuffer.numInstances() > 0) {
      // this row closes the buffered run: append it, interpolate the
      // missing values and flush everything into the lag maker
      m_missingBuffer.add(inst);
      Instances missingReplaced =
        weka.classifiers.timeseries.core.Utils.replaceMissing(
          m_missingBuffer, m_fieldsToForecast,
          m_lagMaker.getTimeStampField(), false,
          m_lagMaker.getPeriodicity(), m_lagMaker.getSkipEntries());
      for (int row = 0; row < missingReplaced.numInstances(); row++) {
        applyFilters(missingReplaced.instance(row), false, false);
      }
      m_missingBuffer = new Instances(m_primedInput, 0);
    } else {
      applyFilters(inst, false, false);
    }
  }

  m_first = false;
}
/**
 * Produces a one-step-ahead forecast for a test instance by asking the
 * classifier held in {@code m_forecaster} to classify it.
 *
 * @param transformed a test instance, corresponding to the next time step,
 *          that has been transformed using the lag maker
 *
 * @return the value predicted for the test instance.
 * @throws Exception if a problem occurs
 */
protected double forecastOneStepAhead(Instance transformed) throws Exception {
  final double prediction = m_forecaster.classifyInstance(transformed);
  return prediction;
}
/**
* Produce a forecast for the target field(s). Assumes that the model has been
* built and/or primed so that a forecast can be generated.
*
* @param numSteps number of forecasted values to produce for each target.
* E.g. a value of 5 would produce a prediction for t+1, t+2, ...,
* t+5. if no overlay data has been used during training)
* @param progress an optional varargs parameter supplying progress objects to
* report/log to
* @return a List of Lists (one for each step) of forecasted values for each
* target
* @throws Exception if the forecast can't be produced for some reason.
*/
@Override
public List> forecast(int numSteps,
PrintStream... progress) throws Exception {
return forecast(numSteps, null, progress);
}
/**
* Produce a forecast for the target field(s). Assumes that the model has been
* built and/or primed so that a forecast can be generated.
*
* @param numSteps number of forecasted values to produce for each target.
* E.g. a value of 5 would produce a prediction for t+1, t+2, ...,
* t+5.
* @param overlay optional overlay data for the period to be forecasted (may
* be null if no overlay data has been used during training)
* @param progress an optional varargs parameter supplying progress objects to
* report/log to
* @return a List of Lists (one for each step) of forecasted values for each
* target
* @throws Exception if the forecast can't be produced for some reason.
*/
@Override
public List> forecast(int numSteps,
Instances overlay, PrintStream... progress) throws Exception {
if (overlay != null) {
if (m_lagMaker.getOverlayFields() == null
|| m_lagMaker.getOverlayFields().size() == 0) {
throw new Exception(
"[WekaForecaster] overlay data has been supplied to the"
+ " forecasting routine but no overlay data has been trained with.");
}
String message = m_originalHeader.equalHeadersMsg(overlay);
if (message != null) {
throw new Exception("[WekaForecaster] supplied overlay data does not "
+ "have the same structure as the data used to learn " + "the model!");
}
} else {
// check to see if we've been trained with overlay data
if (m_lagMaker.getOverlayFields() != null
&& m_lagMaker.getOverlayFields().size() > 0) {
throw new Exception(
"[WekaForecaster] was trained with overlay data but "
+ "none has been supplied for making a forecast!");
}
}
// we need to:
// 1) input a new instance with ? for target into the filter chain in order
// to push the most recent
// known target value into the history
// 2) output() from filter
// 3) make the t + 1 prediction
// 4) set the value of the target for the input instance (this instance, now
// stored in
// the history buffer of the TimeseriesTranslate filters, will now have the
// predicted target
// value - hopefully)
// 4 won't work. Need to add the input instance (with prediction set) to the
// end
// of the primed input data set and then call primeForecaster() again
// double[] finalForecast = new double[numSteps];
// Check the incremental prime buffer to see if there are any pending
// instances to prime. We won't be able to interpolate missing values for
// the
// remaining instances (since there wasn't a prime instance received with
// non-missing
// values to right-hand-side bracket the ones with missing values. So, we'll
// just
// have to flush this buffer (which means the missing values will go into
// the history
// list and the underlying predictor's missing value strategy will be
// invoked). We
// should warn to the progress/log though. Similarly, for leading prime
// instances
// with missing values (i.e. no left-hand-side non-missing bracketing
// instance) we
// should warn to the progress/log
if (m_missingBuffer != null && m_missingBuffer.numInstances() > 0) {
// make one more attempt to interpolate missing values. In the incremental
// priming process, the missing value interpolation for currently buffered
// leading instances is *only* triggered when receiving a priming instance
// where *all* target values are not missing. If this never occurs, i.e.
// every priming instance has at least one of the targets missing, then
// it is still possible that some of the missing values for some targets
// can be interpolated
System.err.println("Here..... \n\n" + m_missingBuffer);
Instances missingReplaced =
weka.classifiers.timeseries.core.Utils.replaceMissing( m_missingBuffer, m_fieldsToForecast,
m_lagMaker.getTimeStampField(), false, m_lagMaker.getPeriodicity(), m_lagMaker.getSkipEntries() );
for (int i = 0; i < m_missingBuffer.numInstances(); i++) {
applyFilters(missingReplaced.instance(i), false, false);
}
for (PrintStream p : progress) {
p.println("WARNING: priming data contained missing target/date values that could "
+ "not be interpolated/replaced. Forecasting performance may be "
+ "adversely affected.");
}
}
if (m_hadLeadingMissingPrime) {
for (PrintStream p : progress) {
p.println("WARNING: priming data contained missing target/date values that could "
+ "not be interpolated/replaced. Forecasting performance may be "
+ "adversely affected.");
}
}
if (!m_lagMaker.isUsingAnArtificialTimeIndex()
&& m_lagMaker.getAdjustForTrends()
&& m_lagMaker.getTimeStampField() != null
&& m_lagMaker.getTimeStampField().length() > 0
&& !m_atLeastOneNonMissingTimeStamp) {
throw new Exception("All values of the time stamp field ("
+ m_lagMaker.getTimeStampField() + ") were missing in the priming "
+ "data!");
}
List> forecastForSteps =
new ArrayList>();
int stepsToDo = (overlay != null) ? overlay.numInstances() : numSteps;
boolean setPeriodics = true, incrementTime = true;
// check overlay fields (if present)
if (overlay != null) {
for (String field : m_lagMaker.getOverlayFields()) {
Attribute overl = m_originalHeader.attribute(field);
if (overl == null) {
throw new Exception("Unable to find overlay field '" + field
+ "' in the supplied overlay instances");
}
}
}
for (int i = 0; i < stepsToDo; i++) {
incrementTime = true;
// set the target to missing first
double[] newVals = new double[m_originalHeader.numAttributes()];
// set all to missing
for (int j = 0; j < newVals.length; j++) {
newVals[j] = Utils.missingValue();
}
// copy over any overlay fields and time (if present in overlay data)
if (overlay != null) {
Instance overlayI = overlay.instance(i);
for (String field : m_lagMaker.getOverlayFields()) {
int index = m_originalHeader.attribute(field).index();
newVals[index] = overlayI.value(index);
}
// non missing time stamp?
if (!m_lagMaker.isUsingAnArtificialTimeIndex()
&& m_lagMaker.getAdjustForTrends()
&& m_lagMaker.getTimeStampField() != null
&& m_lagMaker.getTimeStampField().length() > 0) {
int timeStampIndex =
m_originalHeader.attribute(m_lagMaker.getTimeStampField()).index();
if (!overlayI.isMissing(timeStampIndex)) {
newVals[timeStampIndex] = overlayI.value(timeStampIndex);
// want to store, rather than increment, time value since
// we've read a time value from the overlay data
incrementTime = false;
}
}
}
// create the test instance (original format)
Instance origTest = new DenseInstance(1.0, newVals);
origTest.setDataset(m_originalHeader);
// System.err.println("Original with periodic set " + origTest);
Instance transformedWithDate = origTest;
// do all the filters
// System.err.println("--- " + transformedWithDate);
// transformedWithDate = applyFilters(transformedWithDate, true, true);
transformedWithDate =
m_lagMaker.processInstancePreview(transformedWithDate, incrementTime,
setPeriodics);
// the date time stamp (if exists) has now been remapped, so we can remove
// the original
m_dateRemover.input(transformedWithDate);
Instance transformed = m_dateRemover.output();
// System.err.println(transformedWithDate.dataset());
// System.err.println(transformedWithDate);
// System.err.println("Transformed: " + transformed);
// get a prediction
double[] preds = new double[m_singleTargetForecasters.size()];
for (int j = 0; j < m_singleTargetForecasters.size(); j++) {
preds[j] =
m_singleTargetForecasters.get(j).forecastOneStepAhead(transformed);
}
// predictions at step i for all the targets (can only handle a single
// target at
// present)
List finalForecast =
new ArrayList();
// add confidence limits (if applicable)
for (int j = 0; j < m_fieldsToForecast.size(); j++) {
if (m_confidenceLimitEstimator != null
&& i < m_calculateConfLimitsSteps) {
double[] limits =
m_confidenceLimitEstimator.getConfidenceLimitsForTarget( m_fieldsToForecast.get( j ), preds[j], i + 1 );
double[][] limitsToAdd = new double[1][];
limitsToAdd[0] = limits;
finalForecast.add(new NumericPrediction(Utils.missingValue(),
preds[j], 1.0, limitsToAdd));
} else {
finalForecast.add(new NumericPrediction(Utils.missingValue(),
preds[j]));
}
}
forecastForSteps.add(finalForecast);
// set the value of the target in the original test instance
for (int j = 0; j < m_fieldsToForecast.size(); j++) {
int targetIndex =
m_originalHeader.attribute(m_fieldsToForecast.get(j)).index();
origTest.setValue(targetIndex, preds[j]);
}
// If we have a real time stamp, then set the incremented value in the
// original
// test instance (doesn't really need to be done if we've read a
// non-missing
// time value out of any supplied overlay data)
if (!m_lagMaker.isUsingAnArtificialTimeIndex()
&& m_lagMaker.getAdjustForTrends()
&& m_lagMaker.getTimeStampField() != null
&& m_lagMaker.getTimeStampField().length() > 0) {
int timeIndex =
m_originalHeader.attribute(m_lagMaker.getTimeStampField()).index();
double timeValue =
transformedWithDate.value(transformedWithDate.dataset().attribute(
m_lagMaker.getTimeStampField()));
origTest.setValue(timeIndex, timeValue);
}
// now re-prime the forecaster. Incremental method will never buffer here
// because we never have missing targets, since we've just forecasted
// them!
primeForecasterIncremental(origTest);
}
// TODO fix this - move to eval class?
if (m_lagMaker.isUsingAnArtificialTimeIndex()) {
m_lagMaker.incrementArtificialTimeValue(-(stepsToDo - 1));
// -= (numSteps - 1);
}
// invalidate the primed input header
m_primedInput = null;
return forecastForSteps;
}
/**
 * Inner class implementing a forecaster for a single target. It wraps one
 * Weka classifier and, when more than one field is being forecasted, a filter
 * that removes the other targets before the classifier sees the data.
 *
 * @author Mark Hall (mhall{[at]}pentaho{[dot]}com)
 *
 */
protected class SingleTargetForecaster implements Serializable {

  /** for serialization */
  private static final long serialVersionUID = -4404412501006669036L;

  /** the underlying Weka classifier used to make forecasts */
  protected Classifier m_targetForecaster;

  /** filter for removing the targets other than the one to be forecasted */
  private Remove m_otherTargetRemover;

  /** index of the target in the transformed (lag maker output) data */
  private int m_classIndex;

  /** name of the target being forecasted */
  private String m_className;

  /**
   * Set the base classifier to use
   *
   * @param classifier the base classifier to use
   */
  public void setClassifier(Classifier classifier) {
    m_targetForecaster = classifier;
  }

  /**
   * Get the base classifier
   *
   * @return the base classifier
   */
  public Classifier getWrappedClassifier() {
    return m_targetForecaster;
  }

  /**
   * Builds the single target forecaster. Assumes that the training data has
   * already been transformed by the lag maker.
   *
   * @param train the transformed training data
   * @param targetName the name of the target to forecast
   * @param progress an optional varargs parameter of PrintStream to report
   *          progress to
   * @throws Exception if no base classifier has been set, the target can't
   *           be found or is not numeric, or building the classifier fails
   */
  public void buildForecaster(Instances train, String targetName,
    PrintStream... progress) throws Exception {
    if (m_targetForecaster == null) {
      throw new Exception("[SingleTargetForecaster] base classifier has"
        + " not been set!");
    }

    train = new Instances(train);

    // Instances.attribute(String) yields null for an unknown name, so the
    // lookup has to be checked before asking for the index
    Attribute target = train.attribute(targetName);
    if (target == null) {
      throw new Exception("Can't find target field '" + targetName
        + "' in the data!");
    }
    m_classIndex = target.index();

    if (!target.isNumeric()) {
      throw new Exception("[SingleTargetForecaster] target '" + targetName
        + "' is not numeric!");
    }
    train.setClassIndex(m_classIndex);
    m_className = targetName;

    // collect the (1-based) indices of all other targets present in the data
    StringBuilder otherTargets = new StringBuilder();
    for (String n : m_fieldsToForecast) {
      if (n.equals(targetName)) {
        continue;
      }
      Attribute other = train.attribute(n);
      if (other != null) {
        if (otherTargets.length() > 0) {
          otherTargets.append(',');
        }
        otherTargets.append(other.index() + 1);
      }
    }

    if (otherTargets.length() > 0) {
      m_otherTargetRemover = new Remove();
      m_otherTargetRemover.setAttributeIndices(otherTargets.toString());
      m_otherTargetRemover.setInputFormat(train);
      train = Filter.useFilter(train, m_otherTargetRemover);
    } else {
      // nothing to remove; make sure no filter from an earlier build lingers
      m_otherTargetRemover = null;
    }

    for (PrintStream p : progress) {
      p.println("Building forecaster for target: " + m_className);
    }

    m_targetForecaster.buildClassifier(train);
  }

  /**
   * Makes a one-step-ahead forecast
   *
   * @param transformed the test instance for the next time step. This will
   *          have already been processed by the lag maker, and thus will
   *          contain lagged variables and other derived variables.
   *
   * @return the one-step-ahead forecast corresponding to the supplied test
   *         instance
   * @throws Exception if something goes wrong during the forecasting process
   */
  public double forecastOneStepAhead(Instance transformed) throws Exception {
    transformed.dataset().setClassIndex(m_classIndex);
    if (m_otherTargetRemover != null) {
      m_otherTargetRemover.input(transformed);
      transformed = m_otherTargetRemover.output();
    }

    double pred = m_targetForecaster.classifyInstance(transformed);

    // undo the log if adjusting for variance
    if (m_lagMaker.getAdjustForVariance()) {
      pred = Math.exp(pred);
    }

    return pred;
  }

  /**
   * Returns the target name followed by the description of the wrapped
   * classifier.
   *
   * @return a textual description of this single target forecaster
   */
  @Override
  public String toString() {
    if (m_targetForecaster == null) {
      return "SingleTargetForecaster: no model built yet!";
    }
    return m_className + ":\n" + m_targetForecaster.toString();
  }
}
}