weka.experiment.LearningRateResultProducer Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of weka-dev Show documentation
The Waikato Environment for Knowledge Analysis (WEKA), a machine learning workbench. This version represents the developer version, the "bleeding edge" of development, you could say. New functionality gets added to this version.
There is a newer version: 3.9.6
Show newest version
/*
 *   This program is free software: you can redistribute it and/or modify
 *   it under the terms of the GNU General Public License as published by
 *   the Free Software Foundation, either version 3 of the License, or
 *   (at your option) any later version.
 *
 *   This program is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *   GNU General Public License for more details.
 *
 *   You should have received a copy of the GNU General Public License
 *   along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

/*
 *    LearningRateResultProducer.java
 *    Copyright (C) 1999-2012 University of Waikato, Hamilton, New Zealand
 *
 */

package weka.experiment;

import java.util.Collections;
import java.util.Enumeration;
import java.util.Random;
import java.util.Vector;

import weka.core.AdditionalMeasureProducer;
import weka.core.Instances;
import weka.core.Option;
import weka.core.OptionHandler;
import weka.core.RevisionHandler;
import weka.core.RevisionUtils;
import weka.core.Utils;

/**
 *  Tells a sub-ResultProducer to reproduce the current
 * run for varying sized subsamples of the dataset. Normally used with an
 * AveragingResultProducer and CrossValidationResultProducer combo to generate
 * learning curve results. For non-numeric result fields, the first value is
 * used.
 * 
 * 
 * 
 *  Valid options are:
 * 

 * 
 * 
 * -X <num steps>
 *  The number of steps in the learning rate curve.
 *  (default 10)
 * 
 * 
 *  * -W <class name>
 *  The full class name of a ResultProducer.
 *  eg: weka.experiment.CrossValidationResultProducer
 * 
 * 
 *  * Options specific to result producer weka.experiment.AveragingResultProducer:
 * 
 * 
 *  * -F <field name>
 *  The name of the field to average over.
 *  (default "Fold")
 * 
 * 
 *  * -X <num results>
 *  The number of results expected per average.
 *  (default 10)
 * 
 * 
 *  * -S
 *  Calculate standard deviations.
 *  (default only averages)
 * 
 * 
 *  * -W <class name>
 *  The full class name of a ResultProducer.
 *  eg: weka.experiment.CrossValidationResultProducer
 * 
 * 
 *  * Options specific to result producer weka.experiment.CrossValidationResultProducer:
 * 
 * 
 *  * -X <number of folds>
 *  The number of folds to use for the cross-validation.
 *  (default 10)
 * 
 * 
 *  * -D
 * Save raw split evaluator output.
 * 
 * 
 *  * -O <file/directory name/path>
 *  The filename where raw output will be stored.
 *  If a directory name is specified then individual
 *  outputs will be gzipped, otherwise all output will be
 *  zipped to the named file. Use in conjunction with -D. (default splitEvalutorOut.zip)
 * 
 * 
 *  * -W <class name>
 *  The full class name of a SplitEvaluator.
 *  eg: weka.experiment.ClassifierSplitEvaluator
 * 
 * 
 *  * Options specific to split evaluator weka.experiment.ClassifierSplitEvaluator:
 * 
 * 
 *  * -W <class name>
 *  The full class name of the classifier.
 *  eg: weka.classifiers.bayes.NaiveBayes
 * 
 * 
 *  * -C <index>
 *  The index of the class for which IR statistics
 *  are to be output. (default 1)
 * 
 * 
 *  * -I <index>
 *  The index of an attribute to output in the
 *  results. This attribute should identify an
 *  instance in order to know which instances are
 *  in the test set of a cross validation. if 0
 *  no output (default 0).
 * 
 * 
 *  * -P
 *  Add target and prediction columns to the result
 *  for each fold.
 * 
 * 
 *  * Options specific to classifier weka.classifiers.rules.ZeroR:
 * 
 * 
 *  * -D
 *  If set, classifier is run in debug mode and
 *  may output additional info to the console
 * 
 * 
 * 
 * 
 * All options after -- will be passed to the result producer.
 * 
 * @author Len Trigg ([email protected])
 * @version $Revision: 10203 $
 */
public class LearningRateResultProducer implements ResultListener,
  ResultProducer, OptionHandler, AdditionalMeasureProducer, RevisionHandler {

  /** Explicit serialization version identifier for this class. */
  static final long serialVersionUID = -3841159673490861331L;

  /**
   * The full dataset of interest. doRun() randomizes a copy of it and hands
   * leading portions of that copy to the sub-ResultProducer.
   */
  protected Instances m_Instances;

  /**
   * The ResultListener that results are forwarded to (a CSVResultListener by
   * default). Keys passed on to it carry the current subsample size as an
   * extra trailing field.
   */
  protected ResultListener m_ResultListener = new CSVResultListener();

  /**
   * The sub-ResultProducer that generates the results for each subsample (an
   * AveragingResultProducer by default).
   */
  protected ResultProducer m_ResultProducer = new AveragingResultProducer();

  /** The names of any additional measures to look for in SplitEvaluators */
  protected String[] m_AdditionalMeasures = null;

  /**
   * The minimum number of instances to use, i.e. the size of the first
   * subsample. If this is zero, the first step will contain m_StepSize
   * instances instead.
   */
  protected int m_LowerSize = 0;

  /**
   * The maximum number of instances to use. -1 indicates no maximum (other than
   * the total number of instances)
   */
  protected int m_UpperSize = -1;

  /**
   * The number of instances to add at each step. Also used as the size of the
   * first subsample when m_LowerSize is zero.
   */
  protected int m_StepSize = 10;

  /**
   * The current subsample size during stepping. It is updated by doRun() and
   * doRunKeys() and appended (as a String) to every key passed on to the
   * ResultListener.
   */
  protected int m_CurrentSize = 0;

  /**
   * The name of the extra key field appended by getKeyNames(); the matching key
   * value is the current subsample size.
   */
  public static String STEP_FIELD_NAME = "Total_instances";

  /**
   * Provides a short, human-readable description of this result producer.
   * 
   * @return the descriptive text, suitable for displaying in the
   *         explorer/experimenter gui
   */
  public String globalInfo() {
    StringBuilder description = new StringBuilder();
    description
      .append("Tells a sub-ResultProducer to reproduce the current run for ");
    description
      .append("varying sized subsamples of the dataset. Normally used with ");
    description
      .append("an AveragingResultProducer and CrossValidationResultProducer ");
    description
      .append("combo to generate learning curve results. For non-numeric ");
    description.append("result fields, the first value is used.");
    return description.toString();
  }

  /**
   * Reports the constraints (imposed by the destination) on the result columns
   * that a ResultProducer may produce. By contract, null means that there are
   * NO constraints; otherwise the permitted column names are returned as an
   * array of Strings. This implementation always returns null.
   * 
   * @param rp the ResultProducer to which the constraints would apply
   * @return an array of column names to which the ResultProducer's results
   *         will be restricted; always null here
   * @throws Exception if constraints can't be determined
   */
  @Override
  public String[] determineColumnConstraints(ResultProducer rp)
    throws Exception {
    // This producer never restricts the columns of its sub-producer.
    final String[] noConstraints = null;
    return noConstraints;
  }

  /**
   * Gets the keys for a specified run number. Different run numbers correspond
   * to different randomizations of the data. The sub-ResultProducer is asked
   * for its keys once per subsample size; the keys it produces arrive back at
   * this object, which passes them on to the current ResultListener.
   * <p>
   * The subsample size starts at m_LowerSize (or at m_StepSize if m_LowerSize
   * is zero) and grows by m_StepSize while it exceeds neither the number of
   * instances nor m_UpperSize (unless m_UpperSize is -1).
   * 
   * @param run the run number to get keys for.
   * @throws Exception if no ResultProducer, ResultListener or Instances have
   *           been set, if the step size is not positive, or if a problem
   *           occurs while getting the keys
   */
  @Override
  public void doRunKeys(int run) throws Exception {

    if (m_ResultProducer == null) {
      throw new Exception("No ResultProducer set");
    }
    if (m_ResultListener == null) {
      throw new Exception("No ResultListener set");
    }
    if (m_Instances == null) {
      throw new Exception("No Instances set");
    }
    // A zero or negative step would never move the size past the loop bounds
    if (m_StepSize <= 0) {
      throw new Exception("Step size must be positive, but was " + m_StepSize);
    }

    // Tell the resultproducer to send results to us
    m_ResultProducer.setResultListener(this);
    m_ResultProducer.setInstances(m_Instances);

    // For each subsample size
    m_CurrentSize = (m_LowerSize == 0) ? m_StepSize : m_LowerSize;
    while (m_CurrentSize <= m_Instances.numInstances()
      && ((m_UpperSize == -1) || (m_CurrentSize <= m_UpperSize))) {
      m_ResultProducer.doRunKeys(run);
      // Stop rather than let the next size wrap around to a negative int
      if (m_CurrentSize > Integer.MAX_VALUE - m_StepSize) {
        break;
      }
      m_CurrentSize += m_StepSize;
    }
  }

  /**
   * Gets the results for a specified run number. Different run numbers
   * correspond to different randomizations of the data. A copy of the dataset
   * is shuffled using the run number as random seed, and the sub-ResultProducer
   * is run once per subsample size on the first m_CurrentSize instances of that
   * shuffled copy. The results it produces arrive back at this object, which
   * passes them on to the current ResultListener.
   * <p>
   * The subsample size starts at m_LowerSize (or at m_StepSize if m_LowerSize
   * is zero) and grows by m_StepSize while it exceeds neither the number of
   * instances nor m_UpperSize (unless m_UpperSize is -1).
   * 
   * @param run the run number to get results for.
   * @throws Exception if no ResultProducer, ResultListener or Instances have
   *           been set, if the step size is not positive, or if a problem
   *           occurs while getting the results
   */
  @Override
  public void doRun(int run) throws Exception {

    if (m_ResultProducer == null) {
      throw new Exception("No ResultProducer set");
    }
    if (m_ResultListener == null) {
      throw new Exception("No ResultListener set");
    }
    if (m_Instances == null) {
      throw new Exception("No Instances set");
    }
    // A zero or negative step would never move the size past the loop bounds
    if (m_StepSize <= 0) {
      throw new Exception("Step size must be positive, but was " + m_StepSize);
    }

    // Randomize on a copy of the original dataset. The copy is deliberately
    // not stratified.
    Instances runInstances = new Instances(m_Instances);
    runInstances.randomize(new Random(run));

    // Tell the resultproducer to send results to us
    m_ResultProducer.setResultListener(this);

    // For each subsample size
    m_CurrentSize = (m_LowerSize == 0) ? m_StepSize : m_LowerSize;
    while (m_CurrentSize <= m_Instances.numInstances()
      && ((m_UpperSize == -1) || (m_CurrentSize <= m_UpperSize))) {
      // The subsample is the leading m_CurrentSize instances of the shuffle
      m_ResultProducer.setInstances(new Instances(runInstances, 0,
        m_CurrentSize));
      m_ResultProducer.doRun(run);
      // Stop rather than let the next size wrap around to a negative int
      if (m_CurrentSize > Integer.MAX_VALUE - m_StepSize) {
        break;
      }
      m_CurrentSize += m_StepSize;
    }
  }

  /**
   * Prepare for the results to be received. The notification is relayed to the
   * current ResultListener, with this object named as the producer.
   * 
   * @param rp the ResultProducer that will generate the results
   * @throws Exception if no ResultListener has been set or an error occurs
   *           during preprocessing.
   */
  @Override
  public void preProcess(ResultProducer rp) throws Exception {

    if (m_ResultListener != null) {
      m_ResultListener.preProcess(this);
      return;
    }
    throw new Exception("No ResultListener set");
  }

  /**
   * Prepare to generate results. This object registers itself as the listener
   * of the sub-ResultProducer and then asks that producer to prepare itself.
   * 
   * @throws Exception if no ResultProducer has been set or an error occurs
   *           during preprocessing.
   */
  @Override
  public void preProcess() throws Exception {

    final ResultProducer producer = m_ResultProducer;
    if (producer == null) {
      throw new Exception("No ResultProducer set");
    }
    // Results from the sub-producer must come back through this object
    producer.setResultListener(this);
    producer.preProcess();
  }

  /**
   * When this method is called, it indicates that no more results will be sent
   * that need to be grouped together in any way. The notification is relayed
   * to the current ResultListener, with this object named as the producer.
   * 
   * @param rp the ResultProducer that generated the results
   * @throws Exception if no ResultListener has been set or an error occurs
   */
  @Override
  public void postProcess(ResultProducer rp) throws Exception {

    // Same guard as preProcess(ResultProducer): fail with a clear message
    // instead of a NullPointerException
    if (m_ResultListener == null) {
      throw new Exception("No ResultListener set");
    }
    m_ResultListener.postProcess(this);
  }

  /**
   * When this method is called, it indicates that no more requests to generate
   * results for the current experiment will be sent. The request is delegated
   * to the sub-ResultProducer.
   * 
   * @throws Exception if no ResultProducer has been set or an error occurs
   */
  @Override
  public void postProcess() throws Exception {

    // Same guard as preProcess(): fail with a clear message instead of a
    // NullPointerException
    if (m_ResultProducer == null) {
      throw new Exception("No ResultProducer set");
    }
    m_ResultProducer.postProcess();
  }

  /**
   * Accepts results from the sub-ResultProducer and forwards them to the
   * current ResultListener. The key is extended by one trailing field holding
   * the current subsample size (as a String); the result array is passed on
   * unchanged.
   * 
   * @param rp the ResultProducer that generated the results
   * @param key an array of Objects (Strings or Doubles) that uniquely identify
   *          a result for a given ResultProducer with given compatibilityState
   * @param result the results stored in an array. The objects stored in the
   *          array may be Strings, Doubles, or null (for the missing value).
   * @throws Exception if the result could not be accepted.
   */
  @Override
  public void acceptResult(ResultProducer rp, Object[] key, Object[] result)
    throws Exception {

    if (rp != m_ResultProducer) {
      throw new Error("Unrecognized ResultProducer sending results!!");
    }
    // The subsample size occupies the slot directly after the original key
    final int stepSlot = key.length;
    final Object[] extendedKey = new Object[stepSlot + 1];
    extendedKey[stepSlot] = String.valueOf(m_CurrentSize);
    System.arraycopy(key, 0, extendedKey, 0, stepSlot);
    // Pass on to result listener
    m_ResultListener.acceptResult(this, extendedKey, result);
  }

  /**
   * Determines whether the results for a specified key must be generated. The
   * key is extended by one trailing field holding the current subsample size
   * (as a String) and the question is passed on to the current ResultListener.
   * 
   * @param rp the ResultProducer wanting to generate the results
   * @param key an array of Objects (Strings or Doubles) that uniquely identify
   *          a result for a given ResultProducer with given compatibilityState
   * @return true if the result should be generated
   * @throws Exception if it could not be determined if the result is needed.
   */
  @Override
  public boolean isResultRequired(ResultProducer rp, Object[] key)
    throws Exception {

    if (rp != m_ResultProducer) {
      throw new Error("Unrecognized ResultProducer sending results!!");
    }
    // Copy the incoming key and put the subsample size in the extra last slot
    final Object[] extendedKey = new Object[key.length + 1];
    for (int i = 0; i < key.length; i++) {
      extendedKey[i] = key[i];
    }
    extendedKey[key.length] = String.valueOf(m_CurrentSize);
    // Pass on request to result listener
    final boolean required = m_ResultListener.isResultRequired(this,
      extendedKey);
    return required;
  }

  /**
   * Gets the names of each of the key columns produced for a single run: the
   * key names of the sub-ResultProducer followed by STEP_FIELD_NAME.
   * 
   * @return an array containing the name of each key column
   * @throws Exception if key names cannot be generated
   */
  @Override
  public String[] getKeyNames() throws Exception {

    final String[] inherited = m_ResultProducer.getKeyNames();
    final int stepIndex = inherited.length;
    final String[] extended = new String[stepIndex + 1];
    // The subsample-size column always comes last
    extended[stepIndex] = STEP_FIELD_NAME;
    System.arraycopy(inherited, 0, extended, 0, stepIndex);
    return extended;
  }

  /**
   * Gets the data types of each of the key columns produced for a single run:
   * the key types of the sub-ResultProducer followed by a String marker for
   * the extra subsample-size field. This method should really be static.
   * 
   * @return an array containing objects of the type of each column. The objects
   *         should be Strings, or Doubles.
   * @throws Exception if the key types could not be determined (perhaps because
   *           of a problem from a nested sub-resultproducer)
   */
  @Override
  public Object[] getKeyTypes() throws Exception {

    final Object[] subTypes = m_ResultProducer.getKeyTypes();
    final Object[] allTypes = new Object[subTypes.length + 1];
    for (int i = 0; i < subTypes.length; i++) {
      allTypes[i] = subTypes[i];
    }
    // The extra key field is a String, represented by an empty String
    allTypes[subTypes.length] = "";
    return allTypes;
  }

  /**
   * Gets the names of each of the result columns produced for a single run.
   * The names are taken unchanged from the sub-ResultProducer.
   * 
   * @return an array containing the name of each column
   * @throws Exception if the result names could not be determined (perhaps
   *           because of a problem from a nested sub-resultproducer)
   */
  @Override
  public String[] getResultNames() throws Exception {

    // Pure delegation: this producer adds a key field, not a result field
    final String[] names = m_ResultProducer.getResultNames();
    return names;
  }

  /**
   * Gets the data types of each of the result columns produced for a single
   * run. The types are taken unchanged from the sub-ResultProducer.
   * 
   * @return an array containing objects of the type of each column. The objects
   *         should be Strings, or Doubles.
   * @throws Exception if the result types could not be determined (perhaps
   *           because of a problem from a nested sub-resultproducer)
   */
  @Override
  public Object[] getResultTypes() throws Exception {

    final Object[] types = m_ResultProducer.getResultTypes();
    return types;
  }

  /**
   * Gets a description of the internal settings of the result producer,
   * sufficient for distinguishing a ResultProducer instance from another with
   * different settings (ignoring those settings set through this interface).
   * For a given state, the results produced should be compatible.
   * <p>
   * The state built here consists of "-W", the class name of the
   * sub-ResultProducer, "--" and that producer's own compatibility state, with
   * surrounding whitespace trimmed. The step size and size bounds are not
   * included.
   * 
   * @return the description of the ResultProducer state; the empty string if
   *         no sub-ResultProducer is set
   */
  @Override
  public String getCompatibilityState() {

    if (m_ResultProducer == null) {
      return "";
    }

    final StringBuilder state = new StringBuilder();
    state.append("-W ").append(m_ResultProducer.getClass().getName());
    state.append(" -- ").append(m_ResultProducer.getCompatibilityState());
    return state.toString().trim();
  }

  /**
   * Returns an enumeration describing the available options..
   * 
   * @return an enumeration of all the available options.
   */
  @Override
  public Enumeration