weka.attributeSelection.WrapperSubsetEval Maven / Gradle / Ivy

Show more of this group Show more artifacts with this name
Show all versions of weka-stable Show documentation
The Waikato Environment for Knowledge Analysis (WEKA), a machine learning workbench. This is the stable version. Apart from bugfixes, this version does not receive any other updates.
There is a newer version: 3.8.6
Show newest version
/*
 *   This program is free software: you can redistribute it and/or modify
 *   it under the terms of the GNU General Public License as published by
 *   the Free Software Foundation, either version 3 of the License, or
 *   (at your option) any later version.
 *
 *   This program is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *   GNU General Public License for more details.
 *
 *   You should have received a copy of the GNU General Public License
 *   along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

/*
 *    WrapperSubsetEval.java
 *    Copyright (C) 1999-2012 University of Waikato, Hamilton, New Zealand
 *
 */

package weka.attributeSelection;

import weka.classifiers.AbstractClassifier;
import weka.classifiers.Classifier;
import weka.classifiers.Evaluation;
import weka.classifiers.evaluation.AbstractEvaluationMetric;
import weka.classifiers.evaluation.InformationRetrievalEvaluationMetric;
import weka.classifiers.rules.ZeroR;
import weka.core.Capabilities;
import weka.core.Capabilities.Capability;
import weka.core.Instances;
import weka.core.Option;
import weka.core.OptionHandler;
import weka.core.RevisionUtils;
import weka.core.SelectedTag;
import weka.core.Tag;
import weka.core.TechnicalInformation;
import weka.core.TechnicalInformation.Field;
import weka.core.TechnicalInformation.Type;
import weka.core.TechnicalInformationHandler;
import weka.core.Utils;
import weka.filters.Filter;
import weka.filters.unsupervised.attribute.Remove;

import java.util.BitSet;
import java.util.Collections;
import java.util.Enumeration;
import java.util.List;
import java.util.Random;
import java.util.Vector;

/**
 
 * WrapperSubsetEval:

 * 

 * Evaluates attribute sets by using a learning scheme. Cross validation is used
 * to estimate the accuracy of the learning scheme for a set of attributes.

 * 

 * For more information see:

 * 

 * Ron Kohavi, George H. John (1997). Wrappers for feature subset selection.
 * Artificial Intelligence. 97(1-2):273-324.
 * 
 
 * 
 *  BibTeX:
 * 
 *  * @article{Kohavi1997,
 *    author = {Ron Kohavi and George H. John},
 *    journal = {Artificial Intelligence},
 *    note = {Special issue on relevance},
 *    number = {1-2},
 *    pages = {273-324},
 *    title = {Wrappers for feature subset selection},
 *    volume = {97},
 *    year = {1997},
 *    ISSN = {0004-3702}
 * }
 * 
 * 
 
 * 
 
 * Valid options are:
 * 
 * 
 *  * -B <base learner>
 *  class name of base learner to use for  accuracy estimation.
 *  Place any classifier options LAST on the command line
 *  following a "--". eg.:
 *   -B weka.classifiers.bayes.NaiveBayes ... -- -K
 *  (default: weka.classifiers.rules.ZeroR)
 * 
 * 
 *  * -F <num>
 *  number of cross validation folds to use for estimating accuracy.
 *  (default=5)
 * 
 * 
 *  * -R <seed>
 *  Seed for cross validation accuracy estimation.
 *  (default = 1)
 * 
 * 
 *  * -T <num>
 *  threshold by which to execute another cross validation
 *  (standard deviation---expressed as a percentage of the mean).
 *  (default: 0.01 (1%))
 * 
 * 
 *  * -E <acc | rmse | mae | f-meas | auc | auprc>
 *  Performance evaluation measure to use for selecting attributes.
 *  (Default = accuracy for discrete class and rmse for numeric class)
 * 
 * 
 *  * -IRclass <label | index>
 *  Optional class value (label or 1-based index) to use in conjunction with
 *  IR statistics (f-meas, auc or auprc). Omitting this option will use
 *  the class-weighted average.
 * 
 * 
 *  * Options specific to scheme weka.classifiers.rules.ZeroR:
 * 
 * 
 *  * -D
 *  If set, classifier is run in debug mode and
 *  may output additional info to the console
 * 
 * 
 
 * 
 * @author Mark Hall (mhall@cs.waikato.ac.nz)
 * @version $Revision: 12170 $
 */
public class WrapperSubsetEval extends ASEvaluation
  implements SubsetEvaluator, OptionHandler, TechnicalInformationHandler {

  /** for serialization */
  static final long serialVersionUID = -4573057658746728675L;

  /** training instances */
  private Instances m_trainInstances;
  /** class index */
  private int m_classIndex;
  /** number of attributes in the training data */
  private int m_numAttribs;
  /** holds an evaluation object */
  private Evaluation m_Evaluation;
  /** holds the base classifier object */
  private Classifier m_BaseClassifier;
  /** number of folds to use for cross validation */
  private int m_folds;
  /** random number seed */
  private int m_seed;
  /**
   * the threshold by which to do further cross validations when estimating the
   * accuracy of a subset
   */
  private double m_threshold;

  /** Tag ID: default measure (accuracy for a discrete class, RMSE for a numeric class) */
  public static final int EVAL_DEFAULT = 1;
  /** Tag ID: accuracy (discrete class only) */
  public static final int EVAL_ACCURACY = 2;
  /** Tag ID: RMSE (of the class probabilities for a discrete class) */
  public static final int EVAL_RMSE = 3;
  /** Tag ID: MAE (of the class probabilities for a discrete class) */
  public static final int EVAL_MAE = 4;
  /** Tag ID: F-measure (discrete class only) */
  public static final int EVAL_FMEASURE = 5;
  /** Tag ID: area under the ROC curve (discrete class only) */
  public static final int EVAL_AUC = 6;
  /** Tag ID: area under the precision-recall curve (discrete class only) */
  public static final int EVAL_AUPRC = 7;
  /** Tag ID: correlation coefficient (numeric class only) */
  public static final int EVAL_CORRELATION = 8;
  /**
   * Base ID for plugin metrics. The tags created for plugin statistics in the
   * static initializer all receive IDs greater than this value.
   * NOTE(review): how this constant is compared against tag IDs elsewhere in
   * the class should be confirmed against the evaluation code.
   */
  public static final int EVAL_PLUGIN = 9;

  /**
   * Tag specialisation that remembers which plugin evaluation metric, and
   * which statistic of that metric, it stands for.
   */
  protected static class PluginTag extends Tag {
    private static final long serialVersionUID = -6978438858413428382L;

    /** The plugin metric backing this tag */
    protected AbstractEvaluationMetric m_metric;

    /** Name of the statistic, within the metric, that this tag selects */
    protected String m_statisticName;

    /**
     * Creates a tag for a single statistic of a plugin metric. The statistic
     * name is handed to the superclass constructor as both of its string
     * arguments.
     *
     * @param ID the numeric ID of this tag
     * @param metric the metric object
     * @param statisticName the particular statistic that this tag pertains to
     */
    public PluginTag(int ID, AbstractEvaluationMetric metric,
      String statisticName) {
      super(ID, statisticName, statisticName);
      this.m_statisticName = statisticName;
      this.m_metric = metric;
    }

    /**
     * Returns the name reported by the underlying metric object.
     *
     * @return the name of the metric
     */
    public String getMetricName() {
      return this.m_metric.getMetricName();
    }

    /**
     * Returns the statistic name this tag was created with.
     *
     * @return the name of the statistic
     */
    public String getStatisticName() {
      return this.m_statisticName;
    }

    /**
     * Returns the metric object this tag was created with.
     *
     * @return the metric object
     */
    public AbstractEvaluationMetric getMetric() {
      return this.m_metric;
    }
  }

  /** Holds all tags for metrics */
  public static final Tag[] TAGS_EVALUATION;

  /**
   * If >= 0, and an IR metric is being used, then evaluate with respect to this
   * class value (0-based index)
   */
  protected int m_IRClassVal = -1;

  /** User supplied option for IR class value (either name or 1-based index) */
  protected String m_IRClassValS = "";

  /**
   * Plugin evaluation metrics, as returned by
   * AbstractEvaluationMetric.getPluginMetrics(). The static initializer below
   * tolerates this being null.
   */
  protected static List<AbstractEvaluationMetric> PLUGIN_METRICS =
    AbstractEvaluationMetric.getPluginMetrics();

  /*
   * Builds TAGS_EVALUATION: the eight built-in measures first, followed by one
   * PluginTag per statistic of every plugin metric.
   */
  static {
    // One tag is needed for every statistic exposed by every plugin metric.
    int pluginStatCount = 0;
    if (PLUGIN_METRICS != null) {
      for (AbstractEvaluationMetric metric : PLUGIN_METRICS) {
        pluginStatCount += metric.getStatisticNames().size();
      }
    }

    // Built-in measures; the tag at position i carries ID i + 1.
    final Tag[] builtIn = {
      new Tag(EVAL_DEFAULT, "default",
        "Default: accuracy (discrete class); RMSE (numeric class)"),
      new Tag(EVAL_ACCURACY, "acc", "Accuracy (discrete class only)"),
      new Tag(EVAL_RMSE, "rmse",
        "RMSE (of the class probabilities for discrete class)"),
      new Tag(EVAL_MAE, "mae",
        "MAE (of the class probabilities for discrete class)"),
      new Tag(EVAL_FMEASURE, "f-meas", "F-measure (discrete class only)"),
      new Tag(EVAL_AUC, "auc",
        "AUC (area under the ROC curve - discrete class only)"),
      new Tag(EVAL_AUPRC, "auprc",
        "AUPRC (area under the precision-recall curve - discrete class only)"),
      new Tag(EVAL_CORRELATION, "corr-coeff",
        "Correlation coefficient - numeric class only") };

    final Tag[] tags = new Tag[builtIn.length + pluginStatCount];
    System.arraycopy(builtIn, 0, tags, 0, builtIn.length);

    if (PLUGIN_METRICS != null) {
      int slot = builtIn.length;
      for (AbstractEvaluationMetric metric : PLUGIN_METRICS) {
        for (String stat : metric.getStatisticNames()) {
          // Plugin tag IDs are slot + 2, so the first plugin statistic gets
          // EVAL_PLUGIN + 1 and EVAL_PLUGIN itself is never handed out. The
          // ID is computed explicitly rather than via a post-increment inside
          // the array subscript, keeping the established numbering stable.
          tags[slot] = new PluginTag(slot + 2, metric, stat);
          slot++;
        }
      }
    }

    TAGS_EVALUATION = tags;
  }

  /** The evaluation measure to use */
  protected Tag m_evaluationMeasure = TAGS_EVALUATION[0];

  /**
   * Assembles the descriptive text for this attribute evaluator: a short
   * summary followed by the string form of the technical information.
   * 
   * @return a description of the evaluator suitable for displaying in the
   *         explorer/experimenter gui
   */
  public String globalInfo() {
    final StringBuilder description = new StringBuilder();

    description.append("WrapperSubsetEval:\n\n");
    description.append("Evaluates attribute sets by using a learning scheme. Cross ");
    description.append("validation is used to estimate the accuracy of the learning ");
    description.append("scheme for a set of attributes.\n\n");
    description.append("For more information see:\n\n");
    description.append(getTechnicalInformation().toString());

    return description.toString();
  }

  /**
   * Builds the bibliographic record (an ARTICLE entry) for the paper this
   * evaluator is based on: Kohavi and John (1997), "Wrappers for feature
   * subset selection".
   * 
   * @return the technical information about this class
   */
  @Override
  public TechnicalInformation getTechnicalInformation() {
    final TechnicalInformation paper = new TechnicalInformation(Type.ARTICLE);

    paper.setValue(Field.AUTHOR, "Ron Kohavi and George H. John");
    paper.setValue(Field.YEAR, "1997");
    paper.setValue(Field.TITLE, "Wrappers for feature subset selection");
    paper.setValue(Field.JOURNAL, "Artificial Intelligence");
    paper.setValue(Field.VOLUME, "97");
    paper.setValue(Field.NUMBER, "1-2");
    paper.setValue(Field.PAGES, "273-324");
    paper.setValue(Field.NOTE, "Special issue on relevance");
    paper.setValue(Field.ISSN, "0004-3702");

    return paper;
  }

  /**
   * Constructor. Calls resetOptions to set default options
   **/
  public WrapperSubsetEval() {
    resetOptions();
  }

  /**
   * Returns an enumeration describing the available options.
   * 
   * @return an enumeration of all the available options.
   **/
  @Override
  public Enumeration