weka.classifiers.meta.FilteredClassifier Maven / Gradle / Ivy

Show more of this group Show more artifacts with this name
Show all versions of weka-dev Show documentation
The Waikato Environment for Knowledge Analysis (WEKA), a machine learning workbench. This version represents the developer version, the "bleeding edge" of development, you could say. New functionality gets added to this version.
There is a newer version: 3.9.6
Show newest version
/*
 *   This program is free software: you can redistribute it and/or modify
 *   it under the terms of the GNU General Public License as published by
 *   the Free Software Foundation, either version 3 of the License, or
 *   (at your option) any later version.
 *
 *   This program is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *   GNU General Public License for more details.
 *
 *   You should have received a copy of the GNU General Public License
 *   along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

/*
 *    FilteredClassifier.java
 *    Copyright (C) 1999-2012 University of Waikato, Hamilton, New Zealand
 *
 */

package weka.classifiers.meta;

import weka.classifiers.IterativeClassifier;
import weka.classifiers.RandomizableSingleClassifierEnhancer;
import weka.core.*;
import weka.core.Capabilities.Capability;
import weka.filters.Filter;
import weka.filters.supervised.attribute.AttributeSelection;
import weka.filters.unsupervised.attribute.Reorder;

import java.util.*;

/**
 *  Class for running an arbitrary classifier on data
 * that has been passed through an arbitrary filter. Like the classifier, the
 * structure of the filter is based exclusively on the training data and test
 * instances will be processed by the filter without changing their structure.
 * If unequal instance weights or attribute weights are present, and the filter
 * or the classifier are unable to deal with them, the instances and/or attributes
 * are resampled with replacement based on the weights before they are passed
 * to the filter or the classifier (as appropriate).
 * 
 * 
 *
 *  Valid options are:
 * 

 * 
 * 
 * -F <filter specification>
 *  Full class name of filter to use, followed
 *  by filter options.
 *  default: "weka.filters.supervised.attribute.Discretize -R first-last -precision 6"
 * 
 *
 *  * -W <classifier name>
 *  Full name of base classifier.
 *  (default: weka.classifiers.trees.J48)
 * 
 *
 *  -S num
 * The random number seed to be used (default 1). 
 *
 * -doNotCheckForModifiedClassAttribute 

 * If this is set, the classifier will not check whether the filter modifies the class attribute (use with caution).
 * 
 *
 * -output-debug-info 

 * If set, classifier is run in debug mode and may output additional info to
 * the console.
 * 

 *
 * -do-not-check-capabilities 

 * If set, classifier capabilities are not checked before classifier is built
 * (use with caution).
 * 

 *
 * -num-decimal-places 

 * The number of decimal places for the output of numbers in the model.
 * 

 *
 * -batch-size 

 * The desired batch size for batch prediction.
 * 

 *
 * 
 * Options specific to classifier weka.classifiers.trees.J48:
 * 
 * 
 *  * -U
 *  Use unpruned tree.
 * 
 * 
 *  * -C <pruning confidence>
 *  Set confidence threshold for pruning.
 *  (default 0.25)
 * 
 * 
 *  * -M <minimum number of instances>
 *  Set minimum number of instances per leaf.
 *  (default 2)
 * 
 * 
 *  * -R
 *  Use reduced error pruning.
 * 
 * 
 *  * -N <number of folds>
 *  Set number of folds for reduced error
 *  pruning. One fold is used as pruning set.
 *  (default 3)
 * 
 * 
 *  * -B
 *  Use binary splits only.
 * 
 * 
 *  * -S
 *  Don't perform subtree raising.
 * 
 * 
 *  * -L
 *  Do not clean up after the tree has been built.
 * 
 * 
 *  * -A
 *  Laplace smoothing for predicted probabilities.
 * 
 * 
 *  * -S <seed>
 *  Seed for random data shuffling (default 1).
 * 
 * 
 * 
 *
 * @author Len Trigg (trigg@cs.waikato.ac.nz)
 * @version $Revision: 15021 $
 */
public class FilteredClassifier extends RandomizableSingleClassifierEnhancer
  implements Drawable, PartitionGenerator, IterativeClassifier, BatchPredictor,
        WeightedInstancesHandler, WeightedAttributesHandler {

  /** Version identifier used for serialization. */
  static final long serialVersionUID = -4523450618538717400L;

  /**
   * The filter that data is passed through before it reaches the base
   * classifier. The default constructor of this class replaces this initial
   * AttributeSelection instance with a Discretize filter.
   */
  protected Filter m_Filter = new AttributeSelection();

  /** The instance structure of the filtered instances */
  protected Instances m_FilteredInstances;

  /** Flag that can be set to true if class attribute is not to be checked for modifications by the filter. */
  protected boolean m_DoNotCheckForModifiedClassAttribute = false;

  /**
   * If the attributes are resampled, we store the filter for this. When
   * non-null, it is applied to an incoming instance before that instance is
   * passed through the main filter (see getMembershipValues).
   */
  protected Reorder m_ReorderOriginal;
  /**
   * When non-null, this Reorder filter is applied to the instance produced by
   * the main filter before it is handed to the base classifier
   * (see getMembershipValues).
   */
  protected Reorder m_ReorderFiltered;

  /**
   * Provides the human-readable description of this classifier.
   *
   * @return a two-paragraph description of the classifier, suitable for
   *         displaying in the explorer/experimenter gui
   */
  public String globalInfo() {
    // First paragraph: what the meta classifier does.
    final String purpose =
      "Class for running an arbitrary classifier on data that has been passed through an arbitrary filter. "
        + "Like the classifier, the structure of the filter is based exclusively on the training data "
        + "and test instances will be processed by the filter without changing their structure.";

    // Second paragraph: how unequal instance/attribute weights are treated.
    final String weightHandling =
      "If unequal instance weights or attribute weights are present, and the filter or the classifier "
        + "are unable to deal with them, the instances and/or attributes are resampled with replacement "
        + "based on the weights before they are passed to the filter or the classifier (as appropriate).";

    return purpose + "\n\n" + weightHandling;
  }

  /**
   * Names the classifier that is used when none has been specified.
   *
   * @return the fully qualified class name of the default base classifier
   */
  protected String defaultClassifierString() {
    final String defaultClassifier = "weka.classifiers.trees.J48";
    return defaultClassifier;
  }

  /**
   * Names the filter, including its options, that is used when none has been
   * specified.
   *
   * @return the default filter specification: the fully qualified class name
   *         followed by the filter's options
   */
  protected String defaultFilterString() {
    final String defaultFilterSpec =
      "weka.filters.supervised.attribute.Discretize -R first-last -precision 6";
    return defaultFilterSpec;
  }

  /**
   * Creates a filtered classifier that uses a Discretize filter in front of a
   * J48 base classifier.
   */
  public FilteredClassifier() {
    // Overrides the filter assigned by the field initializer.
    this.m_Filter = new weka.filters.supervised.attribute.Discretize();
    this.m_Classifier = new weka.classifiers.trees.J48();
  }

  /**
   * Reports the graph type of the base classifier.
   *
   * @return the graph type reported by the base classifier if it is Drawable,
   *         otherwise Drawable.NOT_DRAWABLE
   */
  public int graphType() {
    if (!(m_Classifier instanceof Drawable)) {
      return Drawable.NOT_DRAWABLE;
    }
    final Drawable drawable = (Drawable) m_Classifier;
    return drawable.graphType();
  }

  /**
   * Obtains the graph of the base classifier, if that classifier is Drawable.
   *
   * @return the graph produced by the base classifier (dotty format according
   *         to the original documentation)
   * @throws Exception if the base classifier is not Drawable, or if the base
   *           classifier fails to produce its graph
   */
  public String graph() throws Exception {
    if (!(m_Classifier instanceof Drawable)) {
      throw new Exception(
        "Classifier: " + getClassifierSpec() + " cannot be graphed");
    }
    final Drawable drawable = (Drawable) m_Classifier;
    return drawable.graph();
  }

  /**
   * Builds this classifier on the given data so that a partition can be
   * generated. Only possible when the base classifier is a
   * PartitionGenerator.
   *
   * @param data the instances passed on to buildClassifier
   * @throws Exception if the base classifier is not a PartitionGenerator, or
   *           if building the classifier fails
   */
  public void generatePartition(Instances data) throws Exception {
    if (!(m_Classifier instanceof PartitionGenerator)) {
      throw new Exception(
        "Classifier: " + getClassifierSpec() + " cannot generate a partition");
    }
    buildClassifier(data);
  }

  /**
   * Computes an array that has a value for each element in the partition, by
   * filtering the instance and delegating to the base classifier. Only
   * possible when the base classifier is a PartitionGenerator.
   *
   * @param inst the instance to compute membership values for
   * @return the membership values reported by the base classifier for the
   *         filtered instance; if filtering yields no instance, an array of
   *         length numElements() filled with missing values
   * @throws Exception if the base classifier is not a PartitionGenerator, or
   *           if filtering or the base classifier fails
   */
  public double[] getMembershipValues(Instance inst) throws Exception {
    if (!(m_Classifier instanceof PartitionGenerator)) {
      throw new Exception(
        "Classifier: " + getClassifierSpec() + " cannot generate a partition");
    }

    // Apply the attribute reordering for the original data, if one is stored.
    Instance original = inst;
    if (m_ReorderOriginal != null) {
      m_ReorderOriginal.input(original);
      original = m_ReorderOriginal.output();
    }

    Instance filtered = filterInstance(original);
    if (filtered == null) {
      // The filter produced no instance: report a missing value per element.
      double[] unclassified = new double[numElements()];
      Arrays.fill(unclassified, Utils.missingValue());
      return unclassified;
    }

    // Apply the attribute reordering for the filtered data, if one is stored.
    if (m_ReorderFiltered != null) {
      m_ReorderFiltered.input(filtered);
      filtered = m_ReorderFiltered.output();
    }
    return ((PartitionGenerator) m_Classifier).getMembershipValues(filtered);
  }

  /**
   * Reports the number of elements in the partition, as given by the base
   * classifier. Only possible when the base classifier is a
   * PartitionGenerator.
   *
   * @return the number of elements reported by the base classifier
   * @throws Exception if the base classifier is not a PartitionGenerator, or
   *           if the base classifier fails
   */
  public int numElements() throws Exception {
    if (!(m_Classifier instanceof PartitionGenerator)) {
      throw new Exception(
        "Classifier: " + getClassifierSpec() + " cannot generate a partition");
    }
    final PartitionGenerator partitioner = (PartitionGenerator) m_Classifier;
    return partitioner.numElements();
  }

  /**
   * Initializes the base classifier for iterative training. Only possible
   * when the base classifier is an IterativeClassifier.
   *
   * The data is first tested against this classifier's capabilities and then
   * prepared via setUp (presumably this is where the filter is applied --
   * confirm against setUp). If the prepared data has unequal instance or
   * attribute weights that the base classifier cannot handle, it is
   * resampled before the base classifier is initialized.
   *
   * @param data the instances to be used in induction
   * @exception Exception if no base classifier is set, the data fails a
   *              capability test, the base classifier is not an
   *              IterativeClassifier, or the model cannot be initialized
   */
  @Override public void initializeClassifier(Instances data) throws Exception {

    if (m_Classifier == null) {
      throw new Exception("No base classifier has been set!");
    }

    getCapabilities().testWithFail(data);

    if (!(m_Classifier instanceof IterativeClassifier)) {
      throw new Exception("Classifier: " + getClassifierSpec() + " is not an IterativeClassifier");
    }

    // Use the data's own generator when there are instances; otherwise fall
    // back to a plain generator with the same seed.
    final Random rng;
    if (data.numInstances() > 0) {
      rng = data.getRandomNumberGenerator(getSeed());
    } else {
      rng = new Random(getSeed());
    }

    Instances prepared = setUp(data, rng);

    // The filter may have assigned weights.
    if (!(prepared.allInstanceWeightsIdentical() || m_Classifier instanceof WeightedInstancesHandler)) {
      prepared = prepared.resampleWithWeights(rng);
    }
    if (!(prepared.allAttributeWeightsIdentical() || m_Classifier instanceof WeightedAttributesHandler)) {
      prepared = resampleAttributes(prepared, false, rng);
    }

    // can classifier handle the data?
    getClassifier().getCapabilities().testWithFail(prepared);

    // Seed a randomizable base classifier from the same generator.
    if (m_Classifier instanceof Randomizable) {
      final Randomizable randomizable = (Randomizable) m_Classifier;
      randomizable.setSeed(rng.nextInt());
    }

    ((IterativeClassifier) m_Classifier).initializeClassifier(prepared);
  }

  /**
   * Performs one iteration by delegating to the base classifier. Only
   * possible when the base classifier is an IterativeClassifier.
   *
   * @return the value returned by the base classifier's next(); per the
   *         original documentation, false if no further iterations could be
   *         performed, true otherwise
   * @exception Exception if the base classifier is not an
   *              IterativeClassifier, or if the iteration fails
   */
  @Override public boolean next() throws Exception {
    if (!(m_Classifier instanceof IterativeClassifier)) {
      throw new Exception("Classifier: " + getClassifierSpec() + " is not an IterativeClassifier");
    }
    final IterativeClassifier iterative = (IterativeClassifier) m_Classifier;
    return iterative.next();
  }

  /**
   * Signals the end of iterating by delegating to the base classifier's
   * done(). Only possible when the base classifier is an
   * IterativeClassifier.
   *
   * @exception Exception if the base classifier is not an
   *              IterativeClassifier, or if its cleanup fails
   */
  @Override public void done() throws Exception {
    if (!(m_Classifier instanceof IterativeClassifier)) {
      throw new Exception("Classifier: " + getClassifierSpec() + " is not an IterativeClassifier");
    }
    final IterativeClassifier iterative = (IterativeClassifier) m_Classifier;
    iterative.done();
  }

  /**
   * Tool tip text for the resume property.
   *
   * @return the tool tip text for the resume property
   */
  public String resumeTipText() {
    // Each fragment that continues a sentence ends with a space so that words
    // are not run together when the fragments are concatenated.
    return "Set whether classifier can continue training after performing the "
      + "requested number of iterations. \n\tNote that setting this to true will "
      + "retain certain data structures which can increase the \n\t"
      + "size of the model. Only applicable when the base classifier \n\t"
      + "is an IterativeClassifier.";
  }

  /**
   * Passes the resume setting on to the base classifier when that classifier
   * is an IterativeClassifier; otherwise the call has no effect. According to
   * the tool tip text of this property, the setting controls whether the
   * classifier can continue training after performing the requested number
   * of iterations.
   *
   * @param resume the value handed to the base classifier's setResume
   * @throws Exception if the base classifier rejects the setting
   */
  public void setResume(boolean resume) throws Exception {
    if (!(m_Classifier instanceof IterativeClassifier)) {
      return;
    }
    final IterativeClassifier iterative = (IterativeClassifier) m_Classifier;
    iterative.setResume(resume);
  }

  /**
   * Reads the resume setting from the base classifier.
   *
   * @return the base classifier's resume setting if it is an
   *         IterativeClassifier, otherwise false
   */
  public boolean getResume() {
    return (m_Classifier instanceof IterativeClassifier)
      && ((IterativeClassifier) m_Classifier).getResume();
  }

  /**
   * Returns an enumeration describing the available options.
   *
   * @return an enumeration of all the available options.
   */
  public Enumeration