// weka.attributeSelection.CheckAttributeSelection Maven / Gradle / Ivy

/*
 *   This program is free software: you can redistribute it and/or modify
 *   it under the terms of the GNU General Public License as published by
 *   the Free Software Foundation, either version 3 of the License, or
 *   (at your option) any later version.
 *
 *   This program is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *   GNU General Public License for more details.
 *
 *   You should have received a copy of the GNU General Public License
 *   along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

/*
 * CheckAttributeSelection.java
 * Copyright (C) 2006-2012 University of Waikato, Hamilton, New Zealand
 *
 */

package weka.attributeSelection;

import java.util.ArrayList;
import java.util.Collections;
import java.util.Enumeration;
import java.util.Random;
import java.util.Vector;

import weka.core.Attribute;
import weka.core.CheckScheme;
import weka.core.Instances;
import weka.core.MultiInstanceCapabilitiesHandler;
import weka.core.Option;
import weka.core.OptionHandler;
import weka.core.RevisionUtils;
import weka.core.SerializationHelper;
import weka.core.SerializedObject;
import weka.core.TestInstances;
import weka.core.Utils;
import weka.core.WeightedInstancesHandler;

/**
 * Class for examining the capabilities and finding problems with attribute
 * selection schemes. If you implement an attribute selection scheme using the
 * WEKA libraries, you should run the checks on it to ensure robustness and
 * correct operation. Passing all the tests of this object does not mean bugs in
 * the attribute selection don't exist, but this will help find some common
 * ones.
 * 
 * 
 * Typical usage:
 * <p>
 * <code>java weka.attributeSelection.CheckAttributeSelection -W ASscheme_name
 * -- ASscheme_options </code>
 * <p>
 * 
 * CheckAttributeSelection reports on the following:
 * 

 * Scheme abilities
 * 
 * Possible command line options to the scheme
 * Whether the scheme can predict nominal, numeric, string, date or
 * relational class attributes.
 * Whether the scheme can handle numeric predictor attributes
 * Whether the scheme can handle nominal predictor attributes
 * Whether the scheme can handle string predictor attributes
 * Whether the scheme can handle date predictor attributes
 * Whether the scheme can handle relational predictor attributes
 * Whether the scheme can handle multi-instance data
 * Whether the scheme can handle missing predictor values
 * Whether the scheme can handle missing class values
 * Whether a nominal scheme only handles 2 class problems
 * Whether the scheme can handle instance weights
 * 
 * 
 * Correct functioning
 * 
 * Correct initialisation during search (i.e. no result changes when search
 * is performed repeatedly)
 * Whether the scheme alters the data passed to it (number of instances,
 * instance order, instance weights, etc)
 * 
 * 
 * Degenerate cases
 * 
 * building scheme with zero instances
 * all but one predictor attribute values missing
 * all predictor attribute values missing
 * all but one class values missing
 * all class values missing
 * 
 * 
 * 
 * Running CheckAttributeSelection with the debug option set will output the
 * training dataset for any failed tests.
 * 
 * 
 * The weka.attributeSelection.AbstractAttributeSelectionTest uses
 * this class to test all the schemes. Any changes here, have to be checked in
 * that abstract test class, too.
 * 

 * 
 *  Valid options are:
 * 

 * 
 * 
 * -D
 *  Turn on debugging output.
 * 
 * 
 *  * -S
 *  Silent mode - prints nothing to stdout.
 * 
 * 
 *  * -N <num>
 *  The number of instances in the datasets (default 20).
 * 
 * 
 *  * -nominal <num>
 *  The number of nominal attributes (default 2).
 * 
 * 
 *  * -nominal-values <num>
 *  The number of values for nominal attributes (default 1).
 * 
 * 
 *  * -numeric <num>
 *  The number of numeric attributes (default 1).
 * 
 * 
 *  * -string <num>
 *  The number of string attributes (default 1).
 * 
 * 
 *  * -date <num>
 *  The number of date attributes (default 1).
 * 
 * 
 *  * -relational <num>
 *  The number of relational attributes (default 1).
 * 
 * 
 *  * -num-instances-relational <num>
 *  The number of instances in relational/bag attributes (default 10).
 * 
 * 
 *  * -words <comma-separated-list>
 *  The words to use in string attributes.
 * 
 * 
 *  * -word-separators <chars>
 *  The word separators to use in string attributes.
 * 
 * 
 *  * -eval name [options]
 *  Full name and options of the evaluator analyzed.
 *  eg: weka.attributeSelection.CfsSubsetEval
 * 
 * 
 *  * -search name [options]
 *  Full name and options of the search method analyzed.
 *  eg: weka.attributeSelection.Ranker
 * 
 * 
 *  * -test <eval|search>
 *  The scheme to test, either the evaluator or the search method.
 *  (Default: eval)
 * 
 * 
 *  * Options specific to evaluator weka.attributeSelection.CfsSubsetEval:
 * 
 * 
 *  * -M
 *  Treat missing values as a separate value.
 * 
 * 
 *  * -L
 *  Don't include locally predictive attributes.
 * 
 * 
 *  * Options specific to search method weka.attributeSelection.Ranker:
 * 
 * 
 *  * -P <start set>
 *  Specify a starting set of attributes.
 *  Eg. 1,3,5-7.
 *  Any starting attributes specified are
 *  ignored during the ranking.
 * 
 * 
 *  * -T <threshold>
 *  Specify a threshold by which attributes
 *  may be discarded from the ranking.
 * 
 * 
 *  * -N <num to select>
 *  Specify number of attributes to select
 * 
 * 
 * 
 * 
 * @author Len Trigg (trigg@cs.waikato.ac.nz)
 * @author FracPete (fracpete at waikato dot ac dot nz)
 * @version $Revision: 11247 $
 * @see TestInstances
 */
public class CheckAttributeSelection extends CheckScheme {

  /*
   * Note about test methods: - methods return array of booleans - first index:
   * success or not - second index: acceptable or not (e.g., Exception is OK)
   * 
   * FracPete (fracpete at waikato dot ac dot nz)
   */

  /**
   * The attribute evaluator to be examined; initialised to a
   * {@link CfsSubsetEval}.
   */
  protected ASEvaluation m_Evaluator = new CfsSubsetEval();

  /**
   * The search method to be used; initialised to a {@link Ranker}.
   */
  protected ASSearch m_Search = new Ranker();

  /**
   * Whether to test the evaluator or the search method. Initialised to
   * <code>true</code>, i.e. the evaluator is the scheme under test by default.
   */
  protected boolean m_TestEvaluator = true;

  /**
   * Returns an enumeration describing the available options.
   * 
   * @return an enumeration of all the available options.
   */
  @Override
  public Enumeration