All Downloads are FREE. Search and download functionalities are using the official Maven repository.

weka.attributeSelection.CheckAttributeSelection Maven / Gradle / Ivy

/*
 *   This program is free software: you can redistribute it and/or modify
 *   it under the terms of the GNU General Public License as published by
 *   the Free Software Foundation, either version 3 of the License, or
 *   (at your option) any later version.
 *
 *   This program is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *   GNU General Public License for more details.
 *
 *   You should have received a copy of the GNU General Public License
 *   along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

/*
 * CheckAttributeSelection.java
 * Copyright (C) 2006-2012 University of Waikato, Hamilton, New Zealand
 *
 */

package weka.attributeSelection;

import java.util.ArrayList;
import java.util.Collections;
import java.util.Enumeration;
import java.util.Random;
import java.util.Vector;

import weka.core.Attribute;
import weka.core.CheckScheme;
import weka.core.Instances;
import weka.core.MultiInstanceCapabilitiesHandler;
import weka.core.Option;
import weka.core.OptionHandler;
import weka.core.RevisionUtils;
import weka.core.SerializationHelper;
import weka.core.SerializedObject;
import weka.core.TestInstances;
import weka.core.Utils;
import weka.core.WeightedInstancesHandler;

/**
 * Class for examining the capabilities and finding problems with attribute
 * selection schemes. If you implement an attribute selection using the
 * WEKA libraries, you should run the checks on it to ensure robustness and
 * correct operation. Passing all the tests of this object does not mean bugs in
 * the attribute selection don't exist, but this will help find some common
 * ones.
 * 

 * <p>
 * Typical usage:
 * </p>
 * <p>
 * <code>java weka.attributeSelection.CheckAttributeSelection -W ASscheme_name
 * -- ASscheme_options</code>
 * </p>

* * CheckAttributeSelection reports on the following: *

    *
  • Scheme abilities *
      *
    • Possible command line options to the scheme
    • *
    • Whether the scheme can predict nominal, numeric, string, date or * relational class attributes.
    • *
    • Whether the scheme can handle numeric predictor attributes
    • *
    • Whether the scheme can handle nominal predictor attributes
    • *
    • Whether the scheme can handle string predictor attributes
    • *
    • Whether the scheme can handle date predictor attributes
    • *
    • Whether the scheme can handle relational predictor attributes
    • *
    • Whether the scheme can handle multi-instance data
    • *
    • Whether the scheme can handle missing predictor values
    • *
    • Whether the scheme can handle missing class values
    • *
    • Whether a nominal scheme only handles 2 class problems
    • *
    • Whether the scheme can handle instance weights
    • *
    *
  • *
  • Correct functioning *
      *
    • Correct initialisation during search (i.e. no result changes when search * is performed repeatedly)
    • *
    • Whether the scheme alters the data passed to it (number of instances, instance order, instance weights, etc)
    • *
    *
  • *
  • Degenerate cases *
      *
    • building scheme with zero instances
    • *
    • all but one predictor attribute values missing
    • *
    • all predictor attribute values missing
    • *
    • all but one class values missing
    • *
    • all class values missing
    • *
    *
  • *
* Running CheckAttributeSelection with the debug option set will output the * training dataset for any failed tests. *

* * The weka.attributeSelection.AbstractAttributeSelectionTest uses * this class to test all the schemes. Any changes here, have to be checked in * that abstract test class, too. *

* * Valid options are: *

* *

 * -D
 *  Turn on debugging output.
 * 
* *
 * -S
 *  Silent mode - prints nothing to stdout.
 * 
* *
 * -N <num>
 *  The number of instances in the datasets (default 20).
 * 
* *
 * -nominal <num>
 *  The number of nominal attributes (default 2).
 * 
* *
 * -nominal-values <num>
 *  The number of values for nominal attributes (default 1).
 * 
* *
 * -numeric <num>
 *  The number of numeric attributes (default 1).
 * 
* *
 * -string <num>
 *  The number of string attributes (default 1).
 * 
* *
 * -date <num>
 *  The number of date attributes (default 1).
 * 
* *
 * -relational <num>
 *  The number of relational attributes (default 1).
 * 
* *
 * -num-instances-relational <num>
 *  The number of instances in relational/bag attributes (default 10).
 * 
* *
 * -words <comma-separated-list>
 *  The words to use in string attributes.
 * 
* *
 * -word-separators <chars>
 *  The word separators to use in string attributes.
 * 
* *
 * -eval name [options]
 *  Full name and options of the evaluator analyzed.
 *  eg: weka.attributeSelection.CfsSubsetEval
 * 
* *
 * -search name [options]
 *  Full name and options of the search method analyzed.
 *  eg: weka.attributeSelection.Ranker
 * 
* *
 * -test <eval|search>
 *  The scheme to test, either the evaluator or the search method.
 *  (Default: eval)
 * 
* *
 * Options specific to evaluator weka.attributeSelection.CfsSubsetEval:
 * 
* *
 * -M
 *  Treat missing values as a separate value.
 * 
* *
 * -L
 *  Don't include locally predictive attributes.
 * 
* *
 * Options specific to search method weka.attributeSelection.Ranker:
 * 
* *
 * -P <start set>
 *  Specify a starting set of attributes.
 *  Eg. 1,3,5-7.
 *  Any starting attributes specified are
 *  ignored during the ranking.
 * 
* *
 * -T <threshold>
 *  Specify a threshold by which attributes
 *  may be discarded from the ranking.
 * 
* *
 * -N <num to select>
 *  Specify number of attributes to select
 * 
* * * * @author Len Trigg ([email protected]) * @author FracPete (fracpete at waikato dot ac dot nz) * @version $Revision: 11247 $ * @see TestInstances */ public class CheckAttributeSelection extends CheckScheme { /* * Note about test methods: - methods return array of booleans - first index: * success or not - second index: acceptable or not (e.g., Exception is OK) * * FracPete (fracpete at waikato dot ac dot nz) */ /*** The evaluator to be examined */ protected ASEvaluation m_Evaluator = new CfsSubsetEval(); /*** The search method to be used */ protected ASSearch m_Search = new Ranker(); /** whether to test the evaluator (default) or the search method */ protected boolean m_TestEvaluator = true; /** * Returns an enumeration describing the available options. * * @return an enumeration of all the available options. */ @Override public Enumeration




© 2015 - 2024 Weber Informatics LLC | Privacy Policy