All Downloads are FREE. Search and download functionalities are using the official Maven repository.

weka.attributeSelection.WrapperSubsetEval Maven / Gradle / Ivy

Go to download

The Waikato Environment for Knowledge Analysis (WEKA), a machine learning workbench. This version represents the developer version, the "bleeding edge" of development, you could say. New functionality gets added to this version.

There is a newer version: 3.9.6
Show newest version
/*
 *   This program is free software: you can redistribute it and/or modify
 *   it under the terms of the GNU General Public License as published by
 *   the Free Software Foundation, either version 3 of the License, or
 *   (at your option) any later version.
 *
 *   This program is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *   GNU General Public License for more details.
 *
 *   You should have received a copy of the GNU General Public License
 *   along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

/*
 *    WrapperSubsetEval.java
 *    Copyright (C) 1999-2012 University of Waikato, Hamilton, New Zealand
 *
 */

package weka.attributeSelection;

import java.util.BitSet;
import java.util.Collections;
import java.util.Enumeration;
import java.util.Random;
import java.util.Vector;

import weka.classifiers.AbstractClassifier;
import weka.classifiers.Classifier;
import weka.classifiers.Evaluation;
import weka.classifiers.rules.ZeroR;
import weka.core.Capabilities;
import weka.core.Capabilities.Capability;
import weka.core.Instances;
import weka.core.Option;
import weka.core.OptionHandler;
import weka.core.RevisionUtils;
import weka.core.SelectedTag;
import weka.core.Tag;
import weka.core.TechnicalInformation;
import weka.core.TechnicalInformation.Field;
import weka.core.TechnicalInformation.Type;
import weka.core.TechnicalInformationHandler;
import weka.core.Utils;
import weka.filters.Filter;
import weka.filters.unsupervised.attribute.Remove;

/**
 *  WrapperSubsetEval:
*
* Evaluates attribute sets by using a learning scheme. Cross validation is used
* to estimate the accuracy of the learning scheme for a set of attributes.
*
* For more information see:
*
* Ron Kohavi, George H. John (1997). Wrappers for feature subset selection.
* Artificial Intelligence. 97(1-2):273-324.

 *
 * BibTeX:
 *
 * <pre>
 * &#64;article{Kohavi1997,
 *    author = {Ron Kohavi and George H. John},
 *    journal = {Artificial Intelligence},
 *    note = {Special issue on relevance},
 *    number = {1-2},
 *    pages = {273-324},
 *    title = {Wrappers for feature subset selection},
 *    volume = {97},
 *    year = {1997},
 *    ISSN = {0004-3702}
 * }
 * </pre>
 *

 *
 * Valid options are:
 *

 * <pre>
 * -B &lt;base learner&gt;
 *  class name of base learner to use for  accuracy estimation.
 *  Place any classifier options LAST on the command line
 *  following a "--". eg.:
 *   -B weka.classifiers.bayes.NaiveBayes ... -- -K
 *  (default: weka.classifiers.rules.ZeroR)
 * </pre>
* *
 * <pre>
 * -F &lt;num&gt;
 *  number of cross validation folds to use for estimating accuracy.
 *  (default=5)
 * </pre>
* *
 * <pre>
 * -R &lt;seed&gt;
 *  Seed for cross validation accuracy estimation.
 *  (default = 1)
 * </pre>
* *
 * <pre>
 * -T &lt;num&gt;
 *  threshold by which to execute another cross validation
 *  (standard deviation---expressed as a percentage of the mean).
 *  (default: 0.01 (1%))
 * </pre>
* *
 * <pre>
 * -E &lt;acc | rmse | mae | f-meas | auc | auprc&gt;
 *  Performance evaluation measure to use for selecting attributes.
 *  (Default = accuracy for discrete class and rmse for numeric class)
 * </pre>
* *
 * <pre>
 * -IRclass &lt;label | index&gt;
 *  Optional class value (label or 1-based index) to use in conjunction with
 *  IR statistics (f-meas, auc or auprc). Omitting this option will use
 *  the class-weighted average.
 * </pre>
* *
 * <pre>
 * Options specific to scheme weka.classifiers.rules.ZeroR:
 * </pre>
* *
 * <pre>
 * -D
 *  If set, classifier is run in debug mode and
 *  may output additional info to the console
 * </pre>
* * * * @author Mark Hall ([email protected]) * @version $Revision: 11215 $ */ public class WrapperSubsetEval extends ASEvaluation implements SubsetEvaluator, OptionHandler, TechnicalInformationHandler { /** for serialization */ static final long serialVersionUID = -4573057658746728675L; /** training instances */ private Instances m_trainInstances; /** class index */ private int m_classIndex; /** number of attributes in the training data */ private int m_numAttribs; /** holds an evaluation object */ private Evaluation m_Evaluation; /** holds the base classifier object */ private Classifier m_BaseClassifier; /** number of folds to use for cross validation */ private int m_folds; /** random number seed */ private int m_seed; /** * the threshold by which to do further cross validations when estimating the * accuracy of a subset */ private double m_threshold; public static final int EVAL_DEFAULT = 1; public static final int EVAL_ACCURACY = 2; public static final int EVAL_RMSE = 3; public static final int EVAL_MAE = 4; public static final int EVAL_FMEASURE = 5; public static final int EVAL_AUC = 6; public static final int EVAL_AUPRC = 7; public static final Tag[] TAGS_EVALUATION = { new Tag(EVAL_DEFAULT, "Default: accuracy (discrete class); RMSE (numeric class)"), new Tag(EVAL_ACCURACY, "Accuracy (discrete class only)"), new Tag(EVAL_RMSE, "RMSE (of the class probabilities for discrete class)"), new Tag(EVAL_MAE, "MAE (of the class probabilities for discrete class)"), new Tag(EVAL_FMEASURE, "F-measure (discrete class only)"), new Tag(EVAL_AUC, "AUC (area under the ROC curve - discrete class only)"), new Tag(EVAL_AUPRC, "AUPRC (area under the precision-recall curve - discrete class only)") }; /** The evaluation measure to use */ protected int m_evaluationMeasure = EVAL_DEFAULT; /** * If >= 0, and an IR metric is being used, then evaluate with respect to this * class value (0-based index) */ protected int m_IRClassVal = -1; /** User supplied option for IR class value 
(either name or 1-based index) */ protected String m_IRClassValS = ""; /** * Returns a string describing this attribute evaluator * * @return a description of the evaluator suitable for displaying in the * explorer/experimenter gui */ public String globalInfo() { return "WrapperSubsetEval:\n\n" + "Evaluates attribute sets by using a learning scheme. Cross " + "validation is used to estimate the accuracy of the learning " + "scheme for a set of attributes.\n\n" + "For more information see:\n\n" + getTechnicalInformation().toString(); } /** * Returns an instance of a TechnicalInformation object, containing detailed * information about the technical background of this class, e.g., paper * reference or book this class is based on. * * @return the technical information about this class */ @Override public TechnicalInformation getTechnicalInformation() { TechnicalInformation result; result = new TechnicalInformation(Type.ARTICLE); result.setValue(Field.AUTHOR, "Ron Kohavi and George H. John"); result.setValue(Field.YEAR, "1997"); result.setValue(Field.TITLE, "Wrappers for feature subset selection"); result.setValue(Field.JOURNAL, "Artificial Intelligence"); result.setValue(Field.VOLUME, "97"); result.setValue(Field.NUMBER, "1-2"); result.setValue(Field.PAGES, "273-324"); result.setValue(Field.NOTE, "Special issue on relevance"); result.setValue(Field.ISSN, "0004-3702"); return result; } /** * Constructor. Calls restOptions to set default options **/ public WrapperSubsetEval() { resetOptions(); } /** * Returns an enumeration describing the available options. * * @return an enumeration of all the available options. **/ @Override public Enumeration




© 2015 - 2024 Weber Informatics LLC | Privacy Policy