All Downloads are FREE. Search and download functionalities are using the official Maven repository.

weka.attributeSelection.WrapperSubsetEval Maven / Gradle / Ivy

/*
 *   This program is free software: you can redistribute it and/or modify
 *   it under the terms of the GNU General Public License as published by
 *   the Free Software Foundation, either version 3 of the License, or
 *   (at your option) any later version.
 *
 *   This program is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *   GNU General Public License for more details.
 *
 *   You should have received a copy of the GNU General Public License
 *   along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

/*
 *    WrapperSubsetEval.java
 *    Copyright (C) 1999-2012 University of Waikato, Hamilton, New Zealand
 *
 */

package weka.attributeSelection;

import weka.classifiers.AbstractClassifier;
import weka.classifiers.Classifier;
import weka.classifiers.Evaluation;
import weka.classifiers.evaluation.AbstractEvaluationMetric;
import weka.classifiers.evaluation.InformationRetrievalEvaluationMetric;
import weka.classifiers.rules.ZeroR;
import weka.core.Capabilities;
import weka.core.Capabilities.Capability;
import weka.core.Instances;
import weka.core.Option;
import weka.core.OptionHandler;
import weka.core.RevisionUtils;
import weka.core.SelectedTag;
import weka.core.Tag;
import weka.core.TechnicalInformation;
import weka.core.TechnicalInformation.Field;
import weka.core.TechnicalInformation.Type;
import weka.core.TechnicalInformationHandler;
import weka.core.Utils;
import weka.filters.Filter;
import weka.filters.unsupervised.attribute.Remove;

import java.util.BitSet;
import java.util.Collections;
import java.util.Enumeration;
import java.util.List;
import java.util.Random;
import java.util.Vector;

/**
 * WrapperSubsetEval:
 *
 * Evaluates attribute sets by using a learning scheme. Cross validation is used
 * to estimate the accuracy of the learning scheme for a set of attributes.
 *
 * For more information see:
 *
 * Ron Kohavi, George H. John (1997). Wrappers for feature subset selection.
 * Artificial Intelligence. 97(1-2):273-324.
 *
 * BibTeX:
 *
 * <pre>
 * &#64;article{Kohavi1997,
 *    author = {Ron Kohavi and George H. John},
 *    journal = {Artificial Intelligence},
 *    note = {Special issue on relevance},
 *    number = {1-2},
 *    pages = {273-324},
 *    title = {Wrappers for feature subset selection},
 *    volume = {97},
 *    year = {1997},
 *    ISSN = {0004-3702}
 * }
 * </pre>
 *
 * Valid options are:
 *
 * -B <base learner>
 *  class name of base learner to use for  accuracy estimation.
 *  Place any classifier options LAST on the command line
 *  following a "--". eg.:
 *   -B weka.classifiers.bayes.NaiveBayes ... -- -K
 *  (default: weka.classifiers.rules.ZeroR)
 * 
* *
 * -F <num>
 *  number of cross validation folds to use for estimating accuracy.
 *  (default=5)
 * 
* *
 * -R <seed>
 *  Seed for cross validation accuracy estimation.
 *  (default = 1)
 * 
* *
 * -T <num>
 *  threshold by which to execute another cross validation
 *  (standard deviation---expressed as a percentage of the mean).
 *  (default: 0.01 (1%))
 * 
* *
 * -E <acc | rmse | mae | f-meas | auc | auprc>
 *  Performance evaluation measure to use for selecting attributes.
 *  (Default = accuracy for discrete class and rmse for numeric class)
 * 
* *
 * -IRclass <label | index>
 *  Optional class value (label or 1-based index) to use in conjunction with
 *  IR statistics (f-meas, auc or auprc). Omitting this option will use
 *  the class-weighted average.
 * 
* *
 * Options specific to scheme weka.classifiers.rules.ZeroR:
 * 
* *
 * -D
 *  If set, classifier is run in debug mode and
 *  may output additional info to the console
 * 
* * * @author Mark Hall ([email protected]) * @version $Revision: 12170 $ */ public class WrapperSubsetEval extends ASEvaluation implements SubsetEvaluator, OptionHandler, TechnicalInformationHandler { /** for serialization */ static final long serialVersionUID = -4573057658746728675L; /** training instances */ private Instances m_trainInstances; /** class index */ private int m_classIndex; /** number of attributes in the training data */ private int m_numAttribs; /** holds an evaluation object */ private Evaluation m_Evaluation; /** holds the base classifier object */ private Classifier m_BaseClassifier; /** number of folds to use for cross validation */ private int m_folds; /** random number seed */ private int m_seed; /** * the threshold by which to do further cross validations when estimating the * accuracy of a subset */ private double m_threshold; public static final int EVAL_DEFAULT = 1; public static final int EVAL_ACCURACY = 2; public static final int EVAL_RMSE = 3; public static final int EVAL_MAE = 4; public static final int EVAL_FMEASURE = 5; public static final int EVAL_AUC = 6; public static final int EVAL_AUPRC = 7; public static final int EVAL_CORRELATION = 8; public static final int EVAL_PLUGIN = 9; /** * Small subclass of Tag to store info about a plugin metric */ protected static class PluginTag extends Tag { private static final long serialVersionUID = -6978438858413428382L; /** The metric object itself */ protected AbstractEvaluationMetric m_metric; /** The particular statistic from the metric that this tag pertains to */ protected String m_statisticName; /** * Constructor * * @param metric the metric object * @param statisticName the particular statistic that this tag pertains to */ public PluginTag(int ID, AbstractEvaluationMetric metric, String statisticName) { super(ID, statisticName, statisticName); m_metric = metric; m_statisticName = statisticName; } /** * Get the name of the metric represented by this tag * * @return the name of the 
metric */ public String getMetricName() { return m_metric.getMetricName(); } /** * Get the name of the statistic that this tag pertains to * * @return the name of the statistic */ public String getStatisticName() { return m_statisticName; } /** * Get the actual metric object * * @return the metric object */ public AbstractEvaluationMetric getMetric() { return m_metric; } } /** Holds all tags for metrics */ public static final Tag[] TAGS_EVALUATION; /** * If >= 0, and an IR metric is being used, then evaluate with respect to this * class value (0-based index) */ protected int m_IRClassVal = -1; /** User supplied option for IR class value (either name or 1-based index) */ protected String m_IRClassValS = ""; protected static List PLUGIN_METRICS = AbstractEvaluationMetric.getPluginMetrics(); static { int totalPluginCount = 0; if (PLUGIN_METRICS != null) { for (AbstractEvaluationMetric m : PLUGIN_METRICS) { totalPluginCount += m.getStatisticNames().size(); } } TAGS_EVALUATION = new Tag[8 + totalPluginCount]; TAGS_EVALUATION[0] = new Tag(EVAL_DEFAULT, "default", "Default: accuracy (discrete class); RMSE (numeric class)"); TAGS_EVALUATION[1] = new Tag(EVAL_ACCURACY, "acc", "Accuracy (discrete class only)"); TAGS_EVALUATION[2] = new Tag(EVAL_RMSE, "rmse", "RMSE (of the class probabilities for discrete class)"); TAGS_EVALUATION[3] = new Tag(EVAL_MAE, "mae", "MAE (of the class probabilities for discrete class)"); TAGS_EVALUATION[4] = new Tag(EVAL_FMEASURE, "f-meas", "F-measure (discrete class only)"); TAGS_EVALUATION[5] = new Tag(EVAL_AUC, "auc", "AUC (area under the ROC curve - discrete class only)"); TAGS_EVALUATION[6] = new Tag(EVAL_AUPRC, "auprc", "AUPRC (area under the precision-recall curve - discrete class only)"); TAGS_EVALUATION[7] = new Tag(EVAL_CORRELATION, "corr-coeff", "Correlation coefficient - numeric class only"); if (PLUGIN_METRICS != null) { int index = 8; for (AbstractEvaluationMetric m : PLUGIN_METRICS) { for (String stat : m.getStatisticNames()) { 
TAGS_EVALUATION[index++] = new PluginTag(index + 1, m, stat); } } } } /** The evaluation measure to use */ protected Tag m_evaluationMeasure = TAGS_EVALUATION[0]; /** * Returns a string describing this attribute evaluator * * @return a description of the evaluator suitable for displaying in the * explorer/experimenter gui */ public String globalInfo() { return "WrapperSubsetEval:\n\n" + "Evaluates attribute sets by using a learning scheme. Cross " + "validation is used to estimate the accuracy of the learning " + "scheme for a set of attributes.\n\n" + "For more information see:\n\n" + getTechnicalInformation().toString(); } /** * Returns an instance of a TechnicalInformation object, containing detailed * information about the technical background of this class, e.g., paper * reference or book this class is based on. * * @return the technical information about this class */ @Override public TechnicalInformation getTechnicalInformation() { TechnicalInformation result; result = new TechnicalInformation(Type.ARTICLE); result.setValue(Field.AUTHOR, "Ron Kohavi and George H. John"); result.setValue(Field.YEAR, "1997"); result.setValue(Field.TITLE, "Wrappers for feature subset selection"); result.setValue(Field.JOURNAL, "Artificial Intelligence"); result.setValue(Field.VOLUME, "97"); result.setValue(Field.NUMBER, "1-2"); result.setValue(Field.PAGES, "273-324"); result.setValue(Field.NOTE, "Special issue on relevance"); result.setValue(Field.ISSN, "0004-3702"); return result; } /** * Constructor. Calls restOptions to set default options **/ public WrapperSubsetEval() { resetOptions(); } /** * Returns an enumeration describing the available options. * * @return an enumeration of all the available options. **/ @Override public Enumeration




© 2015 - 2025 Weber Informatics LLC | Privacy Policy