All Downloads are FREE. Search and download functionalities are using the official Maven repository.

weka.attributeSelection.ClassifierSubsetEval Maven / Gradle / Ivy

Go to download

The Waikato Environment for Knowledge Analysis (WEKA), a machine learning workbench. This version represents the developer version, the "bleeding edge" of development, you could say. New functionality gets added to this version.

There is a newer version: 3.9.6
Show newest version
/*
 *   This program is free software: you can redistribute it and/or modify
 *   it under the terms of the GNU General Public License as published by
 *   the Free Software Foundation, either version 3 of the License, or
 *   (at your option) any later version.
 *
 *   This program is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *   GNU General Public License for more details.
 *
 *   You should have received a copy of the GNU General Public License
 *   along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

/*
 *    ClassifierSubsetEval.java
 *    Copyright (C) 2000 University of Waikato, Hamilton, New Zealand
 *
 */

package weka.attributeSelection;

import java.io.File;
import java.util.BitSet;
import java.util.Collections;
import java.util.Enumeration;
import java.util.List;
import java.util.Random;
import java.util.Vector;

import weka.classifiers.AbstractClassifier;
import weka.classifiers.Classifier;
import weka.classifiers.Evaluation;
import weka.classifiers.evaluation.AbstractEvaluationMetric;
import weka.classifiers.evaluation.InformationRetrievalEvaluationMetric;
import weka.classifiers.rules.ZeroR;
import weka.core.Capabilities;
import weka.core.Capabilities.Capability;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.Option;
import weka.core.OptionHandler;
import weka.core.RevisionUtils;
import weka.core.SelectedTag;
import weka.core.Tag;
import weka.core.Utils;
import weka.filters.Filter;
import weka.filters.unsupervised.attribute.Remove;

/**
 
 * Classifier subset evaluator:
*
* Evaluates attribute subsets on training data or a separate hold out testing set. Uses a classifier to estimate the 'merit' of a set of attributes. *

* * Valid options are:

* *

 -B <classifier>
 *  class name of the classifier to use for accuracy estimation.
 *  Place any classifier options LAST on the command line
 *  following a "--". eg.:
 *   -B weka.classifiers.bayes.NaiveBayes ... -- -K
 *  (default: weka.classifiers.rules.ZeroR)
* *
 -T
 *  Use the training data to estimate accuracy.
* *
 -H <filename>
 *  Name of the hold out/test set to 
 *  estimate accuracy on.
* *
 -percentage-split
 *  Perform a percentage split on the training data.
 *  Use in conjunction with -T.
* *
 -P
 *  Split percentage to use (default = 90).
* *
 -S
 *  Random seed for percentage split (default = 1).
* *
 -E <DEFAULT|ACC|RMSE|MAE|F-MEAS|AUC|AUPRC|CORR-COEFF>
 *  Performance evaluation measure to use for selecting attributes.
 *  (Default = default: accuracy for discrete class and rmse for numeric class)
* *
 -IRclass <label | index>
 *  Optional class value (label or 1-based index) to use in conjunction with
 *  IR statistics (f-meas, auc or auprc). Omitting this option will use
 *  the class-weighted average.
* *
 
 * Options specific to scheme weka.classifiers.rules.ZeroR:
 * 
* *
 -output-debug-info
 *  If set, classifier is run in debug mode and
 *  may output additional info to the console
* *
 -do-not-check-capabilities
 *  If set, classifier capabilities are not checked before classifier is built
 *  (use with caution).
* *
 -num-decimal-places
 *  The number of decimal places for the output of numbers in the model (default 2).
* *
 -batch-size
 *  The desired batch size for batch prediction  (default 100).
* * * @author Mark Hall ([email protected]) * @version $Revision: 10332 $ */ public class ClassifierSubsetEval extends HoldOutSubsetEvaluator implements OptionHandler, ErrorBasedMeritEvaluator { /** for serialization */ static final long serialVersionUID = 7532217899385278710L; /** training instances */ private Instances m_trainingInstances; /** class index */ private int m_classIndex; /** number of attributes in the training data */ private int m_numAttribs; /** number of training instances */ // private int m_numInstances; NOT USED /** holds the template classifier to use for error estimates */ private Classifier m_ClassifierTemplate = new ZeroR(); /** * Holds the classifier used when evaluating single hold-out instances - this * is used by RaceSearch and the trained classifier may need to persist * between calls to that particular method. */ private Classifier m_Classifier = new ZeroR(); /** the file that containts hold out/test instances */ private File m_holdOutFile = new File("Click to set hold out or " + "test instances"); /** the instances to test on */ private Instances m_holdOutInstances; /** evaluate on training data rather than separate hold out/test set */ private boolean m_useTraining = true; /** Whether to hold out a percentage of the training data */ protected boolean m_usePercentageSplit; /** Seed for randomizing prior to splitting training data */ protected int m_seed = 1; /** The split to use if doing a percentage split */ protected String m_splitPercent = "90"; public static final int EVAL_DEFAULT = 1; public static final int EVAL_ACCURACY = 2; public static final int EVAL_RMSE = 3; public static final int EVAL_MAE = 4; public static final int EVAL_FMEASURE = 5; public static final int EVAL_AUC = 6; public static final int EVAL_AUPRC = 7; public static final int EVAL_CORRELATION = 8; public static final int EVAL_PLUGIN = 9; protected static List PLUGIN_METRICS = AbstractEvaluationMetric.getPluginMetrics(); /** Holds all tags for metrics */ 
public static final Tag[] TAGS_EVALUATION; static { int totalPluginCount = 0; if (PLUGIN_METRICS != null) { for (AbstractEvaluationMetric m : PLUGIN_METRICS) { totalPluginCount += m.getStatisticNames().size(); } } TAGS_EVALUATION = new Tag[8 + totalPluginCount]; TAGS_EVALUATION[0] = new Tag(EVAL_DEFAULT, "default", "Default: accuracy (discrete class); RMSE (numeric class)"); TAGS_EVALUATION[1] = new Tag(EVAL_ACCURACY, "acc", "Accuracy (discrete class only)"); TAGS_EVALUATION[2] = new Tag(EVAL_RMSE, "rmse", "RMSE (of the class probabilities for discrete class)"); TAGS_EVALUATION[3] = new Tag(EVAL_MAE, "mae", "MAE (of the class probabilities for discrete class)"); TAGS_EVALUATION[4] = new Tag(EVAL_FMEASURE, "f-meas", "F-measure (discrete class only)"); TAGS_EVALUATION[5] = new Tag(EVAL_AUC, "auc", "AUC (area under the ROC curve - discrete class only)"); TAGS_EVALUATION[6] = new Tag(EVAL_AUPRC, "auprc", "AUPRC (area under the precision-recall curve - discrete class only)"); TAGS_EVALUATION[7] = new Tag(EVAL_CORRELATION, "corr-coeff", "Correlation coefficient - numeric class only"); if (PLUGIN_METRICS != null) { int index = 8; for (AbstractEvaluationMetric m : PLUGIN_METRICS) { for (String stat : m.getStatisticNames()) { TAGS_EVALUATION[index++] = new WrapperSubsetEval.PluginTag(index + 1, m, stat); } } } } /** The evaluation measure to use */ protected Tag m_evaluationMeasure = TAGS_EVALUATION[0]; /** * If >= 0, and an IR metric is being used, then evaluate with respect to this * class value (0-based index) */ protected int m_IRClassVal = -1; /** User supplied option for IR class value (either name or 1-based index) */ protected String m_IRClassValS = ""; /** * Returns a string describing this attribute evaluator * * @return a description of the evaluator suitable for displaying in the * explorer/experimenter gui */ public String globalInfo() { return "Classifier subset evaluator:\n\nEvaluates attribute subsets on training data or a seperate " + "hold out testing set. 
Uses a classifier to estimate the 'merit' of a set of attributes."; } /** * Returns an enumeration describing the available options. * * @return an enumeration of all the available options. **/ @Override public Enumeration




© 2015 - 2024 Weber Informatics LLC | Privacy Policy