/*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
/*
* AttributeSelectedClassifier.java
* Copyright (C) 2000-2012 University of Waikato, Hamilton, New Zealand
*
*/
package weka.classifiers.meta;
import java.util.Collections;
import java.util.Enumeration;
import java.util.Random;
import java.util.Vector;
import weka.attributeSelection.ASEvaluation;
import weka.attributeSelection.ASSearch;
import weka.attributeSelection.AttributeSelection;
import weka.classifiers.SingleClassifierEnhancer;
import weka.core.*;
import weka.core.Capabilities.Capability;
/**
* Dimensionality of training and test data is reduced by attribute selection before being passed on to a classifier.
*
*
* Valid options are:
*
* -E <attribute evaluator specification>
* Full class name of attribute evaluator, followed
* by its options.
* eg: "weka.attributeSelection.CfsSubsetEval -L"
* (default weka.attributeSelection.CfsSubsetEval)
*
* -S <search method specification>
* Full class name of search method, followed
* by its options.
* eg: "weka.attributeSelection.BestFirst -D 1"
* (default weka.attributeSelection.BestFirst)
*
* -D
* If set, classifier is run in debug mode and
* may output additional info to the console
*
* -W
* Full name of base classifier.
* (default: weka.classifiers.trees.J48)
*
*
* Options specific to classifier weka.classifiers.trees.J48:
*
*
* -U
* Use unpruned tree.
*
* -C <pruning confidence>
* Set confidence threshold for pruning.
* (default 0.25)
*
* -M <minimum number of instances>
* Set minimum number of instances per leaf.
* (default 2)
*
* -R
* Use reduced error pruning.
*
* -N <number of folds>
* Set number of folds for reduced error
* pruning. One fold is used as pruning set.
* (default 3)
*
* -B
* Use binary splits only.
*
* -S
* Don't perform subtree raising.
*
* -L
* Do not clean up after the tree has been built.
*
* -A
* Laplace smoothing for predicted probabilities.
*
* -Q <seed>
* Seed for random data shuffling (default 1).
*
*
* @author Mark Hall ([email protected] )
* @version $Revision: 15520 $
*/
public class AttributeSelectedClassifier
extends SingleClassifierEnhancer
implements OptionHandler, Drawable, AdditionalMeasureProducer,
WeightedInstancesHandler {
/** Version identifier used for serialization. */
static final long serialVersionUID = -1151805453487947577L;
/**
 * The attribute selection object. It is null until buildClassifier has
 * performed attribute selection; the prediction methods and toString treat
 * null as "no attribute selection available".
 */
protected AttributeSelection m_AttributeSelection = null;
/** The attribute evaluator to use (a CfsSubsetEval instance by default). */
protected ASEvaluation m_Evaluator =
new weka.attributeSelection.CfsSubsetEval();
/** The search method to use (a BestFirst instance by default). */
protected ASSearch m_Search = new weka.attributeSelection.BestFirst();
/**
 * The header (structure without instances) of the dimensionally reduced
 * data; assigned by buildClassifier and printed by toString.
 */
protected Instances m_ReducedHeader;
/**
 * The number of class values in the training data (1 if the class is
 * numeric). Written by buildClassifier; not read anywhere else in this file.
 */
protected int m_numClasses;
/** The number of attributes selected by the attribute selection phase. */
protected double m_numAttributesSelected;
/** The time taken to select attributes, in milliseconds. */
protected double m_selectionTime;
/**
 * The time taken to select attributes AND build the classifier, in
 * milliseconds.
 */
protected double m_totalTime;
/**
 * Supplies the class name of the default base classifier.
 *
 * @return the fully qualified class name of the default classifier
 */
protected String defaultClassifierString() {
  final String defaultBaseClassifier = "weka.classifiers.trees.J48";
  return defaultBaseClassifier;
}
/**
 * Creates the meta classifier with a fresh J48 instance as its base
 * classifier.
 */
public AttributeSelectedClassifier() {
  this.m_Classifier = new weka.classifiers.trees.J48();
}
/**
 * Returns a short description of this classifier.
 *
 * @return a description of the classifier suitable for
 * displaying in the explorer/experimenter gui
 */
public String globalInfo() {
  final String description =
      "Dimensionality of training and test data is reduced by "
      + "attribute selection before being passed on to a classifier.";
  return description;
}
/**
 * Returns an enumeration describing the available options.
 *
 * The list starts with the -E (attribute evaluator) and -S (search method)
 * options, continues with the options reported by the superclass and, when
 * the current evaluator and/or search method implement OptionHandler, ends
 * with a headed section listing their own options.
 *
 * @return an enumeration of all the available options.
 */
public Enumeration<Option> listOptions() {
  Vector<Option> newVector = new Vector<Option>(2);

  // Both options take exactly one argument, so the synopsis names it.
  newVector.addElement(new Option(
      "\tFull class name of attribute evaluator, followed\n"
      + "\tby its options.\n"
      + "\teg: \"weka.attributeSelection.CfsSubsetEval -L\"\n"
      + "\t(default weka.attributeSelection.CfsSubsetEval)",
      "E", 1, "-E <attribute evaluator specification>"));
  newVector.addElement(new Option(
      "\tFull class name of search method, followed\n"
      + "\tby its options.\n"
      + "\teg: \"weka.attributeSelection.BestFirst -D 1\"\n"
      + "\t(default weka.attributeSelection.BestFirst)",
      "S", 1, "-S <search method specification>"));

  newVector.addAll(Collections.list(super.listOptions()));

  if (getEvaluator() instanceof OptionHandler) {
    // An Option with empty description and name acts as a section heading.
    newVector.addElement(new Option(
        "",
        "", 0, "\nOptions specific to attribute evaluator "
        + getEvaluator().getClass().getName() + ":"));
    newVector.addAll(
        Collections.list(((OptionHandler) getEvaluator()).listOptions()));
  }

  if (getSearch() instanceof OptionHandler) {
    newVector.addElement(new Option(
        "",
        "", 0, "\nOptions specific to search method "
        + getSearch().getClass().getName() + ":"));
    newVector.addAll(
        Collections.list(((OptionHandler) getSearch()).listOptions()));
  }

  return newVector.elements();
}
/**
 * Parses a given list of options.
 *
 * Options consumed directly by this method:
 *
 * {@code -E <attribute evaluator specification>}
 *  Full class name of attribute evaluator, followed by its options,
 *  eg: "weka.attributeSelection.CfsSubsetEval -L"
 *  (default weka.attributeSelection.CfsSubsetEval)
 *
 * {@code -S <search method specification>}
 *  Full class name of search method, followed by its options,
 *  eg: "weka.attributeSelection.BestFirst -D 1"
 *  (default weka.attributeSelection.BestFirst)
 *
 * The option array is afterwards handed to the superclass, which is
 * expected to process the remaining options such as -D (debug mode) and
 * -W (full name of base classifier, default weka.classifiers.trees.J48)
 * together with the options of the base classifier.
 *
 * @param options the list of options as an array of strings
 * @throws Exception if an option is not supported
 */
public void setOptions(String[] options) throws Exception {
  // Attribute evaluator: class name first, then the evaluator's own options.
  String evaluatorText = Utils.getOption('E', options);
  if (evaluatorText.isEmpty()) {
    evaluatorText = weka.attributeSelection.CfsSubsetEval.class.getName();
  }
  String[] evaluatorTokens = Utils.splitOptions(evaluatorText);
  if (evaluatorTokens.length == 0) {
    throw new Exception("Invalid attribute evaluator specification string");
  }
  String evaluatorClass = evaluatorTokens[0];
  // Blank out the class name so only the evaluator's options remain.
  evaluatorTokens[0] = "";
  setEvaluator(ASEvaluation.forName(evaluatorClass, evaluatorTokens));

  // Search method: handled in exactly the same way.
  String searchText = Utils.getOption('S', options);
  if (searchText.isEmpty()) {
    searchText = weka.attributeSelection.BestFirst.class.getName();
  }
  String[] searchTokens = Utils.splitOptions(searchText);
  if (searchTokens.length == 0) {
    throw new Exception("Invalid search specification string");
  }
  String searchClass = searchTokens[0];
  searchTokens[0] = "";
  setSearch(ASSearch.forName(searchClass, searchTokens));

  // Everything else is left to the superclass.
  super.setOptions(options);
}
/**
 * Gets the current settings of the Classifier.
 *
 * The result holds "-E" with the evaluator specification, "-S" with the
 * search specification, and then the options reported by the superclass.
 *
 * @return an array of strings suitable for passing to setOptions
 */
public String[] getOptions() {
  // Must be parameterized: on a raw Vector, toArray(T[]) is erased to
  // Object[] toArray(Object[]), which cannot be returned as String[].
  Vector<String> options = new Vector<String>();

  // attribute evaluator
  options.add("-E");
  options.add("" + getEvaluatorSpec());

  // search method
  options.add("-S");
  options.add("" + getSearchSpec());

  Collections.addAll(options, super.getOptions());

  return options.toArray(new String[0]);
}
/**
 * Returns the tip text for the evaluator property.
 *
 * @return tip text for this property suitable for
 * displaying in the explorer/experimenter gui
 */
public String evaluatorTipText() {
  final String tip =
      "Set the attribute evaluator to use. This evaluator is used "
      + "during the attribute selection phase before the classifier is "
      + "invoked.";
  return tip;
}
/**
 * Stores the attribute evaluator to use during attribute selection.
 *
 * @param evaluator the evaluator with all options set.
 */
public void setEvaluator(ASEvaluation evaluator) {
  this.m_Evaluator = evaluator;
}
/**
 * Returns the attribute evaluator currently in use.
 *
 * @return the attribute evaluator
 */
public ASEvaluation getEvaluator() {
  return this.m_Evaluator;
}
/**
 * Builds the evaluator specification string: the class name of the
 * attribute evaluator, followed by a space and its joined options when the
 * evaluator implements OptionHandler.
 *
 * @return the evaluator string.
 */
protected String getEvaluatorSpec() {
  ASEvaluation evaluator = getEvaluator();
  String className = evaluator.getClass().getName();
  if (!(evaluator instanceof OptionHandler)) {
    return className;
  }
  String[] evaluatorOptions = ((OptionHandler) evaluator).getOptions();
  return className + " " + Utils.joinOptions(evaluatorOptions);
}
/**
 * Returns the tip text for the search property.
 *
 * @return tip text for this property suitable for
 * displaying in the explorer/experimenter gui
 */
public String searchTipText() {
  final String tip =
      "Set the search method. This search method is used "
      + "during the attribute selection phase before the classifier is "
      + "invoked.";
  return tip;
}
/**
 * Stores the search method to use during attribute selection.
 *
 * @param search the search method with all options set.
 */
public void setSearch(ASSearch search) {
  this.m_Search = search;
}
/**
 * Returns the search method currently in use.
 *
 * @return the search method
 */
public ASSearch getSearch() {
  return this.m_Search;
}
/**
 * Builds the search specification string: the class name of the search
 * method, followed by a space and its joined options when the search method
 * implements OptionHandler.
 *
 * @return the search string.
 */
protected String getSearchSpec() {
  ASSearch search = getSearch();
  String className = search.getClass().getName();
  if (!(search instanceof OptionHandler)) {
    return className;
  }
  String[] searchOptions = ((OptionHandler) search).getOptions();
  return className + " " + Utils.joinOptions(searchOptions);
}
/**
 * Returns the capabilities of this classifier: those of the attribute
 * evaluator when one is set, otherwise those reported by the superclass.
 * Every capability is additionally enabled as a dependency.
 *
 * @return the capabilities of this classifier
 */
public Capabilities getCapabilities() {
  Capabilities result;
  if (getEvaluator() != null) {
    result = getEvaluator().getCapabilities();
  } else {
    result = super.getCapabilities();
  }

  // set dependencies
  Capability[] allCapabilities = Capability.values();
  for (int i = 0; i < allCapabilities.length; i++) {
    result.enableDependency(allCapabilities[i]);
  }
  return result;
}
/**
 * Build the classifier on the dimensionally reduced data.
 *
 * Steps: validate the configuration and the data, run attribute selection,
 * reduce the training data to the selected attributes, and train the base
 * classifier on the result. If the instance weights are not all equal and
 * the evaluator and/or the base classifier does not implement
 * WeightedInstancesHandler, a weight-based resample of the data is used for
 * that component instead of the weighted data.
 *
 * Any state from a previous build (attribute selection, reduced header,
 * measures) is discarded first. For empty training data no attribute
 * selection is possible: the base classifier is built on the data as is and
 * predictions pass instances through unreduced.
 *
 * @param data the training data
 * @throws Exception if the classifier could not be built successfully
 */
public void buildClassifier(Instances data) throws Exception {
  if (m_Classifier == null) {
    throw new Exception("No base classifier has been set!");
  }
  if (m_Evaluator == null) {
    throw new Exception("No attribute evaluator has been set!");
  }
  if (m_Search == null) {
    throw new Exception("No search method has been set!");
  }

  // can classifier handle the data?
  getCapabilities().testWithFail(data);

  // Forget the previous model. Otherwise a rebuild on empty data (or a
  // failed rebuild) would leave a stale attribute selection behind that no
  // longer matches the data the base classifier was trained on.
  m_AttributeSelection = null;
  m_ReducedHeader = null;
  m_numAttributesSelected = 0;
  m_selectionTime = 0;
  m_totalTime = 0;

  // get fresh Instances object
  Instances newData = new Instances(data);

  if (newData.numInstances() == 0) {
    // nothing to select attributes from: train on the data as it is
    m_Classifier.buildClassifier(newData);
    return;
  }

  if (newData.classAttribute().isNominal()) {
    m_numClasses = newData.classAttribute().numValues();
  } else {
    m_numClasses = 1;
  }

  boolean evaluatorUsesWeights = m_Evaluator instanceof WeightedInstancesHandler;
  boolean classifierUsesWeights = m_Classifier instanceof WeightedInstancesHandler;

  // check to see if training data has all equal weights
  double firstWeight = newData.instance(0).weight();
  boolean weightsDiffer = false;
  for (int i = 1; i < newData.numInstances(); i++) {
    if (newData.instance(i).weight() != firstWeight) {
      weightsDiffer = true;
      break;
    }
  }

  // Data for components that cannot handle weights. It stays null only when
  // weights differ and both components handle weights, in which case it is
  // never read below.
  Instances resampledData = null;
  if (!weightsDiffer) {
    // all equal weights in the training data so just use as is
    resampledData = newData;
  } else if (!evaluatorUsesWeights || !classifierUsesWeights) {
    Random r = new Random(1);
    // Ten values are drawn and discarded before resampling; this is kept
    // unchanged so that resampling results stay reproducible.
    for (int i = 0; i < 10; i++) {
      r.nextDouble();
    }
    resampledData = newData.resampleWithWeights(r);
  }

  // Work on a local object and publish it only after the whole build has
  // succeeded.
  AttributeSelection selection = new AttributeSelection();
  selection.setEvaluator(m_Evaluator);
  selection.setSearch(m_Search);

  long start = System.currentTimeMillis();
  selection.SelectAttributes(evaluatorUsesWeights ? newData : resampledData);
  long end = System.currentTimeMillis();

  Instances trainingData = selection.reduceDimensionality(
      classifierUsesWeights ? newData : resampledData);
  m_Classifier.buildClassifier(trainingData);
  long end2 = System.currentTimeMillis();

  m_AttributeSelection = selection;
  m_numAttributesSelected = selection.numberAttributesSelected();
  // keep only the structure of the reduced data
  m_ReducedHeader = new Instances(trainingData, 0);
  m_selectionTime = (double) (end - start);
  m_totalTime = (double) (end2 - start);
}
/**
 * Classifies a given instance after attribute selection. When no attribute
 * selection is available the instance is handed to the base classifier
 * unchanged.
 *
 * @param instance the instance to be classified
 * @return the class distribution
 * @throws Exception if instance could not be classified
 * successfully
 */
public double[] distributionForInstance(Instance instance)
    throws Exception {
  Instance reduced = (m_AttributeSelection != null)
      ? m_AttributeSelection.reduceDimensionality(instance)
      : instance;
  return m_Classifier.distributionForInstance(reduced);
}
/**
 * Tool tip text for the batch size property.
 *
 * @return the tool tip for this property
 */
public String batchSizeTipText() {
  final String tip = "Batch size to use if base learner is a BatchPredictor";
  return tip;
}
/**
 * Set the batch size to use. It is passed through to the base learner if
 * that implements BatchPredictor; otherwise it is handed to the superclass.
 *
 * @param size the batch size to use
 */
public void setBatchSize(String size) {
  if (!(getClassifier() instanceof BatchPredictor)) {
    super.setBatchSize(size);
    return;
  }
  ((BatchPredictor) getClassifier()).setBatchSize(size);
}
/**
 * Gets the preferred batch size from the base learner if it implements
 * BatchPredictor. Otherwise the value reported by the superclass is
 * returned.
 *
 * @return the batch size to use
 */
public String getBatchSize() {
  if (!(getClassifier() instanceof BatchPredictor)) {
    return super.getBatchSize();
  }
  return ((BatchPredictor) getClassifier()).getBatchSize();
}
/**
 * Batch scoring method. If the base learner implements BatchPredictor, the
 * instances are reduced to the selected attributes (when an attribute
 * selection is available) and passed to the base learner in one call.
 * Otherwise distributionForInstance() is simply called for each instance.
 *
 * @param insts the instances to get predictions for
 * @return an array of probability distributions, one for each instance
 * @throws Exception if a problem occurs; in particular a WekaException if
 * dimensionality reduction changes the number of instances
 */
public double[][] distributionsForInstances(Instances insts)
    throws Exception {
  if (getClassifier() instanceof BatchPredictor) {
    Instances newInstances;
    if (m_AttributeSelection == null) {
      // no attribute selection available: pass the data through unchanged
      newInstances = insts;
    } else {
      newInstances = m_AttributeSelection.reduceDimensionality(insts);
    }
    // one prediction per input instance is required
    if (newInstances.numInstances() != insts.numInstances()) {
      throw new WekaException(
          "AttributeSelectedClassifier: attribute selection has returned "
          + "more/less instances than required.");
    }
    return ((BatchPredictor) getClassifier())
        .distributionsForInstances(newInstances);
  }

  // Every row is replaced by the array returned from
  // distributionForInstance(), so only the outer array is allocated.
  int count = insts.numInstances();
  double[][] result = new double[count][];
  for (int i = 0; i < count; i++) {
    result[i] = distributionForInstance(insts.instance(i));
  }
  return result;
}
/**
 * Reports whether batch predictions can be generated efficiently. The
 * question is forwarded to the base classifier if it implements
 * BatchPredictor, and to the superclass otherwise.
 *
 * @return true if the base classifier can generate batch predictions
 * efficiently
 */
public boolean implementsMoreEfficientBatchPrediction() {
  return (getClassifier() instanceof BatchPredictor)
      ? ((BatchPredictor) getClassifier()).implementsMoreEfficientBatchPrediction()
      : super.implementsMoreEfficientBatchPrediction();
}
/**
 * Returns the type of graph this classifier represents, as reported by the
 * base classifier when it is Drawable.
 *
 * @return the type of graph, or Drawable.NOT_DRAWABLE if the base
 * classifier is not Drawable
 */
public int graphType() {
  return (m_Classifier instanceof Drawable)
      ? ((Drawable) m_Classifier).graphType()
      : Drawable.NOT_DRAWABLE;
}
/**
 * Returns the graph of the base classifier (if possible).
 *
 * @return the graph of the classifier in dotty format
 * @throws Exception if the classifier cannot be graphed
 */
public String graph() throws Exception {
  if (!(m_Classifier instanceof Drawable)) {
    throw new Exception("Classifier: " + getClassifierSpec()
        + " cannot be graphed");
  }
  return ((Drawable) m_Classifier).graph();
}
/**
 * Output a representation of this classifier: the attribute selection
 * results, the header of the reduced data and the base classifier model.
 * Without an attribute selection only a notice and the base classifier are
 * printed.
 *
 * @return a representation of this classifier
 */
public String toString() {
  if (m_AttributeSelection == null) {
    return "AttributeSelectedClassifier: No attribute selection possible.\n\n"
        + m_Classifier.toString();
  }

  StringBuilder text = new StringBuilder();
  text.append("AttributeSelectedClassifier:\n\n")
      .append(m_AttributeSelection.toResultsString())
      .append("\n\nHeader of reduced data:\n")
      .append(m_ReducedHeader.toString())
      .append("\n\nClassifier Model\n")
      .append(m_Classifier.toString());
  return text.toString();
}
/**
 * Additional measure --- number of attributes selected.
 *
 * @return the number of attributes selected
 */
public double measureNumAttributesSelected() {
  return this.m_numAttributesSelected;
}
/**
 * Additional measure --- time taken (milliseconds) to select the attributes.
 *
 * @return the time taken to select attributes
 */
public double measureSelectionTime() {
  return this.m_selectionTime;
}
/**
 * Additional measure --- time taken (milliseconds) to select attributes
 * and build the classifier.
 *
 * @return the total time (select attributes + build classifier)
 */
public double measureTime() {
  return this.m_totalTime;
}
/**
 * Returns an enumeration of the additional measure names: the three
 * measures of this class followed by those of the base classifier when it
 * is an AdditionalMeasureProducer.
 *
 * @return an enumeration of the measure names
 */
public Enumeration<String> enumerateMeasures() {
  Vector<String> newVector = new Vector<String>(3);
  newVector.addElement("measureNumAttributesSelected");
  newVector.addElement("measureSelectionTime");
  newVector.addElement("measureTime");
  if (m_Classifier instanceof AdditionalMeasureProducer) {
    newVector.addAll(Collections.list(
        ((AdditionalMeasureProducer) m_Classifier).enumerateMeasures()));
  }
  return newVector.elements();
}
/**
 * Returns the value of the named measure. The three measures of this class
 * are matched ignoring case; any other name is forwarded to the base
 * classifier when it is an AdditionalMeasureProducer.
 *
 * @param additionalMeasureName the name of the measure to query for its value
 * @return the value of the named measure
 * @throws IllegalArgumentException if the named measure is not supported
 */
public double getMeasure(String additionalMeasureName) {
  if (additionalMeasureName.equalsIgnoreCase("measureNumAttributesSelected")) {
    return measureNumAttributesSelected();
  }
  if (additionalMeasureName.equalsIgnoreCase("measureSelectionTime")) {
    return measureSelectionTime();
  }
  if (additionalMeasureName.equalsIgnoreCase("measureTime")) {
    return measureTime();
  }
  if (m_Classifier instanceof AdditionalMeasureProducer) {
    AdditionalMeasureProducer producer = (AdditionalMeasureProducer) m_Classifier;
    return producer.getMeasure(additionalMeasureName);
  }
  throw new IllegalArgumentException(additionalMeasureName
      + " not supported (AttributeSelectedClassifier)");
}
/**
 * Returns the revision string.
 *
 * @return the revision
 */
public String getRevision() {
  final String revisionKeyword = "$Revision: 15520 $";
  return RevisionUtils.extract(revisionKeyword);
}
/**
 * Main method for testing this class.
 *
 * @param argv should contain the following arguments:
 * -t training file [-T test file] [-c class index]
 */
public static void main(String[] argv) {
  AttributeSelectedClassifier classifier = new AttributeSelectedClassifier();
  runClassifier(classifier, argv);
}
}