weka.attributeSelection.AttributeSelection Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of weka-dev Show documentation
Show all versions of weka-dev Show documentation
The Waikato Environment for Knowledge Analysis (WEKA), a machine
learning workbench. This version represents the developer version, the
"bleeding edge" of development, you could say. New functionality gets added
to this version.
/*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
/*
* AttributeSelection.java
* Copyright (C) 1999-2012 University of Waikato, Hamilton, New Zealand
*
*/
package weka.attributeSelection;
import java.beans.BeanInfo;
import java.beans.IntrospectionException;
import java.beans.Introspector;
import java.beans.PropertyDescriptor;
import java.io.Serializable;
import java.lang.reflect.Method;
import java.util.Enumeration;
import java.util.Random;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.Option;
import weka.core.OptionHandler;
import weka.core.RevisionHandler;
import weka.core.RevisionUtils;
import weka.core.Utils;
import weka.core.converters.ConverterUtils.DataSource;
import weka.filters.Filter;
import weka.filters.unsupervised.attribute.Remove;
/**
* Attribute selection class. Takes the name of a search class and an evaluation
* class on the command line.
*
*
* Valid options are:
*
*
* -h
* Display help.
*
*
* -i <name of input file>
* Specify the training data file.
*
*
* -c <class index>
* The index of the attribute to use as the class.
*
*
* -s <search method>
* The full class name of the search method followed by search method options
* (if any).
* Eg. -s "weka.attributeSelection.BestFirst -N 10"
*
*
* -x <number of folds>
* Perform a cross validation.
*
*
* -n <random number seed>
* Specify a random number seed. Use in conjunction with -x. (Default = 1).
*
*
* ------------------------------------------------------------------------
*
*
* Example usage as the main of an attribute evaluator (called FunkyEvaluator):
*
*
* public static void main(String[] args) {
* runEvaluator(new FunkyEvaluator(), args);
* }
*
*
*
* ------------------------------------------------------------------------
*
*
* @author Mark Hall ([email protected])
* @version $Revision: 11942 $
*/
public class AttributeSelection implements Serializable, RevisionHandler {
  /** for serialization */
  static final long serialVersionUID = 4170171824147584330L;
  /** the instances to select attributes from */
  private Instances m_trainInstances;
  /** the attribute/subset evaluator */
  private ASEvaluation m_ASEvaluator;
  /** the search method */
  private ASSearch m_searchMethod;
  /** the number of folds to use for cross validation */
  private int m_numFolds;
  /** holds a string describing the results of the attribute selection */
  private final StringBuffer m_selectionResults;
  /** rank features (if allowed by the search method) */
  private boolean m_doRank;
  /** do cross validation */
  private boolean m_doXval;
  /** seed used to randomly shuffle instances for cross validation */
  private int m_seed;
  /** number of attributes requested from ranked results */
  private int m_numToSelect;
  /** the selected attributes (may include the class index as last element) */
  private int[] m_selectedAttributeSet;
  /**
   * the attribute indexes and associated merits if a ranking is produced;
   * row i holds {attribute index, merit} for the i-th ranked attribute
   */
  private double[][] m_attributeRanking;
  /** if a feature selection run involves an attribute transformer */
  private AttributeTransformer m_transformer = null;
  /**
   * the attribute filter for processing instances with respect to the most
   * recent feature selection run
   */
  private Remove m_attributeFilter = null;
  /**
   * hold statistics for repeated feature selection, such as under cross
   * validation. Rows of m_rankResults: 0 = sum of merits, 1 = sum of ranks,
   * 2 = sum of squared merits, 3 = sum of squared ranks (converted in place
   * to means / std devs by CVResultsString). m_subsetResults counts how many
   * folds selected each attribute.
   */
  private double[][] m_rankResults = null;
  private double[] m_subsetResults = null;
  /**
   * Return the number of attributes selected from the most recent run of
   * attribute selection.
   *
   * NOTE(review): subtracts one because SelectAttributes appends the class
   * index as the last element of the selected set for supervised runs. For an
   * unsupervised evaluator with no class index set, the set has no appended
   * class and this count is one less than the true number selected -- confirm
   * against callers before changing.
   *
   * @return the number of attributes selected
   * @throws Exception if attribute selection has not been performed yet
   */
  public int numberAttributesSelected() throws Exception {
    int[] att = selectedAttributes();
    return att.length - 1;
  }
/**
* get the final selected set of attributes.
*
* @return an array of attribute indexes
* @exception Exception if attribute selection has not been performed yet
*/
public int[] selectedAttributes() throws Exception {
if (m_selectedAttributeSet == null) {
throw new Exception("Attribute selection has not been performed yet!");
}
return m_selectedAttributeSet;
}
/**
* get the final ranking of the attributes.
*
* @return a two dimensional array of ranked attribute indexes and their
* associated merit scores as doubles.
* @exception Exception if a ranking has not been produced
*/
public double[][] rankedAttributes() throws Exception {
if (m_attributeRanking == null) {
throw new Exception("Ranking has not been performed");
}
return m_attributeRanking;
}
/**
* set the attribute/subset evaluator
*
* @param evaluator the evaluator to use
*/
public void setEvaluator(ASEvaluation evaluator) {
m_ASEvaluator = evaluator;
}
/**
* set the search method
*
* @param search the search method to use
*/
public void setSearch(ASSearch search) {
m_searchMethod = search;
if (m_searchMethod instanceof RankedOutputSearch) {
setRanking(((RankedOutputSearch) m_searchMethod).getGenerateRanking());
}
}
/**
* set the number of folds for cross validation
*
* @param folds the number of folds
*/
public void setFolds(int folds) {
m_numFolds = folds;
}
/**
* produce a ranking (if possible with the set search and evaluator)
*
* @param r true if a ranking is to be produced
*/
public void setRanking(boolean r) {
m_doRank = r;
}
/**
* do a cross validation
*
* @param x true if a cross validation is to be performed
*/
public void setXval(boolean x) {
m_doXval = x;
}
/**
* set the seed for use in cross validation
*
* @param s the seed
*/
public void setSeed(int s) {
m_seed = s;
}
/**
* get a description of the attribute selection
*
* @return a String describing the results of attribute selection
*/
public String toResultsString() {
return m_selectionResults.toString();
}
/**
* reduce the dimensionality of a set of instances to include only those
* attributes chosen by the last run of attribute selection.
*
* @param in the instances to be reduced
* @return a dimensionality reduced set of instances
* @exception Exception if the instances can't be reduced
*/
public Instances reduceDimensionality(Instances in) throws Exception {
if (m_attributeFilter == null) {
throw new Exception("No feature selection has been performed yet!");
}
if (m_transformer != null) {
Instances transformed =
new Instances(m_transformer.transformedHeader(), in.numInstances());
for (int i = 0; i < in.numInstances(); i++) {
transformed.add(m_transformer.convertInstance(in.instance(i)));
}
return Filter.useFilter(transformed, m_attributeFilter);
}
return Filter.useFilter(in, m_attributeFilter);
}
/**
* reduce the dimensionality of a single instance to include only those
* attributes chosen by the last run of attribute selection.
*
* @param in the instance to be reduced
* @return a dimensionality reduced instance
* @exception Exception if the instance can't be reduced
*/
public Instance reduceDimensionality(Instance in) throws Exception {
if (m_attributeFilter == null) {
throw new Exception("No feature selection has been performed yet!");
}
if (m_transformer != null) {
in = m_transformer.convertInstance(in);
}
m_attributeFilter.input(in);
m_attributeFilter.batchFinished();
Instance result = m_attributeFilter.output();
return result;
}
/**
* constructor. Sets defaults for each member varaible. Default attribute
* evaluator is CfsSubsetEval; default search method is BestFirst.
*/
public AttributeSelection() {
setFolds(10);
setRanking(false);
setXval(false);
setSeed(1);
setEvaluator(new CfsSubsetEval());
setSearch(new GreedyStepwise());
m_selectionResults = new StringBuffer();
m_selectedAttributeSet = null;
m_attributeRanking = null;
}
  /**
   * Perform attribute selection with a particular evaluator and a set of
   * options specifying search method and input file etc.
   *
   * @param ASEvaluator an evaluator object
   * @param options an array of options, not only for the evaluator but also
   *          the search method (if any) and an input data file
   * @return the results of attribute selection as a String
   * @exception Exception if no training file is set, help is requested, or
   *              the data cannot be loaded
   */
  public static String SelectAttributes(ASEvaluation ASEvaluator,
    String[] options) throws Exception {
    String trainFileName, searchName;
    Instances train = null;
    ASSearch searchMethod = null;
    // probe -h/-s on a clone so that the real options array is left intact
    // for the three-argument overload called at the bottom; only -i is
    // consumed from the original array here
    String[] optionsTmp = options.clone();
    boolean helpRequested = false;

    try {
      // get basic options (options common to all attribute selectors)
      trainFileName = Utils.getOption('i', options);
      helpRequested = Utils.getFlag('h', optionsTmp);

      if (helpRequested || (trainFileName.length() == 0)) {
        // instantiate the search method (if named) purely so that
        // makeOptionString can list its specific options in the help text
        searchName = Utils.getOption('s', optionsTmp);
        if (searchName.length() != 0) {
          String[] searchOptions = Utils.splitOptions(searchName);
          searchMethod =
            (ASSearch) Class.forName(searchOptions[0]).newInstance();
        }

        if (helpRequested) {
          throw new Exception("Help requested.");
        } else {
          throw new Exception("No training file given.");
        }
      }
    } catch (Exception e) {
      // re-throw with the full usage text appended
      throw new Exception('\n' + e.getMessage()
        + makeOptionString(ASEvaluator, searchMethod));
    }

    DataSource source = new DataSource(trainFileName);
    train = source.getDataSet();

    return SelectAttributes(ASEvaluator, options, train);
  }
  /**
   * Returns a string summarizing the results of repeated attribute selection
   * runs on splits of a dataset.
   *
   * NOTE(review): this method converts the accumulated sums in m_rankResults
   * in place into means and standard deviations, so calling it a second time
   * without re-running the cross validation produces different (wrong)
   * numbers.
   *
   * @return a summary of attribute selection results
   * @exception Exception if no attribute selection has been performed.
   */
  public String CVResultsString() throws Exception {
    StringBuffer CvString = new StringBuffer();

    if ((m_subsetResults == null && m_rankResults == null)
      || (m_trainInstances == null)) {
      throw new Exception("Attribute selection has not been performed yet!");
    }

    // width needed to print the largest attribute index
    int fieldWidth = (int) (Math.log(m_trainInstances.numAttributes()) + 1.0);

    CvString.append("\n\n=== Attribute selection " + m_numFolds
      + " fold cross-validation ");

    // stratification only happens for supervised evaluation with a nominal
    // class (see CrossValidateAttributes), so only report it then
    if (!(m_ASEvaluator instanceof UnsupervisedSubsetEvaluator)
      && !(m_ASEvaluator instanceof UnsupervisedAttributeEvaluator)
      && (m_trainInstances.classAttribute().isNominal())) {
      CvString.append("(stratified), seed: ");
      CvString.append(m_seed + " ===\n\n");
    } else {
      CvString.append("seed: " + m_seed + " ===\n\n");
    }

    if ((m_searchMethod instanceof RankedOutputSearch) && (m_doRank == true)) {
      CvString.append("average merit average rank attribute\n");

      // calculate means and std devs in place:
      // row 0/1 hold sums of merit/rank -> become means;
      // row 2/3 hold sums of squares -> become std devs via
      // sqrt((sum(x^2) - n*mean^2) / n)
      for (int i = 0; i < m_rankResults[0].length; i++) {
        m_rankResults[0][i] /= m_numFolds; // mean merit
        double var = m_rankResults[0][i] * m_rankResults[0][i] * m_numFolds;
        var = (m_rankResults[2][i] - var);
        var /= m_numFolds;

        if (var <= 0.0) {
          // guard against negative variance from floating point rounding
          var = 0.0;
          m_rankResults[2][i] = 0;
        } else {
          m_rankResults[2][i] = Math.sqrt(var);
        }

        m_rankResults[1][i] /= m_numFolds; // mean rank
        var = m_rankResults[1][i] * m_rankResults[1][i] * m_numFolds;
        var = (m_rankResults[3][i] - var);
        var /= m_numFolds;

        if (var <= 0.0) {
          var = 0.0;
          m_rankResults[3][i] = 0;
        } else {
          m_rankResults[3][i] = Math.sqrt(var);
        }
      }

      // now sort them by mean rank (ascending: best rank first)
      int[] s = Utils.sort(m_rankResults[1]);
      for (int element : s) {
        // a mean rank of 0 means the attribute was never ranked
        if (m_rankResults[1][element] > 0) {
          CvString.append(Utils.doubleToString(
            m_rankResults[0][element], 6, 3)
            + " +-"
            + Utils.doubleToString(m_rankResults[2][element], 6, 3)
            + " "
            + Utils
              .doubleToString(m_rankResults[1][element], fieldWidth + 2, 1)
            + " +-" + Utils.doubleToString(m_rankResults[3][element], 5, 2)
            + " " + Utils.doubleToString((element + 1), fieldWidth, 0) + " "
            + m_trainInstances.attribute(element).name() + "\n");
        }
      }
    } else {
      // subset case: report how often each attribute was selected
      CvString.append("number of folds (%) attribute\n");
      for (int i = 0; i < m_subsetResults.length; i++) {
        // skip the class attribute for supervised subset evaluators
        if ((m_ASEvaluator instanceof UnsupervisedSubsetEvaluator)
          || (i != m_trainInstances.classIndex())) {
          CvString.append(Utils.doubleToString(m_subsetResults[i], 12, 0)
            + "("
            + Utils.doubleToString((m_subsetResults[i] / m_numFolds * 100.0),
              3, 0) + " %) " + Utils.doubleToString((i + 1), fieldWidth, 0)
            + " " + m_trainInstances.attribute(i).name() + "\n");
        }
      }
    }
    return CvString.toString();
  }
/**
* Select attributes for a split of the data. Calling this function updates
* the statistics on attribute selection. CVResultsString() returns a string
* summarizing the results of repeated calls to this function. Assumes that
* splits are from the same dataset--- ie. have the same number and types of
* attributes as previous splits.
*
* @param split the instances to select attributes from
* @exception Exception if an error occurs
*/
public void selectAttributesCVSplit(Instances split) throws Exception {
m_ASEvaluator.buildEvaluator(split);
// Do the search
int[] attributeSet = m_searchMethod.search(m_ASEvaluator, split);
// Do any postprocessing that a attribute selection method might
// require
attributeSet = m_ASEvaluator.postProcess(attributeSet);
updateStatsForModelCVSplit(split, m_ASEvaluator, m_searchMethod,
attributeSet, m_doRank);
}
  /**
   * Update the attribute selection stats for a cross-validation fold of the
   * data.
   *
   * @param split the instances in this split/fold of the data
   * @param evaluator the evaluator that was used
   * @param search the search that was used
   * @param attributeSet the final subset produced for the split
   * @param doRank whether to produce a ranking
   * @throws Exception if a problem occurs
   */
  public void updateStatsForModelCVSplit(Instances split,
    ASEvaluation evaluator, ASSearch search, int[] attributeSet, boolean doRank)
    throws Exception {
    double[][] attributeRanking = null;

    // if the train instances are null then set equal to this split.
    // If this is the case then this function is more than likely being
    // called from outside this class in order to obtain CV statistics
    // and all we need m_trainInstances for is to get at attribute names
    // and types etc.
    if (m_trainInstances == null) {
      m_trainInstances = split;
    }

    // create space to hold statistics on first call
    if (m_rankResults == null && m_subsetResults == null) {
      m_subsetResults = new double[split.numAttributes()];
      m_rankResults = new double[4][split.numAttributes()];
    }

    if ((search instanceof RankedOutputSearch) && doRank) {
      // accumulate per-attribute sums, indexed by attribute number
      // (attributeRanking[j][0] is the attribute index of the j-th ranked
      // attribute, attributeRanking[j][1] is its merit)
      attributeRanking = ((RankedOutputSearch) search).rankedAttributes();
      for (int j = 0; j < attributeRanking.length; j++) {
        // merit
        m_rankResults[0][(int) attributeRanking[j][0]] +=
          attributeRanking[j][1];
        // squared merit (for std dev)
        m_rankResults[2][(int) attributeRanking[j][0]] +=
          (attributeRanking[j][1] * attributeRanking[j][1]);
        // rank (1-based position in the ranking)
        m_rankResults[1][(int) attributeRanking[j][0]] += (j + 1);
        // squared rank (for std dev)
        m_rankResults[3][(int) attributeRanking[j][0]] += (j + 1) * (j + 1);
      }
    } else {
      // subset case: count how many folds each attribute was selected in
      for (int j = 0; j < attributeSet.length; j++) {
        m_subsetResults[attributeSet[j]]++;
      }
    }
  }
/**
* Perform a cross validation for attribute selection. With subset evaluators
* the number of times each attribute is selected over the cross validation is
* reported. For attribute evaluators, the average merit and average ranking +
* std deviation is reported for each attribute.
*
* @return the results of cross validation as a String
* @exception Exception if an error occurs during cross validation
*/
public String CrossValidateAttributes() throws Exception {
Instances cvData = new Instances(m_trainInstances);
Instances train;
Random random = new Random(m_seed);
cvData.randomize(random);
if (!(m_ASEvaluator instanceof UnsupervisedSubsetEvaluator)
&& !(m_ASEvaluator instanceof UnsupervisedAttributeEvaluator)) {
if (cvData.classAttribute().isNominal()) {
cvData.stratify(m_numFolds);
}
}
for (int i = 0; i < m_numFolds; i++) {
// Perform attribute selection
train = cvData.trainCV(m_numFolds, i, random);
selectAttributesCVSplit(train);
}
return CVResultsString();
}
/**
* Perform attribute selection on the supplied training instances.
*
* @param data the instances to select attributes from
* @exception Exception if there is a problem during selection
*/
public void SelectAttributes(Instances data) throws Exception {
int[] attributeSet;
m_transformer = null;
m_attributeFilter = null;
m_trainInstances = data;
if (m_doXval == true && (m_ASEvaluator instanceof AttributeTransformer)) {
throw new Exception("Can't cross validate an attribute transformer.");
}
if (m_ASEvaluator instanceof SubsetEvaluator
&& m_searchMethod instanceof Ranker) {
throw new Exception(m_ASEvaluator.getClass().getName()
+ " must use a search method other than Ranker");
}
if (m_ASEvaluator instanceof AttributeEvaluator
&& !(m_searchMethod instanceof Ranker)) {
// System.err.println("AttributeEvaluators must use a Ranker search "
// +"method. Switching to Ranker...");
// m_searchMethod = new Ranker();
throw new Exception("AttributeEvaluators must use the Ranker search "
+ "method");
}
if (m_searchMethod instanceof RankedOutputSearch) {
m_doRank = ((RankedOutputSearch) m_searchMethod).getGenerateRanking();
}
if (m_ASEvaluator instanceof UnsupervisedAttributeEvaluator
|| m_ASEvaluator instanceof UnsupervisedSubsetEvaluator) {
// unset the class index
// m_trainInstances.setClassIndex(-1);
} else {
// check that a class index has been set
if (m_trainInstances.classIndex() < 0) {
m_trainInstances.setClassIndex(m_trainInstances.numAttributes() - 1);
}
}
// Initialize the attribute evaluator
m_ASEvaluator.buildEvaluator(m_trainInstances);
if (m_ASEvaluator instanceof AttributeTransformer) {
m_trainInstances =
((AttributeTransformer) m_ASEvaluator).transformedHeader();
m_transformer = (AttributeTransformer) m_ASEvaluator;
}
int fieldWidth = (int) (Math.log(m_trainInstances.numAttributes()) + 1.0);
// Do the search
attributeSet = m_searchMethod.search(m_ASEvaluator, m_trainInstances);
// try and determine if the search method uses an attribute transformer---
// this is a bit of a hack to make things work properly with RankSearch
// using PrincipalComponents as its attribute ranker
try {
BeanInfo bi = Introspector.getBeanInfo(m_searchMethod.getClass());
PropertyDescriptor properties[];
// methods = bi.getMethodDescriptors();
properties = bi.getPropertyDescriptors();
for (PropertyDescriptor propertie : properties) {
propertie.getDisplayName();
Method meth = propertie.getReadMethod();
Object retType = meth.getReturnType();
if (retType.equals(ASEvaluation.class)) {
Class> args[] = {};
ASEvaluation tempEval =
(ASEvaluation) (meth.invoke(m_searchMethod, (Object[]) args));
if (tempEval instanceof AttributeTransformer) {
// grab the transformed data header
m_trainInstances =
((AttributeTransformer) tempEval).transformedHeader();
m_transformer = (AttributeTransformer) tempEval;
}
}
}
} catch (IntrospectionException ex) {
System.err.println("AttributeSelection: Couldn't " + "introspect");
}
// Do any postprocessing that a attribute selection method might require
attributeSet = m_ASEvaluator.postProcess(attributeSet);
if (!m_doRank) {
m_selectionResults.append(printSelectionResults());
}
if ((m_searchMethod instanceof RankedOutputSearch) && m_doRank == true) {
try {
m_attributeRanking =
((RankedOutputSearch) m_searchMethod).rankedAttributes();
} catch (Exception ex) {
ex.printStackTrace();
throw ex;
}
m_selectionResults.append(printSelectionResults());
m_selectionResults.append("Ranked attributes:\n");
// retrieve the number of attributes to retain
m_numToSelect =
((RankedOutputSearch) m_searchMethod).getCalculatedNumToSelect();
// determine fieldwidth for merit
int f_p = 0;
int w_p = 0;
for (int i = 0; i < m_numToSelect; i++) {
double precision =
(Math.abs(m_attributeRanking[i][1]) - (int) (Math
.abs(m_attributeRanking[i][1])));
double intPart = (int) (Math.abs(m_attributeRanking[i][1]));
if (precision > 0) {
precision =
Math.abs((Math.log(Math.abs(precision)) / Math.log(10))) + 3;
}
if (precision > f_p) {
f_p = (int) precision;
}
if (intPart == 0) {
if (w_p < 2) {
w_p = 2;
}
} else if ((Math
.abs((Math.log(Math.abs(m_attributeRanking[i][1])) / Math.log(10))) + 1) > w_p) {
if (m_attributeRanking[i][1] > 0) {
w_p =
(int) Math
.abs(
(Math.log(Math.abs(m_attributeRanking[i][1])) / Math.log(10))) + 1;
}
}
}
for (int i = 0; i < m_numToSelect; i++) {
m_selectionResults.append(Utils.doubleToString(
m_attributeRanking[i][1], f_p + w_p + 1, f_p)
+ Utils.doubleToString((m_attributeRanking[i][0] + 1),
fieldWidth + 1, 0)
+ " "
+ m_trainInstances.attribute((int) m_attributeRanking[i][0]).name()
+ "\n");
}
// set up the selected attributes array - usable by a filter or
// whatever
if (m_trainInstances.classIndex() >= 0) {
if ((!(m_ASEvaluator instanceof UnsupervisedSubsetEvaluator) && !(m_ASEvaluator instanceof UnsupervisedAttributeEvaluator))
|| m_ASEvaluator instanceof AttributeTransformer) {
// one more for the class
m_selectedAttributeSet = new int[m_numToSelect + 1];
m_selectedAttributeSet[m_numToSelect] = m_trainInstances.classIndex();
} else {
m_selectedAttributeSet = new int[m_numToSelect];
}
} else {
m_selectedAttributeSet = new int[m_numToSelect];
}
m_selectionResults.append("\nSelected attributes: ");
for (int i = 0; i < m_numToSelect; i++) {
m_selectedAttributeSet[i] = (int) m_attributeRanking[i][0];
if (i == m_numToSelect - 1) {
m_selectionResults.append(((int) m_attributeRanking[i][0] + 1)
+ " : " + (i + 1) + "\n");
} else {
m_selectionResults.append(((int) m_attributeRanking[i][0] + 1));
m_selectionResults.append(",");
}
}
} else {
// set up the selected attributes array - usable by a filter or
// whatever
if ((!(m_ASEvaluator instanceof UnsupervisedSubsetEvaluator) && !(m_ASEvaluator instanceof UnsupervisedAttributeEvaluator))
|| m_trainInstances.classIndex() >= 0)
// one more for the class
{
m_selectedAttributeSet = new int[attributeSet.length + 1];
m_selectedAttributeSet[attributeSet.length] =
m_trainInstances.classIndex();
} else {
m_selectedAttributeSet = new int[attributeSet.length];
}
for (int i = 0; i < attributeSet.length; i++) {
m_selectedAttributeSet[i] = attributeSet[i];
}
m_selectionResults.append("Selected attributes: ");
for (int i = 0; i < attributeSet.length; i++) {
if (i == (attributeSet.length - 1)) {
m_selectionResults.append((attributeSet[i] + 1) + " : "
+ attributeSet.length + "\n");
} else {
m_selectionResults.append((attributeSet[i] + 1) + ",");
}
}
for (int element : attributeSet) {
m_selectionResults.append(" "
+ m_trainInstances.attribute(element).name() + "\n");
}
}
// Cross validation should be called from here
if (m_doXval == true) {
m_selectionResults.append(CrossValidateAttributes());
}
// set up the attribute filter with the selected attributes
if (m_selectedAttributeSet != null && !m_doXval) {
m_attributeFilter = new Remove();
m_attributeFilter.setAttributeIndicesArray(m_selectedAttributeSet);
m_attributeFilter.setInvertSelection(true);
m_attributeFilter.setInputFormat(m_trainInstances);
}
// Save space
m_trainInstances = new Instances(m_trainInstances, 0);
m_ASEvaluator.clean();
}
  /**
   * Perform attribute selection with a particular evaluator and a set of
   * options specifying search method and options for the search method and
   * evaluator.
   *
   * NOTE: options consumed here (-h, -c, -x, -n, -s) are removed from the
   * options array; the remainder is handed to the evaluator's setOptions and
   * then checked for leftovers.
   *
   * @param ASEvaluator an evaluator object
   * @param options an array of options, not only for the evaluator but also
   *          the search method (if any) and an input data file
   * @param train the input instances
   * @return the results of attribute selection as a String
   * @exception Exception if incorrect options are supplied
   */
  public static String SelectAttributes(ASEvaluation ASEvaluator,
    String[] options, Instances train) throws Exception {
    int seed = 1, folds = 10;
    String foldsString, seedString, searchName;
    String classString;
    String searchClassName;
    String[] searchOptions = null; // new String [1];
    ASSearch searchMethod = null;
    boolean doCrossVal = false;
    int classIndex = -1;
    boolean helpRequested = false;
    AttributeSelection trainSelector = new AttributeSelection();

    try {
      if (Utils.getFlag('h', options)) {
        helpRequested = true;
      }

      // does data already have a class attribute set?
      // (classIndex is held 1-based here; -1 means "none")
      if (train.classIndex() != -1) {
        classIndex = train.classIndex() + 1;
      }

      // get basic options (options common to all attribute selectors)
      classString = Utils.getOption('c', options);

      if (classString.length() != 0) {
        if (classString.equals("first")) {
          classIndex = 1;
        } else if (classString.equals("last")) {
          classIndex = train.numAttributes();
        } else {
          classIndex = Integer.parseInt(classString);
        }
      }

      if ((classIndex != -1)
        && ((classIndex == 0) || (classIndex > train.numAttributes()))) {
        throw new Exception("Class index out of range.");
      }

      if (classIndex != -1) {
        train.setClassIndex(classIndex - 1);
      } else {
        // no class index: leave it unset (unsupervised evaluators are fine
        // with this; SelectAttributes(Instances) defaults it for supervised)
      }

      foldsString = Utils.getOption('x', options);
      if (foldsString.length() != 0) {
        folds = Integer.parseInt(foldsString);
        doCrossVal = true;
      }

      trainSelector.setFolds(folds);
      trainSelector.setXval(doCrossVal);

      seedString = Utils.getOption('n', options);
      if (seedString.length() != 0) {
        seed = Integer.parseInt(seedString);
      }

      trainSelector.setSeed(seed);

      searchName = Utils.getOption('s', options);
      if ((searchName.length() == 0)
        && (!(ASEvaluator instanceof AttributeEvaluator))) {
        throw new Exception("No search method given.");
      }

      if (searchName.length() != 0) {
        searchName = searchName.trim();
        // split off any search options from the class name
        int breakLoc = searchName.indexOf(' ');
        searchClassName = searchName;
        String searchOptionsString = "";

        if (breakLoc != -1) {
          searchClassName = searchName.substring(0, breakLoc);
          searchOptionsString = searchName.substring(breakLoc).trim();
          searchOptions = Utils.splitOptions(searchOptionsString);
        }
      } else {
        // attribute evaluators default to a Ranker search
        try {
          searchClassName = new String("weka.attributeSelection.Ranker");
          searchMethod =
            (ASSearch) Class.forName(searchClassName).newInstance();
        } catch (Exception e) {
          throw new Exception("Can't create Ranker object");
        }
      }

      // if evaluator is a subset evaluator
      // create search method and set its options (if any)
      if (searchMethod == null) {
        searchMethod = ASSearch.forName(searchClassName, searchOptions);
      }

      // set the search method
      trainSelector.setSearch(searchMethod);
    } catch (Exception e) {
      throw new Exception('\n' + e.getMessage()
        + makeOptionString(ASEvaluator, searchMethod));
    }

    try {
      // Set options for ASEvaluator (everything not consumed above)
      if (ASEvaluator instanceof OptionHandler) {
        ((OptionHandler) ASEvaluator).setOptions(options);
      }
    } catch (Exception e) {
      throw new Exception("\n" + e.getMessage()
        + makeOptionString(ASEvaluator, searchMethod));
    }

    try {
      // any option still present at this point is unrecognised
      Utils.checkForRemainingOptions(options);
    } catch (Exception e) {
      throw new Exception('\n' + e.getMessage()
        + makeOptionString(ASEvaluator, searchMethod));
    }

    if (helpRequested) {
      System.out.println(makeOptionString(ASEvaluator, searchMethod));
      System.exit(0);
    }

    // set the attribute evaluator
    trainSelector.setEvaluator(ASEvaluator);

    // do the attribute selection
    trainSelector.SelectAttributes(train);

    // return the results string
    return trainSelector.toResultsString();
  }
/**
* Assembles a text description of the attribute selection results.
*
* @return a string describing the results of attribute selection.
*/
private String printSelectionResults() {
StringBuffer text = new StringBuffer();
text.append("\n\n=== Attribute Selection on all input data ===\n\n"
+ "Search Method:\n");
text.append(m_searchMethod.toString());
text.append("\nAttribute ");
if (m_ASEvaluator instanceof SubsetEvaluator) {
text.append("Subset Evaluator (");
} else {
text.append("Evaluator (");
}
if (!(m_ASEvaluator instanceof UnsupervisedSubsetEvaluator)
&& !(m_ASEvaluator instanceof UnsupervisedAttributeEvaluator)) {
text.append("supervised, ");
text.append("Class (");
if (m_trainInstances.attribute(m_trainInstances.classIndex()).isNumeric()) {
text.append("numeric): ");
} else {
text.append("nominal): ");
}
text.append((m_trainInstances.classIndex() + 1) + " "
+ m_trainInstances.attribute(m_trainInstances.classIndex()).name()
+ "):\n");
} else {
text.append("unsupervised):\n");
}
text.append(m_ASEvaluator.toString() + "\n");
return text.toString();
}
/**
* Make up the help string giving all the command line options
*
* @param ASEvaluator the attribute evaluator to include options for
* @param searchMethod the search method to include options for
* @return a string detailing the valid command line options
* @throws Exception if something goes wrong
*/
private static String makeOptionString(ASEvaluation ASEvaluator,
ASSearch searchMethod) throws Exception {
StringBuffer optionsText = new StringBuffer("");
// General options
optionsText.append("\n\nGeneral options:\n\n");
optionsText.append("-h\n\tdisplay this help\n");
optionsText.append("-i \n");
optionsText.append("\tSets training file.\n");
optionsText.append("-c \n");
optionsText.append("\tSets the class index for supervised attribute\n");
optionsText.append("\tselection. Default=last column.\n");
optionsText.append("-s \n");
optionsText.append("\tSets search method for subset evaluators.\n");
optionsText.append("-x \n");
optionsText.append("\tPerform a cross validation.\n");
optionsText.append("-n \n");
optionsText.append("\tUse in conjunction with -x.\n");
// Get attribute evaluator-specific options
if (ASEvaluator instanceof OptionHandler) {
optionsText.append("\nOptions specific to "
+ ASEvaluator.getClass().getName() + ":\n\n");
Enumeration
© 2015 - 2024 Weber Informatics LLC | Privacy Policy