weka.attributeSelection.AttributeSelection Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of weka-stable Show documentation
Show all versions of weka-stable Show documentation
The Waikato Environment for Knowledge Analysis (WEKA), a machine
learning workbench. This is the stable version. Apart from bugfixes, this version
does not receive any other updates.
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
/*
* AttributeSelection.java
* Copyright (C) 1999 University of Waikato, Hamilton, New Zealand
*
*/
package weka.attributeSelection;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.Option;
import weka.core.OptionHandler;
import weka.core.RevisionHandler;
import weka.core.RevisionUtils;
import weka.core.Utils;
import weka.core.converters.ConverterUtils.DataSource;
import weka.filters.Filter;
import weka.filters.unsupervised.attribute.Remove;
import java.beans.BeanInfo;
import java.beans.IntrospectionException;
import java.beans.Introspector;
import java.beans.MethodDescriptor;
import java.beans.PropertyDescriptor;
import java.io.Serializable;
import java.lang.reflect.Method;
import java.util.Enumeration;
import java.util.Random;
/**
* Attribute selection class. Takes the name of a search class and
* an evaluation class on the command line.
*
* Valid options are:
*
* -h
* Display help.
*
* -i <name of input file>
* Specify the training data file.
*
* -c <class index>
* The index of the attribute to use as the class.
*
* -s <search method>
* The full class name of the search method followed by search method options
* (if any).
* Eg. -s "weka.attributeSelection.BestFirst -N 10"
*
* -x <number of folds>
* Perform a cross validation.
*
* -n <random number seed>
* Specify a random number seed. Use in conjunction with -x. (Default = 1).
*
* ------------------------------------------------------------------------
*
* Example usage as the main of an attribute evaluator (called FunkyEvaluator):
*
* public static void main(String [] args) {
* runEvaluator(new FunkyEvaluator(), args);
* }
*
*
*
* ------------------------------------------------------------------------
*
* @author Mark Hall ([email protected])
* @version $Revision: 7953 $
*/
public class AttributeSelection
implements Serializable, RevisionHandler {
/** for serialization */
static final long serialVersionUID = 4170171824147584330L;
/**
 * the instances to select attributes from; replaced by the transformed
 * header when the evaluator is an AttributeTransformer
 */
private Instances m_trainInstances;
/** the attribute/subset evaluator */
private ASEvaluation m_ASEvaluator;
/** the search method */
private ASSearch m_searchMethod;
/** the number of folds to use for cross validation */
private int m_numFolds;
/** holds a string describing the results of the attribute selection */
private StringBuffer m_selectionResults;
/** rank features (if allowed by the search method) */
private boolean m_doRank;
/** do cross validation */
private boolean m_doXval;
/** seed used to randomly shuffle instances for cross validation */
private int m_seed;
/** number of attributes requested from ranked results */
private int m_numToSelect;
/** the selected attributes */
private int [] m_selectedAttributeSet;
/**
 * the attribute indexes and associated merits if a ranking is produced;
 * each row holds the attribute index in column 0 and its merit in column 1
 */
private double [][] m_attributeRanking;
/** if a feature selection run involves an attribute transformer */
private AttributeTransformer m_transformer = null;
/**
 * the attribute filter for processing instances with respect to
 * the most recent feature selection run
 */
private Remove m_attributeFilter = null;
/**
 * statistics for repeated ranked feature selection, such as under cross
 * validation, indexed [statistic][attribute]. As accumulated by
 * selectAttributesCVSplit: row 0 sums the merits, row 1 sums the ranks,
 * row 2 sums the squared merits and row 3 sums the squared ranks.
 */
private double [][] m_rankResults = null;
/**
 * for repeated subset selection: per attribute, the number of splits in
 * which selectAttributesCVSplit found the attribute in the selected set
 */
private double [] m_subsetResults = null;
/** number of completed calls to selectAttributesCVSplit */
private int m_trials = 0;
/**
 * Reports how many attributes the most recent attribute selection run
 * chose. The value is one less than the length of the array handed back
 * by selectedAttributes().
 *
 * NOTE(review): the subtraction presumably discounts a trailing class
 * index entry in the selected set - confirm for runs without a class.
 *
 * @return the number of attributes selected
 * @throws Exception if attribute selection has not been performed yet
 */
public int numberAttributesSelected() throws Exception {
  return selectedAttributes().length - 1;
}
/**
 * Returns the set of attributes chosen by the last selection run.
 * The internal array itself is returned, not a copy.
 *
 * @return an array of attribute indexes
 * @throws Exception if attribute selection has not been performed yet
 */
public int [] selectedAttributes () throws Exception {
  if (m_selectedAttributeSet != null) {
    return m_selectedAttributeSet;
  }
  throw new Exception("Attribute selection has not been performed yet!");
}
/**
 * Returns the attribute ranking produced by the last selection run.
 * The internal array itself is returned, not a copy.
 *
 * @return a two dimensional array of ranked attribute indexes and their
 *         associated merit scores as doubles
 * @throws Exception if a ranking has not been produced
 */
public double [][] rankedAttributes () throws Exception {
  if (m_attributeRanking != null) {
    return m_attributeRanking;
  }
  throw new Exception("Ranking has not been performed");
}
/**
 * Installs the attribute/subset evaluator used by subsequent selection runs.
 *
 * @param evaluator the evaluator to use
 */
public void setEvaluator (ASEvaluation evaluator) {
  this.m_ASEvaluator = evaluator;
}
/**
 * Installs the search method. When the search can produce ranked output,
 * the ranking flag of this object is synchronised with the search method's
 * own "generate ranking" setting.
 *
 * @param search the search method to use
 */
public void setSearch (ASSearch search) {
  m_searchMethod = search;
  if (!(search instanceof RankedOutputSearch)) {
    return;
  }
  RankedOutputSearch rankedSearch = (RankedOutputSearch) search;
  setRanking(rankedSearch.getGenerateRanking());
}
/**
 * Sets how many folds a cross validation is split into.
 *
 * @param folds the number of folds
 */
public void setFolds (int folds) {
  this.m_numFolds = folds;
}
/**
 * Requests a ranking (if possible with the configured search and evaluator).
 *
 * @param r true if a ranking is to be produced
 */
public void setRanking (boolean r) {
  this.m_doRank = r;
}
/**
 * Switches cross validation on or off.
 *
 * @param x true if a cross validation is to be performed
 */
public void setXval (boolean x) {
  this.m_doXval = x;
}
/**
 * Sets the random seed used when shuffling instances for cross validation.
 *
 * @param s the seed
 */
public void setSeed (int s) {
  this.m_seed = s;
}
/**
 * Returns the textual description of the attribute selection results
 * that has been collected in the results buffer.
 *
 * @return a String describing the results of attribute selection
 */
public String toResultsString() {
  final StringBuffer results = m_selectionResults;
  return results.toString();
}
/**
* reduce the dimensionality of a set of instances to include only those
* attributes chosen by the last run of attribute selection.
* @param in the instances to be reduced
* @return a dimensionality reduced set of instances
* @exception Exception if the instances can't be reduced
*/
public Instances reduceDimensionality(Instances in) throws Exception {
if (m_attributeFilter == null) {
throw new Exception("No feature selection has been performed yet!");
}
if (m_transformer != null) {
Instances transformed = new Instances(m_transformer.transformedHeader(),
in.numInstances());
for (int i=0;i 0) {
CvString.append(Utils.doubleToString(/*Math.
abs(*/m_rankResults[0][s[i]]/*)*/,
6, 3)
+ " +-"
+ Utils.doubleToString(m_rankResults[2][s[i]], 6, 3)
+ " "
+ Utils.doubleToString(m_rankResults[1][s[i]],
fieldWidth+2, 1)
+ " +-"
+ Utils.doubleToString(m_rankResults[3][s[i]], 5, 2)
+" "
+ Utils.doubleToString(((double)(s[i] + 1)),
fieldWidth, 0)
+ " "
+ m_trainInstances.attribute(s[i]).name()
+ "\n");
}
}
}
else {
CvString.append("number of folds (%) attribute\n");
for (int i = 0; i < m_subsetResults.length; i++) {
if ((m_ASEvaluator instanceof UnsupervisedSubsetEvaluator) ||
(i != m_trainInstances.classIndex())) {
CvString.append(Utils.doubleToString(m_subsetResults[i], 12, 0)
+ "("
+ Utils.doubleToString((m_subsetResults[i] /
m_numFolds * 100.0)
, 3, 0)
+ " %) "
+ Utils.doubleToString(((double)(i + 1)),
fieldWidth, 0)
+ " "
+ m_trainInstances.attribute(i).name()
+ "\n");
}
}
}
return CvString.toString();
}
/**
 * Runs attribute selection on one split of the data and folds the outcome
 * into the running statistics. CVResultsString() summarises the results of
 * repeated calls to this method. Every split is assumed to come from the
 * same dataset, i.e. to have the same number and types of attributes as
 * the splits seen before.
 *
 * @param split the instances to select attributes from
 * @throws Exception if an error occurs
 */
public void selectAttributesCVSplit(Instances split) throws Exception {
  // Without training instances this method is most likely being driven
  // from outside the class purely to gather CV statistics; the split then
  // doubles as the source of attribute names and types.
  if (m_trainInstances == null) {
    m_trainInstances = split;
  }

  // Allocate the statistics holders the first time through.
  if (m_rankResults == null && m_subsetResults == null) {
    m_subsetResults = new double[split.numAttributes()];
    m_rankResults = new double[4][split.numAttributes()];
  }

  m_ASEvaluator.buildEvaluator(split);

  // Search, then let the evaluator post-process the chosen set.
  int[] chosen = m_searchMethod.search(m_ASEvaluator, split);
  chosen = m_ASEvaluator.postProcess(chosen);

  boolean accumulateRanking =
    (m_searchMethod instanceof RankedOutputSearch) && m_doRank;

  if (!accumulateRanking) {
    // Subset mode: count how often each attribute gets selected.
    for (int k = 0; k < chosen.length; k++) {
      m_subsetResults[chosen[k]]++;
    }
  } else {
    double[][] ranking =
      ((RankedOutputSearch) m_searchMethod).rankedAttributes();
    for (int pos = 0; pos < ranking.length; pos++) {
      int attIndex = (int) ranking[pos][0];
      double merit = ranking[pos][1];
      int rank = pos + 1;
      // merit
      m_rankResults[0][attIndex] += merit;
      // squared merit
      m_rankResults[2][attIndex] += (merit * merit);
      // rank
      m_rankResults[1][attIndex] += rank;
      // squared rank
      m_rankResults[3][attIndex] += rank * rank;
    }
  }

  m_trials++;
}
/**
 * Cross validates the attribute selection. A shuffled copy of the training
 * instances is split into folds and attribute selection is run on the
 * training portion of every fold. With subset evaluators the number of
 * times each attribute is selected over the cross validation is reported.
 * For attribute evaluators, the average merit and average ranking + std
 * deviation is reported for each attribute.
 *
 * @return the results of cross validation as a String
 * @throws Exception if an error occurs during cross validation
 */
public String CrossValidateAttributes () throws Exception {
  Instances shuffled = new Instances(m_trainInstances);
  Random rng = new Random(m_seed);
  shuffled.randomize(rng);

  // Stratify only for supervised evaluators with a nominal class.
  boolean unsupervised =
    (m_ASEvaluator instanceof UnsupervisedSubsetEvaluator)
    || (m_ASEvaluator instanceof UnsupervisedAttributeEvaluator);
  if (!unsupervised && shuffled.classAttribute().isNominal()) {
    shuffled.stratify(m_numFolds);
  }

  int fold = 0;
  while (fold < m_numFolds) {
    // Perform attribute selection on this fold's training data
    Instances foldTrain = shuffled.trainCV(m_numFolds, fold, rng);
    selectAttributesCVSplit(foldTrain);
    fold++;
  }

  return CVResultsString();
}
/**
* Perform attribute selection on the supplied training instances.
*
* @param data the instances to select attributes from
* @exception Exception if there is a problem during selection
*/
public void SelectAttributes (Instances data) throws Exception {
int [] attributeSet;
m_transformer = null;
m_attributeFilter = null;
m_trainInstances = data;
if (m_doXval == true && (m_ASEvaluator instanceof AttributeTransformer)) {
throw new Exception("Can't cross validate an attribute transformer.");
}
if (m_ASEvaluator instanceof SubsetEvaluator &&
m_searchMethod instanceof Ranker) {
throw new Exception(m_ASEvaluator.getClass().getName()
+" must use a search method other than Ranker");
}
if (m_ASEvaluator instanceof AttributeEvaluator &&
!(m_searchMethod instanceof Ranker)) {
// System.err.println("AttributeEvaluators must use a Ranker search "
// +"method. Switching to Ranker...");
// m_searchMethod = new Ranker();
throw new Exception("AttributeEvaluators must use the Ranker search "
+ "method");
}
if (m_searchMethod instanceof RankedOutputSearch) {
m_doRank = ((RankedOutputSearch)m_searchMethod).getGenerateRanking();
}
if (m_ASEvaluator instanceof UnsupervisedAttributeEvaluator ||
m_ASEvaluator instanceof UnsupervisedSubsetEvaluator) {
// unset the class index
// m_trainInstances.setClassIndex(-1);
} else {
// check that a class index has been set
if (m_trainInstances.classIndex() < 0) {
m_trainInstances.setClassIndex(m_trainInstances.numAttributes()-1);
}
}
// Initialize the attribute evaluator
m_ASEvaluator.buildEvaluator(m_trainInstances);
if (m_ASEvaluator instanceof AttributeTransformer) {
m_trainInstances =
((AttributeTransformer)m_ASEvaluator).transformedHeader();
m_transformer = (AttributeTransformer)m_ASEvaluator;
}
int fieldWidth = (int)(Math.log(m_trainInstances.numAttributes()) +1.0);
// Do the search
attributeSet = m_searchMethod.search(m_ASEvaluator,
m_trainInstances);
// try and determine if the search method uses an attribute transformer---
// this is a bit of a hack to make things work properly with RankSearch
// using PrincipalComponents as its attribute ranker
try {
BeanInfo bi = Introspector.getBeanInfo(m_searchMethod.getClass());
PropertyDescriptor properties[];
MethodDescriptor methods[];
// methods = bi.getMethodDescriptors();
properties = bi.getPropertyDescriptors();
for (int i=0;i 0) {
precision = Math.abs((Math.log(Math.abs(precision)) /
Math.log(10)))+3;
}
if (precision > f_p) {
f_p = (int)precision;
}
if (intPart == 0) {
if (w_p < 2) {
w_p = 2;
}
} else if ((Math.abs((Math.log(Math.abs(m_attributeRanking[i][1]))
/ Math.log(10)))+1) > w_p) {
if (m_attributeRanking[i][1] > 0) {
w_p = (int)Math.abs((Math.log(Math.abs(m_attributeRanking[i][1]))
/ Math.log(10)))+1;
}
}
}
for (int i = 0; i < m_numToSelect; i++) {
m_selectionResults.
append(Utils.doubleToString(m_attributeRanking[i][1],
f_p+w_p+1,f_p)
+ Utils.doubleToString((m_attributeRanking[i][0] + 1),
fieldWidth+1,0)
+ " "
+ m_trainInstances.
attribute((int)m_attributeRanking[i][0]).name()
+ "\n");
}
// set up the selected attributes array - usable by a filter or
// whatever
if (m_trainInstances.classIndex() >= 0) {
if ((!(m_ASEvaluator instanceof UnsupervisedSubsetEvaluator)
&& !(m_ASEvaluator instanceof UnsupervisedAttributeEvaluator)) ||
m_ASEvaluator instanceof AttributeTransformer) {
// one more for the class
m_selectedAttributeSet = new int[m_numToSelect + 1];
m_selectedAttributeSet[m_numToSelect] =
m_trainInstances.classIndex();
} else {
m_selectedAttributeSet = new int[m_numToSelect];
}
} else {
m_selectedAttributeSet = new int[m_numToSelect];
}
m_selectionResults.append("\nSelected attributes: ");
for (int i = 0; i < m_numToSelect; i++) {
m_selectedAttributeSet[i] = (int)m_attributeRanking[i][0];
if (i == m_numToSelect - 1) {
m_selectionResults.append(((int)m_attributeRanking[i][0] + 1)
+ " : "
+ (i + 1)
+ "\n");
}
else {
m_selectionResults.append(((int)m_attributeRanking[i][0] + 1));
m_selectionResults.append(",");
}
}
} else {
// set up the selected attributes array - usable by a filter or
// whatever
if ((!(m_ASEvaluator instanceof UnsupervisedSubsetEvaluator)
&& !(m_ASEvaluator instanceof UnsupervisedAttributeEvaluator)) ||
m_trainInstances.classIndex() >= 0)
// one more for the class
{
m_selectedAttributeSet = new int[attributeSet.length + 1];
m_selectedAttributeSet[attributeSet.length] =
m_trainInstances.classIndex();
}
else {
m_selectedAttributeSet = new int[attributeSet.length];
}
for (int i = 0; i < attributeSet.length; i++) {
m_selectedAttributeSet[i] = attributeSet[i];
}
m_selectionResults.append("Selected attributes: ");
for (int i = 0; i < attributeSet.length; i++) {
if (i == (attributeSet.length - 1)) {
m_selectionResults.append((attributeSet[i] + 1)
+ " : "
+ attributeSet.length
+ "\n");
}
else {
m_selectionResults.append((attributeSet[i] + 1) + ",");
}
}
for (int i=0;i train.numAttributes()))) {
throw new Exception("Class index out of range.");
}
if (classIndex != -1) {
train.setClassIndex(classIndex - 1);
}
else {
// classIndex = train.numAttributes();
// train.setClassIndex(classIndex - 1);
}
foldsString = Utils.getOption('x', options);
if (foldsString.length() != 0) {
folds = Integer.parseInt(foldsString);
doCrossVal = true;
}
trainSelector.setFolds(folds);
trainSelector.setXval(doCrossVal);
seedString = Utils.getOption('n', options);
if (seedString.length() != 0) {
seed = Integer.parseInt(seedString);
}
trainSelector.setSeed(seed);
searchName = Utils.getOption('s', options);
if ((searchName.length() == 0) &&
(!(ASEvaluator instanceof AttributeEvaluator))) {
throw new Exception("No search method given.");
}
if (searchName.length() != 0) {
searchName = searchName.trim();
// split off any search options
int breakLoc = searchName.indexOf(' ');
searchClassName = searchName;
String searchOptionsString = "";
if (breakLoc != -1) {
searchClassName = searchName.substring(0, breakLoc);
searchOptionsString = searchName.substring(breakLoc).trim();
searchOptions = Utils.splitOptions(searchOptionsString);
}
}
else {
try {
searchClassName = new String("weka.attributeSelection.Ranker");
searchMethod = (ASSearch)Class.
forName(searchClassName).newInstance();
}
catch (Exception e) {
throw new Exception("Can't create Ranker object");
}
}
// if evaluator is a subset evaluator
// create search method and set its options (if any)
if (searchMethod == null) {
searchMethod = ASSearch.forName(searchClassName, searchOptions);
}
// set the search method
trainSelector.setSearch(searchMethod);
}
catch (Exception e) {
throw new Exception('\n' + e.getMessage()
+ makeOptionString(ASEvaluator, searchMethod));
}
try {
// Set options for ASEvaluator
if (ASEvaluator instanceof OptionHandler) {
((OptionHandler)ASEvaluator).setOptions(options);
}
/* // Set options for Search method
if (searchMethod instanceof OptionHandler)
{
if (searchOptions != null)
{
((OptionHandler)searchMethod).setOptions(searchOptions);
}
}
Utils.checkForRemainingOptions(searchOptions); */
}
catch (Exception e) {
throw new Exception("\n" + e.getMessage()
+ makeOptionString(ASEvaluator, searchMethod));
}
try {
Utils.checkForRemainingOptions(options);
}
catch (Exception e) {
throw new Exception('\n' + e.getMessage()
+ makeOptionString(ASEvaluator, searchMethod));
}
if (helpRequested) {
System.out.println(makeOptionString(ASEvaluator, searchMethod));
System.exit(0);
}
// set the attribute evaluator
trainSelector.setEvaluator(ASEvaluator);
// do the attribute selection
trainSelector.SelectAttributes(train);
// return the results string
return trainSelector.toResultsString();
}
/**
 * Builds the header of the results report: the search method, the kind of
 * evaluator (subset or single attribute, supervised or unsupervised), the
 * class attribute for supervised evaluators, and the evaluator's own
 * description.
 *
 * @return a string describing the results of attribute selection.
 */
private String printSelectionResults () {
  StringBuffer report = new StringBuffer();

  report.append("\n\n=== Attribute Selection on all input data ===\n\n"
                + "Search Method:\n");
  report.append(m_searchMethod.toString());
  report.append("\nAttribute ");
  report.append((m_ASEvaluator instanceof SubsetEvaluator)
                ? "Subset Evaluator ("
                : "Evaluator (");

  boolean unsupervised =
    (m_ASEvaluator instanceof UnsupervisedSubsetEvaluator)
    || (m_ASEvaluator instanceof UnsupervisedAttributeEvaluator);

  if (unsupervised) {
    report.append("unsupervised):\n");
  } else {
    report.append("supervised, ");
    report.append("Class (");
    int classIdx = m_trainInstances.classIndex();
    report.append(m_trainInstances.attribute(classIdx).isNumeric()
                  ? "numeric): "
                  : "nominal): ");
    // The class index is reported 1-based.
    report.append((classIdx + 1)
                  + " "
                  + m_trainInstances.attribute(classIdx).name()
                  + "):\n");
  }

  report.append(m_ASEvaluator.toString() + "\n");
  return report.toString();
}
/**
 * Make up the help string giving all the command line options: the general
 * options of this class, followed by the options of the evaluator and of
 * the search method when they implement OptionHandler.
 *
 * If no search method is supplied and the evaluator is a subset evaluator,
 * "No search method given." is additionally printed to standard output.
 *
 * @param ASEvaluator the attribute evaluator to include options for
 * @param searchMethod the search method to include options for (may be null)
 * @return a string detailing the valid command line options
 * @throws Exception if something goes wrong
 */
private static String makeOptionString (ASEvaluation ASEvaluator,
                                        ASSearch searchMethod)
  throws Exception {
  StringBuffer optionsText = new StringBuffer("");

  // General options. Each option that takes a value names its argument,
  // matching the option list in the class documentation.
  optionsText.append("\n\nGeneral options:\n\n");
  optionsText.append("-h\n\tdisplay this help\n");
  optionsText.append("-i <name of input file>\n");
  optionsText.append("\tSets training file.\n");
  optionsText.append("-c <class index>\n");
  optionsText.append("\tSets the class index for supervised attribute\n");
  optionsText.append("\tselection. Default=last column.\n");
  optionsText.append("-s <search method>\n");
  optionsText.append("\tSets search method for subset evaluators.\n");
  optionsText.append("-x <number of folds>\n");
  optionsText.append("\tPerform a cross validation.\n");
  optionsText.append("-n <random number seed>\n");
  optionsText.append("\tUse in conjunction with -x.\n");

  // Attribute evaluator-specific options
  if (ASEvaluator instanceof OptionHandler) {
    appendHandlerOptions(optionsText, (OptionHandler) ASEvaluator);
  }

  // Search method-specific options
  if (searchMethod != null) {
    if (searchMethod instanceof OptionHandler) {
      appendHandlerOptions(optionsText, (OptionHandler) searchMethod);
    }
  }
  else {
    if (ASEvaluator instanceof SubsetEvaluator) {
      System.out.println("No search method given.");
    }
  }

  return optionsText.toString();
}

/**
 * Appends an "Options specific to &lt;class name&gt;" section listing the
 * synopsis and description of every option the handler reports.
 *
 * @param optionsText the buffer to append to
 * @param handler the option handler whose options are listed
 */
private static void appendHandlerOptions (StringBuffer optionsText,
                                          OptionHandler handler) {
  optionsText.append("\nOptions specific to "
                     + handler.getClass().getName()
                     + ":\n\n");
  Enumeration enu = handler.listOptions();
  while (enu.hasMoreElements()) {
    Option option = (Option) enu.nextElement();
    optionsText.append(option.synopsis() + '\n');
    optionsText.append(option.description() + "\n");
  }
}
/**
 * Command line entry point. The first argument names the attribute/subset
 * evaluator class; the remaining arguments are passed on as options. The
 * selection results, or the message of any exception raised, are printed
 * to standard output.
 *
 * @param args the options
 */
public static void main (String[] args) {
  try {
    if (args.length < 1) {
      throw new Exception("The first argument must be the name of an "
                          + "attribute/subset evaluator");
    }

    // Blank the evaluator name so that it is not seen as an option later.
    final String evaluatorClass = args[0];
    args[0] = "";

    ASEvaluation evaluator = ASEvaluation.forName(evaluatorClass, null);
    String report = SelectAttributes(evaluator, args);
    System.out.println(report);
  }
  catch (Exception ex) {
    System.out.println(ex.getMessage());
  }
}
/**
 * Returns the revision string, extracted from the version control keyword.
 *
 * @return the revision
 */
public String getRevision() {
  final String revisionKeyword = "$Revision: 7953 $";
  return RevisionUtils.extract(revisionKeyword);
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy