All Downloads are FREE. Search and download functionalities are using the official Maven repository.

weka.attributeSelection.AttributeSelection Maven / Gradle / Ivy

Go to download

The Waikato Environment for Knowledge Analysis (WEKA), a machine learning workbench. This is the stable version. Apart from bugfixes, this version does not receive any other updates.

There is a newer version: 3.8.6
Show newest version
/*
 *   This program is free software: you can redistribute it and/or modify
 *   it under the terms of the GNU General Public License as published by
 *   the Free Software Foundation, either version 3 of the License, or
 *   (at your option) any later version.
 *
 *   This program is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *   GNU General Public License for more details.
 *
 *   You should have received a copy of the GNU General Public License
 *   along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

/*
 *    AttributeSelection.java
 *    Copyright (C) 1999-2012 University of Waikato, Hamilton, New Zealand
 *
 */

package weka.attributeSelection;

import java.beans.BeanInfo;
import java.beans.IntrospectionException;
import java.beans.Introspector;
import java.beans.PropertyDescriptor;
import java.io.Serializable;
import java.lang.reflect.Method;
import java.util.Enumeration;
import java.util.Random;

import weka.core.Instance;
import weka.core.Instances;
import weka.core.Option;
import weka.core.OptionHandler;
import weka.core.RevisionHandler;
import weka.core.RevisionUtils;
import weka.core.Utils;
import weka.core.converters.ConverterUtils.DataSource;
import weka.filters.Filter;
import weka.filters.unsupervised.attribute.Remove;

/**
 * Attribute selection class. Takes the name of a search class and an evaluation
 * class on the command line.
 * <p>
 *
 * Valid options are:
 * <p>
 *
 * -h <br>
 * Display help.
 * <p>
 *
 * -i &lt;name of input file&gt; <br>
 * Specify the training data file.
 * <p>
 *
 * -c &lt;class index&gt; <br>
 * The index of the attribute to use as the class.
 * <p>
 *
 * -s &lt;search method&gt; <br>
 * The full class name of the search method followed by search method options
 * (if any).<br>
 * Eg. -s "weka.attributeSelection.BestFirst -N 10"
 * <p>
 *
 * -x &lt;number of folds&gt; <br>
 * Perform a cross validation.
 * <p>
 *
 * -n &lt;random number seed&gt; <br>
 * Specify a random number seed. Use in conjunction with -x. (Default = 1).
 * <p>
 *
 * ------------------------------------------------------------------------
 * <p>
 *
 * Example usage as the main of an attribute evaluator (called FunkyEvaluator):
 *
 * <pre>
 * public static void main(String[] args) {
 *   runEvaluator(new FunkyEvaluator(), args);
 * }
 * </pre>
 * <p>
 *
 * ------------------------------------------------------------------------
 * <p>
 *
*
 * @author Mark Hall ([email protected])
 * @version $Revision: 11942 $
 */
public class AttributeSelection implements Serializable, RevisionHandler {

  /** for serialization */
  static final long serialVersionUID = 4170171824147584330L;

  /** the instances to select attributes from */
  private Instances m_trainInstances;

  /** the attribute/subset evaluator */
  private ASEvaluation m_ASEvaluator;

  /** the search method */
  private ASSearch m_searchMethod;

  /** the number of folds to use for cross validation */
  private int m_numFolds;

  /** holds a string describing the results of the attribute selection */
  private final StringBuffer m_selectionResults;

  /** rank features (if allowed by the search method) */
  private boolean m_doRank;

  /** do cross validation */
  private boolean m_doXval;

  /** seed used to randomly shuffle instances for cross validation */
  private int m_seed;

  /** number of attributes requested from ranked results */
  private int m_numToSelect;

  /**
   * the selected attributes (0-based indexes); when a class attribute is
   * appended by SelectAttributes(Instances) it occupies the last slot
   */
  private int[] m_selectedAttributeSet;

  /**
   * the attribute indexes and associated merits if a ranking is produced;
   * each row is {attribute index, merit}
   */
  private double[][] m_attributeRanking;

  /** if a feature selection run involves an attribute transformer */
  private AttributeTransformer m_transformer = null;

  /**
   * the attribute filter for processing instances with respect to the most
   * recent feature selection run
   */
  private Remove m_attributeFilter = null;

  /**
   * hold statistics for repeated feature selection, such as under cross
   * validation. Indexed as [statistic][attribute]; the rows accumulate
   * [0] merit, [1] rank, [2] squared merit and [3] squared rank per split.
   */
  private double[][] m_rankResults = null;

  /** per-attribute count of the splits in which the attribute was selected */
  private double[] m_subsetResults = null;

  /**
   * Return the number of attributes selected from the most recent run of
   * attribute selection
   * 
   * @return the number of attributes selected
   * @exception Exception if attribute selection has not been performed yet
   *              (propagated from selectedAttributes())
   */
  public int numberAttributesSelected() throws Exception {
    int[] att = selectedAttributes();
    // The trailing slot is assumed to hold the class index, hence the - 1.
    // NOTE(review): SelectAttributes(Instances) does not append a class slot
    // in every configuration (e.g. no class index set with an unsupervised
    // evaluator); in that case this looks one too low -- confirm.
    return att.length - 1;
  }

  /**
   * get the final selected set of attributes.
* * @return an array of attribute indexes * @exception Exception if attribute selection has not been performed yet */ public int[] selectedAttributes() throws Exception { if (m_selectedAttributeSet == null) { throw new Exception("Attribute selection has not been performed yet!"); } return m_selectedAttributeSet; } /** * get the final ranking of the attributes. * * @return a two dimensional array of ranked attribute indexes and their * associated merit scores as doubles. * @exception Exception if a ranking has not been produced */ public double[][] rankedAttributes() throws Exception { if (m_attributeRanking == null) { throw new Exception("Ranking has not been performed"); } return m_attributeRanking; } /** * set the attribute/subset evaluator * * @param evaluator the evaluator to use */ public void setEvaluator(ASEvaluation evaluator) { m_ASEvaluator = evaluator; } /** * set the search method * * @param search the search method to use */ public void setSearch(ASSearch search) { m_searchMethod = search; if (m_searchMethod instanceof RankedOutputSearch) { setRanking(((RankedOutputSearch) m_searchMethod).getGenerateRanking()); } } /** * set the number of folds for cross validation * * @param folds the number of folds */ public void setFolds(int folds) { m_numFolds = folds; } /** * produce a ranking (if possible with the set search and evaluator) * * @param r true if a ranking is to be produced */ public void setRanking(boolean r) { m_doRank = r; } /** * do a cross validation * * @param x true if a cross validation is to be performed */ public void setXval(boolean x) { m_doXval = x; } /** * set the seed for use in cross validation * * @param s the seed */ public void setSeed(int s) { m_seed = s; } /** * get a description of the attribute selection * * @return a String describing the results of attribute selection */ public String toResultsString() { return m_selectionResults.toString(); } /** * reduce the dimensionality of a set of instances to include only those * 
attributes chosen by the last run of attribute selection. * * @param in the instances to be reduced * @return a dimensionality reduced set of instances * @exception Exception if the instances can't be reduced */ public Instances reduceDimensionality(Instances in) throws Exception { if (m_attributeFilter == null) { throw new Exception("No feature selection has been performed yet!"); } if (m_transformer != null) { Instances transformed = new Instances(m_transformer.transformedHeader(), in.numInstances()); for (int i = 0; i < in.numInstances(); i++) { transformed.add(m_transformer.convertInstance(in.instance(i))); } return Filter.useFilter(transformed, m_attributeFilter); } return Filter.useFilter(in, m_attributeFilter); } /** * reduce the dimensionality of a single instance to include only those * attributes chosen by the last run of attribute selection. * * @param in the instance to be reduced * @return a dimensionality reduced instance * @exception Exception if the instance can't be reduced */ public Instance reduceDimensionality(Instance in) throws Exception { if (m_attributeFilter == null) { throw new Exception("No feature selection has been performed yet!"); } if (m_transformer != null) { in = m_transformer.convertInstance(in); } m_attributeFilter.input(in); m_attributeFilter.batchFinished(); Instance result = m_attributeFilter.output(); return result; } /** * constructor. Sets defaults for each member varaible. Default attribute * evaluator is CfsSubsetEval; default search method is BestFirst. */ public AttributeSelection() { setFolds(10); setRanking(false); setXval(false); setSeed(1); setEvaluator(new CfsSubsetEval()); setSearch(new GreedyStepwise()); m_selectionResults = new StringBuffer(); m_selectedAttributeSet = null; m_attributeRanking = null; } /** * Perform attribute selection with a particular evaluator and a set of * options specifying search method and input file etc. 
* * @param ASEvaluator an evaluator object * @param options an array of options, not only for the evaluator but also the * search method (if any) and an input data file * @return the results of attribute selection as a String * @exception Exception if no training file is set */ public static String SelectAttributes(ASEvaluation ASEvaluator, String[] options) throws Exception { String trainFileName, searchName; Instances train = null; ASSearch searchMethod = null; String[] optionsTmp = options.clone(); boolean helpRequested = false; try { // get basic options (options the same for all attribute selectors trainFileName = Utils.getOption('i', options); helpRequested = Utils.getFlag('h', optionsTmp); if (helpRequested || (trainFileName.length() == 0)) { searchName = Utils.getOption('s', optionsTmp); if (searchName.length() != 0) { String[] searchOptions = Utils.splitOptions(searchName); searchMethod = (ASSearch) Class.forName(searchOptions[0]).newInstance(); } if (helpRequested) { throw new Exception("Help requested."); } else { throw new Exception("No training file given."); } } } catch (Exception e) { throw new Exception('\n' + e.getMessage() + makeOptionString(ASEvaluator, searchMethod)); } DataSource source = new DataSource(trainFileName); train = source.getDataSet(); return SelectAttributes(ASEvaluator, options, train); } /** * returns a string summarizing the results of repeated attribute selection * runs on splits of a dataset. * * @return a summary of attribute selection results * @exception Exception if no attribute selection has been performed. 
*/ public String CVResultsString() throws Exception { StringBuffer CvString = new StringBuffer(); if ((m_subsetResults == null && m_rankResults == null) || (m_trainInstances == null)) { throw new Exception("Attribute selection has not been performed yet!"); } int fieldWidth = (int) (Math.log(m_trainInstances.numAttributes()) + 1.0); CvString.append("\n\n=== Attribute selection " + m_numFolds + " fold cross-validation "); if (!(m_ASEvaluator instanceof UnsupervisedSubsetEvaluator) && !(m_ASEvaluator instanceof UnsupervisedAttributeEvaluator) && (m_trainInstances.classAttribute().isNominal())) { CvString.append("(stratified), seed: "); CvString.append(m_seed + " ===\n\n"); } else { CvString.append("seed: " + m_seed + " ===\n\n"); } if ((m_searchMethod instanceof RankedOutputSearch) && (m_doRank == true)) { CvString.append("average merit average rank attribute\n"); // calcualte means and std devs for (int i = 0; i < m_rankResults[0].length; i++) { m_rankResults[0][i] /= m_numFolds; // mean merit double var = m_rankResults[0][i] * m_rankResults[0][i] * m_numFolds; var = (m_rankResults[2][i] - var); var /= m_numFolds; if (var <= 0.0) { var = 0.0; m_rankResults[2][i] = 0; } else { m_rankResults[2][i] = Math.sqrt(var); } m_rankResults[1][i] /= m_numFolds; // mean rank var = m_rankResults[1][i] * m_rankResults[1][i] * m_numFolds; var = (m_rankResults[3][i] - var); var /= m_numFolds; if (var <= 0.0) { var = 0.0; m_rankResults[3][i] = 0; } else { m_rankResults[3][i] = Math.sqrt(var); } } // now sort them by mean rank int[] s = Utils.sort(m_rankResults[1]); for (int element : s) { if (m_rankResults[1][element] > 0) { CvString.append(Utils.doubleToString( /* * Math. 
abs( */m_rankResults[0][element]/* ) */, 6, 3) + " +-" + Utils.doubleToString(m_rankResults[2][element], 6, 3) + " " + Utils .doubleToString(m_rankResults[1][element], fieldWidth + 2, 1) + " +-" + Utils.doubleToString(m_rankResults[3][element], 5, 2) + " " + Utils.doubleToString((element + 1), fieldWidth, 0) + " " + m_trainInstances.attribute(element).name() + "\n"); } } } else { CvString.append("number of folds (%) attribute\n"); for (int i = 0; i < m_subsetResults.length; i++) { if ((m_ASEvaluator instanceof UnsupervisedSubsetEvaluator) || (i != m_trainInstances.classIndex())) { CvString.append(Utils.doubleToString(m_subsetResults[i], 12, 0) + "(" + Utils.doubleToString((m_subsetResults[i] / m_numFolds * 100.0), 3, 0) + " %) " + Utils.doubleToString((i + 1), fieldWidth, 0) + " " + m_trainInstances.attribute(i).name() + "\n"); } } } return CvString.toString(); } /** * Select attributes for a split of the data. Calling this function updates * the statistics on attribute selection. CVResultsString() returns a string * summarizing the results of repeated calls to this function. Assumes that * splits are from the same dataset--- ie. have the same number and types of * attributes as previous splits. * * @param split the instances to select attributes from * @exception Exception if an error occurs */ public void selectAttributesCVSplit(Instances split) throws Exception { m_ASEvaluator.buildEvaluator(split); // Do the search int[] attributeSet = m_searchMethod.search(m_ASEvaluator, split); // Do any postprocessing that a attribute selection method might // require attributeSet = m_ASEvaluator.postProcess(attributeSet); updateStatsForModelCVSplit(split, m_ASEvaluator, m_searchMethod, attributeSet, m_doRank); } /** * Update the attribute selection stats for a cross-validation fold of the * data. 
* * @param split the instances in this split/fold of the data * @param evaluator the evaluator that was used * @param search the search that was used * @param attributeSet the final subset produced for the split * @param doRank whether to produce a ranking * @throws Exception if a problem occurs */ public void updateStatsForModelCVSplit(Instances split, ASEvaluation evaluator, ASSearch search, int[] attributeSet, boolean doRank) throws Exception { double[][] attributeRanking = null; // if the train instances are null then set equal to this split. // If this is the case then this function is more than likely being // called from outside this class in order to obtain CV statistics // and all we need m_trainIstances for is to get at attribute names // and types etc. if (m_trainInstances == null) { m_trainInstances = split; } // create space to hold statistics if (m_rankResults == null && m_subsetResults == null) { m_subsetResults = new double[split.numAttributes()]; m_rankResults = new double[4][split.numAttributes()]; } if ((search instanceof RankedOutputSearch) && doRank) { attributeRanking = ((RankedOutputSearch) search).rankedAttributes(); // System.out.println(attributeRanking[0][1]); for (int j = 0; j < attributeRanking.length; j++) { // merit m_rankResults[0][(int) attributeRanking[j][0]] += attributeRanking[j][1]; // squared merit m_rankResults[2][(int) attributeRanking[j][0]] += (attributeRanking[j][1] * attributeRanking[j][1]); // rank m_rankResults[1][(int) attributeRanking[j][0]] += (j + 1); // squared rank m_rankResults[3][(int) attributeRanking[j][0]] += (j + 1) * (j + 1); // += (attributeRanking[j][0] * attributeRanking[j][0]); } } else { for (int j = 0; j < attributeSet.length; j++) { m_subsetResults[attributeSet[j]]++; } } } /** * Perform a cross validation for attribute selection. With subset evaluators * the number of times each attribute is selected over the cross validation is * reported. 
For attribute evaluators, the average merit and average ranking + * std deviation is reported for each attribute. * * @return the results of cross validation as a String * @exception Exception if an error occurs during cross validation */ public String CrossValidateAttributes() throws Exception { Instances cvData = new Instances(m_trainInstances); Instances train; Random random = new Random(m_seed); cvData.randomize(random); if (!(m_ASEvaluator instanceof UnsupervisedSubsetEvaluator) && !(m_ASEvaluator instanceof UnsupervisedAttributeEvaluator)) { if (cvData.classAttribute().isNominal()) { cvData.stratify(m_numFolds); } } for (int i = 0; i < m_numFolds; i++) { // Perform attribute selection train = cvData.trainCV(m_numFolds, i, random); selectAttributesCVSplit(train); } return CVResultsString(); } /** * Perform attribute selection on the supplied training instances. * * @param data the instances to select attributes from * @exception Exception if there is a problem during selection */ public void SelectAttributes(Instances data) throws Exception { int[] attributeSet; m_transformer = null; m_attributeFilter = null; m_trainInstances = data; if (m_doXval == true && (m_ASEvaluator instanceof AttributeTransformer)) { throw new Exception("Can't cross validate an attribute transformer."); } if (m_ASEvaluator instanceof SubsetEvaluator && m_searchMethod instanceof Ranker) { throw new Exception(m_ASEvaluator.getClass().getName() + " must use a search method other than Ranker"); } if (m_ASEvaluator instanceof AttributeEvaluator && !(m_searchMethod instanceof Ranker)) { // System.err.println("AttributeEvaluators must use a Ranker search " // +"method. 
Switching to Ranker..."); // m_searchMethod = new Ranker(); throw new Exception("AttributeEvaluators must use the Ranker search " + "method"); } if (m_searchMethod instanceof RankedOutputSearch) { m_doRank = ((RankedOutputSearch) m_searchMethod).getGenerateRanking(); } if (m_ASEvaluator instanceof UnsupervisedAttributeEvaluator || m_ASEvaluator instanceof UnsupervisedSubsetEvaluator) { // unset the class index // m_trainInstances.setClassIndex(-1); } else { // check that a class index has been set if (m_trainInstances.classIndex() < 0) { m_trainInstances.setClassIndex(m_trainInstances.numAttributes() - 1); } } // Initialize the attribute evaluator m_ASEvaluator.buildEvaluator(m_trainInstances); if (m_ASEvaluator instanceof AttributeTransformer) { m_trainInstances = ((AttributeTransformer) m_ASEvaluator).transformedHeader(); m_transformer = (AttributeTransformer) m_ASEvaluator; } int fieldWidth = (int) (Math.log(m_trainInstances.numAttributes()) + 1.0); // Do the search attributeSet = m_searchMethod.search(m_ASEvaluator, m_trainInstances); // try and determine if the search method uses an attribute transformer--- // this is a bit of a hack to make things work properly with RankSearch // using PrincipalComponents as its attribute ranker try { BeanInfo bi = Introspector.getBeanInfo(m_searchMethod.getClass()); PropertyDescriptor properties[]; // methods = bi.getMethodDescriptors(); properties = bi.getPropertyDescriptors(); for (PropertyDescriptor propertie : properties) { propertie.getDisplayName(); Method meth = propertie.getReadMethod(); Object retType = meth.getReturnType(); if (retType.equals(ASEvaluation.class)) { Class args[] = {}; ASEvaluation tempEval = (ASEvaluation) (meth.invoke(m_searchMethod, (Object[]) args)); if (tempEval instanceof AttributeTransformer) { // grab the transformed data header m_trainInstances = ((AttributeTransformer) tempEval).transformedHeader(); m_transformer = (AttributeTransformer) tempEval; } } } } catch (IntrospectionException ex) { 
System.err.println("AttributeSelection: Couldn't " + "introspect"); } // Do any postprocessing that a attribute selection method might require attributeSet = m_ASEvaluator.postProcess(attributeSet); if (!m_doRank) { m_selectionResults.append(printSelectionResults()); } if ((m_searchMethod instanceof RankedOutputSearch) && m_doRank == true) { try { m_attributeRanking = ((RankedOutputSearch) m_searchMethod).rankedAttributes(); } catch (Exception ex) { ex.printStackTrace(); throw ex; } m_selectionResults.append(printSelectionResults()); m_selectionResults.append("Ranked attributes:\n"); // retrieve the number of attributes to retain m_numToSelect = ((RankedOutputSearch) m_searchMethod).getCalculatedNumToSelect(); // determine fieldwidth for merit int f_p = 0; int w_p = 0; for (int i = 0; i < m_numToSelect; i++) { double precision = (Math.abs(m_attributeRanking[i][1]) - (int) (Math .abs(m_attributeRanking[i][1]))); double intPart = (int) (Math.abs(m_attributeRanking[i][1])); if (precision > 0) { precision = Math.abs((Math.log(Math.abs(precision)) / Math.log(10))) + 3; } if (precision > f_p) { f_p = (int) precision; } if (intPart == 0) { if (w_p < 2) { w_p = 2; } } else if ((Math .abs((Math.log(Math.abs(m_attributeRanking[i][1])) / Math.log(10))) + 1) > w_p) { if (m_attributeRanking[i][1] > 0) { w_p = (int) Math .abs( (Math.log(Math.abs(m_attributeRanking[i][1])) / Math.log(10))) + 1; } } } for (int i = 0; i < m_numToSelect; i++) { m_selectionResults.append(Utils.doubleToString( m_attributeRanking[i][1], f_p + w_p + 1, f_p) + Utils.doubleToString((m_attributeRanking[i][0] + 1), fieldWidth + 1, 0) + " " + m_trainInstances.attribute((int) m_attributeRanking[i][0]).name() + "\n"); } // set up the selected attributes array - usable by a filter or // whatever if (m_trainInstances.classIndex() >= 0) { if ((!(m_ASEvaluator instanceof UnsupervisedSubsetEvaluator) && !(m_ASEvaluator instanceof UnsupervisedAttributeEvaluator)) || m_ASEvaluator instanceof AttributeTransformer) { 
// one more for the class m_selectedAttributeSet = new int[m_numToSelect + 1]; m_selectedAttributeSet[m_numToSelect] = m_trainInstances.classIndex(); } else { m_selectedAttributeSet = new int[m_numToSelect]; } } else { m_selectedAttributeSet = new int[m_numToSelect]; } m_selectionResults.append("\nSelected attributes: "); for (int i = 0; i < m_numToSelect; i++) { m_selectedAttributeSet[i] = (int) m_attributeRanking[i][0]; if (i == m_numToSelect - 1) { m_selectionResults.append(((int) m_attributeRanking[i][0] + 1) + " : " + (i + 1) + "\n"); } else { m_selectionResults.append(((int) m_attributeRanking[i][0] + 1)); m_selectionResults.append(","); } } } else { // set up the selected attributes array - usable by a filter or // whatever if ((!(m_ASEvaluator instanceof UnsupervisedSubsetEvaluator) && !(m_ASEvaluator instanceof UnsupervisedAttributeEvaluator)) || m_trainInstances.classIndex() >= 0) // one more for the class { m_selectedAttributeSet = new int[attributeSet.length + 1]; m_selectedAttributeSet[attributeSet.length] = m_trainInstances.classIndex(); } else { m_selectedAttributeSet = new int[attributeSet.length]; } for (int i = 0; i < attributeSet.length; i++) { m_selectedAttributeSet[i] = attributeSet[i]; } m_selectionResults.append("Selected attributes: "); for (int i = 0; i < attributeSet.length; i++) { if (i == (attributeSet.length - 1)) { m_selectionResults.append((attributeSet[i] + 1) + " : " + attributeSet.length + "\n"); } else { m_selectionResults.append((attributeSet[i] + 1) + ","); } } for (int element : attributeSet) { m_selectionResults.append(" " + m_trainInstances.attribute(element).name() + "\n"); } } // Cross validation should be called from here if (m_doXval == true) { m_selectionResults.append(CrossValidateAttributes()); } // set up the attribute filter with the selected attributes if (m_selectedAttributeSet != null && !m_doXval) { m_attributeFilter = new Remove(); m_attributeFilter.setAttributeIndicesArray(m_selectedAttributeSet); 
m_attributeFilter.setInvertSelection(true); m_attributeFilter.setInputFormat(m_trainInstances); } // Save space m_trainInstances = new Instances(m_trainInstances, 0); m_ASEvaluator.clean(); } /** * Perform attribute selection with a particular evaluator and a set of * options specifying search method and options for the search method and * evaluator. * * @param ASEvaluator an evaluator object * @param options an array of options, not only for the evaluator but also the * search method (if any) and an input data file * @param train the input instances * @return the results of attribute selection as a String * @exception Exception if incorrect options are supplied */ public static String SelectAttributes(ASEvaluation ASEvaluator, String[] options, Instances train) throws Exception { int seed = 1, folds = 10; String foldsString, seedString, searchName; String classString; String searchClassName; String[] searchOptions = null; // new String [1]; ASSearch searchMethod = null; boolean doCrossVal = false; int classIndex = -1; boolean helpRequested = false; AttributeSelection trainSelector = new AttributeSelection(); try { if (Utils.getFlag('h', options)) { helpRequested = true; } // does data already have a class attribute set? 
if (train.classIndex() != -1) { classIndex = train.classIndex() + 1; } // get basic options (options the same for all attribute selectors classString = Utils.getOption('c', options); if (classString.length() != 0) { if (classString.equals("first")) { classIndex = 1; } else if (classString.equals("last")) { classIndex = train.numAttributes(); } else { classIndex = Integer.parseInt(classString); } } if ((classIndex != -1) && ((classIndex == 0) || (classIndex > train.numAttributes()))) { throw new Exception("Class index out of range."); } if (classIndex != -1) { train.setClassIndex(classIndex - 1); } else { // classIndex = train.numAttributes(); // train.setClassIndex(classIndex - 1); } foldsString = Utils.getOption('x', options); if (foldsString.length() != 0) { folds = Integer.parseInt(foldsString); doCrossVal = true; } trainSelector.setFolds(folds); trainSelector.setXval(doCrossVal); seedString = Utils.getOption('n', options); if (seedString.length() != 0) { seed = Integer.parseInt(seedString); } trainSelector.setSeed(seed); searchName = Utils.getOption('s', options); if ((searchName.length() == 0) && (!(ASEvaluator instanceof AttributeEvaluator))) { throw new Exception("No search method given."); } if (searchName.length() != 0) { searchName = searchName.trim(); // split off any search options int breakLoc = searchName.indexOf(' '); searchClassName = searchName; String searchOptionsString = ""; if (breakLoc != -1) { searchClassName = searchName.substring(0, breakLoc); searchOptionsString = searchName.substring(breakLoc).trim(); searchOptions = Utils.splitOptions(searchOptionsString); } } else { try { searchClassName = new String("weka.attributeSelection.Ranker"); searchMethod = (ASSearch) Class.forName(searchClassName).newInstance(); } catch (Exception e) { throw new Exception("Can't create Ranker object"); } } // if evaluator is a subset evaluator // create search method and set its options (if any) if (searchMethod == null) { searchMethod = 
ASSearch.forName(searchClassName, searchOptions); } // set the search method trainSelector.setSearch(searchMethod); } catch (Exception e) { throw new Exception('\n' + e.getMessage() + makeOptionString(ASEvaluator, searchMethod)); } try { // Set options for ASEvaluator if (ASEvaluator instanceof OptionHandler) { ((OptionHandler) ASEvaluator).setOptions(options); } /* * // Set options for Search method if (searchMethod instanceof * OptionHandler) { if (searchOptions != null) { * ((OptionHandler)searchMethod).setOptions(searchOptions); } } * Utils.checkForRemainingOptions(searchOptions); */ } catch (Exception e) { throw new Exception("\n" + e.getMessage() + makeOptionString(ASEvaluator, searchMethod)); } try { Utils.checkForRemainingOptions(options); } catch (Exception e) { throw new Exception('\n' + e.getMessage() + makeOptionString(ASEvaluator, searchMethod)); } if (helpRequested) { System.out.println(makeOptionString(ASEvaluator, searchMethod)); System.exit(0); } // set the attribute evaluator trainSelector.setEvaluator(ASEvaluator); // do the attribute selection trainSelector.SelectAttributes(train); // return the results string return trainSelector.toResultsString(); } /** * Assembles a text description of the attribute selection results. * * @return a string describing the results of attribute selection. 
*/ private String printSelectionResults() { StringBuffer text = new StringBuffer(); text.append("\n\n=== Attribute Selection on all input data ===\n\n" + "Search Method:\n"); text.append(m_searchMethod.toString()); text.append("\nAttribute "); if (m_ASEvaluator instanceof SubsetEvaluator) { text.append("Subset Evaluator ("); } else { text.append("Evaluator ("); } if (!(m_ASEvaluator instanceof UnsupervisedSubsetEvaluator) && !(m_ASEvaluator instanceof UnsupervisedAttributeEvaluator)) { text.append("supervised, "); text.append("Class ("); if (m_trainInstances.attribute(m_trainInstances.classIndex()).isNumeric()) { text.append("numeric): "); } else { text.append("nominal): "); } text.append((m_trainInstances.classIndex() + 1) + " " + m_trainInstances.attribute(m_trainInstances.classIndex()).name() + "):\n"); } else { text.append("unsupervised):\n"); } text.append(m_ASEvaluator.toString() + "\n"); return text.toString(); } /** * Make up the help string giving all the command line options * * @param ASEvaluator the attribute evaluator to include options for * @param searchMethod the search method to include options for * @return a string detailing the valid command line options * @throws Exception if something goes wrong */ private static String makeOptionString(ASEvaluation ASEvaluator, ASSearch searchMethod) throws Exception { StringBuffer optionsText = new StringBuffer(""); // General options optionsText.append("\n\nGeneral options:\n\n"); optionsText.append("-h\n\tdisplay this help\n"); optionsText.append("-i \n"); optionsText.append("\tSets training file.\n"); optionsText.append("-c \n"); optionsText.append("\tSets the class index for supervised attribute\n"); optionsText.append("\tselection. 
Default=last column.\n"); optionsText.append("-s \n"); optionsText.append("\tSets search method for subset evaluators.\n"); optionsText.append("-x \n"); optionsText.append("\tPerform a cross validation.\n"); optionsText.append("-n \n"); optionsText.append("\tUse in conjunction with -x.\n"); // Get attribute evaluator-specific options if (ASEvaluator instanceof OptionHandler) { optionsText.append("\nOptions specific to " + ASEvaluator.getClass().getName() + ":\n\n"); Enumeration





© 2015 - 2024 Weber Informatics LLC | Privacy Policy