weka.attributeSelection.ExhaustiveSearch Maven / Gradle / Ivy
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
/*
* ExhaustiveSearch.java
* Copyright (C) 1999 University of Waikato, Hamilton, New Zealand
*
*/
package weka.attributeSelection;
import weka.core.Instances;
import weka.core.Option;
import weka.core.OptionHandler;
import weka.core.RevisionUtils;
import weka.core.Utils;
import java.math.BigInteger;
import java.util.BitSet;
import java.util.Enumeration;
import java.util.Vector;
/**
* ExhaustiveSearch :
*
* Performs an exhaustive search through the space of attribute subsets starting from the empty set of attrubutes. Reports the best subset found.
*
*
* Valid options are:
*
* -V
* Output subsets as the search progresses.
* (default = false).
*
*
* @author Mark Hall ([email protected])
* @version $Revision: 1.15 $
*/
public class ExhaustiveSearch
extends ASSearch
implements OptionHandler {
/** for serialization */
static final long serialVersionUID = 5741842861142379712L;
/** the best feature set found during the search */
private BitSet m_bestGroup;
/** the merit of the best subset found */
private double m_bestMerit;
/** does the data have a class */
private boolean m_hasClass;
/** holds the class index */
private int m_classIndex;
/** number of attributes in the data */
private int m_numAttribs;
/** if true, then ouput new best subsets as the search progresses */
private boolean m_verbose;
/** the number of subsets evaluated during the search */
private int m_evaluations;
/**
* Returns a string describing this search method
* @return a description of the search suitable for
* displaying in the explorer/experimenter gui
*/
public String globalInfo() {
return "ExhaustiveSearch : \n\nPerforms an exhaustive search through "
+"the space of attribute subsets starting from the empty set of "
+"attrubutes. Reports the best subset found.";
}
/**
* Constructor
*/
public ExhaustiveSearch () {
resetOptions();
}
/**
* Returns an enumeration describing the available options.
* @return an enumeration of all the available options.
**/
public Enumeration listOptions () {
Vector newVector = new Vector(2);
newVector.addElement(new Option("\tOutput subsets as the search progresses."
+"\n\t(default = false)."
, "V", 0
, "-V"));
return newVector.elements();
}
/**
* Parses a given list of options.
*
* Valid options are:
*
* -V
* Output subsets as the search progresses.
* (default = false).
*
*
* @param options the list of options as an array of strings
* @throws Exception if an option is not supported
*
**/
public void setOptions (String[] options)
throws Exception {
resetOptions();
setVerbose(Utils.getFlag('V',options));
}
/**
* Returns the tip text for this property
* @return tip text for this property suitable for
* displaying in the explorer/experimenter gui
*/
public String verboseTipText() {
return "Print progress information. Sends progress info to the terminal "
+"as the search progresses.";
}
/**
* set whether or not to output new best subsets as the search proceeds
* @param v true if output is to be verbose
*/
public void setVerbose(boolean v) {
m_verbose = v;
}
/**
* get whether or not output is verbose
* @return true if output is set to verbose
*/
public boolean getVerbose() {
return m_verbose;
}
/**
* Gets the current settings of RandomSearch.
* @return an array of strings suitable for passing to setOptions()
*/
public String[] getOptions () {
String[] options = new String[1];
int current = 0;
if (m_verbose) {
options[current++] = "-V";
}
while (current < options.length) {
options[current++] = "";
}
return options;
}
/**
* prints a description of the search
* @return a description of the search as a string
*/
public String toString() {
StringBuffer text = new StringBuffer();
text.append("\tExhaustive Search.\n\tStart set: ");
text.append("no attributes\n");
text.append("\tNumber of evaluations: "+m_evaluations+"\n");
text.append("\tMerit of best subset found: "
+Utils.doubleToString(Math.abs(m_bestMerit),8,3)+"\n");
return text.toString();
}
/**
* Searches the attribute subset space using an exhaustive search.
*
* @param ASEval the attribute evaluator to guide the search
* @param data the training instances.
* @return an array (not necessarily ordered) of selected attribute indexes
* @throws Exception if the search can't be completed
*/
public int[] search (ASEvaluation ASEval, Instances data)
throws Exception {
double best_merit;
double tempMerit;
boolean done = false;
int sizeOfBest;
int tempSize;
BigInteger space = BigInteger.ZERO;
m_evaluations = 0;
m_numAttribs = data.numAttributes();
m_bestGroup = new BitSet(m_numAttribs);
if (!(ASEval instanceof SubsetEvaluator)) {
throw new Exception(ASEval.getClass().getName()
+ " is not a "
+ "Subset evaluator!");
}
if (ASEval instanceof UnsupervisedSubsetEvaluator) {
m_hasClass = false;
}
else {
m_hasClass = true;
m_classIndex = data.classIndex();
}
SubsetEvaluator ASEvaluator = (SubsetEvaluator)ASEval;
m_numAttribs = data.numAttributes();
best_merit = ASEvaluator.evaluateSubset(m_bestGroup);
m_evaluations++;
sizeOfBest = countFeatures(m_bestGroup);
BitSet tempGroup = new BitSet(m_numAttribs);
tempMerit = ASEvaluator.evaluateSubset(tempGroup);
if (m_verbose) {
System.out.println("Zero feature subset ("
+Utils.doubleToString(Math.
abs(tempMerit),8,5)
+")");
}
if (tempMerit >= best_merit) {
tempSize = countFeatures(tempGroup);
if (tempMerit > best_merit ||
(tempSize < sizeOfBest)) {
best_merit = tempMerit;
m_bestGroup = (BitSet)(tempGroup.clone());
sizeOfBest = tempSize;
}
}
int numatts = (m_hasClass)
? m_numAttribs - 1
: m_numAttribs;
BigInteger searchSpaceEnd =
BigInteger.ONE.add(BigInteger.ONE).pow(numatts).subtract(BigInteger.ONE);
while (!done) {
// the next subset
space = space.add(BigInteger.ONE);
if (space.equals(searchSpaceEnd)) {
done = true;
}
tempGroup.clear();
for (int i = 0; i < numatts; i++) {
if (space.testBit(i)) {
if (!m_hasClass) {
tempGroup.set(i);
} else {
int j = (i >= m_classIndex)
? i + 1
: i;
tempGroup.set(j);
}
}
}
tempMerit = ASEvaluator.evaluateSubset(tempGroup);
m_evaluations++;
if (tempMerit >= best_merit) {
tempSize = countFeatures(tempGroup);
if (tempMerit > best_merit ||
(tempSize < sizeOfBest)) {
best_merit = tempMerit;
m_bestGroup = (BitSet)(tempGroup.clone());
sizeOfBest = tempSize;
if (m_verbose) {
System.out.println("New best subset ("
+Utils.doubleToString(Math.
abs(best_merit),8,5)
+"): "+printSubset(m_bestGroup));
}
}
}
}
m_bestMerit = best_merit;
return attributeList(m_bestGroup);
}
/**
* counts the number of features in a subset
* @param featureSet the feature set for which to count the features
* @return the number of features in the subset
*/
private int countFeatures(BitSet featureSet) {
int count = 0;
for (int i=0;i=0;i--) {
if (temp.get(i)) {
temp.clear(i);
int newP;
if (i != (m_numAttribs-1-counter)) {
newP = i+1;
if (newP == m_classIndex) {
newP++;
}
if (newP < m_numAttribs) {
temp.set(newP);
for (j=0;j
© 2015 - 2025 Weber Informatics LLC | Privacy Policy