weka.attributeSelection.RandomSearch Maven / Gradle / Ivy
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
/*
* RandomSearch.java
* Copyright (C) 1999 University of Waikato, Hamilton, New Zealand
*
*/
package weka.attributeSelection;
import weka.core.Instances;
import weka.core.Option;
import weka.core.OptionHandler;
import weka.core.Range;
import weka.core.RevisionUtils;
import weka.core.TechnicalInformation;
import weka.core.TechnicalInformation.Type;
import weka.core.TechnicalInformation.Field;
import weka.core.TechnicalInformationHandler;
import weka.core.Utils;
import java.util.BitSet;
import java.util.Enumeration;
import java.util.Random;
import java.util.Vector;
/**
* RandomSearch :
*
* Performs a Random search in the space of attribute subsets. If no start set is supplied, Random search starts from a random point and reports the best subset found. If a start set is supplied, Random searches randomly for subsets that are as good or better than the start point with the same or fewer attributes. Using RandomSearch in conjunction with a start set containing all attributes equates to the LVF algorithm of Liu and Setiono (ICML-96).
*
* For more information see:
*
* H. Liu, R. Setiono: A probabilistic approach to feature selection - A filter solution. In: 13th International Conference on Machine Learning, 319-327, 1996.
*
*
* BibTeX:
*
* @inproceedings{Liu1996,
* author = {H. Liu and R. Setiono},
* booktitle = {13th International Conference on Machine Learning},
* pages = {319-327},
* title = {A probabilistic approach to feature selection - A filter solution},
* year = {1996}
* }
*
*
*
* Valid options are:
*
* -P <start set>
* Specify a starting set of attributes.
* Eg. 1,3,5-7.
* If a start point is supplied,
* random search evaluates the start
* point and then randomly looks for
* subsets that are as good as or better
* than the start point with the same
* or lower cardinality.
*
* -F <percent>
* Percent of search space to consider.
* (default = 25%).
*
* -V
* Output subsets as the search progresses.
* (default = false).
*
* -seed <num>
* Random seed
* (default = 1)
*
*
* @author Mark Hall ([email protected])
* @version $Revision: 8949 $
*/
public class RandomSearch
extends ASSearch
implements StartSetHandler, OptionHandler, TechnicalInformationHandler {
/** for serialization */
static final long serialVersionUID = 7479392617377425484L;
/**
* holds the start set as an array of attribute indexes; filled from
* m_startRange during search(). null means no start set is in effect.
*/
private int[] m_starting;
/** holds the start set as a range (as supplied via setStartSet) */
private Range m_startRange;
/** the best feature set found during the search */
private BitSet m_bestGroup;
/** the merit of the best subset found; recorded at the end of search() */
private double m_bestMerit;
/**
* only accept a feature set as being "better" than the best if its
* merit is better or equal to the best, and it contains as many or fewer
* features than the best (this allows LVF to be implemented).
* search() switches this on whenever a start set is in effect.
*/
private boolean m_onlyConsiderBetterAndSmaller;
/**
* does the data have a class; search() sets this to false when the
* evaluator is an UnsupervisedSubsetEvaluator and to true otherwise
*/
private boolean m_hasClass;
/** holds the class index; only updated by search() when m_hasClass is true */
private int m_classIndex;
/** number of attributes in the data passed to search() */
private int m_numAttribs;
/** seed for random number generation */
private int m_seed;
/**
* portion of the search space to consider. NOTE: stored as a fraction
* (setSearchPercent divides by 100, getSearchPercent multiplies by 100),
* not as a percentage.
*/
private double m_searchSize;
/**
* the number of iterations performed; computed in search() as
* m_searchSize * 2^(number of non-class attributes), truncated to int
*/
private int m_iterations;
/** random number object; re-created from m_seed at the start of search() */
private Random m_random;
/** output new best subsets (to System.out) as the search progresses */
private boolean m_verbose;
/**
 * Returns a string describing this search method. The description ends
 * with the textual form of {@link #getTechnicalInformation()}.
 *
 * @return a description of the search suitable for
 * displaying in the explorer/experimenter gui
 */
public String globalInfo() {
  // Fixed wording: the text used to read "the same or or fewer attributes".
  return "RandomSearch : \n\nPerforms a Random search in "
    + "the space of attribute subsets. If no start set is supplied, Random "
    + "search starts from a random point and reports the best subset found. "
    + "If a start set is supplied, Random searches randomly for subsets "
    + "that are as good or better than the start point with the same "
    + "or fewer attributes. Using RandomSearch in conjunction with a start "
    + "set containing all attributes equates to the LVF algorithm of Liu "
    + "and Setiono (ICML-96).\n\n"
    + "For more information see:\n\n"
    + getTechnicalInformation().toString();
}
/**
 * Builds the bibliographic record (an INPROCEEDINGS entry for the
 * Liu and Setiono paper) that backs this search method.
 *
 * @return the technical information about this class
 */
public TechnicalInformation getTechnicalInformation() {
  final TechnicalInformation paper =
    new TechnicalInformation(Type.INPROCEEDINGS);

  paper.setValue(Field.AUTHOR, "H. Liu and R. Setiono");
  paper.setValue(Field.TITLE,
                 "A probabilistic approach to feature selection - A filter solution");
  paper.setValue(Field.BOOKTITLE,
                 "13th International Conference on Machine Learning");
  paper.setValue(Field.YEAR, "1996");
  paper.setValue(Field.PAGES, "319-327");

  return paper;
}
/**
 * Creates a new random search; all option handling is delegated to
 * resetOptions().
 */
public RandomSearch () {
  this.resetOptions();
}
/**
 * Returns an enumeration describing the available options
 * (-P, -F, -V and -seed).
 *
 * @return an enumeration of all the available options.
 **/
public Enumeration listOptions () {
  // Four options are registered, so size the vector accordingly.
  Vector<Option> newVector = new Vector<Option>(4);

  // The synopsis of every option that takes an argument names that
  // argument, matching the option list documented for this class.
  newVector.addElement(new Option("\tSpecify a starting set of attributes."
                                  + "\n\tEg. 1,3,5-7."
                                  + "\n\tIf a start point is supplied,"
                                  + "\n\trandom search evaluates the start"
                                  + "\n\tpoint and then randomly looks for"
                                  + "\n\tsubsets that are as good as or better"
                                  + "\n\tthan the start point with the same"
                                  + "\n\tor lower cardinality.",
                                  "P", 1,
                                  "-P <start set>"));

  newVector.addElement(new Option("\tPercent of search space to consider."
                                  + "\n\t(default = 25%).",
                                  "F", 1,
                                  "-F <percent>"));

  newVector.addElement(new Option("\tOutput subsets as the search progresses."
                                  + "\n\t(default = false).",
                                  "V", 0,
                                  "-V"));

  newVector.addElement(new Option("\tRandom seed\n\t(default = 1)",
                                  "seed", 1, "-seed <num>"));

  return newVector.elements();
}
/**
 * Parses a given list of options. All options are first reset via
 * resetOptions(), then those present in the array are applied.
 *
 * Valid options are:
 *
 * -P &lt;start set&gt;
 *  Specify a starting set of attributes.
 *  Eg. 1,3,5-7.
 *  If a start point is supplied,
 *  random search evaluates the start
 *  point and then randomly looks for
 *  subsets that are as good as or better
 *  than the start point with the same
 *  or lower cardinality.
 *
 * -F &lt;percent&gt;
 *  Percent of search space to consider.
 *  (default = 25%).
 *
 * -V
 *  Output subsets as the search progresses.
 *  (default = false).
 *
 * -seed &lt;num&gt;
 *  Random seed
 *  (default = 1)
 *
 * @param options the list of options as an array of strings
 * @throws Exception if an option is not supported
 *
 **/
public void setOptions (String[] options)
  throws Exception {
  String optionString;

  resetOptions();

  optionString = Utils.getOption('P', options);
  if (optionString.length() != 0) {
    setStartSet(optionString);
  }

  optionString = Utils.getOption('F', options);
  if (optionString.length() != 0) {
    // parseDouble avoids the needless (and deprecated) Double boxing.
    setSearchPercent(Double.parseDouble(optionString));
  }

  setVerbose(Utils.getFlag('V', options));

  optionString = Utils.getOption("seed", options);
  if (optionString.length() != 0) {
    setSeed(Integer.parseInt(optionString));
  }
}
/**
 * Gets the current settings of RandomSearch. The returned array always
 * has seven slots; slots not needed by the active settings hold "".
 *
 * @return an array of strings suitable for passing to setOptions()
 */
public String[] getOptions () {
  final String[] result = new String[7];
  int next = 0;

  if (m_verbose) {
    result[next++] = "-V";
  }

  if (getStartSet().length() != 0) {
    result[next++] = "-P";
    result[next++] = String.valueOf(startSetToString());
  }

  result[next++] = "-F";
  result[next++] = String.valueOf(getSearchPercent());

  result[next++] = "-seed";
  result[next++] = String.valueOf(getSeed());

  // pad whatever is left with empty strings
  for (; next < result.length; next++) {
    result[next] = "";
  }

  return result;
}
/**
 * Returns the tip text for the startSet property.
 *
 * @return tip text for this property suitable for
 * displaying in the explorer/experimenter gui
 */
public String startSetTipText() {
  // Spelling fixed: "seperated list off" -> "separated list of".
  return "Set the start point for the search. This is specified as a comma "
    + "separated list of attribute indexes starting at 1. It can include "
    + "ranges. Eg. 1,2,5-9,17. If specified, Random searches for subsets "
    + "of attributes that are as good as or better than the start set with "
    + "the same or lower cardinality.";
}
/**
 * Sets a starting set of attributes for the search by handing the
 * string to the internal start range. It is the search method's
 * responsibility to report this start set (if any) in its toString()
 * method.
 *
 * @param startSet a string containing a list of attributes (and or ranges),
 * eg. 1,2,6,10-15. "" indicates no start point.
 * If a start point is supplied, random search evaluates the
 * start point and then looks for subsets that are as good as or better
 * than the start point with the same or lower cardinality.
 * @throws Exception if start set can't be set.
 */
public void setStartSet (String startSet) throws Exception {
  this.m_startRange.setRanges(startSet);
}
/**
 * Returns the start set as reported by the internal start range.
 *
 * @return a list of attributes (and or attribute ranges)
 */
public String getStartSet () {
  final String ranges = this.m_startRange.getRanges();
  return ranges;
}
/**
 * Returns the tip text for the verbose property.
 *
 * @return tip text for this property suitable for
 * displaying in the explorer/experimenter gui
 */
public String verboseTipText() {
  final String tip =
    "Print progress information. "
    + "Sends progress info to the terminal as the search progresses.";
  return tip;
}
/**
 * Switches the reporting of new best subsets during the search on or off.
 *
 * @param v true if output is to be verbose
 */
public void setVerbose(boolean v) {
  this.m_verbose = v;
}
/**
 * Tells whether new best subsets are reported during the search.
 *
 * @return true if output is set to verbose
 */
public boolean getVerbose() {
  return this.m_verbose;
}
/**
 * Returns the tip text for the searchPercent property.
 *
 * @return tip text for this property suitable for
 * displaying in the explorer/experimenter gui
 */
public String searchPercentTipText() {
  final String tip = "Percentage of the search space to explore.";
  return tip;
}
/**
 * Sets the percentage of the search space to consider. The value is
 * normalised before it is stored: the sign is dropped, zero and NaN fall
 * back to the default of 25, and anything above 100 is capped at 100.
 * Internally the setting is kept as a fraction (percent / 100).
 *
 * @param p percent of the search space ( 0 &lt; p &lt;= 100)
 */
public void setSearchPercent(double p) {
  p = Math.abs(p);

  // NaN compares false against everything, so it would slip through both
  // range checks below and end up as the stored fraction; treat it like
  // "not specified" instead.
  if (p == 0 || Double.isNaN(p)) {
    p = 25;
  }

  if (p > 100.0) {
    p = 100;
  }

  m_searchSize = (p / 100.0);
}
/**
 * Returns the tip text for the seed property.
 *
 * @return tip text for this property suitable for
 * displaying in the explorer/experimenter gui
 */
public String seedTipText() {
  final String tip = "Seed for the random number generator";
  return tip;
}
/**
 * Stores the seed used to create the random number generator when a
 * search is started.
 *
 * @param seed the seed to use
 */
public void setSeed(int seed) {
  this.m_seed = seed;
}
/**
 * Returns the currently configured random seed.
 *
 * @return the random seed
 */
public int getSeed() {
  return this.m_seed;
}
/**
 * Returns the percentage of the search space to consider, converting the
 * internally stored fraction back to a percentage.
 *
 * @return the percent of the search space explored
 */
public double getSearchPercent() {
  final double percent = this.m_searchSize * 100;
  return percent;
}
/**
 * Converts the array of starting attributes to a string. This is
 * used by getOptions to return the actual attributes specified
 * as the starting set. This is better than using m_startRange.getRanges()
 * as the same start set can be specified in different ways from the
 * command line---eg 1,2,3 == 1-3. This is to ensure that stuff that
 * is stored in a database is comparable.
 *
 * When the data has a class, the class attribute is left out of the
 * list. If no start array has been built yet, the raw range string is
 * returned instead.
 *
 * @return a comma separated list of individual (1-based) attribute
 * numbers as a String
 */
private String startSetToString() {
  if (m_starting == null) {
    return getStartSet();
  }

  StringBuffer FString = new StringBuffer();
  for (int i = 0; i < m_starting.length; i++) {
    // Compare the attribute index itself (not its position in the array)
    // with the class index, the same way search() filters the start set.
    if (m_hasClass && m_starting[i] == m_classIndex) {
      continue;
    }
    // Emit the separator before each entry but the first, so skipping the
    // last entry can never leave a dangling comma.
    if (FString.length() > 0) {
      FString.append(",");
    }
    FString.append((m_starting[i] + 1));
  }

  return FString.toString();
}
/**
 * Describes the search: the start set (or "no attributes"), the number
 * of iterations together with the searched percentage, and the absolute
 * merit of the best subset found.
 *
 * @return a description of the search as a string
 */
public String toString() {
  final StringBuilder out =
    new StringBuilder("\tRandom search.\n\tStart set: ");

  final String startSet =
    (m_starting == null) ? "no attributes" : startSetToString();
  out.append(startSet).append("\n");

  out.append("\tNumber of iterations: ")
     .append(m_iterations)
     .append(" (")
     .append(m_searchSize * 100.0)
     .append("% of the search space)\n");

  out.append("\tMerit of best subset found: ")
     .append(Utils.doubleToString(Math.abs(m_bestMerit), 8, 3))
     .append("\n");

  return out.toString();
}
/**
* Searches the attribute subset space randomly.
*
* @param ASEval the attribute evaluator to guide the search
* @param data the training instances.
* @return an array (not necessarily ordered) of selected attribute indexes
* @throws Exception if the search can't be completed
*/
public int[] search (ASEvaluation ASEval, Instances data)
throws Exception {
double best_merit;
int sizeOfBest = m_numAttribs;
BitSet temp;
m_bestGroup = new BitSet(m_numAttribs);
m_onlyConsiderBetterAndSmaller = false;
if (!(ASEval instanceof SubsetEvaluator)) {
throw new Exception(ASEval.getClass().getName()
+ " is not a "
+ "Subset evaluator!");
}
m_random = new Random(m_seed);
if (ASEval instanceof UnsupervisedSubsetEvaluator) {
m_hasClass = false;
}
else {
m_hasClass = true;
m_classIndex = data.classIndex();
}
SubsetEvaluator ASEvaluator = (SubsetEvaluator)ASEval;
m_numAttribs = data.numAttributes();
m_startRange.setUpper(m_numAttribs-1);
if (!(getStartSet().equals(""))) {
m_starting = m_startRange.getSelection();
}
// If a starting subset has been supplied, then initialise the bitset
if (m_starting != null) {
for (int i = 0; i < m_starting.length; i++) {
if ((m_starting[i]) != m_classIndex) {
m_bestGroup.set(m_starting[i]);
}
}
m_onlyConsiderBetterAndSmaller = true;
best_merit = ASEvaluator.evaluateSubset(m_bestGroup);
sizeOfBest = countFeatures(m_bestGroup);
} else {
// do initial random subset
m_bestGroup = generateRandomSubset();
best_merit = ASEvaluator.evaluateSubset(m_bestGroup);
}
if (m_verbose) {
System.out.println("Initial subset ("
+Utils.doubleToString(Math.
abs(best_merit),8,5)
+"): "+printSubset(m_bestGroup));
}
int i;
if (m_hasClass) {
i = m_numAttribs -1;
} else {
i = m_numAttribs;
}
m_iterations = (int)((m_searchSize * Math.pow(2, i)));
int tempSize;
double tempMerit;
// main loop
for (i=0;i= best_merit) {
sizeOfBest = tempSize;
m_bestGroup = temp;
best_merit = tempMerit;
if (m_verbose) {
System.out.print("New best subset ("
+Utils.doubleToString(Math.
abs(best_merit),8,5)
+"): "+printSubset(m_bestGroup) + " :");
System.out.println(Utils.
doubleToString((((double)i)/
((double)m_iterations)*
100.0),5,1)
+"% done");
}
}
}
} else {
tempMerit = ASEvaluator.evaluateSubset(temp);
if (tempMerit > best_merit) {
m_bestGroup = temp;
best_merit = tempMerit;
if (m_verbose) {
System.out.print("New best subset ("
+Utils.doubleToString(Math.abs(best_merit),8,5)
+"): "+printSubset(m_bestGroup) + " :");
System.out.println(Utils.
doubleToString((((double)i)/
((double)m_iterations)
*100.0),5,1)
+"% done");
}
}
}
}
m_bestMerit = best_merit;
return attributeList(m_bestGroup);
}
/**
* prints a subset as a series of attribute numbers
* @param temp the subset to print
* @return a subset as a String of attribute numbers
*/
private String printSubset(BitSet temp) {
StringBuffer text = new StringBuffer();
for (int j=0;j
© 2015 - 2025 Weber Informatics LLC | Privacy Policy