weka.attributeSelection.ScatterSearchV1 Maven / Gradle / Ivy
Show all versions of weka-stable Show documentation
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
/*
* ScatterSearchV1.java
* Copyright (C) 2008 Adrian Pino
* Copyright (C) 2008 University of Waikato, Hamilton, NZ
*
*/
package weka.attributeSelection;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.BitSet;
import java.util.Enumeration;
import java.util.List;
import java.util.Random;
import java.util.Vector;
import weka.core.Instances;
import weka.core.Option;
import weka.core.OptionHandler;
import weka.core.RevisionUtils;
import weka.core.SelectedTag;
import weka.core.Tag;
import weka.core.TechnicalInformation;
import weka.core.TechnicalInformationHandler;
import weka.core.Utils;
import weka.core.TechnicalInformation.Field;
import weka.core.TechnicalInformation.Type;
/**
* Class for performing the Sequential Scatter Search.
*
* Scatter Search :
*
* Performs an Scatter Search through the space of attribute subsets. Start with a population of many significants and diverses subset stops when the result is higher than a given treshold or there's not more improvement
* For more information see:
*
* Felix Garcia Lopez (2004). Solving feature subset selection problem by a Parallel Scatter Search. Elsevier.
*
*
* Valid options are:
*
* -Z <num>
* Specify the number of subsets to generate
* in the initial population..
*
* -T <threshold>
* Specify the treshold used for considering when a subset is significant.
*
* -R <0 = greedy combination | 1 = reduced greedy combination >
* Specify the kind of combiantion
* for using it in the combination method.
*
* -S <seed>
* Set the random number seed.
* (default = 1)
*
* -D
* Verbose output for monitoring the search.
*
*
* BibTeX:
*
* @book{Lopez2004,
* author = {Felix Garcia Lopez},
* month = {October},
* publisher = {Elsevier},
* title = {Solving feature subset selection problem by a Parallel Scatter Search},
* year = {2004},
* language = {English}
* }
*
*
*
* from the Book: Solving feature subset selection problem by a Parallel Scatter Search, Felix Garcia Lopez.
* @author Adrian Pino ([email protected])
* @version $Revision: 6277 $
*
*/
public class ScatterSearchV1 extends ASSearch
implements OptionHandler, TechnicalInformationHandler {
/** for serialization */
static final long serialVersionUID = -8512041420388121326L;
/** number of attributes in the data */
private int m_numAttribs;
/** holds the class index */
private int m_classIndex;
/** holds the treshhold that delimits the good attributes */
private double m_treshold;
/** the initial threshold */
private double m_initialThreshold;
/** the kind of comination betwen parents ((0)greedy combination/(1)reduced greedy combination)*/
int m_typeOfCombination;
/** random number generation */
private Random m_random;
/** seed for random number generation */
private int m_seed;
/** verbose output for monitoring the search and debugging */
private boolean m_debug = false;
/** holds a report of the search */
private StringBuffer m_InformationReports;
/** total number of subsets evaluated during a search */
private int m_totalEvals;
/** holds the merit of the best subset found */
protected double m_bestMerit;
/** time for procesing the search method */
private long m_processinTime;
/** holds the Initial Population of Subsets*/
private List m_population;
/** holds the population size*/
private int m_popSize;
/** holds the user selected initial population size */
private int m_initialPopSize;
/** if no initial user pop size, then this holds the initial
* pop size calculated from the number of attributes in the data
* (for use in the toString() method)
*/
private int m_calculatedInitialPopSize;
/** holds the subsets most significants and diverses
* of the population (ReferenceSet).
*
* (transient because the subList() method returns
* a non serializable Object).
*/
private transient List m_ReferenceSet;
/** holds the greedy combination(reduced or not) of all the subsets of the ReferenceSet*/
private transient List m_parentsCombination;
/**holds the attributes ranked*/
private List m_attributeRanking;
/**Evaluator used to know the significance of a subset (for guiding the search)*/
private SubsetEvaluator ASEvaluator =null;
/** kind of combination */
protected static final int COMBINATION_NOT_REDUCED = 0;
protected static final int COMBINATION_REDUCED = 1; ;
public static final Tag [] TAGS_SELECTION = {
new Tag(COMBINATION_NOT_REDUCED, "Greedy Combination"),
new Tag(COMBINATION_REDUCED, "Reduced Greedy Combination")
};
/**
* Returns a string describing this search method
* @return a description of the search suitable for
* displaying in the explorer/experimenter gui
*/
public String globalInfo() {
return "Scatter Search :\n\nPerforms an Scatter Search "
+"through "
+"the space of attribute subsets. Start with a population of many significants and diverses subset "
+" stops when the result is higher than a given treshold or there's not more improvement\n"
+ "For more information see:\n\n"
+ getTechnicalInformation().toString();
}
/**
* Returns an instance of a TechnicalInformation object, containing
* detailed information about the technical background of this class,
* e.g., paper reference or book this class is based on.
*
* @return the technical information about this class
*/
public TechnicalInformation getTechnicalInformation() {
TechnicalInformation result;
result = new TechnicalInformation(Type.BOOK);
result.setValue(Field.AUTHOR, "Felix Garcia Lopez");
result.setValue(Field.MONTH, "October");
result.setValue(Field.YEAR, "2004");
result.setValue(Field.TITLE, "Solving feature subset selection problem by a Parallel Scatter Search");
result.setValue(Field.PUBLISHER, "Elsevier");
result.setValue(Field.LANGUAGE, "English");
return result;
}
/**
* Returns the revision string.
*
* @return the revision
*/
public String getRevision() {
return RevisionUtils.extract("$Revision: 1.0$");
}
public ScatterSearchV1 () {
resetOptions();
}
/**
* Returns the tip text for this property
* @return tip text for this property suitable for
* displaying in the explorer/experimenter gui
*/
public String thresholdTipText() {
return "Set the treshold that subsets most overcome to be considered as significants";
}
/**
* Set the treshold
*
* @param threshold for identifyng significant subsets
*/
public void setThreshold (double threshold) {
m_initialThreshold = threshold;
}
/**
* Get the treshold
*
* @return the treshold that subsets most overcome to be considered as significants
*/
public double getThreshold () {
return m_initialThreshold;
}
/**
* Returns the tip text for this property
* @return tip text for this property suitable for
* displaying in the explorer/experimenter gui
*/
public String populationSizeTipText() {
return "Set the number of subset to generate in the initial Population";
}
/**
* Set the population size
*
* @param size the number of subset in the initial population
*/
public void setPopulationSize (int size) {
m_initialPopSize = size;
}
/**
* Get the population size
*
* @return the number of subsets to generate in the initial population
*/
public int getPopulationSize () {
return m_initialPopSize;
}
/**
* Returns the tip text for this property
* @return tip text for this property suitable for
* displaying in the explorer/experimenter gui
*/
public String combinationTipText() {
return "Set the kind of combination for using it to combine ReferenceSet subsets.";
}
/**
* Set the kind of combination
*
* @param c the kind of combination of the search
*/
public void setCombination (SelectedTag c) {
if (c.getTags() == TAGS_SELECTION) {
m_typeOfCombination = c.getSelectedTag().getID();
}
}
/**
* Get the combination
*
* @return the kind of combination used in the Combination method
*/
public SelectedTag getCombination () {
return new SelectedTag(m_typeOfCombination, TAGS_SELECTION);
}
/**
* Returns the tip text for this property
* @return tip text for this property suitable for
* displaying in the explorer/experimenter gui
*/
public String seedTipText() {
return "Set the random seed.";
}
/**
* set the seed for random number generation
* @param s seed value
*/
public void setSeed(int s) {
m_seed = s;
}
/**
* get the value of the random number generator's seed
* @return the seed for random number generation
*/
public int getSeed() {
return m_seed;
}
/**
* Returns the tip text for this property
* @return tip text for this property suitable for
* displaying in the explorer/experimenter gui
*/
public String debugTipText() {
return "Turn on verbose output for monitoring the search's progress.";
}
/**
* Set whether verbose output should be generated.
* @param d true if output is to be verbose.
*/
public void setDebug(boolean d) {
m_debug = d;
}
/**
* Get whether output is to be verbose
* @return true if output will be verbose
*/
public boolean getDebug() {
return m_debug;
}
/**
* Returns an enumeration describing the available options.
* @return an enumeration of all the available options.
**/
public Enumeration listOptions () {
Vector newVector = new Vector(6);
newVector.addElement(new Option("\tSpecify the number of subsets to generate "
+ "\n\tin the initial population.."
,"Z",1
, "-Z "));
newVector.addElement(new Option("\tSpecify the treshold used for considering when a subset is significant."
, "T", 1
, "-T "));
newVector.addElement(new Option("\tSpecify the kind of combiantion "
+ "\n\tfor using it in the combination method."
, "R", 1, "-R <0 = greedy combination | 1 = reduced greedy combination >"));
newVector.addElement(new Option("\tSet the random number seed."
+"\n\t(default = 1)"
, "S", 1, "-S "));
newVector.addElement(new Option("\tVerbose output for monitoring the search.","D",0,"-D"));
return newVector.elements();
}
/**
* Parses a given list of options.
*
* Valid options are:
*
* -Z
* Specify the number of subsets to generate in the initial population.
*
* -T
* Specify the treshold used for considering when a subset is significant.
*
* -R
* Specify the kind of combiantion.
*
* -S
* Set the random number seed.
* (default = 1)
*
* -D
* Verbose output for monitoring the search
* (default = false)
*
*
* @param options the list of options as an array of strings
* @exception Exception if an option is not supported
*
**/
public void setOptions (String[] options)
throws Exception {
String optionString;
resetOptions();
optionString = Utils.getOption('Z', options);
if (optionString.length() != 0) {
setPopulationSize(Integer.parseInt(optionString));
}
optionString = Utils.getOption('T', options);
if (optionString.length() != 0) {
setThreshold(Double.parseDouble(optionString));
}
optionString = Utils.getOption('R', options);
if (optionString.length() != 0) {
setCombination(new SelectedTag(Integer.parseInt(optionString),
TAGS_SELECTION));
} else {
setCombination(new SelectedTag(COMBINATION_NOT_REDUCED, TAGS_SELECTION));
}
optionString = Utils.getOption('S', options);
if (optionString.length() != 0) {
setSeed(Integer.parseInt(optionString));
}
setDebug(Utils.getFlag('D', options));
}
/**
* Gets the current settings of ScatterSearchV1.
*
* @return an array of strings suitable for passing to setOptions()
*/
public String[] getOptions () {
String[] options = new String[9];
int current = 0;
options[current++] = "-T";
options[current++] = "" + getThreshold ();
options[current++] = "-Z";
options[current++] = ""+getPopulationSize ();
options[current++] = "-R";
options[current++] = ""+String.valueOf (getCombination ().getSelectedTag ().getID ());
options[current++] = "-S";
options[current++] = "" + getSeed();
if (getDebug())
options[current++] = "-D";
while (current < options.length)
options[current++] = "";
return options;
}
/**
* returns a description of the search.
* @return a description of the search as a String.
*/
public String toString() {
StringBuffer FString = new StringBuffer();
FString.append("\tScatter Search "
+ "\n\tInit Population: "+m_calculatedInitialPopSize);
FString.append("\n\tKind of Combination: "
+getCombination ().getSelectedTag ().getReadable ());
FString.append("\n\tRandom number seed: "+m_seed);
FString.append("\n\tDebug: "+m_debug);
FString.append("\n\tTreshold: "
+Utils.doubleToString(Math.abs(getThreshold ()),8,3)+"\n");
FString.append("\tTotal number of subsets evaluated: "
+ m_totalEvals + "\n");
FString.append("\tMerit of best subset found: "
+Utils.doubleToString(Math.abs(m_bestMerit),8,3)+"\n");
/* FString.append("\tTime procesing the search space: "
+(double)m_processinTime/1000+" seconds\n"); */
if(m_debug)
return FString.toString()+"\n\n"+m_InformationReports.toString ();
return FString.toString();
}
/**
* Searches the attribute subset space using Scatter Search.
*
* @param ASEval the attribute evaluator to guide the search
* @param data the training instances.
* @return an array of selected attribute indexes
* @exception Exception if the search can't be completed
*/
public int[] search(ASEvaluation ASEval, Instances data)
throws Exception{
m_totalEvals = 0;
m_popSize = m_initialPopSize;
m_calculatedInitialPopSize = m_initialPopSize;
m_treshold = m_initialThreshold;
m_processinTime =System.currentTimeMillis ();
m_InformationReports = new StringBuffer();
m_numAttribs =data.numAttributes ();
m_classIndex =data.classIndex ();
if(m_popSize<=0) {
m_popSize =m_numAttribs/2;
m_calculatedInitialPopSize = m_popSize;
}
ASEvaluator =(SubsetEvaluator)ASEval;
if(!(m_treshold >= 0)){
m_treshold =calculateTreshhold();
m_totalEvals++;
}
m_random = new Random(m_seed);
m_attributeRanking =RankEachAttribute();
CreatePopulation(m_popSize);
int bestSolutions =m_popSize/4;
int divSolutions =m_popSize/4;
if(m_popSize < 4){
bestSolutions = m_popSize/2;
divSolutions = m_popSize/2;
if(m_popSize == 1) return attributeList(((Subset)m_population.get (0)).subset);
}
m_ReferenceSet =new ArrayList();
for (int i = 0; i bestTemp.merit)*/) {
//while(){
CombineParents();
ImproveSolutions();
// }
bestTemp =m_ReferenceSet.get (0);
int numBest =m_ReferenceSet.size ()/2;
int numDiverses =m_ReferenceSet.size ()/2;
UpdateReferenceSet(numBest, numDiverses);
m_ReferenceSet = m_ReferenceSet.subList (0,numBest+numDiverses);
}
m_InformationReports.append("\nLast Reference Set Updated:\n");
m_InformationReports.append ("merit \tsubset\n");
for (int i = 0; i ReferenceSet, int bestSolutions, int divSolutions){
//Sorting the Initial ReferenceSet
ReferenceSet =bubbleSubsetSort (ReferenceSet);
// storing all the attributes that are now in the ReferenceSet (just till bestSolutions)
BitSet allBits_RefSet =getAllBits (ReferenceSet.subList (0,bestSolutions));
// for stopping when ReferenceSet.size () ==bestSolutions+divSolutions
int refSetlength =bestSolutions;
int total =bestSolutions+divSolutions;
while (refSetlength aux =new ArrayList();
for (int i =refSetlength; i ranking =new ArrayList();
/*
for(int j=aux1.nextClearBit (0); j<=m_numAttribs; j=aux1.nextClearBit(j+1)){
if(j ==m_classIndex)continue;
BitSet aux2 =new BitSet(m_numAttribs);
aux2.set (j);
double merit =ASEvaluator.evaluateSubset (aux2);
m_totalEvals++;
ranking.add (new Subset((BitSet)aux2.clone (), merit));
}
ranking =bubbleSubsetSort (ranking);
*/
for (int k =0; k ();
// this two 'for' are for selecting parents in the refSet
for (int i= 0; i merit2){
child1 =best1.clone ();
continue;
}
if(merit1 best2.subset.cardinality ()){
child2 =best2.clone ();
continue;
}
if(best1.subset.cardinality () < best2.subset.cardinality ()){
child1 =best1.clone ();
continue;
}
if(best1.subset.cardinality () == best2.subset.cardinality ()){
double random = m_random.nextDouble ();
if(random < 0.5)child1 =best1.clone ();
else child2 =best2.clone ();
continue;
}
}
}
}else{
m_parentsCombination.add (child1);
m_parentsCombination.add (child2);
improvement =false;
}
}
}
}
m_parentsCombination = filterSubset (m_parentsCombination,m_ReferenceSet.size());
GenerateReferenceSet (m_parentsCombination,m_ReferenceSet.size ()/2, m_ReferenceSet.size ()/2);
m_parentsCombination = m_parentsCombination.subList (0, m_ReferenceSet.size ());
}
/**
* Create the initial Population
*
* @param popSize the size of the initial population
* @exception Exception if there is a trouble evaluating any solution
*/
public void CreatePopulation(int popSize)
throws Exception{
InitPopulation(popSize);
/** Delimit the best attributes from the worst*/
int segmentation =m_numAttribs/2;
/*TEST*/
/* System.out.println ("AttributeRanking");
for (int i = 0; i attributeRankingCopy = new ArrayList();
for (int j = 0; j last_evaluation){
m_population.set (i,joiners);
last_evaluation =current_evaluation;
try {
attributeRankingCopy.set (random_number, attributeRankingCopy.get (segmentation+1));
attributeRankingCopy.remove (segmentation+1);
}catch (IndexOutOfBoundsException ex) {
attributeRankingCopy.set (random_number,new Subset(new BitSet(m_numAttribs),0));
continue;
}
}
else{
// there's not more improvement
break;
}
}
}
//m_population =bubbleSubsetSort (m_population);
}
/**
* Rank all the attributes individually acording to their merits
*
* @return an ordered List of Subsets with just one attribute
* @exception Exception if the evaluation can not be completed
*/
public List RankEachAttribute()
throws Exception{
List result =new ArrayList();
for (int i = 0; i=0; i =gens.nextSetBit(i+1)){
BitSet aux =(BitSet)subset.subset.clone ();
if(aux.get (i))continue;
aux.set (i);
double merit2 =ASEvaluator.evaluateSubset (aux);
m_totalEvals++;
if(merit2 >merit1){
merit1 =merit2;
result =new Subset(aux,merit1);
}
}
return result;
}
/**
* Sort a List of subsets according to their merits
*
* @param subsetList the subsetList to be ordered
* @return a List with ordered subsets
*/
public List bubbleSubsetSort(List subsetList){
List result =new ArrayList();
for (int i = 0; i merit1){
Subset temp =subset1;
subsetList.set (i,subset2);
subsetList.set (j,temp);
subset1 =subset2;
merit1 =subset1.merit;
}
}
}
return subsetList;
}
/**
* get the index in a List where this have the biggest number
*
* @param simDif the Lists of numbers for getting from them the index of the bigger
* @return an index that represents where the bigest number is.
*/
public int getIndexofBiggest(List simDif){
int aux =-99999;
int result1 =-1;
List equalSimDif =new ArrayList();
if(simDif.size ()==0) return -1;
for (int i = 0; iaux){
aux =simDif.get (i);
result1 =i;
}
}
for (int i =0; i subsets){
BitSet result =new BitSet(m_numAttribs);
for (int i =0; i =0; j=aux.nextSetBit(j+1)) {
result.set (j);
}
}
return result;
}
/**
* Creating space for introducing the population
*
* @param popSize the number of subset in the initial population
*/
public void InitPopulation(int popSize){
m_population =new ArrayList();
for (int i = 0; i weightVector =new ArrayList();
BitSet res =result.subset;
for(int i=res.nextSetBit(0); i>=0; i=res.nextSetBit(i+1)){
double merits =0;
int numSolutions =0;
Subset solution =null;
for (int j = 0; j =avgAcurracy){
newResult.or (aux.subset);
}
}
double merit =ASEvaluator.evaluateSubset (newResult);
result =new Subset(newResult, merit);
}
return result;
}
public int generateRandomNumber(int limit){
return (int)Math.round (Math.random ()*(limit+0.4));
}
/**
* Calculate the treshold of a dataSet given an evaluator
*
* @return the treshhold of the dataSet
* @exception Exception if the calculation can not be completed
*/
public double calculateTreshhold()
throws Exception{
BitSet fullSet =new BitSet(m_numAttribs);
for (int i= 0; i< m_numAttribs; i++) {
if(i ==m_classIndex)continue;
fullSet.set (i);
}
return ASEvaluator.evaluateSubset (fullSet);
}
/**
* Calculate the Simetric Diference of two subsets
*
* @return the Simetric Diference
* @exception Exception if the calculation can not be completed
*/
public int SimetricDiference(Subset subset, BitSet bitset){
BitSet aux =subset.clone ().subset;
aux.xor (bitset);
return aux.cardinality ();
}
/**
* Filter a given Lis of Subsets removing the equals subsets
* @param subsetList to filter
* @param preferredSize the preferred size of the new List (if it is -1, then the filter is make it
* for all subsets, else then the filter method stops when the given preferred
* size is reached or all the subset have been filtered).
* @return a new List filtered
* @exception Exception if the calculation can not be completed
*/
public List filterSubset(List subsetList, int preferredSize){
if(subsetList.size () <=preferredSize && preferredSize !=-1)return subsetList;
for (int i =0; i indexes =new ArrayList();
for (int i = 0; i