weka.attributeSelection.GeneticSearch Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of weka-stable Show documentation
Show all versions of weka-stable Show documentation
The Waikato Environment for Knowledge Analysis (WEKA), a machine
learning workbench. This is the stable version. Apart from bugfixes, this version
does not receive any other updates.
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
/*
* GeneticSearch.java
* Copyright (C) 1999 University of Waikato, Hamilton, New Zealand
*
*/
package weka.attributeSelection;
import weka.core.Instances;
import weka.core.Option;
import weka.core.OptionHandler;
import weka.core.Range;
import weka.core.RevisionHandler;
import weka.core.RevisionUtils;
import weka.core.TechnicalInformation;
import weka.core.TechnicalInformationHandler;
import weka.core.Utils;
import weka.core.TechnicalInformation.Field;
import weka.core.TechnicalInformation.Type;
import java.io.Serializable;
import java.util.BitSet;
import java.util.Enumeration;
import java.util.Hashtable;
import java.util.Random;
import java.util.Vector;
/**
* GeneticSearch:
*
* Performs a search using the simple genetic algorithm described in Goldberg (1989).
*
* For more information see:
*
* David E. Goldberg (1989). Genetic algorithms in search, optimization and machine learning. Addison-Wesley.
*
*
* BibTeX:
*
* @book{Goldberg1989,
* author = {David E. Goldberg},
* publisher = {Addison-Wesley},
* title = {Genetic algorithms in search, optimization and machine learning},
* year = {1989},
* ISBN = {0201157675}
* }
*
*
*
* Valid options are:
*
* -P <start set>
* Specify a starting set of attributes.
* Eg. 1,3,5-7.If supplied, the starting set becomes
* one member of the initial random
* population.
*
* -Z <population size>
* Set the size of the population (even number).
* (default = 20).
*
* -G <number of generations>
* Set the number of generations.
* (default = 20)
*
* -C <probability of crossover>
* Set the probability of crossover.
* (default = 0.6)
*
* -M <probability of mutation>
* Set the probability of mutation.
* (default = 0.033)
*
* -R <report frequency>
* Set frequency of generation reports.
* e.g, setting the value to 5 will
* report every 5th generation
* (default = number of generations)
*
* -S <seed>
* Set the random number seed.
* (default = 1)
*
*
* @author Mark Hall ([email protected])
* @version $Revision: 6759 $
*/
public class GeneticSearch
extends ASSearch
implements StartSetHandler, OptionHandler, TechnicalInformationHandler {
/** for serialization */
static final long serialVersionUID = -1618264232838472679L;
/**
* holds a starting set as an array of attributes. Becomes one member of the
* initial random population
*/
private int[] m_starting;
/** holds the start set for the search as a Range */
private Range m_startRange;
/** does the data have a class */
private boolean m_hasClass;
/** holds the class index */
private int m_classIndex;
/** number of attributes in the data */
private int m_numAttribs;
/** the current population */
private GABitSet [] m_population;
/** the number of individual solutions */
private int m_popSize;
/** the best population member found during the search */
private GABitSet m_best;
/** the number of features in the best population member */
private int m_bestFeatureCount;
/** the number of entries to cache for lookup */
private int m_lookupTableSize;
/** the lookup table */
private Hashtable m_lookupTable;
/** random number generation */
private Random m_random;
/** seed for random number generation */
private int m_seed;
/** the probability of crossover occuring */
private double m_pCrossover;
/** the probability of mutation occuring */
private double m_pMutation;
/** sum of the current population fitness */
private double m_sumFitness;
private double m_maxFitness;
private double m_minFitness;
private double m_avgFitness;
/** the maximum number of generations to evaluate */
private int m_maxGenerations;
/** how often reports are generated */
private int m_reportFrequency;
/** holds the generation reports */
private StringBuffer m_generationReports;
// Inner class
/**
* A bitset for the genetic algorithm
*/
protected class GABitSet
implements Cloneable, Serializable, RevisionHandler {
/** for serialization */
static final long serialVersionUID = -2930607837482622224L;
/** the bitset */
private BitSet m_chromosome;
/** holds raw merit */
private double m_objective = -Double.MAX_VALUE;
/** the fitness */
private double m_fitness;
/**
* Constructor
*/
public GABitSet () {
m_chromosome = new BitSet();
}
/**
* makes a copy of this GABitSet
* @return a copy of the object
* @throws CloneNotSupportedException if something goes wrong
*/
public Object clone() throws CloneNotSupportedException {
GABitSet temp = new GABitSet();
temp.setObjective(this.getObjective());
temp.setFitness(this.getFitness());
temp.setChromosome((BitSet)(this.m_chromosome.clone()));
return temp;
//return super.clone();
}
/**
* sets the objective merit value
* @param objective the objective value of this population member
*/
public void setObjective(double objective) {
m_objective = objective;
}
/**
* gets the objective merit
* @return the objective merit of this population member
*/
public double getObjective() {
return m_objective;
}
/**
* sets the scaled fitness
* @param fitness the scaled fitness of this population member
*/
public void setFitness(double fitness) {
m_fitness = fitness;
}
/**
* gets the scaled fitness
* @return the scaled fitness of this population member
*/
public double getFitness() {
return m_fitness;
}
/**
* get the chromosome
* @return the chromosome of this population member
*/
public BitSet getChromosome() {
return m_chromosome;
}
/**
* set the chromosome
* @param c the chromosome to be set for this population member
*/
public void setChromosome(BitSet c) {
m_chromosome = c;
}
/**
* unset a bit in the chromosome
* @param bit the bit to be cleared
*/
public void clear(int bit) {
m_chromosome.clear(bit);
}
/**
* set a bit in the chromosome
* @param bit the bit to be set
*/
public void set(int bit) {
m_chromosome.set(bit);
}
/**
* get the value of a bit in the chromosome
* @param bit the bit to query
* @return the value of the bit
*/
public boolean get(int bit) {
return m_chromosome.get(bit);
}
/**
* Returns the revision string.
*
* @return the revision
*/
public String getRevision() {
return RevisionUtils.extract("$Revision: 6759 $");
}
}
/**
* Returns an enumeration describing the available options.
* @return an enumeration of all the available options.
**/
public Enumeration listOptions () {
Vector newVector = new Vector(6);
newVector.addElement(new Option("\tSpecify a starting set of attributes."
+ "\n\tEg. 1,3,5-7."
+"If supplied, the starting set becomes"
+"\n\tone member of the initial random"
+"\n\tpopulation."
,"P",1
, "-P "));
newVector.addElement(new Option("\tSet the size of the population (even number)."
+"\n\t(default = 20)."
, "Z", 1
, "-Z "));
newVector.addElement(new Option("\tSet the number of generations."
+"\n\t(default = 20)"
, "G", 1, "-G "));
newVector.addElement(new Option("\tSet the probability of crossover."
+"\n\t(default = 0.6)"
, "C", 1, "-C "));
newVector.addElement(new Option("\tSet the probability of mutation."
+"\n\t(default = 0.033)"
, "M", 1, "-M "));
newVector.addElement(new Option("\tSet frequency of generation reports."
+"\n\te.g, setting the value to 5 will "
+"\n\treport every 5th generation"
+"\n\t(default = number of generations)"
, "R", 1, "-R "));
newVector.addElement(new Option("\tSet the random number seed."
+"\n\t(default = 1)"
, "S", 1, "-S "));
return newVector.elements();
}
/**
* Parses a given list of options.
*
* Valid options are:
*
* -P <start set>
* Specify a starting set of attributes.
* Eg. 1,3,5-7.If supplied, the starting set becomes
* one member of the initial random
* population.
*
* -Z <population size>
* Set the size of the population (even number).
* (default = 20).
*
* -G <number of generations>
* Set the number of generations.
* (default = 20)
*
* -C <probability of crossover>
* Set the probability of crossover.
* (default = 0.6)
*
* -M <probability of mutation>
* Set the probability of mutation.
* (default = 0.033)
*
* -R <report frequency>
* Set frequency of generation reports.
* e.g, setting the value to 5 will
* report every 5th generation
* (default = number of generations)
*
* -S <seed>
* Set the random number seed.
* (default = 1)
*
*
* @param options the list of options as an array of strings
* @throws Exception if an option is not supported
*
**/
public void setOptions (String[] options)
throws Exception {
String optionString;
resetOptions();
optionString = Utils.getOption('P', options);
if (optionString.length() != 0) {
setStartSet(optionString);
}
optionString = Utils.getOption('Z', options);
if (optionString.length() != 0) {
setPopulationSize(Integer.parseInt(optionString));
}
optionString = Utils.getOption('G', options);
if (optionString.length() != 0) {
setMaxGenerations(Integer.parseInt(optionString));
setReportFrequency(Integer.parseInt(optionString));
}
optionString = Utils.getOption('C', options);
if (optionString.length() != 0) {
setCrossoverProb((new Double(optionString)).doubleValue());
}
optionString = Utils.getOption('M', options);
if (optionString.length() != 0) {
setMutationProb((new Double(optionString)).doubleValue());
}
optionString = Utils.getOption('R', options);
if (optionString.length() != 0) {
setReportFrequency(Integer.parseInt(optionString));
}
optionString = Utils.getOption('S', options);
if (optionString.length() != 0) {
setSeed(Integer.parseInt(optionString));
}
}
/**
* Gets the current settings of ReliefFAttributeEval.
*
* @return an array of strings suitable for passing to setOptions()
*/
public String[] getOptions () {
String[] options = new String[14];
int current = 0;
if (!(getStartSet().equals(""))) {
options[current++] = "-P";
options[current++] = ""+startSetToString();
}
options[current++] = "-Z";
options[current++] = "" + getPopulationSize();
options[current++] = "-G";
options[current++] = "" + getMaxGenerations();
options[current++] = "-C";
options[current++] = "" + getCrossoverProb();
options[current++] = "-M";
options[current++] = "" + getMutationProb();
options[current++] = "-R";
options[current++] = "" + getReportFrequency();
options[current++] = "-S";
options[current++] = "" + getSeed();
while (current < options.length) {
options[current++] = "";
}
return options;
}
/**
* Returns the tip text for this property
* @return tip text for this property suitable for
* displaying in the explorer/experimenter gui
*/
public String startSetTipText() {
return "Set a start point for the search. This is specified as a comma "
+"seperated list off attribute indexes starting at 1. It can include "
+"ranges. Eg. 1,2,5-9,17. The start set becomes one of the population "
+"members of the initial population.";
}
/**
* Sets a starting set of attributes for the search. It is the
* search method's responsibility to report this start set (if any)
* in its toString() method.
* @param startSet a string containing a list of attributes (and or ranges),
* eg. 1,2,6,10-15.
* @throws Exception if start set can't be set.
*/
public void setStartSet (String startSet) throws Exception {
m_startRange.setRanges(startSet);
}
/**
* Returns a list of attributes (and or attribute ranges) as a String
* @return a list of attributes (and or attribute ranges)
*/
public String getStartSet () {
return m_startRange.getRanges();
}
/**
* Returns the tip text for this property
* @return tip text for this property suitable for
* displaying in the explorer/experimenter gui
*/
public String seedTipText() {
return "Set the random seed.";
}
/**
* set the seed for random number generation
* @param s seed value
*/
public void setSeed(int s) {
m_seed = s;
}
/**
* get the value of the random number generator's seed
* @return the seed for random number generation
*/
public int getSeed() {
return m_seed;
}
/**
* Returns the tip text for this property
* @return tip text for this property suitable for
* displaying in the explorer/experimenter gui
*/
public String reportFrequencyTipText() {
return "Set how frequently reports are generated. Default is equal to "
+"the number of generations meaning that a report will be printed for "
+"initial and final generations. Setting the value to 5 will result in "
+"a report being printed every 5 generations.";
}
/**
* set how often reports are generated
* @param f generate reports every f generations
*/
public void setReportFrequency(int f) {
m_reportFrequency = f;
}
/**
* get how often repports are generated
* @return how often reports are generated
*/
public int getReportFrequency() {
return m_reportFrequency;
}
/**
* Returns the tip text for this property
* @return tip text for this property suitable for
* displaying in the explorer/experimenter gui
*/
public String mutationProbTipText() {
return "Set the probability of mutation occuring.";
}
/**
* set the probability of mutation
* @param m the probability for mutation occuring
*/
public void setMutationProb(double m) {
m_pMutation = m;
}
/**
* get the probability of mutation
* @return the probability of mutation occuring
*/
public double getMutationProb() {
return m_pMutation;
}
/**
* Returns the tip text for this property
* @return tip text for this property suitable for
* displaying in the explorer/experimenter gui
*/
public String crossoverProbTipText() {
return "Set the probability of crossover. This is the probability that "
+"two population members will exchange genetic material.";
}
/**
* set the probability of crossover
* @param c the probability that two population members will exchange
* genetic material
*/
public void setCrossoverProb(double c) {
m_pCrossover = c;
}
/**
* get the probability of crossover
* @return the probability of crossover
*/
public double getCrossoverProb() {
return m_pCrossover;
}
/**
* Returns the tip text for this property
* @return tip text for this property suitable for
* displaying in the explorer/experimenter gui
*/
public String maxGenerationsTipText() {
return "Set the number of generations to evaluate.";
}
/**
* set the number of generations to evaluate
* @param m the number of generations
*/
public void setMaxGenerations(int m) {
m_maxGenerations = m;
}
/**
* get the number of generations
* @return the maximum number of generations
*/
public int getMaxGenerations() {
return m_maxGenerations;
}
/**
* Returns the tip text for this property
* @return tip text for this property suitable for
* displaying in the explorer/experimenter gui
*/
public String populationSizeTipText() {
return "Set the population size (even number), this is the number of individuals "
+"(attribute sets) in the population.";
}
/**
* set the population size
* @param p the size of the population
*/
public void setPopulationSize(int p) {
if (p % 2 == 0)
m_popSize = p;
else
System.out.println("Population size needs to be an even number!");
}
/**
* get the size of the population
* @return the population size
*/
public int getPopulationSize() {
return m_popSize;
}
/**
* Returns a string describing this search method
* @return a description of the search suitable for
* displaying in the explorer/experimenter gui
*/
public String globalInfo() {
return
"GeneticSearch:\n\nPerforms a search using the simple genetic "
+ "algorithm described in Goldberg (1989).\n\n"
+ "For more information see:\n\n"
+ getTechnicalInformation().toString();
}
/**
* Returns an instance of a TechnicalInformation object, containing
* detailed information about the technical background of this class,
* e.g., paper reference or book this class is based on.
*
* @return the technical information about this class
*/
public TechnicalInformation getTechnicalInformation() {
TechnicalInformation result;
result = new TechnicalInformation(Type.BOOK);
result.setValue(Field.AUTHOR, "David E. Goldberg");
result.setValue(Field.YEAR, "1989");
result.setValue(Field.TITLE, "Genetic algorithms in search, optimization and machine learning");
result.setValue(Field.ISBN, "0201157675");
result.setValue(Field.PUBLISHER, "Addison-Wesley");
return result;
}
/**
* Constructor. Make a new GeneticSearch object
*/
public GeneticSearch() {
resetOptions();
}
/**
* converts the array of starting attributes to a string. This is
* used by getOptions to return the actual attributes specified
* as the starting set. This is better than using m_startRanges.getRanges()
* as the same start set can be specified in different ways from the
* command line---eg 1,2,3 == 1-3. This is to ensure that stuff that
* is stored in a database is comparable.
* @return a comma seperated list of individual attribute numbers as a String
*/
private String startSetToString() {
StringBuffer FString = new StringBuffer();
boolean didPrint;
if (m_starting == null) {
return getStartSet();
}
for (int i = 0; i < m_starting.length; i++) {
didPrint = false;
if ((m_hasClass == false) ||
(m_hasClass == true && i != m_classIndex)) {
FString.append((m_starting[i] + 1));
didPrint = true;
}
if (i == (m_starting.length - 1)) {
FString.append("");
}
else {
if (didPrint) {
FString.append(",");
}
}
}
return FString.toString();
}
/**
* returns a description of the search
* @return a description of the search as a String
*/
public String toString() {
StringBuffer GAString = new StringBuffer();
GAString.append("\tGenetic search.\n\tStart set: ");
if (m_starting == null) {
GAString.append("no attributes\n");
}
else {
GAString.append(startSetToString()+"\n");
}
GAString.append("\tPopulation size: "+m_popSize);
GAString.append("\n\tNumber of generations: "+m_maxGenerations);
GAString.append("\n\tProbability of crossover: "
+Utils.doubleToString(m_pCrossover,6,3));
GAString.append("\n\tProbability of mutation: "
+Utils.doubleToString(m_pMutation,6,3));
GAString.append("\n\tReport frequency: "+m_reportFrequency);
GAString.append("\n\tRandom number seed: "+m_seed+"\n");
GAString.append(m_generationReports.toString());
return GAString.toString();
}
/**
* Searches the attribute subset space using a genetic algorithm.
*
* @param ASEval the attribute evaluator to guide the search
* @param data the training instances.
* @return an array (not necessarily ordered) of selected attribute indexes
* @throws Exception if the search can't be completed
*/
public int[] search (ASEvaluation ASEval, Instances data)
throws Exception {
m_best = null;
m_generationReports = new StringBuffer();
if (!(ASEval instanceof SubsetEvaluator)) {
throw new Exception(ASEval.getClass().getName()
+ " is not a "
+ "Subset evaluator!");
}
if (ASEval instanceof UnsupervisedSubsetEvaluator) {
m_hasClass = false;
}
else {
m_hasClass = true;
m_classIndex = data.classIndex();
}
SubsetEvaluator ASEvaluator = (SubsetEvaluator)ASEval;
m_numAttribs = data.numAttributes();
m_startRange.setUpper(m_numAttribs-1);
if (!(getStartSet().equals(""))) {
m_starting = m_startRange.getSelection();
}
// initial random population
m_lookupTable = new Hashtable(m_lookupTableSize);
m_random = new Random(m_seed);
m_population = new GABitSet [m_popSize];
// set up random initial population
initPopulation();
evaluatePopulation(ASEvaluator);
populationStatistics();
scalePopulation();
checkBest();
m_generationReports.append(populationReport(0));
boolean converged;
for (int i=1;i<=m_maxGenerations;i++) {
generation();
evaluatePopulation(ASEvaluator);
populationStatistics();
scalePopulation();
// find the best pop member and check for convergence
converged = checkBest();
if ((i == m_maxGenerations) ||
((i % m_reportFrequency) == 0) ||
(converged == true)) {
m_generationReports.append(populationReport(i));
if (converged == true) {
break;
}
}
}
return attributeList(m_best.getChromosome());
}
/**
* converts a BitSet into a list of attribute indexes
* @param group the BitSet to convert
* @return an array of attribute indexes
**/
private int[] attributeList (BitSet group) {
int count = 0;
// count how many were selected
for (int i = 0; i < m_numAttribs; i++) {
if (group.get(i)) {
count++;
}
}
int[] list = new int[count];
count = 0;
for (int i = 0; i < m_numAttribs; i++) {
if (group.get(i)) {
list[count++] = i;
}
}
return list;
}
/**
* checks to see if any population members in the current
* population are better than the best found so far. Also checks
* to see if the search has converged---that is there is no difference
* in fitness between the best and worse population member
* @return true is the search has converged
* @throws Exception if something goes wrong
*/
private boolean checkBest() throws Exception {
int i,count,lowestCount = m_numAttribs;
double b = -Double.MAX_VALUE;
GABitSet localbest = null;
BitSet temp;
boolean converged = false;
int oldcount = Integer.MAX_VALUE;
if (m_maxFitness - m_minFitness > 0) {
// find the best in this population
for (i=0;i b) {
b = m_population[i].getObjective();
localbest = m_population[i];
oldcount = countFeatures(localbest.getChromosome());
} else if (Utils.eq(m_population[i].getObjective(), b)) {
// see if it contains fewer features
count = countFeatures(m_population[i].getChromosome());
if (count < oldcount) {
b = m_population[i].getObjective();
localbest = m_population[i];
oldcount = count;
}
}
}
} else {
// look for the smallest subset
for (i=0;i m_best.getObjective()) {
m_best = (GABitSet)localbest.clone();
m_bestFeatureCount = count;
} else if (Utils.eq(m_best.getObjective(), b)) {
// see if the localbest has fewer features than the best so far
if (count < m_bestFeatureCount) {
m_best = (GABitSet)localbest.clone();
m_bestFeatureCount = count;
}
}
return converged;
}
/**
* counts the number of features in a subset
* @param featureSet the feature set for which to count the features
* @return the number of features in the subset
*/
private int countFeatures(BitSet featureSet) {
int count = 0;
for (int i=0;i best_fit) {
j = i;
best_fit = m_population[i].getFitness();
old_count = countFeatures(m_population[i].getChromosome());
} else if (Utils.eq(m_population[i].getFitness(), best_fit)) {
count = countFeatures(m_population[i].getChromosome());
if (count < old_count) {
j = i;
best_fit = m_population[i].getFitness();
old_count = count;
}
}
}
newPop[0] = (GABitSet)(m_population[j].clone());
newPop[1] = newPop[0];
for (j=2;j= 3) {
if (r < m_pCrossover) {
// cross point
int cp = Math.abs(m_random.nextInt());
cp %= (m_numAttribs-2);
cp ++;
for (i=0;i= r || (i == m_popSize - 1)) {
break;
}
}
// if none was found, take first
if (i == m_popSize)
i = 0;
return i;
}
/**
* evaluates an entire population. Population members are looked up in
* a hash table and if they are not found then they are evaluated using
* ASEvaluator.
* @param ASEvaluator the subset evaluator to use for evaluating population
* members
* @throws Exception if something goes wrong during evaluation
*/
private void evaluatePopulation (SubsetEvaluator ASEvaluator)
throws Exception {
int i;
double merit;
for (i=0;i m_numAttribs) {
throw new Exception("Problem in population init");
}
m_population[i].set(bit);
}
}
}
/**
* calculates summary statistics for the current population
*/
private void populationStatistics() {
int i;
m_sumFitness = m_minFitness = m_maxFitness =
m_population[0].getObjective();
for (i=1;i m_maxFitness) {
m_maxFitness = m_population[i].getObjective();
}
else if (m_population[i].getObjective() < m_minFitness) {
m_minFitness = m_population[i].getObjective();
}
}
m_avgFitness = (m_sumFitness / m_popSize);
}
/**
* scales the raw (objective) merit of the population members
*/
private void scalePopulation() {
int j;
double a = 0;
double b = 0;
double fmultiple = 2.0;
double delta;
// prescale
if (m_minFitness > ((fmultiple * m_avgFitness - m_maxFitness) /
(fmultiple - 1.0))) {
delta = m_maxFitness - m_avgFitness;
a = ((fmultiple - 1.0) * m_avgFitness / delta);
b = m_avgFitness * (m_maxFitness - fmultiple * m_avgFitness) / delta;
}
else {
delta = m_avgFitness - m_minFitness;
a = m_avgFitness / delta;
b = -m_minFitness * m_avgFitness / delta;
}
// scalepop
m_sumFitness = 0;
for (j=0;j
© 2015 - 2025 Weber Informatics LLC | Privacy Policy