
// weka.classifiers.rules.PART Maven / Gradle / Ivy
// Show all versions of weka-dev Show documentation
/*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
/*
* PART.java
* Copyright (C) 1999-2012 University of Waikato, Hamilton, New Zealand
*
*/
package weka.classifiers.rules;
import java.util.Collections;
import java.util.Enumeration;
import java.util.Vector;
import weka.classifiers.AbstractClassifier;
import weka.classifiers.rules.part.MakeDecList;
import weka.classifiers.trees.j48.BinC45ModelSelection;
import weka.classifiers.trees.j48.C45ModelSelection;
import weka.classifiers.trees.j48.ModelSelection;
import weka.core.AdditionalMeasureProducer;
import weka.core.Capabilities;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.Option;
import weka.core.OptionHandler;
import weka.core.RevisionUtils;
import weka.core.Summarizable;
import weka.core.TechnicalInformation;
import weka.core.TechnicalInformation.Field;
import weka.core.TechnicalInformation.Type;
import weka.core.TechnicalInformationHandler;
import weka.core.Utils;
import weka.core.WeightedInstancesHandler;
import weka.core.Capabilities;
import weka.core.Capabilities.Capability;
/**
* Class for generating a PART decision list. Uses
* separate-and-conquer. Builds a partial C4.5 decision tree in each iteration
* and makes the "best" leaf into a rule.
*
* For more information, see:
*
* Eibe Frank, Ian H. Witten: Generating Accurate Rule Sets Without Global
* Optimization. In: Fifteenth International Conference on Machine Learning,
* 144-151, 1998.
*
*
*
* BibTeX:
*
* <pre>
* &#64;inproceedings{Frank1998,
* author = {Eibe Frank and Ian H. Witten},
* booktitle = {Fifteenth International Conference on Machine Learning},
* editor = {J. Shavlik},
* pages = {144-151},
* publisher = {Morgan Kaufmann},
* title = {Generating Accurate Rule Sets Without Global Optimization},
* year = {1998},
* PS = {http://www.cs.waikato.ac.nz/\~eibe/pubs/ML98-57.ps.gz}
* }
* </pre>
*
*
*
*
* Valid options are:
*
* <pre>
* -C &lt;pruning confidence&gt;
*  Set confidence threshold for pruning.
*  (default 0.25)
* </pre>
*
* <pre>
* -M &lt;minimum number of objects&gt;
*  Set minimum number of objects per leaf.
*  (default 2)
* </pre>
*
* <pre>
* -R
*  Use reduced error pruning.
* </pre>
*
* <pre>
* -N &lt;number of folds&gt;
*  Set number of folds for reduced error
*  pruning. One fold is used as pruning set.
*  (default 3)
* </pre>
*
* <pre>
* -B
*  Use binary splits only.
* </pre>
*
* <pre>
* -U
*  Generate unpruned decision list.
* </pre>
*
* <pre>
* -J
*  Do not use MDL correction for info gain on numeric attributes.
* </pre>
*
* <pre>
* -Q &lt;seed&gt;
*  Seed for random data shuffling (default 1).
* </pre>
*
* <pre>
* -doNotMakeSplitPointActualValue
*  Do not make split point actual value.
* </pre>
*
*
* @author Eibe Frank (eibe@cs.waikato.ac.nz)
* @version $Revision: 11004 $
*/
public class PART extends AbstractClassifier implements OptionHandler,
WeightedInstancesHandler, Summarizable, AdditionalMeasureProducer,
TechnicalInformationHandler {
/** Serialization version identifier. */
static final long serialVersionUID = 8121455039782598361L;
/** The decision list; assigned by buildClassifier, so it is null before a model has been built. */
private MakeDecList m_root;
/** Confidence level (option -C); handed to the decision list when neither unpruned nor reduced error pruning is selected. */
private float m_CF = 0.25f;
/** Minimum number of objects (option -M); passed to both the model selection and the decision list. */
private int m_minNumObj = 2;
/** Use MDL correction? Cleared by the -J flag. */
private boolean m_useMDLcorrection = true;
/** Use reduced error pruning? (flag -R) */
private boolean m_reducedErrorPruning = false;
/** Number of folds for reduced error pruning (option -N); only read when reduced error pruning is active. */
private int m_numFolds = 3;
/** Binary splits on nominal attributes? (flag -B); selects BinC45ModelSelection instead of C45ModelSelection. */
private boolean m_binarySplits = false;
/** Generate unpruned list? (flag -U); checked before reduced error pruning in buildClassifier. */
private boolean m_unpruned = false;
/** The seed for random number generation (option -Q); only passed on when reduced error pruning is active. */
private int m_Seed = 1;
/** Do not relocate split point to actual data value (flag -doNotMakeSplitPointActualValue); false unless set. */
private boolean m_doNotMakeSplitPointActualValue;
/**
 * Describes this classifier in a form suitable for display in the
 * explorer/experimenter gui.
 *
 * @return the description, ending with the textual form of
 *         {@link #getTechnicalInformation()}
 */
public String globalInfo() {
  final String reference = getTechnicalInformation().toString();
  final StringBuilder text = new StringBuilder();
  text.append("Class for generating a PART decision list. Uses ");
  text.append("separate-and-conquer. Builds a partial C4.5 decision tree ");
  text.append("in each iteration and makes the \"best\" leaf into a rule.\n\n");
  text.append("For more information, see:\n\n");
  text.append(reference);
  return text.toString();
}
/**
 * Builds the bibliographic record (an INPROCEEDINGS entry) of the paper this
 * classifier is based on.
 *
 * @return the technical information about this class
 */
@Override
public TechnicalInformation getTechnicalInformation() {
  final TechnicalInformation paper = new TechnicalInformation(
    Type.INPROCEEDINGS);

  paper.setValue(Field.AUTHOR, "Eibe Frank and Ian H. Witten");
  paper.setValue(Field.TITLE,
    "Generating Accurate Rule Sets Without Global Optimization");
  paper.setValue(Field.BOOKTITLE,
    "Fifteenth International Conference on Machine Learning");
  paper.setValue(Field.EDITOR, "J. Shavlik");
  paper.setValue(Field.YEAR, "1998");
  paper.setValue(Field.PAGES, "144-151");
  paper.setValue(Field.PUBLISHER, "Morgan Kaufmann");
  paper.setValue(Field.PS,
    "http://www.cs.waikato.ac.nz/~eibe/pubs/ML98-57.ps.gz");

  return paper;
}
/**
 * Returns default capabilities of the classifier: everything is disabled
 * first, then the supported attribute and class capabilities are switched on
 * one by one and the minimum number of instances is set to zero.
 *
 * @return the capabilities of this classifier
 */
@Override
public Capabilities getCapabilities() {
  final Capabilities caps = new Capabilities(this);
  caps.disableAll();

  // Enabled in the same order as before: attribute capabilities first,
  // class capabilities afterwards.
  final Capability[] supported = {
    Capability.NOMINAL_ATTRIBUTES,
    Capability.NUMERIC_ATTRIBUTES,
    Capability.DATE_ATTRIBUTES,
    Capability.MISSING_VALUES,
    Capability.NOMINAL_CLASS,
    Capability.MISSING_CLASS_VALUES
  };
  for (Capability capability : supported) {
    caps.enable(capability);
  }

  // instances
  caps.setMinimumNumberInstances(0);
  return caps;
}
/**
 * Generates the classifier.
 *
 * The capabilities check runs on the data as passed in; training then works
 * on a copy from which instances with a missing class have been deleted.
 *
 * @param instances the data to train with
 * @throws Exception if classifier can't be built successfully
 */
@Override
public void buildClassifier(Instances instances) throws Exception {
  // can classifier handle the data?
  getCapabilities().testWithFail(instances);

  // work on a copy so the caller's data set is left untouched
  final Instances trainingData = new Instances(instances);
  trainingData.deleteWithMissingClass();

  // split selection strategy: binary or multiway
  final ModelSelection selector;
  if (!m_binarySplits) {
    selector = new C45ModelSelection(m_minNumObj, trainingData,
      m_useMDLcorrection, m_doNotMakeSplitPointActualValue);
  } else {
    selector = new BinC45ModelSelection(m_minNumObj, trainingData,
      m_useMDLcorrection, m_doNotMakeSplitPointActualValue);
  }

  // pruning mode: the unpruned flag is checked first, then reduced error
  // pruning, and confidence-based pruning is the fallback
  if (m_unpruned) {
    m_root = new MakeDecList(selector, m_minNumObj);
  } else if (!m_reducedErrorPruning) {
    m_root = new MakeDecList(selector, m_CF, m_minNumObj);
  } else {
    m_root = new MakeDecList(selector, m_numFolds, m_minNumObj, m_Seed);
  }

  m_root.buildClassifier(trainingData);

  // the cast to the concrete selection type mirrors the choice made above
  if (!m_binarySplits) {
    ((C45ModelSelection) selector).cleanup();
  } else {
    ((BinC45ModelSelection) selector).cleanup();
  }
}
/**
 * Classifies an instance by delegating to the decision list.
 *
 * @param instance the instance to classify
 * @return the classification
 * @throws Exception if instance can't be classified successfully
 */
@Override
public double classifyInstance(Instance instance) throws Exception {
  final double classification = m_root.classifyInstance(instance);
  return classification;
}
/**
 * Returns class probabilities for an instance by delegating to the decision
 * list.
 *
 * @param instance the instance to get the distribution for
 * @return the class probabilities
 * @throws Exception if the distribution can't be computed successfully
 */
@Override
public final double[] distributionForInstance(Instance instance)
  throws Exception {
  final double[] distribution = m_root.distributionForInstance(instance);
  return distribution;
}
/**
* Returns an enumeration describing the available options.
*
* Valid options are:
*
*
* -C confidence
* Set confidence threshold for pruning. (Default: 0.25)
*
*
* -M number
* Set minimum number of instances per leaf. (Default: 2)
*
*
* -R
* Use reduced error pruning.
*
*
* -N number
* Set number of folds for reduced error pruning. One fold is used as the
* pruning set. (Default: 3)
*
*
* -B
* Use binary splits for nominal attributes.
*
*
* -U
* Generate unpruned decision list.
*
*
* -Q
* The seed for reduced-error pruning.
*
*
* @return an enumeration of all the available options.
*/
@Override
public Enumeration
*
* Valid options are:
*
*
*
* -C <pruning confidence>
* Set confidence threshold for pruning.
* (default 0.25)
*
*
* * -M <minimum number of objects>
* Set minimum number of objects per leaf.
* (default 2)
*
*
* * -R
* Use reduced error pruning.
*
*
* * -N <number of folds>
* Set number of folds for reduced error
* pruning. One fold is used as pruning set.
* (default 3)
*
*
* * -B
* Use binary splits only.
*
*
* * -U
* Generate unpruned decision list.
*
*
* * -J
* Do not use MDL correction for info gain on numeric attributes.
*
*
* * -Q <seed>
* Seed for random data shuffling (default 1).
*
*
* * -doNotMakeSplitPointActualValue
* Do not make split point actual value.
*
*
*
*
* @param options the list of options as an array of strings
* @throws Exception if an option is not supported
*/
@Override
public void setOptions(String[] options) throws Exception {
  // Reads the PART-specific flags and valued options from the given array,
  // then passes the array on to the superclass. A valued option whose string
  // comes back empty is treated as "not supplied" and resets the
  // corresponding field to its default.
  //
  // Throws Exception when -C is combined with -R, when -N is given without
  // -R, or when the confidence is not strictly between 0 and 1. Number
  // parsing failures surface as NumberFormatException.

  // Pruning options
  m_unpruned = Utils.getFlag('U', options);
  m_reducedErrorPruning = Utils.getFlag('R', options);
  m_binarySplits = Utils.getFlag('B', options);
  m_useMDLcorrection = !Utils.getFlag('J', options);
  m_doNotMakeSplitPointActualValue = Utils.getFlag(
    "doNotMakeSplitPointActualValue", options);

  String confidenceString = Utils.getOption('C', options);
  if (confidenceString.length() != 0) {
    if (m_reducedErrorPruning) {
      throw new Exception("Setting CF doesn't make sense "
        + "for reduced error pruning.");
    }
    float confidence = Float.parseFloat(confidenceString);
    // Written as a negated "inside the interval" test so that NaN, for which
    // every comparison is false, is rejected too.
    if (!(confidence > 0 && confidence < 1)) {
      throw new Exception(
        "CF has to be greater than zero and smaller than one!");
    }
    // Only store the value once it is known to be valid.
    m_CF = confidence;
  } else {
    m_CF = 0.25f;
  }

  String numFoldsString = Utils.getOption('N', options);
  if (numFoldsString.length() != 0) {
    if (!m_reducedErrorPruning) {
      throw new Exception("Setting the number of folds"
        + " does only make sense for" + " reduced error pruning.");
    }
    m_numFolds = Integer.parseInt(numFoldsString);
  } else {
    m_numFolds = 3;
  }

  // Other options
  String minNumString = Utils.getOption('M', options);
  if (minNumString.length() != 0) {
    m_minNumObj = Integer.parseInt(minNumString);
  } else {
    m_minNumObj = 2;
  }

  String seedString = Utils.getOption('Q', options);
  if (seedString.length() != 0) {
    m_Seed = Integer.parseInt(seedString);
  } else {
    m_Seed = 1;
  }

  super.setOptions(options);
}
/**
 * Gets the current settings of the Classifier.
 *
 * -M and -Q are always emitted. -C is emitted only when reduced error
 * pruning is off and -N only when it is on. The flags -U, -R, -B, -J and
 * -doNotMakeSplitPointActualValue appear only when set. The superclass
 * options are appended at the end.
 *
 * @return an array of strings suitable for passing to setOptions
 */
@Override
public String[] getOptions() {
  // Typed vector: with a raw Vector, toArray(new String[0]) is typed as
  // Object[] and cannot be returned as String[].
  Vector<String> options = new Vector<String>(13);

  if (m_unpruned) {
    options.add("-U");
  }
  if (m_reducedErrorPruning) {
    options.add("-R");
  }
  if (m_binarySplits) {
    options.add("-B");
  }

  options.add("-M");
  options.add("" + m_minNumObj);

  // The confidence factor and the number of folds are mutually exclusive.
  if (m_reducedErrorPruning) {
    options.add("-N");
    options.add("" + m_numFolds);
  } else {
    options.add("-C");
    options.add("" + m_CF);
  }

  options.add("-Q");
  options.add("" + m_Seed);

  if (!m_useMDLcorrection) {
    options.add("-J");
  }
  if (m_doNotMakeSplitPointActualValue) {
    options.add("-doNotMakeSplitPointActualValue");
  }

  Collections.addAll(options, super.getOptions());
  return options.toArray(new String[0]);
}
/**
 * Returns a description of the classifier: a fixed header followed by the
 * decision list's own textual form, or a placeholder when no model has been
 * built yet.
 *
 * @return a string representation of the classifier
 */
@Override
public String toString() {
  return (m_root != null)
    ? "PART decision list\n------------------\n\n" + m_root.toString()
    : "No classifier built";
}
/**
 * Returns a superconcise version of the model.
 *
 * Before a model has been built this returns the same placeholder text as
 * toString() instead of failing with a NullPointerException.
 *
 * @return a concise version of the model
 */
@Override
public String toSummaryString() {
  if (m_root == null) {
    return "No classifier built";
  }
  return "Number of rules: " + m_root.numRules() + "\n";
}
/**
 * Reports how many rules the decision list holds.
 *
 * @return the number of rules
 */
public double measureNumRules() {
  final double ruleCount = m_root.numRules();
  return ruleCount;
}
/**
 * Returns an enumeration of the additional measure names. The only measure
 * offered is "measureNumRules".
 *
 * @return an enumeration of the measure names
 */
@Override
public Enumeration<String> enumerateMeasures() {
  // Typed collection instead of the raw Vector/Enumeration.
  Vector<String> newVector = new Vector<String>(1);
  newVector.addElement("measureNumRules");
  return newVector.elements();
}
/**
 * Returns the value of the named measure. The name is compared without
 * regard to case.
 *
 * @param additionalMeasureName the name of the measure to query for its value
 * @return the value of the named measure
 * @throws IllegalArgumentException if the named measure is not supported
 */
@Override
public double getMeasure(String additionalMeasureName) {
  final boolean supported =
    additionalMeasureName.compareToIgnoreCase("measureNumRules") == 0;
  if (!supported) {
    throw new IllegalArgumentException(additionalMeasureName
      + " not supported (PART)");
  }
  return measureNumRules();
}
/**
 * Tip text for the confidenceFactor property.
 *
 * @return tip text for this property suitable for displaying in the
 *         explorer/experimenter gui
 */
public String confidenceFactorTipText() {
  final String tip = "The confidence factor used for pruning "
    + "(smaller values incur more pruning).";
  return tip;
}
/**
 * Returns the confidence factor currently stored.
 *
 * @return Value of CF.
 */
public float getConfidenceFactor() {
  return this.m_CF;
}
/**
 * Stores a new confidence factor. The value is taken as given; no range
 * check is applied here.
 *
 * @param v Value to assign to CF.
 */
public void setConfidenceFactor(float v) {
  this.m_CF = v;
}
/**
 * Tip text for the minNumObj property.
 *
 * @return tip text for this property suitable for displaying in the
 *         explorer/experimenter gui
 */
public String minNumObjTipText() {
  final String tip = "The minimum number of instances per rule.";
  return tip;
}
/**
 * Returns the minimum number of objects currently stored.
 *
 * @return Value of minNumObj.
 */
public int getMinNumObj() {
  return this.m_minNumObj;
}
/**
* Set the value of minNumObj.
*
* @param v Value to assign to minNumObj.
*/
public void setMinNumObj(int v) {
  // Stored as given; no validation happens here.
  this.m_minNumObj = v;
}
/**
 * Returns the tip text for the reducedErrorPruning property.
 *
 * @return tip text for this property suitable for displaying in the
 *         explorer/experimenter gui
 */
public String reducedErrorPruningTipText() {
  // "C.4.5" was a typo; the algorithm is called C4.5 everywhere else.
  return "Whether reduced-error pruning is used instead of C4.5 pruning.";
}
/**
 * Tells whether reduced error pruning is switched on.
 *
 * @return Value of reducedErrorPruning.
 */
public boolean getReducedErrorPruning() {
  return this.m_reducedErrorPruning;
}
/**
 * Switches reduced error pruning on or off.
 *
 * @param v Value to assign to reducedErrorPruning.
 */
public void setReducedErrorPruning(boolean v) {
  this.m_reducedErrorPruning = v;
}
/**
 * Returns the tip text for the unpruned property.
 *
 * @return tip text for this property suitable for displaying in the
 *         explorer/experimenter gui
 */
public String unprunedTipText() {
  // The previous text ("Whether pruning is performed.") described the
  // opposite of the flag: a true value selects the unpruned decision list.
  return "Whether to generate an unpruned decision list "
    + "(if true, no pruning is performed).";
}
/**
 * Tells whether an unpruned decision list is requested.
 *
 * @return Value of unpruned.
 */
public boolean getUnpruned() {
  return this.m_unpruned;
}
/**
 * Requests (or stops requesting) an unpruned decision list.
 *
 * @param newunpruned Value to assign to unpruned.
 */
public void setUnpruned(boolean newunpruned) {
  this.m_unpruned = newunpruned;
}
/**
 * Tip text for the useMDLcorrection property.
 *
 * @return tip text for this property suitable for displaying in the
 *         explorer/experimenter gui
 */
public String useMDLcorrectionTipText() {
  final String tip = "Whether MDL correction is used when finding splits "
    + "on numeric attributes.";
  return tip;
}
/**
 * Tells whether the MDL correction is in use.
 *
 * @return Value of useMDLcorrection.
 */
public boolean getUseMDLcorrection() {
  return this.m_useMDLcorrection;
}
/**
 * Switches the MDL correction on or off.
 *
 * @param newuseMDLcorrection Value to assign to useMDLcorrection.
 */
public void setUseMDLcorrection(boolean newuseMDLcorrection) {
  this.m_useMDLcorrection = newuseMDLcorrection;
}
/**
 * Tip text for the numFolds property.
 *
 * @return tip text for this property suitable for displaying in the
 *         explorer/experimenter gui
 */
public String numFoldsTipText() {
  // The two spaces after the first sentence are part of the existing text.
  final String tip =
    "Determines the amount of data used for reduced-error pruning.  "
      + "One fold is used for pruning, the rest for growing the rules.";
  return tip;
}
/**
 * Returns the number of folds currently stored.
 *
 * @return Value of numFolds.
 */
public int getNumFolds() {
  return this.m_numFolds;
}
/**
 * Stores a new number of folds. The value is taken as given.
 *
 * @param v Value to assign to numFolds.
 */
public void setNumFolds(int v) {
  this.m_numFolds = v;
}
/**
 * Tip text for the seed property.
 *
 * @return tip text for this property suitable for displaying in the
 *         explorer/experimenter gui
 */
public String seedTipText() {
  final String tip = "The seed used for randomizing the data when "
    + "reduced-error pruning is used.";
  return tip;
}
/**
 * Returns the seed currently stored.
 *
 * @return Value of Seed.
 */
public int getSeed() {
  return this.m_Seed;
}
/**
 * Stores a new seed.
 *
 * @param newSeed Value to assign to Seed.
 */
public void setSeed(int newSeed) {
  this.m_Seed = newSeed;
}
/**
 * Tip text for the binarySplits property.
 *
 * @return tip text for this property suitable for displaying in the
 *         explorer/experimenter gui
 */
public String binarySplitsTipText() {
  final String tip = "Whether to use binary splits on nominal attributes "
    + "when building the partial trees.";
  return tip;
}
/**
 * Tells whether binary splits are in use.
 *
 * @return Value of binarySplits.
 */
public boolean getBinarySplits() {
  return this.m_binarySplits;
}
/**
 * Switches binary splits on or off.
 *
 * @param v Value to assign to binarySplits.
 */
public void setBinarySplits(boolean v) {
  this.m_binarySplits = v;
}
/**
 * Tip text for the doNotMakeSplitPointActualValue property.
 *
 * @return tip text for this property suitable for displaying in the
 *         explorer/experimenter gui
 */
public String doNotMakeSplitPointActualValueTipText() {
  final String tip =
    "If true, the split point is not relocated to an actual data value. "
      + "This can yield substantial speed-ups for large datasets "
      + "with numeric attributes.";
  return tip;
}
/**
 * Tells whether relocating the split point to an actual data value is
 * switched off.
 *
 * @return the value
 */
public boolean getDoNotMakeSplitPointActualValue() {
  return this.m_doNotMakeSplitPointActualValue;
}
/**
 * Stores the doNotMakeSplitPointActualValue setting.
 *
 * @param m_doNotMakeSplitPointActualValue the value to set; the parameter
 *          shadows the field of the same name, hence the explicit
 *          {@code this.} on the assignment
 */
public void setDoNotMakeSplitPointActualValue(
  boolean m_doNotMakeSplitPointActualValue) {
  final boolean requested = m_doNotMakeSplitPointActualValue;
  this.m_doNotMakeSplitPointActualValue = requested;
}
/**
 * Returns the revision string, obtained by handing the revision keyword to
 * RevisionUtils.extract.
 *
 * @return the revision
 */
@Override
public String getRevision() {
  final String revisionKeyword = "$Revision: 11004 $";
  return RevisionUtils.extract(revisionKeyword);
}
/**
 * Command-line entry point: creates a fresh PART instance and hands it,
 * together with the arguments, to runClassifier.
 *
 * @param argv command line options
 */
public static void main(String[] argv) {
  final PART classifier = new PART();
  runClassifier(classifier, argv);
}
}