weka.classifiers.trees.FT Maven / Gradle / Ivy
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
/*
* FT.java
* Copyright (C) 2007 University of Porto, Porto, Portugal
*
*/
package weka.classifiers.trees;
import weka.classifiers.Classifier;
import weka.classifiers.trees.ft.FTInnerNode;
import weka.classifiers.trees.ft.FTLeavesNode;
import weka.classifiers.trees.ft.FTNode;
import weka.classifiers.trees.ft.FTtree;
import weka.core.AdditionalMeasureProducer;
import weka.core.Capabilities;
import weka.core.Drawable;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.Option;
import weka.core.OptionHandler;
import weka.core.RevisionUtils;
import weka.core.SelectedTag;
import weka.core.Tag;
import weka.core.TechnicalInformation;
import weka.core.TechnicalInformationHandler;
import weka.core.Utils;
import weka.core.Capabilities.Capability;
import weka.core.TechnicalInformation.Field;
import weka.core.TechnicalInformation.Type;
import weka.filters.Filter;
import weka.filters.supervised.attribute.NominalToBinary;
import weka.filters.unsupervised.attribute.ReplaceMissingValues;
import java.util.Enumeration;
import java.util.Vector;
/**
* Classifier for building 'Functional trees', which are classification trees that could have logistic regression functions at the inner nodes and/or leaves. The algorithm can deal with binary and multi-class target variables, numeric and nominal attributes and missing values.
*
* For more information see:
*
* Joao Gama (2004). Functional Trees.
*
* Niels Landwehr, Mark Hall, Eibe Frank (2005). Logistic Model Trees.
*
*
* BibTeX:
*
* @article{Gama2004,
* author = {Joao Gama},
* booktitle = {Machine Learning},
* number = {3},
* pages = {219-250},
* title = {Functional Trees},
* volume = {55},
* year = {2004}
* }
*
* @article{Landwehr2005,
* author = {Niels Landwehr and Mark Hall and Eibe Frank},
* booktitle = {Machine Learning},
* number = {1-2},
* pages = {161-205},
* title = {Logistic Model Trees},
* volume = {95},
* year = {2005}
* }
*
*
*
* Valid options are:
*
* -B
* Binary splits (convert nominal attributes to binary ones)
*
* -P
* Use error on probabilities instead of misclassification error for stopping criterion of LogitBoost.
*
* -I <numIterations>
* Set fixed number of iterations for LogitBoost (instead of using cross-validation)
*
* -F <modelType>
* Set Funtional Tree type to be generate: 0 for FT, 1 for FTLeaves and 2 for FTInner
*
* -M <numInstances>
* Set minimum number of instances at which a node can be split (default 15)
*
* -W <beta>
* Set beta for weight trimming for LogitBoost. Set to 0 (default) for no weight trimming.
*
* -A
* The AIC is used to choose the best iteration.
*
*
* @author Jo\~{a}o Gama
* @author Carlos Ferreira
* @version $Revision: 5535 $
*/
public class FT
extends Classifier
implements OptionHandler, AdditionalMeasureProducer, Drawable,
TechnicalInformationHandler {
/** for serialization */
static final long serialVersionUID = -1113212459618105000L;
/** Filter to replace missing values*/
protected ReplaceMissingValues m_replaceMissing;
/** Filter to replace nominal attributes*/
protected NominalToBinary m_nominalToBinary;
/** root of the logistic model tree*/
protected FTtree m_tree;
/** convert nominal attributes to binary ?*/
protected boolean m_convertNominal;
/**use error on probabilties instead of misclassification for stopping criterion of LogitBoost?*/
protected boolean m_errorOnProbabilities;
/**minimum number of instances at which a node is considered for splitting*/
protected int m_minNumInstances;
/**if non-zero, use fixed number of iterations for LogitBoost*/
protected int m_numBoostingIterations;
/**Model Type, value: 0 is FT, 1 is FTLeaves, 2 is FTInner*/
protected int m_modelType;
/**Threshold for trimming weights. Instances with a weight lower than this (as a percentage
* of total weights) are not included in the regression fit.
**/
protected double m_weightTrimBeta;
/** If true, the AIC is used to choose the best LogitBoost iteration*/
protected boolean m_useAIC ;
/** model types */
public static final int MODEL_FT = 0;
public static final int MODEL_FTLeaves = 1;
public static final int MODEL_FTInner = 2;
/** possible model types. */
public static final Tag [] TAGS_MODEL = {
new Tag(MODEL_FT, "FT"),
new Tag(MODEL_FTLeaves, "FTLeaves"),
new Tag(MODEL_FTInner, "FTInner")
};
/**
* Creates an instance of FT with standard options
*/
public FT() {
m_numBoostingIterations=15;
m_minNumInstances = 15;
m_weightTrimBeta = 0;
m_useAIC = false;
m_modelType=0;
}
/**
* Returns default capabilities of the classifier.
*
* @return the capabilities of this classifier
*/
public Capabilities getCapabilities() {
Capabilities result = super.getCapabilities();
result.disableAll();
// attributes
result.enable(Capability.NOMINAL_ATTRIBUTES);
result.enable(Capability.NUMERIC_ATTRIBUTES);
result.enable(Capability.DATE_ATTRIBUTES);
result.enable(Capability.MISSING_VALUES);
// class
result.enable(Capability.NOMINAL_CLASS);
result.enable(Capability.MISSING_CLASS_VALUES);
return result;
}
/**
* Builds the classifier.
*
* @param data the data to train with
* @throws Exception if classifier can't be built successfully
*/
public void buildClassifier(Instances data) throws Exception{
// can classifier handle the data?
getCapabilities().testWithFail(data);
// remove instances with missing class
Instances filteredData = new Instances(data);
filteredData.deleteWithMissingClass();
//replace missing values
m_replaceMissing = new ReplaceMissingValues();
m_replaceMissing.setInputFormat(filteredData);
filteredData = Filter.useFilter(filteredData, m_replaceMissing);
//possibly convert nominal attributes globally
if (m_convertNominal) {
m_nominalToBinary = new NominalToBinary();
m_nominalToBinary.setInputFormat(filteredData);
filteredData = Filter.useFilter(filteredData, m_nominalToBinary);
}
int minNumInstances = 2;
//create a FT tree root
if (m_modelType==0)
m_tree = new FTNode( m_errorOnProbabilities, m_numBoostingIterations, m_minNumInstances,
m_weightTrimBeta, m_useAIC);
//create a FTLeaves tree root
if (m_modelType==1){
m_tree = new FTLeavesNode(m_errorOnProbabilities, m_numBoostingIterations, m_minNumInstances,
m_weightTrimBeta, m_useAIC);
}
//create a FTInner tree root
if (m_modelType==2)
m_tree = new FTInnerNode(m_errorOnProbabilities, m_numBoostingIterations, m_minNumInstances,
m_weightTrimBeta, m_useAIC);
//build tree
m_tree.buildClassifier(filteredData);
// prune tree
m_tree.prune();
m_tree.assignIDs(0);
m_tree.cleanup();
}
/**
* Returns class probabilities for an instance.
*
* @param instance the instance to compute the distribution for
* @return the class probabilities
* @throws Exception if distribution can't be computed successfully
*/
public double [] distributionForInstance(Instance instance) throws Exception {
//replace missing values
m_replaceMissing.input(instance);
instance = m_replaceMissing.output();
//possibly convert nominal attributes
if (m_convertNominal) {
m_nominalToBinary.input(instance);
instance = m_nominalToBinary.output();
}
return m_tree.distributionForInstance(instance);
}
/**
* Classifies an instance.
*
* @param instance the instance to classify
* @return the classification
* @throws Exception if instance can't be classified successfully
*/
public double classifyInstance(Instance instance) throws Exception {
double maxProb = -1;
int maxIndex = 0;
//classify by maximum probability
double[] probs = distributionForInstance(instance);
for (int j = 0; j < instance.numClasses(); j++) {
if (Utils.gr(probs[j], maxProb)) {
maxIndex = j;
maxProb = probs[j];
}
}
return (double)maxIndex;
}
/**
* Returns a description of the classifier.
*
* @return a string representation of the classifier
*/
public String toString() {
if (m_tree!=null) {
if (m_modelType==0)
return "FT tree \n------------------\n" + m_tree.toString();
else {
if (m_modelType==1)
return "FT Leaves tree \n------------------\n" + m_tree.toString();
else
return "FT Inner tree \n------------------\n" + m_tree.toString();
}
}else{
return "No tree built";
}
}
/**
* Returns an enumeration describing the available options.
*
* @return an enumeration of all the available options.
*/
public Enumeration listOptions() {
Vector newVector = new Vector(8);
newVector.addElement(new Option("\tBinary splits (convert nominal attributes to binary ones) ",
"B", 0, "-B"));
newVector.addElement(new Option("\tUse error on probabilities instead of misclassification error "+
"for stopping criterion of LogitBoost.",
"P", 0, "-P"));
newVector.addElement(new Option("\tSet fixed number of iterations for LogitBoost (instead of using "+
"cross-validation)",
"I",1,"-I "));
newVector.addElement(new Option("\tSet Funtional Tree type to be generate: "+
" 0 for FT, 1 for FTLeaves and 2 for FTInner",
"F",1,"-F "));
newVector.addElement(new Option("\tSet minimum number of instances at which a node can be split (default 15)",
"M",1,"-M "));
newVector.addElement(new Option("\tSet beta for weight trimming for LogitBoost. Set to 0 (default) for no weight trimming.",
"W",1,"-W "));
newVector.addElement(new Option("\tThe AIC is used to choose the best iteration.",
"A", 0, "-A"));
return newVector.elements();
}
/**
* Parses a given list of options.
*
* Valid options are:
*
* -B
* Binary splits (convert nominal attributes to binary ones)
*
* -P
* Use error on probabilities instead of misclassification error for stopping criterion of LogitBoost.
*
* -I <numIterations>
* Set fixed number of iterations for LogitBoost (instead of using cross-validation)
*
* -F <modelType>
* Set Funtional Tree type to be generate: 0 for FT, 1 for FTLeaves and 2 for FTInner
*
* -M <numInstances>
* Set minimum number of instances at which a node can be split (default 15)
*
* -W <beta>
* Set beta for weight trimming for LogitBoost. Set to 0 (default) for no weight trimming.
*
* -A
* The AIC is used to choose the best iteration.
*
*
* @param options the list of options as an array of strings
* @throws Exception if an option is not supported
*/
public void setOptions(String[] options) throws Exception {
setBinSplit(Utils.getFlag('B', options));
setErrorOnProbabilities(Utils.getFlag('P', options));
String optionString = Utils.getOption('I', options);
if (optionString.length() != 0) {
setNumBoostingIterations((new Integer(optionString)).intValue());
}
optionString = Utils.getOption('F', options);
if (optionString.length() != 0) {
setModelType(new SelectedTag(Integer.parseInt(optionString), TAGS_MODEL));
// setModelType((new Integer(optionString)).intValue());
}
optionString = Utils.getOption('M', options);
if (optionString.length() != 0) {
setMinNumInstances((new Integer(optionString)).intValue());
}
optionString = Utils.getOption('W', options);
if (optionString.length() != 0) {
setWeightTrimBeta((new Double(optionString)).doubleValue());
}
setUseAIC(Utils.getFlag('A', options));
Utils.checkForRemainingOptions(options);
}
/**
* Gets the current settings of the Classifier.
*
* @return an array of strings suitable for passing to setOptions
*/
public String[] getOptions() {
String[] options = new String[11];
int current = 0;
if (getBinSplit()) {
options[current++] = "-B";
}
if (getErrorOnProbabilities()) {
options[current++] = "-P";
}
options[current++] = "-I";
options[current++] = ""+getNumBoostingIterations();
options[current++] = "-F";
// options[current++] = ""+getModelType();
options[current++] = ""+getModelType().getSelectedTag().getID();
options[current++] = "-M";
options[current++] = ""+getMinNumInstances();
options[current++] = "-W";
options[current++] = ""+getWeightTrimBeta();
if (getUseAIC()) {
options[current++] = "-A";
}
while (current < options.length) {
options[current++] = "";
}
return options;
}
/**
* Get the value of weightTrimBeta.
*/
public double getWeightTrimBeta(){
return m_weightTrimBeta;
}
/**
* Get the value of useAIC.
*
* @return Value of useAIC.
*/
public boolean getUseAIC(){
return m_useAIC;
}
/**
* Set the value of weightTrimBeta.
*/
public void setWeightTrimBeta(double n){
m_weightTrimBeta = n;
}
/**
* Set the value of useAIC.
*
* @param c Value to assign to useAIC.
*/
public void setUseAIC(boolean c){
m_useAIC = c;
}
/**
* Get the value of binarySplits.
*
* @return Value of binarySplits.
*/
public boolean getBinSplit(){
return m_convertNominal;
}
/**
* Get the value of errorOnProbabilities.
*
* @return Value of errorOnProbabilities.
*/
public boolean getErrorOnProbabilities(){
return m_errorOnProbabilities;
}
/**
* Get the value of numBoostingIterations.
*
* @return Value of numBoostingIterations.
*/
public int getNumBoostingIterations(){
return m_numBoostingIterations;
}
/**
* Get the type of functional tree model being used.
*
* @return the type of functional tree model.
*/
public SelectedTag getModelType() {
return new SelectedTag(m_modelType, TAGS_MODEL);
}
/**
* Set the Functional Tree type.
*
* @param c Value corresponding to tree type.
*/
public void setModelType(SelectedTag newMethod){
if (newMethod.getTags() == TAGS_MODEL) {
int c = newMethod.getSelectedTag().getID();
if (c==0 || c==1 || c==2) {
m_modelType = c;
} else {
throw new IllegalArgumentException("Wrong model type, -F value should be: 0, for FT, 1, " +
"for FTLeaves, and 2, for FTInner ");
}
}
}
/**
* Get the value of minNumInstances.
*
* @return Value of minNumInstances.
*/
public int getMinNumInstances(){
return m_minNumInstances;
}
/**
* Set the value of binarySplits.
*
* @param c Value to assign to binarySplits.
*/
public void setBinSplit(boolean c){
m_convertNominal=c;
}
/**
* Set the value of errorOnProbabilities.
*
* @param c Value to assign to errorOnProbabilities.
*/
public void setErrorOnProbabilities(boolean c){
m_errorOnProbabilities = c;
}
/**
* Set the value of numBoostingIterations.
*
* @param c Value to assign to numBoostingIterations.
*/
public void setNumBoostingIterations(int c){
m_numBoostingIterations = c;
}
/**
* Set the value of minNumInstances.
*
* @param c Value to assign to minNumInstances.
*/
public void setMinNumInstances(int c){
m_minNumInstances = c;
}
/**
* Returns the type of graph this classifier
* represents.
* @return Drawable.TREE
*/
public int graphType() {
return Drawable.TREE;
}
/**
* Returns graph describing the tree.
*
* @return the graph describing the tree
* @throws Exception if graph can't be computed
*/
public String graph() throws Exception {
return m_tree.graph();
}
/**
* Returns the size of the tree
* @return the size of the tree
*/
public int measureTreeSize(){
return m_tree.numNodes();
}
/**
* Returns the number of leaves in the tree
* @return the number of leaves in the tree
*/
public int measureNumLeaves(){
return m_tree.numLeaves();
}
/**
* Returns an enumeration of the additional measure names
* @return an enumeration of the measure names
*/
public Enumeration enumerateMeasures() {
Vector newVector = new Vector(2);
newVector.addElement("measureTreeSize");
newVector.addElement("measureNumLeaves");
return newVector.elements();
}
/**
* Returns the value of the named measure
* @param additionalMeasureName the name of the measure to query for its value
* @return the value of the named measure
* @throws IllegalArgumentException if the named measure is not supported
*/
public double getMeasure(String additionalMeasureName) {
if (additionalMeasureName.compareToIgnoreCase("measureTreeSize") == 0) {
return measureTreeSize();
} else if (additionalMeasureName.compareToIgnoreCase("measureNumLeaves") == 0) {
return measureNumLeaves();
} else {
throw new IllegalArgumentException(additionalMeasureName
+ " not supported (FT)");
}
}
/**
* Returns a string describing classifier
* @return a description suitable for
* displaying in the explorer/experimenter gui
*/
public String globalInfo() {
return "Classifier for building 'Functional trees', which are classification trees that could have "
+"logistic regression functions at the inner nodes and/or leaves. The algorithm can deal with "
+"binary and multi-class target variables, numeric and nominal attributes and missing values.\n\n"
+"For more information see: \n\n"
+ getTechnicalInformation().toString();
}
/**
* Returns an instance of a TechnicalInformation object, containing
* detailed information about the technical background of this class,
* e.g., paper reference or book this class is based on.
*
* @return the technical information about this class
*/
public TechnicalInformation getTechnicalInformation() {
TechnicalInformation result;
TechnicalInformation additional;
result = new TechnicalInformation(Type.ARTICLE);
result.setValue(Field.AUTHOR, "Joao Gama");
result.setValue(Field.TITLE, "Functional Trees");
result.setValue(Field.BOOKTITLE, "Machine Learning");
result.setValue(Field.YEAR, "2004");
result.setValue(Field.VOLUME, "55");
result.setValue(Field.PAGES, "219-250");
result.setValue(Field.NUMBER, "3");
additional = result.add(Type.ARTICLE);
additional.setValue(Field.AUTHOR, "Niels Landwehr and Mark Hall and Eibe Frank");
additional.setValue(Field.TITLE, "Logistic Model Trees");
additional.setValue(Field.BOOKTITLE, "Machine Learning");
additional.setValue(Field.YEAR, "2005");
additional.setValue(Field.VOLUME, "95");
additional.setValue(Field.PAGES, "161-205");
additional.setValue(Field.NUMBER, "1-2");
return result;
}
/**
* Returns the tip text for this property
* @return tip text for this property suitable for
* displaying in the explorer/experimenter gui
*/
public String modelTypeTipText() {
return "The type of FT model. 0, for FT, 1, " +
"for FTLeaves, and 2, for FTInner";
}
/**
* Returns the tip text for this property
* @return tip text for this property suitable for
* displaying in the explorer/experimenter gui
*/
public String binSplitTipText() {
return "Convert all nominal attributes to binary ones before building the tree. "
+"This means that all splits in the final tree will be binary.";
}
/**
* Returns the tip text for this property
* @return tip text for this property suitable for
* displaying in the explorer/experimenter gui
*/
public String errorOnProbabilitiesTipText() {
return "Minimize error on probabilities instead of misclassification error when cross-validating the number "
+"of LogitBoost iterations. When set, the number of LogitBoost iterations is chosen that minimizes "
+"the root mean squared error instead of the misclassification error.";
}
/**
* Returns the tip text for this property
* @return tip text for this property suitable for
* displaying in the explorer/experimenter gui
*/
public String numBoostingIterationsTipText() {
return "Set a fixed number of iterations for LogitBoost. If >= 0, this sets a fixed number of LogitBoost "
+"iterations that is used everywhere in the tree. If < 0, the number is cross-validated.";
}
/**
* Returns the tip text for this property
* @return tip text for this property suitable for
* displaying in the explorer/experimenter gui
*/
public String minNumInstancesTipText() {
return "Set the minimum number of instances at which a node is considered for splitting. "
+"The default value is 15.";
}
/**
* Returns the tip text for this property
* @return tip text for this property suitable for
* displaying in the explorer/experimenter gui
*/
public String weightTrimBetaTipText() {
return "Set the beta value used for weight trimming in LogitBoost. "
+"Only instances carrying (1 - beta)% of the weight from previous iteration "
+"are used in the next iteration. Set to 0 for no weight trimming. "
+"The default value is 0.";
}
/**
* Returns the tip text for this property
* @return tip text for this property suitable for
* displaying in the explorer/experimenter gui
*/
public String useAICTipText() {
return "The AIC is used to determine when to stop LogitBoost iterations. "
+"The default is not to use AIC.";
}
/**
* Returns the revision string.
*
* @return the revision
*/
public String getRevision() {
return RevisionUtils.extract("$Revision: 5535 $");
}
/**
* Main method for testing this class
*
* @param argv the commandline options
*/
public static void main (String [] argv) {
runClassifier(new FT(), argv);
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy