/*
 * The Waikato Environment for Knowledge Analysis (WEKA), a machine
 * learning workbench. This version represents the developer version, the
 * "bleeding edge" of development, you could say. New functionality gets added
 * to this version.
 */
/*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
/*
* LMT.java
* Copyright (C) 2003-2012 University of Waikato, Hamilton, New Zealand
*
*/
package weka.classifiers.trees;
import java.util.Collections;
import java.util.Enumeration;
import java.util.Vector;
import weka.classifiers.AbstractClassifier;
import weka.classifiers.trees.j48.C45ModelSelection;
import weka.classifiers.trees.j48.ModelSelection;
import weka.classifiers.trees.lmt.LMTNode;
import weka.classifiers.trees.lmt.ResidualModelSelection;
import weka.core.AdditionalMeasureProducer;
import weka.core.Capabilities;
import weka.core.Capabilities.Capability;
import weka.core.Drawable;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.Option;
import weka.core.OptionHandler;
import weka.core.RevisionUtils;
import weka.core.TechnicalInformation;
import weka.core.TechnicalInformation.Field;
import weka.core.TechnicalInformation.Type;
import weka.core.TechnicalInformationHandler;
import weka.core.Utils;
import weka.filters.Filter;
import weka.filters.supervised.attribute.NominalToBinary;
import weka.filters.unsupervised.attribute.ReplaceMissingValues;
/**
* Classifier for building 'logistic model trees',
* which are classification trees with logistic regression functions at the
* leaves. The algorithm can deal with binary and multi-class target variables,
* numeric and nominal attributes and missing values.
*
* For more information see:
*
* Niels Landwehr, Mark Hall, Eibe Frank (2005). Logistic Model Trees. Machine
* Learning. 59(1-2):161-205.
*
* Marc Sumner, Eibe Frank, Mark Hall: Speeding up Logistic Model Tree
* Induction. In: 9th European Conference on Principles and Practice of
* Knowledge Discovery in Databases, 675-683, 2005.
*
*
*
* BibTeX:
*
*
* @article{Landwehr2005,
* author = {Niels Landwehr and Mark Hall and Eibe Frank},
* journal = {Machine Learning},
* number = {1-2},
* pages = {161-205},
* title = {Logistic Model Trees},
* volume = {59},
* year = {2005}
* }
*
* @inproceedings{Sumner2005,
* author = {Marc Sumner and Eibe Frank and Mark Hall},
* booktitle = {9th European Conference on Principles and Practice of Knowledge Discovery in Databases},
* pages = {675-683},
* publisher = {Springer},
* title = {Speeding up Logistic Model Tree Induction},
* year = {2005}
* }
*
* -R
* Split on residuals instead of class values
*
*
*
* -C
* Use cross-validation for boosting at all nodes (i.e., disable heuristic)
*
*
*
* -P
* Use error on probabilities instead of misclassification error for stopping criterion of LogitBoost.
*
*
*
* -I <numIterations>
* Set fixed number of iterations for LogitBoost (instead of using cross-validation)
*
*
*
* -M <numInstances>
* Set minimum number of instances at which a node can be split (default 15)
*
*
*
* -W <beta>
* Set beta for weight trimming for LogitBoost. Set to 0 (default) for no weight trimming.
*
*
*
* -A
* The AIC is used to choose the best iteration.
*
*
*
* -doNotMakeSplitPointActualValue
* Do not make split point actual value.
*
*
*
*
* @author Niels Landwehr
* @author Marc Sumner
* @version $Revision: 15519 $
*/
public class LMT extends AbstractClassifier implements OptionHandler,
AdditionalMeasureProducer, Drawable, TechnicalInformationHandler {
/** for serialization */
static final long serialVersionUID = -1113212459618104943L;
/** Filter to replace missing values, fitted in buildClassifier and reused at prediction time */
protected ReplaceMissingValues m_replaceMissing;
/** Filter to replace nominal attributes with binary ones; only applied globally when m_convertNominal is set */
protected NominalToBinary m_nominalToBinary;
/** root of the logistic model tree */
protected LMTNode m_tree;
/**
 * use heuristic that determines the number of LogitBoost iterations only once
 * in the beginning? (disabled by the -C option)
 */
protected boolean m_fastRegression;
/** convert nominal attributes to binary ? (globally, before tree building) */
protected boolean m_convertNominal;
/** split on residuals instead of class values? (-R option) */
protected boolean m_splitOnResiduals;
/**
 * use error on probabilities instead of misclassification for stopping
 * criterion of LogitBoost? (-P option)
 */
protected boolean m_errorOnProbabilities;
/** minimum number of instances at which a node is considered for splitting (default 15) */
protected int m_minNumInstances;
/** if non-zero, use fixed number of iterations for LogitBoost; -1 (default) means use cross-validation */
protected int m_numBoostingIterations;
/**
 * Threshold for trimming weights. Instances with a weight lower than this (as
 * a percentage of total weights) are not included in the regression fit.
 **/
protected double m_weightTrimBeta;
/** If true, the AIC is used to choose the best LogitBoost iteration */
private boolean m_useAIC = false;
/** Do not relocate split point to actual data value (-doNotMakeSplitPointActualValue option) */
private boolean m_doNotMakeSplitPointActualValue;
/**
 * Constructs an LMT classifier with the standard (default) option settings.
 */
public LMT() {
  m_numBoostingIterations = -1; // -1: choose the iteration count via cross-validation
  m_minNumInstances = 15;       // default minimum node size for splitting
  m_weightTrimBeta = 0;         // 0: no weight trimming in LogitBoost
  m_useAIC = false;             // stop via CV/misclassification, not AIC
  m_fastRegression = true;      // heuristic: determine #iterations only once
}
/**
 * Returns the default capabilities of this classifier: nominal, numeric and
 * date attributes (missing values allowed) and a nominal class.
 *
 * @return the capabilities of this classifier
 */
@Override
public Capabilities getCapabilities() {
  Capabilities caps = super.getCapabilities();
  caps.disableAll();
  // attribute types this learner can process
  caps.enable(Capability.NOMINAL_ATTRIBUTES);
  caps.enable(Capability.NUMERIC_ATTRIBUTES);
  caps.enable(Capability.DATE_ATTRIBUTES);
  caps.enable(Capability.MISSING_VALUES);
  // class requirements
  caps.enable(Capability.NOMINAL_CLASS);
  caps.enable(Capability.MISSING_CLASS_VALUES);
  return caps;
}
/**
 * Builds the logistic model tree from the given training data.
 *
 * @param data the data to train with
 * @throws Exception if classifier can't be built successfully
 */
@Override
public void buildClassifier(Instances data) throws Exception {
  // make sure the data matches this classifier's capabilities
  getCapabilities().testWithFail(data);

  // work on a copy; drop instances whose class value is missing
  Instances train = new Instances(data);
  train.deleteWithMissingClass();

  // impute missing attribute values (filter is kept for prediction time)
  m_replaceMissing = new ReplaceMissingValues();
  m_replaceMissing.setInputFormat(train);
  train = Filter.useFilter(train, m_replaceMissing);

  // set up the nominal-to-binary filter; only applied globally if requested
  m_nominalToBinary = new NominalToBinary();
  m_nominalToBinary.setInputFormat(train);
  if (m_convertNominal) {
    train = Filter.useFilter(train, m_nominalToBinary);
  }

  final int minNumInstances = 2;

  // choose how candidate splits are evaluated: on residuals or on class values
  ModelSelection modSelection = m_splitOnResiduals
    ? new ResidualModelSelection(minNumInstances)
    : new C45ModelSelection(minNumInstances, train, true,
      m_doNotMakeSplitPointActualValue);

  // create the root node and grow the tree
  m_tree = new LMTNode(modSelection, m_numBoostingIterations,
    m_fastRegression, m_errorOnProbabilities, m_minNumInstances,
    m_weightTrimBeta, m_useAIC, m_nominalToBinary, m_numDecimalPlaces);
  m_tree.buildClassifier(train);

  // release the training data reference held by the C4.5 model selection
  if (modSelection instanceof C45ModelSelection) {
    ((C45ModelSelection) modSelection).cleanup();
  }
}
/**
 * Computes the class membership probabilities for the given instance,
 * applying the same preprocessing filters that were fitted during training.
 *
 * @param instance the instance to compute the distribution for
 * @return the class probabilities
 * @throws Exception if distribution can't be computed successfully
 */
@Override
public double[] distributionForInstance(Instance instance) throws Exception {
  // replay the training-time preprocessing: impute missing values first
  m_replaceMissing.input(instance);
  Instance processed = m_replaceMissing.output();
  // ... then binarize nominal attributes, if that was done globally
  if (m_convertNominal) {
    m_nominalToBinary.input(processed);
    processed = m_nominalToBinary.output();
  }
  // delegate to the root of the logistic model tree
  return m_tree.distributionForInstance(processed);
}
/**
 * Classifies an instance by choosing the class with the highest predicted
 * probability (ties go to the lowest class index).
 *
 * @param instance the instance to classify
 * @return the classification (index of the most probable class)
 * @throws Exception if instance can't be classified successfully
 */
@Override
public double classifyInstance(Instance instance) throws Exception {
  double[] probs = distributionForInstance(instance);
  int best = 0;
  double bestProb = -1;
  int numClasses = instance.numClasses();
  for (int i = 0; i < numClasses; i++) {
    // Utils.gr is a strictly-greater comparison, so the first maximum wins
    if (Utils.gr(probs[i], bestProb)) {
      bestProb = probs[i];
      best = i;
    }
  }
  return best;
}
/**
 * Returns a textual description of the fitted model, or a short note if
 * no tree has been built yet.
 *
 * @return a string representation of the classifier
 */
@Override
public String toString() {
  if (m_tree == null) {
    return "No tree build";
  }
  return "Logistic model tree \n------------------\n" + m_tree.toString();
}
/**
* Returns an enumeration describing the available options.
*
* @return an enumeration of all the available options.
*/
@Override
public Enumeration