Serving project downloads requires considerable server resources. Please understand that we have to cover our server costs. Thank you in advance. The project price is only $1.
You can buy this project and then download or modify it as often as you want.
The Waikato Environment for Knowledge Analysis (WEKA), a machine
learning workbench. This version represents the developer version, the
"bleeding edge" of development, you could say. New functionality gets added
to this version.
/*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
/*
* SimpleLogistic.java
* Copyright (C) 2003-2012 University of Waikato, Hamilton, New Zealand
*
*/
package weka.classifiers.functions;
import java.util.Collections;
import java.util.Enumeration;
import java.util.Vector;
import weka.classifiers.AbstractClassifier;
import weka.classifiers.trees.lmt.LogisticBase;
import weka.core.*;
import weka.core.Capabilities.Capability;
import weka.core.TechnicalInformation.Field;
import weka.core.TechnicalInformation.Type;
import weka.filters.Filter;
import weka.filters.unsupervised.attribute.NominalToBinary;
import weka.filters.unsupervised.attribute.ReplaceMissingValues;
/**
* Classifier for building linear logistic regression
* models. LogitBoost with simple regression functions as base learners is used
* for fitting the logistic models. The optimal number of LogitBoost iterations
* to perform is cross-validated, which leads to automatic attribute selection.
* For more information see:
* Niels Landwehr, Mark Hall, Eibe Frank (2005). Logistic Model Trees.
*
* Marc Sumner, Eibe Frank, Mark Hall: Speeding up Logistic Model Tree
* Induction. In: 9th European Conference on Principles and Practice of
* Knowledge Discovery in Databases, 675-683, 2005.
*
*
* BibTeX:
*
*
* @article{Landwehr2005,
* author = {Niels Landwehr and Mark Hall and Eibe Frank},
* booktitle = {Machine Learning},
* number = {1-2},
* pages = {161-205},
* title = {Logistic Model Trees},
* volume = {95},
* year = {2005}
* }
*
* @inproceedings{Sumner2005,
* author = {Marc Sumner and Eibe Frank and Mark Hall},
* booktitle = {9th European Conference on Principles and Practice of Knowledge Discovery in Databases},
* pages = {675-683},
* publisher = {Springer},
* title = {Speeding up Logistic Model Tree Induction},
* year = {2005}
* }
*
*
*
*
* Valid options are:
*
*
*
* -I <iterations>
* Set fixed number of iterations for LogitBoost
*
*
*
* -S
* Use stopping criterion on training set (instead of
* cross-validation)
*
*
*
* -P
* Use error on probabilities (rmse) instead of
* misclassification error for stopping criterion
*
*
*
* -M <iterations>
* Set maximum number of boosting iterations
*
*
*
* -H <iterations>
* Set parameter for heuristic for early stopping of
* LogitBoost.
* If enabled, the minimum is selected greedily, stopping
* if the current minimum has not changed for iter iterations.
* By default, heuristic is enabled with value 50. Set to
* zero to disable heuristic.
*
*
*
* -W <beta>
* Set beta for weight trimming for LogitBoost. Set to 0 for no weight trimming.
*
*
*
* -A
* The AIC is used to choose the best iteration (instead of CV or training error).
*
*
*
* @author Niels Landwehr
* @author Marc Sumner
* @version $Revision: 15519 $
*/
public class SimpleLogistic extends AbstractClassifier implements
OptionHandler, AdditionalMeasureProducer, WeightedInstancesHandler,
TechnicalInformationHandler {
/** Version identifier used for serialization of this class. */
static final long serialVersionUID = 7397710626304705059L;
/**
* The actual logistic regression model. It is created and trained in
* buildClassifier and queried in distributionForInstance.
*/
protected LogisticBase m_boostedModel;
/**
* Filter for converting nominal attributes to binary ones. It is created in
* buildClassifier and applied again to every instance at prediction time.
*/
protected NominalToBinary m_NominalToBinary = null;
/**
* Filter for replacing missing values. It is created in buildClassifier and
* applied again to every instance at prediction time.
*/
protected ReplaceMissingValues m_ReplaceMissingValues = null;
/**
* If non-negative, use this as fixed number of LogitBoost iterations. The
* value is handed to the LogisticBase constructor in buildClassifier.
* NOTE(review): the default constructor sets this to 0 while also enabling
* cross-validation, so 0 presumably means "no fixed number" - confirm
* against LogisticBase.
*/
protected int m_numBoostingIterations;
/** Maximum number of iterations for LogitBoost (500 unless changed). */
protected int m_maxBoostingIterations = 500;
/**
* Parameter for the heuristic for early stopping of LogitBoost (50 unless
* changed). According to the option description, zero disables the
* heuristic.
*/
protected int m_heuristicStop = 50;
/** If true, cross-validate the number of LogitBoost iterations. */
protected boolean m_useCrossValidation;
/**
* If true, minimize the error on probabilities instead of the
* misclassification error.
*/
protected boolean m_errorOnProbabilities;
/**
* Threshold for trimming weights. Instances with a weight lower than this (as
* a percentage of total weights) are not included in the regression fit.
* According to the option description, 0 means no weight trimming.
*/
protected double m_weightTrimBeta = 0;
/**
* If true, the AIC is used to choose the best iteration (instead of
* cross-validation or training error).
*/
private boolean m_useAIC = false;
/**
 * Creates a SimpleLogistic classifier with the standard options: no fixed
 * number of boosting iterations (0), cross-validation of the number of
 * iterations enabled, misclassification error as the error measure, no
 * weight trimming and no AIC-based selection.
 */
public SimpleLogistic() {
  // The three-argument constructor assigns exactly the same set of fields.
  this(0, true, false);
}
/**
 * Creates a SimpleLogistic classifier with explicit boosting settings.
 * Weight trimming and AIC-based selection start out disabled.
 *
 * @param numBoostingIterations if non-negative, use this as fixed number of
 *          iterations for LogitBoost
 * @param useCrossValidation whether to cross-validate the number of
 *          LogitBoost iterations
 * @param errorOnProbabilities whether to minimize the error on probabilities
 *          instead of the misclassification error
 */
public SimpleLogistic(int numBoostingIterations, boolean useCrossValidation,
  boolean errorOnProbabilities) {
  // Settings that this constructor does not expose.
  this.m_useAIC = false;
  this.m_weightTrimBeta = 0;

  // Settings supplied by the caller.
  this.m_errorOnProbabilities = errorOnProbabilities;
  this.m_useCrossValidation = useCrossValidation;
  this.m_numBoostingIterations = numBoostingIterations;
}
/**
 * Command-line entry point: runs a default-configured SimpleLogistic with
 * the given options.
 *
 * @param argv commandline options
 */
public static void main(String[] argv) {
  SimpleLogistic classifier = new SimpleLogistic();
  runClassifier(classifier, argv);
}
/**
 * Returns the default capabilities of this classifier: nominal, numeric and
 * date attributes (missing values allowed) together with a nominal class
 * (missing class values allowed).
 *
 * @return the capabilities of this classifier
 */
public Capabilities getCapabilities() {
  Capabilities caps = super.getCapabilities();
  // Start from a clean slate and switch on only what is supported.
  caps.disableAll();

  Capability[] supported = {
    // attributes
    Capability.NOMINAL_ATTRIBUTES,
    Capability.NUMERIC_ATTRIBUTES,
    Capability.DATE_ATTRIBUTES,
    Capability.MISSING_VALUES,
    // class
    Capability.NOMINAL_CLASS,
    Capability.MISSING_CLASS_VALUES
  };
  for (Capability capability : supported) {
    caps.enable(capability);
  }
  return caps;
}
/**
 * Builds the logistic regression model using LogitBoost. The training data
 * is copied, stripped of instances with a missing class, passed through the
 * missing-value and nominal-to-binary filters, and then handed to a freshly
 * configured LogisticBase.
 *
 * @param data the training data
 * @throws Exception if something goes wrong
 */
public void buildClassifier(Instances data) throws Exception {
  // Refuse data this classifier cannot handle.
  getCapabilities().testWithFail(data);

  // Copy the dataset, then drop instances whose class value is missing.
  Instances train = new Instances(data);
  train.deleteWithMissingClass();

  // Replace missing values; the filter is kept for use at prediction time.
  m_ReplaceMissingValues = new ReplaceMissingValues();
  m_ReplaceMissingValues.setInputFormat(train);
  train = Filter.useFilter(train, m_ReplaceMissingValues);

  // Convert nominal attributes; this filter is kept for prediction as well.
  m_NominalToBinary = new NominalToBinary();
  m_NominalToBinary.setInputFormat(train);
  train = Filter.useFilter(train, m_NominalToBinary);

  // Create the actual logistic model and pass on the current settings.
  LogisticBase model = new LogisticBase(m_numBoostingIterations,
    m_useCrossValidation, m_errorOnProbabilities);
  m_boostedModel = model;
  model.setMaxIterations(m_maxBoostingIterations);
  model.setHeuristicStop(m_heuristicStop);
  model.setWeightTrimBeta(m_weightTrimBeta);
  model.setUseAIC(m_useAIC);
  model.setNumDecimalPlaces(m_numDecimalPlaces);

  // Train on the filtered data.
  model.buildClassifier(train);
}
/**
 * Returns the class probabilities for an instance. The instance is first
 * passed through the missing-value filter and the nominal-to-binary filter
 * that were set up in buildClassifier.
 *
 * @param inst the instance to compute the probabilities for
 * @return the probabilities
 * @throws Exception if distribution can't be computed successfully
 */
public double[] distributionForInstance(Instance inst) throws Exception {
  // Step 1: replace missing values.
  m_ReplaceMissingValues.input(inst);
  Instance withoutMissing = m_ReplaceMissingValues.output();

  // Step 2: convert nominal attributes to binary ones.
  m_NominalToBinary.input(withoutMissing);
  Instance binarized = m_NominalToBinary.output();

  // Step 3: obtain the probabilities from the logistic model.
  return m_boostedModel.distributionForInstance(binarized);
}
/**
* Returns an enumeration describing the available options.
*
* @return an enumeration of all the available options.
*/
public Enumeration