All Downloads are FREE. Search and download functionalities are using the official Maven repository.

weka.classifiers.functions.SimpleLogistic Maven / Gradle / Ivy

/*
 *   This program is free software: you can redistribute it and/or modify
 *   it under the terms of the GNU General Public License as published by
 *   the Free Software Foundation, either version 3 of the License, or
 *   (at your option) any later version.
 *
 *   This program is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *   GNU General Public License for more details.
 *
 *   You should have received a copy of the GNU General Public License
 *   along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

/*
 *    SimpleLogistic.java
 *    Copyright (C) 2003-2012 University of Waikato, Hamilton, New Zealand
 *
 */

package weka.classifiers.functions;

import java.util.Collections;
import java.util.Enumeration;
import java.util.Vector;

import weka.classifiers.AbstractClassifier;
import weka.classifiers.trees.lmt.LogisticBase;
import weka.core.*;
import weka.core.Capabilities.Capability;
import weka.core.TechnicalInformation.Field;
import weka.core.TechnicalInformation.Type;
import weka.filters.Filter;
import weka.filters.unsupervised.attribute.NominalToBinary;
import weka.filters.unsupervised.attribute.ReplaceMissingValues;

/**
 * Classifier for building linear logistic regression
 * models. LogitBoost with simple regression functions as base learners is used
 * for fitting the logistic models. The optimal number of LogitBoost iterations
 * to perform is cross-validated, which leads to automatic attribute selection.
 * For more information see:<br>
 * Niels Landwehr, Mark Hall, Eibe Frank (2005). Logistic Model Trees.<br>
 * <br>
 * Marc Sumner, Eibe Frank, Mark Hall: Speeding up Logistic Model Tree
 * Induction. In: 9th European Conference on Principles and Practice of
 * Knowledge Discovery in Databases, 675-683, 2005.
 * <p>
 * BibTeX:
 *
 * <pre>
 * &#64;article{Landwehr2005,
 *    author = {Niels Landwehr and Mark Hall and Eibe Frank},
 *    booktitle = {Machine Learning},
 *    number = {1-2},
 *    pages = {161-205},
 *    title = {Logistic Model Trees},
 *    volume = {95},
 *    year = {2005}
 * }
 * 
 * &#64;inproceedings{Sumner2005,
 *    author = {Marc Sumner and Eibe Frank and Mark Hall},
 *    booktitle = {9th European Conference on Principles and Practice of Knowledge Discovery in Databases},
 *    pages = {675-683},
 *    publisher = {Springer},
 *    title = {Speeding up Logistic Model Tree Induction},
 *    year = {2005}
 * }
 * </pre>
 * <p>
 * Valid options are:
 *
 * <pre>
 * -I &lt;iterations&gt;
 *  Set fixed number of iterations for LogitBoost
 * </pre>
 *
 * <pre>
 * -S
 *  Use stopping criterion on training set (instead of
 *  cross-validation)
 * </pre>
 *
 * <pre>
 * -P
 *  Use error on probabilities (rmse) instead of
 *  misclassification error for stopping criterion
 * </pre>
 *
 * <pre>
 * -M &lt;iterations&gt;
 *  Set maximum number of boosting iterations
 * </pre>
 *
 * <pre>
 * -H &lt;iterations&gt;
 *  Set parameter for heuristic for early stopping of
 *  LogitBoost.
 *  If enabled, the minimum is selected greedily, stopping
 *  if the current minimum has not changed for iter iterations.
 *  By default, heuristic is enabled with value 50. Set to
 *  zero to disable heuristic.
 * </pre>
 *
 * <pre>
 * -W &lt;beta&gt;
 *  Set beta for weight trimming for LogitBoost. Set to 0 for no weight trimming.
 * </pre>
 *
 * <pre>
 * -A
 *  The AIC is used to choose the best iteration (instead of CV or training error).
 * </pre>
 *
* * * @author Niels Landwehr * @author Marc Sumner * @version $Revision: 11568 $ */ public class SimpleLogistic extends AbstractClassifier implements OptionHandler, AdditionalMeasureProducer, WeightedInstancesHandler, TechnicalInformationHandler { /** for serialization */ static final long serialVersionUID = 7397710626304705059L; /** The actual logistic regression model */ protected LogisticBase m_boostedModel; /** Filter for converting nominal attributes to binary ones */ protected NominalToBinary m_NominalToBinary = null; /** Filter for replacing missing values */ protected ReplaceMissingValues m_ReplaceMissingValues = null; /** If non-negative, use this as fixed number of LogitBoost iterations */ protected int m_numBoostingIterations; /** Maximum number of iterations for LogitBoost */ protected int m_maxBoostingIterations = 500; /** Parameter for the heuristic for early stopping of LogitBoost */ protected int m_heuristicStop = 50; /** If true, cross-validate number of LogitBoost iterations */ protected boolean m_useCrossValidation; /** * If true, use minimize error on probabilities instead of misclassification * error */ protected boolean m_errorOnProbabilities; /** * Threshold for trimming weights. Instances with a weight lower than this (as * a percentage of total weights) are not included in the regression fit. */ protected double m_weightTrimBeta = 0; /** If true, the AIC is used to choose the best iteration */ private boolean m_useAIC = false; /** * Constructor for creating SimpleLogistic object with standard options. */ public SimpleLogistic() { m_numBoostingIterations = 0; m_useCrossValidation = true; m_errorOnProbabilities = false; m_weightTrimBeta = 0; m_useAIC = false; } /** * Constructor for creating SimpleLogistic object. * * @param numBoostingIterations if non-negative, use this as fixed number of * iterations for LogitBoost * @param useCrossValidation cross-validate number of LogitBoost iterations. 
* @param errorOnProbabilities minimize error on probabilities instead of * misclassification error */ public SimpleLogistic(int numBoostingIterations, boolean useCrossValidation, boolean errorOnProbabilities) { m_numBoostingIterations = numBoostingIterations; m_useCrossValidation = useCrossValidation; m_errorOnProbabilities = errorOnProbabilities; m_weightTrimBeta = 0; m_useAIC = false; } /** * Main method for testing this class * * @param argv commandline options */ public static void main(String[] argv) { runClassifier(new SimpleLogistic(), argv); } /** * Returns default capabilities of the classifier. * * @return the capabilities of this classifier */ public Capabilities getCapabilities() { Capabilities result = super.getCapabilities(); result.disableAll(); // attributes result.enable(Capability.NOMINAL_ATTRIBUTES); result.enable(Capability.NUMERIC_ATTRIBUTES); result.enable(Capability.DATE_ATTRIBUTES); result.enable(Capability.MISSING_VALUES); // class result.enable(Capability.NOMINAL_CLASS); result.enable(Capability.MISSING_CLASS_VALUES); return result; } /** * Builds the logistic regression using LogitBoost. * * @param data the training data * @throws Exception if something goes wrong */ public void buildClassifier(Instances data) throws Exception { // can classifier handle the data? 
getCapabilities().testWithFail(data); // remove instances with missing class data = new Instances(data); data.deleteWithMissingClass(); // replace missing values m_ReplaceMissingValues = new ReplaceMissingValues(); m_ReplaceMissingValues.setInputFormat(data); data = Filter.useFilter(data, m_ReplaceMissingValues); // convert nominal attributes m_NominalToBinary = new NominalToBinary(); m_NominalToBinary.setInputFormat(data); data = Filter.useFilter(data, m_NominalToBinary); // create actual logistic model m_boostedModel = new LogisticBase(m_numBoostingIterations, m_useCrossValidation, m_errorOnProbabilities); m_boostedModel.setMaxIterations(m_maxBoostingIterations); m_boostedModel.setHeuristicStop(m_heuristicStop); m_boostedModel.setWeightTrimBeta(m_weightTrimBeta); m_boostedModel.setUseAIC(m_useAIC); m_boostedModel.setNumDecimalPlaces(m_numDecimalPlaces); // build logistic model m_boostedModel.buildClassifier(data); } /** * Returns class probabilities for an instance. * * @param inst the instance to compute the probabilities for * @return the probabilities * @throws Exception if distribution can't be computed successfully */ public double[] distributionForInstance(Instance inst) throws Exception { // replace missing values / convert nominal atts m_ReplaceMissingValues.input(inst); inst = m_ReplaceMissingValues.output(); m_NominalToBinary.input(inst); inst = m_NominalToBinary.output(); // obtain probs from logistic model return m_boostedModel.distributionForInstance(inst); } /** * Returns an enumeration describing the available options. * * @return an enumeration of all the available options. */ public Enumeration




© 2015 - 2025 Weber Informatics LLC | Privacy Policy