All Downloads are FREE. Search and download functionalities are using the official Maven repository.

weka.classifiers.functions.SimpleLogistic Maven / Gradle / Ivy

Go to download

The Waikato Environment for Knowledge Analysis (WEKA), a machine learning workbench. This version represents the developer version, the "bleeding edge" of development, you could say. New functionality gets added to this version.

There is a newer version: 3.9.6
Show newest version
/*
 *   This program is free software: you can redistribute it and/or modify
 *   it under the terms of the GNU General Public License as published by
 *   the Free Software Foundation, either version 3 of the License, or
 *   (at your option) any later version.
 *
 *   This program is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *   GNU General Public License for more details.
 *
 *   You should have received a copy of the GNU General Public License
 *   along with this program.  If not, see .
 */

/*
 *    SimpleLogistic.java
 *    Copyright (C) 2003-2012 University of Waikato, Hamilton, New Zealand
 *
 */

package weka.classifiers.functions;

import java.util.Collections;
import java.util.Enumeration;
import java.util.Vector;

import weka.classifiers.AbstractClassifier;
import weka.classifiers.trees.lmt.LogisticBase;
import weka.core.AdditionalMeasureProducer;
import weka.core.Capabilities;
import weka.core.Capabilities.Capability;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.Option;
import weka.core.OptionHandler;
import weka.core.RevisionUtils;
import weka.core.TechnicalInformation;
import weka.core.TechnicalInformation.Field;
import weka.core.TechnicalInformation.Type;
import weka.core.TechnicalInformationHandler;
import weka.core.Utils;
import weka.core.WeightedInstancesHandler;
import weka.filters.Filter;
import weka.filters.unsupervised.attribute.NominalToBinary;
import weka.filters.unsupervised.attribute.ReplaceMissingValues;

/**
 
 * Classifier for building linear logistic regression models. LogitBoost with simple regression functions as base learners is used for fitting the logistic models. The optimal number of LogitBoost iterations to perform is cross-validated, which leads to automatic attribute selection. For more information see:
* Niels Landwehr, Mark Hall, Eibe Frank (2005). Logistic Model Trees.
*
* Marc Sumner, Eibe Frank, Mark Hall: Speeding up Logistic Model Tree Induction. In: 9th European Conference on Principles and Practice of Knowledge Discovery in Databases, 675-683, 2005. *

* * BibTeX: *

 * @article{Landwehr2005,
 *    author = {Niels Landwehr and Mark Hall and Eibe Frank},
 *    booktitle = {Machine Learning},
 *    number = {1-2},
 *    pages = {161-205},
 *    title = {Logistic Model Trees},
 *    volume = {95},
 *    year = {2005}
 * }
 * 
 * @inproceedings{Sumner2005,
 *    author = {Marc Sumner and Eibe Frank and Mark Hall},
 *    booktitle = {9th European Conference on Principles and Practice of Knowledge Discovery in Databases},
 *    pages = {675-683},
 *    publisher = {Springer},
 *    title = {Speeding up Logistic Model Tree Induction},
 *    year = {2005}
 * }
 * 
*

* * Valid options are:

* *

 -I <iterations>
 *  Set fixed number of iterations for LogitBoost
* *
 -S
 *  Use stopping criterion on training set (instead of
 *  cross-validation)
* *
 -P
 *  Use error on probabilities (rmse) instead of
 *  misclassification error for stopping criterion
* *
 -M <iterations>
 *  Set maximum number of boosting iterations
* *
 -H <iterations>
 *  Set parameter for heuristic for early stopping of
 *  LogitBoost.
 *  If enabled, the minimum is selected greedily, stopping
 *  if the current minimum has not changed for iter iterations.
 *  By default, heuristic is enabled with value 50. Set to
 *  zero to disable heuristic.
* *
 -W <beta>
 *  Set beta for weight trimming for LogitBoost. Set to 0 for no weight trimming.
 * 
* *
 -A
 *  The AIC is used to choose the best iteration (instead of CV or training error).
 * 
* * * @author Niels Landwehr * @author Marc Sumner * @version $Revision: 10141 $ */ public class SimpleLogistic extends AbstractClassifier implements OptionHandler, AdditionalMeasureProducer, WeightedInstancesHandler, TechnicalInformationHandler { /** for serialization */ static final long serialVersionUID = 7397710626304705059L; /**The actual logistic regression model */ protected LogisticBase m_boostedModel; /**Filter for converting nominal attributes to binary ones*/ protected NominalToBinary m_NominalToBinary = null; /**Filter for replacing missing values*/ protected ReplaceMissingValues m_ReplaceMissingValues = null; /**If non-negative, use this as fixed number of LogitBoost iterations*/ protected int m_numBoostingIterations; /**Maximum number of iterations for LogitBoost*/ protected int m_maxBoostingIterations = 500; /**Parameter for the heuristic for early stopping of LogitBoost*/ protected int m_heuristicStop = 50; /**If true, cross-validate number of LogitBoost iterations*/ protected boolean m_useCrossValidation; /**If true, use minimize error on probabilities instead of misclassification error*/ protected boolean m_errorOnProbabilities; /**Threshold for trimming weights. Instances with a weight lower than this (as a percentage * of total weights) are not included in the regression fit. */ protected double m_weightTrimBeta = 0; /** If true, the AIC is used to choose the best iteration*/ private boolean m_useAIC = false; /** * Constructor for creating SimpleLogistic object with standard options. */ public SimpleLogistic() { m_numBoostingIterations = 0; m_useCrossValidation = true; m_errorOnProbabilities = false; m_weightTrimBeta = 0; m_useAIC = false; } /** * Constructor for creating SimpleLogistic object. * @param numBoostingIterations if non-negative, use this as fixed number of iterations for LogitBoost * @param useCrossValidation cross-validate number of LogitBoost iterations. * @param errorOnProbabilities minimize error on probabilities instead of misclassification error */ public SimpleLogistic(int numBoostingIterations, boolean useCrossValidation, boolean errorOnProbabilities) { m_numBoostingIterations = numBoostingIterations; m_useCrossValidation = useCrossValidation; m_errorOnProbabilities = errorOnProbabilities; m_weightTrimBeta = 0; m_useAIC = false; } /** * Returns default capabilities of the classifier. * * @return the capabilities of this classifier */ public Capabilities getCapabilities() { Capabilities result = super.getCapabilities(); result.disableAll(); // attributes result.enable(Capability.NOMINAL_ATTRIBUTES); result.enable(Capability.NUMERIC_ATTRIBUTES); result.enable(Capability.DATE_ATTRIBUTES); result.enable(Capability.MISSING_VALUES); // class result.enable(Capability.NOMINAL_CLASS); result.enable(Capability.MISSING_CLASS_VALUES); return result; } /** * Builds the logistic regression using LogitBoost. * @param data the training data * @throws Exception if something goes wrong */ public void buildClassifier(Instances data) throws Exception { // can classifier handle the data? getCapabilities().testWithFail(data); // remove instances with missing class data = new Instances(data); data.deleteWithMissingClass(); //replace missing values m_ReplaceMissingValues = new ReplaceMissingValues(); m_ReplaceMissingValues.setInputFormat(data); data = Filter.useFilter(data, m_ReplaceMissingValues); //convert nominal attributes m_NominalToBinary = new NominalToBinary(); m_NominalToBinary.setInputFormat(data); data = Filter.useFilter(data, m_NominalToBinary); //create actual logistic model m_boostedModel = new LogisticBase(m_numBoostingIterations, m_useCrossValidation, m_errorOnProbabilities); m_boostedModel.setMaxIterations(m_maxBoostingIterations); m_boostedModel.setHeuristicStop(m_heuristicStop); m_boostedModel.setWeightTrimBeta(m_weightTrimBeta); m_boostedModel.setUseAIC(m_useAIC); //build logistic model m_boostedModel.buildClassifier(data); } /** * Returns class probabilities for an instance. * * @param inst the instance to compute the probabilities for * @return the probabilities * @throws Exception if distribution can't be computed successfully */ public double[] distributionForInstance(Instance inst) throws Exception { //replace missing values / convert nominal atts m_ReplaceMissingValues.input(inst); inst = m_ReplaceMissingValues.output(); m_NominalToBinary.input(inst); inst = m_NominalToBinary.output(); //obtain probs from logistic model return m_boostedModel.distributionForInstance(inst); } /** * Returns an enumeration describing the available options. * * @return an enumeration of all the available options. */ public Enumeration




© 2015 - 2024 Weber Informatics LLC | Privacy Policy