// Many resources are needed to download a project. Please understand that we have to cover our server costs. Thank you in advance. Project price: only $1.
// You can buy this project and download/modify it as often as you want.
/*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
/*
* Logistic.java
* Copyright (C) 2003-2012 University of Waikato, Hamilton, New Zealand
*
*/
package weka.classifiers.functions;
import java.util.Collections;
import java.util.Enumeration;
import java.util.Vector;
import weka.classifiers.AbstractClassifier;
import weka.classifiers.pmml.producer.LogisticProducerHelper;
import weka.core.Aggregateable;
import weka.core.Capabilities;
import weka.core.Capabilities.Capability;
import weka.core.ConjugateGradientOptimization;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.Optimization;
import weka.core.Option;
import weka.core.OptionHandler;
import weka.core.RevisionUtils;
import weka.core.TechnicalInformation;
import weka.core.TechnicalInformation.Field;
import weka.core.TechnicalInformation.Type;
import weka.core.TechnicalInformationHandler;
import weka.core.Utils;
import weka.core.WeightedInstancesHandler;
import weka.core.pmml.PMMLProducer;
import weka.filters.Filter;
import weka.filters.unsupervised.attribute.NominalToBinary;
import weka.filters.unsupervised.attribute.RemoveUseless;
import weka.filters.unsupervised.attribute.ReplaceMissingValues;
/**
* Class for building and using a multinomial logistic
* regression model with a ridge estimator.
*
* There are some modifications, however, compared to the paper of leCessie and
* van Houwelingen(1992):
*
* If there are k classes for n instances with m attributes, the parameter
* matrix B to be calculated will be an m*(k-1) matrix.
*
* The probability for class j with the exception of the last class is
*
* Pj(Xi) = exp(XiBj)/((sum[j=1..(k-1)]exp(Xi*Bj))+1)
*
* The last class has probability
*
* 1-(sum[j=1..(k-1)]Pj(Xi))
* = 1/((sum[j=1..(k-1)]exp(Xi*Bj))+1)
*
* The (negative) multinomial log-likelihood is thus:
*
* L = -sum[i=1..n]{
* sum[j=1..(k-1)](Yij * ln(Pj(Xi)))
* +(1 - (sum[j=1..(k-1)]Yij))
* * ln(1 - sum[j=1..(k-1)]Pj(Xi))
* } + ridge * (B^2)
*
* In order to find the matrix B for which L is minimised, a Quasi-Newton Method
* is used to search for the optimized values of the m*(k-1) variables. Note
* that before we use the optimization procedure, we 'squeeze' the matrix B into
* a m*(k-1) vector. For details of the optimization procedure, please check
* weka.core.Optimization class.
*
* Although original Logistic Regression does not deal with instance weights, we
* modify the algorithm a little bit to handle the instance weights.
*
* For more information see:
*
* le Cessie, S., van Houwelingen, J.C. (1992). Ridge Estimators in Logistic
* Regression. Applied Statistics. 41(1):191-201.
*
* Note: Missing values are replaced using a ReplaceMissingValuesFilter, and
* nominal attributes are transformed into numeric attributes using a
* NominalToBinaryFilter.
*
*
*
* BibTeX:
*
*
* @article{leCessie1992,
* author = {le Cessie, S. and van Houwelingen, J.C.},
* journal = {Applied Statistics},
* number = {1},
* pages = {191-201},
* title = {Ridge Estimators in Logistic Regression},
* volume = {41},
* year = {1992}
* }
*
*
*
*
* Valid options are:
*
*
*
* -D
* Turn on debugging output.
*
*
*
* -R <ridge>
* Set the ridge in the log-likelihood.
*
*
*
* -M <number>
* Set the maximum number of iterations (default -1, until convergence).
*
*
*
*
* @author Xin Xu ([email protected])
* @version $Revision: 12617 $
*/
public class Logistic extends AbstractClassifier implements OptionHandler,
WeightedInstancesHandler, TechnicalInformationHandler, PMMLProducer,
Aggregateable {
/** Version identifier used for serialization. */
static final long serialVersionUID = 3932117032546553727L;
/** The coefficients (optimized parameters) of the model. */
protected double[][] m_Par;
/** The data saved as a matrix. */
protected double[][] m_Data;
/** The number of attributes in the model. */
protected int m_NumPredictors;
/** The index of the class attribute. */
protected int m_ClassIndex;
/** The number of class labels. */
protected int m_NumClasses;
/** The ridge parameter used in the log-likelihood (defaults to 1e-8). */
protected double m_Ridge = 1e-8;
/** An attribute filter. */
private RemoveUseless m_AttFilter;
/** The filter used to make attributes numeric. */
private NominalToBinary m_NominalToBinary;
/** The filter used to get rid of missing values. */
private ReplaceMissingValues m_ReplaceMissingValues;
/** Log-likelihood of the searched model. */
protected double m_LL;
/** The maximum number of iterations; the default of -1 means iterate until convergence. */
private int m_MaxIts = -1;
/** Whether to use conjugate gradient descent rather than BFGS updates. */
private boolean m_useConjugateGradientDescent = false;
/**
 * NOTE(review): undocumented in the original. Presumably the header (structure) of the data
 * the model was built from; confirm against its uses elsewhere in this class.
 */
private Instances m_structure;
/**
* Constructor that sets the default number of decimal places to 4.
*/
public Logistic() {
setNumDecimalPlaces(4);
}
/**
 * Builds the human-readable description of this classifier. The text is assembled from a
 * handful of named sections, with the citation produced by
 * {@link #getTechnicalInformation()} placed between the prose and the preprocessing note.
 *
 * @return a description of the classifier suitable for displaying in the
 *         explorer/experimenter gui
 */
public String globalInfo() {
  // What the classifier is and how the parameter matrix is laid out.
  final String overview = "Class for building and using a multinomial logistic "
    + "regression model with a ridge estimator.\n\n"
    + "There are some modifications, however, compared to the paper of "
    + "leCessie and van Houwelingen(1992): \n\n"
    + "If there are k classes for n instances with m attributes, the "
    + "parameter matrix B to be calculated will be an m*(k-1) matrix.\n\n";

  // Class membership probabilities.
  final String probabilities =
    "The probability for class j with the exception of the last class is\n\n"
    + "Pj(Xi) = exp(XiBj)/((sum[j=1..(k-1)]exp(Xi*Bj))+1) \n\n"
    + "The last class has probability\n\n"
    + "1-(sum[j=1..(k-1)]Pj(Xi)) \n\t= 1/((sum[j=1..(k-1)]exp(Xi*Bj))+1)\n\n";

  // The objective that is minimised.
  final String logLikelihood = "The (negative) multinomial log-likelihood is thus: \n\n"
    + "L = -sum[i=1..n]{\n\tsum[j=1..(k-1)](Yij * ln(Pj(Xi)))"
    + "\n\t+(1 - (sum[j=1..(k-1)]Yij)) \n\t* ln(1 - sum[j=1..(k-1)]Pj(Xi))"
    + "\n\t} + ridge * (B^2)\n\n";

  // How the objective is optimised and how instance weights are treated.
  final String optimisation = "In order to find the matrix B for which L is minimised, a "
    + "Quasi-Newton Method is used to search for the optimized values of "
    + "the m*(k-1) variables. Note that before we use the optimization "
    + "procedure, we 'squeeze' the matrix B into a m*(k-1) vector. For "
    + "details of the optimization procedure, please check "
    + "weka.core.Optimization class.\n\n"
    + "Although original Logistic Regression does not deal with instance "
    + "weights, we modify the algorithm a little bit to handle the "
    + "instance weights.\n\n";

  // Note on the filters applied to the data.
  final String preprocessing =
    "Note: Missing values are replaced using a ReplaceMissingValuesFilter, and "
    + "nominal attributes are transformed into numeric attributes using a "
    + "NominalToBinaryFilter.";

  final StringBuilder description = new StringBuilder();
  description.append(overview);
  description.append(probabilities);
  description.append(logLikelihood);
  description.append(optimisation);
  description.append("For more information see:\n\n");
  description.append(getTechnicalInformation().toString());
  description.append("\n\n");
  description.append(preprocessing);
  return description.toString();
}
/**
 * Supplies the bibliographic reference for this class: an article entry whose author, year,
 * title, journal, volume, number and page fields are filled in below.
 *
 * @return the technical information about this class
 */
@Override
public TechnicalInformation getTechnicalInformation() {
  final TechnicalInformation article = new TechnicalInformation(Type.ARTICLE);

  // Authorship and title.
  article.setValue(Field.AUTHOR, "le Cessie, S. and van Houwelingen, J.C.");
  article.setValue(Field.YEAR, "1992");
  article.setValue(Field.TITLE, "Ridge Estimators in Logistic Regression");

  // Where the article appeared.
  article.setValue(Field.JOURNAL, "Applied Statistics");
  article.setValue(Field.VOLUME, "41");
  article.setValue(Field.NUMBER, "1");
  article.setValue(Field.PAGES, "191-201");

  return article;
}
/**
* Returns an enumeration describing the available options
*
* @return an enumeration of all the available options
*/
@Override
public Enumeration