/*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
/*
* LogitBoost.java
* Copyright (C) 1999-2014 University of Waikato, Hamilton, New Zealand
*
*/
package weka.classifiers.meta;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Enumeration;
import java.util.HashSet;
import java.util.Random;
import java.util.Set;
import java.util.Vector;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import weka.classifiers.AbstractClassifier;
import weka.classifiers.Classifier;
import weka.classifiers.IterativeClassifier;
import weka.classifiers.RandomizableIteratedSingleClassifierEnhancer;
import weka.classifiers.Sourcable;
import weka.classifiers.rules.ZeroR;
import weka.core.Attribute;
import weka.core.BatchPredictor;
import weka.core.Capabilities;
import weka.core.Capabilities.Capability;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.Option;
import weka.core.RevisionUtils;
import weka.core.TechnicalInformation;
import weka.core.TechnicalInformation.Field;
import weka.core.TechnicalInformation.Type;
import weka.core.TechnicalInformationHandler;
import weka.core.UnassignedClassException;
import weka.core.Utils;
import weka.core.WeightedInstancesHandler;
/**
* Class for performing additive logistic regression.
* This class performs classification using a regression scheme as the base
* learner, and can handle multi-class problems. For more information, see
*
* J. Friedman, T. Hastie, R. Tibshirani (1998). Additive Logistic Regression: a
* Statistical View of Boosting. Stanford University.
*
* BibTeX:
*
* @techreport{Friedman1998,
* address = {Stanford University},
* author = {J. Friedman and T. Hastie and R. Tibshirani},
* title = {Additive Logistic Regression: a Statistical View of Boosting},
* year = {1998},
* PS = {http://www-stat.stanford.edu/\~jhf/ftp/boost.ps}
* }
*
* Valid options are:
*
* -Q
*  Use resampling instead of reweighting for boosting.
*
* -use-estimated-priors
*  Use estimated priors rather than uniform ones.
*
* -P <percent>
*  Percentage of weight mass to base training on.
*  (default 100, reduce to around 90 to speed up)
*
* -L <num>
*  Threshold on the improvement of the likelihood.
*  (default -Double.MAX_VALUE)
*
* -H <num>
*  Shrinkage parameter.
*  (default 1)
*
* -Z <num>
*  Z max threshold for responses.
*  (default 3)
*
* -O <int>
*  The size of the thread pool, for example, the number of cores in the CPU.
*  (default 1)
*
* -E <int>
*  The number of threads to use for batch prediction, which should be >= the size of the thread pool.
*  (default 1)
*
* -S <num>
*  Random number seed.
*  (default 1)
*
* -I <num>
*  Number of iterations.
*  (default 10)
*
* -W
*  Full name of base classifier.
*  (default: weka.classifiers.trees.DecisionStump)
*
* -output-debug-info
*  If set, classifier is run in debug mode and
*  may output additional info to the console.
*
* -do-not-check-capabilities
*  If set, classifier capabilities are not checked before classifier is built
*  (use with caution).
*
* Options specific to classifier weka.classifiers.trees.DecisionStump:
*
* -output-debug-info
*  If set, classifier is run in debug mode and
*  may output additional info to the console.
*
* -do-not-check-capabilities
*  If set, classifier capabilities are not checked before classifier is built
*  (use with caution).
*
* Options after -- are passed to the designated learner.
*
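* A minimal usage sketch (the ARFF file name is illustrative only; the calls
* must run in a context that may throw Exception):
*
*   Instances data =
*     new weka.core.converters.ConverterUtils.DataSource("data.arff").getDataSet();
*   data.setClassIndex(data.numAttributes() - 1);
*   LogitBoost booster = new LogitBoost();
*   booster.setNumIterations(10);                                       // -I 10
*   booster.setClassifier(new weka.classifiers.trees.DecisionStump());  // -W
*   booster.buildClassifier(data);
*   double[] dist = booster.distributionForInstance(data.instance(0));
*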
* @author Len Trigg ([email protected])
* @author Eibe Frank ([email protected])
* @version $Revision: 14462 $
*/
public class LogitBoost extends RandomizableIteratedSingleClassifierEnhancer
implements Sourcable, WeightedInstancesHandler, TechnicalInformationHandler,
IterativeClassifier, BatchPredictor {
/** for serialization */
static final long serialVersionUID = -1105660358715833753L;
/**
* ArrayList for storing the generated base classifiers. Note: we are hiding
* the variable from IteratedSingleClassifierEnhancer
*/
protected ArrayList<Classifier> m_Classifiers;
/** The number of classes */
protected int m_NumClasses;
/** The number of successfully generated base classifiers. */
protected int m_NumGenerated;
/** Weight thresholding. The percentage of weight mass used in training */
protected int m_WeightThreshold = 100;
/** A threshold for responses (Friedman suggests between 2 and 4) */
protected static final double DEFAULT_Z_MAX = 3;
/** Dummy dataset with a numeric class */
protected Instances m_NumericClassData;
/** The actual class attribute (for getting class names) */
protected Attribute m_ClassAttribute;
/** Whether to use resampling instead of reweighting for boosting */
protected boolean m_UseResampling;
/** The threshold on the improvement of the likelihood */
protected double m_Precision = -Double.MAX_VALUE;
/** The value of the shrinkage parameter */
protected double m_Shrinkage = 1;
/** Whether to start with class priors estimated from the training data */
protected boolean m_UseEstimatedPriors = false;
/** The random number generator used */
protected Random m_RandomInstance = null;
/**
* The value by which the actual target value for the true class is offset.
*/
protected double m_Offset = 0.0;
/** A ZeroR model in case no model can be built from the data */
protected Classifier m_ZeroR;
/** The initial F scores (0 by default) */
protected double[] m_InitialFs;
/** The Z max value to use */
protected double m_zMax = DEFAULT_Z_MAX;
/** The y values used during the training process. */
protected double[][] m_trainYs;
/** The F scores used during the training process. */
protected double[][] m_trainFs;
/** The probabilities used during the training process. */
protected double[][] m_probs;
/** The current loglikelihood. */
protected double m_logLikelihood;
/** The total weight of the data. */
protected double m_sumOfWeights;
/** The training data. */
protected Instances m_data;
/** The number of threads to use at prediction time in batch prediction. */
protected int m_numThreads = 1;
/** The size of the thread pool. */
protected int m_poolSize = 1;
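/*
* Sketch of how the training-state fields above are used (a summary of the
* LogitBoost procedure of Friedman, Hastie and Tibshirani (1998), which this
* class's documentation references, not additional behaviour): the class
* probabilities in m_probs are obtained from the F scores in m_trainFs via
*
*   p_j(x) = exp(F_j(x)) / sum_k exp(F_k(x)),
*
* and each boosting iteration fits the regression base learner to the working
* response z_j = (y_j - p_j) / (p_j * (1 - p_j)), clipped to [-m_zMax, m_zMax],
* with case weights w_j = p_j * (1 - p_j), where y_j is the class indicator
* stored in m_trainYs. The (shrunken) prediction of the fitted model is added
* to F_j, and m_logLikelihood is compared against m_Precision to decide when
* further iterations no longer improve the fit.
*/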
/**
* Returns a string describing classifier
*
* @return a description suitable for displaying in the explorer/experimenter
* gui
*/
public String globalInfo() {
return "Class for performing additive logistic regression. \n"
+ "This class performs classification using a regression scheme as the "
+ "base learner, and can handle multi-class problems. For more "
+ "information, see\n\n" + getTechnicalInformation().toString();
}
/**
* Constructor.
*/
public LogitBoost() {
m_Classifier = new weka.classifiers.trees.DecisionStump();
}
/**
* Returns an instance of a TechnicalInformation object, containing detailed
* information about the technical background of this class, e.g., paper
* reference or book this class is based on.
*
* @return the technical information about this class
*/
public TechnicalInformation getTechnicalInformation() {
TechnicalInformation result;
result = new TechnicalInformation(Type.TECHREPORT);
result
.setValue(Field.AUTHOR, "J. Friedman and T. Hastie and R. Tibshirani");
result.setValue(Field.YEAR, "1998");
result.setValue(Field.TITLE,
"Additive Logistic Regression: a Statistical View of Boosting");
result.setValue(Field.ADDRESS, "Stanford University");
result.setValue(Field.PS, "http://www-stat.stanford.edu/~jhf/ftp/boost.ps");
return result;
}
/**
* String describing default classifier.
*
* @return the default classifier classname
*/
protected String defaultClassifierString() {
return "weka.classifiers.trees.DecisionStump";
}
/**
* Select only instances with weights that contribute to the specified
* quantile of the weight distribution
*
* @param data the input instances
* @param quantile the specified quantile, e.g. 0.9 to select 90% of the weight
* mass
* @return the selected instances
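*         (for example, with instance weights {6, 2, 1, 1} and quantile 0.7,
*         the two heaviest instances are returned: their combined weight of 8
*         already exceeds 70% of the total mass of 10)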
*/
protected Instances selectWeightQuantile(Instances data, double quantile) {
int numInstances = data.numInstances();
Instances trainData = new Instances(data, numInstances);
double[] weights = new double[numInstances];
double sumOfWeights = 0;
for (int i = 0; i < numInstances; i++) {
weights[i] = data.instance(i).weight();
sumOfWeights += weights[i];
}
double weightMassToSelect = sumOfWeights * quantile;
int[] sortedIndices = Utils.sort(weights);
// Select the instances
sumOfWeights = 0;
for (int i = numInstances - 1; i >= 0; i--) {
Instance instance = (Instance) data.instance(sortedIndices[i]).copy();
trainData.add(instance);
sumOfWeights += weights[sortedIndices[i]];
if ((sumOfWeights > weightMassToSelect) && (i > 0)
&& (weights[sortedIndices[i]] != weights[sortedIndices[i - 1]])) {
break;
}
}
if (m_Debug) {
System.err.println("Selected " + trainData.numInstances() + " out of "
+ numInstances);
}
return trainData;
}
/**
* Returns an enumeration describing the available options.
*
* @return an enumeration of all the available options.
*/
public Enumeration<Option> listOptions() {