weka.estimators.Estimator Maven / Gradle / Ivy
Show all versions of weka-dev Show documentation
/*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
/*
* Estimator.java
* Copyright (C) 1999-2012 University of Waikato, Hamilton, New Zealand
*
*/
package weka.estimators;
import java.io.BufferedReader;
import java.io.FileReader;
import java.io.InputStreamReader;
import java.io.Reader;
import java.io.Serializable;
import java.util.Enumeration;
import java.util.Vector;
import weka.core.Capabilities;
import weka.core.CapabilitiesHandler;
import weka.core.CapabilitiesIgnorer;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.Option;
import weka.core.OptionHandler;
import weka.core.RevisionHandler;
import weka.core.RevisionUtils;
import weka.core.SerializedObject;
import weka.core.Utils;
/**
*
* Abstract class for all estimators.
*
* Example code for a nonincremental estimator
* // create a histogram for estimation
* EqualWidthEstimator est = new EqualWidthEstimator();
* est.addValues(instances, attrIndex);
*
*
*
* Example code for an incremental estimator (incremental estimators must
* implement interface IncrementalEstimator)
* // Create a discrete estimator that takes values 0 to 9
* DiscreteEstimator newEst = new DiscreteEstimator(10, true);
*
* // Create 50 random integers first predicting the probability of the
* // value, then adding the value to the estimator
* Random r = new Random(seed);
* for(int i = 0; i < 50; i++) {
* current = Math.abs(r.nextInt() % 10);
* System.out.println(newEst);
* System.out.println("Prediction for " + current
* + " = " + newEst.getProbability(current));
* newEst.addValue(current, 1);
* }
*
*
*
* Example code for a main method for an estimator.
*
*
* public static void main(String [] argv) {
*
* try {
* LoglikeliEstimator est = new LoglikeliEstimator();
* Estimator.buildEstimator((Estimator) est, argv, false);
* System.out.println(est.toString());
* } catch (Exception ex) {
* ex.printStackTrace();
* System.out.println(ex.getMessage());
* }
* }
*
*
*
* @author Gabi Schmidberger ([email protected])
* @author Len Trigg ([email protected])
* @version $Revision: 15521 $
*/
public abstract class Estimator implements Cloneable, Serializable,
OptionHandler, CapabilitiesHandler,
CapabilitiesIgnorer, RevisionHandler {
/** Serial version UID used for serialization. */
static final long serialVersionUID = -5902411487362274342L;
/** Debugging mode flag; initially switched off. */
private boolean m_Debug = false;
/**
 * The class value index is > -1 if a subset is taken with a specific class
 * value only. It is initialised to -1.0.
 */
protected double m_classValueIndex = -1.0;
/**
 * Set if the class is not important. Initially true; the addValues variants
 * of this class that take a class index and a class value set it to false.
 */
protected boolean m_noClass = true;
/**
 * Plain holder for the values needed while an estimator is being built: the
 * data plus the attribute, class and class value indices.
 */
private static class Builder implements Serializable, RevisionHandler {

  /** Serial version UID used for serialization. */
  private static final long serialVersionUID = -5810927990193597303L;

  /** The instances the builder works with; unset until assigned. */
  Instances m_instances;

  /** Index of the attribute the builder works on. */
  int m_attrIndex = -1;

  /** Index of the class attribute; only relevant if a class value index is set. */
  int m_classIndex = -1;

  /** Index of the class value the builder works on. */
  int m_classValueIndex = -1;

  /**
   * Extracts the revision from the revision keyword of this class.
   *
   * @return the revision
   */
  @Override
  public String getRevision() {
    final String revisionKeyword = "$Revision: 15521 $";
    return RevisionUtils.extract(revisionKeyword);
  }
}
/** Flag stating that capabilities should not be checked; initially false. */
protected boolean m_DoNotCheckCapabilities = false;
/**
 * Supplies the tip text describing the doNotCheckCapabilities property.
 *
 * @return tip text for this property suitable for displaying in the
 *         explorer/experimenter gui
 */
public String doNotCheckCapabilitiesTipText() {
  final String effect =
    "If set, estimator capabilities are not checked before estimator is built";
  final String caveat = " (Use with caution to reduce runtime).";
  return effect + caveat;
}
/**
 * Stores the flag that states whether capabilities are not to be checked.
 *
 * @param doNotCheckCapabilities true if capabilities are not to be checked.
 */
public void setDoNotCheckCapabilities(boolean doNotCheckCapabilities) {
  this.m_DoNotCheckCapabilities = doNotCheckCapabilities;
}
/**
 * Reports the current value of the flag that turns capabilities checking off.
 *
 * @return true if capabilities checking is turned off.
 */
public boolean getDoNotCheckCapabilities() {
  return this.m_DoNotCheckCapabilities;
}
/**
 * Add a new data value to the current estimator. This base implementation
 * does not store anything: it only reports, via a stack trace and a message
 * on standard output, that adding single values is not implemented. It does
 * not throw.
 *
 * @param data the new data value
 * @param weight the weight assigned to the data value
 */
public void addValue(double data, double weight) {
  // build the exception purely for reporting purposes; it is never thrown
  final Exception notImplemented = new Exception(
    "Method to add single value is not implemented!\n"
      + "Estimator should implement IncrementalEstimator.");
  notImplemented.printStackTrace();
  System.out.println(notImplemented.getMessage());
}
/**
 * Initialize the estimator with a new dataset. The minimum and maximum of the
 * attribute are determined first and then handed, together with a factor of
 * 1.0, to the five-argument addValues method.
 *
 * @param data the dataset used to build this estimator
 * @param attrIndex attribute the estimator is for
 * @exception Exception if building of estimator goes wrong
 */
public void addValues(Instances data, int attrIndex) throws Exception {
  // refuse data this estimator cannot handle
  getCapabilities().testWithFail(data);

  // range[0] is used as the minimum, range[1] as the maximum
  final double[] range = new double[2];
  try {
    EstimatorUtils.getMinMax(data, attrIndex, range);
  } catch (Exception failure) {
    // the failure is only reported; building continues with the array as is
    failure.printStackTrace();
    System.out.println(failure.getMessage());
  }

  // the data set has not been reduced, hence the factor of 1.0
  addValues(data, attrIndex, range[0], range[1], 1.0);
}
/**
 * Initialize the estimator with all values of one attribute of a dataset.
 * Every instance of the dataset, starting with the first one at index 0, is
 * passed to addValue with a weight of 1.0.
 * Some estimators might ignore the min and max values and the factor. This
 * default implementation does.
 * This default implementation does not check whether the estimator can handle
 * the data.
 *
 * @param data the dataset used to build this estimator
 * @param attrIndex attribute the estimator is for
 * @param min minimal border of range (unused here)
 * @param max maximal border of range (unused here)
 * @param factor number of instances has been reduced to that factor (unused
 *          here)
 * @exception Exception if building of estimator goes wrong
 */
public void addValues(Instances data, int attrIndex, double min, double max,
  double factor) throws Exception {
  // no handling of factor, would have to be overridden
  // no handling of min and max, would have to be overridden
  final int numInst = data.numInstances();
  // begin at index 0: starting at 1 silently dropped the first instance
  for (int i = 0; i < numInst; i++) {
    addValue(data.instance(i).value(attrIndex), 1.0);
  }
}
/**
 * Initialize the estimator using only the instances of one class and the
 * values of one attribute. Minimum and maximum are computed on the complete
 * given data, before the instances of the requested class are selected.
 * Nothing is added when no instance is selected.
 *
 * @param data the dataset used to build this estimator
 * @param attrIndex attribute the estimator is for
 * @param classIndex index of the class attribute
 * @param classValue the class value
 * @exception Exception if building of estimator goes wrong
 */
public void addValues(Instances data, int attrIndex, int classIndex,
  int classValue) throws Exception {
  // the class matters for this variant; the flag is set before the test
  m_noClass = false;
  getCapabilities().testWithFail(data);

  // range[0] is used as the minimum, range[1] as the maximum
  final double[] range = new double[2];
  try {
    EstimatorUtils.getMinMax(data, attrIndex, range);
  } catch (Exception failure) {
    // the failure is only reported; building continues with the array as is
    failure.printStackTrace();
    System.out.println(failure.getMessage());
  }

  // gather the instances that carry the requested class value
  final Instances subset = new Instances(data, 0);
  final double factor = getInstancesFromClass(data, attrIndex, classIndex,
    classValue, subset);

  // only hand over to the estimator when something was selected
  if (subset.numInstances() != 0) {
    addValues(subset, attrIndex, range[0], range[1], factor);
  }
}
/**
 * Initialize the estimator using only the instances of one class and the
 * values of one attribute, with the value range supplied by the caller. Some
 * estimators might ignore the min and max values. Nothing is added when no
 * instance is selected.
 *
 * @param data the dataset used to build this estimator
 * @param attrIndex attribute the estimator is for
 * @param classIndex index of the class attribute
 * @param classValue the class value
 * @param min minimal value of this attribute
 * @param max maximal value of this attribute
 * @exception Exception if building of estimator goes wrong
 */
public void addValues(Instances data, int attrIndex, int classIndex,
  int classValue, double min, double max) throws Exception {
  // the class matters for this variant; the flag is set before the test
  m_noClass = false;
  getCapabilities().testWithFail(data);

  // gather the instances that carry the requested class value
  final Instances subset = new Instances(data, 0);
  final double factor = getInstancesFromClass(data, attrIndex, classIndex,
    classValue, subset);

  // only hand over to the estimator when something was selected
  if (subset.numInstances() != 0) {
    addValues(subset, attrIndex, min, max, factor);
  }
}
/**
 * Copies all instances of a certain class value into the given work dataset.
 * Instances whose relevant attribute is missing are neither copied nor
 * counted.
 *
 * @param data dataset to select the instances from
 * @param attrIndex index of the relevant attribute
 * @param classIndex index of the class attribute
 * @param classValue the relevant class value
 * @param workData dataset the selected instances are added to
 * @return the number of selected instances divided by the number of
 *         instances whose relevant attribute is not missing; NaN if there is
 *         no such instance
 */
private double getInstancesFromClass(Instances data, int attrIndex,
  int classIndex, double classValue, Instances workData) {
  int withAttribute = 0;
  int selected = 0;
  for (int idx = 0; idx < data.numInstances(); idx++) {
    // instances without a value for the attribute are left out entirely
    if (data.instance(idx).isMissing(attrIndex)) {
      continue;
    }
    withAttribute++;
    // instances of another class value only count towards the total
    if (data.instance(idx).value(classIndex) != classValue) {
      continue;
    }
    workData.add(data.instance(idx));
    selected++;
  }
  final double share = (double) selected;
  return share / (double) withAttribute;
}
/**
 * Get a probability estimate for a value. The method is abstract, so each
 * concrete estimator has to provide the implementation.
 *
 * @param data the value to estimate the probability of
 * @return the estimated probability of the supplied value
 */
public abstract double getProbability(double data);
/**
* Build an estimator using the options. The data is given in the options.
*
* @param est the estimator used
* @param options the list of options
* @param isIncremental true if estimator is incremental
* @exception Exception if something goes wrong or the user requests help on
* command options
*/
public static void buildEstimator(Estimator est, String[] options,
boolean isIncremental) throws Exception {
// DBO.pln("buildEstimator");
// read all options
Builder build = new Builder();
try {
setGeneralOptions(build, est, options);
if (est instanceof OptionHandler) {
((OptionHandler) est).setOptions(options);
}
Utils.checkForRemainingOptions(options);
buildEstimator(est, build.m_instances, build.m_attrIndex,
build.m_classIndex, build.m_classValueIndex, isIncremental);
} catch (Exception ex) {
ex.printStackTrace();
System.out.println(ex.getMessage());
String specificOptions = "";
// Output the error and also the valid options
if (est instanceof OptionHandler) {
specificOptions += "\nEstimator options:\n\n";
Enumeration