// weka.estimators.Estimator Maven / Gradle / Ivy
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
/*
* Estimator.java
* Copyright (C) 1999 University of Waikato, Hamilton, New Zealand
*
*/
package weka.estimators;
import weka.core.Capabilities;
import weka.core.CapabilitiesHandler;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.Option;
import weka.core.OptionHandler;
import weka.core.RevisionHandler;
import weka.core.RevisionUtils;
import weka.core.SerializedObject;
import weka.core.Utils;
import weka.core.Capabilities.Capability;
import java.io.BufferedReader;
import java.io.FileReader;
import java.io.InputStreamReader;
import java.io.Reader;
import java.io.Serializable;
import java.util.Enumeration;
import java.util.Vector;
/**
*
* Abstract class for all estimators.
*
* Example code for a nonincremental estimator:
* <pre>
*   // create a histogram for estimation
*   EqualWidthEstimator est = new EqualWidthEstimator();
*   est.addValues(instances, attrIndex);
* </pre>
*
* Example code for an incremental estimator (incremental
* estimators must implement interface IncrementalEstimator):
* <pre>
*   // Create a discrete estimator that takes values 0 to 9
*   DiscreteEstimator newEst = new DiscreteEstimator(10, true);
*
*   // Create 50 random integers first predicting the probability of the
*   // value, then adding the value to the estimator
*   Random r = new Random(seed);
*   for(int i = 0; i &lt; 50; i++) {
*     current = Math.abs(r.nextInt() % 10);
*     System.out.println(newEst);
*     System.out.println("Prediction for " + current
*                        + " = " + newEst.getProbability(current));
*     newEst.addValue(current, 1);
*   }
* </pre>
*
* Example code for a main method for an estimator:
* <pre>
*   public static void main(String [] argv) {
*
*     try {
*       LoglikeliEstimator est = new LoglikeliEstimator();
*       Estimator.buildEstimator((Estimator) est, argv, false);
*       System.out.println(est.toString());
*     } catch (Exception ex) {
*       ex.printStackTrace();
*       System.out.println(ex.getMessage());
*     }
*   }
* </pre>
*
* @author Gabi Schmidberger ([email protected])
* @author Len Trigg ([email protected])
* @version $Revision: 5539 $
*/
public abstract class Estimator
implements Cloneable, Serializable, OptionHandler, CapabilitiesHandler,
RevisionHandler {
/** for serialization */
static final long serialVersionUID = -5902411487362274342L;
/** Debugging mode */
private boolean m_Debug = false;
/** The class value index is > -1 if subset is taken with specific class value only*/
protected double m_classValueIndex = -1.0;
/** set if class is not important */
protected boolean m_noClass = true;
/**
 * Holder for the inputs collected while an estimator build is prepared:
 * the dataset and the attribute, class and class value indices.
 */
private static class Builder
  implements Serializable, RevisionHandler {

  /** for serialization */
  private static final long serialVersionUID = -5810927990193597303L;

  /** dataset the estimator is built from; null until one is assigned */
  Instances m_instances;

  /** index of the attribute the estimator is built for; -1 until assigned */
  int m_attrIndex = -1;

  /** index of the class attribute, only relevant if a class value index is set */
  int m_classIndex = -1;

  /** index of the selected class value; -1 until assigned */
  int m_classValueIndex = -1;

  /**
   * Returns the revision string.
   *
   * @return the revision
   */
  public String getRevision() {
    final String revision = RevisionUtils.extract("$Revision: 5539 $");
    return revision;
  }
}
/**
 * Adds a single weighted value to the estimator.
 *
 * This base implementation does not store anything: it only reports, via a
 * stack trace and a console message, that adding single values is not
 * implemented, and then returns normally.
 *
 * @param data the new data value
 * @param weight the weight assigned to the data value
 */
public void addValue(double data, double weight) {
  // The exception is created only to be printed; it is never thrown.
  Exception notImplemented = new Exception(
    "Method to add single value is not implemented!\n"
    + "Estimator should implement IncrementalEstimator.");
  notImplemented.printStackTrace();
  System.out.println(notImplemented.getMessage());
}
/**
 * Builds the estimator from one attribute of a dataset.
 * The attribute's minimum and maximum are determined first and handed on
 * together with a reduction factor of 1.0 (the dataset is used in full).
 *
 * A failure while determining the range is printed to the console and the
 * build continues with whatever the range array holds at that point.
 *
 * @param data the dataset used to build this estimator
 * @param attrIndex attribute the estimator is for
 * @exception Exception if building of estimator goes wrong
 */
public void addValues(Instances data, int attrIndex) throws Exception {
  // refuse data the estimator cannot handle
  getCapabilities().testWithFail(data);

  // range[0] receives the minimum, range[1] the maximum
  final double[] range = new double[2];
  try {
    EstimatorUtils.getMinMax(data, attrIndex, range);
  } catch (Exception failure) {
    failure.printStackTrace();
    System.out.println(failure.getMessage());
  }

  // factor is 1.0, data set has not been reduced
  addValues(data, attrIndex, range[0], range[1], 1.0);
}
/**
 * Initialize the estimator with all values of one attribute of a dataset.
 * Every instance contributes its attribute value with weight 1.0 through
 * {@link #addValue(double, double)}.
 *
 * This base implementation ignores <code>min</code>, <code>max</code> and
 * <code>factor</code>; estimators that need them have to override the method.
 *
 * @param data the dataset used to build this estimator
 * @param attrIndex attribute the estimator is for
 * @param min minimal border of range
 * @param max maximal border of range
 * @param factor number of instances has been reduced to that factor
 * @exception Exception if building of estimator goes wrong
 */
public void addValues(Instances data, int attrIndex,
    double min, double max, double factor) throws Exception {
  // no handling of factor, would have to be overridden
  // no handling of min and max, would have to be overridden

  int numInst = data.numInstances();
  // start at index 0 so that the first instance is not skipped
  for (int i = 0; i < numInst; i++) {
    addValue(data.instance(i).value(attrIndex), 1.0);
  }
}
/**
 * Initialize the estimator using only the instances of one class.
 * It is using the values of one attribute only.
 *
 * The minimum and maximum of the attribute are determined over the complete
 * dataset; the estimator itself is then built from the instances that carry
 * the requested class value. Nothing is added if no such instance exists.
 * A failure while determining the range is printed to the console and the
 * build continues.
 *
 * @param data the dataset used to build this estimator
 * @param attrIndex attribute the estimator is for
 * @param classIndex index of the class attribute
 * @param classValue the class value
 * @exception Exception if building of estimator goes wrong
 */
public void addValues(Instances data, int attrIndex,
    int classIndex, int classValue) throws Exception {
  // a class is involved from now on
  m_noClass = false;

  // can estimator handle the data?
  getCapabilities().testWithFail(data);

  // find the minimal and the maximal value
  double[] minMax = new double[2];
  try {
    EstimatorUtils.getMinMax(data, attrIndex, minMax);
  } catch (Exception ex) {
    ex.printStackTrace();
    System.out.println(ex.getMessage());
  }
  double min = minMax[0];
  double max = minMax[1];

  // extract the instances with the given class value
  Instances workData = new Instances(data, 0);
  double factor = getInstancesFromClass(data, attrIndex,
    classIndex,
    (double) classValue, workData);

  // if no data return
  if (workData.numInstances() == 0) {
    return;
  }

  // build from the class subset; factor describes the reduction that led to it
  addValues(workData, attrIndex, min, max, factor);
}
/**
 * Initialize the estimator using only the instances of one class, with the
 * value range of the attribute supplied by the caller.
 * It is using the values of one attribute only.
 *
 * Nothing is added if no instance carries the requested class value.
 *
 * @param data the dataset used to build this estimator
 * @param attrIndex attribute the estimator is for
 * @param classIndex index of the class attribute
 * @param classValue the class value
 * @param min minimal value of this attribute
 * @param max maximal value of this attribute
 * @exception Exception if building of estimator goes wrong
 */
public void addValues(Instances data, int attrIndex,
    int classIndex, int classValue,
    double min, double max) throws Exception {

  // extract the instances with the given class value
  Instances workData = new Instances(data, 0);
  double factor = getInstancesFromClass(data, attrIndex,
    classIndex,
    (double) classValue, workData);

  // if no data return
  if (workData.numInstances() == 0) {
    return;
  }

  // build from the class subset; factor describes the reduction that led to it
  addValues(workData, attrIndex, min, max, factor);
}
/**
 * Collects all instances of a certain class value into a given dataset.
 * Instances whose relevant attribute is missing are skipped entirely.
 *
 * @param data dataset to select the instances from
 * @param attrIndex index of the relevant attribute
 * @param classIndex index of the class attribute
 * @param classValue the relevant class value
 * @param workData dataset the selected instances are added to
 * @return the fraction of instances with a non-missing attribute value that
 * carry the given class value; 0.0 if no instance has a value for the attribute
 */
private double getInstancesFromClass(Instances data, int attrIndex,
    int classIndex,
    double classValue, Instances workData) {

  int num = 0;            // instances with a value for the attribute
  int numClassValue = 0;  // ... of which carry the requested class value
  int numInst = data.numInstances();
  for (int i = 0; i < numInst; i++) {
    Instance inst = data.instance(i);
    if (!inst.isMissing(attrIndex)) {
      num++;
      if (inst.value(classIndex) == classValue) {
        workData.add(inst);
        numClassValue++;
      }
    }
  }

  // avoid 0/0 (NaN) when the attribute is missing everywhere
  if (num == 0) {
    return 0.0;
  }
  return (double) numClassValue / (double) num;
}
/**
 * Get a probability estimate for a value.
 * Has to be implemented by every concrete estimator.
 *
 * @param data the value to estimate the probability of
 * @return the estimated probability of the supplied value
 */
public abstract double getProbability(double data);
/**
 * Build an estimator using the options. The data is given in the options.
 *
 * The general options are parsed first, then the estimator's own options;
 * any option left over is an error. On any failure the stack trace and the
 * message are printed, and a new exception is thrown whose message consists
 * of the original message followed by the estimator specific and the general
 * option descriptions. The original exception is attached as its cause.
 *
 * @param est the estimator used
 * @param options the list of options
 * @param isIncremental true if estimator is incremental
 * @exception Exception if something goes wrong or the user requests help on
 * command options
 */
public static void buildEstimator(Estimator est, String [] options,
    boolean isIncremental)
  throws Exception {

  // read all options
  Builder build = new Builder();
  try {
    setGeneralOptions(build, est, options);

    // every Estimator is an OptionHandler, only null has to be excluded
    if (est != null) {
      est.setOptions(options);
    }

    Utils.checkForRemainingOptions(options);

    buildEstimator(est, build.m_instances, build.m_attrIndex,
      build.m_classIndex, build.m_classValueIndex, isIncremental);
  } catch (Exception ex) {
    ex.printStackTrace();
    System.out.println(ex.getMessage());

    // Output the error and also the valid options
    StringBuilder specificOptions = new StringBuilder();
    if (est != null) {
      specificOptions.append("\nEstimator options:\n\n");
      Enumeration enumOptions = est.listOptions();
      while (enumOptions.hasMoreElements()) {
        Option option = (Option) enumOptions.nextElement();
        specificOptions.append(option.synopsis()).append('\n')
          .append(option.description()).append("\n");
      }
    }

    String genericOptions = "\nGeneral options:\n\n"
      + "-h\n"
      + "\tGet help on available options.\n"
      + "-i <file>\n"
      + "\tThe name of the file containing input instances.\n"
      + "\tIf not supplied then instances will be read from stdin.\n"
      + "-a <attribute index>\n"
      + "\tThe number of the attribute the probability distribution\n"
      + "\testimation is done for.\n"
      + "\t\"first\" and \"last\" are also valid entries.\n"
      + "\tIf not supplied then the first attribute is used.\n"
      + "-c <class index>\n"
      + "\tIf class value index is set, this attribute is taken as class.\n"
      + "\t\"first\" and \"last\" are also valid entries.\n"
      + "\tIf not supplied then last is default.\n"
      + "-v <class value index>\n"
      + "\tIf value is different to -1, select instances of this class value.\n"
      + "\t\"first\" and \"last\" are also valid entries.\n"
      + "\tIf not supplied then all instances are taken.\n";

    // keep the original failure reachable through getCause()
    throw new Exception('\n' + ex.getMessage()
      + specificOptions + genericOptions, ex);
  }
}
/**
 * Feeds a dataset into an estimator.
 *
 * An incremental estimator receives the attribute value and weight of each
 * instance one at a time; in that case the class index and class value index
 * are not looked at. A non-incremental estimator gets all instances at once,
 * restricted to one class value unless the class value index is -1.
 *
 * @param est the estimator used
 * @param instances the dataset the values are taken from
 * @param attrIndex index of the attribute the estimator is built for
 * @param classIndex index of the class attribute
 * @param classValueIndex index of the class value to select, -1 for all
 * @param isIncremental true if estimator is incremental
 * @exception Exception if building of estimator goes wrong
 */
public static void buildEstimator(Estimator est,
    Instances instances, int attrIndex,
    int classIndex, int classValueIndex,
    boolean isIncremental) throws Exception {

  if (isIncremental) {
    // incremental estimator, read one value at a time
    for (Enumeration remaining = instances.enumerateInstances();
         remaining.hasMoreElements();) {
      Instance current = (Instance) remaining.nextElement();
      ((IncrementalEstimator) est).addValue(current.value(attrIndex),
        current.weight());
    }
    return;
  }

  // non-incremental estimator, add all instances at once
  if (classValueIndex == -1) {
    est.addValues(instances, attrIndex);
    return;
  }
  est.addValues(instances, attrIndex, classIndex, classValueIndex);
}
/**
 * Parses and sets the general options.
 *
 * <ul>
 * <li>-h: help request, reported by throwing an exception</li>
 * <li>-i: file to read the instances from; stdin if not given</li>
 * <li>-a: attribute index (1-based, "first" or "last"); defaults to the
 * first attribute</li>
 * <li>-c: class index (1-based, "first", "last" or -1 for last); defaults
 * to the last attribute</li>
 * <li>-v: class value index (1-based, "first", "last" or -1 for none);
 * left untouched if not given</li>
 * </ul>
 *
 * @param build contains the data used
 * @param est the estimator used (not used by this method)
 * @param options the options from the command line
 * @exception Exception if help is requested, the data cannot be read or an
 * option value is invalid
 */
private static void setGeneralOptions(Builder build, Estimator est,
    String [] options)
  throws Exception {

  // help request option
  if (Utils.getFlag('h', options)) {
    throw new Exception("Help requested.\n");
  }

  // instances used
  String infileName = Utils.getOption('i', options);
  boolean fromFile = infileName.length() != 0;
  Reader input;
  if (fromFile) {
    input = new BufferedReader(new FileReader(infileName));
  } else {
    input = new BufferedReader(new InputStreamReader(System.in));
  }
  try {
    build.m_instances = new Instances(input);
  } finally {
    // release the file handle; stdin is not ours to close
    if (fromFile) {
      input.close();
    }
  }
  int lastAttribute = build.m_instances.numAttributes() - 1;

  // attribute index, default is the first attribute
  String attrIndex = Utils.getOption('a', options);
  if ((attrIndex.length() == 0) || attrIndex.equals("first")) {
    build.m_attrIndex = 0;
  } else if (attrIndex.equals("last")) {
    build.m_attrIndex = lastAttribute;
  } else {
    int index = Integer.parseInt(attrIndex) - 1;
    if ((index < 0) || (index > lastAttribute)) {
      throw new IllegalArgumentException("Option a: attribute index out of range.");
    }
    build.m_attrIndex = index;
  }

  // class index, if not given is set to last attribute
  String classIndex = Utils.getOption('c', options);
  if ((classIndex.length() == 0) || classIndex.equals("last")) {
    build.m_classIndex = lastAttribute;
  } else if (classIndex.equals("first")) {
    build.m_classIndex = 0;
  } else {
    int cl = Integer.parseInt(classIndex);
    build.m_classIndex = (cl == -1) ? lastAttribute : cl - 1;
  }

  // assign the class before the class value is parsed, "last" needs it
  build.m_instances.setClassIndex(build.m_classIndex);

  // class value index, if not given it keeps its current value
  String classValueIndex = Utils.getOption('v', options);
  if (classValueIndex.length() != 0) {
    if (classValueIndex.equals("first")) {
      build.m_classValueIndex = 0;
    } else if (classValueIndex.equals("last")) {
      // last value of the class attribute, not the last attribute
      build.m_classValueIndex =
        build.m_instances.classAttribute().numValues() - 1;
    } else {
      int cl = Integer.parseInt(classValueIndex);
      build.m_classValueIndex = (cl == -1) ? -1 : cl - 1;
    }
  }
}
/**
 * Produces a deep copy of an estimator.
 * Delegates to {@link #makeCopy(Estimator)}.
 *
 * @param model the estimator to copy
 * @return a deep copy of the estimator
 * @exception Exception if an error occurs
 */
public static Estimator clone(Estimator model) throws Exception {
  Estimator duplicate = Estimator.makeCopy(model);
  return duplicate;
}
/**
 * Produces a deep copy of an estimator by wrapping it in a
 * {@link SerializedObject} and reading it back.
 *
 * @param model the estimator to copy
 * @return a deep copy of the estimator
 * @exception Exception if an error occurs
 */
public static Estimator makeCopy(Estimator model) throws Exception {
  SerializedObject snapshot = new SerializedObject(model);
  Object restored = snapshot.getObject();
  return (Estimator) restored;
}
/**
 * Produces several deep copies of an estimator. The estimator is wrapped in
 * a {@link SerializedObject} once and read back once per copy.
 *
 * @param model the estimator to copy
 * @param num the number of estimator copies to create.
 * @return an array of estimators.
 * @exception Exception if no model is given or an error occurs
 */
public static Estimator [] makeCopies(Estimator model,
    int num) throws Exception {

  if (null == model) {
    throw new Exception("No model estimator set");
  }

  final Estimator[] copies = new Estimator[num];
  final SerializedObject snapshot = new SerializedObject(model);
  int next = 0;
  while (next < copies.length) {
    copies[next] = (Estimator) snapshot.getObject();
    next++;
  }
  return copies;
}
/**
 * Tests whether the current estimation object is equal to another
 * estimation object. Two estimators are equal if they are of exactly the
 * same class and agree in debug mode, class value index and the
 * "no class" flag.
 *
 * @param obj the object to compare against
 * @return true if the two objects are equal
 */
public boolean equals(Object obj) {
  if (this == obj) {
    return true;
  }
  if ((obj == null) || !(obj.getClass().equals(this.getClass()))) {
    return false;
  }
  Estimator cmp = (Estimator) obj;
  // Double.compare keeps equals reflexive even for NaN
  return (m_Debug == cmp.m_Debug)
    && (Double.compare(m_classValueIndex, cmp.m_classValueIndex) == 0)
    && (m_noClass == cmp.m_noClass);
}

/**
 * Returns a hash code that is consistent with {@link #equals(Object)}:
 * it is computed from the same three fields.
 *
 * @return the hash code of this estimator
 */
public int hashCode() {
  long bits = Double.doubleToLongBits(m_classValueIndex);
  int result = m_Debug ? 1231 : 1237;
  result = 31 * result + (int) (bits ^ (bits >>> 32));
  result = 31 * result + (m_noClass ? 1231 : 1237);
  return result;
}
/**
 * Lists the options this estimator understands. At this level that is
 * the debug flag only.
 *
 * @return an enumeration of all the available options.
 */
public Enumeration listOptions() {
  Option debugOption = new Option(
    "\tIf set, estimator is run in debug mode and\n"
    + "\tmay output additional info to the console",
    "D", 0, "-D");

  Vector available = new Vector(1);
  available.addElement(debugOption);
  return available.elements();
}
/**
 * Parses a given list of options. Valid options are:
 * <pre>
 * -D
 *  If set, estimator is run in debug mode and
 *  may output additional info to the console.
 * </pre>
 *
 * @param options the list of options as an array of strings
 * @exception Exception if an option is not supported
 */
public void setOptions(String[] options) throws Exception {
  boolean debugRequested = Utils.getFlag('D', options);
  setDebug(debugRequested);
}
/**
 * Gets the current settings of the Estimator: "-D" if debugging is on,
 * nothing otherwise.
 *
 * @return an array of strings suitable for passing to setOptions
 */
public String [] getOptions() {
  return getDebug() ? new String[] {"-D"} : new String[0];
}
/**
 * Creates a new instance of an estimator given its class name and
 * (optional) arguments to pass to its setOptions method. Instantiation
 * and option handling are delegated to
 * <code>Utils.forName</code> with <code>Estimator.class</code> as the
 * required type.
 *
 * @param name the fully qualified class name of the estimator
 * @param options an array of options suitable for passing to setOptions. May
 * be null.
 * @return the newly created estimator, ready for use.
 * @exception Exception if the estimator name is invalid, or the options
 * supplied are not acceptable to the estimator
 */
public static Estimator forName(String name,
    String [] options) throws Exception {

  Object created = Utils.forName(Estimator.class, name, options);
  return (Estimator) created;
}
/**
 * Switches debugging mode on or off.
 *
 * @param debug true if debug output should be printed
 */
public void setDebug(boolean debug) {
  this.m_Debug = debug;
}
/**
 * Tells whether debugging mode is switched on.
 *
 * @return true if debugging output is on
 */
public boolean getDebug() {
  return this.m_Debug;
}
/**
 * Returns the tip text for the debug property.
 *
 * @return tip text for this property suitable for
 * displaying in the explorer/experimenter gui
 */
public String debugTipText() {
  final String tip =
    "If set to true, estimator may output additional info to the console.";
  return tip;
}
/**
 * Returns the Capabilities of this Estimator. This base implementation
 * enables every capability; derived estimators override the method to
 * declare what they can actually handle.
 *
 * @return the capabilities of this object
 * @see Capabilities
 */
public Capabilities getCapabilities() {
  Capabilities allEnabled = new Capabilities(this);
  allEnabled.enableAll();

  // A class dependent selection (NOMINAL_CLASS and MISSING_CLASS_VALUES if a
  // class is used, NO_CLASS otherwise) used to be sketched here in
  // commented-out code; it is not active, all capabilities stay enabled.
  return allEnabled;
}
/**
 * Test if the estimator can handle the data. Both the dataset as a whole
 * and the selected attribute are checked against the capabilities.
 *
 * @param data the dataset the estimator takes an attribute from
 * @param attrIndex the index of the attribute
 * @exception Exception if one of the capability tests fails
 * @see Capabilities
 */
public void testCapabilities(Instances data, int attrIndex) throws Exception {
  // a fresh capabilities object is requested for each of the two checks
  Capabilities forDataset = getCapabilities();
  forDataset.testWithFail(data);

  Capabilities forAttribute = getCapabilities();
  forAttribute.testWithFail(data.attribute(attrIndex));
}
}