// weka.estimators.EstimatorUtils Maven / Gradle / Ivy
// Show all versions of weka-dev Show documentation
/*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
/*
* EstimatorUtils.java
* Copyright (C) 2004-2012 University of Waikato, Hamilton, New Zealand
*
*/
package weka.estimators;
import java.io.FileOutputStream;
import java.io.PrintWriter;
import java.util.Enumeration;
import java.util.Vector;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.RevisionHandler;
import weka.core.RevisionUtils;
/**
* Contains static utility functions for Estimators.
*
*
* @author Gabi Schmidberger ([email protected])
* @version $Revision: 15521 $
*/
public class EstimatorUtils implements RevisionHandler {
/**
 * Finds the smallest positive difference between consecutive values of the
 * given attribute. The data is assumed to be sorted on that attribute.
 * Missing values at the beginning of the data are skipped, and scanning stops
 * at the first missing value encountered after the run of present values.
 *
 * @param inst the instances, sorted on the attribute at {@code attrIndex}
 * @param attrIndex index of the attribute the instances are sorted on
 * @return the minimal positive distance between consecutive values, or
 *         {@code Double.MAX_VALUE} if there are fewer than two instances,
 *         every value is missing, or no two consecutive values differ
 */
public static double findMinDistance(Instances inst, int attrIndex) {
  double min = Double.MAX_VALUE;
  int numInst = inst.numInstances();
  if (numInst < 2) {
    return min;
  }

  // Skip the missing values at the start of the data.
  int begin = 0;
  while (begin < numInst && inst.instance(begin).isMissing(attrIndex)) {
    begin++;
  }
  // Every value is missing: there is no distance to measure, and indexing
  // instance(begin) would be out of range.
  if (begin >= numInst) {
    return min;
  }

  double previous = inst.instance(begin).value(attrIndex);
  for (int i = begin + 1; i < numInst; i++) {
    Instance current = inst.instance(i);
    if (current.isMissing(attrIndex)) {
      // First missing value after the present ones ends the scan.
      break;
    }
    double value = current.value(attrIndex);
    double diff = value - previous;
    // Only strictly positive gaps count; equal neighbours are ignored.
    if (diff > 0.0 && diff < min) {
      min = diff;
    }
    previous = value;
  }
  return min;
}
/**
 * Finds the minimum and the maximum non-missing value of an attribute and
 * returns them through the last parameter.
 *
 * @param inst instances to scan
 * @param attrIndex index of the attribute
 * @param minMax output array of length at least 2; on return
 *        {@code minMax[0]} holds the minimum and {@code minMax[1]} the
 *        maximum, both {@code Double.NaN} if no value is present
 * @return number of not missing values
 * @exception Exception if parameter minMax is null or has fewer than two
 *            elements
 */
public static int getMinMax(Instances inst, int attrIndex, double[] minMax)
    throws Exception {
  if ((minMax == null) || (minMax.length < 2)) {
    throw new IllegalArgumentException(
        "getMinMax: minMax must be a non-null array of length >= 2");
  }

  double min = Double.NaN;
  double max = Double.NaN;
  int numNotMissing = 0;

  int numInst = inst.numInstances();
  for (int i = 0; i < numInst; i++) {
    Instance instance = inst.instance(i);
    if (instance.isMissing(attrIndex)) {
      continue;
    }
    double value = instance.value(attrIndex);
    if (numNotMissing == 0) {
      // First present value seeds both bounds.
      min = value;
      max = value;
    } else if (value < min) {
      min = value;
    } else if (value > max) {
      max = value;
    }
    numNotMissing++;
  }

  minMax[0] = min;
  minMax[1] = max;
  return numNotMissing;
}
/**
* Returns a dataset that contains all instances of a certain class value.
*
* @param data dataset to select the instances from
* @param attrIndex index of the relevant attribute
* @param classIndex index of the class attribute
* @param classValue the relevant class value
* @return a dataset with only
*/
public static Vector