weka.estimators.EstimatorUtils Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of weka-dev Show documentation
The Waikato Environment for Knowledge Analysis (WEKA), a machine learning workbench. This version represents the developer version, the "bleeding edge" of development, you could say. New functionality gets added to this version.
There is a newer version: 3.9.6
Show newest version
/*
 *   This program is free software: you can redistribute it and/or modify
 *   it under the terms of the GNU General Public License as published by
 *   the Free Software Foundation, either version 3 of the License, or
 *   (at your option) any later version.
 *
 *   This program is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *   GNU General Public License for more details.
 *
 *   You should have received a copy of the GNU General Public License
 *   along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

/*
 *    EstimatorUtils.java
 *    Copyright (C) 2004-2012 University of Waikato, Hamilton, New Zealand
 *
 */

package weka.estimators;

import java.io.FileOutputStream;
import java.io.PrintWriter;
import java.util.Enumeration;
import java.util.Vector;

import weka.core.Instance;
import weka.core.Instances;
import weka.core.RevisionHandler;
import weka.core.RevisionUtils;

/**
 * Contains static utility functions for Estimators.
 * 
 * 
 * @author Gabi Schmidberger ([email protected])
 * @version $Revision: 15521 $
 */
public class EstimatorUtils implements RevisionHandler {

  /**
   * Finds the smallest strictly positive difference between consecutive values
   * of the given attribute.
   * <p>
   * The data is assumed to be sorted on that attribute. A leading run of
   * missing values is skipped; the scan then stops at the first missing value
   * it meets, so any missing values after the leading run (and everything
   * behind them) are ignored. Differences that are zero or negative are
   * never reported.
   * 
   * @param inst the instances, assumed to be sorted on the attribute
   * @param attrIndex index of the attribute the instances are sorted on
   * @return the minimal positive distance, or {@code Double.MAX_VALUE} if
   *         there are fewer than two instances, all values are missing, or no
   *         two consecutive values differ by a positive amount
   */
  public static double findMinDistance(Instances inst, int attrIndex) {
    double min = Double.MAX_VALUE;
    int numInst = inst.numInstances();
    if (numInst < 2) {
      return min;
    }

    // Skip the leading run of missing values.
    int begin = 0;
    while (begin < numInst && inst.instance(begin).isMissing(attrIndex)) {
      begin++;
    }
    if (begin >= numInst) {
      // Every value is missing: there is no distance to measure. Without this
      // guard the lookup below would index past the end of the dataset.
      return min;
    }

    double previous = inst.instance(begin).value(attrIndex);
    for (int i = begin + 1; i < numInst; i++) {
      Instance current = inst.instance(i);
      if (current.isMissing(attrIndex)) {
        // First missing value after the leading run ends the scan.
        break;
      }
      double value = current.value(attrIndex);
      double diff = value - previous;
      // Only strictly positive gaps count; ties and descending steps are skipped.
      if (diff > 0.0 && diff < min) {
        min = diff;
      }
      previous = value;
    }
    return min;
  }

  /**
   * Finds the minimum and the maximum of the attribute over all instances
   * whose value for that attribute is not missing, and returns them through
   * the last parameter.
   * <p>
   * On return {@code minMax[0]} holds the minimum and {@code minMax[1]} the
   * maximum. If no instance has a non-missing value (including the case of an
   * empty dataset), both entries are set to {@code Double.NaN}.
   * 
   * @param inst instances used to build the estimator
   * @param attrIndex index of the attribute
   * @param minMax the array to return minimum and maximum in; must be non-null
   *          and have at least two elements
   * @return number of not missing values
   * @exception Exception if parameter minMax wasn't initialized properly
   */
  public static int getMinMax(Instances inst, int attrIndex, double[] minMax)
    throws Exception {
    // Validate the output array before touching the data.
    if ((minMax == null) || (minMax.length < 2)) {
      throw new Exception("Error in Program, public method getMinMax");
    }

    double min = Double.NaN;
    double max = Double.NaN;
    int numNotMissing = 0;

    // Typed enumeration: a raw Enumeration would hand back Object and the
    // assignment to Instance would not compile.
    Enumeration<Instance> enumInst = inst.enumerateInstances();
    while (enumInst.hasMoreElements()) {
      Instance instance = enumInst.nextElement();
      if (instance.isMissing(attrIndex)) {
        continue;
      }
      double value = instance.value(attrIndex);
      if (numNotMissing == 0) {
        // First usable value seeds both bounds.
        min = value;
        max = value;
      } else if (value < min) {
        min = value;
      } else if (value > max) {
        max = value;
      }
      numNotMissing++;
    }

    minMax[0] = min;
    minMax[1] = max;
    return numNotMissing;
  }

  /**
   * Returns a dataset that contains all instances of a certain class value.
   * 
   * @param data dataset to select the instances from
   * @param attrIndex index of the relevant attribute
   * @param classIndex index of the class attribute
   * @param classValue the relevant class value
   * @return a dataset with only
   */
  public static Vector