weka.classifiers.functions.GaussianProcesses Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of weka-dev Show documentation
The Waikato Environment for Knowledge Analysis (WEKA), a machine learning workbench. This version represents the developer version, the "bleeding edge" of development, you could say. New functionality gets added to this version.
There is a newer version: 3.9.6
Show newest version
/*
 *   This program is free software: you can redistribute it and/or modify
 *   it under the terms of the GNU General Public License as published by
 *   the Free Software Foundation, either version 3 of the License, or
 *   (at your option) any later version.
 *
 *   This program is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *   GNU General Public License for more details.
 *
 *   You should have received a copy of the GNU General Public License
 *   along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

/*
 *    GaussianProcesses.java
 *    Copyright (C) 2005-2012 University of Waikato
 */

package weka.classifiers.functions;

import java.util.Collections;
import java.util.Enumeration;
import java.util.Vector;

import weka.classifiers.AbstractClassifier;
import weka.classifiers.ConditionalDensityEstimator;
import weka.classifiers.IntervalEstimator;
import weka.classifiers.functions.supportVector.CachedKernel;
import weka.classifiers.functions.supportVector.Kernel;
import weka.classifiers.functions.supportVector.PolyKernel;
import weka.core.Capabilities;
import weka.core.Capabilities.Capability;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.Option;
import weka.core.OptionHandler;
import weka.core.SelectedTag;
import weka.core.Statistics;
import weka.core.Tag;
import weka.core.TechnicalInformation;
import weka.core.TechnicalInformation.Field;
import weka.core.TechnicalInformation.Type;
import weka.core.TechnicalInformationHandler;
import weka.core.Utils;
import weka.core.WeightedInstancesHandler;
import weka.core.matrix.Matrix;
import weka.filters.Filter;
import weka.filters.unsupervised.attribute.NominalToBinary;
import weka.filters.unsupervised.attribute.Normalize;
import weka.filters.unsupervised.attribute.ReplaceMissingValues;
import weka.filters.unsupervised.attribute.Standardize;

/**
 * Implements Gaussian processes for regression
 * without hyperparameter-tuning. To make choosing an appropriate noise level
 * easier, this implementation applies normalization/standardization to the
 * target attribute as well (if normalization/standardization is turned on).
 * Missing values are replaced by the global mean/mode. Nominal attributes are
 * converted to binary ones.
 * 
 * BibTeX:
 *
 * <pre>
 * &#64;misc{Mackay1998,
 *    address = {Dept. of Physics, Cambridge University, UK},
 *    author = {David J.C. Mackay},
 *    title = {Introduction to Gaussian Processes},
 *    year = {1998},
 *    PS = {http://wol.ra.phy.cam.ac.uk/mackay/gpB.ps.gz}
 * }
 * </pre>
 * 
 * 
 * 
 * 
 * 
 * Valid options are:
 *
 * <pre>
 * -D
 *  If set, classifier is run in debug mode and
 *  may output additional info to the console
 * </pre>
 *
 * <pre>
 * -L &lt;double&gt;
 *  Level of Gaussian Noise. (default 0.1)
 * </pre>
 *
 * <pre>
 * -N
 *  Whether to 0=normalize/1=standardize/2=neither. (default 0=normalize)
 * </pre>
 *
 * <pre>
 * -K &lt;classname and parameters&gt;
 *  The Kernel to use.
 *  (default: weka.classifiers.functions.supportVector.PolyKernel)
 * </pre>
 *
 * <pre>
 * Options specific to kernel weka.classifiers.functions.supportVector.RBFKernel:
 * </pre>
 *
 * <pre>
 * -D
 *  Enables debugging output (if available) to be printed.
 *  (default: off)
 * </pre>
 *
 * <pre>
 * -no-checks
 *  Turns off all checks - use with caution!
 *  (default: checks on)
 * </pre>
 *
 * <pre>
 * -C &lt;num&gt;
 *  The size of the cache (a prime number).
 *  (default: 250007)
 * </pre>
 *
 * <pre>
 * -G &lt;num&gt;
 *  The Gamma parameter.
 *  (default: 0.01)
 * </pre>
 * 
 * 
 * 
 * @author Kurt Driessens ([email protected])
 * @author Remco Bouckaert ([email protected])
 * @version $Revision: 10381 $
 */
public class GaussianProcesses extends AbstractClassifier implements
  OptionHandler, IntervalEstimator, ConditionalDensityEstimator,
  TechnicalInformationHandler, WeightedInstancesHandler {

  /** for serialization */
  static final long serialVersionUID = -8620066949967678545L;

  /**
   * The filter used to make attributes numeric. Left as null by
   * buildClassifier when no nominal-to-binary conversion is applied.
   */
  protected NominalToBinary m_NominalToBinary;

  /** normalizes the data */
  public static final int FILTER_NORMALIZE = 0;

  /** standardizes the data */
  public static final int FILTER_STANDARDIZE = 1;

  /** no filter */
  public static final int FILTER_NONE = 2;

  /** The filter to apply to the training data */
  public static final Tag[] TAGS_FILTER = {
    new Tag(FILTER_NORMALIZE, "Normalize training data"),
    new Tag(FILTER_STANDARDIZE, "Standardize training data"),
    new Tag(FILTER_NONE, "No normalization/standardization"), };

  /**
   * The filter used to standardize/normalize all values. Null when the filter
   * type is FILTER_NONE.
   */
  protected Filter m_Filter = null;

  /** Whether to normalize/standardize/neither (one of the FILTER_* constants) */
  protected int m_filterType = FILTER_NORMALIZE;

  /**
   * The filter used to get rid of missing values. Set to null by
   * buildClassifier when checks are turned off.
   */
  protected ReplaceMissingValues m_Missing;

  /**
   * Turn off all checks and conversions? Turning them off assumes that data is
   * purely numeric, doesn't contain any missing values, and has a numeric
   * class.
   */
  protected boolean m_checksTurnedOff = false;

  /** Gaussian Noise Value. */
  protected double m_delta = 1;

  /** The squared noise value (recomputed from m_delta in buildClassifier). */
  protected double m_deltaSquared = 1;

  /**
   * Slope of the linear transformation realized by the filter on the class
   * attribute: a class value v in the original space corresponds to
   * v * m_Alin + m_Blin in the filtered space.
   */
  protected double m_Alin;

  /**
   * Offset of the linear transformation realized by the filter on the class
   * attribute (the filtered image of class value 0).
   */
  protected double m_Blin;

  /** Kernel to use * */
  protected Kernel m_kernel = new PolyKernel();

  /** The number of training instances */
  protected int m_NumTrain = 0;

  /** The average target (class) value of the filtered training data. */
  protected double m_avg_target;

  /**
   * (negative) covariance matrix in symmetric matrix representation: only the
   * lower triangle is stored, row i holding entries 0..i. buildClassifier
   * overwrites it in place with (the negative of) its inverse.
   **/
  public double[][] m_L;

  /**
   * Column vector (one row per training instance) computed at the end of
   * buildClassifier from m_L and the centered target values; predictions
   * multiply the kernel vector of a query instance with it.
   */
  protected Matrix m_t;

  /**
   * Provides the human-readable description of this classifier.
   * 
   * @return a description suitable for displaying in the explorer/experimenter
   *         gui
   */
  public String globalInfo() {

    // Assembled piecewise; the fragments are concatenated without separators,
    // so every space that appears in the final text lives inside a fragment.
    StringBuilder description = new StringBuilder();
    description.append(" Implements Gaussian processes for ");
    description.append("regression without hyperparameter-tuning. To make choosing an ");
    description.append("appropriate noise level easier, this implementation applies ");
    description.append("normalization/standardization to the target attribute as well ");
    description.append("as the other attributes (if ");
    description.append(" normalization/standardizaton is turned on). Missing values ");
    description.append("are replaced by the global mean/mode. Nominal attributes are ");
    description.append("converted to binary ones. Note that kernel caching is turned off ");
    description.append("if the kernel used implements CachedKernel.");
    return description.toString();
  }

  /**
   * Supplies the bibliographic reference for the technical background of this
   * class (a MISC-type entry for Mackay's 1998 "Introduction to Gaussian
   * Processes").
   * 
   * @return the technical information about this class
   */
  @Override
  public TechnicalInformation getTechnicalInformation() {
    final TechnicalInformation reference = new TechnicalInformation(Type.MISC);

    reference.setValue(Field.AUTHOR, "David J.C. Mackay");
    reference.setValue(Field.YEAR, "1998");
    reference.setValue(Field.TITLE, "Introduction to Gaussian Processes");
    reference.setValue(Field.ADDRESS,
      "Dept. of Physics, Cambridge University, UK");
    reference.setValue(Field.PS,
      "http://wol.ra.phy.cam.ac.uk/mackay/gpB.ps.gz");

    return reference;
  }

  /**
   * Returns default capabilities of the classifier. They start from the
   * capabilities reported by the configured kernel; the attribute side is then
   * widened and the class side is reset to numeric/date classes, with missing
   * class values allowed.
   * 
   * @return the capabilities of this classifier
   */
  @Override
  public Capabilities getCapabilities() {
    final Capabilities caps = getKernel().getCapabilities();
    caps.setOwner(this);

    // --- attributes ---
    caps.enableAllAttributeDependencies();
    // Nominal attributes are acceptable (they go through NominalToBinary), but
    // only when the kernel itself copes with numeric attributes.
    final boolean numericSupported = caps
      .handles(Capability.NUMERIC_ATTRIBUTES);
    if (numericSupported) {
      caps.enable(Capability.NOMINAL_ATTRIBUTES);
    }
    caps.enable(Capability.MISSING_VALUES);

    // --- class ---
    // Discard whatever class capabilities the kernel reported, then switch on
    // exactly the ones supported here.
    caps.disableAllClasses();
    caps.disableAllClassDependencies();
    caps.enable(Capability.NUMERIC_CLASS);
    caps.enable(Capability.DATE_CLASS);
    caps.enable(Capability.MISSING_CLASS_VALUES);

    return caps;
  }

  /**
   * Method for building the classifier.
   * <p>
   * Steps, in order: (1) unless checks are turned off, validate the data, copy
   * it, drop instances with a missing class and replace missing values; (2)
   * binarize nominal attributes if any are present; (3) apply the selected
   * normalize/standardize filter and recover the linear transformation it
   * applies to the class; (4) build the kernel and the lower triangle of the
   * kernel matrix with the squared noise added on the diagonal; (5) invert that
   * matrix in place; (6) derive the weight vector m_t used for prediction.
   * 
   * @param insts the set of training instances
   * @throws Exception if the classifier can't be built successfully
   */
  @Override
  public void buildClassifier(Instances insts) throws Exception {

    /* check the set of training instances */
    if (!m_checksTurnedOff) {
      // can classifier handle the data?
      getCapabilities().testWithFail(insts);

      // remove instances with missing class (on a copy, so the caller's data
      // is left untouched)
      insts = new Instances(insts);
      insts.deleteWithMissingClass();
      m_Missing = new ReplaceMissingValues();
      m_Missing.setInputFormat(insts);
      insts = Filter.useFilter(insts, m_Missing);
    } else {
      m_Missing = null;
    }

    if (getCapabilities().handles(Capability.NUMERIC_ATTRIBUTES)) {
      // With checks turned off the data is assumed to be purely numeric, so
      // the attribute scan is skipped.
      boolean onlyNumeric = true;
      if (!m_checksTurnedOff) {
        for (int i = 0; i < insts.numAttributes(); i++) {
          if (i != insts.classIndex()) {
            if (!insts.attribute(i).isNumeric()) {
              onlyNumeric = false;
              break;
            }
          }
        }
      }

      if (!onlyNumeric) {
        m_NominalToBinary = new NominalToBinary();
        m_NominalToBinary.setInputFormat(insts);
        insts = Filter.useFilter(insts, m_NominalToBinary);
      } else {
        m_NominalToBinary = null;
      }
    } else {
      m_NominalToBinary = null;
    }

    if (m_filterType == FILTER_STANDARDIZE) {
      m_Filter = new Standardize();
      ((Standardize) m_Filter).setIgnoreClass(true);
      m_Filter.setInputFormat(insts);
      insts = Filter.useFilter(insts, m_Filter);
    } else if (m_filterType == FILTER_NORMALIZE) {
      m_Filter = new Normalize();
      ((Normalize) m_Filter).setIgnoreClass(true);
      m_Filter.setInputFormat(insts);
      insts = Filter.useFilter(insts, m_Filter);
    } else {
      m_Filter = null;
    }

    m_NumTrain = insts.numInstances();

    // determine which linear transformation has been
    // applied to the class by the filter: push a copy of the first instance
    // through it with class value 0 (giving the offset m_Blin) and with class
    // value 1 (giving offset + slope, hence the slope m_Alin)
    if (m_Filter != null) {
      Instance witness = (Instance) insts.instance(0).copy();
      witness.setValue(insts.classIndex(), 0);
      m_Filter.input(witness);
      m_Filter.batchFinished();
      Instance res = m_Filter.output();
      m_Blin = res.value(insts.classIndex());
      witness.setValue(insts.classIndex(), 1);
      m_Filter.input(witness);
      m_Filter.batchFinished();
      res = m_Filter.output();
      m_Alin = res.value(insts.classIndex()) - m_Blin;
    } else {
      m_Alin = 1.0;
      m_Blin = 0.0;
    }

    // Initialize kernel. A caching kernel gets its cache size set to 0 before
    // it is built. The type is tested explicitly instead of casting and
    // swallowing the resulting exception, so genuine failures are no longer
    // hidden.
    if (m_kernel instanceof CachedKernel) {
      ((CachedKernel) m_kernel).setCacheSize(0);
    }
    m_kernel.buildKernel(insts);

    // Compute average target value
    double sum = 0.0;
    for (int i = 0; i < insts.numInstances(); i++) {
      sum += insts.instance(i).classValue();
    }
    m_avg_target = sum / insts.numInstances();

    // Store squared noise level
    m_deltaSquared = m_delta * m_delta;

    // initialize kernel matrix/covariance matrix; only the lower triangle is
    // stored (row i has i + 1 entries) and the squared noise level is added to
    // the diagonal
    int n = insts.numInstances();
    m_L = new double[n][];
    double kv = 0;
    for (int i = 0; i < n; i++) {
      m_L[i] = new double[i + 1];
      for (int j = 0; j < i; j++) {
        kv = m_kernel.eval(i, j, insts.instance(i));
        m_L[i][j] = kv;
      }
      kv = m_kernel.eval(i, i, insts.instance(i));
      m_L[i][i] = kv + m_deltaSquared;
    }

    // Save memory (can't use Kernel.clean() because of polynomial kernel with
    // exponent 1): swap in a fresh copy of the kernel with cache size -1 and
    // rebuild it on the training data
    if (m_kernel instanceof CachedKernel) {
      m_kernel = Kernel.makeCopy(m_kernel);
      ((CachedKernel) m_kernel).setCacheSize(-1);
      m_kernel.buildKernel(insts);
    }

    // Calculate inverse matrix exploiting symmetry of covariance matrix
    // NB this replaces the kernel matrix with (the negative of) its inverse and
    // does
    // not require any extra memory for a solution matrix
    double[] tmprow = new double[n];
    double tmp2 = 0, tmp = 0;
    for (int i = 0; i < n; i++) {
      // pivot on diagonal element i
      tmp = -m_L[i][i];
      m_L[i][i] = 1.0 / tmp;
      // scale row/column i by the pivot (remembering the unscaled values in
      // tmprow) and update the remaining diagonal entries
      for (int j = 0; j < n; j++) {
        if (j != i) {
          if (j < i) {
            tmprow[j] = m_L[i][j];
            m_L[i][j] /= tmp;
            tmp2 = m_L[i][j];
            m_L[j][j] += tmp2 * tmp2 * tmp;
          } else if (j > i) {
            tmprow[j] = m_L[j][i];
            m_L[j][i] /= tmp;
            tmp2 = m_L[j][i];
            m_L[j][j] += tmp2 * tmp2 * tmp;
          }
        }
      }

      // update the off-diagonal entries of every other row j; element (i, k)
      // of the symmetric matrix is read as m_L[i][k] for k < i and as
      // m_L[k][i] for k > i
      for (int j = 0; j < n; j++) {
        if (j != i) {
          if (i < j) {
            for (int k = 0; k < i; k++) {
              m_L[j][k] += tmprow[j] * m_L[i][k];
            }
          } else {
            for (int k = 0; k < j; k++) {
              m_L[j][k] += tmprow[j] * m_L[i][k];
            }

          }
          for (int k = i + 1; k < j; k++) {
            m_L[j][k] += tmprow[j] * m_L[k][i];
          }
        }
      }
    }

    // centered target values
    m_t = new Matrix(insts.numInstances(), 1);
    double[] tt = new double[n];
    for (int i = 0; i < n; i++) {
      tt[i] = insts.instance(i).classValue() - m_avg_target;
    }

    // calculate m_t = tt . m_L (subtracting, since m_L holds the negative of
    // the inverse); entries above the diagonal are read from the mirrored
    // position in the lower triangle
    for (int i = 0; i < n; i++) {
      double s = 0;
      for (int k = 0; k < i; k++) {
        s -= m_L[i][k] * tt[k];
      }
      for (int k = i; k < n; k++) {
        s -= m_L[k][i] * tt[k];
      }
      m_t.set(i, 0, s);
    }

  } // buildClassifier

  /**
   * Classifies a given instance: the instance is filtered like the training
   * data, its kernel values against all training instances are combined with
   * the stored weight vector, the average target is added back, and the result
   * is mapped from the filtered class space to the original one.
   * 
   * @param inst the instance to be classified
   * @return the classification
   * @throws Exception if instance could not be classified successfully
   */
  @Override
  public double classifyInstance(Instance inst) throws Exception {

    final Instance filtered = filterInstance(inst);

    // column vector of kernel evaluations between the query instance and each
    // training instance
    final Matrix kernelColumn = new Matrix(m_NumTrain, 1);
    for (int row = 0; row < m_NumTrain; row++) {
      kernelColumn.set(row, 0, m_kernel.eval(-1, row, filtered));
    }

    final Matrix weighted = kernelColumn.transpose().times(m_t);
    final double filteredPrediction = weighted.get(0, 0) + m_avg_target;

    // undo the linear transformation the filter applied to the class
    return (filteredPrediction - m_Blin) / m_Alin;
  }

  /**
   * Filters an instance by sending it through the same chain that was set up
   * for the training data: missing-value replacement (unless checks are turned
   * off), nominal-to-binary conversion (if configured) and the
   * normalize/standardize filter (if configured).
   * 
   * @param inst the instance to filter
   * @return the filtered instance
   * @throws Exception if one of the filters fails
   */
  protected Instance filterInstance(Instance inst) throws Exception {

    Instance current = inst;

    if (!m_checksTurnedOff) {
      current = runThroughFilter(m_Missing, current);
    }
    if (m_NominalToBinary != null) {
      current = runThroughFilter(m_NominalToBinary, current);
    }
    if (m_Filter != null) {
      current = runThroughFilter(m_Filter, current);
    }

    return current;
  }

  /**
   * Feeds one instance to a filter as a complete batch and collects the
   * filter's output for it.
   */
  private static Instance runThroughFilter(Filter filter, Instance inst)
    throws Exception {
    filter.input(inst);
    filter.batchFinished();
    return filter.output();
  }

  /**
   * Computes standard deviation for given instance, without transforming target
   * back into original space.
   * 
   * @param inst the (already filtered) instance
   * @param k column vector of kernel values between inst and each training
   *          instance
   * @return the square root of kappa - s, where kappa is the kernel value of
   *         inst with itself plus the squared noise and s is the quadratic
   *         form of k with the negated entries of m_L; the noise level m_delta
   *         is returned instead when kappa does not exceed s
   * @throws Exception if the kernel evaluation fails
   */
  protected double computeStdDev(Instance inst, Matrix k) throws Exception {

    final double kappa = m_kernel.eval(-1, -1, inst) + m_deltaSquared;

    final int size = m_L.length;
    double quadForm = 0;
    for (int row = 0; row < size; row++) {
      double rowAcc = 0;
      for (int col = 0; col < size; col++) {
        final double kCol = k.get(col, 0);
        // m_L stores only the lower triangle; mirror look-ups above it
        final double entry;
        if (row > col) {
          entry = m_L[row][col];
        } else {
          entry = m_L[col][row];
        }
        rowAcc -= kCol * entry;
      }
      quadForm += rowAcc * k.get(row, 0);
    }

    if (kappa > quadForm) {
      return Math.sqrt(kappa - quadForm);
    }
    // fall back to the noise level when the difference is not positive
    return m_delta;
  }

  /**
   * Computes a prediction interval for the given instance and confidence level.
   * The interval is symmetric around the estimate in the filtered class space
   * and is mapped back to the original class space before being returned.
   * 
   * @param inst the instance to make the prediction for
   * @param confidenceLevel the proportion of cases the interval should cover
   *          (it is subtracted from 1.0, i.e. treated as a fraction)
   * @return a 1*2 array that contains the boundaries of the interval
   * @throws Exception if interval could not be estimated successfully
   */
  @Override
  public double[][] predictIntervals(Instance inst, double confidenceLevel)
    throws Exception {

    final Instance filtered = filterInstance(inst);

    // kernel values between the query instance and each training instance
    final Matrix kernelColumn = new Matrix(m_NumTrain, 1);
    for (int row = 0; row < m_NumTrain; row++) {
      kernelColumn.set(row, 0, m_kernel.eval(-1, row, filtered));
    }

    final double estimate = kernelColumn.transpose().times(m_t).get(0, 0)
      + m_avg_target;
    final double sigma = computeStdDev(filtered, kernelColumn);

    // split the uncovered probability mass evenly between both tails
    final double upperQuantile = 1.0 - ((1.0 - confidenceLevel) / 2.0);
    final double z = Statistics.normalInverse(upperQuantile);

    final double lower = estimate - z * sigma;
    final double upper = estimate + z * sigma;

    // undo the linear transformation the filter applied to the class
    return new double[][] { { (lower - m_Blin) / m_Alin,
      (upper - m_Blin) / m_Alin } };
  }

  /**
   * Gives standard deviation of the prediction at the given instance, rescaled
   * from the filtered class space by dividing by the slope m_Alin.
   * 
   * @param inst the instance to get the standard deviation for
   * @return the standard deviation
   * @throws Exception if computation fails
   */
  public double getStandardDeviation(Instance inst) throws Exception {

    final Instance filtered = filterInstance(inst);

    // kernel values between the query instance and each training instance
    final Matrix kernelColumn = new Matrix(m_NumTrain, 1);
    for (int row = 0; row < m_NumTrain; row++) {
      kernelColumn.set(row, 0, m_kernel.eval(-1, row, filtered));
    }

    final double filteredStdDev = computeStdDev(filtered, kernelColumn);
    return filteredStdDev / m_Alin;
  }

  /**
   * Returns natural logarithm of density estimate for given value based on
   * given instance. The value is mapped into the filtered class space, scored
   * under a normal density centered on the estimate, and the logarithm of the
   * slope m_Alin is added to the result.
   * 
   * @param inst the instance to make the prediction for.
   * @param value the value to make the prediction for.
   * @return the natural logarithm of the density estimate
   * @throws Exception if the density cannot be computed
   */
  @Override
  public double logDensity(Instance inst, double value) throws Exception {

    final Instance filtered = filterInstance(inst);

    // kernel values between the query instance and each training instance
    final Matrix kernelColumn = new Matrix(m_NumTrain, 1);
    for (int row = 0; row < m_NumTrain; row++) {
      kernelColumn.set(row, 0, m_kernel.eval(-1, row, filtered));
    }

    final double estimate = kernelColumn.transpose().times(m_t).get(0, 0)
      + m_avg_target;
    final double sigma = computeStdDev(filtered, kernelColumn);

    // transform to GP space, then center around estimate
    final double inGpSpace = value * m_Alin + m_Blin;
    final double deviation = inGpSpace - estimate;

    final double logNormalizer = -Math.log(sigma * Math.sqrt(2 * Math.PI));
    final double exponent = deviation * deviation / (2.0 * sigma * sigma);
    final double logDensityInGpSpace = logNormalizer - exponent;

    return logDensityInGpSpace + Math.log(m_Alin);
  }

  /**
   * Returns an enumeration describing the available options.
   * 
   * @return an enumeration of all the available options.
   */
  @Override
  public Enumeration