All Downloads are FREE. Search and download functionalities are using the official Maven repository.

weka.classifiers.functions.GaussianProcesses Maven / Gradle / Ivy

Go to download

The Waikato Environment for Knowledge Analysis (WEKA), a machine learning workbench. This version represents the developer version, the "bleeding edge" of development, you could say. New functionality gets added to this version.

There is a newer version: 3.9.6
Show newest version
/*
 *   This program is free software: you can redistribute it and/or modify
 *   it under the terms of the GNU General Public License as published by
 *   the Free Software Foundation, either version 3 of the License, or
 *   (at your option) any later version.
 *
 *   This program is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *   GNU General Public License for more details.
 *
 *   You should have received a copy of the GNU General Public License
 *   along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

/*
 *    GaussianProcesses.java
 *    Copyright (C) 2005-2012 University of Waikato
 */

package weka.classifiers.functions;

import java.util.Collections;
import java.util.Enumeration;
import java.util.Vector;

import weka.classifiers.AbstractClassifier;
import weka.classifiers.ConditionalDensityEstimator;
import weka.classifiers.IntervalEstimator;
import weka.classifiers.functions.supportVector.CachedKernel;
import weka.classifiers.functions.supportVector.Kernel;
import weka.classifiers.functions.supportVector.PolyKernel;
import weka.core.Capabilities;
import weka.core.Capabilities.Capability;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.Option;
import weka.core.OptionHandler;
import weka.core.SelectedTag;
import weka.core.Statistics;
import weka.core.Tag;
import weka.core.TechnicalInformation;
import weka.core.TechnicalInformation.Field;
import weka.core.TechnicalInformation.Type;
import weka.core.TechnicalInformationHandler;
import weka.core.Utils;
import weka.core.WeightedInstancesHandler;
import weka.core.matrix.Matrix;
import weka.filters.Filter;
import weka.filters.unsupervised.attribute.NominalToBinary;
import weka.filters.unsupervised.attribute.Normalize;
import weka.filters.unsupervised.attribute.ReplaceMissingValues;
import weka.filters.unsupervised.attribute.Standardize;

/**
 *  Implements Gaussian processes for regression
 * without hyperparameter-tuning. To make choosing an appropriate noise level
 * easier, this implementation applies normalization/standardization to the
 * target attribute as well (if normalization/standardization is turned on).
 * Missing values are replaced by the global mean/mode. Nominal attributes are
 * converted to binary ones. 
 * 
 *  BibTeX:
 * 
 * 
 *        @misc{Mackay1998,
 *          address = {Dept. of Physics, Cambridge University, UK},
 *          author = {David J.C. Mackay},
 *          title = {Introduction to Gaussian Processes},
 *          year = {1998},
 *          PS = {http://wol.ra.phy.cam.ac.uk/mackay/gpB.ps.gz}
 *       }
 * 
 *
 * <p>Valid options are:</p>
 *

 *       -D
 *        If set, classifier is run in debug mode and
 *        may output additional info to the console
 * 
* *
 *       -L <double>
 *        Level of Gaussian Noise. (default 0.1)
 * 
* *
 *       -N
 *        Whether to 0=normalize/1=standardize/2=neither. (default 0=normalize)
 * 
* *
 *       -K <classname and parameters>
 *        The Kernel to use.
 *        (default: weka.classifiers.functions.supportVector.PolyKernel)
 * 
* *
 * 
 *       Options specific to kernel weka.classifiers.functions.supportVector.RBFKernel:
 * 
* *
 *       -D
 *        Enables debugging output (if available) to be printed.
 *        (default: off)
 * 
* *
 *       -no-checks
 *        Turns off all checks - use with caution!
 *        (default: checks on)
 * 
* *
 *       -C <num>
 *        The size of the cache (a prime number).
 *        (default: 250007)
 * 
* *
 *       -G <num>
 *        The Gamma parameter.
 *        (default: 0.01)
 * 
*
* @author Kurt Driessens ([email protected])
* @author Remco Bouckaert ([email protected])
* @version $Revision: 10381 $
*/
public class GaussianProcesses extends AbstractClassifier implements
  OptionHandler, IntervalEstimator, ConditionalDensityEstimator,
  TechnicalInformationHandler, WeightedInstancesHandler {

  /** for serialization */
  static final long serialVersionUID = -8620066949967678545L;

  /** The filter used to make attributes numeric. */
  protected NominalToBinary m_NominalToBinary;

  /** normalizes the data */
  public static final int FILTER_NORMALIZE = 0;

  /** standardizes the data */
  public static final int FILTER_STANDARDIZE = 1;

  /** no filter */
  public static final int FILTER_NONE = 2;

  /** The filter to apply to the training data */
  public static final Tag[] TAGS_FILTER = {
    new Tag(FILTER_NORMALIZE, "Normalize training data"),
    new Tag(FILTER_STANDARDIZE, "Standardize training data"),
    new Tag(FILTER_NONE, "No normalization/standardization"), };

  /** The filter used to standardize/normalize all values. */
  protected Filter m_Filter = null;

  /** Whether to normalize/standardize/neither */
  protected int m_filterType = FILTER_NORMALIZE;

  /** The filter used to get rid of missing values. */
  protected ReplaceMissingValues m_Missing;

  /**
   * Turn off all checks and conversions? Turning them off assumes that data is
   * purely numeric, doesn't contain any missing values, and has a numeric
   * class.
   */
  protected boolean m_checksTurnedOff = false;

  /** Gaussian Noise Value. */
  protected double m_delta = 1;

  /** The squared noise value. */
  protected double m_deltaSquared = 1;

  /**
   * The parameters of the linear transformation realized by the filter on the
   * class attribute
   */
  protected double m_Alin;
  protected double m_Blin;

  /** Kernel to use */
  protected Kernel m_kernel = new PolyKernel();

  /** The number of training instances */
  protected int m_NumTrain = 0;

  /** The average class (target) value over the training data; predictions are
   * centered around this value. */
  protected double m_avg_target;

  /** (negative) covariance matrix in symmetric matrix representation */
  public double[][] m_L;

  /** The vector of target values. */
  protected Matrix m_t;

  /**
   * Returns a string describing classifier
   *
   * @return a description suitable for displaying in the explorer/experimenter
   *         gui
   */
  public String globalInfo() {
    return " Implements Gaussian processes for "
      + "regression without hyperparameter-tuning. To make choosing an "
      + "appropriate noise level easier, this implementation applies "
      + "normalization/standardization to the target attribute as well "
      + "as the other attributes (if "
      + " normalization/standardizaton is turned on). Missing values "
      + "are replaced by the global mean/mode. Nominal attributes are "
      + "converted to binary ones. Note that kernel caching is turned off "
      + "if the kernel used implements CachedKernel.";
  }

  /**
   * Returns an instance of a TechnicalInformation object, containing detailed
   * information about the technical background of this class, e.g., paper
   * reference or book this class is based on.
   *
   * @return the technical information about this class
   */
  @Override
  public TechnicalInformation getTechnicalInformation() {
    TechnicalInformation result;

    result = new TechnicalInformation(Type.MISC);
    result.setValue(Field.AUTHOR, "David J.C. Mackay");
    result.setValue(Field.YEAR, "1998");
    result.setValue(Field.TITLE, "Introduction to Gaussian Processes");
    result.setValue(Field.ADDRESS, "Dept. of Physics, Cambridge University, UK");
    result.setValue(Field.PS, "http://wol.ra.phy.cam.ac.uk/mackay/gpB.ps.gz");

    return result;
  }

  /**
   * Returns default capabilities of the classifier.
*
* @return the capabilities of this classifier
*/
  @Override
  public Capabilities getCapabilities() {
    // capabilities are delegated to the kernel and then extended
    Capabilities result = getKernel().getCapabilities();
    result.setOwner(this);

    // attribute
    result.enableAllAttributeDependencies();
    // with NominalToBinary we can also handle nominal attributes, but only
    // if the kernel can handle numeric attributes
    if (result.handles(Capability.NUMERIC_ATTRIBUTES)) {
      result.enable(Capability.NOMINAL_ATTRIBUTES);
    }
    result.enable(Capability.MISSING_VALUES);

    // class
    result.disableAllClasses();
    result.disableAllClassDependencies();
    result.enable(Capability.NUMERIC_CLASS);
    result.enable(Capability.DATE_CLASS);
    result.enable(Capability.MISSING_CLASS_VALUES);

    return result;
  }

  /**
   * Method for building the classifier.
   *
   * @param insts the set of training instances
   * @throws Exception if the classifier can't be built successfully
   */
  @Override
  public void buildClassifier(Instances insts) throws Exception {

    /* check the set of training instances */
    if (!m_checksTurnedOff) {
      // can classifier handle the data?
      getCapabilities().testWithFail(insts);

      // remove instances with missing class
      insts = new Instances(insts);
      insts.deleteWithMissingClass();
      m_Missing = new ReplaceMissingValues();
      m_Missing.setInputFormat(insts);
      insts = Filter.useFilter(insts, m_Missing);
    } else {
      m_Missing = null;
    }

    // convert nominal attributes to binary ones only when the kernel cannot
    // handle them directly but does handle numeric attributes
    if (getCapabilities().handles(Capability.NUMERIC_ATTRIBUTES)) {
      boolean onlyNumeric = true;
      if (!m_checksTurnedOff) {
        for (int i = 0; i < insts.numAttributes(); i++) {
          if (i != insts.classIndex()) {
            if (!insts.attribute(i).isNumeric()) {
              onlyNumeric = false;
              break;
            }
          }
        }
      }

      if (!onlyNumeric) {
        m_NominalToBinary = new NominalToBinary();
        m_NominalToBinary.setInputFormat(insts);
        insts = Filter.useFilter(insts, m_NominalToBinary);
      } else {
        m_NominalToBinary = null;
      }
    } else {
      m_NominalToBinary = null;
    }

    // normalization/standardization is applied to the class attribute as well
    // (setIgnoreClass(true) makes the filter treat the class like any other
    // attribute)
    if (m_filterType == FILTER_STANDARDIZE) {
      m_Filter = new Standardize();
      ((Standardize) m_Filter).setIgnoreClass(true);
      m_Filter.setInputFormat(insts);
      insts = Filter.useFilter(insts, m_Filter);
    } else if (m_filterType == FILTER_NORMALIZE) {
      m_Filter = new Normalize();
      ((Normalize) m_Filter).setIgnoreClass(true);
      m_Filter.setInputFormat(insts);
      insts = Filter.useFilter(insts, m_Filter);
    } else {
      m_Filter = null;
    }

    m_NumTrain = insts.numInstances();

    // determine which linear transformation has been
    // applied to the class by the filter: feed witness instances with class
    // values 0 and 1 through the filter so that filtered = Alin * raw + Blin
    if (m_Filter != null) {
      Instance witness = (Instance) insts.instance(0).copy();
      witness.setValue(insts.classIndex(), 0);
      m_Filter.input(witness);
      m_Filter.batchFinished();
      Instance res = m_Filter.output();
      m_Blin = res.value(insts.classIndex());
      witness.setValue(insts.classIndex(), 1);
      m_Filter.input(witness);
      m_Filter.batchFinished();
      res = m_Filter.output();
      m_Alin = res.value(insts.classIndex()) - m_Blin;
    } else {
      m_Alin = 1.0;
      m_Blin = 0.0;
    }

    // Initialize kernel: turn caching off during matrix construction
    // (each pairwise evaluation is used only once here)
    try {
      CachedKernel cachedKernel = (CachedKernel) m_kernel;
      cachedKernel.setCacheSize(0);
    } catch (Exception e) {
      // ignore: kernel is simply not a CachedKernel
    }
    m_kernel.buildKernel(insts);

    // Compute average target value
    double sum = 0.0;
    for (int i = 0; i < insts.numInstances(); i++) {
      sum += insts.instance(i).classValue();
    }
    m_avg_target = sum / insts.numInstances();

    // Store squared noise level
    m_deltaSquared = m_delta * m_delta;

    // initialize kernel matrix/covariance matrix; only the lower triangle is
    // stored (m_L[i] has length i+1), noise is added on the diagonal
    int n = insts.numInstances();
    m_L = new double[n][];
    double kv = 0;
    for (int i = 0; i < n; i++) {
      m_L[i] = new double[i + 1];
      for (int j = 0; j < i; j++) {
        kv = m_kernel.eval(i, j, insts.instance(i));
        m_L[i][j] = kv;
      }
      kv = m_kernel.eval(i, i, insts.instance(i));
      m_L[i][i] = kv + m_deltaSquared;
    }

    // Save memory (can't use Kernel.clean() because of polynominal kernel with
    // exponent 1)
    if (m_kernel instanceof CachedKernel) {
      m_kernel = Kernel.makeCopy(m_kernel);
      ((CachedKernel) m_kernel).setCacheSize(-1);
      m_kernel.buildKernel(insts);
    }

    // Calculate inverse matrix exploiting symmetry of covariance matrix
    // NB this replaces the kernel matrix with (the negative of) its inverse
    // and does not require any extra memory for a solution matrix
    // NOTE(review): the statement order in this in-place elimination is
    // load-bearing; do not reorder.
    double[] tmprow = new double[n];
    double tmp2 = 0, tmp = 0;
    for (int i = 0; i < n; i++) {
      tmp = -m_L[i][i];
      m_L[i][i] = 1.0 / tmp;
      for (int j = 0; j < n; j++) {
        if (j != i) {
          if (j < i) {
            tmprow[j] = m_L[i][j];
            m_L[i][j] /= tmp;
            tmp2 = m_L[i][j];
            m_L[j][j] += tmp2 * tmp2 * tmp;
          } else if (j > i) {
            tmprow[j] = m_L[j][i];
            m_L[j][i] /= tmp;
            tmp2 = m_L[j][i];
            m_L[j][j] += tmp2 * tmp2 * tmp;
          }
        }
      }
      for (int j = 0; j < n; j++) {
        if (j != i) {
          if (i < j) {
            for (int k = 0; k < i; k++) {
              m_L[j][k] += tmprow[j] * m_L[i][k];
            }
          } else {
            for (int k = 0; k < j; k++) {
              m_L[j][k] += tmprow[j] * m_L[i][k];
            }
          }
          for (int k = i + 1; k < j; k++) {
            m_L[j][k] += tmprow[j] * m_L[k][i];
          }
        }
      }
    }

    // solution vector: m_t = K^-1 * (targets - average target); the minus
    // signs compensate for m_L holding the NEGATIVE inverse
    m_t = new Matrix(insts.numInstances(), 1);
    double[] tt = new double[n];
    for (int i = 0; i < n; i++) {
      tt[i] = insts.instance(i).classValue() - m_avg_target;
    }

    // calculate m_t = tt . m_L
    for (int i = 0; i < n; i++) {
      double s = 0;
      for (int k = 0; k < i; k++) {
        s -= m_L[i][k] * tt[k];
      }
      for (int k = i; k < n; k++) {
        s -= m_L[k][i] * tt[k];
      }
      m_t.set(i, 0, s);
    }

  } // buildClassifier

  /**
   * Classifies a given instance.
   *
   * @param inst the instance to be classified
   * @return the classification
   * @throws Exception if instance could not be classified successfully
   */
  @Override
  public double classifyInstance(Instance inst) throws Exception {

    // Filter instance
    inst = filterInstance(inst);

    // Build K vector of kernel evaluations against the training instances
    Matrix k = new Matrix(m_NumTrain, 1);
    for (int i = 0; i < m_NumTrain; i++) {
      k.set(i, 0, m_kernel.eval(-1, i, inst));
    }

    // predictive mean in filtered space, then undo the class transformation
    double result = k.transpose().times(m_t).get(0, 0) + m_avg_target;
    result = (result - m_Blin) / m_Alin;

    return result;
  }

  /**
   * Filters an instance through the same missing-value, nominal-to-binary and
   * normalization/standardization filters that were applied to the training
   * data.
   */
  protected Instance filterInstance(Instance inst) throws Exception {

    if (!m_checksTurnedOff) {
      m_Missing.input(inst);
      m_Missing.batchFinished();
      inst = m_Missing.output();
    }

    if (m_NominalToBinary != null) {
      m_NominalToBinary.input(inst);
      m_NominalToBinary.batchFinished();
      inst = m_NominalToBinary.output();
    }

    if (m_Filter != null) {
      m_Filter.input(inst);
      m_Filter.batchFinished();
      inst = m_Filter.output();
    }
    return inst;
  }

  /**
   * Computes standard deviation for given instance, without transforming
   * target back into original space.
   *
   * @param inst the (already filtered) instance
   * @param k the vector of kernel evaluations against the training data
   * @return the predictive standard deviation in filtered space
   * @throws Exception if the kernel evaluation fails
   */
  protected double computeStdDev(Instance inst, Matrix k) throws Exception {

    double kappa = m_kernel.eval(-1, -1, inst) + m_deltaSquared;

    // s accumulates k^T K^-1 k; the subtraction compensates for m_L holding
    // the negative inverse, and the ternary picks the stored (lower) triangle
    double s = 0;
    int n = m_L.length;
    for (int i = 0; i < n; i++) {
      double t = 0;
      for (int j = 0; j < n; j++) {
        t -= k.get(j, 0) * (i > j ? m_L[i][j] : m_L[j][i]);
      }
      s += t * k.get(i, 0);
    }

    // guard against numerical issues making the variance non-positive
    double sigma = m_delta;
    if (kappa > s) {
      sigma = Math.sqrt(kappa - s);
    }

    return sigma;
  }

  /**
   * Computes a prediction interval for the given instance and confidence
   * level.
*
* @param inst the instance to make the prediction for
* @param confidenceLevel the percentage of cases the interval should cover
* @return a 1*2 array that contains the boundaries of the interval
* @throws Exception if interval could not be estimated successfully
*/
  @Override
  public double[][] predictIntervals(Instance inst, double confidenceLevel)
    throws Exception {

    inst = filterInstance(inst);

    // Build K vector (and Kappa)
    Matrix k = new Matrix(m_NumTrain, 1);
    for (int i = 0; i < m_NumTrain; i++) {
      k.set(i, 0, m_kernel.eval(-1, i, inst));
    }

    double estimate = k.transpose().times(m_t).get(0, 0) + m_avg_target;

    double sigma = computeStdDev(inst, k);

    // two-sided interval: convert coverage probability to upper-tail quantile
    confidenceLevel = 1.0 - ((1.0 - confidenceLevel) / 2.0);

    double z = Statistics.normalInverse(confidenceLevel);

    double[][] interval = new double[1][2];

    interval[0][0] = estimate - z * sigma;
    interval[0][1] = estimate + z * sigma;

    // map both bounds back from filtered space to the original class scale
    interval[0][0] = (interval[0][0] - m_Blin) / m_Alin;
    interval[0][1] = (interval[0][1] - m_Blin) / m_Alin;

    return interval;
  }

  /**
   * Gives standard deviation of the prediction at the given instance.
   *
   * @param inst the instance to get the standard deviation for
   * @return the standard deviation
   * @throws Exception if computation fails
   */
  public double getStandardDeviation(Instance inst) throws Exception {

    inst = filterInstance(inst);

    // Build K vector (and Kappa)
    Matrix k = new Matrix(m_NumTrain, 1);
    for (int i = 0; i < m_NumTrain; i++) {
      k.set(i, 0, m_kernel.eval(-1, i, inst));
    }

    // dividing by m_Alin rescales the deviation into the original class space
    return computeStdDev(inst, k) / m_Alin;
  }

  /**
   * Returns natural logarithm of density estimate for given value based on
   * given instance.
   *
   * @param inst the instance to make the prediction for.
   * @param value the value to make the prediction for.
   * @return the natural logarithm of the density estimate
   * @exception Exception if the density cannot be computed
   */
  @Override
  public double logDensity(Instance inst, double value) throws Exception {

    inst = filterInstance(inst);

    // Build K vector (and Kappa)
    Matrix k = new Matrix(m_NumTrain, 1);
    for (int i = 0; i < m_NumTrain; i++) {
      k.set(i, 0, m_kernel.eval(-1, i, inst));
    }

    double estimate = k.transpose().times(m_t).get(0, 0) + m_avg_target;

    double sigma = computeStdDev(inst, k);

    // transform to GP space
    value = value * m_Alin + m_Blin;
    // center around estimate
    value = value - estimate;
    // log of the Gaussian density N(0, sigma^2) at the centered value
    double z = -Math.log(sigma * Math.sqrt(2 * Math.PI))
      - value * value / (2.0 * sigma * sigma);

    // + log(Alin) is the change-of-variable term for the linear class filter
    return z + Math.log(m_Alin);
  }

  /**
   * Returns an enumeration describing the available options.
   *
   * @return an enumeration of all the available options.
   */
  @Override
  public Enumeration




© 2015 - 2024 Weber Informatics LLC | Privacy Policy