smile.math.kernel.MercerKernel Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of smile-math Show documentation
smile-math
The newest version!
/*******************************************************************************
 * Copyright (c) 2010-2020 Haifeng Li. All rights reserved.
 *
 * Smile is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as
 * published by the Free Software Foundation, either version 3 of
 * the License, or (at your option) any later version.
 *
 * Smile is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with Smile.  If not, see <https://www.gnu.org/licenses/>.
 ******************************************************************************/

package smile.math.kernel;

import java.io.Serializable;
import java.util.function.ToDoubleBiFunction;
import java.util.stream.IntStream;
import smile.math.blas.UPLO;
import smile.math.matrix.Matrix;

/**
 * Mercer kernel, also called covariance function in Gaussian process.
 * A kernel is a continuous function that takes two variables x and y and
 * maps them to a real value such that k(x,y) = k(y,x).
 * A Mercer kernel is a kernel that is positive semi-definite. When a kernel
 * is positive semi-definite, one may exploit the kernel trick, the idea of
 * implicitly mapping data to a high-dimensional feature space where some
 * linear algorithm is applied that works exclusively with inner products.
 * Assume we have some mapping Φ from an input space X to a feature space H,
 * then a kernel k(u, v) = &lt;Φ(u), Φ(v)&gt; may be used
 * to define the inner product in feature space H.
 * <p>
 * Positive definiteness in the context of kernel functions also implies that
 * a kernel matrix created using a particular kernel is positive semi-definite.
 * A matrix is positive semi-definite if its associated eigenvalues are nonnegative.
 * <p>
 * We can combine or modify existing kernel functions to make new ones.
 * For example, the sum of two kernels is a kernel. The product of two kernels
 * is also a kernel.
 * <p>
 * A stationary covariance function is a function of the difference x − y.
 * Thus it is invariant to translations in the input space.
 * If further the covariance function is a function only of |x − y|
 * then it is called isotropic; it is thus invariant to all rigid motions.
 * If a covariance function depends only on the dot product of x and y,
 * we call it a dot product covariance function.
 *
 * @author Haifeng Li
 */
public interface MercerKernel<T> extends ToDoubleBiFunction<T, T>, Serializable {

    /**
     * Kernel function.
     *
     * @param x the first sample.
     * @param y the second sample.
     * @return the kernel value k(x, y).
     */
    double k(T x, T y);

    /**
     * Computes the kernel and its gradient over hyperparameters.
     * {@link #KG(Object[])} reads the entries {@code 0 .. lo().length}
     * of the returned array, so implementations must return at least
     * {@code lo().length + 1} values.
     *
     * @param x the first sample.
     * @param y the second sample.
     * @return the kernel value and its gradient over the hyperparameters
     *         (presumably the kernel value first, followed by one partial
     *         derivative per hyperparameter; confirm against implementations).
     */
    double[] kg(T x, T y);

    /**
     * Kernel function.
     * This is simply for Scala convenience.
     *
     * @param x the first sample.
     * @param y the second sample.
     * @return the kernel value, identical to {@link #k(Object, Object)}.
     */
    default double apply(T x, T y) {
        return k(x, y);
    }

    /**
     * Evaluates the kernel through the {@link ToDoubleBiFunction} contract.
     *
     * @param x the first sample.
     * @param y the second sample.
     * @return the kernel value, identical to {@link #k(Object, Object)}.
     */
    @Override
    default double applyAsDouble(T x, T y) {
        return k(x, y);
    }

    /**
     * Computes the kernel and gradient matrices.
     *
     * @param x samples.
     * @return the kernel and gradient matrices. The array holds
     *         {@code lo().length + 1} matrices of size n-by-n, where
     *         matrix {@code l} collects entry {@code l} of
     *         {@link #kg(Object, Object)} for every pair of samples.
     */
    default Matrix[] KG(T[] x) {
        int n = x.length;
        // One matrix per entry of kg(): lo().length + 1 in total.
        int m = lo().length;
        Matrix[] K = new Matrix[m + 1];
        for (int i = 0; i <= m; i++) {
            K[i] = new Matrix(n, n);
            // Same UPLO.LOWER flag that K(T[]) puts on its result.
            K[i].uplo(UPLO.LOWER);
        }

        // Each parallel task fills one column j of every matrix.
        IntStream.range(0, n).parallel().forEach(j -> {
            T xj = x[j];
            for (int i = 0; i < n; i++) {
                double[] kg = kg(x[i], xj);
                for (int l = 0; l <= m; l++) {
                    K[l].set(i, j, kg[l]);
                }
            }
        });

        return K;
    }

    /**
     * Computes the kernel matrix.
     *
     * @param x samples.
     * @return the n-by-n kernel matrix with entry (i, j) equal to
     *         {@code k(x[i], x[j])}, flagged as {@code UPLO.LOWER}.
     */
    default Matrix K(T[] x) {
        int n = x.length;
        Matrix K = new Matrix(n, n);
        // Each parallel task fills one column j.
        IntStream.range(0, n).parallel().forEach(j -> {
            T xj = x[j];
            for (int i = 0; i < n; i++) {
                K.set(i, j, k(x[i], xj));
            }
        });

        K.uplo(UPLO.LOWER);
        return K;
    }

    /**
     * Returns the kernel matrix.
     *
     * @param x samples.
     * @param y samples.
     * @return the {@code x.length}-by-{@code y.length} kernel matrix with
     *         entry (i, j) equal to {@code k(x[i], y[j])}.
     */
    default Matrix K(T[] x, T[] y) {
        int m = x.length;
        int n = y.length;
        Matrix K = new Matrix(m, n);
        // Each parallel task fills one column j.
        IntStream.range(0, n).parallel().forEach(j -> {
            T yj = y[j];
            for (int i = 0; i < m; i++) {
                K.set(i, j, k(x[i], yj));
            }
        });

        return K;
    }

    /**
     * Returns the same kind kernel with the new hyperparameters.
     *
     * @param params the hyperparameters.
     * @return the same kind kernel with the new hyperparameters.
     */
    MercerKernel<T> of(double[] params);

    /**
     * Returns the hyperparameters for tuning.
     *
     * @return the hyperparameters for tuning.
     */
    double[] hyperparameters();

    /**
     * Returns the lower bound of hyperparameters.
     *
     * @return the lower bound of hyperparameters.
     */
    double[] lo();

    /**
     * Returns the upper bound of hyperparameters.
     *
     * @return the upper bound of hyperparameters.
     */
    double[] hi();
}