smile.math.kernel.MercerKernel Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of smile-base Show documentation
smile-base
There is a newer version: 4.2.0
/*
 * Copyright (c) 2010-2021 Haifeng Li. All rights reserved.
 *
 * Smile is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * Smile is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with Smile.  If not, see <https://www.gnu.org/licenses/>.
 */
package smile.math.kernel;

import java.io.Serializable;
import java.util.Locale;
import java.util.function.ToDoubleBiFunction;
import java.util.regex.Matcher;
import java.util.stream.IntStream;
import smile.math.blas.UPLO;
import smile.math.matrix.Matrix;
import smile.util.SparseArray;

/**
 * Mercer kernel, also called covariance function in Gaussian process.
 * A kernel is a continuous function that takes two variables x and y and
 * map them to a real value such that {@code k(x,y) = k(y,x)}.
 * A Mercer kernel is a kernel that is positive semi-definite. When a kernel
 * is positive semi-definite, one may exploit the kernel trick, the idea of
 * implicitly mapping data to a high-dimensional feature space where some
 * linear algorithm is applied that works exclusively with inner products.
 * Assume we have some mapping Φ from an input space X to a feature space H,
 * then a kernel {@code k(u, v) = <Φ(u), Φ(v)>} may be used
 * to define the inner product in feature space H.
 * <p>
 * Positive definiteness in the context of kernel functions also implies that
 * a kernel matrix created using a particular kernel is positive semi-definite.
 * A matrix is positive semi-definite if its associated eigenvalues are non-negative.
 * <p>
 * We can combine or modify existing kernel functions to make new ones.
 * For example, the sum of two kernels is a kernel. The product of two kernels
 * is also a kernel.
 * <p>
 * A stationary covariance function is a function of distance x − y.
 * Thus, it is invariant to translations in the input space.
 * If further the covariance function is a function only of |x − y|
 * then it is called isotropic; it is thus invariant to all rigid motions.
 * If a covariance function depends only on the dot product of x and y,
 * we call it a dot product covariance function.
 *
 * @param <T> the input type of kernel function.
 *
 * @author Haifeng Li
 */
public interface MercerKernel<T> extends ToDoubleBiFunction<T, T>, Serializable {

    /**
     * Kernel function.
     * @param x an object.
     * @param y an object.
     * @return the kernel value.
     */
    double k(T x, T y);

    /**
     * Computes the kernel and its gradient over hyperparameters.
     * @param x an object.
     * @param y an object.
     * @return the kernel value and gradient.
     */
    double[] kg(T x, T y);

    /**
     * Kernel function.
     * This is simply for Scala convenience.
     * @param x an object.
     * @param y an object.
     * @return the kernel value.
     */
    default double apply(T x, T y) {
        return k(x, y);
    }

    /**
     * Kernel function, exposed through the {@link ToDoubleBiFunction}
     * contract. Delegates to {@link #k(Object, Object)}.
     * @param x an object.
     * @param y an object.
     * @return the kernel value.
     */
    @Override
    default double applyAsDouble(T x, T y) {
        return k(x, y);
    }

    /**
     * Computes the kernel and gradient matrices.
     * The result holds {@code lo().length + 1} square matrices; the
     * {@code l}-th matrix collects the {@code l}-th entry of
     * {@code kg(x[i], x[j])} at position {@code (i, j)}.
     * Columns are filled in parallel.
     *
     * @param x objects.
     * @return the kernel and gradient matrices.
     */
    default Matrix[] KG(T[] x) {
        int n = x.length;
        // One matrix per hyperparameter plus one more for the leading entry of kg().
        int m = lo().length;
        Matrix[] K = new Matrix[m + 1];
        for (int i = 0; i <= m; i++) {
            K[i] = new Matrix(n, n);
            K[i].uplo(UPLO.LOWER);
        }

        // Each parallel task owns one column j, so no two tasks write the same cell.
        IntStream.range(0, n).parallel().forEach(j -> {
            T xj = x[j];
            for (int i = 0; i < n; i++) {
                double[] kg = kg(x[i], xj);
                for (int l = 0; l <= m; l++) {
                    K[l].set(i, j, kg[l]);
                }
            }
        });

        return K;
    }

    /**
     * Computes the kernel matrix.
     * Entry {@code (i, j)} is {@code k(x[i], x[j])}; the matrix is flagged
     * as {@code UPLO.LOWER} before it is returned.
     * Columns are filled in parallel.
     *
     * @param x objects.
     * @return the kernel matrix.
     */
    default Matrix K(T[] x) {
        int n = x.length;
        Matrix K = new Matrix(n, n);
        // Each parallel task owns one column j.
        IntStream.range(0, n).parallel().forEach(j -> {
            T xj = x[j];
            for (int i = 0; i < n; i++) {
                K.set(i, j, k(x[i], xj));
            }
        });

        K.uplo(UPLO.LOWER);
        return K;
    }

    /**
     * Returns the kernel matrix.
     * The result has {@code x.length} rows and {@code y.length} columns,
     * with entry {@code (i, j)} equal to {@code k(x[i], y[j])}.
     * Columns are filled in parallel.
     *
     * @param x objects.
     * @param y objects.
     * @return the kernel matrix.
     */
    default Matrix K(T[] x, T[] y) {
        int m = x.length;
        int n = y.length;
        Matrix K = new Matrix(m, n);
        // Each parallel task owns one column j.
        IntStream.range(0, n).parallel().forEach(j -> {
            T yj = y[j];
            for (int i = 0; i < m; i++) {
                K.set(i, j, k(x[i], yj));
            }
        });

        return K;
    }

    /**
     * Returns the same kind kernel with the new hyperparameters.
     * @param params the hyperparameters.
     * @return the same kind kernel with the new hyperparameters.
     */
    MercerKernel<T> of(double[] params);

    /**
     * Returns the hyperparameters of kernel.
     * @return the hyperparameters of kernel.
     */
    double[] hyperparameters();

    /**
     * Returns the lower bound of hyperparameters (in hyperparameter tuning).
     * @return the lower bound of hyperparameters.
     */
    double[] lo();

    /**
     * Returns the upper bound of hyperparameters (in hyperparameter tuning).
     * @return the upper bound of hyperparameters.
     */
    double[] hi();

    /**
     * Returns a kernel function.
     * The string is trimmed and lower-cased (with {@link Locale#ROOT})
     * before it is matched against the known kernel patterns.
     * @param kernel the kernel function string representation.
     * @return the kernel function.
     * @throws IllegalArgumentException if the string matches no known kernel.
     */
    static MercerKernel<double[]> of(String kernel) {
        kernel = kernel.trim().toLowerCase(Locale.ROOT);

        Matcher m = KernelPatterns.linear.matcher(kernel);
        if (m.matches()) {
            return new LinearKernel();
        }

        m = KernelPatterns.polynomial.matcher(kernel);
        if (m.matches()) {
            int degree = Integer.parseInt(m.group(1));
            double scale = Double.parseDouble(m.group(2));
            double offset = Double.parseDouble(m.group(3));
            return new PolynomialKernel(degree, scale, offset);
        }

        m = KernelPatterns.gaussian.matcher(kernel);
        if (m.matches()) {
            double sigma = Double.parseDouble(m.group(1));
            return new GaussianKernel(sigma);
        }

        m = KernelPatterns.matern.matcher(kernel);
        if (m.matches()) {
            double sigma = Double.parseDouble(m.group(1));
            double nu = Double.parseDouble(m.group(2));
            return new MaternKernel(sigma, nu);
        }

        m = KernelPatterns.laplacian.matcher(kernel);
        if (m.matches()) {
            double scale = Double.parseDouble(m.group(1));
            return new LaplacianKernel(scale);
        }

        m = KernelPatterns.tanh.matcher(kernel);
        if (m.matches()) {
            double scale = Double.parseDouble(m.group(1));
            double offset = Double.parseDouble(m.group(2));
            return new HyperbolicTangentKernel(scale, offset);
        }

        m = KernelPatterns.thinPlateSpline.matcher(kernel);
        if (m.matches()) {
            double sigma = Double.parseDouble(m.group(1));
            return new ThinPlateSplineKernel(sigma);
        }

        m = KernelPatterns.pearson.matcher(kernel);
        if (m.matches()) {
            double sigma = Double.parseDouble(m.group(1));
            double omega = Double.parseDouble(m.group(2));
            return new PearsonKernel(sigma, omega);
        }

        m = KernelPatterns.hellinger.matcher(kernel);
        if (m.matches()) {
            return new HellingerKernel();
        }

        throw new IllegalArgumentException("Unknown kernel: " + kernel);
    }

    /**
     * Returns a sparse kernel function.
     * The string is trimmed and lower-cased (with {@link Locale#ROOT})
     * before it is matched, the same normalization that
     * {@link #of(String)} applies.
     * @param kernel the kernel function string representation.
     * @return the kernel function.
     * @throws IllegalArgumentException if the string matches no known kernel.
     */
    static MercerKernel<SparseArray> sparse(String kernel) {
        // Normalize exactly like of(String) so the shared patterns accept the same spellings.
        kernel = kernel.trim().toLowerCase(Locale.ROOT);

        Matcher m = KernelPatterns.linear.matcher(kernel);
        if (m.matches()) {
            return new SparseLinearKernel();
        }

        m = KernelPatterns.polynomial.matcher(kernel);
        if (m.matches()) {
            int degree = Integer.parseInt(m.group(1));
            double scale = Double.parseDouble(m.group(2));
            double offset = Double.parseDouble(m.group(3));
            return new SparsePolynomialKernel(degree, scale, offset);
        }

        m = KernelPatterns.gaussian.matcher(kernel);
        if (m.matches()) {
            double sigma = Double.parseDouble(m.group(1));
            return new SparseGaussianKernel(sigma);
        }

        m = KernelPatterns.matern.matcher(kernel);
        if (m.matches()) {
            double sigma = Double.parseDouble(m.group(1));
            double nu = Double.parseDouble(m.group(2));
            return new SparseMaternKernel(sigma, nu);
        }

        m = KernelPatterns.laplacian.matcher(kernel);
        if (m.matches()) {
            double scale = Double.parseDouble(m.group(1));
            return new SparseLaplacianKernel(scale);
        }

        m = KernelPatterns.tanh.matcher(kernel);
        if (m.matches()) {
            double scale = Double.parseDouble(m.group(1));
            double offset = Double.parseDouble(m.group(2));
            return new SparseHyperbolicTangentKernel(scale, offset);
        }

        m = KernelPatterns.thinPlateSpline.matcher(kernel);
        if (m.matches()) {
            double sigma = Double.parseDouble(m.group(1));
            return new SparseThinPlateSplineKernel(sigma);
        }

        throw new IllegalArgumentException("Unknown kernel: " + kernel);
    }

    /**
     * Returns a binary sparse kernel function.
     * The string is trimmed and lower-cased (with {@link Locale#ROOT})
     * before it is matched, the same normalization that
     * {@link #of(String)} applies.
     * @param kernel the kernel function string representation.
     * @return the kernel function.
     * @throws IllegalArgumentException if the string matches no known kernel.
     */
    static MercerKernel<int[]> binary(String kernel) {
        // Normalize exactly like of(String) so the shared patterns accept the same spellings.
        kernel = kernel.trim().toLowerCase(Locale.ROOT);

        Matcher m = KernelPatterns.linear.matcher(kernel);
        if (m.matches()) {
            return new BinarySparseLinearKernel();
        }

        m = KernelPatterns.polynomial.matcher(kernel);
        if (m.matches()) {
            int degree = Integer.parseInt(m.group(1));
            double scale = Double.parseDouble(m.group(2));
            double offset = Double.parseDouble(m.group(3));
            return new BinarySparsePolynomialKernel(degree, scale, offset);
        }

        m = KernelPatterns.gaussian.matcher(kernel);
        if (m.matches()) {
            double sigma = Double.parseDouble(m.group(1));
            return new BinarySparseGaussianKernel(sigma);
        }

        m = KernelPatterns.matern.matcher(kernel);
        if (m.matches()) {
            double sigma = Double.parseDouble(m.group(1));
            double nu = Double.parseDouble(m.group(2));
            return new BinarySparseMaternKernel(sigma, nu);
        }

        m = KernelPatterns.laplacian.matcher(kernel);
        if (m.matches()) {
            double scale = Double.parseDouble(m.group(1));
            return new BinarySparseLaplacianKernel(scale);
        }

        m = KernelPatterns.tanh.matcher(kernel);
        if (m.matches()) {
            double scale = Double.parseDouble(m.group(1));
            double offset = Double.parseDouble(m.group(2));
            return new BinarySparseHyperbolicTangentKernel(scale, offset);
        }

        m = KernelPatterns.thinPlateSpline.matcher(kernel);
        if (m.matches()) {
            double sigma = Double.parseDouble(m.group(1));
            return new BinarySparseThinPlateSplineKernel(sigma);
        }

        throw new IllegalArgumentException("Unknown kernel: " + kernel);
    }
}