
// smile.math.kernel.MercerKernel Maven / Gradle / Ivy
/*******************************************************************************
* Copyright (c) 2010-2020 Haifeng Li. All rights reserved.
*
* Smile is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as
* published by the Free Software Foundation, either version 3 of
* the License, or (at your option) any later version.
*
* Smile is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with Smile. If not, see <https://www.gnu.org/licenses/>.
******************************************************************************/
package smile.math.kernel;
import java.io.Serializable;
import java.util.function.ToDoubleBiFunction;
import java.util.stream.IntStream;
import smile.math.blas.UPLO;
import smile.math.matrix.Matrix;
/**
* Mercer kernel, also called covariance function in Gaussian process.
* A kernel is a continuous function that takes two variables x and y and
* maps them to a real value such that {@code k(x,y) = k(y,x)}.
* A Mercer kernel is a kernel that is positive semi-definite. When a kernel
* is positive semi-definite, one may exploit the kernel trick, the idea of
* implicitly mapping data to a high-dimensional feature space where some
* linear algorithm is applied that works exclusively with inner products.
* Assume we have some mapping Φ from an input space X to a feature space H,
* then a kernel {@code k(u, v) = <Φ(u), Φ(v)>} may be used
* to define the inner product in feature space H.
*
* Positive definiteness in the context of kernel functions also implies that
* a kernel matrix created using a particular kernel is positive semi-definite.
* A matrix is positive semi-definite if its associated eigenvalues are nonnegative.
*
* We can combine or modify existing kernel functions to make new ones.
* For example, the sum of two kernels is a kernel. The product of two kernels
* is also a kernel.
*
* A stationary covariance function is a function of distance {@code x − y}.
* Thus it is invariant to translations in the input space.
* If further the covariance function is a function only of {@code |x − y|}
* then it is called isotropic; it is thus invariant to all rigid motions.
* If a covariance function depends only on the dot product of x and y,
* we call it a dot product covariance function.
*
* @author Haifeng Li
*/
public interface MercerKernel<T> extends ToDoubleBiFunction<T, T>, Serializable {
    /**
     * Kernel function.
     *
     * @param x the first input object.
     * @param y the second input object.
     * @return the kernel value k(x, y).
     */
    double k(T x, T y);

    /**
     * Computes the kernel and its gradient over hyperparameters.
     * <p>
     * {@code KG} reads entries {@code 0} through {@code lo().length}
     * (inclusive) of the returned array, so implementations must return at
     * least {@code lo().length + 1} values. Presumably entry 0 is the kernel
     * value and the remaining entries are the partial derivatives with
     * respect to each hyperparameter -- confirm against implementations.
     *
     * @param x the first input object.
     * @param y the second input object.
     * @return the kernel value and its gradient, packed in one array.
     */
    double[] kg(T x, T y);

    /**
     * Kernel function.
     * This is simply for Scala convenience; it delegates to {@code k}.
     *
     * @param x the first input object.
     * @param y the second input object.
     * @return the kernel value k(x, y).
     */
    default double apply(T x, T y) {
        return k(x, y);
    }

    /**
     * Adapts the kernel to {@link ToDoubleBiFunction} by delegating to
     * {@code k}.
     *
     * @param x the first input object.
     * @param y the second input object.
     * @return the kernel value k(x, y).
     */
    @Override
    default double applyAsDouble(T x, T y) {
        return k(x, y);
    }

    /**
     * Computes the kernel and gradient matrices.
     * <p>
     * Returns {@code lo().length + 1} square matrices of order
     * {@code x.length}. Element {@code (i, j)} of the {@code l}-th matrix is
     * entry {@code l} of {@code kg(x[i], x[j])}. Columns are filled from a
     * parallel stream, so {@code kg} is invoked concurrently from multiple
     * threads.
     *
     * @param x samples.
     * @return the kernel and gradient matrices.
     */
    default Matrix[] KG(T[] x) {
        int n = x.length;
        // The number of hyperparameters is taken from the lower bound vector.
        int m = lo().length;

        Matrix[] K = new Matrix[m + 1];
        for (int i = 0; i <= m; i++) {
            K[i] = new Matrix(n, n);
            // Presumably flags the matrix as symmetric with the lower
            // triangle as reference -- see Matrix.uplo.
            K[i].uplo(UPLO.LOWER);
        }

        // One task per column j; every (i, j) cell is written exactly once.
        IntStream.range(0, n).parallel().forEach(j -> {
            T xj = x[j];
            for (int i = 0; i < n; i++) {
                double[] kg = kg(x[i], xj);
                for (int l = 0; l <= m; l++) {
                    K[l].set(i, j, kg[l]);
                }
            }
        });

        return K;
    }

    /**
     * Computes the kernel matrix.
     * <p>
     * Element {@code (i, j)} of the result is {@code k(x[i], x[j])}. Columns
     * are filled from a parallel stream, so {@code k} is invoked concurrently
     * from multiple threads.
     *
     * @param x samples.
     * @return the square kernel matrix of order {@code x.length}, flagged
     *         with {@code UPLO.LOWER}.
     */
    default Matrix K(T[] x) {
        int n = x.length;
        Matrix K = new Matrix(n, n);

        // One task per column j; every (i, j) cell is written exactly once.
        IntStream.range(0, n).parallel().forEach(j -> {
            T xj = x[j];
            for (int i = 0; i < n; i++) {
                K.set(i, j, k(x[i], xj));
            }
        });

        K.uplo(UPLO.LOWER);
        return K;
    }

    /**
     * Returns the kernel matrix.
     * <p>
     * Element {@code (i, j)} of the result is {@code k(x[i], y[j])}. Columns
     * are filled from a parallel stream, so {@code k} is invoked concurrently
     * from multiple threads.
     *
     * @param x samples.
     * @param y samples.
     * @return the {@code x.length}-by-{@code y.length} kernel matrix.
     */
    default Matrix K(T[] x, T[] y) {
        int m = x.length;
        int n = y.length;
        Matrix K = new Matrix(m, n);

        // One task per column j; every (i, j) cell is written exactly once.
        IntStream.range(0, n).parallel().forEach(j -> {
            T yj = y[j];
            for (int i = 0; i < m; i++) {
                K.set(i, j, k(x[i], yj));
            }
        });

        return K;
    }

    /**
     * Returns the same kind kernel with the new hyperparameters.
     *
     * @param params the new hyperparameters.
     * @return a kernel of the same kind using {@code params}.
     */
    MercerKernel<T> of(double[] params);

    /**
     * Returns the hyperparameters for tuning.
     *
     * @return the hyperparameters.
     */
    double[] hyperparameters();

    /**
     * Returns the lower bound of hyperparameters.
     * Its length is used by {@code KG} as the number of hyperparameters.
     *
     * @return the lower bound of hyperparameters.
     */
    double[] lo();

    /**
     * Returns the upper bound of hyperparameters.
     *
     * @return the upper bound of hyperparameters.
     */
    double[] hi();
}