smile.stat.distribution.KernelDensity Maven / Gradle / Ivy
/******************************************************************************
* Confidential Proprietary *
* (c) Copyright Haifeng Li 2011, All Rights Reserved *
******************************************************************************/
package smile.stat.distribution;
import java.util.Arrays;
import smile.math.Math;
/**
* Kernel density estimation is a non-parametric way of estimating the
* probability density function of a random variable. Kernel density estimation
* is a fundamental data smoothing problem where inferences about the population
* are made, based on a finite data sample. It is also known as the
* Parzen window method.
*
* @author Haifeng Li
*/
public class KernelDensity implements Distribution {
/**
* The samples to estimate the density function.
*/
private double[] x;
/**
* The kernel -- a symmetric but not necessarily positive function that
* integrates to one. Here we just Gaussian density function.
*/
private GaussianDistribution gaussian;
/**
* h > 0 is a smoothing parameter called the bandwidth.
*/
private double h;
/**
* The mean value.
*/
private double mean;
/**
* The standard deviation.
*/
private double sd;
/**
* The variance.
*/
private double var;
/**
* Constructor. The bandwidth of kernel will be estimated by the rule of thumb.
* @param x the samples to estimate the density function.
*/
public KernelDensity(double[] x) {
this.x = x;
this.mean = Math.mean(x);
this.var = Math.var(x);
this.sd = Math.sqrt(var);
Arrays.sort(x);
int n = x.length;
double iqr = x[n*3/4] - x[n/4];
h = 1.06 * Math.min(sd, iqr/1.34) / Math.pow(x.length, 0.2);
gaussian = new GaussianDistribution(0, h);
}
/**
* Constructor.
* @param x the samples to estimate the density function.
* @param h a bandwidth parameter for smoothing.
*/
public KernelDensity(double[] x, double h) {
if (h <= 0) {
throw new IllegalArgumentException("Invalid bandwidth: " + h);
}
this.x = x;
this.h = h;
this.mean = Math.mean(x);
this.var = Math.var(x);
this.sd = Math.sqrt(var);
gaussian = new GaussianDistribution(0, h);
Arrays.sort(x);
}
/**
* Returns the bandwidth of kernel.
* @return the bandwidth of kernel
*/
public double bandwidth() {
return h;
}
@Override
public int npara() {
return 0;
}
@Override
public double mean() {
return mean;
}
@Override
public double var() {
return var;
}
@Override
public double sd() {
return sd;
}
/**
* Shannon entropy. Not supported.
*/
@Override
public double entropy() {
throw new UnsupportedOperationException("Not supported.");
}
/**
* Random number generator. Not supported.
*/
@Override
public double rand() {
throw new UnsupportedOperationException("Not supported.");
}
@Override
public double p(double x) {
int start = Arrays.binarySearch(this.x, x-5*h);
if (start < 0) {
start = -start - 1;
}
int end = Arrays.binarySearch(this.x, x+5*h);
if (end < 0) {
end = -end - 1;
}
double p = 0.0;
for (int i = start; i < end; i++) {
p += gaussian.p(this.x[i] - x);
}
return p / this.x.length;
}
@Override
public double logp(double x) {
return Math.log(p(x));
}
/**
* Cumulative distribution function. Not supported.
*/
@Override
public double cdf(double x) {
throw new UnsupportedOperationException("Not supported.");
}
/**
* Inverse of CDF. Not supported.
*/
@Override
public double quantile(double p) {
throw new UnsupportedOperationException("Not supported.");
}
/**
* The likelihood of the samples. Not supported.
*/
@Override
public double likelihood(double[] x) {
throw new UnsupportedOperationException("Not supported.");
}
/**
* The log likelihood of the samples. Not supported.
*/
@Override
public double logLikelihood(double[] x) {
throw new UnsupportedOperationException("Not supported.");
}
}