All Downloads are FREE. Search and download functionalities are using the official Maven repository.

smile.stat.distribution.KernelDensity Maven / Gradle / Ivy

There is a newer version: 2.6.0
Show newest version
/******************************************************************************
 *                   Confidential Proprietary                                 *
 *         (c) Copyright Haifeng Li 2011, All Rights Reserved                 *
 ******************************************************************************/

package smile.stat.distribution;

import java.util.Arrays;
import smile.math.Math;

/**
 * Kernel density estimation is a non-parametric way of estimating the
 * probability density function of a random variable. Kernel density estimation
 * is a fundamental data smoothing problem where inferences about the population
 * are made, based on a finite data sample. It is also known as the
 * Parzen window method.
 * <p>
 * This implementation uses a Gaussian kernel. The samples are copied and kept
 * sorted internally, so the array passed to a constructor is never modified
 * and later changes to it do not affect the estimator. When evaluating the
 * density, only the samples within five bandwidths of the query point are
 * summed; the contribution of samples further away is ignored.
 *
 * @author Haifeng Li
 */
public class KernelDensity implements Distribution {

    /**
     * The samples to estimate the density function. This is a private copy
     * of the constructor argument, sorted in ascending order.
     */
    private final double[] x;
    /**
     * The kernel -- a symmetric but not necessarily positive function that
     * integrates to one. Here we just use the Gaussian density function,
     * constructed as {@code new GaussianDistribution(0, h)}.
     */
    private final GaussianDistribution gaussian;
    /**
     * h > 0 is a smoothing parameter called the bandwidth.
     */
    private final double h;
    /**
     * The mean value of the samples.
     */
    private final double mean;
    /**
     * The standard deviation of the samples.
     */
    private final double sd;
    /**
     * The variance of the samples.
     */
    private final double var;

    /**
     * Constructor. The bandwidth of kernel will be estimated by the rule of thumb
     * {@code h = 1.06 * min(sd, IQR / 1.34) / n^0.2}. If the inter-quartile
     * range is zero (e.g. heavily tied data), the standard deviation alone is
     * used as the spread estimate so that the bandwidth does not collapse to zero.
     *
     * @param x the samples to estimate the density function. The array is
     *          copied; it is not modified.
     * @throws IllegalArgumentException if {@code x} is empty, or if no positive
     *         bandwidth can be derived from the samples (for example when all
     *         samples are identical).
     */
    public KernelDensity(double[] x) {
        if (x.length == 0) {
            throw new IllegalArgumentException("Empty sample");
        }

        this.mean = Math.mean(x);
        this.var = Math.var(x);
        this.sd = Math.sqrt(var);

        // Work on a private copy so the caller's array keeps its order.
        this.x = x.clone();
        Arrays.sort(this.x);

        int n = this.x.length;
        double iqr = this.x[n*3/4] - this.x[n/4];
        // A zero IQR would force the bandwidth to zero even when sd > 0.
        double spread = iqr > 0 ? Math.min(sd, iqr/1.34) : sd;
        double bandwidth = 1.06 * spread / Math.pow(n, 0.2);

        // Written as !(… > 0) so that NaN is rejected as well.
        if (!(bandwidth > 0)) {
            throw new IllegalArgumentException("Invalid bandwidth: " + bandwidth);
        }

        this.h = bandwidth;
        this.gaussian = new GaussianDistribution(0, h);
    }

    /**
     * Constructor.
     *
     * @param x the samples to estimate the density function. The array is
     *          copied; it is not modified.
     * @param h a bandwidth parameter for smoothing. Must be positive.
     * @throws IllegalArgumentException if {@code h} is not positive (including
     *         NaN) or if {@code x} is empty.
     */
    public KernelDensity(double[] x, double h) {
        // Written as !(… > 0) so that NaN is rejected as well.
        if (!(h > 0)) {
            throw new IllegalArgumentException("Invalid bandwidth: " + h);
        }

        if (x.length == 0) {
            throw new IllegalArgumentException("Empty sample");
        }

        this.h = h;
        this.mean = Math.mean(x);
        this.var = Math.var(x);
        this.sd = Math.sqrt(var);
        this.gaussian = new GaussianDistribution(0, h);

        // Work on a private copy so the caller's array keeps its order.
        this.x = x.clone();
        Arrays.sort(this.x);
    }

    /**
     * Returns the bandwidth of kernel.
     * @return the bandwidth of kernel
     */
    public double bandwidth() {
        return h;
    }

    /**
     * Returns the number of parameters, which is always 0 for this
     * non-parametric estimator.
     */
    @Override
    public int npara() {
        return 0;
    }

    /**
     * Returns the mean of the samples given at construction.
     */
    @Override
    public double mean() {
        return mean;
    }

    /**
     * Returns the variance of the samples given at construction.
     */
    @Override
    public double var() {
        return var;
    }

    /**
     * Returns the standard deviation of the samples given at construction.
     */
    @Override
    public double sd() {
        return sd;
    }

    /**
     * Shannon entropy. Not supported.
     */
    @Override
    public double entropy() {
        throw new UnsupportedOperationException("Not supported.");
    }

    /**
     * Random number generator. Not supported.
     */
    @Override
    public double rand() {
        throw new UnsupportedOperationException("Not supported.");
    }

    /**
     * Returns the estimated density at {@code x}: the sum of the kernel
     * evaluated at the offset of every sample lying in the closed window
     * {@code [x - 5h, x + 5h]}, divided by the total number of samples.
     *
     * @param x the point at which to evaluate the density.
     * @return the estimated density at {@code x}.
     */
    @Override
    public double p(double x) {
        // Samples are sorted, so the window is a contiguous index range.
        int start = firstIndexAtLeast(x - 5*h);
        int end = firstIndexAbove(x + 5*h);

        double p = 0.0;
        for (int i = start; i < end; i++) {
            p += gaussian.p(this.x[i] - x);
        }

        return p / this.x.length;
    }

    /**
     * Returns the index of the first sample that is {@code >= key}, or the
     * number of samples if there is none. Unlike {@code Arrays.binarySearch},
     * the result is well defined when the samples contain duplicates.
     */
    private int firstIndexAtLeast(double key) {
        int lo = 0;
        int hi = x.length;
        while (lo < hi) {
            int mid = (lo + hi) >>> 1;
            if (x[mid] < key) {
                lo = mid + 1;
            } else {
                hi = mid;
            }
        }
        return lo;
    }

    /**
     * Returns the index of the first sample that is {@code > key}, or the
     * number of samples if there is none.
     */
    private int firstIndexAbove(double key) {
        int lo = 0;
        int hi = x.length;
        while (lo < hi) {
            int mid = (lo + hi) >>> 1;
            if (x[mid] <= key) {
                lo = mid + 1;
            } else {
                hi = mid;
            }
        }
        return lo;
    }

    /**
     * Returns the logarithm of the estimated density at {@code x}, computed
     * as {@code Math.log(p(x))}.
     */
    @Override
    public double logp(double x) {
        return Math.log(p(x));
    }

    /**
     * Cumulative distribution function. Not supported.
     */
    @Override
    public double cdf(double x) {
        throw new UnsupportedOperationException("Not supported.");
    }

    /**
     * Inverse of CDF. Not supported.
     */
    @Override
    public double quantile(double p) {
        throw new UnsupportedOperationException("Not supported.");
    }

    /**
     * The likelihood of the samples. Not supported.
     */
    @Override
    public double likelihood(double[] x) {
        throw new UnsupportedOperationException("Not supported.");
    }

    /**
     * The log likelihood of the samples. Not supported.
     */
    @Override
    public double logLikelihood(double[] x) {
        throw new UnsupportedOperationException("Not supported.");
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy