All Downloads are FREE. Search and download functionalities are using the official Maven repository.

smile.stat.distribution.KernelDensity Maven / Gradle / Ivy

/*******************************************************************************
 * Copyright (c) 2010-2020 Haifeng Li. All rights reserved.
 *
 * Smile is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as
 * published by the Free Software Foundation, either version 3 of
 * the License, or (at your option) any later version.
 *
 * Smile is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with Smile.  If not, see .
 ******************************************************************************/

package smile.stat.distribution;

import java.util.Arrays;
import smile.math.MathEx;

/**
 * Kernel density estimation is a non-parametric way of estimating the
 * probability density function of a random variable. Kernel density estimation
 * is a fundamental data smoothing problem where inferences about the population
 * are made, based on a finite data sample. It is also known as the
 * Parzen window method.
 * 
 * @author Haifeng Li
 */
public class KernelDensity implements Distribution {
    private static final long serialVersionUID = 2L;

    /**
     * The samples to estimate the density function.
     */
    private double[] x;
    /**
     * The kernel -- a symmetric but not necessarily positive function that
     * integrates to one. Here we just Gaussian density function.
     */
    private GaussianDistribution gaussian;
    /*
     * h > 0 is a smoothing parameter called the bandwidth.
     */
    private double h;
    /**
     * The mean value.
     */
    private double mean;
    /**
     * The standard deviation.
     */
    private double sd;
    /**
     * The variance.
     */
    private double variance;

    /**
     * Constructor. The bandwidth of kernel will be estimated by the rule of thumb.
     * @param x the samples to estimate the density function.
     */
    public KernelDensity(double[] x) {
        this.x = x;
        this.mean = MathEx.mean(x);
        this.variance = MathEx.var(x);
        this.sd = Math.sqrt(variance);

        Arrays.sort(x);

        int n = x.length;
        double iqr = x[n*3/4] - x[n/4];
        h = 1.06 * Math.min(sd, iqr/1.34) / Math.pow(x.length, 0.2);
        gaussian = new GaussianDistribution(0, h);
    }

    /**
     * Constructor.
     * @param x the samples to estimate the density function.
     * @param h a bandwidth parameter for smoothing.
     */
    public KernelDensity(double[] x, double h) {
        if (h <= 0) {
            throw new IllegalArgumentException("Invalid bandwidth: " + h);
        }

        this.x = x;
        this.h = h;
        this.mean = MathEx.mean(x);
        this.variance = MathEx.var(x);
        this.sd = Math.sqrt(variance);
        gaussian = new GaussianDistribution(0, h);

        Arrays.sort(x);
    }

    /**
     * Returns the bandwidth of kernel.
     * @return the bandwidth of kernel
     */
    public double bandwidth() {
        return h;
    }

    @Override
    public int length() {
        return 0;
    }

    @Override
    public double mean() {
        return mean;
    }

    @Override
    public double variance() {
        return variance;
    }

    @Override
    public double sd() {
        return sd;
    }

    /**
     * Shannon entropy. Not supported.
     */
    @Override
    public double entropy() {
        throw new UnsupportedOperationException("Not supported.");
    }

    /**
     * Random number generator. Not supported.
     */
    @Override
    public double rand() {
        throw new UnsupportedOperationException("Not supported.");
    }

    @Override
    public double p(double x) {
        int start = Arrays.binarySearch(this.x, x-5*h);
        if (start < 0) {
            start = -start - 1;
        }

        int end = Arrays.binarySearch(this.x, x+5*h);
        if (end < 0) {
            end = -end - 1;
        }

        double p = 0.0;
        for (int i = start; i < end; i++) {
            p += gaussian.p(this.x[i] - x);
        }

        return p / this.x.length;
    }

    @Override
    public double logp(double x) {
        return Math.log(p(x));
    }

    /**
     * Cumulative distribution function. Not supported.
     */
    @Override
    public double cdf(double x) {
        throw new UnsupportedOperationException("Not supported.");
    }

    /**
     * Inverse of CDF. Not supported.
     */
    @Override
    public double quantile(double p) {
        throw new UnsupportedOperationException("Not supported.");
    }

    /**
     * The likelihood of the samples. Not supported.
     */
    @Override
    public double likelihood(double[] x) {
        throw new UnsupportedOperationException("Not supported.");
    }

    /**
     * The log likelihood of the samples. Not supported.
     */
    @Override
    public double logLikelihood(double[] x) {
        throw new UnsupportedOperationException("Not supported.");
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy