org.apache.commons.statistics.distribution.ParetoDistribution Maven / Gradle / Ivy

Go to download
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.commons.statistics.distribution;

import java.util.function.DoubleUnaryOperator;
import org.apache.commons.rng.UniformRandomProvider;
import org.apache.commons.rng.sampling.distribution.InverseTransformParetoSampler;

/**
 * Implementation of the Pareto (Type I) distribution.
 *
 * The probability density function of \( X \) is:
 *
 * 
\[ f(x; k, \alpha) = \frac{\alpha  k^\alpha}{x^{\alpha + 1}} \]
 *
 * 
for \( k > 0 \),
 * \( \alpha > 0 \), and
 * \( x \in [k, \infty) \).
 *
 * 
\( k \) is a scale parameter: this is the minimum possible value of \( X \).
 * 
\( \alpha \) is a shape parameter: this is the Pareto index.
 *
 * @see  Pareto distribution (Wikipedia)
 * @see  Pareto distribution (MathWorld)
 */
public final class ParetoDistribution extends AbstractContinuousDistribution {
    /** The minimum value for the shape parameter when computing when computing the variance. */
    private static final double MIN_SHAPE_FOR_VARIANCE = 2.0;

    /** The scale parameter of this distribution. Also known as {@code k};
     * the minimum possible value for the random variable {@code X}. */
    private final double scale;
    /** The shape parameter of this distribution. */
    private final double shape;
    /** Implementation of PDF(x). Assumes that {@code x >= scale}. */
    private final DoubleUnaryOperator pdf;
    /** Implementation of log PDF(x). Assumes that {@code x >= scale}. */
    private final DoubleUnaryOperator logpdf;

    /**
     * @param scale Scale parameter (minimum possible value of X).
     * @param shape Shape parameter (Pareto index).
     */
    private ParetoDistribution(double scale,
                               double shape) {
        this.scale = scale;
        this.shape = shape;

        // The Pareto distribution approaches a Dirac delta function when shape -> inf.
        // Parameterisations can also lead to underflow in the standard computation.
        // Extract the PDF and CDF to specialized implementations to handle edge cases.

        // Pre-compute factors for the standard computation
        final double shapeByScalePowShape = shape * Math.pow(scale, shape);
        final double logShapePlusShapeByLogScale = Math.log(shape) + Math.log(scale) * shape;

        if (shapeByScalePowShape < Double.POSITIVE_INFINITY &&
            shapeByScalePowShape >= Double.MIN_NORMAL) {
            // Standard computation
            pdf = x -> shapeByScalePowShape / Math.pow(x, shape + 1);
            logpdf = x -> logShapePlusShapeByLogScale - Math.log(x) * (shape + 1);
        } else {
            // Standard computation overflow; underflow to sub-normal or zero; or nan (pow(1.0, inf))
            if (Double.isFinite(logShapePlusShapeByLogScale)) {
                // Log computation is valid
                logpdf = x -> logShapePlusShapeByLogScale - Math.log(x) * (shape + 1);
                pdf = x -> Math.exp(logpdf.applyAsDouble(x));
            } else  {
                // Assume Dirac function
                logpdf = x -> x > scale ? Double.NEGATIVE_INFINITY : Double.POSITIVE_INFINITY;
                // PDF has infinite value at lower bound
                pdf = x -> x > scale ? 0 : Double.POSITIVE_INFINITY;
            }
        }
    }

    /**
     * Creates a Pareto distribution.
     *
     * @param scale Scale parameter (minimum possible value of X).
     * @param shape Shape parameter (Pareto index).
     * @return the distribution
     * @throws IllegalArgumentException if {@code scale <= 0}, {@code scale} is
     * infinite, or {@code shape <= 0}.
     */
    public static ParetoDistribution of(double scale,
                                        double shape) {
        if (scale <= 0 || scale == Double.POSITIVE_INFINITY) {
            throw new DistributionException(DistributionException.NOT_STRICTLY_POSITIVE_FINITE, scale);
        }
        if (shape <= 0) {
            throw new DistributionException(DistributionException.NOT_STRICTLY_POSITIVE, shape);
        }
        return new ParetoDistribution(scale, shape);
    }

    /**
     * Gets the scale parameter of this distribution.
     * This is the minimum possible value of X.
     *
     * @return the scale parameter.
     */
    public double getScale() {
        return scale;
    }

    /**
     * Gets the shape parameter of this distribution.
     * This is the Pareto index.
     *
     * @return the shape parameter.
     */
    public double getShape() {
        return shape;
    }

    /**
     * {@inheritDoc}
     *
     * 
For scale parameter \( k \) and shape parameter \( \alpha \), the PDF is:
     *
     * 
\[ f(x; k, \alpha) = \begin{cases}
     *       0                                       & \text{for } x \lt k \\
     *       \frac{\alpha  k^\alpha}{x^{\alpha + 1}} & \text{for } x \ge k
     *       \end{cases} \]
     */
    @Override
    public double density(double x) {
        if (x < scale) {
            return 0;
        }
        return pdf.applyAsDouble(x);
    }

    /** {@inheritDoc}
     *
     * 
See documentation of {@link #density(double)} for computation details.
     */
    @Override
    public double logDensity(double x) {
        if (x < scale) {
            return Double.NEGATIVE_INFINITY;
        }
        return logpdf.applyAsDouble(x);
    }

    /**
     * {@inheritDoc}
     *
     * 
For scale parameter \( k \) and shape parameter \( \alpha \), the CDF is:
     *
     * 
\[ F(x; k, \alpha) = \begin{cases}
     *       0                                     & \text{for } x \le k \\
     *       1 - \left( \frac{k}{x} \right)^\alpha & \text{for } x \gt k
     *       \end{cases} \]
     */
    @Override
    public double cumulativeProbability(double x)  {
        if (x <= scale) {
            return 0;
        }
        // Increase accuracy for CDF close to 0 by using a log calculation:
        // 1 - exp(α * ln(k / x)) == -(exp(α * ln(k / x)) - 1)
        return -Math.expm1(shape * Math.log(scale / x));
    }

    /**
     * {@inheritDoc}
     *
     * 
For scale parameter \( k \) and shape parameter \( \alpha \), the survival function is:
     *
     * 
\[ S(x; k, \alpha) = \begin{cases}
     *       1                                 & \text{for } x \le k \\
     *       \left( \frac{k}{x} \right)^\alpha & \text{for } x \gt k
     *       \end{cases} \]
     */
    @Override
    public double survivalProbability(double x)  {
        if (x <= scale) {
            return 1;
        }
        return Math.pow(scale / x, shape);
    }

    /** {@inheritDoc} */
    @Override
    public double inverseCumulativeProbability(double p) {
        ArgumentUtils.checkProbability(p);
        if (p == 0) {
            return getSupportLowerBound();
        }
        if (p == 1) {
            return getSupportUpperBound();
        }
        return scale / Math.exp(Math.log1p(-p) / shape);
    }

    /** {@inheritDoc} */
    @Override
    public double inverseSurvivalProbability(double p) {
        ArgumentUtils.checkProbability(p);
        if (p == 1) {
            return getSupportLowerBound();
        }
        if (p == 0) {
            return getSupportUpperBound();
        }
        return scale / Math.pow(p, 1 / shape);
    }

    /**
     * {@inheritDoc}
     *
     * 
For scale parameter \( k \) and shape parameter \( \alpha \), the mean is:
     *
     * 
\[ \mathbb{E}[X] = \begin{cases}
     *       \infty                      & \text{for } \alpha \le 1 \\
     *       \frac{k \alpha}{(\alpha-1)} & \text{for } \alpha \gt 1
     *       \end{cases} \]
     */
    @Override
    public double getMean() {
        if (shape <= 1) {
            return Double.POSITIVE_INFINITY;
        }
        if (shape == Double.POSITIVE_INFINITY) {
            return scale;
        }
        return scale * (shape / (shape - 1));
    }

    /**
     * {@inheritDoc}
     *
     * 
For scale parameter \( k \) and shape parameter \( \alpha \), the variance is:
     *
     * 
\[ \operatorname{var}[X] = \begin{cases}
     *       \infty                                     & \text{for } \alpha \le 2 \\
     *       \frac{k^2 \alpha}{(\alpha-1)^2 (\alpha-2)} & \text{for } \alpha \gt 2
     *       \end{cases} \]
     */
    @Override
    public double getVariance() {
        if (shape <= MIN_SHAPE_FOR_VARIANCE) {
            return Double.POSITIVE_INFINITY;
        }
        if (shape == Double.POSITIVE_INFINITY) {
            return 0;
        }
        final double s = shape - 1;
        final double z = shape / s / s / (shape - 2);
        // Avoid intermediate overflow of scale^2 if z is small
        return z < 1 ? z * scale * scale : scale * scale * z;
    }

    /**
     * {@inheritDoc}
     * 

     * The lower bound of the support is equal to the scale parameter {@code k}.
     *
     * @return scale.
     */
    @Override
    public double getSupportLowerBound() {
        return getScale();
    }

    /**
     * {@inheritDoc}
     * 
     * The upper bound of the support is always positive infinity.
     *
     * @return {@linkplain Double#POSITIVE_INFINITY positive infinity}.
     */
    @Override
    public double getSupportUpperBound() {
        return Double.POSITIVE_INFINITY;
    }

    /** {@inheritDoc} */
    @Override
    public ContinuousDistribution.Sampler createSampler(final UniformRandomProvider rng) {
        // Pareto distribution sampler
        return InverseTransformParetoSampler.of(rng, scale, shape)::sample;
    }
}