All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.commons.math3.distribution.ZipfDistribution Maven / Gradle / Ivy

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.commons.math3.distribution;

import org.apache.commons.math3.exception.NotStrictlyPositiveException;
import org.apache.commons.math3.exception.util.LocalizedFormats;
import org.apache.commons.math3.util.FastMath;
import org.apache.commons.math3.random.RandomGenerator;
import org.apache.commons.math3.random.Well19937c;

/**
 * Implementation of the Zipf distribution.
 *
 * @see Zipf distribution (MathWorld)
 * @version $Id: ZipfDistribution.java 1416643 2012-12-03 19:37:14Z tn $
 */
public class ZipfDistribution extends AbstractIntegerDistribution {
    /** Serializable version identifier. */
    private static final long serialVersionUID = -140627372283420404L;
    /** Number of elements. */
    private final int numberOfElements;
    /** Exponent parameter of the distribution. */
    private final double exponent;
    /** Cached numerical mean */
    private double numericalMean = Double.NaN;
    /** Whether or not the numerical mean has been calculated */
    private boolean numericalMeanIsCalculated = false;
    /** Cached numerical variance */
    private double numericalVariance = Double.NaN;
    /** Whether or not the numerical variance has been calculated */
    private boolean numericalVarianceIsCalculated = false;

    /**
     * Create a new Zipf distribution with the given number of elements and
     * exponent.
     *
     * @param numberOfElements Number of elements.
     * @param exponent Exponent.
     * @exception NotStrictlyPositiveException if {@code numberOfElements <= 0}
     * or {@code exponent <= 0}.
     */
    public ZipfDistribution(final int numberOfElements, final double exponent) {
        this(new Well19937c(), numberOfElements, exponent);
    }

    /**
     * Creates a Zipf distribution.
     *
     * @param rng Random number generator.
     * @param numberOfElements Number of elements.
     * @param exponent Exponent.
     * @exception NotStrictlyPositiveException if {@code numberOfElements <= 0}
     * or {@code exponent <= 0}.
     * @since 3.1
     */
    public ZipfDistribution(RandomGenerator rng,
                            int numberOfElements,
                            double exponent)
        throws NotStrictlyPositiveException {
        super(rng);

        if (numberOfElements <= 0) {
            throw new NotStrictlyPositiveException(LocalizedFormats.DIMENSION,
                                                   numberOfElements);
        }
        if (exponent <= 0) {
            throw new NotStrictlyPositiveException(LocalizedFormats.EXPONENT,
                                                   exponent);
        }

        this.numberOfElements = numberOfElements;
        this.exponent = exponent;
    }

    /**
     * Get the number of elements (e.g. corpus size) for the distribution.
     *
     * @return the number of elements
     */
    public int getNumberOfElements() {
        return numberOfElements;
    }

    /**
     * Get the exponent characterizing the distribution.
     *
     * @return the exponent
     */
    public double getExponent() {
        return exponent;
    }

    /** {@inheritDoc} */
    public double probability(final int x) {
        if (x <= 0 || x > numberOfElements) {
            return 0.0;
        }

        return (1.0 / FastMath.pow(x, exponent)) / generalizedHarmonic(numberOfElements, exponent);
    }

    /** {@inheritDoc} */
    public double cumulativeProbability(final int x) {
        if (x <= 0) {
            return 0.0;
        } else if (x >= numberOfElements) {
            return 1.0;
        }

        return generalizedHarmonic(x, exponent) / generalizedHarmonic(numberOfElements, exponent);
    }

    /**
     * {@inheritDoc}
     *
     * For number of elements {@code N} and exponent {@code s}, the mean is
     * {@code Hs1 / Hs}, where
     * 
    *
  • {@code Hs1 = generalizedHarmonic(N, s - 1)},
  • *
  • {@code Hs = generalizedHarmonic(N, s)}.
  • *
*/ public double getNumericalMean() { if (!numericalMeanIsCalculated) { numericalMean = calculateNumericalMean(); numericalMeanIsCalculated = true; } return numericalMean; } /** * Used by {@link #getNumericalMean()}. * * @return the mean of this distribution */ protected double calculateNumericalMean() { final int N = getNumberOfElements(); final double s = getExponent(); final double Hs1 = generalizedHarmonic(N, s - 1); final double Hs = generalizedHarmonic(N, s); return Hs1 / Hs; } /** * {@inheritDoc} * * For number of elements {@code N} and exponent {@code s}, the mean is * {@code (Hs2 / Hs) - (Hs1^2 / Hs^2)}, where *
    *
  • {@code Hs2 = generalizedHarmonic(N, s - 2)},
  • *
  • {@code Hs1 = generalizedHarmonic(N, s - 1)},
  • *
  • {@code Hs = generalizedHarmonic(N, s)}.
  • *
*/ public double getNumericalVariance() { if (!numericalVarianceIsCalculated) { numericalVariance = calculateNumericalVariance(); numericalVarianceIsCalculated = true; } return numericalVariance; } /** * Used by {@link #getNumericalVariance()}. * * @return the variance of this distribution */ protected double calculateNumericalVariance() { final int N = getNumberOfElements(); final double s = getExponent(); final double Hs2 = generalizedHarmonic(N, s - 2); final double Hs1 = generalizedHarmonic(N, s - 1); final double Hs = generalizedHarmonic(N, s); return (Hs2 / Hs) - ((Hs1 * Hs1) / (Hs * Hs)); } /** * Calculates the Nth generalized harmonic number. See * Harmonic * Series. * * @param n Term in the series to calculate (must be larger than 1) * @param m Exponent (special case {@code m = 1} is the harmonic series). * @return the nth generalized harmonic number. */ private double generalizedHarmonic(final int n, final double m) { double value = 0; for (int k = n; k > 0; --k) { value += 1.0 / FastMath.pow(k, m); } return value; } /** * {@inheritDoc} * * The lower bound of the support is always 1 no matter the parameters. * * @return lower bound of the support (always 1) */ public int getSupportLowerBound() { return 1; } /** * {@inheritDoc} * * The upper bound of the support is the number of elements. * * @return upper bound of the support */ public int getSupportUpperBound() { return getNumberOfElements(); } /** * {@inheritDoc} * * The support of this distribution is connected. * * @return {@code true} */ public boolean isSupportConnected() { return true; } }




© 2015 - 2025 Weber Informatics LLC | Privacy Policy