All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.commons.math3.distribution.ZipfDistribution Maven / Gradle / Ivy

Go to download

The Apache Commons Math project is a library of lightweight, self-contained mathematics and statistics components addressing the most common practical problems not immediately available in the Java programming language or commons-lang.

There is a newer version: 3.6.1
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.commons.math3.distribution;

import org.apache.commons.math3.exception.NotStrictlyPositiveException;
import org.apache.commons.math3.exception.util.LocalizedFormats;
import org.apache.commons.math3.random.RandomGenerator;
import org.apache.commons.math3.random.Well19937c;
import org.apache.commons.math3.util.FastMath;

/**
 * Implementation of the Zipf distribution.
 *
 * @see Zipf distribution (MathWorld)
 */
public class ZipfDistribution extends AbstractIntegerDistribution {
    /** Serializable version identifier. */
    private static final long serialVersionUID = -140627372283420404L;
    /** Number of elements. */
    private final int numberOfElements;
    /** Exponent parameter of the distribution. */
    private final double exponent;
    /** Cached numerical mean */
    private double numericalMean = Double.NaN;
    /** Whether or not the numerical mean has been calculated */
    private boolean numericalMeanIsCalculated = false;
    /** Cached numerical variance */
    private double numericalVariance = Double.NaN;
    /** Whether or not the numerical variance has been calculated */
    private boolean numericalVarianceIsCalculated = false;

    /**
     * Create a new Zipf distribution with the given number of elements and
     * exponent.
     * 

* Note: this constructor will implicitly create an instance of * {@link Well19937c} as random generator to be used for sampling only (see * {@link #sample()} and {@link #sample(int)}). In case no sampling is * needed for the created distribution, it is advised to pass {@code null} * as random generator via the appropriate constructors to avoid the * additional initialisation overhead. * * @param numberOfElements Number of elements. * @param exponent Exponent. * @exception NotStrictlyPositiveException if {@code numberOfElements <= 0} * or {@code exponent <= 0}. */ public ZipfDistribution(final int numberOfElements, final double exponent) { this(new Well19937c(), numberOfElements, exponent); } /** * Creates a Zipf distribution. * * @param rng Random number generator. * @param numberOfElements Number of elements. * @param exponent Exponent. * @exception NotStrictlyPositiveException if {@code numberOfElements <= 0} * or {@code exponent <= 0}. * @since 3.1 */ public ZipfDistribution(RandomGenerator rng, int numberOfElements, double exponent) throws NotStrictlyPositiveException { super(rng); if (numberOfElements <= 0) { throw new NotStrictlyPositiveException(LocalizedFormats.DIMENSION, numberOfElements); } if (exponent <= 0) { throw new NotStrictlyPositiveException(LocalizedFormats.EXPONENT, exponent); } this.numberOfElements = numberOfElements; this.exponent = exponent; } /** * Get the number of elements (e.g. corpus size) for the distribution. * * @return the number of elements */ public int getNumberOfElements() { return numberOfElements; } /** * Get the exponent characterizing the distribution. * * @return the exponent */ public double getExponent() { return exponent; } /** {@inheritDoc} */ public double probability(final int x) { if (x <= 0 || x > numberOfElements) { return 0.0; } return (1.0 / FastMath.pow(x, exponent)) / generalizedHarmonic(numberOfElements, exponent); } /** {@inheritDoc} */ @Override public double logProbability(int x) { if (x <= 0 || x > numberOfElements) { return Double.NEGATIVE_INFINITY; } return -FastMath.log(x) * exponent - FastMath.log(generalizedHarmonic(numberOfElements, exponent)); } /** {@inheritDoc} */ public double cumulativeProbability(final int x) { if (x <= 0) { return 0.0; } else if (x >= numberOfElements) { return 1.0; } return generalizedHarmonic(x, exponent) / generalizedHarmonic(numberOfElements, exponent); } /** * {@inheritDoc} * * For number of elements {@code N} and exponent {@code s}, the mean is * {@code Hs1 / Hs}, where *

    *
  • {@code Hs1 = generalizedHarmonic(N, s - 1)},
  • *
  • {@code Hs = generalizedHarmonic(N, s)}.
  • *
*/ public double getNumericalMean() { if (!numericalMeanIsCalculated) { numericalMean = calculateNumericalMean(); numericalMeanIsCalculated = true; } return numericalMean; } /** * Used by {@link #getNumericalMean()}. * * @return the mean of this distribution */ protected double calculateNumericalMean() { final int N = getNumberOfElements(); final double s = getExponent(); final double Hs1 = generalizedHarmonic(N, s - 1); final double Hs = generalizedHarmonic(N, s); return Hs1 / Hs; } /** * {@inheritDoc} * * For number of elements {@code N} and exponent {@code s}, the mean is * {@code (Hs2 / Hs) - (Hs1^2 / Hs^2)}, where *
    *
  • {@code Hs2 = generalizedHarmonic(N, s - 2)},
  • *
  • {@code Hs1 = generalizedHarmonic(N, s - 1)},
  • *
  • {@code Hs = generalizedHarmonic(N, s)}.
  • *
*/ public double getNumericalVariance() { if (!numericalVarianceIsCalculated) { numericalVariance = calculateNumericalVariance(); numericalVarianceIsCalculated = true; } return numericalVariance; } /** * Used by {@link #getNumericalVariance()}. * * @return the variance of this distribution */ protected double calculateNumericalVariance() { final int N = getNumberOfElements(); final double s = getExponent(); final double Hs2 = generalizedHarmonic(N, s - 2); final double Hs1 = generalizedHarmonic(N, s - 1); final double Hs = generalizedHarmonic(N, s); return (Hs2 / Hs) - ((Hs1 * Hs1) / (Hs * Hs)); } /** * Calculates the Nth generalized harmonic number. See * Harmonic * Series. * * @param n Term in the series to calculate (must be larger than 1) * @param m Exponent (special case {@code m = 1} is the harmonic series). * @return the nth generalized harmonic number. */ private double generalizedHarmonic(final int n, final double m) { double value = 0; for (int k = n; k > 0; --k) { value += 1.0 / FastMath.pow(k, m); } return value; } /** * {@inheritDoc} * * The lower bound of the support is always 1 no matter the parameters. * * @return lower bound of the support (always 1) */ public int getSupportLowerBound() { return 1; } /** * {@inheritDoc} * * The upper bound of the support is the number of elements. * * @return upper bound of the support */ public int getSupportUpperBound() { return getNumberOfElements(); } /** * {@inheritDoc} * * The support of this distribution is connected. * * @return {@code true} */ public boolean isSupportConnected() { return true; } }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy