All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.commons.rng.sampling.distribution.RejectionInversionZipfSampler Maven / Gradle / Ivy

Go to download

The Apache Commons RNG Sampling module provides samplers for various distributions.

There is a newer version: 1.6
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.commons.rng.sampling.distribution;

import org.apache.commons.rng.UniformRandomProvider;

/**
 * Implementation of the Zipf distribution.
 *
 * 

Sampling uses {@link UniformRandomProvider#nextDouble()}.

* * @since 1.0 */ public class RejectionInversionZipfSampler extends SamplerBase implements SharedStateDiscreteSampler { /** The implementation of the sample method. */ private final SharedStateDiscreteSampler delegate; /** * Implements the rejection-inversion method for the Zipf distribution. */ private static class RejectionInversionZipfSamplerImpl implements SharedStateDiscreteSampler { /** Threshold below which Taylor series will be used. */ private static final double TAYLOR_THRESHOLD = 1e-8; /** 1/2. */ private static final double F_1_2 = 0.5; /** 1/3. */ private static final double F_1_3 = 1d / 3; /** 1/4. */ private static final double F_1_4 = 0.25; /** Number of elements. */ private final int numberOfElements; /** Exponent parameter of the distribution. */ private final double exponent; /** {@code hIntegral(1.5) - 1}. */ private final double hIntegralX1; /** {@code hIntegral(numberOfElements + 0.5)}. */ private final double hIntegralNumberOfElements; /** {@code hIntegralX1 - hIntegralNumberOfElements}. */ private final double r; /** {@code 2 - hIntegralInverse(hIntegral(2.5) - h(2)}. */ private final double s; /** Underlying source of randomness. */ private final UniformRandomProvider rng; /** * @param rng Generator of uniformly distributed random numbers. * @param numberOfElements Number of elements (must be > 0). * @param exponent Exponent (must be > 0). */ RejectionInversionZipfSamplerImpl(UniformRandomProvider rng, int numberOfElements, double exponent) { this.rng = rng; this.numberOfElements = numberOfElements; this.exponent = exponent; this.hIntegralX1 = hIntegral(1.5) - 1; this.hIntegralNumberOfElements = hIntegral(numberOfElements + F_1_2); this.r = hIntegralX1 - hIntegralNumberOfElements; this.s = 2 - hIntegralInverse(hIntegral(2.5) - h(2)); } /** * @param rng Generator of uniformly distributed random numbers. * @param source Source to copy. */ private RejectionInversionZipfSamplerImpl(UniformRandomProvider rng, RejectionInversionZipfSamplerImpl source) { this.rng = rng; this.numberOfElements = source.numberOfElements; this.exponent = source.exponent; this.hIntegralX1 = source.hIntegralX1; this.hIntegralNumberOfElements = source.hIntegralNumberOfElements; this.r = source.r; this.s = source.s; } @Override public int sample() { // The paper describes an algorithm for exponents larger than 1 // (Algorithm ZRI). // The original method uses // H(x) = (v + x)^(1 - q) / (1 - q) // as the integral of the hat function. // This function is undefined for q = 1, which is the reason for // the limitation of the exponent. // If instead the integral function // H(x) = ((v + x)^(1 - q) - 1) / (1 - q) // is used, // for which a meaningful limit exists for q = 1, the method works // for all positive exponents. // The following implementation uses v = 0 and generates integral // number in the range [1, numberOfElements]. // This is different to the original method where v is defined to // be positive and numbers are taken from [0, i_max]. // This explains why the implementation looks slightly different. while (true) { final double u = hIntegralNumberOfElements + rng.nextDouble() * r; // u is uniformly distributed in (hIntegralX1, hIntegralNumberOfElements] final double x = hIntegralInverse(u); int k = (int) (x + F_1_2); // Limit k to the range [1, numberOfElements] if it would be outside // due to numerical inaccuracies. if (k < 1) { k = 1; } else if (k > numberOfElements) { k = numberOfElements; } // Here, the distribution of k is given by: // // P(k = 1) = C * (hIntegral(1.5) - hIntegralX1) = C // P(k = m) = C * (hIntegral(m + 1/2) - hIntegral(m - 1/2)) for m >= 2 // // where C = 1 / (hIntegralNumberOfElements - hIntegralX1) if (k - x <= s || u >= hIntegral(k + F_1_2) - h(k)) { // Case k = 1: // // The right inequality is always true, because replacing k by 1 gives // u >= hIntegral(1.5) - h(1) = hIntegralX1 and u is taken from // (hIntegralX1, hIntegralNumberOfElements]. // // Therefore, the acceptance rate for k = 1 is P(accepted | k = 1) = 1 // and the probability that 1 is returned as random value is // P(k = 1 and accepted) = P(accepted | k = 1) * P(k = 1) = C = C / 1^exponent // // Case k >= 2: // // The left inequality (k - x <= s) is just a short cut // to avoid the more expensive evaluation of the right inequality // (u >= hIntegral(k + 0.5) - h(k)) in many cases. // // If the left inequality is true, the right inequality is also true: // Theorem 2 in the paper is valid for all positive exponents, because // the requirements h'(x) = -exponent/x^(exponent + 1) < 0 and // (-1/hInverse'(x))'' = (1+1/exponent) * x^(1/exponent-1) >= 0 // are both fulfilled. // Therefore, f(x) = x - hIntegralInverse(hIntegral(x + 0.5) - h(x)) // is a non-decreasing function. If k - x <= s holds, // k - x <= s + f(k) - f(2) is obviously also true which is equivalent to // -x <= -hIntegralInverse(hIntegral(k + 0.5) - h(k)), // -hIntegralInverse(u) <= -hIntegralInverse(hIntegral(k + 0.5) - h(k)), // and finally u >= hIntegral(k + 0.5) - h(k). // // Hence, the right inequality determines the acceptance rate: // P(accepted | k = m) = h(m) / (hIntegrated(m+1/2) - hIntegrated(m-1/2)) // The probability that m is returned is given by // P(k = m and accepted) = P(accepted | k = m) * P(k = m) = C * h(m) = C / m^exponent. // // In both cases the probabilities are proportional to the probability mass function // of the Zipf distribution. return k; } } } /** {@inheritDoc} */ @Override public String toString() { return "Rejection inversion Zipf deviate [" + rng.toString() + "]"; } @Override public SharedStateDiscreteSampler withUniformRandomProvider(UniformRandomProvider rng) { return new RejectionInversionZipfSamplerImpl(rng, this); } /** * {@code H(x)} is defined as *
    *
  • {@code (x^(1 - exponent) - 1) / (1 - exponent)}, if {@code exponent != 1}
  • *
  • {@code log(x)}, if {@code exponent == 1}
  • *
* H(x) is an integral function of h(x), the derivative of H(x) is h(x). * * @param x Free parameter. * @return {@code H(x)}. */ private double hIntegral(final double x) { final double logX = Math.log(x); return helper2((1 - exponent) * logX) * logX; } /** * {@code h(x) = 1 / x^exponent}. * * @param x Free parameter. * @return {@code h(x)}. */ private double h(final double x) { return Math.exp(-exponent * Math.log(x)); } /** * The inverse function of {@code H(x)}. * * @param x Free parameter * @return y for which {@code H(y) = x}. */ private double hIntegralInverse(final double x) { double t = x * (1 - exponent); if (t < -1) { // Limit value to the range [-1, +inf). // t could be smaller than -1 in some rare cases due to numerical errors. t = -1; } return Math.exp(helper1(t) * x); } /** * Helper function that calculates {@code log(1 + x) / x}. *

* A Taylor series expansion is used, if x is close to 0. *

* * @param x A value larger than or equal to -1. * @return {@code log(1 + x) / x}. */ private static double helper1(final double x) { if (Math.abs(x) > TAYLOR_THRESHOLD) { return Math.log1p(x) / x; } return 1 - x * (F_1_2 - x * (F_1_3 - F_1_4 * x)); } /** * Helper function to calculate {@code (exp(x) - 1) / x}. *

* A Taylor series expansion is used, if x is close to 0. *

* * @param x Free parameter. * @return {@code (exp(x) - 1) / x} if x is non-zero, or 1 if x = 0. */ private static double helper2(final double x) { if (Math.abs(x) > TAYLOR_THRESHOLD) { return Math.expm1(x) / x; } return 1 + x * F_1_2 * (1 + x * F_1_3 * (1 + F_1_4 * x)); } } /** * This instance delegates sampling. Use the factory method * {@link #of(UniformRandomProvider, int, double)} to create an optimal sampler. * * @param rng Generator of uniformly distributed random numbers. * @param numberOfElements Number of elements. * @param exponent Exponent. * @throws IllegalArgumentException if {@code numberOfElements <= 0} * or {@code exponent < 0}. */ public RejectionInversionZipfSampler(UniformRandomProvider rng, int numberOfElements, double exponent) { super(null); // Delegate all work to specialised samplers. this.delegate = of(rng, numberOfElements, exponent); } /** * Rejection inversion sampling method for a discrete, bounded Zipf * distribution that is based on the method described in *
* Wolfgang Hörmann and Gerhard Derflinger. * "Rejection-inversion to generate variates from monotone discrete * distributions",
* ACM Transactions on Modeling and Computer Simulation (TOMACS) 6.3 (1996): 169-184. *
*/ @Override public int sample() { return delegate.sample(); } /** {@inheritDoc} */ @Override public String toString() { return delegate.toString(); } /** * {@inheritDoc} * * @since 1.3 */ @Override public SharedStateDiscreteSampler withUniformRandomProvider(UniformRandomProvider rng) { return delegate.withUniformRandomProvider(rng); } /** * Creates a new Zipf distribution sampler. * *

Note when {@code exponent = 0} the Zipf distribution reduces to a * discrete uniform distribution over the interval {@code [1, n]} with {@code n} * the number of elements. * * @param rng Generator of uniformly distributed random numbers. * @param numberOfElements Number of elements. * @param exponent Exponent. * @return the sampler * @throws IllegalArgumentException if {@code numberOfElements <= 0} or * {@code exponent < 0}. * @since 1.3 */ public static SharedStateDiscreteSampler of(UniformRandomProvider rng, int numberOfElements, double exponent) { if (numberOfElements <= 0) { throw new IllegalArgumentException("number of elements is not strictly positive: " + numberOfElements); } if (exponent < 0) { throw new IllegalArgumentException("exponent is not positive: " + exponent); } // When the exponent is at the limit of 0 the distribution PMF reduces to 1 / n // and sampling can use a discrete uniform sampler. return exponent > 0 ? new RejectionInversionZipfSamplerImpl(rng, numberOfElements, exponent) : DiscreteUniformSampler.of(rng, 1, numberOfElements); } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy