All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.commons.rng.sampling.distribution.PoissonSamplerCache Maven / Gradle / Ivy

Go to download

Statistical sampling library for use in virtdata libraries, based on apache commons math 4

There is a newer version: 5.17.0
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.commons.rng.sampling.distribution;

import org.apache.commons.rng.UniformRandomProvider;
import org.apache.commons.rng.sampling.distribution.LargeMeanPoissonSampler.LargeMeanPoissonSamplerState;

/**
 * Create a sampler for the
 * Poisson
 * distribution using a cache to minimise construction cost.
 *
 * 

The cache will return a sampler equivalent to * {@link PoissonSampler#PoissonSampler(UniformRandomProvider, double)}.

* *

The cache allows the {@link PoissonSampler} construction cost to be minimised * for low size Poisson samples. The cache stores state for a range of integers where * integer value {@code n} can be used to construct a sampler for the range * {@code n <= mean < n+1}.

* *

The cache is advantageous under the following conditions:

* *
    *
  • The mean of the Poisson distribution falls within a known range. *
  • The sample size to be made with the same sampler is * small. *
  • The Poisson samples have different means with the same integer * value(s) after rounding down. *
* *

If the sample size to be made with the same sampler is large * then the construction cost is low compared to the sampling time and the cache * has minimal benefit.

* *

Performance improvement is dependent on the speed of the * {@link UniformRandomProvider}. A fast provider can obtain a two-fold speed * improvement for a single-use Poisson sampler.

* *

The cache is thread safe. Note that concurrent threads using the cache * must ensure a thread safe {@link UniformRandomProvider} is used when creating * samplers, e.g. a unique sampler per thread.

* *

Sampling uses:

* *
    *
  • {@link UniformRandomProvider#nextDouble()} *
  • {@link UniformRandomProvider#nextLong()} (large means only) *
* * @since 1.2 */ public class PoissonSamplerCache { /** * The minimum N covered by the cache where * {@code N = (int)Math.floor(mean)}. */ private final int minN; /** * The maximum N covered by the cache where * {@code N = (int)Math.floor(mean)}. */ private final int maxN; /** The cache of states between {@link minN} and {@link maxN}. */ private final LargeMeanPoissonSamplerState[] values; /** * @param minMean The minimum mean covered by the cache. * @param maxMean The maximum mean covered by the cache. * @throws IllegalArgumentException if {@code maxMean < minMean} */ public PoissonSamplerCache(double minMean, double maxMean) { checkMeanRange(minMean, maxMean); // The cache can only be used for the LargeMeanPoissonSampler. if (maxMean < PoissonSampler.PIVOT) { // The upper limit is too small so no cache will be used. // This class will just construct new samplers. minN = 0; maxN = 0; values = null; } else { // Convert the mean into integers. // Note the minimum is clipped to the algorithm switch point. this.minN = (int) Math.floor(Math.max(minMean, PoissonSampler.PIVOT)); this.maxN = (int) Math.floor(Math.min(maxMean, Integer.MAX_VALUE)); values = new LargeMeanPoissonSamplerState[maxN - minN + 1]; } } /** * @param minN The minimum N covered by the cache where {@code N = (int)Math.floor(mean)}. * @param maxN The maximum N covered by the cache where {@code N = (int)Math.floor(mean)}. * @param states The precomputed states. */ private PoissonSamplerCache(int minN, int maxN, LargeMeanPoissonSamplerState[] states) { this.minN = minN; this.maxN = maxN; // Stored directly as the states were newly created within this class. this.values = states; } /** * Check the mean range. * * @param minMean The minimum mean covered by the cache. * @param maxMean The maximum mean covered by the cache. * @throws IllegalArgumentException if {@code maxMean < minMean} */ private static void checkMeanRange(double minMean, double maxMean) { // Note: // Although a mean of 0 is invalid for a Poisson sampler this case // is handled to make the cache user friendly. Any low means will // be handled by the SmallMeanPoissonSampler and not cached. // For this reason it is also OK if the means are negative. // Allow minMean == maxMean so that the cache can be used // to create samplers with distinct RNGs and the same mean. if (maxMean < minMean) { throw new IllegalArgumentException( "Max mean: " + maxMean + " < " + minMean); } } /** * Creates a new Poisson sampler. * *

The returned sampler will function exactly the * same as {@link PoissonSampler#PoissonSampler(UniformRandomProvider, double)}. * * @param rng Generator of uniformly distributed random numbers. * @param mean Mean. * @return A Poisson sampler * @throws IllegalArgumentException if {@code mean <= 0} or * {@code mean >} {@link Integer#MAX_VALUE}. */ public DiscreteSampler createPoissonSampler(UniformRandomProvider rng, double mean) { // Ensure the same functionality as the PoissonSampler by // using a SmallMeanPoissonSampler under the switch point. if (mean < PoissonSampler.PIVOT) { return new SmallMeanPoissonSampler(rng, mean); } if (mean > maxN) { // Outside the range of the cache. // This avoids extra parameter checks and handles the case when // the cache is empty or if Math.floor(mean) is not an integer. return new LargeMeanPoissonSampler(rng, mean); } // Convert the mean into an integer. final int n = (int) Math.floor(mean); if (n < minN) { // Outside the lower range of the cache. return new LargeMeanPoissonSampler(rng, mean); } // Look in the cache for a state that can be reused. // Note: The cache is offset by minN. final int index = n - minN; final LargeMeanPoissonSamplerState state = values[index]; if (state == null) { // Create a sampler and store the state for reuse. // Do not worry about thread contention // as the state is effectively immutable. // If recomputed and replaced it will the same. final LargeMeanPoissonSampler sampler = new LargeMeanPoissonSampler(rng, mean); values[index] = sampler.getState(); return sampler; } // Compute the remaining fraction of the mean final double lambdaFractional = mean - n; return new LargeMeanPoissonSampler(rng, state, lambdaFractional); } /** * Check if the mean is within the range where the cache can minimise the * construction cost of the {@link PoissonSampler}. * * @param mean * the mean * @return true, if within the cache range */ public boolean withinRange(double mean) { if (mean < PoissonSampler.PIVOT) { // Construction is optimal return true; } // Convert the mean into an integer. final int n = (int) Math.floor(mean); return n <= maxN && n >= minN; } /** * Checks if the cache covers a valid range of mean values. * *

Note that the cache is only valid for one of the Poisson sampling * algorithms. In the instance that a range was requested that was too * low then there is nothing to cache and this functions returns * {@code false}. * *

The cache can still be used to create a {@link PoissonSampler} using * {@link #createPoissonSampler(UniformRandomProvider, double)}. * *

This method can be used to determine if the cache has a potential * performance benefit. * * @return true, if the cache covers a range of mean values */ public boolean isValidRange() { return values != null; } /** * Gets the minimum mean covered by the cache. * *

This value is the inclusive lower bound and is equal to * the lowest integer-valued mean that is covered by the cache. * *

Note that this value may not match the value passed to the constructor * due to the following reasons: * *

    *
  • At small mean values a different algorithm is used for Poisson * sampling and the cache is unnecessary. *
  • The minimum is always an integer so may be below the constructor * minimum mean. *
* *

If {@link #isValidRange()} returns {@code true} the cache will store * state to reduce construction cost of samplers in * the range {@link #getMinMean()} inclusive to {@link #getMaxMean()} * inclusive. Otherwise this method returns 0; * * @return The minimum mean covered by the cache. */ public double getMinMean() { return minN; } /** * Gets the maximum mean covered by the cache. * *

This value is the inclusive upper bound and is equal to * the double value below the first integer-valued mean that is * above range covered by the cache. * *

Note that this value may not match the value passed to the constructor * due to the following reasons: *

    *
  • At small mean values a different algorithm is used for Poisson * sampling and the cache is unnecessary. *
  • The maximum is always the double value below an integer so * may be above the constructor maximum mean. *
* *

If {@link #isValidRange()} returns {@code true} the cache will store * state to reduce construction cost of samplers in * the range {@link #getMinMean()} inclusive to {@link #getMaxMean()} * inclusive. Otherwise this method returns 0; * * @return The maximum mean covered by the cache. */ public double getMaxMean() { if (isValidRange()) { return Math.nextAfter(maxN + 1.0, -1); } return 0; } /** * Gets the minimum mean value that can be cached. * *

Any {@link PoissonSampler} created with a mean below this level will not * have any state that can be cached. * * @return the minimum cached mean */ public static double getMinimumCachedMean() { return PoissonSampler.PIVOT; } /** * Create a new {@link PoissonSamplerCache} with the given range * reusing the current cache values. * *

This will create a new object even if the range is smaller or the * same as the current cache. * * @param minMean The minimum mean covered by the cache. * @param maxMean The maximum mean covered by the cache. * @throws IllegalArgumentException if {@code maxMean < minMean} * @return the poisson sampler cache */ public PoissonSamplerCache withRange(double minMean, double maxMean) { if (values == null) { // Nothing to reuse return new PoissonSamplerCache(minMean, maxMean); } checkMeanRange(minMean, maxMean); // The cache can only be used for the LargeMeanPoissonSampler. if (maxMean < PoissonSampler.PIVOT) { return new PoissonSamplerCache(0, 0); } // Convert the mean into integers. // Note the minimum is clipped to the algorithm switch point. final int withMinN = (int) Math.floor(Math.max(minMean, PoissonSampler.PIVOT)); final int withMaxN = (int) Math.floor(maxMean); final LargeMeanPoissonSamplerState[] states = new LargeMeanPoissonSamplerState[withMaxN - withMinN + 1]; // Preserve values from the current array to the next int currentIndex; int nextIndex; if (this.minN <= withMinN) { // The current array starts before the new array currentIndex = withMinN - this.minN; nextIndex = 0; } else { // The new array starts before the current array currentIndex = 0; nextIndex = this.minN - withMinN; } final int length = Math.min(values.length - currentIndex, states.length - nextIndex); if (length > 0) { System.arraycopy(values, currentIndex, states, nextIndex, length); } return new PoissonSamplerCache(withMinN, withMaxN, states); } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy