gov.sandia.cognition.statistics.method.InverseTransformSampling Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of cognitive-foundry Show documentation
Show all versions of cognitive-foundry Show documentation
A single jar with all the Cognitive Foundry components.
/*
* File: InverseTransformSampling.java
* Authors: Kevin R. Dixon
* Company: Sandia National Laboratories
* Project: Cognitive Foundry
*
* Copyright Jan 28, 2009, Sandia Corporation.
* Under the terms of Contract DE-AC04-94AL85000, there is a non-exclusive
* license for use of this work by or on behalf of the U.S. Government.
* Export of this program may require a license from the United States
* Government. See CopyrightHistory.txt for complete details.
*
*/
package gov.sandia.cognition.statistics.method;
import gov.sandia.cognition.annotation.PublicationReference;
import gov.sandia.cognition.annotation.PublicationType;
import gov.sandia.cognition.learning.algorithm.root.RootFinder;
import gov.sandia.cognition.learning.algorithm.root.RootFinderRiddersMethod;
import gov.sandia.cognition.learning.algorithm.root.SolverFunction;
import gov.sandia.cognition.learning.data.DefaultInputOutputPair;
import gov.sandia.cognition.learning.data.InputOutputPair;
import gov.sandia.cognition.math.ProbabilityUtil;
import gov.sandia.cognition.statistics.CumulativeDistributionFunction;
import gov.sandia.cognition.statistics.DiscreteDistribution;
import gov.sandia.cognition.statistics.ProbabilityMassFunctionUtil;
import gov.sandia.cognition.statistics.UnivariateProbabilityDensityFunction;
import gov.sandia.cognition.statistics.SmoothCumulativeDistributionFunction;
import gov.sandia.cognition.statistics.SmoothUnivariateDistribution;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Random;
/**
* Inverse transform sampling is a method by which one can sample from an
* arbitrary distribution using only a uniform random-number generator and
* the ability to empirically invert the CDF. This allows us to convert
* Java's random-number generator (Random) into a Beta Distribution, Gamma
* distribution, or any other scalar distribution. It does, however, tend
* to require several function evaluations to invert the CDF and is typically
* fairly costly rather than those distributions that can be inverted
* algebraically.
* @author Kevin R. Dixon
* @since 3.0
*/
@PublicationReference(
author="Wikipedia",
title="Inverse transform sampling",
type=PublicationType.WebPage,
year=2008,
url="http://en.wikipedia.org/wiki/Inverse_transform_sampling"
)
public class InverseTransformSampling
{
/**
* Default root finding method for the algorithm, RootFinderRiddersMethod.
*/
public final static RootFinder DEFAULT_ROOT_FINDER =
new RootFinderRiddersMethod();
// new MinimizerBasedRootFinder();
// new RootFinderSecantMethod();
/**
* Tolerance for Newton's method, {@value}.
*/
public static final double DEFAULT_TOLERANCE = 1e-10;
/**
* Number of function evaluations needed to invert the distribution.
*/
public static int FUNCTION_EVALUATIONS;
/**
* Samples from the given CDF using the inverseRootFinder transform sampling method.
* @param cdf
* CDF from which to sample.
* @param random
* Random number generator.
* @param numSamples
* Number of samples to draw from the CDF.
* @return
* Samples draw according to the given CDF.
*/
public static ArrayList sample(
CumulativeDistributionFunction cdf,
Random random,
int numSamples )
{
ArrayList samples = new ArrayList( numSamples );
sampleInto(cdf, random, numSamples, samples);
return samples;
}
/**
* Samples from the given CDF using the inverseRootFinder transform sampling method.
* @param cdf
* CDF from which to sample.
* @param random
* Random number generator.
* @param numSamples
* Number of samples to draw from the CDF.
* @param output
* Collection to put samples drawn according to the given CDF.
*/
public static void sampleInto(
final CumulativeDistributionFunction cdf,
final Random random,
final int numSamples,
final Collection super Double> output)
{
for( int n = 0; n < numSamples; n++ )
{
double p = random.nextDouble();
InputOutputPair result = inverse( cdf, p );
if( Math.abs(result.getOutput()-p) > DEFAULT_TOLERANCE )
{
throw new IllegalArgumentException(
"Could not invert the CDF (" + cdf + ") at p = " + p + "(" + result.getOutput() + ")" );
}
output.add( result.getInput() );
}
}
/**
* Inverts the given CDF, finding the value of "x" so that CDF(x)=p using
* a root-finding algorithm.
* @param
* Type of number observation.
* @param cdf
* CDF to invert
* @param p
* Probability, [0,1].
* @return
* Root of the CDF.
*/
@SuppressWarnings("unchecked")
public static InputOutputPair inverse(
CumulativeDistributionFunction cdf,
double p )
{
if( cdf instanceof DiscreteDistribution )
{
return ProbabilityMassFunctionUtil.inverse(cdf, p);
}
else if( cdf instanceof SmoothUnivariateDistribution )
{
return (InputOutputPair) inverseNewtonsMethod(
(SmoothUnivariateDistribution) cdf, p, DEFAULT_TOLERANCE );
}
else
{
return (InputOutputPair) inverseRootFinder(
DEFAULT_ROOT_FINDER, (CumulativeDistributionFunction) cdf, p);
}
}
/**
* Inverts the given CDF, finding the value of "x" so that CDF(x)=p using
* a root-finding algorithm.
* @param rootFinder
* Root-finding algorithm to use.
* @param cdf
* CDF to invert
* @param p
* Probability, [0,1].
* @return
* Root of the CDF.
*/
public static InputOutputPair inverseRootFinder(
RootFinder rootFinder,
CumulativeDistributionFunction cdf,
double p )
{
ProbabilityUtil.assertIsProbability(p);
rootFinder.setTolerance(DEFAULT_TOLERANCE);
double mean = cdf.getMean();
double stddev = Math.sqrt( cdf.getVariance() );
// If our standard deviation is too large, just cap it.
final double maxDev = 10.0;
if( (stddev > maxDev) || Double.isInfinite(stddev) )
{
stddev = maxDev;
}
double delta = stddev * 2.0 * (p-0.5);
double xhat = mean + delta;
SolverFunction fhat = new SolverFunction( p, cdf );
rootFinder.setInitialGuess(xhat);
InputOutputPair root = rootFinder.learn(fhat);
return DefaultInputOutputPair.create(
root.getInput(), root.getOutput() + p);
}
/**
* Inverts the given CDF, finding the value of "x" so that CDF(x)=p using
* a root-finding algorithm.
* @param distribution
* Distribution to invert
* @param p
* Probability, [0,1].
* @param tolerance
* Tolerance below which we stop.
* @return
* Root of the CDF.
*/
public static InputOutputPair inverseNewtonsMethod(
SmoothUnivariateDistribution distribution,
double p,
double tolerance )
{
ProbabilityUtil.assertIsProbability(p);
SmoothCumulativeDistributionFunction cdf = distribution.getCDF();
InputOutputPair initialGuess =
initializeNewtonsMethod(cdf, p, tolerance);
double xhat = initialGuess.getInput();
double phat = initialGuess.getOutput();
if( Math.abs(p-phat) <= tolerance )
{
return initialGuess;
}
else
{
final double xmin = cdf.getMinSupport();
final double xmax = cdf.getMaxSupport();
UnivariateProbabilityDensityFunction pdf =
distribution.getProbabilityFunction();
double err = p - phat;
double m = pdf.evaluate(xhat);
final double stepScale = 0.5;
double stepMultiplier = 1.0;
final double maxStep = Math.min( (xmax-xmin)/2.0, 1000.0*cdf.getVariance() );
for( int iteration = 0; iteration < 100; iteration++ )
{
// The PDF is flat at xhat, so just step over by
// the error.
double xproposed;
if( Math.abs(m) <= Double.MIN_VALUE )
{
xproposed = xhat + err*stepMultiplier;
}
// The PDF has a slope, so use this to estimate where
// the value of "p" is... this is Newton's method.
else
{
double step = -(phat-p)*stepMultiplier / m;
if( Math.abs(step) > maxStep )
{
step = stepMultiplier * Math.signum(step) * maxStep;
}
xproposed = xhat + step;
}
// The proposed step takes us too far to the left.
if( xproposed <= xmin )
{
stepMultiplier *= stepScale;
}
// The proposed step takes us too far to the right.
else if( xproposed >= xmax )
{
stepMultiplier *= stepScale;
}
// The proposed step is within the support of the distribution.
else
{
// Let's see if the proposal is worse or better...
double pproposed = cdf.evaluate(xproposed);
double errproposed = p - pproposed;
if( Math.abs(errproposed) <= Math.abs(err) )
{
stepMultiplier = 1.0;
xhat = xproposed;
phat = pproposed;
// The PDF is the slope (derivative) of the CDF...
m = pdf.evaluate(xhat);
err = errproposed;
}
else
{
stepMultiplier *= stepScale;
}
}
if( Math.abs(err) <= tolerance)
{
break;
}
}
}
return new DefaultInputOutputPair( xhat, phat );
}
/**
* Initializes Newton's method for inverse transform sampling.
* @param cdf
* CDF to invert.
* @param p
* Target value to invert, that is to find "x" such that p=cdf(x).
* @param tolerance
* Tolerance before stopping.
* @return
* Estimated (xhat,phat) to initialize Newton's method.
*/
protected static InputOutputPair initializeNewtonsMethod(
SmoothCumulativeDistributionFunction cdf,
double p,
double tolerance )
{
double xmin = cdf.getMinSupport();
double xmax = cdf.getMaxSupport();
double xhat;
double phat;
if( Math.abs(p) <= tolerance )
{
xhat = xmin;
phat = cdf.evaluate(xhat);
}
else if( p == 1.0 )
{
xhat = xmax;
phat = cdf.evaluate(xhat);
}
else
{
final double mean = cdf.getMean();
final double pmean = cdf.evaluate(mean);
xhat = mean;
phat = pmean;
if( Math.abs(p-pmean) <= tolerance )
{
xhat = mean;
phat = pmean;
}
// We're in the left segment.
else if( p < pmean )
{
final double leftSegment = mean-xmin;
final double leftFraction = p / pmean;
xhat = leftFraction * leftSegment;
// If the estimate is effectively zero, then
// just guess the mean.
phat = cdf.evaluate(xhat);
if( Math.abs(phat) <= tolerance )
{
xhat = mean;
phat = pmean;
}
}
else
{
final double rightSegment = xmax-mean;
final double rightFraction = (p-pmean) / (1-pmean);
xhat = rightFraction * rightSegment + mean;
// If the estimate is effectively 1.0, then
// just guess the mean
phat = cdf.evaluate(xhat);
if( Math.abs(1.0-phat) <= tolerance )
{
xhat = mean;
phat = pmean;
}
}
if( Double.isInfinite(xhat) )
{
xhat = mean;
phat = pmean;
}
}
return new DefaultInputOutputPair( xhat, phat );
}
}