All Downloads are FREE. Search and download functionalities are using the official Maven repository.

gov.sandia.cognition.statistics.ProbabilityMassFunctionUtil Maven / Gradle / Ivy

The newest version!
/*
 * File:                ProbabilityMassFunctionUtil.java
 * Authors:             Kevin R. Dixon
 * Company:             Sandia National Laboratories
 * Project:             Cognitive Foundry
 * 
 * Copyright Feb 3, 2009, Sandia Corporation.
 * Under the terms of Contract DE-AC04-94AL85000, there is a non-exclusive
 * license for use of this work by or on behalf of the U.S. Government. 
 * Export of this program may require a license from the United States
 * Government. See CopyrightHistory.txt for complete details.
 * 
 */

package gov.sandia.cognition.statistics;

import gov.sandia.cognition.annotation.PublicationReference;
import gov.sandia.cognition.annotation.PublicationType;
import gov.sandia.cognition.collection.CollectionUtil;
import gov.sandia.cognition.learning.data.DefaultInputOutputPair;
import gov.sandia.cognition.learning.data.InputOutputPair;
import gov.sandia.cognition.math.ProbabilityUtil;
import gov.sandia.cognition.math.UnivariateStatisticsUtil;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.List;
import java.util.Random;

/**
 * Utility methods for helping computations in PMFs.
 * @author Kevin R. Dixon
 * @since 3.0
 */
public class ProbabilityMassFunctionUtil
{

    /**
     * Computes the information-theoretic entropy of the PMF in bits.
     * @param 
     * Type of data on the domain of the PMF.
     * @param pmf
     * PMF to compute the entropy.
     * @return
     * Entropy in bits of the given PMF.
     */
    @PublicationReference(
        author="Wikipedia",
        title="Entropy (information theory)",
        type=PublicationType.WebPage,
        year=2009,
        url="http://en.wikipedia.org/wiki/Entropy_(Information_theory)"
    )
    public static  double getEntropy(
        final ProbabilityMassFunction pmf )
    {
        // Compute the entropy by looping over the values in the maps
        Collection domain = pmf.getDomain();
        ArrayList data = new ArrayList( domain.size() );
        for( DataType input : domain )
        {
            data.add( pmf.evaluate( input ) );
        }

        return UnivariateStatisticsUtil.computeEntropy(data);

    }
    
    /**
     * Samples from the ProbabilityMassFunction.  The return value will be
     * sampled according to the given PMF.
     * @param  Type of data to use
     * @param pmf PMF from which to sample
     * @param random Random to sample from
     * @param numSamples Number of samples to draw from the given PMF.
     * @return
     * Samples drawn according to the given PMF.
     */
    public static  ArrayList sample(
        final ProbabilityMassFunction pmf,
        final Random random,
        final int numSamples )
    {
        ArrayList samples = new ArrayList(numSamples);
        sampleInto(pmf, random, numSamples, samples);
        return samples;
    }
    
    /**
     * Samples from the ProbabilityMassFunction.  The return value will be
     * sampled according to the given PMF.
     * @param  Type of data to use
     * @param pmf PMF from which to sample
     * @param random Random to sample from
     * @param numSamples Number of samples to draw from the given PMF.
     * @param output Collection to add samples drawn according to the given PMF.
     */
    public static  void sampleInto(
        final ProbabilityMassFunction pmf,
        final Random random,
        final int numSamples,
        final Collection output)
    {
        if (numSamples == 1)
        {
            output.add(sampleSingle(pmf, random));
        }
        else if (numSamples > 1)
        {
            sampleMultipleInto(pmf, random, numSamples, output);
        }
    }

    /**
     * Draws a single sample from the given PMF
     * @param 
     * Type of observations generated by the PMF
     * @param pmf
     * PMF from which to draw.
     * @param random
     * Random number generator
     * @return
     * Single sample from the PMF
     */
    public static  DataType sampleSingle(
        final ProbabilityMassFunction pmf,
        final Random random )
    {
        double p = random.nextDouble();
        for( DataType x : pmf.getDomain() )
        {
            p -= pmf.evaluate(x);
            if( p <= 0.0 )
            {
                return x;
            }
        }
        return null;
    }

    /**
     * Samples from the ProbabilityMassFunction.  The return value will be
     * sampled according to the given PMF.
     * @param  Type of data to use
     * @param pmf PMF from which to sample
     * @param random Random to sample from
     * @param numSamples Number of samples to draw from the given PMF.
     * @return
     * Samples drawn according to the given PMF.
     */
    @SuppressWarnings("unchecked")
    public static  ArrayList sampleMultiple(
        final ProbabilityMassFunction pmf,
        final Random random,
        final int numSamples )
    {
        final ArrayList result = new ArrayList(numSamples);
        sampleMultipleInto(pmf, random, numSamples, result);
        return result;
    }
    
    /**
     * Samples from the ProbabilityMassFunction.  The return value will be
     * sampled according to the given PMF.
     * @param  Type of data to use
     * @param pmf PMF from which to sample
     * @param random Random to sample from
     * @param numSamples Number of samples to draw from the given PMF.
     * @param output Collection to add samples drawn according to the given PMF.
     */
    @SuppressWarnings("unchecked")
    public static  void sampleMultipleInto(
        final ProbabilityMassFunction pmf,
        final Random random,
        final int numSamples,
        final Collection output)
    {
        // Compute the cumulative probability counts.
        // We can then use binary search to make the lookup process VERY zoomy.
        int N = pmf.getDomain().size();
        double[] cumulativeProbabilities = new double[ N ];
        ArrayList domain =
            CollectionUtil.asArrayList( pmf.getDomain() );
        double psum = 0.0;
        final int domainSize = domain.size();
        for( int index = 0; index < domainSize; index++ )
        {
            final DataType x = domain.get(index);
            psum += pmf.evaluate( x );
            cumulativeProbabilities[index] = psum;
        }

        sampleMultipleInto(cumulativeProbabilities, domain, random, numSamples, 
            output);
    }

    /**
     * Samples multiple elements from the domain proportionately to the
     * cumulative weights in the given weight array using a fast
     * binary search algorithm
     * @param 
     * Type of data to be sampled
     * @param cumulativeWeights
     * Cumulative weights to sample from
     * @param domain
     * Domain from which to sample
     * @param random
     * Random number generator
     * @param numSamples
     * Number of samples to draw from the distribution
     * @return
     * Samples draw proportionately from the cumulative weights
     */
    public static  ArrayList sampleMultiple(
        final double[] cumulativeWeights,
        final List domain,
        final Random random,
        final int numSamples )
    {
        final ArrayList result = new ArrayList(numSamples);
        sampleMultipleInto(cumulativeWeights, domain, random,
            numSamples, result);
        return result;
    }
    
    /**
     * Samples multiple elements from the domain proportionately to the
     * cumulative weights in the given weight array using a fast
     * binary search algorithm
     * @param 
     * Type of data to be sampled
     * @param cumulativeWeights
     * Cumulative weights to sample from
     * @param domain
     * Domain from which to sample
     * @param random
     * Random number generator
     * @param numSamples
     * Number of samples to draw from the distribution
     * @param   output
     *      The collection to put the samples in.
     */
    public static  void sampleMultipleInto(
        final double[] cumulativeWeights,
        final List domain,
        final Random random,
        final int numSamples,
        final Collection output)
    {
        for( int n = 0; n < numSamples; n++ )
        {
            output.add(sample(cumulativeWeights, domain, random));
        }
    }
    
    /**
     * Samples an element from the domain proportionately to the
     * cumulative weights in the given weight array using a fast
     * binary search algorithm.
     * @param 
     * Type of data to be sampled
     * @param cumulativeWeights
     * Cumulative weights to sample from
     * @param domain
     * Domain from which to sample
     * @param random
     * Random number generator
     * @return
     * A sample from the domain according to the weights.
     */
    public static  DataType sample(
        final double[] cumulativeWeights,
        final List domain,
        final Random random)
    {
        final int index = DiscreteSamplingUtil.sampleIndexFromCumulativeProportions(
            random, cumulativeWeights);
        return domain.get(index);
    }

    /**
     * Samples a single element from the domain proportionately to the given
     * weights
     * @param 
     * Type of data to be sampled
     * @param weights
     * Weights from which we will sample proportionately
     * @param domain
     * Domain from which to return the result
     * @param random
     * Random number generator
     * @return
     * A single sample from the domain proportionately from the weights
     */
    public static  DataType sampleSingle(
        final double[] weights,
        final Collection domain,
        final Random random )
    {

        double sum = 0.0;
        final int N = weights.length;
        for( int n = 0; n < N; n++ )
        {
            sum += weights[n];
        }

        double s = sum * random.nextDouble();
        int n = 0;
        for( DataType value : domain )
        {
            s -= weights[n];
            if( s <= 0.0 )
            {
                return value;
            }
        }
        return null;
        
    }

    /**
     * Inverts the discrete CDF, that is p=Pr{x<=X}.
     * @param  Type of number from the distribution
     * @param cdf
     * CDF of a discrete distribution.
     * @param p
     * Probability to invert, must be [0,1].
     * @return
     * Value of x such that p >= CDF(x) and p <= CDF(x_next).
     */
    public static  InputOutputPair inverse(
        final CumulativeDistributionFunction cdf,
        final double p )
    {
        ProbabilityUtil.assertIsProbability(p);
        for( DataType x : ((DiscreteDistribution) cdf).getDomain() )
        {
            final double px = cdf.evaluate(x);
            if( p <= px )
            {
                return new DefaultInputOutputPair( x, px );
            }
        }
        throw new IllegalArgumentException(
            "Could not invert CDF for p=" + p );
    }

    /**
     * Computes the CDF value for the given PMF for the input.  That is,
     * the value of P=CDF(input)=sum(PMF(x<=input)).
     * @param input
     * Input to compute the CDF of.
     * @param distribution
     * Distribution to consider.
     * @return
     * CDF value of the distirbution for the given input
     */
    public static double computeCumulativeValue(
        final int input,
        final ClosedFormDiscreteUnivariateDistribution distribution )
    {

        int minx = distribution.getMinSupport().intValue();
        ProbabilityMassFunction pmf =
            distribution.getProbabilityFunction();
        double sum = 0.0;
        for( int x = minx; x <= input; x++ )
        {
            sum += pmf.evaluate(x);
        }

        return sum;
    }

}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy