All Downloads are FREE. Search and download functionalities are using the official Maven repository.

gov.sandia.cognition.statistics.distribution.ScalarDataDistribution Maven / Gradle / Ivy

/*
 * File:                ScalarDataDistribution.java
 * Authors:             Kevin R. Dixon
 * Company:             Sandia National Laboratories
 * Project:             Cognitive Foundry
 *
 * Copyright Jan 27, 2009, Sandia Corporation.
 * Under the terms of Contract DE-AC04-94AL85000, there is a non-exclusive
 * license for use of this work by or on behalf of the U.S. Government.
 * Export of this program may require a license from the United States
 * Government. See CopyrightHistory.txt for complete details.
 *
 */
package gov.sandia.cognition.statistics.distribution;

import gov.sandia.cognition.annotation.PublicationReference;
import gov.sandia.cognition.annotation.PublicationType;
import gov.sandia.cognition.collection.AbstractMutableDoubleMap;
import gov.sandia.cognition.collection.ScalarMap;
import gov.sandia.cognition.learning.algorithm.AbstractBatchAndIncrementalLearner;
import gov.sandia.cognition.math.MutableDouble;
import gov.sandia.cognition.statistics.CumulativeDistributionFunction;
import gov.sandia.cognition.statistics.DataDistribution;
import gov.sandia.cognition.statistics.DistributionEstimator;
import gov.sandia.cognition.statistics.UnivariateDistribution;
import java.util.Map;
import java.util.TreeMap;

/**
 * A Data Distribution that uses Doubles as its keys, making it a univariate
 * distribution
 * @author Kevin R. Dixon
 * @since 3.1
 */
public class ScalarDataDistribution
    extends DefaultDataDistribution
    implements UnivariateDistribution
{

    /** 
     * Creates a new instance of ScalarDataDistribution
     */
    public ScalarDataDistribution()
    {
        super();
    }

    /**
     * Copy constructor
     * @param other
     * ScalarDataDistribution to copy
     */
    public ScalarDataDistribution(
        ScalarDataDistribution other)
    {
        super(other);
    }

    /**
     * Creates a new instance of ScalarDataDistribution
     * @param data
     * Data to create the distribution
     */
    public ScalarDataDistribution(
        Iterable data )
    {
        this();
        // Can't use incrementAll, because that would require "? extends Double"
        for( Number value : data )
        {
            this.increment(value.doubleValue());
        }
    }

    /**
     * Creates a new instance of ScalarDataDistribution
     * @param map
     * @param total
     */
    protected ScalarDataDistribution(
        final Map map,
        final double total)
    {
        super(map, total);
    }

    @Override
    public ScalarDataDistribution clone()
    {
        return (ScalarDataDistribution) super.clone();
    }

    @Override
    public ScalarDataDistribution.PMF getProbabilityFunction()
    {
        return new ScalarDataDistribution.PMF(this);
    }

    @Override
    public Double getMean()
    {
        return this.getMeanAsDouble();
    }
    
    @Override
    public double getMeanAsDouble()
    {
        double sum = 0.0;
        for (ScalarMap.Entry entry : this.entrySet())
        {
            final double weight = entry.getValue();
            final double value = entry.getKey().doubleValue();
            sum += weight * value;
        }

        final double totalWeight = this.getTotal();
        return (totalWeight > 0.0) ? sum / totalWeight : 0.0;
    }

    @Override
    public ScalarDataDistribution.Estimator getEstimator()
    {
        return new ScalarDataDistribution.Estimator();
    }

    @Override
    public Double getMinSupport()
    {
        // Find the minimum key (not the key corresponding to the minimum value)
        double minkey = Double.POSITIVE_INFINITY;
        for (double key : this.keySet())
        {
            if (minkey > key)
            {
                minkey = key;
            }
        }
        return minkey;
    }

    @Override
    public Double getMaxSupport()
    {
        // Find the max key (not the key corresponding to the max value)
        double maxkey = Double.NEGATIVE_INFINITY;
        for (double key : this.keySet())
        {
            if (maxkey < key)
            {
                maxkey = key;
            }
        }
        return maxkey;
    }

    @Override
    public ScalarDataDistribution.CDF getCDF()
    {
        return new ScalarDataDistribution.CDF(this);
    }

    @Override
    @PublicationReference(
        title = "Algorithms for calculating variance",
        type = PublicationType.WebPage,
        year = 2010,
        author = "Wikipedia",
        url = "http://en.wikipedia.org/wiki/Algorithms_for_calculating_variance")
    public double getVariance()
    {

        // Largely copied from:
        // UnivariateStatisticsUtil.computeWeightedMeanAndVariance();
        // Note: This is more compilcated than a straight-forward algorithm
        // that just computes the sum and sum-of-squares to get around
        // numerical precision issues.
        double mean = 0.0;
        double weightSum = 0.0;
        double m2 = 0.0;

        for (ScalarMap.Entry entry : this.entrySet())
        {
            final double x = entry.getKey();
            double weight = entry.getValue();

            if (weight != 0.0)
            {
                if (weight < 0.0)
                {
                    // Use the absolute value of weights.
                    weight = -weight;
                }

                final double newWeightSum = weightSum + weight;
                final double delta = x - mean;

                final double update = delta * weight / newWeightSum;
                m2 += weightSum * delta * update;
                mean += update;
                weightSum = newWeightSum;
            }
        }

        double variance;
        if (weightSum > 0.0)
        {
            variance = m2 / weightSum;
        }
        else
        {
            variance = 0.0;
        }

        return variance;

    }

    /**
     * PMF of the ScalarDataDistribution
     */
    public static class PMF
        extends ScalarDataDistribution
        implements DataDistribution.PMF
    {

        /**
         * Default constructor
         */
        public PMF()
        {
            super();
        }

        /**
         * Copy constructor
         * @param other
         * ScalarDataDistribution to copy
         */
        public PMF(
            final ScalarDataDistribution other)
        {
            super(other);
        }

        /**
         * Creates a new instance of PMF
         * @param data
         * Data used to create the PMF
         */
        public PMF(
            final Iterable data )
        {
            super( data );
        }

        @Override
        public double logEvaluate(
            final Double input)
        {
            return this.getLogFraction(input);
        }

        @Override
        public Double evaluate(
            final Double input)
        {
            return this.getFraction(input);
        }

        @Override
        public ScalarDataDistribution.PMF getProbabilityFunction()
        {
            return this;
        }

    }

    /**
     * CDF of the ScalarDataDistribution, maintains the keys/domain in
     * sorted order (TreeMap), so it's slower than it's peers.
     */
    public static class CDF
        extends ScalarDataDistribution
        implements CumulativeDistributionFunction
    {

        /**
         * Default constructor
         */
        public CDF()
        {
            super(new TreeMap(), 0.0);
        }

        /**
         * Copy constructor
         * @param other
         * ScalarDataDistribution to copy
         */
        public CDF(
            ScalarDataDistribution other)
        {
            this();
            this.incrementAll(other);
        }

        /**
         * Creates a new instance of PMF
         * @param data
         * Data used to create the PMF
         */
        public CDF(
            final Iterable data )
        {
            this();
            for( Number value : data )
            {
                this.increment(value.doubleValue());
            }
        }

        @Override
        public ScalarDataDistribution.CDF clone()
        {
            // The copy constructor is more appropriate because
            // super.clone will attempt to create a new LinkedHashMap
            // instead of a TreeMap
            ScalarDataDistribution.CDF clone =
                new ScalarDataDistribution.CDF( this );
            return clone;
        }

        @Override
        public Double getMinSupport()
        {
            return ((TreeMap) this.map).firstKey();
        }

        @Override
        public Double getMaxSupport()
        {
            return ((TreeMap) this.map).lastKey();
        }

        @Override
        public ScalarDataDistribution.CDF getCDF()
        {
            return this;
        }

        @Override
        public Double evaluate(
            Double input)
        {
            final double x0 = input;
            double sum = 0.0;
            for (ScalarMap.Entry entry : this.entrySet())
            {
                final double x = entry.getKey();
                if (x <= x0)
                {
                    sum += entry.getValue();
                }
            }
            final double t = this.getTotal();
            return (t > 0.0) ? (sum / t) : 0.0;
        }

    }

    /**
     * Estimator for a ScalarDataDistribution
     */
    public static class Estimator
        extends AbstractBatchAndIncrementalLearner
        implements DistributionEstimator
    {

        /**
         * Default constructor
         */
        public Estimator()
        {
            super();
        }

        @Override
        public ScalarDataDistribution createInitialLearnedObject()
        {
            return new ScalarDataDistribution();
        }

        @Override
        public void update(
            ScalarDataDistribution target,
            Double data)
        {
            target.increment(data, 1.0);
        }

    }

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy