// gov.sandia.cognition.statistics.distribution.ScalarDataDistribution Maven / Gradle / Ivy
/*
* File: ScalarDataDistribution.java
* Authors: Kevin R. Dixon
* Company: Sandia National Laboratories
* Project: Cognitive Foundry
*
* Copyright Jan 27, 2009, Sandia Corporation.
* Under the terms of Contract DE-AC04-94AL85000, there is a non-exclusive
* license for use of this work by or on behalf of the U.S. Government.
* Export of this program may require a license from the United States
* Government. See CopyrightHistory.txt for complete details.
*
*/
package gov.sandia.cognition.statistics.distribution;
import gov.sandia.cognition.annotation.PublicationReference;
import gov.sandia.cognition.annotation.PublicationType;
import gov.sandia.cognition.collection.AbstractMutableDoubleMap;
import gov.sandia.cognition.collection.ScalarMap;
import gov.sandia.cognition.learning.algorithm.AbstractBatchAndIncrementalLearner;
import gov.sandia.cognition.math.MutableDouble;
import gov.sandia.cognition.statistics.CumulativeDistributionFunction;
import gov.sandia.cognition.statistics.DataDistribution;
import gov.sandia.cognition.statistics.DistributionEstimator;
import gov.sandia.cognition.statistics.UnivariateDistribution;
import java.util.Map;
import java.util.TreeMap;
/**
* A Data Distribution that uses Doubles as its keys, making it a univariate
* distribution
* @author Kevin R. Dixon
* @since 3.1
*/
public class ScalarDataDistribution
extends DefaultDataDistribution
implements UnivariateDistribution
{
/**
* Creates a new instance of ScalarDataDistribution
*/
public ScalarDataDistribution()
{
super();
}
/**
* Copy constructor
* @param other
* ScalarDataDistribution to copy
*/
public ScalarDataDistribution(
ScalarDataDistribution other)
{
super(other);
}
/**
* Creates a new instance of ScalarDataDistribution
* @param data
* Data to create the distribution
*/
public ScalarDataDistribution(
Iterable extends Number> data )
{
this();
// Can't use incrementAll, because that would require "? extends Double"
for( Number value : data )
{
this.increment(value.doubleValue());
}
}
/**
* Creates a new instance of ScalarDataDistribution
* @param map
* @param total
*/
protected ScalarDataDistribution(
final Map map,
final double total)
{
super(map, total);
}
@Override
public ScalarDataDistribution clone()
{
return (ScalarDataDistribution) super.clone();
}
@Override
public ScalarDataDistribution.PMF getProbabilityFunction()
{
return new ScalarDataDistribution.PMF(this);
}
@Override
public Double getMean()
{
return this.getMeanAsDouble();
}
@Override
public double getMeanAsDouble()
{
double sum = 0.0;
for (ScalarMap.Entry entry : this.entrySet())
{
final double weight = entry.getValue();
final double value = entry.getKey().doubleValue();
sum += weight * value;
}
final double totalWeight = this.getTotal();
return (totalWeight > 0.0) ? sum / totalWeight : 0.0;
}
@Override
public ScalarDataDistribution.Estimator getEstimator()
{
return new ScalarDataDistribution.Estimator();
}
@Override
public Double getMinSupport()
{
// Find the minimum key (not the key corresponding to the minimum value)
double minkey = Double.POSITIVE_INFINITY;
for (double key : this.keySet())
{
if (minkey > key)
{
minkey = key;
}
}
return minkey;
}
@Override
public Double getMaxSupport()
{
// Find the max key (not the key corresponding to the max value)
double maxkey = Double.NEGATIVE_INFINITY;
for (double key : this.keySet())
{
if (maxkey < key)
{
maxkey = key;
}
}
return maxkey;
}
@Override
public ScalarDataDistribution.CDF getCDF()
{
return new ScalarDataDistribution.CDF(this);
}
@Override
@PublicationReference(
title = "Algorithms for calculating variance",
type = PublicationType.WebPage,
year = 2010,
author = "Wikipedia",
url = "http://en.wikipedia.org/wiki/Algorithms_for_calculating_variance")
public double getVariance()
{
// Largely copied from:
// UnivariateStatisticsUtil.computeWeightedMeanAndVariance();
// Note: This is more compilcated than a straight-forward algorithm
// that just computes the sum and sum-of-squares to get around
// numerical precision issues.
double mean = 0.0;
double weightSum = 0.0;
double m2 = 0.0;
for (ScalarMap.Entry entry : this.entrySet())
{
final double x = entry.getKey();
double weight = entry.getValue();
if (weight != 0.0)
{
if (weight < 0.0)
{
// Use the absolute value of weights.
weight = -weight;
}
final double newWeightSum = weightSum + weight;
final double delta = x - mean;
final double update = delta * weight / newWeightSum;
m2 += weightSum * delta * update;
mean += update;
weightSum = newWeightSum;
}
}
double variance;
if (weightSum > 0.0)
{
variance = m2 / weightSum;
}
else
{
variance = 0.0;
}
return variance;
}
/**
* PMF of the ScalarDataDistribution
*/
public static class PMF
extends ScalarDataDistribution
implements DataDistribution.PMF
{
/**
* Default constructor
*/
public PMF()
{
super();
}
/**
* Copy constructor
* @param other
* ScalarDataDistribution to copy
*/
public PMF(
final ScalarDataDistribution other)
{
super(other);
}
/**
* Creates a new instance of PMF
* @param data
* Data used to create the PMF
*/
public PMF(
final Iterable extends Number> data )
{
super( data );
}
@Override
public double logEvaluate(
final Double input)
{
return this.getLogFraction(input);
}
@Override
public Double evaluate(
final Double input)
{
return this.getFraction(input);
}
@Override
public ScalarDataDistribution.PMF getProbabilityFunction()
{
return this;
}
}
/**
* CDF of the ScalarDataDistribution, maintains the keys/domain in
* sorted order (TreeMap), so it's slower than it's peers.
*/
public static class CDF
extends ScalarDataDistribution
implements CumulativeDistributionFunction
{
/**
* Default constructor
*/
public CDF()
{
super(new TreeMap(), 0.0);
}
/**
* Copy constructor
* @param other
* ScalarDataDistribution to copy
*/
public CDF(
ScalarDataDistribution other)
{
this();
this.incrementAll(other);
}
/**
* Creates a new instance of PMF
* @param data
* Data used to create the PMF
*/
public CDF(
final Iterable extends Number> data )
{
this();
for( Number value : data )
{
this.increment(value.doubleValue());
}
}
@Override
public ScalarDataDistribution.CDF clone()
{
// The copy constructor is more appropriate because
// super.clone will attempt to create a new LinkedHashMap
// instead of a TreeMap
ScalarDataDistribution.CDF clone =
new ScalarDataDistribution.CDF( this );
return clone;
}
@Override
public Double getMinSupport()
{
return ((TreeMap) this.map).firstKey();
}
@Override
public Double getMaxSupport()
{
return ((TreeMap) this.map).lastKey();
}
@Override
public ScalarDataDistribution.CDF getCDF()
{
return this;
}
@Override
public Double evaluate(
Double input)
{
final double x0 = input;
double sum = 0.0;
for (ScalarMap.Entry entry : this.entrySet())
{
final double x = entry.getKey();
if (x <= x0)
{
sum += entry.getValue();
}
}
final double t = this.getTotal();
return (t > 0.0) ? (sum / t) : 0.0;
}
}
/**
* Estimator for a ScalarDataDistribution
*/
public static class Estimator
extends AbstractBatchAndIncrementalLearner
implements DistributionEstimator
{
/**
* Default constructor
*/
public Estimator()
{
super();
}
@Override
public ScalarDataDistribution createInitialLearnedObject()
{
return new ScalarDataDistribution();
}
@Override
public void update(
ScalarDataDistribution target,
Double data)
{
target.increment(data, 1.0);
}
}
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy