All downloads are free. The search and download functionality uses the official Maven repository.

net.sf.jagg.LinearRegressionAggregator Maven / Gradle / Ivy

Go to download

jAgg is a Java 5.0 API that supports “group by” operations on Lists of Java objects: aggregate operations such as count, sum, max, min, avg, and many more. It also allows custom aggregate operations.

The newest version!
package net.sf.jagg;

import net.sf.jagg.math.DoubleDouble;
import net.sf.jagg.model.LinearRegressionStats;
import net.sf.jagg.model.WindowClause;

/**
 * This class represents the "linear regression" aggregator over two sets of
 * numeric values.  Many values can be returned by this
 * Aggregator, all encapsulated in the object
 * LinearRegressionStats.
 *
 * @author Randy Gettman
 * @since 0.1.0
 *
 * @see LinearRegressionStats
 */
public class LinearRegressionAggregator extends TwoPropAggregator implements AnalyticFunction
{
   private CovariancePopAggregator myCovarianceAgg = null;
   private VariancePopAggregator myFirstVarAgg = null;
   private VariancePopAggregator mySecondVarAgg = null;
   private AvgAggregator myFirstAvgAgg = null;
   private AvgAggregator mySecondAvgAgg = null;

   private long myCount;

   /**
    * Constructs a LinearRegressionAggregator on the specified
    * properties, in the format: property, property2.
    * @param properties A specification string in the format:
    *    property, property2.
    */
   public LinearRegressionAggregator(String properties)
   {
      setProperty(properties);
   }

   /**
    * Constructs a LinearRegressionAggregator that operates on the specified
    * properties.
    * @param property Calculate linear regression statistics of this property with the other.
    * @param property2 Calculate linear regression statistics of this property with the other.
    */
   public LinearRegressionAggregator(String property, String property2)
   {
      setProperty(property + "," + property2);
   }

   /**
    * Returns an uninitialized copy of this Aggregator object,
    * with the same property(ies) to analyze.
    * @return An uninitialized copy of this Aggregator object.
    */
   public LinearRegressionAggregator replicate()
   {
      return new LinearRegressionAggregator(getProperty(), getProperty2());
   }

   /**
    * Initialize the internal aggregators: A CovarianceAggregator,
    * 2 VarianceAggregators (one each for both properties), and 2
    * AvgAggregators (one each for both properties).  Initialize a
    * count to zero.
    *
    * @see CovariancePopAggregator
    * @see VariancePopAggregator
    * @see AvgAggregator
    */
   public void init()
   {
      if (myCovarianceAgg == null)
         myCovarianceAgg = new CovariancePopAggregator(getProperty(), getProperty2());
      if (myFirstVarAgg == null)
         myFirstVarAgg = new VariancePopAggregator(getProperty());
      if (mySecondVarAgg == null)
         mySecondVarAgg = new VariancePopAggregator(getProperty2());
      if (myFirstAvgAgg == null)
         myFirstAvgAgg = new AvgAggregator(getProperty());
      if (mySecondAvgAgg == null)
         mySecondAvgAgg = new AvgAggregator(getProperty2());

      myCovarianceAgg.init();
      myFirstVarAgg.init();
      mySecondVarAgg.init();
      myFirstAvgAgg.init();
      mySecondAvgAgg.init();

      myCount = 0;
   }

   /**
    * If both property values are non-null, then iterate the internal
    * aggregators and increment the count.
    *
    * @param value The value to aggregate.
    */
   public void iterate(Object value)
   {
      // Do the count here to detect only when both properties are non-null.
      if (value != null)
      {
         String property1 = getProperty();
         String property2 = getProperty2();

         Object obj1 = getValueFromProperty(value, property1);
         Object obj2 = getValueFromProperty(value, property2);
         // Don't count nulls.
         if (obj1 != null && obj2 != null)
         {
            myCount++;
            // Only iterate our internal aggregators if both values are
            // non-null.
            myCovarianceAgg.iterate(value);
            myFirstVarAgg.iterate(value);
            mySecondVarAgg.iterate(value);
            myFirstAvgAgg.iterate(value);
            mySecondAvgAgg.iterate(value);
         }
      }
   }

   /**
    * If both property values are non-null, then delete from the internal
    * aggregators and decrement the count.
    *
    * @param value The value to delete.
    * @since 0.9.0
    */
   public void delete(Object value)
   {
      // Do the count here to detect only when both properties are non-null.
      if (value != null)
      {
         String property1 = getProperty();
         String property2 = getProperty2();

         Object obj1 = getValueFromProperty(value, property1);
         Object obj2 = getValueFromProperty(value, property2);
         // Don't count nulls.
         if (obj1 != null && obj2 != null)
         {
            myCount--;
            // Only iterate our internal aggregators if both values are
            // non-null.
            myCovarianceAgg.delete(value);
            myFirstVarAgg.delete(value);
            mySecondVarAgg.delete(value);
            myFirstAvgAgg.delete(value);
            mySecondAvgAgg.delete(value);
         }
      }
   }

   /**
    * The linear regression function can take a window clause.
    * @return true.
    * @since 0.9.0
    */
   public boolean takesWindowClause()
   {
      return true;
   }

   /**
    * The linear regression function doesn't supply its own window clause.
    * @return null
    * @since 0.9.0
    */
   public WindowClause getWindowClause()
   {
      return null;
   }

   /**
    * Merge the given Aggregator into this one.  Add the internal
    * counts.
    *
    * @param agg The Aggregator to merge into this one.
    */
   public void merge(AggregateFunction agg)
   {
      if (agg != null && agg instanceof LinearRegressionAggregator)
      {
         LinearRegressionAggregator otherAgg = (LinearRegressionAggregator) agg;
         myCovarianceAgg.merge(otherAgg.myCovarianceAgg);
         myFirstVarAgg.merge(otherAgg.myFirstVarAgg);
         mySecondVarAgg.merge(otherAgg.mySecondVarAgg);
         myFirstAvgAgg.merge(otherAgg.myFirstAvgAgg);
         mySecondAvgAgg.merge(otherAgg.mySecondAvgAgg);

         myCount += otherAgg.myCount;
      }
   }

   /**
    * Return a LinearRegressionStats, with the following
    * calculations:
    * 
    *
  • slope = cov(prop1, prop2) / var(prop2) *
  • intercept = avg(prop1) - slope * avg(prop2) *
  • count = number of pairs where both elements were non-null, and thus * counted in these calculations. *
  • rSquared = if (var(prop2)) is 0, then NaN,
    * else if (var(prop1)) is 0, then 1,
    * else correlation(prop1, prop2) squared. *
  • correlation = covariancePop(prop1, prop2) / Math.sqrt(variancePop(prop1) * variancePop(prop2)) *
  • avg1 = avg(prop1) *
  • avg2 = avg(prop2) *
* * @return A LinearRegressionStats. */ public LinearRegressionStats terminate() { DoubleDouble covariance = myCovarianceAgg.terminateDoubleDouble(); DoubleDouble variance1 = myFirstVarAgg.terminateDoubleDouble(); DoubleDouble variance2 = mySecondVarAgg.terminateDoubleDouble(); DoubleDouble avg1 = myFirstAvgAgg.terminateDoubleDouble(); DoubleDouble avg2 = mySecondAvgAgg.terminateDoubleDouble(); if (myCount <= 0) { return new LinearRegressionStats(Double.NaN, Double.NaN, 0, Double.NaN, Double.NaN, Double.NaN, Double.NaN); } // Same calculation that the CorrelationAggregator makes. DoubleDouble correlation; if (variance1.compareTo(DoubleDouble.ZERO) == 0 || variance2.compareTo(DoubleDouble.ZERO) == 0) correlation = DoubleDouble.NaN; else { correlation = new DoubleDouble(covariance); DoubleDouble temp = new DoubleDouble(variance1); temp.multiplySelfBy(variance2); temp.sqrtSelf(); correlation.divideSelfBy(temp); } double slope, intercept, rSquared; if (variance2.compareTo(DoubleDouble.ZERO) == 0) { slope = Double.NaN; intercept = Double.NaN; rSquared = Double.NaN; } else if (variance1.compareTo(DoubleDouble.ZERO) == 0) { slope = 0; intercept = avg1.doubleValue(); rSquared = 1; } else { DoubleDouble temp = new DoubleDouble(covariance); temp.divideSelfBy(variance2); slope = temp.doubleValue(); temp.multiplySelfBy(avg2); temp.negateSelf(); temp.addToSelf(avg1); intercept = temp.doubleValue(); temp = new DoubleDouble(correlation); temp.squareSelf(); rSquared = temp.doubleValue(); } return new LinearRegressionStats(slope, intercept, myCount, rSquared, correlation.doubleValue(), avg1.doubleValue(), avg2.doubleValue()); } }




© 2015 - 2025 Weber Informatics LLC | Privacy Policy