
net.sf.jagg.Analytic Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of jagg-core Show documentation
Show all versions of jagg-core Show documentation
jAgg is a Java 5.0 API that supports “group by” operations on Lists of Java objects: aggregate operations such as count, sum, max, min, avg, and many more. It also allows custom aggregate operations.
The newest version!
package net.sf.jagg;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import net.sf.jagg.model.AnalyticValue;
import net.sf.jagg.model.AnalyticContext;
import net.sf.jagg.model.OrderByElement;
import net.sf.jagg.model.WindowClause;
import net.sf.jagg.util.PartitionAndOrderByComparator;
/**
* The Analytic
class performs the actual analytic
* operations. It contains a Builder
class that, following the
* Builder Pattern, builds a Analytic
object that can be used
* for the actual analytic calculations.
*
* @author Randy Gettman
* @since 0.9.0
*/
public class Analytic
{
private static final boolean DEBUG = false;
private List myAnalytics;
/**
* Private constructor to ensure that the "Builder" pattern is used.
* @param builder A Builder
.
*/
private Analytic(Builder builder)
{
myAnalytics = new ArrayList(builder.myAnalytics);
}
/**
* Perform one or more analytic operations on a List<T>
.
* T
does not need to be Comparable
. This
* operates on copies of the list of values, each sorted based on the
* individual partitionBy clauses and/or orderBy clauses.
* @param The object type to aggregate.
* @param values The List<T>
of objects to aggregate.
* @return A List<AggregateValue<T>>
.
*/
@SuppressWarnings("ForLoopReplaceableByForEach")
public List> analyze(List values)
{
// If no values, must return an empty list of AnalyticValues.
if (values.size() == 0)
{
return new ArrayList>();
}
List> analyticValues = new ArrayList>(values.size());
for (int i = 0; i < values.size(); i++)
{
analyticValues.add(new AnalyticValue(values.get(i)));
}
List> sortComparators =
new ArrayList>(myAnalytics.size());
List> comparators =
new ArrayList>(myAnalytics.size());
for (int i = 0; i < myAnalytics.size(); i++)
{
AnalyticAggregator ana = myAnalytics.get(i);
PartitionAndOrderByComparator comparator =
new PartitionAndOrderByComparator(ana.getPartition(), ana.getOrderBy());
// Need original Comparator in the analysis process later.
comparators.add(comparator);
for (int j = 0; j < sortComparators.size(); j++)
{
PartitionAndOrderByComparator existing = sortComparators.get(j);
if (existing.covers(comparator))
{
if (DEBUG)
{
System.out.println(existing + " covers " + comparator + "; choosing " + existing);
}
// Choose the existing comparator again.
comparator = existing;
}
}
for (int j = 0; j < sortComparators.size(); j++)
{
PartitionAndOrderByComparator existing = sortComparators.get(j);
if (comparator.covers(existing))
{
if (DEBUG)
{
System.out.println(comparator + " covers " + existing + "; replacing " + existing + " with " + comparator);
}
// Overwrite.
sortComparators.set(j, comparator);
}
}
// Add in position.
sortComparators.add(comparator);
}
if (DEBUG)
{
for (PartitionAndOrderByComparator comparator : sortComparators)
{
System.out.println("Sort Comparator: " + comparator);
}
}
if (myAnalytics.isEmpty())
{
// Nothing to process!
return analyticValues;
}
// Create analytic contexts along with the individually sorted lists.
List> contexts = new ArrayList>(sortComparators.size());
for (int i = 0; i < sortComparators.size(); i++)
{
PartitionAndOrderByComparator comparator = comparators.get(i);
PartitionAndOrderByComparator sortComparator = sortComparators.get(i);
List> sortedValues = null;
// Try to find previously created equivalent comparator, so we don't
// have to sort the list the same way multiple times.
for (int j = 0; j < contexts.size(); j++)
{
AnalyticContext context = contexts.get(j);
PartitionAndOrderByComparator previous = context.getComparator();
if (previous.equals(sortComparator))
{
sortComparator = previous;
sortedValues = context.getListOfValues();
break;
}
}
if (sortedValues == null)
{
// Not found previously. Do a new sort of the values.
// Copy the list of the analytic values.
sortedValues = new ArrayList>(analyticValues);
Collections.sort(sortedValues, sortComparator);
if (DEBUG)
{
System.out.println("Sorted values for " + myAnalytics.get(i) + ": ");
for (AnalyticValue anaValue : sortedValues)
{
System.out.println(anaValue.getObject());
}
}
}
// Original PAOBC needed here; it'll be used in the analysis process.
AnalyticContext newContext = new AnalyticContext(sortedValues, comparator);
contexts.add(newContext);
}
doAnalysis(contexts);
return analyticValues;
}
/**
* Perform the actual analytic operations.
*
* TODO: Implement parallelism like Aggregation.doAggregation and the
* methods it calls does.
*
* This restricts the parallelism based on
* the size of the list of values to analyze, e.g. don't want to have a
* parallelism of 8 when the list size is 6. Then it delegates to either
* the single-threaded or multi-threaded version of
* getAnalyzedValues
.
* @param contexts The Lists
of AnalyticContexts
.
* Each AnalyticContext
contains the List
of
* values to analyze, plus the PartitionAndOrderByComparator
* that was used to sort them prior to analysis.
* @return A List
of AnalyticValues
.
*/
private List> doAnalysis(List> contexts)
{
List> analyzedList;
// Begin commented code for parallelism, originally copied from
// Aggregation.doAggregation.
// int size = listCopy.size();
// int minParallelism = (myParallelism > size) ? size : myParallelism;
// if (minParallelism > 1)
// analyzedList = getAnalyzedValues(listCopy, comparator, myParallelism);
// else
analyzedList = getAnalyzedValues(contexts);
// There is no concept of super-analytics like there is super-aggregation
// for aggregation.
return analyzedList;
}
/**
* Get all analyzed values for all analytics. This is the single-
* threaded version.
* @param contexts The List
of AnalyticContexts
.
* @return A List
of AnalyticValues
.
*/
private List> getAnalyzedValues(List> contexts)
{
List> aggValues = new ArrayList>();
List anaList = getAnalyticAggregatorsList();
int anaSize = myAnalytics.size();
int index = 0;
int listsize = contexts.get(0).getListOfValues().size();
for (int a = 0; a < anaSize; a++)
{
AnalyticContext context = contexts.get(a);
context.setEndOfPartitionIndex(Aggregations.indexOfLastMatching(
context.getListOfValues(), context.getComparator().getPartitionComparator(), index));
AnalyticAggregator ana = anaList.get(a);
anaList.addAll(ana.getDependentAnalyticAggregators());
if (anaSize != anaList.size())
{
// Dependents were added.
List dependencies = new ArrayList();
for (int i = anaSize; i < anaList.size(); i++)
{
dependencies.add(i);
// Add new contexts for these new analytics.
AnalyticContext newContext = new AnalyticContext(context.getListOfValues(), context.getComparator());
contexts.add(newContext);
}
context.setDependencies(dependencies);
}
anaSize = anaList.size();
ana.init();
if (DEBUG)
{
System.out.println("Beginning analysis: a = " + a + ", context's end of partition index = " +
context.getEndOfPartitionIndex());
}
}
while (index < listsize)
{
// Iterate backwards through the analytics, so that dependent
// analytics are processed after the analytics on which they depend.
for (int a = anaSize - 1; a >= 0; a--)
{
if (DEBUG)
{
System.out.println("index = " + index + ", a = " + a);
}
AnalyticContext context = contexts.get(a);
AnalyticAggregator ana = anaList.get(a);
if (index > context.getEndOfPartitionIndex())
{
betweenPartitionsProcessing(a, ana, context);
// Started a new partition for this AnalyticAggregator.
context.setEndOfPartitionIndex(Aggregations.indexOfLastMatching(
context.getListOfValues(), context.getComparator().getPartitionComparator(), index));
ana.init();
if (DEBUG)
{
System.out.println(" Partition ended; context's new end of partition index = " +
context.getEndOfPartitionIndex());
}
}
processItem(a, ana, index, context);
}
index++;
}
// Finish up processing of the last partition.
// Iterate backwards through the analytics, so that dependent
// analytics are prcoessed after the analytics on which they depend.
for (int a = anaSize - 1; a >= 0; a--)
{
AnalyticContext context = contexts.get(a);
AnalyticAggregator ana = anaList.get(a);
if (index > context.getEndOfPartitionIndex())
{
betweenPartitionsProcessing(a, ana, context);
}
}
return aggValues;
}
/**
* This processing occurs before, between, and after the processing of
* partitions. It terminates all unterminated AnalyticAggregators
* from the previous partition (if any).
* @param anaIndex The 0-based analytic aggregator index, only used to store
* terminated values by index.
* @param ana The AnalyticAggregator
.
* @param context The AnalyticContext
, which knows the current
* window and through which index the AnalyticValues
have
* been terminated.
*/
private void betweenPartitionsProcessing(int anaIndex, AnalyticAggregator ana, AnalyticContext> context)
{
if (DEBUG)
{
System.out.println("Between partitions: " + anaIndex);
}
int endOfPartitionIndex = context.getEndOfPartitionIndex();
while (context.getTerminatedThroughIndex() < endOfPartitionIndex)
{
if (context.getWindowStartIndex() > context.getEndOfPartitionIndex() ||
isInWindow(context, ana, context.getWindowStartIndex()))
{
terminate(anaIndex, ana, context);
}
else
{
delete(ana, context);
}
}
context.advanceWindowStartPastLastTerminated();
}
/**
* Processes the item at the given start index. This may include
* terminating analytic values whose windows are complete. This may also
* include deleting values that are no longer supposed to be in the current
* window. Iterates the item at the given start index and updates the
* window.
* @param anaIndex The 0-based analytic aggregator index, only used to store
* terminated values by index.
* @param ana The AnalyticAggregator
.
* @param index The current 0-based index into the list of values to
* iterate.
* @param context The AnalyticContext
, which knows the current
* window and through which index the AnalyticValues
have
* been terminated.
*/
private void processItem(int anaIndex, AnalyticAggregator ana, int index, AnalyticContext> context)
{
// Determine whether to terminate.
while (context.getTerminatedThroughIndex() < context.getEndOfPartitionIndex() &&
!isInWindow(context, ana, index, false))
{
terminate(anaIndex, ana, context);
// Determine whether to delete a value. Any delete values slide the
// beginning of the window forward.
while (context.getWindowEndIndex() >= context.getWindowStartIndex() &&
!isInWindow(context, ana, context.getWindowStartIndex()))
{
delete(ana, context);
}
}
// Iterate. This slides the end of the window forward.
AnalyticValue> iterValue = context.getListOfValues().get(index);
Object valueToIterate = iterValue.getObject();
ana.iterate(valueToIterate);
if (DEBUG)
{
System.out.println(" Iterating " + valueToIterate);
}
context.incrementWindowEndIndex();
}
/**
* Returns whether the current value to be analyzed at the given index is
* within the window described by the next indexed value to be terminated.
* @param context The AnalyticContext
.
* @param ana The AnalyticAggregator
.
* @param index The index that will be iterated next.
* @return Whether what is about to be iterated is within the current window.
*/
private boolean isInWindow(AnalyticContext context, AnalyticAggregator ana, int index)
{
return isInWindow(context, ana, index, true);
}
/**
* Returns whether the current value to be analyzed at the given index is
* within the window described by the next indexed value to be terminated.
* @param context The AnalyticContext
.
* @param ana The AnalyticAggregator
.
* @param index The index that will be iterated next.
* @param considerStart This will consider the start of the window only if
* this is set to true
. false
will allow it to
* be "in the window" even if the beginning of the window hasn't been
* iterated yet.
* @return Whether what is about to be iterated is within the current window.
*/
private boolean isInWindow(AnalyticContext context, AnalyticAggregator ana, int index, boolean considerStart)
{
WindowClause window = ana.getWindow();
int nextTerminationIndex = context.getTerminatedThroughIndex() + 1;
switch(window.getWindowType())
{
case ROWS:
{
if (considerStart && window.getStartValue() != null)
{
int startWindow = nextTerminationIndex - window.getStartValue().intValue();
if (startWindow > index)
return false;
}
if (window.getEndValue() != null)
{
int endWindow = nextTerminationIndex + window.getEndValue().intValue();
if (index > endWindow)
return false;
}
return true;
}
case RANGE:
{
// Get the property that the first order-by element uses.
// If actual values are involved, then the Order By Elements
// should have been restricted already to exactly one property.
PartitionAndOrderByComparator comparator = context.getComparator();
AnalyticValue nextTerminatedValue = context.getListOfValues().get(nextTerminationIndex);
AnalyticValue indexValue = context.getListOfValues().get(index);
Object objNextTerminatedValue = nextTerminatedValue.getObject();
Object objIndexPropValue = indexValue.getObject();
if (considerStart && window.getStartValue() != null)
{
if (window.getStartValue().doubleValue() == 0)
{
if (index < nextTerminationIndex && comparator.compare(nextTerminatedValue, indexValue) != 0)
return false;
}
else
{
OrderByElement only = comparator.getOrderByElements().get(0);
String property = only.getProperty();
Number nextTerminatedPropValue = (Number) Aggregator.getValueFromProperty(objNextTerminatedValue, property);
Number indexPropValue = (Number) Aggregator.getValueFromProperty(objIndexPropValue, property);
if (nextTerminatedPropValue == null || indexPropValue == null) return false; // TODO: Is this correct?
double dNextTerminatedValue = nextTerminatedPropValue.doubleValue();
double dIndexValue = indexPropValue.doubleValue();
switch(only.getSortDir())
{
case ASC:
{
double startWindow = dNextTerminatedValue - window.getStartValue().doubleValue();
if (DEBUG)
{
System.out.println(" +startWindow: " + startWindow + ", dNextTerminatedValue: " + dNextTerminatedValue);
}
if (startWindow > dIndexValue)
return false;
break;
}
case DESC:
{
double startWindow = dNextTerminatedValue + window.getStartValue().doubleValue();
if (DEBUG)
{
System.out.println(" -startWindow: " + startWindow + ", dNextTerminatedValue: " + dNextTerminatedValue);
}
if (startWindow < dIndexValue)
return false;
break;
}
}
}
}
if (window.getEndValue() != null)
{
if (window.getEndValue().doubleValue() == 0)
{
if (index > nextTerminationIndex && comparator.compare(nextTerminatedValue, indexValue) != 0)
return false;
}
else
{
OrderByElement only = comparator.getOrderByElements().get(0);
String property = only.getProperty();
Number nextTerminatedPropValue = (Number) Aggregator.getValueFromProperty(objNextTerminatedValue, property);
Number indexPropValue = (Number) Aggregator.getValueFromProperty(objIndexPropValue, property);
if (nextTerminatedPropValue == null || indexPropValue == null) return false; // TODO: Is this correct?
double dNextTerminatedValue = nextTerminatedPropValue.doubleValue();
double dIndexValue = indexPropValue.doubleValue();
switch(only.getSortDir())
{
case ASC:
{
double endWindow = dNextTerminatedValue + window.getEndValue().doubleValue();
if (DEBUG)
{
System.out.println(" +endWindow: " + endWindow + ", dNextTerminatedValue: " + dNextTerminatedValue);
}
if (dIndexValue > endWindow)
return false;
break;
}
case DESC:
{
double endWindow = dNextTerminatedValue - window.getEndValue().doubleValue();
if (DEBUG)
{
System.out.println(" -endWindow: " + endWindow + ", dNextTerminatedValue: " + dNextTerminatedValue);
}
if (dIndexValue < endWindow)
return false;
break;
}
}
}
}
return true;
}
default:
return false;
}
}
/**
* Terminates the next item due to be terminated.
* @param anaIndex The 0-based analytic aggregator index, only used to store
* terminated values by index.
* @param ana The AnalyticAggregator
.
* @param context The AnalyticContext
, which knows the current
* window and through which index the AnalyticValues
have
* been terminated.
*/
private void terminate(int anaIndex, AnalyticAggregator ana, AnalyticContext> context)
{
int nextTerminationIndex = context.getTerminatedThroughIndex() + 1;
AnalyticValue> valueToTerminate = context.getListOfValues().get(nextTerminationIndex);
ana.setValuesForDependentAnalytics(valueToTerminate, context);
valueToTerminate.setAnalyzedValue(anaIndex, ana, ana.terminate());
if (DEBUG)
{
System.out.println(" Terminating analytic value at " + nextTerminationIndex);
}
context.incrementTerminatedThroughIndex();
}
/**
* Deletes the item at the start of the window.
* @param ana The AnalyticAggregator
.
* @param context The AnalyticContext
, which knows the current
* window and through which index the AnalyticValues
have
* been terminated.
*/
private void delete(AnalyticAggregator ana, AnalyticContext> context)
{
AnalyticValue> deleteValue = context.getListOfValues().get(context.getWindowStartIndex());
Object valueToDelete = deleteValue.getObject();
ana.delete(valueToDelete);
if (DEBUG)
{
System.out.println(" Deleting " + valueToDelete);
}
context.incrementWindowStartIndex();
}
/**
* Helper function to create a new List
of
* AnalyticAggregators
.
* @return A List
of AnalyticAggregators
.
*/
private List getAnalyticAggregatorsList()
{
int anaSize = myAnalytics.size();
List aggList = new ArrayList(anaSize);
for (int a = 0; a < anaSize; a++)
{
AnalyticAggregator archetype = myAnalytics.get(a);
aggList.add((AnalyticAggregator) Aggregator.getAggregator(archetype));
}
return aggList;
}
/**
* This Builder
class follows the "Builder" pattern to create
* an Analytic
object.
*/
public static class Builder
{
private List myAnalytics;
/**
* Constructs a Builder
with no analytics.
*/
public Builder()
{
myAnalytics = null;
}
/**
* Sets the List
of AnalyticAggregators
to use. The
* AnalyticAggregators
define which analytic operatons to perform.
* @param analytics A List
of AnalyticAggregators
.
* AnalyticAggregators can be created in two ways: direct instantiation, or by
* using the factory method.
*
* AnalyticAggregator ana = new AnalyticAggregator.Builder()
* .setAnalyticFunction(new SumAggregator("value")
* .setPartition(new PartitionClause(Arrays.asList("value")))
* .setOrderBy(new OrderByClause(Arrays.asList(
* new OrderByElement("category", OrderByElement.SortDir.DESC, OrderByElement.NullSort.LAST))))
* .setWindow(new WindowClause(WindowClause.Type.ROWS, 3, 0))
* .build();
* AnalyticAggregator ana =
* AnalyticAggregator.getAnalytic("Sum(value) partitionBy(value) orderBy(category desc nulls last) rows(3, 0)");
*
* @return This Builder
.
* @see AnalyticAggregator
* @see AnalyticFunction
*/
public Builder setAnalytics(List analytics)
{
myAnalytics = analytics;
return this;
}
/**
* Build the Analytic
object.
* @return An Analytic
object that can be used to perform
* the actual analytic calculations.
* @throws IllegalArgumentException If at least one
* AnalyticFunction
was not supplied with the
* setAnalytics
method.
* @see #setAnalytics
*/
public Analytic build()
{
if (myAnalytics == null || myAnalytics.isEmpty())
throw new IllegalArgumentException("Analytic.Builder: Must supply at least one AnalyticFunction.");
return new Analytic(this);
}
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy