com.tangosol.util.aggregator.GroupAggregator Maven / Gradle / Ivy
Show all versions of coherence Show documentation
/*
* Copyright (c) 2000, 2022, Oracle and/or its affiliates.
*
* Licensed under the Universal Permissive License v 1.0 as shown at
* https://oss.oracle.com/licenses/upl.
*/
package com.tangosol.util.aggregator;
import com.tangosol.io.ExternalizableLite;
import com.tangosol.io.pof.PofReader;
import com.tangosol.io.pof.PofWriter;
import com.tangosol.io.pof.PortableObject;
import com.tangosol.util.ClassHelper;
import com.tangosol.util.ExternalizableHelper;
import com.tangosol.util.Filter;
import com.tangosol.util.InvocableMap;
import com.tangosol.util.LiteMap;
import com.tangosol.util.ValueExtractor;
import com.tangosol.util.extractor.ChainedExtractor;
import com.tangosol.util.extractor.MultiExtractor;
import com.tangosol.util.extractor.ReflectionExtractor;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.function.BinaryOperator;
import jakarta.json.bind.annotation.JsonbProperty;
/**
* The GroupAggregator provides an ability to split a subset of entries in an
* InvocableMap into a collection of non-intersecting subsets and then
* aggregate them separately and independently. The splitting (grouping) is
* performed using the results of the underlying ValueExtractor in such a way
* that two entries will belong to the same group if and only if the result of
* the corresponding {@link ValueExtractor#extract extract} call produces the
* same value or tuple (list of values). After the entries are split into the
* groups, the underlying aggregator is applied separately to each group. The
* result of the aggregation by the GroupAggregator is a Map that has distinct
* values (or tuples) as keys and results of the individual aggregation as
* values. Additionally, those results could be further reduced using an
* optional Filter object.
* <p>
* Informally speaking, this aggregator is analogous to the SQL "group by" and
* "having" clauses. Note that the "having" Filter is applied independently on
* each server against the partial aggregation results; this generally implies
* that data affinity is required to ensure that all required data used to
* generate a given result exists within a single cache partition.
* In other words, the "group by" predicate should not span multiple
* partitions if the "having" clause is used.
* <p>
* The GroupAggregator is somewhat similar to the {@link DistinctValues}
* aggregator, which returns back a list of distinct values (tuples) without
* performing any additional aggregation work.
* <p>
* Unlike many other concrete EntryAggregator implementations that are
* constructed directly, instances of GroupAggregator should only be created
* using one of the factory methods:
* {@link #createInstance(ValueExtractor, InvocableMap.EntryAggregator)
* createInstance(extractor, aggregator)},
* {@link #createInstance(ValueExtractor, InvocableMap.EntryAggregator, Filter)
* createInstance(extractor, aggregator, filter)},
* {@link #createInstance(String, InvocableMap.EntryAggregator)
* createInstance(sMethod, aggregator)}, or
* {@link #createInstance(String, InvocableMap.EntryAggregator, Filter)
* createInstance(sMethod, aggregator, filter)}.
*
* @param <K> the type of the Map entry keys
* @param <V> the type of the Map entry values
* @param <T> the type of the value to extract from
* @param <E> the type of the extracted value
* @param <R> the type of the group aggregator result
*
* @author gg 2006.02.15
* @author as 2014.11.09
*
* @since Coherence 3.2
*/
@SuppressWarnings("unchecked")
public class GroupAggregator
extends ExternalizableHelper
implements InvocableMap.StreamingAggregator, Map>,
ExternalizableLite, PortableObject
{
// ----- constructors ---------------------------------------------------
/**
* Default constructor (necessary for the ExternalizableLite interface).
* <p>
* This constructor assigns nothing itself: the extractor, aggregator and
* filter are not set here.
* NOTE(review): presumably they are populated afterwards by this class's
* deserialization methods - confirm against the rest of the file.
*/
public GroupAggregator()
{
// intentionally empty
}
/**
 * Construct a GroupAggregator based on a specified ValueExtractor and
 * underlying EntryAggregator.
 *
 * @param extractor   a ValueExtractor object that is used to split
 *                    InvocableMap entries into non-intersecting subsets;
 *                    may not be null
 * @param aggregator  an EntryAggregator object; may not be null
 * @param filter      an optional Filter object used to filter out
 *                    results of individual group aggregation results;
 *                    may be null
 */
protected GroupAggregator(ValueExtractor<? super T, ? extends E> extractor,
        InvocableMap.EntryAggregator<? super K, ? super V, R> aggregator,
        Filter filter)
{
    // the extractor and the aggregator are mandatory; only the filter is optional
    azzert(extractor != null && aggregator != null);

    m_extractor  = extractor;
    m_aggregator = aggregator;
    m_filter     = filter;
}
// ----- StreamingAggregator interface ----------------------------------
@Override
public InvocableMap.StreamingAggregator, Map> supply()
{
return new GroupAggregator<>(m_extractor, m_aggregator, m_filter);
}
@Override
public boolean accumulate(InvocableMap.Entry extends K, ? extends V> entry)
{
ensureInitialized();
if (entry.isPresent())
{
E groupKey = entry.extract(m_extractor);
// add the entry to the corresponding group
if (isDelegateStreaming())
{
InvocableMap.StreamingAggregator super K, ? super V, Object, R> aggregator =
(InvocableMap.StreamingAggregator super K, ? super V, Object, R>)
m_mapResults.computeIfAbsent(groupKey, k -> streaming(m_aggregator).supply());
aggregator.accumulate(entry);
}
else
{
Set> setEntries =
(Set>)
m_mapResults.computeIfAbsent(groupKey, k -> new HashSet<>());
setEntries.add(entry);
}
}
return true;
}
@Override
public boolean combine(Map partialResult)
{
ensureInitialized();
for (Map.Entry part : partialResult.entrySet())
{
E groupKey = part.getKey();
if (isDelegateStreaming())
{
InvocableMap.StreamingAggregator super K, ? super V, Object, R> aggregator =
(InvocableMap.StreamingAggregator super K, ? super V, Object, R>)
m_mapResults.computeIfAbsent(groupKey, k -> streaming(m_aggregator).supply());
aggregator.combine(part.getValue());
}
else if (isDelegateParallel())
{
List listResults = (List) m_mapResults.computeIfAbsent(groupKey, k -> new ArrayList<>());
listResults.add(part.getValue());
}
else
{
Set> setEntries =
(Set>)
m_mapResults.computeIfAbsent(groupKey, k -> new HashSet<>());
setEntries.addAll((Collection) part.getValue());
}
}
return true;
}
@Override
public Map getPartialResult()
{
ensureInitialized();
boolean fStreaming = isDelegateStreaming();
if (!fStreaming && !isDelegateParallel())
{
return m_mapResults;
}
Map mapResults = new LiteMap<>();
for (Map.Entry entry : m_mapResults.entrySet())
{
Object oResult;
if (fStreaming)
{
oResult = ((InvocableMap.StreamingAggregator) entry.getValue()).getPartialResult();
}
else // must be parallel
{
oResult = parallel(m_aggregator).getParallelAggregator()
.aggregate((Set>) entry.getValue());
}
mapResults.put(entry.getKey(), oResult);
}
return mapResults;
}
@Override
public Map finalizeResult()
{
ensureInitialized();
boolean fStreaming = isDelegateStreaming();
boolean fParallelAware = isDelegateParallel();
Filter filter = m_filter;
Map mapResults = new LiteMap<>();
for (Map.Entry entry : m_mapResults.entrySet())
{
R result =
fStreaming ? ((InvocableMap.StreamingAggregator super K, ? super V, Object, R>) entry.getValue()).finalizeResult() :
fParallelAware ? parallel(m_aggregator).aggregateResults((Collection