All downloads are free. The search and download functionality uses the official Maven repository.

com.bigdata.bop.solutions.PipelinedAggregationOp Maven / Gradle / Ivy

package com.bigdata.bop.solutions;

import java.util.Arrays;
import java.util.LinkedHashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.concurrent.Callable;
import java.util.concurrent.FutureTask;

import org.apache.log4j.Logger;

import com.bigdata.bop.BOp;
import com.bigdata.bop.BOpContext;
import com.bigdata.bop.BOpUtility;
import com.bigdata.bop.Constant;
import com.bigdata.bop.ContextBindingSet;
import com.bigdata.bop.HashMapAnnotations;
import com.bigdata.bop.IBind;
import com.bigdata.bop.IBindingSet;
import com.bigdata.bop.IConstant;
import com.bigdata.bop.IConstraint;
import com.bigdata.bop.ISingleThreadedOp;
import com.bigdata.bop.IValueExpression;
import com.bigdata.bop.IVariable;
import com.bigdata.bop.PipelineOp;
import com.bigdata.bop.aggregate.IAggregate;
import com.bigdata.bop.bindingSet.ListBindingSet;
import com.bigdata.bop.engine.BOpStats;
import com.bigdata.rdf.error.SparqlTypeErrorException;
import com.bigdata.rdf.sparql.ast.FilterNode;
import com.bigdata.relation.accesspath.IBlockingBuffer;
import com.bigdata.util.InnerCause;

import cutthecrap.utils.striterators.ICloseableIterator;

/**
 * A pipelined aggregation operator based on an in memory hash table associating
 * with per-group state for each aggregate expression (it can also handle the
 * degenerate case where all solutions form a single implicit group). This
 * operator is highly efficient, but may only be used if (a) DISTINCT is NOT
 * specified for any aggregate and (b) aggregates do not embed other aggregates.
 * 

* Note: This implementation is a pipelined operator which inspects each chunk * of solutions as they arrive. The state is shared across invocations of the * operator for each source chunk. The operator waits until the last chunk has * been consumed before writing the output solutions. In order to observe the * lastInvocation signal, the operator MUST be single threaded ( * {@link PipelineOp.Annotations#MAX_PARALLEL}:=1) and running on the query * controller. *

* Note: Since this operator evaluates {@link IAggregate}s incrementally (one * input solution at a time), it relies on {@link IAggregate}'s contract for * "sticky" errors. See {@link IAggregate#get(IBindingSet)} and * {@link IAggregate#done()}. *

* Note: This this operator will be invoked multiple times, and potentially on * multiple nodes in a cluster, it is critical that the anonymous variables * assigned by the {@link GroupByRewriter} are stable across all invocations on * any node of the cluster (this caution also applies for a single node where * the operator can still be invoked multiple times). * * @author Bryan Thompson */ public class PipelinedAggregationOp extends GroupByOp implements ISingleThreadedOp { private final static transient Logger log = Logger .getLogger(PipelinedAggregationOp.class); /** * */ private static final long serialVersionUID = 1L; public interface Annotations extends PipelineOp.Annotations, HashMapAnnotations, GroupByOp.Annotations { } /** * {@inheritDoc} *

* Returns true. This is a pipelined aggregation operator and * MAY NOT be used to evaluate aggregation requests which use DISTINCT or * which nest {@link IAggregate}s in other {@link IAggregate}s. */ @Override public boolean isPipelinedAggregationOp() { return true; } /** * Constructor required for {@link com.bigdata.bop.BOpUtility#deepCopy(FilterNode)}. */ public PipelinedAggregationOp(final PipelinedAggregationOp op) { super(op); } /** * Required shallow copy constructor. */ public PipelinedAggregationOp(final BOp[] args, final Map annotations) { super(args, annotations); switch (getEvaluationContext()) { case CONTROLLER: break; default: throw new UnsupportedOperationException( Annotations.EVALUATION_CONTEXT + "=" + getEvaluationContext()); } getRequiredProperty(Annotations.GROUP_BY_STATE); getRequiredProperty(Annotations.GROUP_BY_REWRITE); if (!isSharedState()) { /* * Note: shared state is used to share the hash table across * invocations. */ throw new UnsupportedOperationException(Annotations.SHARED_STATE + "=" + isSharedState()); } if (!isLastPassRequested()) { /* * Note: A final evaluation pass is required to write out the * aggregates. */ throw new UnsupportedOperationException(Annotations.LAST_PASS + "=" + isLastPassRequested()); } /* * Note: The operator MUST be single threaded in order to receive the * isLastInvocation notice. */ assertMaxParallelOne(); } /** * @see Annotations#INITIAL_CAPACITY */ public int getInitialCapacity() { return getProperty(Annotations.INITIAL_CAPACITY, Annotations.DEFAULT_INITIAL_CAPACITY); } /** * @see Annotations#LOAD_FACTOR */ public float getLoadFactor() { return getProperty(Annotations.LOAD_FACTOR, Annotations.DEFAULT_LOAD_FACTOR); } @Override public BOpStats newStats() { return new AggregateStats(this); } @Override public FutureTask eval(final BOpContext context) { return new FutureTask(new ChunkTask(this, context)); } /** * Wrapper used for the solution groups. 
*/ private static class SolutionGroup { /** The hash code for {@link #vals}. */ private final int hash; /** * The computed values for the groupBy value expressions in the order in * which they were declared. */ private final IConstant[] vals; @Override public String toString() { return super.toString() + // "{group=" + Arrays.toString(vals) + // "}"; } /** * Return a new {@link SolutionGroup} given the value expressions and * the binding set. * * @param groupBy * The value expressions to be computed. * The binding set. * * @return The new {@link SolutionGroup} -or- null if any * of the value expressions evaluates to a null * -OR- throws a {@link SparqlTypeErrorException}. */ static SolutionGroup newInstance(final IValueExpression[] groupBy, final IBindingSet bset, final BOpStats stats) { final IConstant[] r = new IConstant[groupBy.length]; for (int i = 0; i < groupBy.length; i++) { final IValueExpression expr = groupBy[i]; final Object asBound; try { /* * Note: This has a side-effect on the solution and causes * the evaluated GROUP_BY value expressions to become bound * on the solution. This is necessary in order for us to * compute the aggregates incrementally. */ asBound = expr.get(bset); } catch (SparqlTypeErrorException ex) { TypeErrorLog.handleTypeError(ex, expr, stats); // Drop solution. return null; } if (asBound == null) { // Drop solution. 
return null; } @SuppressWarnings({ "rawtypes", "unchecked" }) final IConstant x = new Constant(asBound); r[i] = x; } return new SolutionGroup(r); } private SolutionGroup(final IConstant[] vals) { this.vals = vals; this.hash = java.util.Arrays.hashCode(vals); } @Override public int hashCode() { return hash; } @Override public boolean equals(final Object o) { if (this == o) return true; if (!(o instanceof SolutionGroup)) { return false; } final SolutionGroup t = (SolutionGroup) o; if (vals.length != t.vals.length) return false; for (int i = 0; i < vals.length; i++) { if (vals[i] == t.vals[i]) continue; if (vals[i] == null) return false; if (!vals[i].equals(t.vals[i])) return false; } return true; } } // SolutionGroup /** * State associated with each {@link SolutionGroup} (this is not used if all * solutions belong to a single implicit group). */ private static class SolutionGroupState { /** * The aggregate expressions to be evaluated. The {@link IAggregate}s * MUST have been cloned to avoid side-effect across groups. */ private final LinkedHashMap, IVariable> aggExpr; /** * The intermediate solution with all bindings produced when evaluating * this solution group. Any bare variables and any variables declared * by the GROUP_BY clause are projected onto {@link #aggregates} by * the constructor. */ private final IBindingSet aggregates; /** * * @param groupBy * The (rewritten) GROUP_BY clause. * @param aggExpr * The aggregates to be computed for each group. The * {@link IAggregate}s will be *cloned* in order to avoid * side-effects across groups. * @param bset * The first input solution encountered for the group (the * one which led to the group becoming defined). */ SolutionGroupState(final BOpContext context, final IValueExpression[] groupBy, final LinkedHashMap, IVariable> aggExpr, final IBindingSet bset) { this.aggExpr = new LinkedHashMap, IVariable>(); for (Map.Entry, IVariable> e : aggExpr.entrySet()) { // Note: IAggregates MUST be cloned to avoid side-effects. 
this.aggExpr.put((IAggregate) e.getKey().clone(), e.getValue()); } /** * Propagate GROUP_BY expression onto [aggregates]. */ this.aggregates = new ContextBindingSet(context, new ListBindingSet()); final IBindingSet aSolution = bset; for (IValueExpression expr : groupBy) { if (expr instanceof IVariable) { /** * Propagate bare variable used in GROUP_BY clause to * [aggregates]. * *

                     * GROUP BY ?x
                     * 
*/ final IVariable var = (IVariable) expr; // Note: MUST be a binding for each groupBy var. @SuppressWarnings({ "rawtypes", "unchecked" }) final Constant val = new Constant(var.get(aSolution)); // Bind on [aggregates]. aggregates.set(var, val); } else if (expr instanceof IBind) { /** * Propagate BIND declared by GROUP_BY clause to * [aggregates]. * *
                     * GROUP BY (2*?y as ?x)
                     * 
*/ final IBind bindExpr = (IBind) expr; // Compute value expression. // Note: MUST be valid since group exists. @SuppressWarnings({ "rawtypes", "unchecked" }) final Constant val = new Constant( bindExpr.get(aSolution)); // Variable to be projected out by SELECT. final IVariable ovar = ((IBind) expr).getVar(); // Bind on [aggregates]. aggregates.set(ovar, val); } } // next GROUP_BY value expression } } // class SolutionGroupState /** * Extends {@link BOpStats} to provide the shared state for the aggregation * operator across invocations for different source chunks. *

* Note: mutable fields on instances of this class are guarded by the * monitor for the instance. */ private static class AggregateStats extends BOpStats { /** * */ private static final long serialVersionUID = 1L; /** * true until we initialize the shared start during the * first invocation of the {@link ChunkTask}. */ private boolean first = true; private final IGroupByState groupByState; private final IGroupByRewriteState rewrite; public AggregateStats(final PipelinedAggregationOp op) { this.groupByState = (IGroupByState) op .getRequiredProperty(Annotations.GROUP_BY_STATE); this.rewrite = (IGroupByRewriteState) op .getRequiredProperty(Annotations.GROUP_BY_REWRITE); if (groupByState.isAnyDistinct()) { // Pipelined aggregation does not support DISTINCT. throw new UnsupportedOperationException( "DISTINCT not allowed with pipelined aggregation."); } if (groupByState.isNestedAggregates()) { /* * Pipelined aggregation does not support aggregates which embed * other aggregates. */ throw new UnsupportedOperationException( "Nested aggregates not allowed with pipelined aggregation."); } } } /** * Shared execution state for the {@link PipelinedAggregationOp}. */ private static class SharedState { /** * A map whose keys are the bindings on the specified variables and * whose values are the per-group state. *

* Note: The map is shared state and can not be discarded or cleared * until the last invocation!!! *

* Note: This is only iff an explicit GROUP_BY clause is used. */ private final LinkedHashMap map; /** * The aggregates to be computed (they have internal state). *

* Note: The map is shared state and can not be discarded or cleared * until the last invocation!!! *

* Note: This is bound iff all solutions will be collected within a * single implicit group. */ private final LinkedHashMap, IVariable> aggExpr; SharedState(final PipelinedAggregationOp op, final AggregateStats stats) { if (stats.groupByState.getGroupByClause() == null) { map = null; aggExpr = stats.rewrite.getAggExpr(); } else { /* * The map is only defined if a GROUP_BY clause was used. */ map = new LinkedHashMap( op.getInitialCapacity(), op.getLoadFactor()); aggExpr = null; } } } /** * Task executing on the node. */ static private class ChunkTask implements Callable { private final BOpContext context; /** * A map whose keys are the bindings on the specified variables and * whose values are the per-group state. *

* Note: The map is shared state and can not be discarded or cleared * until the last invocation!!! *

* Note: This is only iff an explicit GROUP_BY clause is used. */ private final LinkedHashMap map; /** * The aggregates to be computed (they have internal state). *

* Note: The map is shared state and can not be discarded or cleared * until the last invocation!!! *

* Note: This is bound iff all solutions will be collected within a * single implicit group. */ private final LinkedHashMap, IVariable> aggExpr; private final IGroupByState groupByState; private final IGroupByRewriteState rewrite; private final IValueExpression[] groupBy; private final Object sharedStateKey; private final BOpStats stats; ChunkTask(final PipelinedAggregationOp op, final BOpContext context) { this.context = context; this.sharedStateKey = op.getId(); final AggregateStats stats = (AggregateStats) context.getStats(); this.stats = stats; final SharedState sharedState; synchronized (stats) { if (stats.first) { /* * Setup the shared state. */ stats.first = false; sharedState = new SharedState(op, stats); context.getRunningQuery().getAttributes() .put(sharedStateKey, sharedState); } else { sharedState = (SharedState) context.getRunningQuery() .getAttributes().get(sharedStateKey); } } // synchronized(stats) /* * Initialize from the shared state. */ this.map = sharedState.map; this.aggExpr = sharedState.aggExpr; this.groupByState = stats.groupByState; this.rewrite = stats.rewrite; this.groupBy = stats.groupByState.getGroupByClause(); } /** * Discard the shared state (this can not be discarded until the last * invocation). */ private void release() { context.getRunningQuery().getAttributes().remove(sharedStateKey); } /** * Update the state of the {@link IAggregate}s for the appropriate * group. * * @param bset * The solution. */ private void accept(final IBindingSet bset) { if (groupBy == null || groupBy.length == 0) throw new IllegalArgumentException(); if (bset == null) throw new IllegalArgumentException(); final SolutionGroup s = SolutionGroup.newInstance(groupBy, bset, stats); if (s == null) { // Drop the solution. if (log.isDebugEnabled()) log.debug("Dropping solution: " + bset); return; } SolutionGroupState m = map.get(s); if (m == null) { map.put(s, m = new SolutionGroupState(context, groupBy, rewrite .getAggExpr(), bset)); } // Accept the solution. 
if (log.isTraceEnabled()) log.trace("Accepting solution: " + bset); // Update the aggregates. doAggregate(m.aggExpr, bset, stats); } @Override public Void call() throws Exception { final ICloseableIterator itr = context .getSource(); final IBlockingBuffer sink = context.getSink(); try { while (itr.hasNext()) { final IBindingSet[] a = itr.next(); stats.chunksIn.increment(); stats.unitsIn.add(a.length); for (IBindingSet bset : a) { if (groupBy == null) { /* * A single implicit group. */ doAggregate(aggExpr, bset, stats); } else { /* * Explicit GROUP_BY. */ accept(bset); } } } if(context.isLastInvocation()) { // The solutions to be written onto the sink. final List outList = new LinkedList(); if(groupBy == null) { /* * A single implicit group. * * Output solution for the implicit group IFF the HAVING * constraints are satisfied. */ /** * The intermediate solution with all bindings produced * when evaluating this solution group. * * Note: There is no GROUP_BY so we do not need to * propagate any bindings declared by that clause. * * This evaluates the (rewritten) SELECT expressions. * The rewritten HAVING clause (if any) is then * evaluated. If the solution is not dropped, then only * the SELECTed variables are projected out. */ final IBindingSet aggregates = new ContextBindingSet(context, new ListBindingSet()); // Finalize and bind on [aggregates]. finalizeAggregates(aggExpr, aggregates, stats); // Evaluate SELECT expressions. for (IValueExpression expr : rewrite.getSelect2()) { try { expr.get(aggregates); } catch (SparqlTypeErrorException ex) { TypeErrorLog.handleTypeError(ex, expr, stats); continue; } catch (IllegalArgumentException ex) { /* * Note: This a hack turns an * IllegalArgumentException which we presume is * coming out of new Constant(null) into an * (implicit) SPARQL type error so we can drop * the binding for this SELECT expression. (Note * that we are not trying to drop the entire * group!) 
*/ TypeErrorLog.handleTypeError(ex, expr, stats); continue; } } // Verify optional HAVING constraint(s) final boolean drop; final IConstraint[] having2 = rewrite.getHaving2(); if (having2 != null && !BOpUtility .isConsistent(having2, aggregates)) { // drop this solution. drop = true; } else { drop = false; } if (log.isInfoEnabled()) log.info((drop ? "drop" : "keep") + " : " + aggregates); if (!drop) { // project out only selected variables. final IBindingSet out = aggregates .copy(groupByState.getSelectVars().toArray( new IVariable[0])); outList.add(out); } } else { /* * Explicit GROUP_BY. * * Output solutions for the observed groups which pass * the optional HAVING constraint(s). */ for (SolutionGroupState groupState : map.values()) { final IBindingSet aggregates = groupState.aggregates; // Finalize and bind on [aggregates]. finalizeAggregates(groupState.aggExpr, aggregates, stats); // Evaluate SELECT expressions. for (IValueExpression expr : rewrite .getSelect2()) { try { expr.get(aggregates); } catch (SparqlTypeErrorException ex) { TypeErrorLog.handleTypeError(ex, expr, stats); continue; } catch (IllegalArgumentException ex) { /* * Note: This hack turns an * IllegalArgumentException which we presume * is coming out of new Constant(null) into * an (implicit) SPARQL type error so we can * drop the binding for this SELECT * expression. (Note that we are not trying * to drop the entire group!) */ TypeErrorLog.handleTypeError(ex, expr, stats); continue; } } // Verify optional HAVING constraint(s) final boolean drop; final IConstraint[] having2 = rewrite.getHaving2(); if (having2 != null && !BOpUtility.isConsistent(having2, aggregates)) { // drop this solution. drop = true; } else { drop = false; } if (log.isInfoEnabled()) log.info((drop ? "drop" : "keep") + " : " + aggregates); if (!drop) { // project out only selected variables. 
final IBindingSet out = aggregates .copy(groupByState.getSelectVars() .toArray(new IVariable[0])); outList.add(out); } } } if (!outList.isEmpty()) { // Write the solutions onto the sink. sink.add(outList.toArray(new IBindingSet[0])); sink.flush(); } // Discard the shared state. release(); } // done. return null; } finally { sink.close(); } } } /** * Update the {@link IAggregate}s for the given binding set. *

* Note: The {@link IAggregate} instances MUST be distinct within each group * to avoid side-effects across groups. * * @param aggExpr * The aggregate expressions to be evaluated. * @param bset * The binding set. * @param stats * Used to report type errors. */ static private void doAggregate( final LinkedHashMap, IVariable> aggExpr, final IBindingSet bset, final BOpStats stats) { for (IAggregate a : aggExpr.keySet()) { try { a.get(bset); } catch (Throwable t) { if (InnerCause.isInnerCause(t, SparqlTypeErrorException.class)) { /* * Trap the type error. The group will be reported, but this * aggregate will not bind a value for that group (the * aggregate will track its error state internally.) */ TypeErrorLog.handleTypeError(t, a, stats); // if (log.isInfoEnabled()) // log.info("type error: expr=" + a + " : " + t); } } } } /** * Finalize the {@link IAggregate}s for a solution group (or for the * implicit group formed from all solutions when no GROUP_BY was given). * This invokes {@link IAggregate#done()} on each {@link IAggregate} in turn * and binds any non-null results onto aggregates. * * @param aggExpr * The aggregate expressions to be evaluated. * @param aggregates * The binding set where the aggregates will become bound. */ static private void finalizeAggregates( final LinkedHashMap, IVariable> aggExpr, final IBindingSet aggregates, final BOpStats stats) { for (Map.Entry, IVariable> e : aggExpr.entrySet()) { final IAggregate expr = e.getKey(); final Object val; try { val = expr.done(); } catch (Throwable t) { if (InnerCause.isInnerCause(t, SparqlTypeErrorException.class)) { // trap the type error and filter out the solution TypeErrorLog.handleTypeError(t, expr, stats); // if (log.isInfoEnabled()) // log.info("aggregate will not bind due type error: expr=" // + expr + " : " + t); // No binding. continue; } else { throw new RuntimeException(t); } } if (val != null) { // bind the result. aggregates.set(e.getValue(), new Constant(val)); } } } }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy