![JAR search and dependency download from the Maven repository](/logo.png)
com.bigdata.bop.solutions.GroupByRewriter Maven / Gradle / Ivy
/**
Copyright (C) SYSTAP, LLC DBA Blazegraph 2006-2016. All rights reserved.
Contact:
SYSTAP, LLC DBA Blazegraph
2501 Calvert ST NW #106
Washington, DC 20008
[email protected]
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
/*
* Created on Jul 29, 2011
*/
package com.bigdata.bop.solutions;
import java.io.Serializable;
import java.util.Arrays;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.Map;
import com.bigdata.bop.BOp;
import com.bigdata.bop.BOpBase;
import com.bigdata.bop.Bind;
import com.bigdata.bop.IBind;
import com.bigdata.bop.IConstraint;
import com.bigdata.bop.IValueExpression;
import com.bigdata.bop.IVariable;
import com.bigdata.bop.IVariableFactory;
import com.bigdata.bop.IVariableOrConstant;
import com.bigdata.bop.Var;
import com.bigdata.bop.aggregate.IAggregate;
/**
* Utility class simplifies an aggregation operator through a rewrite.
*
* @author Bryan Thompson
* @version $Id$
*
* TODO Modify to rewrite AVERAGE as SUM()/COUNT() in preparation for a
* more distributed / parallel evaluation when the aggregate does not
* use DISTINCT and when there are no dependencies among the aggregate
* expressions (e.g., SUM(x)+1 is fine, as is SUM(x)+SUM(y), but
* SUM(x+AVG(x)) introduces a dependency which prevents us from
* optimizing the aggregation using per-group incremental pipelined
* evaluation).
*/
public class GroupByRewriter implements IGroupByRewriteState, IVariableFactory,
Serializable {
/**
* Note: This class must be serializable so we may distribute it with the
* parallel decomposition of an aggregation operator on a cluster, otherwise
* each time an operator computes the rewrite it will assign new variables
* and we will be unable to combine the decomposed aggregation results on
* the query controller.
*/
private static final long serialVersionUID = 1L;
private final LinkedHashMap,IVariable>> aggExpr;
// private final LinkedHashMap,ProjectionType> columnProjections;
private final IValueExpression>[] select2;
private final IConstraint[] having2;
@Override
public String toString() {
final StringBuilder sb = new StringBuilder();
sb.append(getClass().getSimpleName());
sb.append("{aggExpr=" + aggExpr);
sb.append(",select2=" + Arrays.toString(select2));
sb.append(",having2=" + Arrays.toString(having2));
sb.append("}");
return sb.toString();
}
/**
* The set of all unique {@link IAggregate} expressions with {@link Bind}s
* onto anonymous variables. Any internal {@link IAggregate} have been
* lifted out and will appear before any {@link IAggregate}s which use them.
*/
public LinkedHashMap,IVariable>> getAggExpr() {
return aggExpr;
}
/**
* A modified version of the original SELECT expression which has the same
* semantics. However, the modified select expressions DO NOT contain any
* {@link IAggregate} functions. All {@link IAggregate} functions have been
* lifted out into {@link #aggExpr}.
*/
@Override
public IValueExpression>[] getSelect2() {
return select2;
}
/**
* A modified version of the original HAVING expression which has the same
* semantics (and null
iff the original was null
* or empty). However, the modified select expressions DO NOT contain any
* {@link IAggregate} functions. All {@link IAggregate} functions have been
* lifted out into {@link #aggExpr}.
*/
@Override
public IConstraint[] getHaving2() {
return having2;
}
// /**
// * Metadata flags reporting whether the column projection of a value
// * expression will include all values or only the distinct values. If both
// * flags are set, then both the projection of all values and the projection
// * of all distinct values are required.
// */
// public static class ProjectionType {
//
// /**
// * The column projection of the all observed values is required.
// */
// static final int AllValues = 1 << 0;
//
// /**
// * The column projection of the observed distinct values is required.
// */
// static final int DistinctValues = 1 << 1;
//
// private int state = 0;
//
// public ProjectionType() {
//
// }
//
// public boolean isAllValues() {
// return (state & AllValues) != 0;
// }
//
// public boolean isDistinctValues() {
// return (state & DistinctValues) != 0;
// }
//
// public void setAllValues() {
// state |= AllValues;
// }
//
// public void setDistinctValues() {
// state |= DistinctValues;
// }
//
// public Object getColumnProjection() {
// return proj;
// }
//
// public void setColumnProjection(Object proj) {
// this.proj = proj;
// }
//
// private Object proj;
//
// }
//
// /**
// * The distinct {@link IValueExpression}s whose column projections are used
// * by the {@link #getAggExpr() aggregate expressions}. The variables appear
// * in the order in which they are first used by the aggregate expressions.
// * For example, given
// *
// *
// * SELECT SUM(x), SUM(y), SUM(x+y), AVG(x+y), SUM(DISTINCT z)
// *
// *
// * this would return [x,y,x+y]
.
// *
// * Note that the value expression x+y
appears more than once as
// * the inner value expression of different aggregation functions, but it is
// * only reported once.
// *
// * TODO This is unused and untested.
// */
// public LinkedHashMap, ProjectionType> getColumnProjections() {
// return columnProjections;
// }
/**
* Special construct creates a distinct instance of each {@link IAggregate}
* in order to avoid side-effects in the internal state of the
* {@link IAggregate} functions when evaluated in different contexts (e.g.,
* a pipelined aggregation subquery).
*
* @param rewrittenState
*/
public GroupByRewriter(final IGroupByRewriteState rewrittenState) {
this.select2 = rewrittenState.getSelect2();
this.having2 = rewrittenState.getHaving2();
final LinkedHashMap, IVariable>> aggExpr = rewrittenState.getAggExpr();
this.aggExpr = new LinkedHashMap, IVariable>>();
for (Map.Entry, IVariable>> e : aggExpr.entrySet()) {
// Note: *clone* the IAggregate function!
this.aggExpr.put((IAggregate>) e.getKey().clone(), e.getValue());
}
}
public GroupByRewriter(final IGroupByState groupByState) {
if (groupByState == null)
throw new IllegalArgumentException();
this.aggExpr = new LinkedHashMap, IVariable>>();
// this.columnProjections = new LinkedHashMap, ProjectionType>();
final IValueExpression>[] select = groupByState.getSelectClause();
this.select2 = new IValueExpression[select.length];
/*
* Rewrite having clause.
*
* Note: The HAVING clause (if one exists) is rewritten first so the
* left-to-right dependency will evaluate the aggregates for the HAVING
* clause before the aggregates for the SELECT clause. This makes it
* easier to decide whether or not a GROUP will pass its HAVING filter
* before we project the solution for that group.
*/
final IConstraint[] having = groupByState.getHavingClause();
if (having == null || having.length == 0) {
this.having2 = null;
} else {
/*
* Rewrite the HAVING clause.
*/
having2 = new IConstraint[having.length];
for (int i = 0; i < having.length; i++) {
final IConstraint e = having[i];
having2[i] = (IConstraint) rewrite(e, this, aggExpr);
}
}
/*
* Rewrite SELECT clause.
*/
for (int i = 0; i < select.length; i++) {
final IValueExpression> e = select[i];
select2[i] = rewrite(e, this, aggExpr);
}
// /*
// * Collect the distinct value expressions (the inner expression for the
// * aggregates) and add some metadata about the types of column projects
// * which we require for each such value expression.
// */
// for (IAggregate> e : aggExpr.keySet()) {
// final IValueExpression> valueExpr = e.getExpr();
// ProjectionType ptype = columnProjections.get(valueExpr);
// if (ptype == null) {
// ptype = new ProjectionType();
// columnProjections.put(valueExpr, ptype);
// }
// if (e.isDistinct())
// ptype.setDistinctValues();
// else
// ptype.setAllValues();
// }
}
/**
* Rewrite an {@link IConstraint}.
*
* Note: Rewriting a constraint require us to effectively clone the original
* constraint. I've hacked this in two ways. First, I assume that the inner
* value expression is e.get(0)
. Second, I wrap the rewritten
* value expression with {@link #newConstraint(IValueExpression)}. That
* factory is responsible for the type of the returned {@link IConstraint}.
*/
static public IConstraint rewrite(final IConstraint e,
final IVariableFactory f,
final LinkedHashMap, IVariable>> aggExpr) {
// tunnel through : assumes arg0 is the inner value expression!
final IValueExpression> oldInnerExpr = (IValueExpression>) e.get(0);
// rewrite the inner value expression.
final IValueExpression> newInnerExpr = rewrite(oldInnerExpr, f,
aggExpr);
if (newInnerExpr == oldInnerExpr) {
// Not rewritten.
return e;
}
return (IConstraint) ((BOpBase) e).setArg(0, newInnerExpr);
}
/**
* Rewrite an {@link IValueExpression} from a SELECT or HAVING clause. If a
* rewrite is performed, then the modified expression is returned. Otherwise
* the original expression is returned. If an aggregation expression is
* lifted out by the rewrite, then it is added to aggExpr.
*
* @param e
* The {@link IValueExpression}.
* @param f
* A factory for anonymous variables.
* @param aggExpr
* A map associating each unique {@link IAggregate} expression
* lifted out of the an {@link IValueExpression} with an
* anonymous variable on which the value of the
* {@link IAggregate} lifted out the of {@link IValueExpression}
* will be bound when that {@link IAggregate} is evaluated.
*
* @return The (possibly rewritten) {@link IValueExpression}.
*/
public static IValueExpression> rewrite(final IValueExpression> e,
final IVariableFactory f,
final LinkedHashMap, IVariable>> aggExpr) {
/*
* Bind should only be observed at the top-level.
*/
if (e instanceof IBind>) {
final IValueExpression> expr = ((IBind>) e).getExpr();
if (expr instanceof IVariableOrConstant>) {
return e;
}
}
/*
* Re-write the expression (anything but a top-level bind).
*/
return rewrite2(e, f, aggExpr);
}
/**
* Depth first recursion replaces any {@link IAggregate}s. Depth first
* recursion is used to lift any embedded {@link IAggregate}s out first.
*
* @param expr
* The value expression.
* @param f
* The factory for anonymous variables.
* @param aggExpr
* The map of distinct {@link IAggregate} expressions and the
* anonymous variables which they will be bound as when they are
* evaluated.
*
* @return The (possibly rewritten) expression.
*/
static private IValueExpression> rewrite2(IValueExpression> expr,
final IVariableFactory f,
final LinkedHashMap, IVariable>> aggExpr) {
if (expr instanceof IVariableOrConstant> || expr.arity() == 0) {
// Nothing to do.
return expr;
}
/*
* Depth first recursion for each child argument.
*/
int index = 0;
final Iterator itr = expr.argIterator();
while (itr.hasNext()) {
final IValueExpression> c = (IValueExpression>) itr.next();
// rewrite child.
final IValueExpression> newC = rewrite(c, f, aggExpr);
if (newC != c) {
// copy-on-write for parent iff child was rewritten.
expr = (IValueExpression>) ((BOpBase) expr).setArg(index,
newC);
}
index++;
}
/*
* Examine this node and replace it with a reference to an anonymous
* variable if it is an IAggregate expression.
*/
if (expr instanceof IAggregate>) {
final IAggregate> t = (IAggregate>) expr;
IVariable> anonVar = aggExpr.get(t);
if (anonVar == null) {
/*
* This is the first time we have encountered this aggregate
* expression.
*/
// new anonymous variable.
anonVar = f.var();
// add to the map.
aggExpr.put(t, anonVar);
}
// return the anonymous variable for the aggregate expression.
return anonVar;
}
return expr;
}
/**
* Return a new anonymous variable (this is overridden by some unit tests in
* order to have predictable variable names).
*/
@Override
public IVariable> var() {
return Var.var();
}
}