All downloads are free. Search and download functionality uses the official Maven repository.

com.bigdata.bop.solutions.GroupByState Maven / Gradle / Ivy

/**

Copyright (C) SYSTAP, LLC DBA Blazegraph 2006-2016.  All rights reserved.

Contact:
     SYSTAP, LLC DBA Blazegraph
     2501 Calvert ST NW #106
     Washington, DC 20008
     licenses@blazegraph.com

This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
*/
/*
 * Created on Jul 27, 2011
 */

package com.bigdata.bop.solutions;

import java.io.Serializable;
import java.util.Arrays;
import java.util.Iterator;
import java.util.LinkedHashSet;
import java.util.concurrent.atomic.AtomicBoolean;

import org.apache.log4j.Logger;

import com.bigdata.bop.BOp;
import com.bigdata.bop.BOpUtility;
import com.bigdata.bop.IBind;
import com.bigdata.bop.IConstant;
import com.bigdata.bop.IConstraint;
import com.bigdata.bop.IValueExpression;
import com.bigdata.bop.IValueExpressionConstraint;
import com.bigdata.bop.IVariable;
import com.bigdata.bop.aggregate.IAggregate;

/**
 * An object which encapsulates the validation and state of an aggregation
 * operation with an optional GROUP BY clause, SELECT expressions, and an
 * optional HAVING clause. The SELECT expressions MUST be aggregates (if the
 * SELECT expressions do not involve aggregates then you should not be using an
 * aggregation operator to compute the select expressions).
 * 

* Note: As part of decoupling the SPARQL parser from the database in BLZG-1176, * a copy of this logic is now maintained in * {@link com.bigdata.rdf.sail.sparql.VerifyAggregates}. * * @see https://jira.blazegraph.com/browse/BLZG-1176 * @author Bryan Thompson */ public class GroupByState implements IGroupByState, Serializable { private static final long serialVersionUID = 1L; private static final Logger log = Logger.getLogger(GroupByState.class); private final IValueExpression[] select; private final IValueExpression[] groupBy; private final IConstraint[] having; private final LinkedHashSet> groupByVars = new LinkedHashSet>(); private final LinkedHashSet> selectVars = new LinkedHashSet>(); private final LinkedHashSet> columnVars = new LinkedHashSet>(); final private boolean anyDistinct; final private boolean selectDependency; final private boolean nestedAggregates; final private boolean simpleHaving; @Override public String toString() { final StringBuilder sb = new StringBuilder(); sb.append(getClass().getSimpleName()); sb.append("{select=" + Arrays.toString(select)); sb.append(",groupBy=" + Arrays.toString(groupBy)); sb.append(",having=" + Arrays.toString(having)); sb.append("}"); return sb.toString(); } @Override public IValueExpression[] getGroupByClause() { return groupBy; } @Override public LinkedHashSet> getGroupByVars() { return groupByVars; } @Override public IValueExpression[] getSelectClause() { return select; } @Override public LinkedHashSet> getSelectVars() { return selectVars; } @Override public IConstraint[] getHavingClause() { return having; } @Override public LinkedHashSet> getColumnVars() { return columnVars; } // public LinkedHashSet> getDistinctColumnVars() { // return distinctColumnVars; // } @Override public boolean isAnyDistinct() { return anyDistinct; } @Override public boolean isSelectDependency() { return selectDependency; } @Override public boolean isNestedAggregates() { return nestedAggregates; } @Override public boolean isSimpleHaving() { 
return simpleHaving; } public GroupByState(final IValueExpression[] select, final IValueExpression[] groupBy, final IConstraint[] having) { // normalize an empty[] to a null. this.groupBy = groupBy != null && groupBy.length == 0 ? null : groupBy; // must be non-null, non-empty array. this.select = select; if (select == null) throw new IllegalArgumentException(); if (select.length == 0) throw new IllegalArgumentException(); // normalize an empty[] to a null. this.having = having != null && having.length == 0 ? null : having; // true iff any aggregate expression uses DISTINCT. final AtomicBoolean anyDistinct = new AtomicBoolean(false); // true iff any aggregate expression nests another aggregate expression. final AtomicBoolean nestedAggregates = new AtomicBoolean(false); /* * Validate GROUP_BY value expressions. * * Note: The GROUP BY clause may include bare variables such as "?x", * non-aggregate expressions such as "STR(?x)" and declarations of * variables for non-aggregate expressions such as "STR(?x) as strX". * However, only bare variables or variables declared using "AS" may * appear in the SELECT clause. Those variables are collected in * [groupByVars]. * * Note: Aggregate functions MAY NOT appear in the GROUP_BY clause. */ if (groupBy != null) { // Collect top-level variables from GROUP_BY value expressions. for (IValueExpression expr : groupBy) { if (expr instanceof IVariable) { groupByVars.add((IVariable) expr); } else if (expr instanceof IBind) { final IBind bindExpr = (IBind) expr; final IValueExpression e = bindExpr.getExpr(); if (isAggregate(e, false/* isSelectClause */, null/* isSelectDependency */, nestedAggregates, anyDistinct)) { throw new IllegalArgumentException( "Aggregate expression not allowed in GROUP_BY: " + expr); } groupByVars.add(bindExpr.getVar()); } } } /* * Validate SELECT value expressions. 
* * Note: SELECT value expressions must be either variables appearing in * the top-level of the GROUP BY value expressions -or- a IBind wrapping * an aggregate function. * * Note: Certain optimizations are possible when none of the SELECT * value expressions use DISTINCT. * * Note: Certain optimizations are possible when all of the SELECT value * expressions may be computed based on per-group counters. */ { // true iff any aggregate expression uses a reference to another // aggregate expression in the select clause. final AtomicBoolean selectDependency = new AtomicBoolean(false); for (IValueExpression expr : select) { /* * Each SELECT value expression must be either a top-level * IVariable in the GROUP BY clause or an IBind wrapping a value * expression consisting solely of aggregates (which may of * course wrap bare variables) and constants. */ if (expr instanceof IVariable) { final IVariable var = (IVariable) expr; if (!groupByVars.contains(var)) { throw new IllegalArgumentException( "Bare variable not declared by GROUP_BY clause: " + var); } selectVars.add(var); } else if (expr instanceof IBind) { /* * Child of IBind must be a valid aggregate expression * consisting solely of aggregates (which may wrap bare * variables declared in the GROUP_BY clause) and constants. * * Note: Top-level variables already declared in a GROUP_BY * or SELECT clause MAY appear within other value * expressions in the SELECT clause. * * Note: If any aggregate in the expression uses DISTINCT * then we make a note of that as certain optimizations are * not possible when DISTINCT is used within an aggregate * expression (this is done by isAggregate()). 
*/ final IBind bindExpr = (IBind) expr; final IValueExpression e = bindExpr.getExpr(); if (!isAggregate(e, true/* isSelectClause */, selectDependency, nestedAggregates, anyDistinct)) throw new IllegalArgumentException("Not an aggregate: " + bindExpr); selectVars.add(bindExpr.getVar()); } else { throw new IllegalArgumentException( "Top-level of SELECT expression must be IVariable or IBind: " + expr); } } this.selectDependency = selectDependency.get(); } /* * HAVING clause. * * The having[] may be null or an empty[]. However, any value * expressions used within the IConstraint[] must be aggregates (as * defined for SELECT expressions). */ /* * true iff none of the value expressions in the HAVING clause involve * IAggregate functions. */ boolean simpleHaving = true; if (having != null) { for (IConstraint c : having) { /* * The constraint must be an aggregate expression. * * Note: Top-level variables already declared in a GROUP_BY or * SELECT clause MAY appear within value expressions in the * HAVING clause. * * Note: If any aggregate in the expression uses DISTINCT then * we make a note of that as certain optimizations are not * possible when DISTINCT is used within an aggregate expression * (this is done by isAggregate()). */ if (!isAggregate(c, false/* isSelectClause */, null/* isSelectDependency */, nestedAggregates, anyDistinct)) throw new IllegalArgumentException("Not an aggregate: " + c); if (simpleHaving) { /* * Inspect the value expression for each constraint. * Typically the constraint will be a SPARQLConstraint, * which reports the EBV of a value expression. If that * value expression uses an IAggregate function then we set * [simpleHaving := false]. We are done as soon as we have * falsified the "simpleHaving" hypothesis. 
*/ final IValueExpression expr = ((IValueExpressionConstraint) c) .getValueExpression(); final Iterator itr = BOpUtility.preOrderIterator(expr); while (itr.hasNext()) { final BOp t = itr.next(); if (t instanceof IAggregate) { simpleHaving = false; break; } } } } } this.simpleHaving = simpleHaving; // true iff any aggregate function nests another aggregate function // within it. this.nestedAggregates = nestedAggregates.get(); // true iff DISTINCT used w/in aggregate function in SELECT or HAVING. this.anyDistinct = anyDistinct.get(); } /** * Return true iff the expression is an aggregate. *

* Aggregates may be built out of constants, references to {@link IVariable} * s which are already defined and which are themselves aggregates, and * {@link IAggregate} functions. An {@link IVariable} will be an aggregate * if it appears as a bare variable in a GROUP_BY clause or if it declared * by a prior value expression in a GROUP_BY or SELECT clause. Testing * whether or not an {@link IValueExpression} is an aggregate therefore * depends on access to the set of known aggregates. The value expressions * in the GROUP_BY clause must be processed first (in order) followed by the * value expressions in the SELECT clause (in order). *

* An aggregate may use a non-aggregate variable only allowed within an * {@link IAggregate} function. For example, given: * SUM(?x) as ?y, ?x must be a non-aggregate * variable and ?y will be an aggregate variable. *

* Aggregate variables may be used both inside and outside of an * {@link IAggregate} function as long as the variable was declared before * it was used. For example, the following are legal: * *

     * SELECT SUM(?x) as ?y, SUM(?x + ?y) as ?z, SUM(?x)+AVG(?x) as ?z2
     * 
     * SELECT SUM(?x) as ?y, SUM(?x + COUNT(?y)) as ?z
     * 
* * Patterns where an aggregate depends on a prior aggregate prevent certain * optimizations, notably you have to evaluate each aggregate in turn rather * than evaluating them in parallel over the solutions is a group. If any * such patterns are observed in the SELECT clause then this method will set * isSelectDependency := true as a side-effect. * * @param op * An {@link IValueExpression} or {@link IConstraint}. * @param isSelectClause * true if the op appears a SELECT clause. * @param isSelectDependency * Set as a side-effect when an {@link IValueExpression} * appearing in a SELECT clause has a dependency on an * {@link IVariable} declared in the GROUP_BY clause or earlier * in the SELECT clause. This argument is optional unless * isSelectClause is true. * @param isNestedAggregates * Set as a side-effect when an {@link IValueExpression} * containing an {@link IAggregate} nests another * {@link IAggregate} within it. * @param isAnyDistinct * Set as a side-effect if an {@link IAggregate} function is * encountered which reports true for * {@link IAggregate#isDistinct()}. * * @return true iff the operator is an aggregate. */ protected boolean isAggregate(final BOp op, final boolean isSelectClause, final AtomicBoolean isSelectDependency, final AtomicBoolean isNestedAggregates, final AtomicBoolean isAnyDistinct) { if (op == null) throw new IllegalArgumentException(); if (op instanceof IConstant && isSelectClause) { /* * A constant appearing in the root of a SELECT expression is an * aggregate. 
*/ return true; } return isAggregate(op, isSelectClause, isSelectDependency, isNestedAggregates, isAnyDistinct, false/* withinAggregateFunction */); } private boolean isAggregate(final BOp op, final boolean isSelectClause, final AtomicBoolean isSelectDependency, final AtomicBoolean isNestedAggregates, final AtomicBoolean isAnyDistinct, final boolean withinAggregateFunction) { if (op instanceof IAggregate) { if(withinAggregateFunction) { isNestedAggregates.set(true); } if (((IAggregate) op).isDistinct()) { isAnyDistinct.set(true); } } final boolean aggregationContext = withinAggregateFunction || op instanceof IAggregate; boolean isAggregate = aggregationContext; { final BOp t = op; if (t instanceof IVariable) { final IVariable v = (IVariable) t; if (aggregationContext) { /* * Decide if a variable appearing in within an aggregation * context is a reference to a previously observed * aggregate. If not, then we presume it to be a variable in * the detail records and aggregation will (at least logically) * form a column projection of that variable for each group. */ if (!groupByVars.contains(v) && !selectVars.contains(v)) { columnVars.add(v); } return false; } if (groupByVars.contains(v)) { isAggregate = true; return true; } if (selectVars.contains(v)) { if (isSelectClause) isSelectDependency.set(true); isAggregate = true; return true; } if(isSelectClause) { /* * Note: This is also thrown when there is a forward * reference to a variable in the select expression which we * have not yet seen. * * Note: This situation does not arise for the GROUP_BY * clause because it may only reference non-aggregate * variables. * * Note: This situation does not arise for the HAVING clause * because it can not define new variables using "AS". 
*/ throw new IllegalArgumentException( "Non-aggregate variable in select expression: " + v); } } } final Iterator itr = op.argIterator(); while (itr.hasNext()) { final BOp arg = itr.next(); if(!(arg instanceof IValueExpression)) { // skip non-value expression arguments. continue; } if (log.isTraceEnabled()) log.trace("op=" + op.getClass() + // ", isSelectClause=" + isSelectClause // + ", isSelectDependency=" + isSelectDependency // + ", isNestedAggregates=" + isNestedAggregates// + ", isAnyDistinct=" + isAnyDistinct // + ", withinAggregateFunction=" + withinAggregateFunction // + ", aggregationContext=" + aggregationContext // + ", groupByVars=" + groupByVars// + ", selectVars=" + selectVars // + ", arg=" + arg// ); // recursion through child value expression. isAggregate |= isAggregate(arg, isSelectClause, isSelectDependency, isNestedAggregates, isAnyDistinct, aggregationContext/* withinAggregateFunction */); } return isAggregate; } }




© 2015 - 2025 Weber Informatics LLC | Privacy Policy