
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.lens.cube.parse;
import static org.apache.hadoop.hive.ql.parse.HiveParser.Identifier;
import static org.apache.hadoop.hive.ql.parse.HiveParser.TOK_TABLE_OR_COL;
import java.util.Iterator;
import org.apache.lens.cube.error.LensCubeErrorCode;
import org.apache.lens.cube.metadata.CubeMeasure;
import org.apache.lens.cube.metadata.ExprColumn;
import org.apache.lens.cube.parse.ExpressionResolver.ExprSpecContext;
import org.apache.lens.server.api.error.LensException;
import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.ql.parse.ASTNode;
import org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer;
import org.apache.hadoop.hive.ql.parse.HiveParser;
import org.antlr.runtime.CommonToken;
import lombok.extern.slf4j.Slf4j;
/**
* Replace select and having columns with default aggregate functions on them, if a default aggregate is defined and
* if there isn't already an aggregate function specified on the columns. Expressions which already
* contain aggregate sub-expressions will not be changed. At this point it is assumed that aliases have
* been added to all columns.
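*
* For illustration (with hypothetical cube, column and measure names): a query such as
* {@code SELECT cityid, msr1 FROM sales_cube} would, assuming msr1 declares {@code sum} as its
* default aggregate, be rewritten to {@code SELECT cityid, sum(msr1) FROM sales_cube}.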
*/
@Slf4j
class AggregateResolver implements ContextRewriter {
@Override
public void rewriteContext(CubeQueryContext cubeql) throws LensException {
if (cubeql.getCube() == null) {
return;
}
boolean nonDefaultAggregates = false;
boolean aggregateResolverDisabled = cubeql.getConf().getBoolean(CubeQueryConfUtil.DISABLE_AGGREGATE_RESOLVER,
CubeQueryConfUtil.DEFAULT_DISABLE_AGGREGATE_RESOLVER);
// Check if the query contains measures that are
// 1. not inside default aggregate expressions,
// 2. have no default aggregate defined, or
// 3. selected with DISTINCT.
// If so, only a raw (non-aggregated) fact can answer this query;
// in that case remove aggregated facts from the candidate fact list.
if (hasMeasuresInDistinctClause(cubeql, cubeql.getSelectAST(), false)
|| hasMeasuresInDistinctClause(cubeql, cubeql.getHavingAST(), false)
|| hasMeasuresNotInDefaultAggregates(cubeql, cubeql.getSelectAST(), null, aggregateResolverDisabled)
|| hasMeasuresNotInDefaultAggregates(cubeql, cubeql.getHavingAST(), null, aggregateResolverDisabled)
|| hasMeasures(cubeql, cubeql.getWhereAST()) || hasMeasures(cubeql, cubeql.getGroupByAST())
|| hasMeasures(cubeql, cubeql.getOrderByAST())) {
Iterator<Candidate> candItr = cubeql.getCandidates().iterator();
while (candItr.hasNext()) {
Candidate candidate = candItr.next();
if (candidate instanceof StorageCandidate) { // only work on storage candidates
StorageCandidate sc = (StorageCandidate) candidate;
if (sc.getFact().isAggregated()) {
cubeql.addStoragePruningMsg(sc, CandidateTablePruneCause.missingDefaultAggregate());
candItr.remove();
}
}
}
nonDefaultAggregates = true;
log.info("Query has non default aggregates, no aggregate resolution will be done");
}
if (nonDefaultAggregates || aggregateResolverDisabled) {
return;
}
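// Wrap eligible measure references in the select and having clauses with their default aggregates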
resolveClause(cubeql, cubeql.getSelectAST());
resolveClause(cubeql, cubeql.getHavingAST());
Configuration distConf = cubeql.getConf();
boolean isDimOnlyDistinctEnabled = distConf.getBoolean(CubeQueryConfUtil.ENABLE_ATTRFIELDS_ADD_DISTINCT,
CubeQueryConfUtil.DEFAULT_ATTR_FIELDS_ADD_DISTINCT);
// The having clause will always work with measures; if only keys are projected,
// the query should skip distinct and promote group by.
if (cubeql.getHavingAST() == null && isDimOnlyDistinctEnabled) {
// Check if any measure/aggregate columns or a distinct clause are used in the
// select tree. If not, update the selectAST token from "SELECT" to "SELECT DISTINCT"
if (!hasMeasures(cubeql, cubeql.getSelectAST()) && !isDistinctClauseUsed(cubeql.getSelectAST())
&& !HQLParser.hasAggregate(cubeql.getSelectAST())
&& !isAggregateDimExprUsedInSelect(cubeql, cubeql.getSelectAST())) {
cubeql.getSelectAST().getToken().setType(HiveParser.TOK_SELECTDI);
}
}
}
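/**
* Checks whether any expression column projected in the select clause has an expression spec
* that itself contains an aggregate. Such selects are not promoted to SELECT DISTINCT.
*/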
private boolean isAggregateDimExprUsedInSelect(CubeQueryContext cubeql, ASTNode selectAST) throws LensException {
for (int i = 0; i < selectAST.getChildCount(); i++) {
ASTNode child = (ASTNode) selectAST.getChild(i);
String expr = HQLParser.getString((ASTNode) child.getChild(0).getChild(1));
if (cubeql.getQueriedExprs().contains(expr)) {
for (ExpressionResolver.ExpressionContext expressionContext
: cubeql.getExprCtx().getAllExprsQueried().get(expr)) {
for (ExprColumn.ExprSpec exprSpec : expressionContext.getExprCol().getExpressionSpecs()) {
ASTNode exprAST = HQLParser.parseExpr(exprSpec.getExpr(), cubeql.getConf());
if (HQLParser.isAggregateAST(exprAST)) {
return true;
}
}
}
}
}
return false;
}
// We need to traverse the clause looking for eligible measures which can be
// wrapped inside aggregates
// We have to skip any columns that are already inside an aggregate UDAF
private String resolveClause(CubeQueryContext cubeql, ASTNode clause) throws LensException {
if (clause == null) {
return null;
}
for (int i = 0; i < clause.getChildCount(); i++) {
transform(cubeql, clause, (ASTNode) clause.getChild(i), i);
}
return HQLParser.getString(clause);
}
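/**
* Recursively walks the AST. Column references (TOK_TABLE_OR_COL or DOT leaves) that resolve to
* measures are wrapped with their default aggregate via wrapAggregate; subtrees that are already
* aggregate function calls are left untouched.
*/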
private ASTNode transform(CubeQueryContext cubeql, ASTNode parent, ASTNode node, int nodePos) throws LensException {
if (node == null) {
return node;
}
int nodeType = node.getToken().getType();
if (!(HQLParser.isAggregateAST(node))) {
if (nodeType == HiveParser.TOK_TABLE_OR_COL || nodeType == HiveParser.DOT) {
// Leaf node
ASTNode wrapped = wrapAggregate(cubeql, node);
if (wrapped != node) {
if (parent != null) {
parent.setChild(nodePos, wrapped);
} else {
return wrapped;
}
}
} else {
// Dig deeper in non-leaf nodes
for (int i = 0; i < node.getChildCount(); i++) {
transform(cubeql, node, (ASTNode) node.getChild(i), i);
}
}
}
return node;
}
// Wrap an aggregate function around the node if it's a measure; leave it
// unchanged otherwise
private ASTNode wrapAggregate(CubeQueryContext cubeql, ASTNode node) throws LensException {
String tabname = null;
String colname;
if (node.getToken().getType() == HiveParser.TOK_TABLE_OR_COL) {
colname = node.getChild(0).getText();
} else {
// node in 'alias.column' format
ASTNode tabident = HQLParser.findNodeByPath(node, TOK_TABLE_OR_COL, Identifier);
ASTNode colIdent = (ASTNode) node.getChild(1);
colname = colIdent.getText().toLowerCase();
tabname = tabident.getText().toLowerCase();
}
String msrname = StringUtils.isBlank(tabname) ? colname : tabname + "." + colname;
if (cubeql.isCubeMeasure(msrname)) {
if (cubeql.getQueriedExprs().contains(colname)) {
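// The column is an expression column; transform each of its candidate expression ASTs so that
// measures referenced inside the expression get wrapped with their default aggregates.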
String alias = cubeql.getAliasForTableName(cubeql.getCube().getName());
for (ExprSpecContext esc : cubeql.getExprCtx().getExpressionContext(colname, alias).getAllExprs()) {
ASTNode transformedNode = transform(cubeql, null, esc.getFinalAST(), 0);
esc.setFinalAST(transformedNode);
}
return node;
} else {
CubeMeasure measure = cubeql.getCube().getMeasureByName(colname);
String aggregateFn = measure.getAggregate();
if (StringUtils.isBlank(aggregateFn)) {
throw new LensException(LensCubeErrorCode.NO_DEFAULT_AGGREGATE.getLensErrorInfo(), colname);
}
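// Build a TOK_FUNCTION node representing defaultAggregate(column) and return it in place of the bare column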
ASTNode fnroot = new ASTNode(new CommonToken(HiveParser.TOK_FUNCTION, "TOK_FUNCTION"));
ASTNode fnIdentNode = new ASTNode(new CommonToken(HiveParser.Identifier, aggregateFn));
fnroot.addChild(fnIdentNode);
fnroot.addChild(node);
return fnroot;
}
} else {
return node;
}
}
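/**
* Returns true if the subtree references a measure that is either used outside any aggregate
* function without a default aggregate (or while the resolver is disabled), or used inside an
* aggregate function that differs from its default aggregate. Expression columns are expanded
* and checked recursively.
*/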
private boolean hasMeasuresNotInDefaultAggregates(CubeQueryContext cubeql, ASTNode node, String function,
boolean aggregateResolverDisabled) {
if (node == null) {
return false;
}
if (HQLParser.isAggregateAST(node)) {
if (node.getChild(0).getType() == HiveParser.Identifier) {
function = BaseSemanticAnalyzer.unescapeIdentifier(node.getChild(0).getText());
}
} else if (cubeql.isCubeMeasure(node)) {
// Exit point for the recursion
String colname;
if (node.getToken().getType() == HiveParser.TOK_TABLE_OR_COL) {
colname = node.getChild(0).getText();
} else {
// node in 'alias.column' format
ASTNode colIdent = (ASTNode) node.getChild(1);
colname = colIdent.getText();
}
colname = colname.toLowerCase();
if (cubeql.getQueriedExprs().contains(colname)) {
String cubeAlias = cubeql.getAliasForTableName(cubeql.getCube().getName());
for (ASTNode exprNode : cubeql.getExprCtx().getExpressionContext(colname, cubeAlias).getAllASTNodes()) {
if (hasMeasuresNotInDefaultAggregates(cubeql, exprNode, function, aggregateResolverDisabled)) {
return true;
}
}
return false;
} else {
CubeMeasure measure = cubeql.getCube().getMeasureByName(colname);
if (function != null && !function.isEmpty()) {
// Get the cube measure object and check if the passed function is the
// default one set for this measure
return !function.equalsIgnoreCase(measure.getAggregate());
} else if (!aggregateResolverDisabled && measure.getAggregate() != null) {
// not inside any aggregate, but default aggregate exists
return false;
}
return true;
}
}
for (int i = 0; i < node.getChildCount(); i++) {
if (hasMeasuresNotInDefaultAggregates(cubeql, (ASTNode) node.getChild(i), function, aggregateResolverDisabled)) {
// Return on the first measure not inside its default aggregate
return true;
}
}
return false;
}
/*
* Check if the distinct keyword is used anywhere in the node's subtree
*/
private boolean isDistinctClauseUsed(ASTNode node) {
if (node == null) {
return false;
}
if (node.getToken() != null) {
if (node.getToken().getType() == HiveParser.TOK_FUNCTIONDI
|| node.getToken().getType() == HiveParser.TOK_SELECTDI) {
return true;
}
}
for (int i = 0; i < node.getChildCount(); i++) {
if (isDistinctClauseUsed((ASTNode) node.getChild(i))) {
return true;
}
}
return false;
}
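/**
* Returns true if a measure is referenced inside a distinct clause, i.e. under a distinct
* aggregate function (TOK_FUNCTIONDI) or a SELECT DISTINCT (TOK_SELECTDI).
*/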
private boolean hasMeasuresInDistinctClause(CubeQueryContext cubeql, ASTNode node, boolean hasDistinct) {
if (node == null) {
return false;
}
int exprTokenType = node.getToken().getType();
boolean isDistinct = hasDistinct;
if (exprTokenType == HiveParser.TOK_FUNCTIONDI || exprTokenType == HiveParser.TOK_SELECTDI) {
isDistinct = true;
} else if (cubeql.isCubeMeasure(node) && isDistinct) {
// Exit point for the recursion
return true;
}
for (int i = 0; i < node.getChildCount(); i++) {
if (hasMeasuresInDistinctClause(cubeql, (ASTNode) node.getChild(i), isDistinct)) {
// Return on the first measure in distinct clause
return true;
}
}
return false;
}
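/**
* Returns true if the subtree references any cube measure.
*/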
private boolean hasMeasures(CubeQueryContext cubeql, ASTNode node) {
if (node == null) {
return false;
}
if (cubeql.isCubeMeasure(node)) {
return true;
}
for (int i = 0; i < node.getChildCount(); i++) {
if (hasMeasures(cubeql, (ASTNode) node.getChild(i))) {
return true;
}
}
return false;
}
}