org.apache.hadoop.hive.ql.optimizer.SharedWorkOptimizer Maven / Gradle / Ivy
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hive.ql.optimizer;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.BitSet;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import java.util.TreeMap;
import com.facebook.presto.hive.$internal.org.apache.commons.lang.StringUtils;
import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
import org.apache.hadoop.hive.ql.exec.AppMasterEventOperator;
import org.apache.hadoop.hive.ql.exec.DummyStoreOperator;
import org.apache.hadoop.hive.ql.exec.FilterOperator;
import org.apache.hadoop.hive.ql.exec.MapJoinOperator;
import org.apache.hadoop.hive.ql.exec.Operator;
import org.apache.hadoop.hive.ql.exec.OperatorFactory;
import org.apache.hadoop.hive.ql.exec.OperatorUtils;
import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator;
import org.apache.hadoop.hive.ql.exec.RowSchema;
import org.apache.hadoop.hive.ql.exec.TableScanOperator;
import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
import org.apache.hadoop.hive.ql.exec.UnionOperator;
import org.apache.hadoop.hive.ql.metadata.Table;
import org.apache.hadoop.hive.ql.parse.GenTezUtils;
import org.apache.hadoop.hive.ql.parse.ParseContext;
import org.apache.hadoop.hive.ql.parse.PrunedPartitionList;
import org.apache.hadoop.hive.ql.parse.SemanticException;
import org.apache.hadoop.hive.ql.parse.SemiJoinBranchInfo;
import org.apache.hadoop.hive.ql.plan.DynamicPruningEventDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDescUtils;
import org.apache.hadoop.hive.ql.plan.ExprNodeDynamicListDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDynamicValueDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
import org.apache.hadoop.hive.ql.plan.FilterDesc;
import org.apache.hadoop.hive.ql.plan.OperatorDesc;
import org.apache.hadoop.hive.ql.plan.ReduceSinkDesc;
import org.apache.hadoop.hive.ql.plan.TableScanDesc;
import org.apache.hadoop.hive.ql.stats.StatsUtils;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFBetween;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFInBloomFilter;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPAnd;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPOr;
import com.facebook.presto.hive.$internal.org.slf4j.Logger;
import com.facebook.presto.hive.$internal.org.slf4j.LoggerFactory;
import com.facebook.presto.hive.$internal.com.google.common.collect.ArrayListMultimap;
import com.facebook.presto.hive.$internal.com.google.common.collect.HashMultimap;
import com.facebook.presto.hive.$internal.com.google.common.collect.ImmutableList;
import com.facebook.presto.hive.$internal.com.google.common.collect.ImmutableSet;
import com.facebook.presto.hive.$internal.com.google.common.collect.Lists;
import com.facebook.presto.hive.$internal.com.google.common.collect.Multimap;
import com.facebook.presto.hive.$internal.com.google.common.collect.Multiset;
import com.facebook.presto.hive.$internal.com.google.common.collect.Sets;
import com.facebook.presto.hive.$internal.com.google.common.collect.TreeMultiset;
/**
 * Shared computation optimizer.
 *
 * <p>Originally, this rule would find scan operators over the same table
 * in the query plan and merge them if they met some preconditions.
 *
 *  TS    TS             TS
 *  |     |     ->      /  \
 *  Op    Op           Op  Op
 *
 * <p>Now the rule has been extended to find opportunities to other operators
 * downstream, not only a single table scan.
 *
 *  TS1   TS2    TS1   TS2            TS1   TS2
 *   |     |      |     |              |     |
 *   |    RS      |    RS              |    RS
 *   \   /        \   /       ->       \   /
 *  MapJoin      MapJoin              MapJoin
 *     |            |                  /   \
 *     Op           Op                Op   Op
 *
 * <p>If the extended version of the optimizer is enabled, it can go beyond
 * a work boundary to find reutilization opportunities.
 *
 * <p>The optimization only works with the Tez execution engine.
 */
public class SharedWorkOptimizer extends Transform {
/** Class-wide logger; the optimizer reports its decisions through it at debug level. */
private static final Logger LOG = LoggerFactory.getLogger(SharedWorkOptimizer.class);
/**
 * Runs the shared work optimization over the plan held by {@code pctx}.
 *
 * <p>The method proceeds in the following stages:
 * <ol>
 *   <li>Returns immediately when the plan has fewer than two top operators.</li>
 *   <li>Gathers DPP sources into a fresh {@code SharedWorkOptimizerCache}, groups the
 *       table scans by table name and ranks the tables by accumulated size.</li>
 *   <li>For every table, tries to merge each table scan into a previously kept scan of
 *       the same table, rewiring children and removing the discarded operators.</li>
 *   <li>Drops top operators that are left without children.</li>
 *   <li>When {@code HIVE_SHARED_WORK_EXTENDED_OPTIMIZATION} is set, repeats a similar
 *       merge for reduce sink operators that share the same parent, iterating until no
 *       candidate groups remain.</li>
 *   <li>When {@code HIVE_IN_TEST} is set, checks that the cache agrees with the plan.</li>
 * </ol>
 *
 * @param pctx parse context whose operator tree is modified in place
 * @return the same {@code pctx} instance that was passed in
 * @throws SemanticException if the test-mode consistency check finds that the cache and
 *         the plan differ; the helper calls may also raise it
 */
@Override
public ParseContext transform(ParseContext pctx) throws SemanticException {
// NOTE(review): in this copy of the file the generic type arguments appear to have been
// stripped (e.g. "List> sortedTables", "Operator> op"); the declarations below do not
// compile as shown and need their type arguments restored from the upstream source.
final Map topOps = pctx.getTopOps();
if (topOps.size() < 2) {
// Nothing to do, bail out
return pctx;
}
if (LOG.isDebugEnabled()) {
LOG.debug("Before SharedWorkOptimizer:\n" + Operator.toString(pctx.getTopOps().values()));
}
// Cache to use during optimization
SharedWorkOptimizerCache optimizerCache = new SharedWorkOptimizerCache();
// Gather information about the DPP table scans and store it in the cache
gatherDPPTableScanOps(pctx, optimizerCache);
// Map of dbName.TblName -> TSOperator
Multimap tableNameToOps = splitTableScanOpsByTable(pctx);
// We enforce a certain order when we do the reutilization.
// In particular, we use size of table x number of reads to
// rank the tables.
List> sortedTables = rankTablesByAccumulatedSize(pctx);
LOG.debug("Sorted tables by size: {}", sortedTables);
// Execute optimization
// existingOps holds, per table, the scans that were kept so far and are therefore
// candidates to absorb later scans; removedOps tracks every operator already discarded.
Multimap existingOps = ArrayListMultimap.create();
Set> removedOps = new HashSet<>();
for (Entry tablePair : sortedTables) {
String tableName = tablePair.getKey();
for (TableScanOperator discardableTsOp : tableNameToOps.get(tableName)) {
if (removedOps.contains(discardableTsOp)) {
LOG.debug("Skip {} as it has already been removed", discardableTsOp);
continue;
}
Collection prevTsOps = existingOps.get(tableName);
for (TableScanOperator retainableTsOp : prevTsOps) {
if (removedOps.contains(retainableTsOp)) {
LOG.debug("Skip {} as it has already been removed", retainableTsOp);
continue;
}
// First we quickly check if the two table scan operators can actually be merged
boolean mergeable = areMergeable(pctx, optimizerCache, retainableTsOp, discardableTsOp);
if (!mergeable) {
// Skip
LOG.debug("{} and {} cannot be merged", retainableTsOp, discardableTsOp);
continue;
}
// Secondly, we extract information about the part of the tree that can be merged
// as well as some structural information (memory consumption) that needs to be
// used to determine whether the merge can happen
SharedResult sr = extractSharedOptimizationInfoForRoot(
pctx, optimizerCache, retainableTsOp, discardableTsOp);
// It seems these two operators can be merged.
// Check that plan meets some preconditions before doing it.
// In particular, in the presence of map joins in the upstream plan:
// - we cannot exceed the noconditional task size, and
// - if we already merged the big table, we cannot merge the broadcast
// tables.
if (!validPreConditions(pctx, optimizerCache, sr)) {
// Skip
LOG.debug("{} and {} do not meet preconditions", retainableTsOp, discardableTsOp);
continue;
}
// We can merge
if (sr.retainableOps.size() > 1) {
// More than TS operator
// Re-parent every child of the last discardable operator onto the last retainable one.
Operator> lastRetainableOp = sr.retainableOps.get(sr.retainableOps.size() - 1);
Operator> lastDiscardableOp = sr.discardableOps.get(sr.discardableOps.size() - 1);
if (lastDiscardableOp.getNumChild() != 0) {
// Iterate over a copy because the child list is modified inside the loop
List> allChildren =
Lists.newArrayList(lastDiscardableOp.getChildOperators());
for (Operator extends OperatorDesc> op : allChildren) {
lastDiscardableOp.getChildOperators().remove(op);
op.replaceParent(lastDiscardableOp, lastRetainableOp);
lastRetainableOp.getChildOperators().add(op);
}
}
LOG.debug("Merging subtree starting at {} into subtree starting at {}",
discardableTsOp, retainableTsOp);
} else {
// Only TS operator
ExprNodeGenericFuncDesc exprNode = null;
if (retainableTsOp.getConf().getFilterExpr() != null) {
// Push filter on top of children
pushFilterToTopOfTableScan(optimizerCache, retainableTsOp);
// Clone to push to table scan
exprNode = (ExprNodeGenericFuncDesc) retainableTsOp.getConf().getFilterExpr();
}
if (discardableTsOp.getConf().getFilterExpr() != null) {
// Push filter on top
pushFilterToTopOfTableScan(optimizerCache, discardableTsOp);
ExprNodeGenericFuncDesc tsExprNode = discardableTsOp.getConf().getFilterExpr();
if (exprNode != null && !exprNode.isSame(tsExprNode)) {
// We merge filters from previous scan by ORing with filters from current scan
if (exprNode.getGenericUDF() instanceof GenericUDFOPOr) {
// Copy the existing OR operands, stopping early if the new filter is already one
// of them; the size comparison below detects whether that early stop happened.
List newChildren = new ArrayList<>(exprNode.getChildren().size() + 1);
for (ExprNodeDesc childExprNode : exprNode.getChildren()) {
if (childExprNode.isSame(tsExprNode)) {
// We do not need to do anything, it is in the OR expression
break;
}
newChildren.add(childExprNode);
}
if (exprNode.getChildren().size() == newChildren.size()) {
newChildren.add(tsExprNode);
exprNode = ExprNodeGenericFuncDesc.newInstance(
new GenericUDFOPOr(),
newChildren);
}
} else {
exprNode = ExprNodeGenericFuncDesc.newInstance(
new GenericUDFOPOr(),
Arrays.asList(exprNode, tsExprNode));
}
}
}
// Replace filter
// (exprNode is still null here when the retainable scan had no filter expression)
retainableTsOp.getConf().setFilterExpr(exprNode);
// Replace table scan operator
List> allChildren =
Lists.newArrayList(discardableTsOp.getChildOperators());
for (Operator extends OperatorDesc> op : allChildren) {
discardableTsOp.getChildOperators().remove(op);
op.replaceParent(discardableTsOp, retainableTsOp);
retainableTsOp.getChildOperators().add(op);
}
LOG.debug("Merging {} into {}", discardableTsOp, retainableTsOp);
}
// First we remove the input operators of the expression that
// we are going to eliminate
for (Operator> op : sr.discardableInputOps) {
OperatorUtils.removeOperator(op);
optimizerCache.removeOp(op);
removedOps.add(op);
// Remove DPP predicates
// (only when the target table scan is not itself among the operators being discarded)
if (op instanceof ReduceSinkOperator) {
SemiJoinBranchInfo sjbi = pctx.getRsToSemiJoinBranchInfo().get(op);
if (sjbi != null && !sr.discardableOps.contains(sjbi.getTsOp()) &&
!sr.discardableInputOps.contains(sjbi.getTsOp())) {
GenTezUtils.removeSemiJoinOperator(
pctx, (ReduceSinkOperator) op, sjbi.getTsOp());
optimizerCache.tableScanToDPPSource.remove(sjbi.getTsOp(), op);
}
} else if (op instanceof AppMasterEventOperator) {
DynamicPruningEventDesc dped = (DynamicPruningEventDesc) op.getConf();
if (!sr.discardableOps.contains(dped.getTableScan()) &&
!sr.discardableInputOps.contains(dped.getTableScan())) {
GenTezUtils.removeSemiJoinOperator(
pctx, (AppMasterEventOperator) op, dped.getTableScan());
optimizerCache.tableScanToDPPSource.remove(dped.getTableScan(), op);
}
}
LOG.debug("Input operator removed: {}", op);
}
// Then we merge the operators of the works we are going to merge
optimizerCache.removeOpAndCombineWork(discardableTsOp, retainableTsOp);
removedOps.add(discardableTsOp);
// Finally we remove the expression from the tree
for (Operator> op : sr.discardableOps) {
OperatorUtils.removeOperator(op);
optimizerCache.removeOp(op);
removedOps.add(op);
if (sr.discardableOps.size() == 1) {
// If there is a single discardable operator, it is a TableScanOperator
// and it means that we have merged filter expressions for it. Thus, we
// might need to remove DPP predicates from the retainable TableScanOperator
Collection> c =
optimizerCache.tableScanToDPPSource.get((TableScanOperator) op);
for (Operator> dppSource : c) {
// NOTE(review): both remove(...) calls below pass "op" (the discarded table scan) as
// the value, not "dppSource", although the cache is keyed table scan -> DPP source.
// This looks like it may have been meant to be dppSource - confirm before changing.
if (dppSource instanceof ReduceSinkOperator) {
GenTezUtils.removeSemiJoinOperator(pctx,
(ReduceSinkOperator) dppSource,
(TableScanOperator) sr.retainableOps.get(0));
optimizerCache.tableScanToDPPSource.remove(sr.retainableOps.get(0), op);
} else if (dppSource instanceof AppMasterEventOperator) {
GenTezUtils.removeSemiJoinOperator(pctx,
(AppMasterEventOperator) dppSource,
(TableScanOperator) sr.retainableOps.get(0));
optimizerCache.tableScanToDPPSource.remove(sr.retainableOps.get(0), op);
}
}
}
LOG.debug("Operator removed: {}", op);
}
// The discardable scan has been merged; stop looking at further retainable candidates
break;
}
if (removedOps.contains(discardableTsOp)) {
// This operator has been removed, remove it from the list of existing operators
existingOps.remove(tableName, discardableTsOp);
} else {
// This operator has not been removed, include it in the list of existing operators
existingOps.put(tableName, discardableTsOp);
}
}
}
// Remove unused table scan operators
Iterator> it = topOps.entrySet().iterator();
while (it.hasNext()) {
Entry e = it.next();
if (e.getValue().getNumChild() == 0) {
it.remove();
}
}
if (LOG.isDebugEnabled()) {
LOG.debug("After SharedWorkOptimizer:\n" + Operator.toString(pctx.getTopOps().values()));
}
if(pctx.getConf().getBoolVar(ConfVars.HIVE_SHARED_WORK_EXTENDED_OPTIMIZATION)) {
// Gather RS operators that 1) belong to root works, i.e., works containing TS operators,
// and 2) share the same input operator.
// These will be the first target for extended shared work optimization
Multimap, ReduceSinkOperator> parentToRsOps = ArrayListMultimap.create();
Set> visited = new HashSet<>();
for (Entry e : topOps.entrySet()) {
gatherReduceSinkOpsByInput(parentToRsOps, visited,
findWorkOperators(optimizerCache, e.getValue()));
}
// Each pass may produce new candidate groups; loop until a pass gathers none
while (!parentToRsOps.isEmpty()) {
// As above, we enforce a certain order when we do the reutilization.
// In particular, we use size of data in RS x number of uses.
List, Long>> sortedRSGroups =
rankOpsByAccumulatedSize(parentToRsOps.keySet());
LOG.debug("Sorted operators by size: {}", sortedRSGroups);
// Execute extended optimization
// For each RS, check whether other RS in same work could be merge into this one.
// If they are merged, RS operators in the resulting work will be considered
// mergeable in next loop iteration.
Multimap, ReduceSinkOperator> existingRsOps = ArrayListMultimap.create();
for (Entry, Long> rsGroupInfo : sortedRSGroups) {
Operator> rsParent = rsGroupInfo.getKey();
for (ReduceSinkOperator discardableRsOp : parentToRsOps.get(rsParent)) {
if (removedOps.contains(discardableRsOp)) {
LOG.debug("Skip {} as it has already been removed", discardableRsOp);
continue;
}
Collection otherRsOps = existingRsOps.get(rsParent);
for (ReduceSinkOperator retainableRsOp : otherRsOps) {
if (removedOps.contains(retainableRsOp)) {
LOG.debug("Skip {} as it has already been removed", retainableRsOp);
continue;
}
// First we quickly check if the two RS operators can actually be merged.
// We already know that these two RS operators have the same parent, but
// we need to check whether both RS are actually equal. Further, we check
// whether their child is also equal. If any of these conditions are not
// met, we are not going to try to merge.
boolean mergeable = compareOperator(pctx, retainableRsOp, discardableRsOp) &&
compareOperator(pctx, retainableRsOp.getChildOperators().get(0),
discardableRsOp.getChildOperators().get(0));
if (!mergeable) {
// Skip
LOG.debug("{} and {} cannot be merged", retainableRsOp, discardableRsOp);
continue;
}
LOG.debug("Checking additional conditions for merging subtree starting at {}"
+ " into subtree starting at {}", discardableRsOp, retainableRsOp);
// Secondly, we extract information about the part of the tree that can be merged
// as well as some structural information (memory consumption) that needs to be
// used to determine whether the merge can happen
Operator> retainableRsOpChild = retainableRsOp.getChildOperators().get(0);
Operator> discardableRsOpChild = discardableRsOp.getChildOperators().get(0);
SharedResult sr = extractSharedOptimizationInfo(
pctx, optimizerCache, retainableRsOp, discardableRsOp,
retainableRsOpChild, discardableRsOpChild);
// It seems these two operators can be merged.
// Check that plan meets some preconditions before doing it.
// In particular, in the presence of map joins in the upstream plan:
// - we cannot exceed the noconditional task size, and
// - if we already merged the big table, we cannot merge the broadcast
// tables.
if (sr.retainableOps.isEmpty() || !validPreConditions(pctx, optimizerCache, sr)) {
// Skip
LOG.debug("{} and {} do not meet preconditions", retainableRsOp, discardableRsOp);
continue;
}
// We can merge
Operator> lastRetainableOp = sr.retainableOps.get(sr.retainableOps.size() - 1);
Operator> lastDiscardableOp = sr.discardableOps.get(sr.discardableOps.size() - 1);
if (lastDiscardableOp.getNumChild() != 0) {
// Iterate over a copy because the child list is modified inside the loop
List> allChildren =
Lists.newArrayList(lastDiscardableOp.getChildOperators());
for (Operator extends OperatorDesc> op : allChildren) {
lastDiscardableOp.getChildOperators().remove(op);
op.replaceParent(lastDiscardableOp, lastRetainableOp);
lastRetainableOp.getChildOperators().add(op);
}
}
LOG.debug("Merging subtree starting at {} into subtree starting at {}",
discardableRsOp, retainableRsOp);
// First we remove the input operators of the expression that
// we are going to eliminate
for (Operator> op : sr.discardableInputOps) {
OperatorUtils.removeOperator(op);
optimizerCache.removeOp(op);
removedOps.add(op);
// Remove DPP predicates
if (op instanceof ReduceSinkOperator) {
SemiJoinBranchInfo sjbi = pctx.getRsToSemiJoinBranchInfo().get(op);
if (sjbi != null && !sr.discardableOps.contains(sjbi.getTsOp()) &&
!sr.discardableInputOps.contains(sjbi.getTsOp())) {
GenTezUtils.removeSemiJoinOperator(
pctx, (ReduceSinkOperator) op, sjbi.getTsOp());
optimizerCache.tableScanToDPPSource.remove(sjbi.getTsOp(), op);
}
} else if (op instanceof AppMasterEventOperator) {
DynamicPruningEventDesc dped = (DynamicPruningEventDesc) op.getConf();
if (!sr.discardableOps.contains(dped.getTableScan()) &&
!sr.discardableInputOps.contains(dped.getTableScan())) {
GenTezUtils.removeSemiJoinOperator(
pctx, (AppMasterEventOperator) op, dped.getTableScan());
optimizerCache.tableScanToDPPSource.remove(dped.getTableScan(), op);
}
}
LOG.debug("Input operator removed: {}", op);
}
// We remove the discardable RS operator
OperatorUtils.removeOperator(discardableRsOp);
optimizerCache.removeOp(discardableRsOp);
removedOps.add(discardableRsOp);
LOG.debug("Operator removed: {}", discardableRsOp);
// Then we merge the operators of the works we are going to merge
optimizerCache.removeOpAndCombineWork(discardableRsOpChild, retainableRsOpChild);
// Finally we remove the rest of the expression from the tree
for (Operator> op : sr.discardableOps) {
OperatorUtils.removeOperator(op);
optimizerCache.removeOp(op);
removedOps.add(op);
LOG.debug("Operator removed: {}", op);
}
// The discardable RS has been merged; stop looking at further retainable candidates
break;
}
if (removedOps.contains(discardableRsOp)) {
// This operator has been removed, remove it from the list of existing operators
existingRsOps.remove(rsParent, discardableRsOp);
} else {
// This operator has not been removed, include it in the list of existing operators
existingRsOps.put(rsParent, discardableRsOp);
}
}
}
// We gather the operators that will be used for next iteration of extended optimization
// (if any)
parentToRsOps = ArrayListMultimap.create();
visited = new HashSet<>();
for (Entry, ReduceSinkOperator> e : existingRsOps.entries()) {
if (removedOps.contains(e.getValue()) || e.getValue().getNumChild() < 1) {
// If 1) RS has been removed, or 2) it does not have a child (for instance, it is a
// semijoin RS), we can quickly skip this one
continue;
}
gatherReduceSinkOpsByInput(parentToRsOps, visited,
findWorkOperators(optimizerCache, e.getValue().getChildOperators().get(0)));
}
}
// Remove unused table scan operators
it = topOps.entrySet().iterator();
while (it.hasNext()) {
Entry e = it.next();
if (e.getValue().getNumChild() == 0) {
it.remove();
}
}
if (LOG.isDebugEnabled()) {
LOG.debug("After SharedWorkExtendedOptimizer:\n"
+ Operator.toString(pctx.getTopOps().values()));
}
}
// If we are running tests, we are going to verify that the contents of the cache
// correspond with the contents of the plan, and otherwise we fail.
// This check always run when we are running in test mode, independently on whether
// we use the basic or the extended version of the optimizer.
if (pctx.getConf().getBoolVar(ConfVars.HIVE_IN_TEST)) {
Set> visited = new HashSet<>();
it = topOps.entrySet().iterator();
while (it.hasNext()) {
Entry e = it.next();
for (Operator> op : OperatorUtils.findOperators(e.getValue(), Operator.class)) {
if (!visited.contains(op)) {
// Compare the work computed through the cache with the work computed by the
// findWorkOperators overload that takes an operator and a fresh set.
if (!findWorkOperators(optimizerCache, op).equals(
findWorkOperators(op, new HashSet>()))) {
// NOTE(review): the two literals below are concatenated without a separating space,
// so the message reads "...operator cache contentsand actual plan differ".
throw new SemanticException("Error in shared work optimizer: operator cache contents"
+ "and actual plan differ");
}
visited.add(op);
}
}
}
}
return pctx;
}
/**
* This method gathers the TS operators with DPP from the context and
* stores them into the input optimization cache.
*/
private static void gatherDPPTableScanOps(
ParseContext pctx, SharedWorkOptimizerCache optimizerCache) throws SemanticException {
// Find TS operators with partition pruning enabled in plan
// because these TS may potentially read different data for
// different pipeline.
// These can be:
// 1) TS with DPP.
// 2) TS with semijoin DPP.
Map topOps = pctx.getTopOps();
Collection> tableScanOps =
Lists.>newArrayList(topOps.values());
Set s =
OperatorUtils.findOperators(tableScanOps, AppMasterEventOperator.class);
for (AppMasterEventOperator a : s) {
if (a.getConf() instanceof DynamicPruningEventDesc) {
DynamicPruningEventDesc dped = (DynamicPruningEventDesc) a.getConf();
optimizerCache.tableScanToDPPSource.put(dped.getTableScan(), a);
}
}
for (Entry e
: pctx.getRsToSemiJoinBranchInfo().entrySet()) {
optimizerCache.tableScanToDPPSource.put(e.getValue().getTsOp(), e.getKey());
}
LOG.debug("DPP information stored in the cache: {}", optimizerCache.tableScanToDPPSource);
}
/**
 * Groups the top table scan operators by the fully qualified name
 * ({@code dbName.tableName}) of the table they read.
 *
 * @param pctx parse context providing the top operators
 * @return multimap from {@code dbName.tableName} to the table scans over that table,
 *         filled in ascending key order of the top operator map
 */
private static Multimap<String, TableScanOperator> splitTableScanOpsByTable(
    ParseContext pctx) {
  Multimap<String, TableScanOperator> tableNameToOps = ArrayListMultimap.create();
  // Sort by operator ID so we get deterministic results
  Map<String, TableScanOperator> sortedTopOps = new TreeMap<>(pctx.getTopOps());
  for (TableScanOperator tsOp : sortedTopOps.values()) {
    Table table = tsOp.getConf().getTableMetadata();
    tableNameToOps.put(table.getDbName() + "." + table.getTableName(), tsOp);
  }
  return tableNameToOps;
}
private static List> rankTablesByAccumulatedSize(ParseContext pctx) {
Map tableToTotalSize = new HashMap<>();
for (Entry e : pctx.getTopOps().entrySet()) {
TableScanOperator tsOp = e.getValue();
String tableName = tsOp.getConf().getTableMetadata().getDbName() + "."
+ tsOp.getConf().getTableMetadata().getTableName();
long tableSize = tsOp.getStatistics() != null ?
tsOp.getStatistics().getDataSize() : 0L;
Long totalSize = tableToTotalSize.get(tableName);
if (totalSize != null) {
tableToTotalSize.put(tableName,
StatsUtils.safeAdd(totalSize, tableSize));
} else {
tableToTotalSize.put(tableName, tableSize);
}
}
List> sortedTables =
new ArrayList<>(tableToTotalSize.entrySet());
Collections.sort(sortedTables, Collections.reverseOrder(
new Comparator>() {
@Override
public int compare(Map.Entry o1, Map.Entry o2) {
return (o1.getValue()).compareTo(o2.getValue());
}
}));
return sortedTables;
}
private static void gatherReduceSinkOpsByInput(Multimap,
ReduceSinkOperator> parentToRsOps, Set> visited, Set> ops) {
for (Operator> op : ops) {
// If the RS has other RS siblings, we will add it to be considered in next iteration
if (op instanceof ReduceSinkOperator && !visited.contains(op)) {
Operator> parent = op.getParentOperators().get(0);
Set s = new LinkedHashSet<>();
for (Operator> c : parent.getChildOperators()) {
if (c instanceof ReduceSinkOperator) {
s.add((ReduceSinkOperator) c);
visited.add(c);
}
}
if (s.size() > 1) {
parentToRsOps.putAll(parent, s);
}
}
}
}
private static List, Long>> rankOpsByAccumulatedSize(Set> opsSet) {
Map, Long> opToTotalSize = new HashMap<>();
for (Operator> op : opsSet) {
long size = op.getStatistics() != null ?
op.getStatistics().getDataSize() : 0L;
opToTotalSize.put(op,
StatsUtils.safeMult(op.getChildOperators().size(), size));
}
List, Long>> sortedOps =
new ArrayList<>(opToTotalSize.entrySet());
Collections.sort(sortedOps, Collections.reverseOrder(
new Comparator, Long>>() {
@Override
public int compare(Map.Entry, Long> o1, Map.Entry, Long> o2) {
int valCmp = o1.getValue().compareTo(o2.getValue());
if (valCmp == 0) {
return o1.getKey().toString().compareTo(o2.getKey().toString());
}
return valCmp;
}
}));
return sortedOps;
}
// FIXME: probably this should also be integrated with isSame() logics
/**
 * Quick check of whether two table scan operators can be merged.
 *
 * <p>The scans are mergeable only when all of the following hold:
 * <ul>
 *   <li>their needed columns are equal, in the same order;</li>
 *   <li>their row limits are equal;</li>
 *   <li>their pruned partitions are equal;</li>
 *   <li>no reduce sink DPP source of one scan has the other scan among its ascendant
 *       work operators;</li>
 *   <li>both have the same number of DPP sources, and every DPP source of the first
 *       scan can be paired with a distinct, equal DPP branch of the second scan.</li>
 * </ul>
 *
 * @param pctx parse context
 * @param optimizerCache cache holding the table scan to DPP source mapping
 * @param tsOp1 first table scan
 * @param tsOp2 second table scan
 * @return {@code true} if the two table scans can be merged
 * @throws SemanticException propagated from the helper calls
 */
private static boolean areMergeable(ParseContext pctx, SharedWorkOptimizerCache optimizerCache,
    TableScanOperator tsOp1, TableScanOperator tsOp2) throws SemanticException {
  // If schemas do not match, we currently do not merge
  List<String> prevTsOpNeededColumns = tsOp1.getNeededColumns();
  List<String> tsOpNeededColumns = tsOp2.getNeededColumns();
  if (!prevTsOpNeededColumns.equals(tsOpNeededColumns)) {
    return false;
  }
  // If row limit does not match, we currently do not merge
  if (tsOp1.getConf().getRowLimit() != tsOp2.getConf().getRowLimit()) {
    return false;
  }
  // If partitions do not match, we currently do not merge
  PrunedPartitionList prevTsOpPPList = pctx.getPrunedPartitions(tsOp1);
  PrunedPartitionList tsOpPPList = pctx.getPrunedPartitions(tsOp2);
  if (!prevTsOpPPList.getPartitions().equals(tsOpPPList.getPartitions())) {
    return false;
  }
  // If is a DPP, check if actually it refers to same target, column, etc.
  // Further, the DPP value needs to be generated from same subtree
  List<Operator<?>> dppsOp1 = new ArrayList<>(optimizerCache.tableScanToDPPSource.get(tsOp1));
  List<Operator<?>> dppsOp2 = new ArrayList<>(optimizerCache.tableScanToDPPSource.get(tsOp2));
  if (dppsOp1.isEmpty() && dppsOp2.isEmpty()) {
    return true;
  }
  // This should not happen, but if a DPP source of one scan is fed by the other scan
  // we cannot merge
  if (anyReduceSinkDppSourceHasAscendant(pctx, optimizerCache, dppsOp1, tsOp2)
      || anyReduceSinkDppSourceHasAscendant(pctx, optimizerCache, dppsOp2, tsOp1)) {
    return false;
  }
  if (dppsOp1.size() != dppsOp2.size()) {
    // Only first or second operator contains DPP pruning
    return false;
  }
  // Check if DPP branches are equal: pair every source of the first scan with a
  // not yet matched source of the second scan.
  BitSet matched = new BitSet();
  for (int i = 0; i < dppsOp1.size(); i++) {
    Operator<?> dppOp1 = dppsOp1.get(i);
    for (int j = 0; j < dppsOp2.size(); j++) {
      if (!matched.get(j) && compareAndGatherOps(pctx, dppOp1, dppsOp2.get(j)) != null) {
        // The DPP operator/branch are equal
        matched.set(j);
        break;
      }
    }
    if (matched.cardinality() < i + 1) {
      // No partner was found for dppOp1
      return false;
    }
  }
  return true;
}

/**
 * Returns whether any reduce sink in {@code dppSources} has {@code tsOp} among the
 * operators reported by {@code findAscendantWorkOperators}.
 */
private static boolean anyReduceSinkDppSourceHasAscendant(ParseContext pctx,
    SharedWorkOptimizerCache optimizerCache, List<Operator<?>> dppSources,
    TableScanOperator tsOp) throws SemanticException {
  for (Operator<?> dppSource : dppSources) {
    if (dppSource instanceof ReduceSinkOperator) {
      Set<Operator<?>> ascendants =
          findAscendantWorkOperators(pctx, optimizerCache, dppSource);
      if (ascendants.contains(tsOp)) {
        return true;
      }
    }
  }
  return false;
}
/**
 * Computes which operators can be shared when merging {@code discardableTsOp} into
 * {@code retainableTsOp}, starting at the table scans themselves.
 *
 * <p>The two table scans are always part of the result. If either scan has more than one
 * child the result contains only the scans. Otherwise, when both children are filter
 * operators they are compared ignoring DPP predicates: equal filters are added to the
 * result, and the method returns early if the filters differ or either filter has more
 * than one child. In all remaining cases the comparison continues downstream through
 * {@code extractSharedOptimizationInfo}.
 *
 * @param pctx parse context
 * @param optimizerCache optimizer cache
 * @param retainableTsOp table scan that would be kept
 * @param discardableTsOp table scan that would be removed
 * @return the retainable, discardable and discardable input operators, plus data sizes
 * @throws SemanticException propagated from the helper calls
 */
private static SharedResult extractSharedOptimizationInfoForRoot(ParseContext pctx,
    SharedWorkOptimizerCache optimizerCache,
    TableScanOperator retainableTsOp,
    TableScanOperator discardableTsOp) throws SemanticException {
  LinkedHashSet<Operator<?>> retainableOps = new LinkedHashSet<>();
  LinkedHashSet<Operator<?>> discardableOps = new LinkedHashSet<>();
  Set<Operator<?>> discardableInputOps = new HashSet<>();
  // No map join is inspected on the early-return paths, so both sizes stay at zero there
  long dataSize = 0L;
  long maxDataSize = 0L;

  retainableOps.add(retainableTsOp);
  discardableOps.add(discardableTsOp);
  Operator<?> equalOp1 = retainableTsOp;
  Operator<?> equalOp2 = discardableTsOp;
  if (equalOp1.getNumChild() > 1 || equalOp2.getNumChild() > 1) {
    // TODO: Support checking multiple child operators to merge further.
    // NOTE(review): unlike the bail-out below, this path only gathers the DPP branches
    // of the discardable operators - confirm that this asymmetry is intended.
    discardableInputOps.addAll(gatherDPPBranchOps(pctx, optimizerCache, discardableOps));
    return new SharedResult(retainableOps, discardableOps, discardableInputOps,
        dataSize, maxDataSize);
  }
  Operator<?> currentOp1 = retainableTsOp.getChildOperators().get(0);
  Operator<?> currentOp2 = discardableTsOp.getChildOperators().get(0);

  // Special treatment for Filter operator that ignores the DPP predicates
  if (currentOp1 instanceof FilterOperator && currentOp2 instanceof FilterOperator) {
    FilterDesc op1Conf = ((FilterOperator) currentOp1).getConf();
    FilterDesc op2Conf = ((FilterOperator) currentOp2).getConf();
    boolean equalFilters = false;
    if (op1Conf.getIsSamplingPred() == op2Conf.getIsSamplingPred()
        && StringUtils.equals(op1Conf.getSampleDescExpr(), op2Conf.getSampleDescExpr())) {
      Multiset<String> conjsOp1String = extractConjsIgnoringDPPPreds(op1Conf.getPredicate());
      Multiset<String> conjsOp2String = extractConjsIgnoringDPPPreds(op2Conf.getPredicate());
      equalFilters = conjsOp1String.equals(conjsOp2String);
    }
    if (equalFilters) {
      equalOp1 = currentOp1;
      equalOp2 = currentOp2;
      retainableOps.add(equalOp1);
      discardableOps.add(equalOp2);
    }
    // Bail out when the filters differ, or when they match but fan out
    // (TODO: Support checking multiple child operators to merge further.)
    if (!equalFilters
        || currentOp1.getChildOperators().size() > 1
        || currentOp2.getChildOperators().size() > 1) {
      discardableInputOps.addAll(gatherDPPBranchOps(pctx, optimizerCache, discardableOps));
      discardableInputOps.addAll(gatherDPPBranchOps(pctx, optimizerCache, retainableOps,
          discardableInputOps));
      return new SharedResult(retainableOps, discardableOps, discardableInputOps,
          dataSize, maxDataSize);
    }
    currentOp1 = currentOp1.getChildOperators().get(0);
    currentOp2 = currentOp2.getChildOperators().get(0);
  }

  return extractSharedOptimizationInfo(pctx, optimizerCache, equalOp1, equalOp2,
      currentOp1, currentOp2, retainableOps, discardableOps, discardableInputOps, false);
}
private static SharedResult extractSharedOptimizationInfo(ParseContext pctx,
SharedWorkOptimizerCache optimizerCache,
Operator> retainableOpEqualParent,
Operator> discardableOpEqualParent,
Operator> retainableOp,
Operator> discardableOp) throws SemanticException {
return extractSharedOptimizationInfo(pctx, optimizerCache,
retainableOpEqualParent, discardableOpEqualParent, retainableOp, discardableOp,
new LinkedHashSet<>(), new LinkedHashSet<>(), new HashSet<>(), true);
}
private static SharedResult extractSharedOptimizationInfo(ParseContext pctx,
SharedWorkOptimizerCache optimizerCache,
Operator> retainableOpEqualParent,
Operator> discardableOpEqualParent,
Operator> retainableOp,
Operator> discardableOp,
LinkedHashSet> retainableOps,
LinkedHashSet> discardableOps,
Set> discardableInputOps,
boolean removeInputBranch) throws SemanticException {
Operator> equalOp1 = retainableOpEqualParent;
Operator> equalOp2 = discardableOpEqualParent;
Operator> currentOp1 = retainableOp;
Operator> currentOp2 = discardableOp;
long dataSize = 0L;
long maxDataSize = 0L;
// Try to merge rest of operators
while (!(currentOp1 instanceof ReduceSinkOperator)) {
// Check whether current operators are equal
if (!compareOperator(pctx, currentOp1, currentOp2)) {
// If they are not equal, we could zip up till here
break;
}
if (currentOp1.getParentOperators().size() !=
currentOp2.getParentOperators().size()) {
// If they are not equal, we could zip up till here
break;
}
if (currentOp1.getParentOperators().size() > 1) {
List> discardableOpsForCurrentOp = new ArrayList<>();
int idx = 0;
for (; idx < currentOp1.getParentOperators().size(); idx++) {
Operator> parentOp1 = currentOp1.getParentOperators().get(idx);
Operator> parentOp2 = currentOp2.getParentOperators().get(idx);
if (parentOp1 == equalOp1 && parentOp2 == equalOp2 && !removeInputBranch) {
continue;
}
if ((parentOp1 == equalOp1 && parentOp2 != equalOp2) ||
(parentOp1 != equalOp1 && parentOp2 == equalOp2)) {
// Input operator is not in the same position
break;
}
// Compare input
List> removeOpsForCurrentInput =
compareAndGatherOps(pctx, parentOp1, parentOp2);
if (removeOpsForCurrentInput == null) {
// Inputs are not the same, bail out
break;
}
// Add inputs to ops to remove
discardableOpsForCurrentOp.addAll(removeOpsForCurrentInput);
}
if (idx != currentOp1.getParentOperators().size()) {
// If inputs are not equal, we could zip up till here
break;
}
discardableInputOps.addAll(discardableOpsForCurrentOp);
}
equalOp1 = currentOp1;
equalOp2 = currentOp2;
retainableOps.add(equalOp1);
discardableOps.add(equalOp2);
if (equalOp1 instanceof MapJoinOperator) {
MapJoinOperator mop = (MapJoinOperator) equalOp1;
dataSize = StatsUtils.safeAdd(dataSize, mop.getConf().getInMemoryDataSize());
maxDataSize = mop.getConf().getMemoryMonitorInfo().getAdjustedNoConditionalTaskSize();
}
if (currentOp1.getChildOperators().size() > 1 ||
currentOp2.getChildOperators().size() > 1) {
// TODO: Support checking multiple child operators to merge further.
break;
}
// Update for next iteration
currentOp1 = currentOp1.getChildOperators().get(0);
currentOp2 = currentOp2.getChildOperators().get(0);
}
// Add the rest to the memory consumption
Set> opsWork1 = findWorkOperators(optimizerCache, currentOp1);
for (Operator> op : opsWork1) {
if (op instanceof MapJoinOperator && !retainableOps.contains(op)) {
MapJoinOperator mop = (MapJoinOperator) op;
dataSize = StatsUtils.safeAdd(dataSize, mop.getConf().getInMemoryDataSize());
maxDataSize = mop.getConf().getMemoryMonitorInfo().getAdjustedNoConditionalTaskSize();
}
}
Set> opsWork2 = findWorkOperators(optimizerCache, currentOp2);
for (Operator> op : opsWork2) {
if (op instanceof MapJoinOperator && !discardableOps.contains(op)) {
MapJoinOperator mop = (MapJoinOperator) op;
dataSize = StatsUtils.safeAdd(dataSize, mop.getConf().getInMemoryDataSize());
maxDataSize = mop.getConf().getMemoryMonitorInfo().getAdjustedNoConditionalTaskSize();
}
}
discardableInputOps.addAll(gatherDPPBranchOps(pctx, optimizerCache,
Sets.union(discardableInputOps, discardableOps)));
discardableInputOps.addAll(gatherDPPBranchOps(pctx, optimizerCache, retainableOps,
discardableInputOps));
return new SharedResult(retainableOps, discardableOps, discardableInputOps,
dataSize, maxDataSize);
}
/**
 * Splits a predicate into its conjuncts and returns their string forms, leaving
 * out the conjuncts that are recognised as dynamic-pruning predicates.
 *
 * <p>A conjunct is skipped when it is a {@link GenericUDFInBloomFilter} call, a
 * {@link GenericUDFBetween} call whose child at index 2 or 3 is an
 * {@link ExprNodeDynamicValueDesc}, or an {@link ExprNodeDynamicListDesc}.
 *
 * @param predicate predicate to decompose
 * @return multiset with the {@code toString()} of every remaining conjunct
 */
private static Multiset<String> extractConjsIgnoringDPPPreds(ExprNodeDesc predicate) {
  List<ExprNodeDesc> conjsOp = ExprNodeDescUtils.split(predicate);
  Multiset<String> conjsOpString = TreeMultiset.create();
  for (ExprNodeDesc conj : conjsOp) {
    if (conj instanceof ExprNodeGenericFuncDesc) {
      ExprNodeGenericFuncDesc func = (ExprNodeGenericFuncDesc) conj;
      if (GenericUDFInBloomFilter.class == func.getGenericUDF().getClass()) {
        // Bloom filter predicate
        continue;
      } else if (GenericUDFBetween.class == func.getGenericUDF().getClass() &&
          (func.getChildren().get(2) instanceof ExprNodeDynamicValueDesc ||
              func.getChildren().get(3) instanceof ExprNodeDynamicValueDesc)) {
        // BETWEEN with a dynamic bound
        continue;
      }
    } else if (conj instanceof ExprNodeDynamicListDesc) {
      continue;
    }
    conjsOpString.add(conj.toString());
  }
  return conjsOpString;
}
private static Set> gatherDPPBranchOps(ParseContext pctx,
SharedWorkOptimizerCache optimizerCache, Set> ops) {
Set> dppBranches = new HashSet<>();
for (Operator> op : ops) {
if (op instanceof TableScanOperator) {
Collection> c = optimizerCache.tableScanToDPPSource
.get((TableScanOperator) op);
for (Operator> dppSource : c) {
// Remove the branches
removeBranch(dppSource, dppBranches, ops);
}
}
}
return dppBranches;
}
private static Set> gatherDPPBranchOps(ParseContext pctx,
SharedWorkOptimizerCache optimizerCache, Set> ops,
Set> discardedOps) {
Set> dppBranches = new HashSet<>();
for (Operator> op : ops) {
if (op instanceof TableScanOperator) {
Collection> c = optimizerCache.tableScanToDPPSource
.get((TableScanOperator) op);
for (Operator> dppSource : c) {
Set> ascendants =
findAscendantWorkOperators(pctx, optimizerCache, dppSource);
if (!Collections.disjoint(ascendants, discardedOps)) {
// Remove branch
removeBranch(dppSource, dppBranches, ops);
}
}
}
}
return dppBranches;
}
private static void removeBranch(Operator> currentOp, Set> branchesOps,
Set> discardableOps) {
if (currentOp.getNumChild() > 1) {
for (Operator> childOp : currentOp.getChildOperators()) {
if (!branchesOps.contains(childOp) && !discardableOps.contains(childOp)) {
return;
}
}
}
branchesOps.add(currentOp);
if (currentOp.getParentOperators() != null) {
for (Operator> parentOp : currentOp.getParentOperators()) {
removeBranch(parentOp, branchesOps, discardableOps);
}
}
}
private static List> compareAndGatherOps(ParseContext pctx,
Operator> op1, Operator> op2) throws SemanticException {
List> result = new ArrayList<>();
boolean mergeable = compareAndGatherOps(pctx, op1, op2, result, true);
if (!mergeable) {
return null;
}
return result;
}
private static boolean compareAndGatherOps(ParseContext pctx, Operator> op1, Operator> op2,
List> result, boolean gather) throws SemanticException {
if (!compareOperator(pctx, op1, op2)) {
LOG.debug("Operators not equal: {} and {}", op1, op2);
return false;
}
if (gather && op2.getChildOperators().size() > 1) {
// If the second operator has more than one child, we stop gathering
gather = false;
}
if (gather) {
result.add(op2);
}
List> op1ParentOperators = op1.getParentOperators();
List> op2ParentOperators = op2.getParentOperators();
if (op1ParentOperators != null && op2ParentOperators != null) {
if (op1ParentOperators.size() != op2ParentOperators.size()) {
return false;
}
for (int i = 0; i < op1ParentOperators.size(); i++) {
Operator> op1ParentOp = op1ParentOperators.get(i);
Operator> op2ParentOp = op2ParentOperators.get(i);
boolean mergeable =
compareAndGatherOps(pctx, op1ParentOp, op2ParentOp, result, gather);
if (!mergeable) {
return false;
}
}
} else if (op1ParentOperators != null || op2ParentOperators != null) {
return false;
}
return true;
}
private static boolean compareOperator(ParseContext pctx, Operator> op1, Operator> op2)
throws SemanticException {
if (!op1.getClass().getName().equals(op2.getClass().getName())) {
return false;
}
// We handle ReduceSinkOperator here as we can safely ignore table alias
// and the current comparator implementation does not.
// We can ignore table alias since when we compare ReduceSinkOperator, all
// its ancestors need to match (down to table scan), thus we make sure that
// both plans are the same.
// TODO: move this to logicalEquals
if (op1 instanceof ReduceSinkOperator) {
ReduceSinkDesc op1Conf = ((ReduceSinkOperator) op1).getConf();
ReduceSinkDesc op2Conf = ((ReduceSinkOperator) op2).getConf();
if (StringUtils.equals(op1Conf.getKeyColString(), op2Conf.getKeyColString()) &&
StringUtils.equals(op1Conf.getValueColsString(), op2Conf.getValueColsString()) &&
StringUtils.equals(op1Conf.getParitionColsString(), op2Conf.getParitionColsString()) &&
op1Conf.getTag() == op2Conf.getTag() &&
StringUtils.equals(op1Conf.getOrder(), op2Conf.getOrder()) &&
op1Conf.getTopN() == op2Conf.getTopN() &&
op1Conf.isAutoParallel() == op2Conf.isAutoParallel()) {
return true;
} else {
return false;
}
}
// We handle TableScanOperator here as we can safely ignore table alias
// and the current comparator implementation does not.
// TODO: move this to logicalEquals
if (op1 instanceof TableScanOperator) {
TableScanOperator tsOp1 = (TableScanOperator) op1;
TableScanOperator tsOp2 = (TableScanOperator) op2;
TableScanDesc op1Conf = tsOp1.getConf();
TableScanDesc op2Conf = tsOp2.getConf();
Table tableMeta1 = op1Conf.getTableMetadata();
Table tableMeta2 = op2Conf.getTableMetadata();
if (StringUtils.equals(tableMeta1.getFullyQualifiedName(), tableMeta2.getFullyQualifiedName())
&& op1Conf.getNeededColumns().equals(op2Conf.getNeededColumns())
&& StringUtils.equals(op1Conf.getFilterExprString(), op2Conf.getFilterExprString())
&& pctx.getPrunedPartitions(tsOp1).getPartitions().equals(
pctx.getPrunedPartitions(tsOp2).getPartitions())
&& op1Conf.getRowLimit() == op2Conf.getRowLimit()) {
return true;
} else {
return false;
}
}
return op1.logicalEquals(op2);
}
private static boolean validPreConditions(ParseContext pctx, SharedWorkOptimizerCache optimizerCache,
SharedResult sr) {
// We check whether merging the works would cause the size of
// the data in memory grow too large.
// TODO: Currently ignores GBY and PTF which may also buffer data in memory.
if (sr.dataSize > sr.maxDataSize) {
// Size surpasses limit, we cannot convert
LOG.debug("accumulated data size: {} / max size: {}", sr.dataSize, sr.maxDataSize);
return false;
}
Operator> op1 = sr.retainableOps.get(0);
Operator> op2 = sr.discardableOps.get(0);
// 1) The set of operators in the works that we are merging need to meet
// some requirements. In particular:
// 1.1. None of the works that we are merging can contain a Union
// operator. This is not supported yet as we might end up with cycles in
// the Tez DAG.
// 1.2. There cannot be more than one DummyStore operator in the new resulting
// work when the operators are merged. This is due to an assumption in
// MergeJoinProc that needs to be further explored.
// If any of these conditions are not met, we cannot merge.
// TODO: Extend rule so it can be applied for these cases.
final Set> workOps1 = findWorkOperators(optimizerCache, op1);
final Set> workOps2 = findWorkOperators(optimizerCache, op2);
boolean foundDummyStoreOp = false;
for (Operator> op : workOps1) {
if (op instanceof UnionOperator) {
// We cannot merge (1.1)
return false;
}
if (op instanceof DummyStoreOperator) {
foundDummyStoreOp = true;
}
}
for (Operator> op : workOps2) {
if (op instanceof UnionOperator) {
// We cannot merge (1.1)
return false;
}
if (foundDummyStoreOp && op instanceof DummyStoreOperator) {
// We cannot merge (1.2)
return false;
}
}
// 2) We check whether output works when we merge the operators will collide.
//
// Work1 Work2 (merge TS in W1 & W2) Work1
// \ / -> | | X
// Work3 Work3
//
// If we do, we cannot merge. The reason is that Tez currently does
// not support parallel edges, i.e., multiple edges from same work x
// into same work y.
final Set> outputWorksOps1 = findChildWorkOperators(pctx, optimizerCache, op1);
final Set> outputWorksOps2 = findChildWorkOperators(pctx, optimizerCache, op2);
if (!Collections.disjoint(outputWorksOps1, outputWorksOps2)) {
// We cannot merge
return false;
}
// 3) We check whether we will end up with same operators inputing on same work.
//
// Work1 (merge TS in W2 & W3) Work1
// / \ -> | | X
// Work2 Work3 Work2
//
// If we do, we cannot merge. The reason is the same as above, currently
// Tez does not support parallel edges.
//
// In the check, we exclude the inputs to the root operator that we are trying
// to merge (only useful for extended merging as TS do not have inputs).
final Set> excludeOps1 = sr.retainableOps.get(0).getNumParent() > 0 ?
ImmutableSet.copyOf(sr.retainableOps.get(0).getParentOperators()) : ImmutableSet.of();
final Set> inputWorksOps1 =
findParentWorkOperators(pctx, optimizerCache, op1, excludeOps1);
final Set> excludeOps2 = sr.discardableOps.get(0).getNumParent() > 0 ?
Sets.union(ImmutableSet.copyOf(sr.discardableOps.get(0).getParentOperators()), sr.discardableInputOps) :
sr.discardableInputOps;
final Set> inputWorksOps2 =
findParentWorkOperators(pctx, optimizerCache, op2, excludeOps2);
if (!Collections.disjoint(inputWorksOps1, inputWorksOps2)) {
// We cannot merge
return false;
}
// 4) We check whether one of the operators is part of a work that is an input for
// the work of the other operator.
//
// Work1 (merge TS in W1 & W3) Work1
// | -> | X
// Work2 Work2
// | |
// Work3 Work1
//
// If we do, we cannot merge, as we would end up with a cycle in the DAG.
final Set> descendantWorksOps1 =
findDescendantWorkOperators(pctx, optimizerCache, op1, sr.discardableInputOps);
final Set> descendantWorksOps2 =
findDescendantWorkOperators(pctx, optimizerCache, op2, sr.discardableInputOps);
if (!Collections.disjoint(descendantWorksOps1, workOps2)
|| !Collections.disjoint(workOps1, descendantWorksOps2)) {
return false;
}
return true;
}
private static Set> findParentWorkOperators(ParseContext pctx,
SharedWorkOptimizerCache optimizerCache, Operator> start) {
return findParentWorkOperators(pctx, optimizerCache, start, ImmutableSet.of());
}
private static Set> findParentWorkOperators(ParseContext pctx,
SharedWorkOptimizerCache optimizerCache, Operator> start,
Set> excludeOps) {
// Find operators in work
Set> workOps = findWorkOperators(optimizerCache, start);
// Gather input works operators
Set> set = new HashSet>();
for (Operator> op : workOps) {
if (op.getParentOperators() != null) {
for (Operator> parent : op.getParentOperators()) {
if (parent instanceof ReduceSinkOperator && !excludeOps.contains(parent)) {
set.addAll(findWorkOperators(optimizerCache, parent));
}
}
} else if (op instanceof TableScanOperator) {
// Check for DPP and semijoin DPP
for (Operator> parent : optimizerCache.tableScanToDPPSource.get((TableScanOperator) op)) {
if (!excludeOps.contains(parent)) {
set.addAll(findWorkOperators(optimizerCache, parent));
}
}
}
}
return set;
}
private static Set> findAscendantWorkOperators(ParseContext pctx,
SharedWorkOptimizerCache optimizerCache, Operator> start) {
// Find operators in work
Set> workOps = findWorkOperators(optimizerCache, start);
// Gather input works operators
Set> result = new HashSet>();
Set> set;
while (!workOps.isEmpty()) {
set = new HashSet>();
for (Operator> op : workOps) {
if (op.getParentOperators() != null) {
for (Operator> parent : op.getParentOperators()) {
if (parent instanceof ReduceSinkOperator) {
set.addAll(findWorkOperators(optimizerCache, parent));
}
}
} else if (op instanceof TableScanOperator) {
// Check for DPP and semijoin DPP
for (Operator> parent : optimizerCache.tableScanToDPPSource.get((TableScanOperator) op)) {
set.addAll(findWorkOperators(optimizerCache, parent));
}
}
}
workOps = set;
result.addAll(set);
}
return result;
}
private static Set> findChildWorkOperators(ParseContext pctx,
SharedWorkOptimizerCache optimizerCache, Operator> start) {
// Find operators in work
Set> workOps = findWorkOperators(optimizerCache, start);
// Gather output works operators
Set> set = new HashSet>();
for (Operator> op : workOps) {
if (op instanceof ReduceSinkOperator) {
if (op.getChildOperators() != null) {
// All children of RS are descendants
for (Operator> child : op.getChildOperators()) {
set.addAll(findWorkOperators(optimizerCache, child));
}
}
// Semijoin DPP work is considered a child because work needs
// to finish for it to execute
SemiJoinBranchInfo sjbi = pctx.getRsToSemiJoinBranchInfo().get(op);
if (sjbi != null) {
set.addAll(findWorkOperators(optimizerCache, sjbi.getTsOp()));
}
} else if(op.getConf() instanceof DynamicPruningEventDesc) {
// DPP work is considered a child because work needs
// to finish for it to execute
set.addAll(findWorkOperators(
optimizerCache, ((DynamicPruningEventDesc) op.getConf()).getTableScan()));
}
}
return set;
}
private static Set> findDescendantWorkOperators(ParseContext pctx,
SharedWorkOptimizerCache optimizerCache, Operator> start,
Set> excludeOps) {
// Find operators in work
Set> workOps = findWorkOperators(optimizerCache, start);
// Gather output works operators
Set> result = new HashSet>();
Set> set;
while (!workOps.isEmpty()) {
set = new HashSet>();
for (Operator> op : workOps) {
if (excludeOps.contains(op)) {
continue;
}
if (op instanceof ReduceSinkOperator) {
if (op.getChildOperators() != null) {
// All children of RS are descendants
for (Operator> child : op.getChildOperators()) {
set.addAll(findWorkOperators(optimizerCache, child));
}
}
// Semijoin DPP work is considered a descendant because work needs
// to finish for it to execute
SemiJoinBranchInfo sjbi = pctx.getRsToSemiJoinBranchInfo().get(op);
if (sjbi != null) {
set.addAll(findWorkOperators(optimizerCache, sjbi.getTsOp()));
}
} else if(op.getConf() instanceof DynamicPruningEventDesc) {
// DPP work is considered a descendant because work needs
// to finish for it to execute
set.addAll(findWorkOperators(
optimizerCache, ((DynamicPruningEventDesc) op.getConf()).getTableScan()));
}
}
workOps = set;
result.addAll(set);
}
return result;
}
// Stores result in cache
private static Set> findWorkOperators(
SharedWorkOptimizerCache optimizerCache, Operator> start) {
Set> c = optimizerCache.operatorToWorkOperators.get(start);
if (!c.isEmpty()) {
return c;
}
c = findWorkOperators(start, new HashSet>());
for (Operator> op : c) {
optimizerCache.operatorToWorkOperators.putAll(op, c);
}
return c;
}
private static Set> findWorkOperators(Operator> start, Set> found) {
found.add(start);
if (start.getParentOperators() != null) {
for (Operator> parent : start.getParentOperators()) {
if (parent instanceof ReduceSinkOperator) {
continue;
}
if (!found.contains(parent)) {
findWorkOperators(parent, found);
}
}
}
if (start instanceof ReduceSinkOperator) {
return found;
}
if (start.getChildOperators() != null) {
for (Operator> child : start.getChildOperators()) {
if (!found.contains(child)) {
findWorkOperators(child, found);
}
}
}
return found;
}
private static void pushFilterToTopOfTableScan(
SharedWorkOptimizerCache optimizerCache, TableScanOperator tsOp)
throws UDFArgumentException {
ExprNodeGenericFuncDesc tableScanExprNode = tsOp.getConf().getFilterExpr();
List> allChildren =
Lists.newArrayList(tsOp.getChildOperators());
for (Operator extends OperatorDesc> op : allChildren) {
if (op instanceof FilterOperator) {
FilterOperator filterOp = (FilterOperator) op;
ExprNodeDesc filterExprNode = filterOp.getConf().getPredicate();
if (tableScanExprNode.isSame(filterExprNode)) {
// We do not need to do anything
return;
}
if (tableScanExprNode.getGenericUDF() instanceof GenericUDFOPOr) {
for (ExprNodeDesc childExprNode : tableScanExprNode.getChildren()) {
if (childExprNode.isSame(filterExprNode)) {
// We do not need to do anything, it is in the OR expression
// so probably we pushed previously
return;
}
}
}
ExprNodeGenericFuncDesc newPred = ExprNodeGenericFuncDesc.newInstance(
new GenericUDFOPAnd(),
Arrays.asList(tableScanExprNode.clone(), filterExprNode));
filterOp.getConf().setPredicate(newPred);
} else {
Operator newOp = OperatorFactory.get(tsOp.getCompilationOpContext(),
new FilterDesc(tableScanExprNode.clone(), false),
new RowSchema(tsOp.getSchema().getSignature()));
tsOp.replaceChild(op, newOp);
newOp.getParentOperators().add(tsOp);
op.replaceParent(tsOp, newOp);
newOp.getChildOperators().add(op);
// Add to cache (same group as tsOp)
optimizerCache.putIfWorkExists(newOp, tsOp);
}
}
}
private static class SharedResult {
final List> retainableOps;
final List> discardableOps;
final Set> discardableInputOps;
final long dataSize;
final long maxDataSize;
private SharedResult(Collection> retainableOps, Collection> discardableOps,
Set> discardableInputOps, long dataSize, long maxDataSize) {
this.retainableOps = ImmutableList.copyOf(retainableOps);
this.discardableOps = ImmutableList.copyOf(discardableOps);
this.discardableInputOps = ImmutableSet.copyOf(discardableInputOps);
this.dataSize = dataSize;
this.maxDataSize = maxDataSize;
}
@Override
public String toString() {
return "SharedResult { " + this.retainableOps + "; " + this.discardableOps + "; "
+ this.discardableInputOps + "};";
}
}
/** Cache to accelerate optimization */
private static class SharedWorkOptimizerCache {
// Operators that belong to each work
final HashMultimap, Operator>> operatorToWorkOperators =
HashMultimap., Operator>>create();
// Table scan operators to DPP sources
final Multimap> tableScanToDPPSource =
HashMultimap.>create();
// Add new operator to cache work group of existing operator (if group exists)
void putIfWorkExists(Operator> opToAdd, Operator> existingOp) {
List> c = ImmutableList.copyOf(operatorToWorkOperators.get(existingOp));
if (!c.isEmpty()) {
for (Operator> op : c) {
operatorToWorkOperators.get(op).add(opToAdd);
}
operatorToWorkOperators.putAll(opToAdd, c);
operatorToWorkOperators.put(opToAdd, opToAdd);
}
}
// Remove operator
void removeOp(Operator> opToRemove) {
Set> s = operatorToWorkOperators.get(opToRemove);
s.remove(opToRemove);
List> c1 = ImmutableList.copyOf(s);
if (!c1.isEmpty()) {
for (Operator> op1 : c1) {
operatorToWorkOperators.remove(op1, opToRemove); // Remove operator
}
operatorToWorkOperators.removeAll(opToRemove); // Remove entry for operator
}
}
// Remove operator and combine
void removeOpAndCombineWork(Operator> opToRemove, Operator> replacementOp) {
Set> s = operatorToWorkOperators.get(opToRemove);
s.remove(opToRemove);
List> c1 = ImmutableList.copyOf(s);
List> c2 = ImmutableList.copyOf(operatorToWorkOperators.get(replacementOp));
if (!c1.isEmpty() && !c2.isEmpty()) {
for (Operator> op1 : c1) {
operatorToWorkOperators.remove(op1, opToRemove); // Remove operator
operatorToWorkOperators.putAll(op1, c2); // Add ops of new collection
}
operatorToWorkOperators.removeAll(opToRemove); // Remove entry for operator
for (Operator> op2 : c2) {
operatorToWorkOperators.putAll(op2, c1); // Add ops to existing collection
}
}
}
@Override
public String toString() {
return "SharedWorkOptimizerCache { \n" + operatorToWorkOperators.toString() + "\n };";
}
}
}