All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.hadoop.hive.ql.parse.TezCompiler Maven / Gradle / Ivy

There is a newer version: 4.0.1
Show newest version
/*
 *  Licensed to the Apache Software Foundation (ASF) under one
 *  or more contributor license agreements.  See the NOTICE file
 *  distributed with this work for additional information
 *  regarding copyright ownership.  The ASF licenses this file
 *  to you under the Apache License, Version 2.0 (the
 *  "License"); you may not use this file except in compliance
 *  with the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 *  Unless required by applicable law or agreed to in writing, software
 *  distributed under the License is distributed on an "AS IS" BASIS,
 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *  See the License for the specific language governing permissions and
 *  limitations under the License.
 */
package org.apache.hadoop.hive.ql.parse;

import com.google.common.collect.ImmutableSet;
import com.google.common.collect.ListMultimap;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Comparator;
import java.util.Deque;
import java.util.HashMap;
import java.util.HashSet;
import java.util.IdentityHashMap;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.LinkedHashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import java.util.SortedSet;
import java.util.Stack;
import java.util.TreeSet;
import java.util.concurrent.atomic.AtomicInteger;

import com.google.common.collect.ArrayListMultimap;
import com.google.common.collect.Multimap;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
import org.apache.hadoop.hive.llap.LlapHiveUtils;
import org.apache.hadoop.hive.ql.Context;
import org.apache.hadoop.hive.ql.QueryState;
import org.apache.hadoop.hive.ql.exec.AppMasterEventOperator;
import org.apache.hadoop.hive.ql.exec.CommonMergeJoinOperator;
import org.apache.hadoop.hive.ql.exec.ConditionalTask;
import org.apache.hadoop.hive.ql.exec.DummyStoreOperator;
import org.apache.hadoop.hive.ql.exec.FileSinkOperator;
import org.apache.hadoop.hive.ql.exec.FilterOperator;
import org.apache.hadoop.hive.ql.exec.FunctionRegistry;
import org.apache.hadoop.hive.ql.exec.GroupByOperator;
import org.apache.hadoop.hive.ql.exec.JoinOperator;
import org.apache.hadoop.hive.ql.exec.MapJoinOperator;
import org.apache.hadoop.hive.ql.exec.Operator;
import org.apache.hadoop.hive.ql.exec.OperatorUtils;
import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator;
import org.apache.hadoop.hive.ql.exec.SelectOperator;
import org.apache.hadoop.hive.ql.exec.TableScanOperator;
import org.apache.hadoop.hive.ql.exec.Task;
import org.apache.hadoop.hive.ql.exec.TerminalOperator;
import org.apache.hadoop.hive.ql.exec.TezDummyStoreOperator;
import org.apache.hadoop.hive.ql.exec.TopNKeyOperator;
import org.apache.hadoop.hive.ql.exec.UnionOperator;
import org.apache.hadoop.hive.ql.exec.tez.TezTask;
import org.apache.hadoop.hive.ql.hooks.ReadEntity;
import org.apache.hadoop.hive.ql.hooks.WriteEntity;
import org.apache.hadoop.hive.ql.lib.CompositeProcessor;
import org.apache.hadoop.hive.ql.lib.DefaultGraphWalker;
import org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher;
import org.apache.hadoop.hive.ql.lib.SemanticDispatcher;
import org.apache.hadoop.hive.ql.lib.ForwardWalker;
import org.apache.hadoop.hive.ql.lib.SemanticGraphWalker;
import org.apache.hadoop.hive.ql.lib.Node;
import org.apache.hadoop.hive.ql.lib.SemanticNodeProcessor;
import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx;
import org.apache.hadoop.hive.ql.lib.PreOrderOnceWalker;
import org.apache.hadoop.hive.ql.lib.SemanticRule;
import org.apache.hadoop.hive.ql.lib.RuleRegExp;
import org.apache.hadoop.hive.ql.log.PerfLogger;
import org.apache.hadoop.hive.ql.metadata.Hive;
import org.apache.hadoop.hive.ql.optimizer.BucketVersionPopulator;
import org.apache.hadoop.hive.ql.optimizer.ConstantPropagate;
import org.apache.hadoop.hive.ql.optimizer.ConstantPropagateProcCtx.ConstantPropagateOption;
import org.apache.hadoop.hive.ql.optimizer.ConvertJoinMapJoin;
import org.apache.hadoop.hive.ql.optimizer.DynamicPartitionPruningOptimization;
import org.apache.hadoop.hive.ql.optimizer.MergeJoinProc;
import org.apache.hadoop.hive.ql.optimizer.NonBlockingOpDeDupProc;
import org.apache.hadoop.hive.ql.optimizer.ParallelEdgeFixer;
import org.apache.hadoop.hive.ql.optimizer.ReduceSinkMapJoinProc;
import org.apache.hadoop.hive.ql.optimizer.RemoveDynamicPruningBySize;
import org.apache.hadoop.hive.ql.optimizer.SemiJoinReductionMerge;
import org.apache.hadoop.hive.ql.optimizer.SetHashGroupByMinReduction;
import org.apache.hadoop.hive.ql.optimizer.SetReducerParallelism;
import org.apache.hadoop.hive.ql.optimizer.SharedWorkOptimizer;
import org.apache.hadoop.hive.ql.optimizer.SortedDynPartitionOptimizer;
import org.apache.hadoop.hive.ql.optimizer.topnkey.TopNKeyProcessor;
import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveFilter;
import org.apache.hadoop.hive.ql.optimizer.correlation.ReduceSinkDeDuplication;
import org.apache.hadoop.hive.ql.optimizer.topnkey.TopNKeyPushdownProcessor;
import org.apache.hadoop.hive.ql.optimizer.correlation.ReduceSinkJoinDeDuplication;
import org.apache.hadoop.hive.ql.optimizer.metainfo.annotation.AnnotateWithOpTraits;
import org.apache.hadoop.hive.ql.optimizer.physical.AnnotateRunTimeStatsOptimizer;
import org.apache.hadoop.hive.ql.optimizer.physical.CrossProductHandler;
import org.apache.hadoop.hive.ql.optimizer.physical.LlapClusterStateForCompile;
import org.apache.hadoop.hive.ql.optimizer.physical.LlapDecider;
import org.apache.hadoop.hive.ql.optimizer.physical.LlapPreVectorizationPass;
import org.apache.hadoop.hive.ql.optimizer.physical.MemoryDecider;
import org.apache.hadoop.hive.ql.optimizer.physical.MetadataOnlyOptimizer;
import org.apache.hadoop.hive.ql.optimizer.physical.NullScanOptimizer;
import org.apache.hadoop.hive.ql.optimizer.physical.PhysicalContext;
import org.apache.hadoop.hive.ql.optimizer.physical.SerializeFilter;
import org.apache.hadoop.hive.ql.optimizer.physical.StageIDsRearranger;
import org.apache.hadoop.hive.ql.optimizer.physical.Vectorizer;
import org.apache.hadoop.hive.ql.optimizer.signature.OpTreeSignature;
import org.apache.hadoop.hive.ql.optimizer.stats.annotation.AnnotateWithStatistics;
import org.apache.hadoop.hive.ql.plan.AggregationDesc;
import org.apache.hadoop.hive.ql.plan.AppMasterEventDesc;
import org.apache.hadoop.hive.ql.plan.BaseWork;
import org.apache.hadoop.hive.ql.plan.ColStatistics;
import org.apache.hadoop.hive.ql.plan.DynamicPruningEventDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDescUtils;
import org.apache.hadoop.hive.ql.plan.ExprNodeDynamicValueDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
import org.apache.hadoop.hive.ql.plan.GroupByDesc;
import org.apache.hadoop.hive.ql.plan.MapJoinDesc;
import org.apache.hadoop.hive.ql.plan.MapWork;
import org.apache.hadoop.hive.ql.plan.MergeJoinWork;
import org.apache.hadoop.hive.ql.plan.MoveWork;
import org.apache.hadoop.hive.ql.plan.OperatorDesc;
import org.apache.hadoop.hive.ql.plan.Statistics;
import org.apache.hadoop.hive.ql.plan.TezWork;
import org.apache.hadoop.hive.ql.plan.mapper.AuxOpTreeSignature;
import org.apache.hadoop.hive.ql.plan.mapper.PlanMapper;
import org.apache.hadoop.hive.ql.plan.mapper.PlanMapper.EquivGroup;
import org.apache.hadoop.hive.ql.session.SessionState;
import org.apache.hadoop.hive.ql.session.SessionState.LogHelper;
import org.apache.hadoop.hive.ql.stats.OperatorStats;
import org.apache.hadoop.hive.ql.stats.StatsUtils;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFBloomFilter.GenericUDAFBloomFilterEvaluator;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import static org.apache.hadoop.hive.ql.exec.FunctionRegistry.BLOOM_FILTER_FUNCTION;

/**
 * TezCompiler translates the operator plan into TezTasks.
 */
public class TezCompiler extends TaskCompiler {

  protected static final Logger LOG = LoggerFactory.getLogger(TezCompiler.class);

  /**
   * Creates a compiler instance. No state is set up here; configuration is
   * applied in {@link #init(QueryState, LogHelper, Hive)}.
   */
  public TezCompiler() {
    super();
  }

  /**
   * Initializes the compiler through the parent class and then forces the
   * two configuration settings the Tez code path depends on.
   *
   * @param queryState state of the query being compiled
   * @param console    helper used for user-facing log output
   * @param db         metastore handle
   */
  @Override
  public void init(QueryState queryState, LogHelper console, Hive db) {
    super.init(queryState, console, db);

    // The query plan has to be shipped over RPC when running on Tez.
    conf.setBoolVar(ConfVars.HIVE_RPC_QUERY_PLAN, true);

    // Union processing relies on input directories being read recursively.
    conf.setBoolean("mapred.input.dir.recursive", true);
  }

  /**
   * Runs the Tez-specific optimizations over the operator plan. The steps are
   * executed in a fixed order and most of them are bracketed by perf-logger
   * begin/end calls; several steps are gated by configuration flags.
   *
   * @param pCtx parse context holding the operator plan to optimize
   * @throws SemanticException if one of the optimization steps fails
   */
  @Override
  protected void optimizeOperatorPlan(ParseContext pCtx) throws SemanticException {
    final PerfLogger perf = SessionState.getPerfLogger();
    final String caller = this.getClass().getName();
    // Context object handed to every optimization step below.
    final OptimizeTezProcContext procCtx = new OptimizeTezProcContext(conf, pCtx);

    perf.perfLogBegin(caller, PerfLogger.TEZ_COMPILER);
    runTopNKeyOptimization(procCtx);
    perf.perfLogEnd(caller, PerfLogger.TEZ_COMPILER, "Run top n key optimization");

    // Set up dynamic partition pruning where possible.
    perf.perfLogBegin(caller, PerfLogger.TEZ_COMPILER);
    runDynamicPartitionPruning(procCtx);
    perf.perfLogEnd(caller, PerfLogger.TEZ_COMPILER, "Setup dynamic partition pruning");

    if (HiveConf.getBoolVar(procCtx.conf, ConfVars.TEZ_DYNAMIC_SEMIJOIN_REDUCTION_MULTICOLUMN)) {
      SemiJoinReductionMerge semiJoinMerge = new SemiJoinReductionMerge();
      semiJoinMerge.beginPerfLogging();
      semiJoinMerge.transform(procCtx.parseContext);
      semiJoinMerge.endPerfLogging("Merge single column semi-join reducers to composite");
    }

    // Needed to get consistent filter-operator conditions (for operator tree matching).
    if (HiveConf.getBoolVar(procCtx.conf, ConfVars.HIVEOPTCONSTANTPROPAGATION)) {
      new ConstantPropagate(ConstantPropagateOption.SHORTCUT).transform(procCtx.parseContext);
    }

    // Annotate the operator plan with statistics.
    perf.perfLogBegin(caller, PerfLogger.TEZ_COMPILER);
    runStatsAnnotation(procCtx);
    perf.perfLogEnd(caller, PerfLogger.TEZ_COMPILER, "Setup stats in the operator plan");

    // Sorted dynamic partition optimization: only with dynamic partitioning in
    // "nonstrict" mode and with list bucketing optimization switched off.
    final boolean sortDynamicPartitions =
        procCtx.conf.getBoolVar(ConfVars.DYNAMICPARTITIONING)
            && procCtx.conf.getVar(ConfVars.DYNAMICPARTITIONINGMODE).equals("nonstrict")
            && !procCtx.conf.getBoolVar(ConfVars.HIVEOPTLISTBUCKETING);
    if (sortDynamicPartitions) {
      perf.perfLogBegin(caller, PerfLogger.TEZ_COMPILER);
      new SortedDynPartitionOptimizer().transform(procCtx.parseContext);
      perf.perfLogEnd(caller, PerfLogger.TEZ_COMPILER, "Sorted dynamic partition optimization");
    }

    if (procCtx.conf.getBoolVar(ConfVars.HIVEOPTREDUCEDEDUPLICATION)) {
      perf.perfLogBegin(caller, PerfLogger.TEZ_COMPILER);
      // The sorted dynamic partition step adds an extra RS, hence the de-dup.
      new ReduceSinkDeDuplication().transform(procCtx.parseContext);
      // The de-dup logic can create a SELECT with wrong columns;
      // NonBlockingOpDeDupProc repairs that (a workaround, the root cause in
      // de-dup still needs investigation).
      new NonBlockingOpDeDupProc().transform(procCtx.parseContext);
      perf.perfLogEnd(caller, PerfLogger.TEZ_COMPILER, "Reduce Sink de-duplication");
    }

    // Optimizations that depend on the statistics computed above.
    perf.perfLogBegin(caller, PerfLogger.TEZ_COMPILER);
    runStatsDependentOptimizations(procCtx);
    perf.perfLogEnd(caller, PerfLogger.TEZ_COMPILER, "Run the optimizations that use stats for optimization");

    // Join conversion may have created new reduce sinks: repopulate bucket versions.
    new BucketVersionPopulator().transform(pCtx);

    perf.perfLogBegin(caller, PerfLogger.TEZ_COMPILER);
    if (HiveConf.getBoolVar(procCtx.conf, ConfVars.HIVEOPTJOINREDUCEDEDUPLICATION)) {
      new ReduceSinkJoinDeDuplication().transform(procCtx.parseContext);
    }
    perf.perfLogEnd(caller, PerfLogger.TEZ_COMPILER, "Run reduce sink after join algorithm selection");

    semijoinRemovalBasedTransformations(procCtx);

    perf.perfLogBegin(caller, PerfLogger.TEZ_COMPILER);
    if (HiveConf.getBoolVar(procCtx.conf, ConfVars.HIVE_SHARED_WORK_OPTIMIZATION)) {
      new SharedWorkOptimizer().transform(procCtx.parseContext);
      new ParallelEdgeFixer().transform(procCtx.parseContext);
    }
    perf.perfLogEnd(caller, PerfLogger.TEZ_COMPILER, "Shared scans optimization");

    // Earlier steps may have produced many "and true and true" conditions.
    // A full constant folding run is not needed; shortcutting AND/OR
    // expressions that involve constant true/false values is enough.
    if (HiveConf.getBoolVar(procCtx.conf, ConfVars.HIVEOPTCONSTANTPROPAGATION)) {
      new ConstantPropagate(ConstantPropagateOption.SHORTCUT).transform(procCtx.parseContext);
    }

    perf.perfLogBegin(caller, PerfLogger.TEZ_COMPILER);
    AuxOpTreeSignature.linkAuxSignatures(procCtx.parseContext);
    markOperatorsWithUnstableRuntimeStats(procCtx);
    perf.perfLogEnd(caller, PerfLogger.TEZ_COMPILER, "markOperatorsWithUnstableRuntimeStats");

    // Extra consistency check that only runs in test mode.
    if (HiveConf.getBoolVar(procCtx.conf, ConfVars.HIVE_IN_TEST)) {
      bucketingVersionSanityCheck(procCtx);
    }
  }

  private void runCycleAnalysisForPartitionPruning(OptimizeTezProcContext procCtx) throws SemanticException {
    // Semijoins may have created task level cycles, examine those
    connectTerminalOps(procCtx.parseContext);
    boolean cycleFree = false;
    while (!cycleFree) {
      cycleFree = true;
      Set>> components = getComponents(procCtx);
      for (Set> component : components) {
        if (LOG.isDebugEnabled()) {
          LOG.debug("Component: ");
          for (Operator co : component) {
            LOG.debug("Operator: " + co.getName() + ", " + co.getIdentifier());
          }
        }
        if (component.size() != 1) {
          LOG.info("Found cycle in operator plan...");
          cycleFree = false;
          removeCycleOperator(component, procCtx);
          break;
        }
      }
      LOG.info("Cycle free: " + cycleFree);
    }
  }

  private void removeCycleOperator(Set> component, OptimizeTezProcContext context) throws SemanticException {
    AppMasterEventOperator victimAM = null;
    TableScanOperator victimTS = null;
    ReduceSinkOperator victimRS = null;

    // If there is a hint and no operator is removed then throw error
    boolean hasHint = false;
    boolean removed = false;
    for (Operator o : component) {
      // Look for AppMasterEventOperator or ReduceSinkOperator
      if (o instanceof AppMasterEventOperator) {
        if (victimAM == null
                || o.getStatistics().getDataSize() < victimAM.getStatistics()
                .getDataSize()) {
          victimAM = (AppMasterEventOperator) o;
          removed = true;
        }
      } else if (o instanceof ReduceSinkOperator) {

        SemiJoinBranchInfo sjInfo =
                context.parseContext.getRsToSemiJoinBranchInfo().get(o);
        if (sjInfo == null ) {
          continue;
        }
        if (sjInfo.getIsHint()) {
          // Skipping because of hint. Mark this info,
          hasHint = true;
          continue;
        }

        TableScanOperator ts = sjInfo.getTsOp();
        // Sanity check
        assert component.contains(ts);

        if (victimRS == null ||
                ts.getStatistics().getDataSize() <
                        victimTS.getStatistics().getDataSize()) {
          victimRS = (ReduceSinkOperator) o;
          victimTS = ts;
          removed = true;
        }
      }
    }

    // Always set the semijoin optimization as victim.
    Operator victim = victimRS;

    if (victimRS == null && victimAM != null ) {
        victim = victimAM;
    } else if (victimAM == null) {
      // do nothing
    } else {
      // Cycle consists of atleast one dynamic partition pruning(DPP)
      // optimization and atleast one min/max optimization.
      // DPP is a better optimization unless it ends up scanning the
      // bigger table for keys instead of the smaller table.

      // Get the parent TS of victimRS.
      Operator op = victimRS;
      while(!(op instanceof TableScanOperator)) {
        op = op.getParentOperators().get(0);
      }
      if ((2 * op.getStatistics().getDataSize()) <
              victimAM.getStatistics().getDataSize()) {
        victim = victimAM;
      }
    }

    if (hasHint && !removed) {
      // There is hint but none of the operators removed. Throw error
      throw new SemanticException("The user hint is causing an operator cycle. Please fix it and retry");
    }

    if (victim == null ||
            (!context.pruningOpsRemovedByPriorOpt.isEmpty() &&
                    context.pruningOpsRemovedByPriorOpt.contains(victim))) {
      return;
    }

    GenTezUtils.removeBranch(victim);

    if (victim == victimRS) {
      if (LOG.isDebugEnabled()) {
        LOG.debug("Cycle found. Removing semijoin "
            + OperatorUtils.getOpNamePretty(victimRS) + " - " + OperatorUtils.getOpNamePretty(victimTS));
      }
      GenTezUtils.removeSemiJoinOperator(context.parseContext, victimRS, victimTS);
    } else {
      // at this point we've found the fork in the op pipeline that has the pruning as a child plan.
      LOG.info("Disabling dynamic pruning for: "
          + ((DynamicPruningEventDesc) victim.getConf()).getTableScan().toString()
          + ". Needed to break cyclic dependency");
    }
  }

  // Tarjan's algo
  private Set>> getComponents(OptimizeTezProcContext procCtx) {
    Deque> deque = new LinkedList>();
    deque.addAll(procCtx.parseContext.getTopOps().values());

    AtomicInteger index = new AtomicInteger();
    Map, Integer> indexes = new HashMap, Integer>();
    Map, Integer> lowLinks = new HashMap, Integer>();
    Stack> nodes = new Stack>();
    Set>> components = new LinkedHashSet>>();

    for (Operator o : deque) {
      if (!indexes.containsKey(o)) {
        connect(o, index, nodes, indexes, lowLinks, components, procCtx.parseContext);
      }
    }

    return components;
  }

  private void connect(Operator o, AtomicInteger index, Stack> nodes,
      Map, Integer> indexes, Map, Integer> lowLinks,
      Set>> components, ParseContext parseContext) {

    indexes.put(o, index.get());
    lowLinks.put(o, index.get());
    index.incrementAndGet();
    nodes.push(o);

    List> children;
    if (o instanceof AppMasterEventOperator) {
      children = new ArrayList<>((o.getChildOperators()));
      TableScanOperator ts = ((DynamicPruningEventDesc) o.getConf()).getTableScan();
      LOG.debug("Adding special edge: " + o.getName() + " --> " + ts.toString());
      children.add(ts);
    } else if (o instanceof TerminalOperator) {
      children = new ArrayList<>((o.getChildOperators()));
      for (ReduceSinkOperator rs : parseContext.getTerminalOpToRSMap().get((TerminalOperator)o)) {
        // add an edge
        LOG.debug("Adding special edge: From terminal op to semijoin edge "  + o.getName() + " --> " + rs.toString());
        children.add(rs);
      }
      if (o instanceof ReduceSinkOperator) {
        // semijoin case
        SemiJoinBranchInfo sjInfo = parseContext.getRsToSemiJoinBranchInfo().get(o);
        if (sjInfo != null) {
          TableScanOperator ts = sjInfo.getTsOp();
          LOG.debug("Adding special edge: " + o.getName() + " --> " + ts.toString());
          children.add(ts);
        }
      }
    } else {
      children = o.getChildOperators();
    }

    for (Operator child : children) {
      if (!indexes.containsKey(child)) {
        connect(child, index, nodes, indexes, lowLinks, components, parseContext);
        lowLinks.put(o, Math.min(lowLinks.get(o), lowLinks.get(child)));
      } else if (nodes.contains(child)) {
        lowLinks.put(o, Math.min(lowLinks.get(o), indexes.get(child)));
      }
    }

    if (lowLinks.get(o).equals(indexes.get(o))) {
      Set> component = new LinkedHashSet>();
      components.add(component);
      Operator current;
      do {
        current = nodes.pop();
        component.add(current);
      } while (current != o);
    }
  }

  /**
   * Annotates the operator plan: first with statistics, then with operator
   * traits.
   *
   * @param procCtx optimization context holding the parse context
   * @throws SemanticException if one of the annotation passes fails
   */
  private void runStatsAnnotation(OptimizeTezProcContext procCtx) throws SemanticException {
    AnnotateWithStatistics statsAnnotator = new AnnotateWithStatistics();
    statsAnnotator.transform(procCtx.parseContext);

    AnnotateWithOpTraits traitsAnnotator = new AnnotateWithOpTraits();
    traitsAnnotator.transform(procCtx.parseContext);
  }

  private void runStatsDependentOptimizations(OptimizeTezProcContext procCtx) throws SemanticException {

    // Sequence of TableScan operators to be walked
    Deque> deque = new LinkedList>();
    deque.addAll(procCtx.parseContext.getTopOps().values());

    // create a walker which walks the tree in a DFS manner while maintaining
    // the operator stack.
    Map opRules = new LinkedHashMap();
    opRules.put(new RuleRegExp("Set parallelism - ReduceSink",
        ReduceSinkOperator.getOperatorName() + "%"),
        new SetReducerParallelism());
    opRules.put(new RuleRegExp("Convert Join to Map-join",
        JoinOperator.getOperatorName() + "%"), new ConvertJoinMapJoin());
    if (procCtx.conf.getBoolVar(ConfVars.HIVEMAPAGGRHASHMINREDUCTIONSTATSADJUST)) {
      opRules.put(new RuleRegExp("Set min reduction - GBy (Hash)",
          GroupByOperator.getOperatorName() + "%"),
          new SetHashGroupByMinReduction());
    }

    // The dispatcher fires the processor corresponding to the closest matching
    // rule and passes the context along
    SemanticDispatcher disp = new DefaultRuleDispatcher(null, opRules, procCtx);
    List topNodes = new ArrayList();
    topNodes.addAll(procCtx.parseContext.getTopOps().values());
    SemanticGraphWalker ogw = new ForwardWalker(disp);
    ogw.startWalking(topNodes, null);
  }

  /**
   * Runs the transformations that remove dynamic partition pruning and
   * semijoin reduction branches: removal by size, the semijoin-specific
   * removals, cycle analysis and finally removal of redundant branches.
   * Which steps run depends on the dynamic partition pruning, semijoin
   * reduction and extended pruning configuration flags.
   *
   * @param procCtx optimization context holding configuration and parse context
   * @throws SemanticException if one of the transformations fails
   */
  private void semijoinRemovalBasedTransformations(OptimizeTezProcContext procCtx) throws SemanticException {
    final PerfLogger perf = SessionState.getPerfLogger();
    final String caller = this.getClass().getName();

    final boolean dppEnabled = procCtx.conf.getBoolVar(ConfVars.TEZ_DYNAMIC_PARTITION_PRUNING);
    // Semijoin reduction only matters if at least one semijoin branch exists.
    final boolean semiJoinReductionOn = dppEnabled
        && procCtx.conf.getBoolVar(ConfVars.TEZ_DYNAMIC_SEMIJOIN_REDUCTION)
        && !procCtx.parseContext.getRsToSemiJoinBranchInfo().isEmpty();
    final boolean extendedReductionOn = dppEnabled
        && procCtx.conf.getBoolVar(ConfVars.TEZ_DYNAMIC_PARTITION_PRUNING_EXTENDED);

    perf.perfLogBegin(caller, PerfLogger.TEZ_COMPILER);
    if (dppEnabled) {
      runRemoveDynamicPruningOptimization(procCtx);
    }
    perf.perfLogEnd(caller, PerfLogger.TEZ_COMPILER, "Run remove dynamic pruning by size");

    if (semiJoinReductionOn) {
      perf.perfLogBegin(caller, PerfLogger.TEZ_COMPILER);
      markSemiJoinForDPP(procCtx);
      perf.perfLogEnd(caller, PerfLogger.TEZ_COMPILER, "Mark certain semijoin edges important based ");

      // Drop semijoin edges coming from a Union operator.
      perf.perfLogBegin(caller, PerfLogger.TEZ_COMPILER);
      removeSemiJoinEdgesForUnion(procCtx);
      perf.perfLogEnd(caller, PerfLogger.TEZ_COMPILER,
          "Remove any semi join edge between Union and RS");

      // Drop edges that run in parallel between a semijoin and a mapjoin.
      perf.perfLogBegin(caller, PerfLogger.TEZ_COMPILER);
      removeSemijoinsParallelToMapJoin(procCtx);
      perf.perfLogEnd(caller, PerfLogger.TEZ_COMPILER, "Remove any parallel edge between semijoin and mapjoin");

      // Drop the semijoin optimization where an SMB join was created.
      perf.perfLogBegin(caller, PerfLogger.TEZ_COMPILER);
      removeSemijoinOptimizationFromSMBJoins(procCtx);
      perf.perfLogEnd(caller, PerfLogger.TEZ_COMPILER, "Remove semijoin optimizations if needed");

      // Drop the bloom filter when no stats were generated.
      perf.perfLogBegin(caller, PerfLogger.TEZ_COMPILER);
      removeSemiJoinIfNoStats(procCtx);
      perf.perfLogEnd(caller, PerfLogger.TEZ_COMPILER, "Remove bloom filter optimizations if needed");

      // Drop semijoins that may not be beneficial.
      perf.perfLogBegin(caller, PerfLogger.TEZ_COMPILER);
      removeSemijoinOptimizationByBenefit(procCtx);
      perf.perfLogEnd(caller, PerfLogger.TEZ_COMPILER, "Remove Semijoins based on cost benefits");
    }

    // The stats phase may have left cyclic dependencies behind; resolve them.
    perf.perfLogBegin(caller, PerfLogger.TEZ_COMPILER);
    if (dppEnabled) {
      runCycleAnalysisForPartitionPruning(procCtx);
    }
    perf.perfLogEnd(caller, PerfLogger.TEZ_COMPILER, "Run cycle analysis for partition pruning");

    // Finally remove redundant DPP and semijoin branches.
    perf.perfLogBegin(caller, PerfLogger.TEZ_COMPILER);
    if (extendedReductionOn) {
      removeRedundantSemijoinAndDpp(procCtx);
    }
    perf.perfLogEnd(caller, PerfLogger.TEZ_COMPILER, "Remove redundant semijoin reduction");
  }

  private void runRemoveDynamicPruningOptimization(OptimizeTezProcContext procCtx) throws SemanticException {

    // Sequence of TableScan operators to be walked
    Deque> deque = new LinkedList>();
    deque.addAll(procCtx.parseContext.getTopOps().values());

    // create a walker which walks the tree in a DFS manner while maintaining
    // the operator stack.
    Map opRules = new LinkedHashMap();
    opRules.put(
        new RuleRegExp("Remove dynamic pruning by size",
        AppMasterEventOperator.getOperatorName() + "%"),
        new RemoveDynamicPruningBySize());

    // The dispatcher fires the processor corresponding to the closest matching
    // rule and passes the context along
    SemanticDispatcher disp = new DefaultRuleDispatcher(null, opRules, procCtx);
    List topNodes = new ArrayList();
    topNodes.addAll(procCtx.parseContext.getTopOps().values());
    SemanticGraphWalker ogw = new ForwardWalker(disp);
    ogw.startWalking(topNodes, null);
  }

  private void runDynamicPartitionPruning(OptimizeTezProcContext procCtx) throws SemanticException {

    if (!procCtx.conf.getBoolVar(ConfVars.TEZ_DYNAMIC_PARTITION_PRUNING)) {
      return;
    }

    // Sequence of TableScan operators to be walked
    Deque> deque = new LinkedList>();
    deque.addAll(procCtx.parseContext.getTopOps().values());

    Map opRules = new LinkedHashMap();
    opRules.put(
        new RuleRegExp(new String("Dynamic Partition Pruning"), FilterOperator.getOperatorName()
            + "%"), new DynamicPartitionPruningOptimization());

    // The dispatcher fires the processor corresponding to the closest matching
    // rule and passes the context along
    SemanticDispatcher disp = new DefaultRuleDispatcher(null, opRules, procCtx);
    List topNodes = new ArrayList();
    topNodes.addAll(procCtx.parseContext.getTopOps().values());
    SemanticGraphWalker ogw = new ForwardWalker(disp);
    ogw.startWalking(topNodes, null);
  }

  @Override
  protected void generateTaskTree(List> rootTasks, ParseContext pCtx,
      List> mvTask, Set inputs, Set outputs)
      throws SemanticException {

	PerfLogger perfLogger = SessionState.getPerfLogger();
    perfLogger.perfLogBegin(this.getClass().getName(), PerfLogger.TEZ_COMPILER);
    ParseContext tempParseContext = getParseContext(pCtx, rootTasks);
    GenTezUtils utils = new GenTezUtils();
    GenTezWork genTezWork = new GenTezWork(utils);

    GenTezProcContext procCtx = new GenTezProcContext(
        conf, tempParseContext, mvTask, rootTasks, inputs, outputs);

    // create a walker which walks the tree in a DFS manner while maintaining
    // the operator stack.
    // The dispatcher generates the plan from the operator tree
    Map opRules = new LinkedHashMap();
    opRules.put(new RuleRegExp("Split Work - ReduceSink",
        ReduceSinkOperator.getOperatorName() + "%"),
        genTezWork);

    opRules.put(new RuleRegExp("No more walking on ReduceSink-MapJoin",
        MapJoinOperator.getOperatorName() + "%"), new ReduceSinkMapJoinProc());

    opRules.put(new RuleRegExp("Recognize a Sorted Merge Join operator to setup the right edge and"
        + " stop traversing the DummyStore-MapJoin", CommonMergeJoinOperator.getOperatorName()
        + "%"), new MergeJoinProc());

    opRules.put(new RuleRegExp("Split Work + Move/Merge - FileSink",
        FileSinkOperator.getOperatorName() + "%"),
        new CompositeProcessor(new FileSinkProcessor(), genTezWork));

    opRules.put(new RuleRegExp("Split work - DummyStore", DummyStoreOperator.getOperatorName()
        + "%"), genTezWork);

    opRules.put(new RuleRegExp("Handle Potential Analyze Command",
        TableScanOperator.getOperatorName() + "%"),
        new ProcessAnalyzeTable(utils));

    opRules.put(new RuleRegExp("Remember union",
        UnionOperator.getOperatorName() + "%"),
        new UnionProcessor());

    opRules.put(new RuleRegExp("AppMasterEventOperator",
        AppMasterEventOperator.getOperatorName() + "%"),
        new AppMasterEventProcessor());

    // The dispatcher fires the processor corresponding to the closest matching
    // rule and passes the context along
    SemanticDispatcher disp = new DefaultRuleDispatcher(null, opRules, procCtx);
    List topNodes = new ArrayList();
    topNodes.addAll(pCtx.getTopOps().values());
    SemanticGraphWalker ogw = new GenTezWorkWalker(disp, procCtx);
    ogw.startWalking(topNodes, null);

    // we need to specify the reserved memory for each work that contains Map Join
    for (List baseWorkList : procCtx.mapJoinWorkMap.values()) {
      for (BaseWork w : baseWorkList) {
        // work should be the smallest unit for memory allocation
        w.setReservedMemoryMB(
            (int)(conf.getLongVar(ConfVars.HIVECONVERTJOINNOCONDITIONALTASKTHRESHOLD) / (1024 * 1024)));
      }
    }

    // we need to clone some operator plans and remove union operators still
    int indexForTezUnion = 0;
    for (BaseWork w: procCtx.workWithUnionOperators) {
      GenTezUtils.removeUnionOperators(procCtx, w, indexForTezUnion++);
    }

    // then we make sure the file sink operators are set up right
    for (FileSinkOperator fileSink: procCtx.fileSinkSet) {
      GenTezUtils.processFileSink(procCtx, fileSink);
    }

    // Connect any edges required for min/max pushdown
    if (pCtx.getRsToRuntimeValuesInfoMap().size() > 0) {
      for (ReduceSinkOperator rs : pCtx.getRsToRuntimeValuesInfoMap().keySet()) {
        // Process min/max
        GenTezUtils.processDynamicSemiJoinPushDownOperator(
                procCtx, pCtx.getRsToRuntimeValuesInfoMap().get(rs), rs);
      }
    }
    // and finally we hook up any events that need to be sent to the tez AM
    LOG.debug("There are " + procCtx.eventOperatorSet.size() + " app master events.");
    for (AppMasterEventOperator event : procCtx.eventOperatorSet) {
      LOG.debug("Handling AppMasterEventOperator: " + event);
      GenTezUtils.processAppMasterEvent(procCtx, event);
    }
    perfLogger.perfLogEnd(this.getClass().getName(), PerfLogger.TEZ_COMPILER, "generateTaskTree");
  }

  /**
   * Decides the input format for a single unit of work. Only {@link MapWork}
   * instances are inspected; any other kind of work is left untouched.
   *
   * @param work the work to examine
   */
  void setInputFormatForMapWork(BaseWork work) {
    if (!(work instanceof MapWork)) {
      return;
    }
    MapWork mapWork = (MapWork) work;
    // Visit the operator tree registered under every alias. Iterating an empty
    // map is a no-op, so no separate emptiness check is required.
    for (Operator<?> op : mapWork.getAliasToWork().values()) {
      setInputFormat(mapWork, op);
    }
  }

  /**
   * Sets the input format for every map work reachable from {@code task}:
   * the works of a {@link TezTask} (including the main and base works of a
   * {@link MergeJoinWork}), the alternatives of a {@link ConditionalTask},
   * and, recursively, all child tasks.
   *
   * @param task the root of the task tree to process
   */
  @Override
  protected void setInputFormat(Task<?> task) {
    if (task instanceof TezTask) {
      TezWork work = ((TezTask) task).getWork();
      for (BaseWork w : work.getAllWork()) {
        if (w instanceof MergeJoinWork) {
          // A merge join wraps several works; each one is examined separately.
          MergeJoinWork mj = (MergeJoinWork) w;
          setInputFormatForMapWork(mj.getMainWork());
          for (BaseWork bw : mj.getBaseWorkList()) {
            setInputFormatForMapWork(bw);
          }
        } else {
          setInputFormatForMapWork(w);
        }
      }
    } else if (task instanceof ConditionalTask) {
      for (Task<?> tsk : ((ConditionalTask) task).getListTasks()) {
        setInputFormat(tsk);
      }
    }

    if (task.getChildTasks() != null) {
      for (Task<?> childTask : task.getChildTasks()) {
        setInputFormat(childTask);
      }
    }
  }

  /**
   * Depth-first search of the operator tree rooted at {@code op}. As soon as an
   * operator reports {@code isUseBucketizedHiveInputFormat()}, the flag is set
   * on {@code work} and that branch is not explored further.
   *
   * @param work the map work whose input format flag may be set
   * @param op   root of the operator subtree to inspect; may be {@code null}
   */
  private void setInputFormat(MapWork work, Operator<?> op) {
    if (op == null) {
      return;
    }
    if (op.isUseBucketizedHiveInputFormat()) {
      work.setUseBucketizedHiveInputFormat(true);
      return;
    }

    if (op.getChildOperators() != null) {
      for (Operator<?> childOp : op.getChildOperators()) {
        setInputFormat(work, childOp);
      }
    }
  }

  @Override
  protected void decideExecMode(List> rootTasks, Context ctx,
      GlobalLimitCtx globalLimitCtx)
      throws SemanticException {
    // currently all Tez work is on the cluster
    return;
  }

  @Override
  protected void optimizeTaskPlan(List> rootTasks, ParseContext pCtx,
      Context ctx) throws SemanticException {
    PerfLogger perfLogger = SessionState.getPerfLogger();
    perfLogger.perfLogBegin(this.getClass().getName(), PerfLogger.TEZ_COMPILER);
    PhysicalContext physicalCtx = new PhysicalContext(conf, pCtx, pCtx.getContext(), rootTasks,
       pCtx.getFetchTask());

    if (conf.getBoolVar(HiveConf.ConfVars.HIVENULLSCANOPTIMIZE)) {
      physicalCtx = new NullScanOptimizer().resolve(physicalCtx);
    } else {
      LOG.debug("Skipping null scan query optimization");
    }

    if (conf.getBoolVar(HiveConf.ConfVars.HIVEMETADATAONLYQUERIES)) {
      physicalCtx = new MetadataOnlyOptimizer().resolve(physicalCtx);
    } else {
      LOG.debug("Skipping metadata only query optimization");
    }

    if (conf.getBoolVar(HiveConf.ConfVars.HIVE_CHECK_CROSS_PRODUCT)) {
      physicalCtx = new CrossProductHandler().resolve(physicalCtx);
    } else {
      LOG.debug("Skipping cross product analysis");
    }

    if ("llap".equalsIgnoreCase(conf.getVar(HiveConf.ConfVars.HIVE_EXECUTION_MODE))) {
      physicalCtx = new LlapPreVectorizationPass().resolve(physicalCtx);
    } else {
      LOG.debug("Skipping llap pre-vectorization pass");
    }

    if (conf.getBoolVar(HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED)) {
      physicalCtx = new Vectorizer().resolve(physicalCtx);
    } else {
      LOG.debug("Skipping vectorization");
    }

    if (!"none".equalsIgnoreCase(conf.getVar(HiveConf.ConfVars.HIVESTAGEIDREARRANGE))) {
      physicalCtx = new StageIDsRearranger().resolve(physicalCtx);
    } else {
      LOG.debug("Skipping stage id rearranger");
    }

    if ((conf.getBoolVar(HiveConf.ConfVars.HIVE_TEZ_ENABLE_MEMORY_MANAGER))
        && (conf.getBoolVar(HiveConf.ConfVars.HIVEUSEHYBRIDGRACEHASHJOIN))) {
      physicalCtx = new MemoryDecider().resolve(physicalCtx);
    }

    if ("llap".equalsIgnoreCase(conf.getVar(HiveConf.ConfVars.HIVE_EXECUTION_MODE))) {
      LlapClusterStateForCompile llapInfo = LlapClusterStateForCompile.getClusterInfo(conf);
      physicalCtx = new LlapDecider(llapInfo).resolve(physicalCtx);
    } else {
      LOG.debug("Skipping llap decider");
    }

    //  This optimizer will serialize all filters that made it to the
    //  table scan operator to avoid having to do it multiple times on
    //  the backend. If you have a physical optimization that changes
    //  table scans or filters, you have to invoke it before this one.
    physicalCtx = new SerializeFilter().resolve(physicalCtx);

    if (physicalCtx.getContext().getExplainAnalyze() != null) {
      new AnnotateRunTimeStatsOptimizer().resolve(physicalCtx);
    }

    perfLogger.perfLogEnd(this.getClass().getName(), PerfLogger.TEZ_COMPILER, "optimizeTaskPlan");
    return;
  }

  /**
   * Walker context that maps each visited merge join operator to a table scan
   * operator found for it during the walk.
   */
  private static class SMBJoinOpProcContext implements NodeProcessorCtx {
    // Field name is kept unchanged because other members of the enclosing class refer to it.
    HashMap<CommonMergeJoinOperator, TableScanOperator> JoinOpToTsOpMap = new HashMap<>();
  }

  /**
   * Processor that records, for the merge join operator it is fired on, the
   * operator at the bottom of the walk stack (index 0) as its table scan.
   */
  private static class SMBJoinOpProc implements SemanticNodeProcessor {

    @Override
    public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
                          Object... nodeOutputs) throws SemanticException {
      SMBJoinOpProcContext ctx = (SMBJoinOpProcContext) procCtx;
      // stack.get(0) is the first element pushed for the current walk path.
      ctx.JoinOpToTsOpMap.put((CommonMergeJoinOperator) nd,
              (TableScanOperator) stack.get(0));
      return null;
    }
  }

  /**
   * Removes semijoin optimizations whose target table scan feeds a sort-merge
   * bucket (SMB) join. The operator tree is walked to find merge joins that are
   * reached through a dummy store operator; the table scans on both sides of
   * each such join are collected, and every semijoin branch targeting one of
   * them is removed.
   *
   * @param procCtx optimization context holding the parse context
   * @throws SemanticException if the walk fails, or if a semijoin that would be
   *         removed was requested through a hint
   */
  private static void removeSemijoinOptimizationFromSMBJoins(
          OptimizeTezProcContext procCtx) throws SemanticException {
    Map<SemanticRule, SemanticNodeProcessor> opRules = new LinkedHashMap<>();
    opRules.put(
            new RuleRegExp("R1", TableScanOperator.getOperatorName() + "%" +
                    ".*" + TezDummyStoreOperator.getOperatorName() + "%" +
                    CommonMergeJoinOperator.getOperatorName() + "%"),
            new SMBJoinOpProc());

    SMBJoinOpProcContext ctx = new SMBJoinOpProcContext();
    // The dispatcher finds SMB and if there is semijoin optimization before it, removes it.
    SemanticDispatcher disp = new DefaultRuleDispatcher(null, opRules, ctx);
    List<Node> topNodes = new ArrayList<>();
    topNodes.addAll(procCtx.parseContext.getTopOps().values());
    SemanticGraphWalker ogw = new PreOrderOnceWalker(disp);
    ogw.startWalking(topNodes, null);

    List<TableScanOperator> tsOps = new ArrayList<>();
    // Iterate over the map and collect the table scans on both sides of each join.
    for (Map.Entry<CommonMergeJoinOperator, TableScanOperator> joinEntry : ctx.JoinOpToTsOpMap.entrySet()) {
      // Get one top level TS Op directly from the stack
      tsOps.add(joinEntry.getValue());

      // Get the other one by examining Join Op
      for (Operator<?> parent : joinEntry.getKey().getParentOperators()) {
        if (parent instanceof TezDummyStoreOperator) {
          // already accounted for
          continue;
        }

        // Climb through first parents until a table scan is found. The walk
        // stops cleanly when an operator has no parent instead of failing on
        // get(0) of an empty (or null) parent list.
        Operator<?> current = parent;
        while (current != null) {
          if (current instanceof TableScanOperator) {
            tsOps.add((TableScanOperator) current);
            break;
          }
          boolean hasParent = current.getParentOperators() != null
              && !current.getParentOperators().isEmpty();
          current = hasParent ? current.getParentOperators().get(0) : null;
        }
      }
    }
    // Now the relevant TableScanOperators are known, find if there exists
    // a semijoin filter on any of them, if so, remove it.

    ParseContext pctx = procCtx.parseContext;
    // Iterate over a snapshot of the keys: removing a semijoin modifies the live map.
    Set<ReduceSinkOperator> rsSet = new HashSet<>(pctx.getRsToSemiJoinBranchInfo().keySet());
    for (TableScanOperator ts : tsOps) {
      for (ReduceSinkOperator rs : rsSet) {
        SemiJoinBranchInfo sjInfo = pctx.getRsToSemiJoinBranchInfo().get(rs);
        if (sjInfo != null && ts == sjInfo.getTsOp()) {
          // match!
          if (sjInfo.getIsHint()) {
            throw new SemanticException("Removing hinted semijoin as it is with SMB join " + rs + " : " + ts);
          }
          if (LOG.isDebugEnabled()) {
            LOG.debug("Semijoin optimization found going to SMB join. Removing semijoin "
                    + OperatorUtils.getOpNamePretty(rs) + " - " + OperatorUtils.getOpNamePretty(ts));
          }
          GenTezUtils.removeBranch(rs);
          GenTezUtils.removeSemiJoinOperator(pctx, rs, ts);
        }
      }
    }
  }

  private static class TerminalOpsInfo {
    public Set> terminalOps;

    TerminalOpsInfo(Set> terminalOps) {
      this.terminalOps = terminalOps;
    }
  }

  private void connectTerminalOps(ParseContext pCtx) {
    // The map which contains the virtual edges from non-semijoin terminal ops to semjoin RSs.
    Multimap, ReduceSinkOperator> terminalOpToRSMap = ArrayListMultimap.create();

    // Map of semijoin RS to work ops to ensure no work is examined more than once.
    Map rsToTerminalOpsInfo = new HashMap<>();

    // Get all the terminal ops
    for (ReduceSinkOperator rs : pCtx.getRsToSemiJoinBranchInfo().keySet()) {
      TerminalOpsInfo terminalOpsInfo = rsToTerminalOpsInfo.get(rs);
      if (terminalOpsInfo != null) {
        continue; // done with this one
      }

      Set workRSOps = new HashSet<>();
      Set> workTerminalOps = new HashSet<>();
      // Get the SEL Op in the semijoin-branch, SEL->GBY1->RS1->GBY2->RS2
      SelectOperator selOp = OperatorUtils.ancestor(rs, SelectOperator.class, 0, 0, 0, 0);
      OperatorUtils.findWorkOperatorsAndSemiJoinEdges(selOp,
              pCtx.getRsToSemiJoinBranchInfo(), workRSOps, workTerminalOps);

      TerminalOpsInfo candidate = new TerminalOpsInfo(workTerminalOps);

      // A work may contain multiple semijoin edges, traverse rsOps and add for each
      for (ReduceSinkOperator rsFound : workRSOps) {
        rsToTerminalOpsInfo.put(rsFound, candidate);
        for (TerminalOperator terminalOp : candidate.terminalOps) {
          terminalOpToRSMap.put(terminalOp, rsFound);
        }
      }
    }

    pCtx.setTerminalOpToRSMap(terminalOpToRSMap);
  }

  /**
   * Walks the operator tree and fires {@link SemiJoinRemovalProc} in
   * stats-based mode on every GBY-RS-GBY-RS chain.
   *
   * @param procCtx optimization context holding the parse context
   * @throws SemanticException if the walk or the processor fails
   */
  private void removeSemiJoinIfNoStats(OptimizeTezProcContext procCtx)
          throws SemanticException {
    Map<SemanticRule, SemanticNodeProcessor> opRules = new LinkedHashMap<>();
    opRules.put(
            new RuleRegExp("R1", GroupByOperator.getOperatorName() + "%" +
                    ReduceSinkOperator.getOperatorName() + "%" +
                    GroupByOperator.getOperatorName() + "%" +
                    ReduceSinkOperator.getOperatorName() + "%"),
            new SemiJoinRemovalProc(true, false));
    SemiJoinRemovalContext ctx =
        new SemiJoinRemovalContext(procCtx.parseContext);
    SemanticDispatcher disp = new DefaultRuleDispatcher(null, opRules, ctx);
    List<Node> topNodes = new ArrayList<>();
    topNodes.addAll(procCtx.parseContext.getTopOps().values());
    SemanticGraphWalker ogw = new PreOrderOnceWalker(disp);
    ogw.startWalking(topNodes, null);
  }

  /**
   * Processor fired on filter operators. It looks up the operator's signature
   * and the equivalence groups that contain a {@code HiveFilter}; the results
   * are currently not used and the visited node is returned unchanged.
   */
  private static class CollectAll implements SemanticNodeProcessor {
    private PlanMapper planMapper;

    @Override
    public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx, Object... nodeOutputs)
        throws SemanticException {
      ParseContext pCtx = ((OptimizeTezProcContext) procCtx).parseContext;
      planMapper = pCtx.getContext().getPlanMapper();
      FilterOperator fop = (FilterOperator) nd;
      // NOTE(review): both results are unused; the calls are kept in case they
      // have side effects inside the plan mapper - confirm before deleting.
      OpTreeSignature sig = planMapper.getSignatureOf(fop);
      List<EquivGroup> ar = getGroups(planMapper, HiveFilter.class);

      return nd;
    }

    /**
     * Returns the groups of {@code mapper} that hold at least one member of
     * the given class.
     *
     * @param mapper plan mapper whose groups are scanned
     * @param clazz  member type a group must contain to be selected
     * @return the matching groups, in iteration order
     */
    private List<EquivGroup> getGroups(PlanMapper mapper, Class<?> clazz) {
      // Use the mapper passed in rather than silently reading the field.
      Iterator<EquivGroup> it = mapper.iterateGroups();
      List<EquivGroup> ret = new ArrayList<>();
      while (it.hasNext()) {
        EquivGroup g = it.next();
        if (g.getAll(clazz).size() > 0) {
          ret.add(g);
        }
      }
      return ret;
    }
  }

  /**
   * Processor that links marker objects to operators through the plan mapper:
   * operators below the target table scan of a semijoin branch and table scans
   * targeted by dynamic pruning events are marked as having incorrect runtime
   * statistics, and table scans with a predicate string are marked as not
   * usable for relNodes.
   */
  private static class MarkRuntimeStatsAsIncorrect implements SemanticNodeProcessor {

    private PlanMapper planMapper;

    @Override
    public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx, Object... nodeOutputs)
        throws SemanticException {
      ParseContext pCtx = ((OptimizeTezProcContext) procCtx).parseContext;
      planMapper = pCtx.getContext().getPlanMapper();
      if (nd instanceof ReduceSinkOperator) {
        ReduceSinkOperator rs = (ReduceSinkOperator) nd;
        SemiJoinBranchInfo sjInfo = pCtx.getRsToSemiJoinBranchInfo().get(rs);
        if (sjInfo == null) {
          // Not the reduce sink of a semijoin branch.
          return null;
        }
        walkSubtree(sjInfo.getTsOp());
      }
      if (nd instanceof AppMasterEventOperator) {
        AppMasterEventOperator ame = (AppMasterEventOperator) nd;
        AppMasterEventDesc c = ame.getConf();
        if (c instanceof DynamicPruningEventDesc) {
          DynamicPruningEventDesc dped = (DynamicPruningEventDesc) c;
          mark(dped.getTableScan());
        }
      }
      if (nd instanceof TableScanOperator) {
        // If the tablescan operator is making use of filtering capabilities of readers then
        // we will not see the actual incoming rowcount which was processed - so we may not use it for relNodes
        TableScanOperator ts = (TableScanOperator) nd;
        if (ts.getConf().getPredicateString() != null) {
          planMapper.link(ts, new OperatorStats.MayNotUseForRelNodes());
        }
      }
      return null;
    }

    /**
     * Marks {@code root} and every operator below it; a reduce sink is marked
     * too, but its children are not visited.
     */
    private void walkSubtree(Operator<?> root) {
      Deque<Operator<?>> deque = new LinkedList<>();
      deque.add(root);
      while (!deque.isEmpty()) {
        Operator<?> op = deque.pollLast();
        mark(op);
        if (op instanceof ReduceSinkOperator) {
          // Done with this branch
        } else {
          deque.addAll(op.getChildOperators());
        }
      }
    }

    /** Links an incorrect-runtime-stats marker to {@code op}. */
    private void mark(Operator<?> op) {
      planMapper.link(op, new OperatorStats.IncorrectRuntimeStatsMarker());
    }

  }

  /**
   * Walks the operator tree and fires {@link MarkRuntimeStatsAsIncorrect} on
   * every reduce sink, app master event and table scan operator.
   *
   * @param procCtx optimization context, also passed to the processors
   * @throws SemanticException if the walk fails
   */
  private void markOperatorsWithUnstableRuntimeStats(OptimizeTezProcContext procCtx) throws SemanticException {
    Map<SemanticRule, SemanticNodeProcessor> opRules = new LinkedHashMap<>();
    opRules.put(
        new RuleRegExp("R1",
            ReduceSinkOperator.getOperatorName() + "%"),
        new MarkRuntimeStatsAsIncorrect());
    opRules.put(
        new RuleRegExp("R2",
            AppMasterEventOperator.getOperatorName() + "%"),
        new MarkRuntimeStatsAsIncorrect());
    opRules.put(
        new RuleRegExp("R3",
            TableScanOperator.getOperatorName() + "%"),
        new MarkRuntimeStatsAsIncorrect());
    SemanticDispatcher disp = new DefaultRuleDispatcher(null, opRules, procCtx);
    List<Node> topNodes = new ArrayList<>();
    topNodes.addAll(procCtx.parseContext.getTopOps().values());
    SemanticGraphWalker ogw = new PreOrderOnceWalker(disp);
    ogw.startWalking(topNodes, null);
  }

  /**
   * Processor fired on the last reduce sink of a GBY-RS-GBY-RS chain. When that
   * reduce sink belongs to a semijoin branch it can, depending on the flags:
   * <ul>
   *   <li>{@code removeBasedOnStats}: remove the branch when the bloom filter
   *       has no expected entry count or exceeds the configured maximum, or
   *       when the target table is below the configured minimum row count;</li>
   *   <li>{@code removeRedundant}: register the branch for removal when another
   *       semijoin above it filters the same table scan on the same columns.</li>
   * </ul>
   */
  private class SemiJoinRemovalProc implements SemanticNodeProcessor {

    private final boolean removeBasedOnStats;
    private final boolean removeRedundant;

    private SemiJoinRemovalProc (boolean removeBasedOnStats, boolean removeRedundant) {
      this.removeBasedOnStats = removeBasedOnStats;
      this.removeRedundant = removeRedundant;
    }

    @Override
    public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
                          Object... nodeOutputs) throws SemanticException {
      ReduceSinkOperator rs = (ReduceSinkOperator) nd;
      SemiJoinRemovalContext rCtx = (SemiJoinRemovalContext) procCtx;
      ParseContext pCtx = rCtx.parseContext;
      SemiJoinBranchInfo sjInfo = pCtx.getRsToSemiJoinBranchInfo().get(rs);
      if (sjInfo == null) {
        // nothing to do here.
        return null;
      }
      TableScanOperator targetTSOp = sjInfo.getTsOp();

      // This is a semijoin branch. The stack should look like,
      // -SEL-GB1-RS1-GB2-RS2
      GroupByOperator gbOp = (GroupByOperator) stack.get(stack.size() - 2);
      GroupByDesc gbDesc = gbOp.getConf();
      List<AggregationDesc> aggregationDescs = gbDesc.getAggregators();
      for (AggregationDesc agg : aggregationDescs) {
        if (!isBloomFilterAgg(agg)) {
          continue;
        }

        GenericUDAFBloomFilterEvaluator udafBloomFilterEvaluator =
            (GenericUDAFBloomFilterEvaluator) agg.getGenericUDAFEvaluator();
        if (udafBloomFilterEvaluator.hasHintEntries()) {
          return null; // Created using hint, skip it
        }

        if (removeBasedOnStats) {
          long expectedEntries = udafBloomFilterEvaluator.getExpectedEntries();
          if (expectedEntries == -1 || expectedEntries >
              pCtx.getConf().getLongVar(ConfVars.TEZ_MAX_BLOOM_FILTER_ENTRIES)) {
            if (sjInfo.getIsHint() && expectedEntries == -1) {
              throw new SemanticException("Removing hinted semijoin due to lack to stats" +
                  " or exceeding max bloom filter entries");
            } else if(sjInfo.getIsHint()) {
              // do not remove if hint is provided
              continue;
            }
            // Remove the semijoin optimization branch along with ALL the mappings
            // The parent GB2 has all the branches. Collect them and remove them.
            for (Node node : gbOp.getChildren()) {
              ReduceSinkOperator rsFinal = (ReduceSinkOperator) node;
              TableScanOperator ts = pCtx.getRsToSemiJoinBranchInfo().
                  get(rsFinal).getTsOp();
              if (LOG.isDebugEnabled()) {
                // Log the branch actually being removed (rsFinal), which is not
                // necessarily the reduce sink this processor was fired on.
                LOG.debug("expectedEntries=" + expectedEntries + ". "
                    + "Either stats unavailable or expectedEntries exceeded max allowable bloomfilter size. "
                    + "Removing semijoin "
                    + OperatorUtils.getOpNamePretty(rsFinal) + " - " + OperatorUtils.getOpNamePretty(ts));
              }
              GenTezUtils.removeBranch(rsFinal);
              GenTezUtils.removeSemiJoinOperator(pCtx, rsFinal, ts);
            }
            return null;
          }
        }
      }

      if (removeBasedOnStats) {
        // At this point, hinted semijoin case has been handled already
        // Check if big table is big enough that runtime filtering is
        // worth it.
        TableScanOperator ts = sjInfo.getTsOp();
        if (ts.getStatistics() != null) {
          long numRows = ts.getStatistics().getNumRows();
          if (numRows < pCtx.getConf().getLongVar(ConfVars.TEZ_BIGTABLE_MIN_SIZE_SEMIJOIN_REDUCTION)) {
            if (sjInfo.getShouldRemove()) {
              if (LOG.isDebugEnabled()) {
                LOG.debug("Insufficient rows (" + numRows + ") to justify semijoin optimization. Removing semijoin "
                    + OperatorUtils.getOpNamePretty(rs) + " - " + OperatorUtils.getOpNamePretty(ts));
              }
              GenTezUtils.removeBranch(rs);
              GenTezUtils.removeSemiJoinOperator(pCtx, rs, ts);
            }
          }
        }
      }

      if (removeRedundant) {
        // Look for RS ops above the current semijoin branch
        Set<ReduceSinkOperator> rsOps = OperatorUtils.findOperators(
            ((Operator<?>) stack.get(stack.size() - 5)).getParentOperators().get(0),
            ReduceSinkOperator.class);
        for (Operator<?> otherRSOp : rsOps) {
          SemiJoinBranchInfo otherSjInfo = pCtx.getRsToSemiJoinBranchInfo().get(otherRSOp);
          // First conjunct prevents SJ RS from removing itself
          if (otherRSOp != rs && otherSjInfo != null && otherSjInfo.getTsOp() == targetTSOp) {
            if (rCtx.opsToRemove.containsKey(otherRSOp)) {
              // We found siblings, since we are removing the other operator, no need to remove this one
              continue;
            }
            if (!ExprNodeDescUtils.isSame(
                pCtx.getRsToRuntimeValuesInfoMap().get(rs).getTargetColumns(),
                pCtx.getRsToRuntimeValuesInfoMap().get(otherRSOp).getTargetColumns())) {
              // Filter should be on the same columns, otherwise we do not proceed
              continue;
            }
            rCtx.opsToRemove.put(rs, targetTSOp);
            break;
          }
        }
      }

      return null;
    }
  }

  /**
   * Tells whether the aggregation's UDAF name equals the bloom filter function name.
   *
   * @param agg aggregation descriptor to test
   * @return {@code true} when the UDAF name matches {@code BLOOM_FILTER_FUNCTION}
   */
  private static boolean isBloomFilterAgg(AggregationDesc agg) {
    final String udafName = agg.getGenericUDAFName();
    return BLOOM_FILTER_FUNCTION.equals(udafName);
  }

  /**
   * Processor fired on app master event operators. A dynamic pruning event is
   * registered for removal when another dynamic pruning event, reachable from
   * the nearest ancestor with at least two children, targets the same table
   * scan and the same column and is not itself already scheduled for removal.
   */
  private static class DynamicPruningRemovalRedundantProc implements SemanticNodeProcessor {

    @Override
    public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
                          Object... nodeOutputs) throws SemanticException {
      AppMasterEventOperator event = (AppMasterEventOperator) nd;
      if (!(event.getConf() instanceof DynamicPruningEventDesc)) {
        return null;
      }

      SemiJoinRemovalContext rCtx = (SemiJoinRemovalContext) procCtx;

      DynamicPruningEventDesc desc = (DynamicPruningEventDesc) event.getConf();
      TableScanOperator targetTSOp = desc.getTableScan();
      String targetColumnName = desc.getTargetColumnName();

      // Look for event ops above the current event op branch
      // NOTE(review): this climb assumes some ancestor has two or more
      // children; otherwise get(0) fails at the root - confirm the invariant.
      Operator<?> op = event.getParentOperators().get(0);
      while (op.getChildOperators().size() < 2) {
        op = op.getParentOperators().get(0);
      }
      Set<AppMasterEventOperator> eventOps = OperatorUtils.findOperators(
          op, AppMasterEventOperator.class);
      for (AppMasterEventOperator otherEvent : eventOps) {
        if (!(otherEvent.getConf() instanceof DynamicPruningEventDesc)) {
          continue;
        }
        DynamicPruningEventDesc otherDesc = (DynamicPruningEventDesc) otherEvent.getConf();
        if (otherEvent != event && otherDesc.getTableScan() == targetTSOp &&
            otherDesc.getTargetColumnName().equals(targetColumnName)) {
          if (rCtx.opsToRemove.containsKey(otherEvent)) {
            // We found siblings, since we are removing the other operator, no need to remove this one
            continue;
          }
          rCtx.opsToRemove.put(event, targetTSOp);
          break;
        }
      }

      return null;
    }
  }

  private void removeRedundantSemijoinAndDpp(OptimizeTezProcContext procCtx)
      throws SemanticException {
    Map opRules = new LinkedHashMap<>();
    opRules.put(
        new RuleRegExp("R1", GroupByOperator.getOperatorName() + "%" +
            ReduceSinkOperator.getOperatorName() + "%" +
            GroupByOperator.getOperatorName() + "%" +
            ReduceSinkOperator.getOperatorName() + "%"),
        new SemiJoinRemovalProc(false, true));
    opRules.put(
        new RuleRegExp("R2",
            AppMasterEventOperator.getOperatorName() + "%"),
        new DynamicPruningRemovalRedundantProc());

    // Gather
    SemiJoinRemovalContext ctx =
        new SemiJoinRemovalContext(procCtx.parseContext);
    SemanticDispatcher disp = new DefaultRuleDispatcher(null, opRules, ctx);
    List topNodes = new ArrayList();
    topNodes.addAll(procCtx.parseContext.getTopOps().values());
    SemanticGraphWalker ogw = new PreOrderOnceWalker(disp);
    ogw.startWalking(topNodes, null);

    // Remove
    for (Map.Entry, TableScanOperator> p : ctx.opsToRemove.entrySet()) {
      if (LOG.isDebugEnabled()) {
        LOG.debug("Removing redundant " + OperatorUtils.getOpNamePretty(p.getKey()) + " - " + OperatorUtils.getOpNamePretty(p.getValue()));
      }
      GenTezUtils.removeBranch(p.getKey());
      if (p.getKey() instanceof AppMasterEventOperator) {
        GenTezUtils.removeSemiJoinOperator(procCtx.parseContext, (AppMasterEventOperator) p.getKey(), p.getValue());
      } else if (p.getKey() instanceof ReduceSinkOperator) {
        GenTezUtils.removeSemiJoinOperator(procCtx.parseContext, (ReduceSinkOperator) p.getKey(), p.getValue());
      } else {
        throw new SemanticException("Unexpected error - type for branch could not be recognized");
      }
    }
  }

  private class SemiJoinRemovalContext implements NodeProcessorCtx {
    private final ParseContext parseContext;
    private final Map, TableScanOperator> opsToRemove;

    private SemiJoinRemovalContext(final ParseContext parseContext) {
      this.parseContext = parseContext;
      this.opsToRemove = new HashMap<>();
    }
  }

  /**
   * Runs the top-n key optimization and its pushdown over the operator tree.
   * Does nothing when {@code HIVE_OPTIMIZE_TOPNKEY} is disabled.
   *
   * @param procCtx optimization context supplying configuration and parse context
   * @throws SemanticException if the walk fails
   */
  private static void runTopNKeyOptimization(OptimizeTezProcContext procCtx)
      throws SemanticException {
    if (!procCtx.conf.getBoolVar(ConfVars.HIVE_OPTIMIZE_TOPNKEY)) {
      return;
    }

    Map<SemanticRule, SemanticNodeProcessor> opRules = new LinkedHashMap<>();
    opRules.put(
        new RuleRegExp("Top n key optimization", ReduceSinkOperator.getOperatorName() + "%"),
        new TopNKeyProcessor(
          HiveConf.getIntVar(procCtx.conf, HiveConf.ConfVars.HIVE_MAX_TOPN_ALLOWED),
          HiveConf.getFloatVar(procCtx.conf, ConfVars.HIVE_TOPN_EFFICIENCY_THRESHOLD),
          HiveConf.getIntVar(procCtx.conf, ConfVars.HIVE_TOPN_EFFICIENCY_CHECK_BATCHES),
          HiveConf.getIntVar(procCtx.conf, ConfVars.HIVE_TOPN_MAX_NUMBER_OF_PARTITIONS)));
    opRules.put(
            new RuleRegExp("Top n key pushdown", TopNKeyOperator.getOperatorName() + "%"),
            new TopNKeyPushdownProcessor());


    // The dispatcher fires the processor corresponding to the closest matching
    // rule and passes the context along
    SemanticDispatcher disp = new DefaultRuleDispatcher(null, opRules, procCtx);
    List<Node> topNodes = new ArrayList<>();
    topNodes.addAll(procCtx.parseContext.getTopOps().values());
    SemanticGraphWalker ogw = new DefaultGraphWalker(disp);
    ogw.startWalking(topNodes, null);
  }

  private boolean findParallelSemiJoinBranch(Operator mapjoin, TableScanOperator bigTableTS,
                                             ParseContext parseContext,
                                             Map semijoins,
                                             Map> probeDecodeMJoins) {

    boolean parallelEdges = false;
    for (Operator op : mapjoin.getParentOperators()) {
      if (!(op instanceof ReduceSinkOperator)) {
        continue;
      }

      op = op.getParentOperators().get(0);

      // Follow the Reducesink operator upstream which is on small table side.
      while (!(op instanceof ReduceSinkOperator) &&
              !(op instanceof TableScanOperator) &&
              !(op.getChildren() != null && op.getChildren().size() > 1)) {
        if (op instanceof MapJoinOperator) {
          // Pick the correct parent, only one of the parents is not
          // ReduceSink, that is what we are looking for.
          for (Operator parentOp : op.getParentOperators()) {
            if (parentOp instanceof ReduceSinkOperator) {
              continue;
            }
            op = parentOp; // parent in current pipeline
            continue;
          }
        }
        op = op.getParentOperators().get(0);
      }

      // Bail out if RS or TS is encountered.
      if (op instanceof ReduceSinkOperator || op instanceof TableScanOperator) {
        continue;
      }

      // A branch is hit.
      for (Node nd : op.getChildren()) {
        if (nd instanceof SelectOperator) {
          Operator child = (Operator) nd;

          while (child.getChildOperators().size() > 0) {
            child = child.getChildOperators().get(0);
          }

          // If not ReduceSink Op, skip
          if (!(child instanceof ReduceSinkOperator)) {
            // This still could be DPP.
            if (child instanceof AppMasterEventOperator &&
                    ((AppMasterEventOperator) child).getConf() instanceof DynamicPruningEventDesc) {
              // DPP indeed, Set parallel edges true
              parallelEdges = true;
            }
            continue;
          }

          ReduceSinkOperator rs = (ReduceSinkOperator) child;
          SemiJoinBranchInfo sjInfo = parseContext.getRsToSemiJoinBranchInfo().get(rs);
          if (sjInfo == null) {
            continue;
          }

          TableScanOperator ts = sjInfo.getTsOp();
          if (ts != bigTableTS) {
            // skip, not the one we are looking for.
            continue;
          }

          parallelEdges = true;

          // Keep track of Mj to probeDecode TS
          if (!probeDecodeMJoins.containsKey(ts)){
            probeDecodeMJoins.put(ts, new ArrayList<>());
          }
          probeDecodeMJoins.get(ts).add((MapJoinOperator) mapjoin);

          // Skip adding to SJ removal map when created by hint
          if (!sjInfo.getIsHint() && sjInfo.getShouldRemove()) {
            semijoins.put(rs, ts);
          }
        }
      }
    }
    return parallelEdges;
  }

  /**
   * Collects the semijoin edges found in the subtree rooted at {@code op}.
   * Every childless reduce sink registered as a semijoin branch is added to
   * {@code sjToRemove} together with its target table scan. Nothing is removed
   * here; the caller performs the removal.
   *
   * @param op         root of the subtree to search
   * @param procCtx    optimization context holding the parse context
   * @param sjToRemove output map of semijoin reduce sink to target table scan
   * @throws SemanticException declared for callers; not thrown by this method directly
   */
  private void removeSemiJoinEdges(Operator<?> op, OptimizeTezProcContext procCtx,
                                   Map<ReduceSinkOperator, TableScanOperator> sjToRemove) throws SemanticException {
    if (op instanceof ReduceSinkOperator && op.getNumChild() == 0) {
      SemiJoinBranchInfo sjInfo = procCtx.parseContext.getRsToSemiJoinBranchInfo().get(op);
      if (sjInfo != null) {
        sjToRemove.put((ReduceSinkOperator) op, sjInfo.getTsOp());
      }
    }

    for (Operator<?> child : op.getChildOperators()) {
      removeSemiJoinEdges(child, procCtx, sjToRemove);
    }
  }

  /**
   * Removes every semijoin branch located downstream of a union operator. The
   * operator tree is traversed from each top operator; each union operator is
   * processed once, and the semijoin edges found below it are removed after
   * the traversal.
   *
   * @param procCtx optimization context holding the parse context
   * @throws SemanticException if removing a branch fails
   */
  private void removeSemiJoinEdgesForUnion(OptimizeTezProcContext procCtx) throws SemanticException{
    // Get all the TS ops.
    List<Operator<?>> topOps = new ArrayList<>();
    topOps.addAll(procCtx.parseContext.getTopOps().values());
    Set<Operator<?>> unionOps = new HashSet<>();

    Map<ReduceSinkOperator, TableScanOperator> sjToRemove = new HashMap<>();
    for (Operator<?> parent : topOps) {
      Deque<Operator<?>> deque = new LinkedList<>();
      deque.add(parent);
      while (!deque.isEmpty()) {
        Operator<?> op = deque.pollLast();
        // Set.add returns false for a union that was already processed.
        if (op instanceof UnionOperator && unionOps.add(op)) {
          removeSemiJoinEdges(op, procCtx, sjToRemove);
        }
        deque.addAll(op.getChildOperators());
      }
    }
    // remove sj
    for (Map.Entry<ReduceSinkOperator, TableScanOperator> entry : sjToRemove.entrySet()) {
      if (LOG.isDebugEnabled()) {
        LOG.debug("Semijoin optimization with Union operator. Removing semijoin "
                      + OperatorUtils.getOpNamePretty(entry.getKey()) + " - "
                      + OperatorUtils.getOpNamePretty(entry.getValue()));
      }
      GenTezUtils.removeBranch(entry.getKey());
      GenTezUtils.removeSemiJoinOperator(procCtx.parseContext, entry.getKey(), entry.getValue());
    }
  }

  /*
   *  The algorithm looks at all the mapjoins in the operator pipeline until
   *  it hits RS Op and for each mapjoin examines if it has paralllel semijoin
   *  edge or dynamic partition pruning.
   *
   *  As an extension, the algorithm also looks for suitable table scan operators that
   *  could reduce the number of rows decoded at runtime using the information provided by
   *  the MapJoin operators of the branch when ProbeDecode feature is enabled.
   */
  private void removeSemijoinsParallelToMapJoin(OptimizeTezProcContext procCtx)
          throws SemanticException {
    if (!procCtx.conf.getBoolVar(ConfVars.HIVECONVERTJOIN)) {
      // Not needed without mapjoin conversion
      return;
    }

    // Get all the TS ops.
    List> topOps = new ArrayList<>();
    topOps.addAll(procCtx.parseContext.getTopOps().values());

    Map semijoins = new HashMap<>();
    Map> probeDecodeMJoins = new HashMap<>();
    for (Operator parent : topOps) {
      // A TS can have multiple branches due to DPP Or Semijoin Opt.
      // USe DFS to traverse all the branches until RS is hit.
      Deque> deque = new LinkedList<>();
      deque.add(parent);
      while (!deque.isEmpty()) {
        Operator op = deque.pollLast();
        if (op instanceof ReduceSinkOperator) {
          // Done with this branch
          continue;
        }

        if (op instanceof MapJoinOperator) {
          // A candidate.
          if (!findParallelSemiJoinBranch(op, (TableScanOperator) parent,
                  procCtx.parseContext, semijoins, probeDecodeMJoins)) {
            // No parallel edge was found for the given mapjoin op,
            // no need to go down further, skip this TS operator pipeline.
            break;
          }
        }
        deque.addAll(op.getChildOperators());
      }
    }
    //  No need to remove SJ branches when we have semi-join reduction or when semijoins are enabled for parallel mapjoins.
    if (!procCtx.conf.getBoolVar(ConfVars.TEZ_DYNAMIC_SEMIJOIN_REDUCTION_FOR_MAPJOIN)) {
      if (semijoins.size() > 0) {
        for (Entry semiEntry : semijoins.entrySet()) {
          if (LOG.isDebugEnabled()) {
            LOG.debug("Semijoin optimization with parallel edge to map join. Removing semijoin " +
                OperatorUtils.getOpNamePretty(semiEntry.getKey()) + " - " + OperatorUtils.getOpNamePretty(semiEntry.getValue()));
          }
          GenTezUtils.removeBranch(semiEntry.getKey());
          GenTezUtils.removeSemiJoinOperator(procCtx.parseContext, semiEntry.getKey(), semiEntry.getValue());
        }
      }
    }
    if (LlapHiveUtils.isLlapMode(procCtx.conf) && procCtx.conf.getBoolVar(ConfVars.HIVE_OPTIMIZE_SCAN_PROBEDECODE)) {
      if (probeDecodeMJoins.size() > 0) {
        // When multiple MJ, select one based on a policy
        for (Map.Entry> probeTsMap : probeDecodeMJoins.entrySet()){
          TableScanOperator.ProbeDecodeContext tsCntx = null;
          // Currently supporting: LowestRatio policy
          // TODO: Add more policies and make the selection a conf property
          tsCntx = selectLowestRatioProbeDecodeMapJoin(probeTsMap.getKey(), probeTsMap.getValue());
          if (tsCntx != null) {
            LOG.debug("ProbeDecode MJ for TS {}  with CacheKey {} MJ Pos {} ColName {} with Ratio {}",
                    probeTsMap.getKey().getName(), tsCntx.getMjSmallTableCacheKey(), tsCntx.getMjSmallTablePos(),
                    tsCntx.getMjBigTableKeyColName(), tsCntx.getKeyRatio());
            probeTsMap.getKey().setProbeDecodeContext(tsCntx);
            probeTsMap.getKey().getConf().setProbeDecodeContext(tsCntx);
          }
        }
      }
    }
  }

  /**
   * LowestRatio policy: among the given MapJoin operators that pass
   * {@link #isValidProbeDecodeMapJoin(MapJoinOperator)}, picks the one with the lowest
   * {@link #getProbeDecodeNDVRatio(TableScanOperator, MapJoinOperator)} and builds the
   * ProbeDecode context for the table scan from it. On ties the earlier operator is kept.
   *
   * @param tsOp the table scan the ProbeDecode context is meant for
   * @param mjOps candidate MapJoin operators
   * @return the new context, or {@code null} when no candidate is valid, when the origin column
   *         of the join key cannot be found, or when the key and origin column types do not match
   * @throws SemanticException as declared; presumably propagated from the origin column lookup
   */
  private static TableScanOperator.ProbeDecodeContext selectLowestRatioProbeDecodeMapJoin(TableScanOperator tsOp,
      List<MapJoinOperator> mjOps) throws SemanticException {
    MapJoinOperator selectedMJOp = null;
    double selectedMJOpRatio = 0;
    for (MapJoinOperator currMJOp : mjOps) {
      if (!isValidProbeDecodeMapJoin(currMJOp)) {
        continue;
      }
      // At this point we know it is a single Key MapJoin
      if (selectedMJOp == null) {
        // Set the first valid MJ
        selectedMJOp = currMJOp;
        selectedMJOpRatio = getProbeDecodeNDVRatio(tsOp, currMJOp);
        LOG.debug("ProbeDecode MJ {} with Ratio {}", selectedMJOp, selectedMJOpRatio);
      } else {
        double currMJRatio = getProbeDecodeNDVRatio(tsOp, currMJOp);
        if (currMJRatio < selectedMJOpRatio) {
          LOG.debug("ProbeDecode MJ {} Ratio {} is lower than existing MJ {} with Ratio {}",
              currMJOp, currMJRatio, selectedMJOp, selectedMJOpRatio);
          selectedMJOp = currMJOp;
          selectedMJOpRatio = currMJRatio;
        }
      }
    }

    TableScanOperator.ProbeDecodeContext tsProbeDecodeCtx = null;
    // If there is a valid MJ to be used for TS probeDecode make sure the MJ cache key is generated and
    // then propagate the new ProbeDecodeContext (to be used by LLAP IO when executing the TS op)
    if (selectedMJOp != null) {
      String mjCacheKey = selectedMJOp.getConf().getCacheKey();
      if (mjCacheKey == null) {
        // Generate cache key if it has not been yet generated
        mjCacheKey = MapJoinDesc.generateCacheKey(selectedMJOp.getOperatorId());
        // Set in the conf of the map join operator
        selectedMJOp.getConf().setCacheKey(mjCacheKey);
      }

      byte posBigTable = (byte) selectedMJOp.getConf().getPosBigTable();
      Byte[] order = selectedMJOp.getConf().getTagOrder();
      // Only the first two tags are inspected: the one that is not the big table is the small table.
      Byte mjSmallTablePos = (order[0] == posBigTable ? order[1] : order[0]);

      List<ExprNodeDesc> keyDesc = selectedMJOp.getConf().getKeys().get(posBigTable);
      // Safe cast: isValidProbeDecodeMapJoin accepted this operator only with a single column key.
      ExprNodeColumnDesc keyCol = (ExprNodeColumnDesc) keyDesc.get(0);
      ExprNodeColumnDesc originTSColExpr = OperatorUtils.findTableOriginColExpr(keyCol, selectedMJOp, tsOp);
      if (originTSColExpr == null) {
        LOG.warn("ProbeDecode could not find origTSCol for mjCol: {} with MJ Schema: {}",
            keyCol, selectedMJOp.getSchema());
      } else if (!TypeInfoUtils.doPrimitiveCategoriesMatch(keyCol.getTypeInfo(), originTSColExpr.getTypeInfo())) {
        // src Col -> HT key Col needs explicit or implicit (Casting) conversion
        // as a result we cannot perform direct lookups on the HT
        LOG.warn("ProbeDecode origTSCol {} type missmatch mjCol {}", originTSColExpr, keyCol);
      } else {
        tsProbeDecodeCtx = new TableScanOperator.ProbeDecodeContext(mjCacheKey, mjSmallTablePos,
            originTSColExpr.getColumn(), selectedMJOpRatio);
      }
    }
    return tsProbeDecodeCtx;
  }

  /**
   * Returns the ratio of:
   * (distinct) JOIN_probe_key_column_rows / (distinct) JOIN_TS_target_column_rows.
   * <p>
   * Both sides default to the operator row counts. When the small-table key is a plain column,
   * each side is replaced by the column NDV if {@link #canUseNDV(ColStatistics)} accepts the
   * column statistics found for it.
   * </p>
   * The division is done in floating point, so a zero table-scan cardinality yields
   * {@code Infinity} or {@code NaN} rather than an exception.
   *
   * @param tsOp table scan operator providing the denominator statistics
   * @param mjOp single-key MapJoin operator providing the numerator statistics
   * @return the MapJoin key cardinality divided by the table scan key cardinality
   */
  private static double getProbeDecodeNDVRatio(TableScanOperator tsOp, MapJoinOperator mjOp) {
    long mjKeyCardinality = mjOp.getStatistics().getNumRows();
    long tsKeyCardinality = tsOp.getStatistics().getNumRows();

    byte posBigTable = (byte) mjOp.getConf().getPosBigTable();

    // Only the first two tags are inspected to tell the big table from the small table.
    Byte[] order = mjOp.getConf().getTagOrder();
    Byte mjSmallTablePos = (order[0] == posBigTable ? order[1] : order[0]);
    Byte mjBigTablePos = (order[0] == posBigTable ? order[0] : order[1]);

    // Single Key MJ at this point
    List<ExprNodeDesc> tsKeyDesc = mjOp.getConf().getKeys().get(mjBigTablePos);
    List<ExprNodeDesc> mjKeyDesc = mjOp.getConf().getKeys().get(mjSmallTablePos);
    if (mjKeyDesc.get(0) instanceof ExprNodeColumnDesc) {
      ExprNodeColumnDesc tsKeyCol = (ExprNodeColumnDesc) tsKeyDesc.get(0);
      ExprNodeColumnDesc mjKeyCol = (ExprNodeColumnDesc) mjKeyDesc.get(0);

      ColStatistics mjStats = mjOp.getStatistics().getColumnStatisticsFromColName(mjKeyCol.getColumn());
      ColStatistics tsStats = tsOp.getStatistics().getColumnStatisticsFromColName(tsKeyCol.getColumn());

      if (canUseNDV(mjStats)) {
        mjKeyCardinality = mjStats.getCountDistint();
      }
      if (canUseNDV(tsStats)) {
        tsKeyCardinality = tsStats.getCountDistint();
      }
    }
    return mjKeyCardinality / (double) tsKeyCardinality;
  }

  /**
   * Returns true for a MapJoin operator that can be used for ProbeDecode.
   * MapJoin should be a single Key join, where the bigTable keyCol is only a ExprNodeColumnDesc.
   * A MapJoin without a key list registered for its big table position is not valid.
   *
   * @param mapJoinOp the MapJoin operator to check
   * @return true for a valid MapJoin
   */
  private static boolean isValidProbeDecodeMapJoin(MapJoinOperator mapJoinOp) {
    Map<Byte, List<ExprNodeDesc>> keyExprs = mapJoinOp.getConf().getKeys();
    List<ExprNodeDesc> bigTableKeyExprs = keyExprs.get((byte) mapJoinOp.getConf().getPosBigTable());
    return bigTableKeyExprs != null
        && bigTableKeyExprs.size() == 1
        && bigTableKeyExprs.get(0) instanceof ExprNodeColumnDesc;
  }

  /**
   * Tells whether the given column statistics carry a usable distinct-value count.
   *
   * @param colStats column statistics, may be {@code null}
   * @return true when the statistics are present and their distinct count is not negative
   */
  private static boolean canUseNDV(ColStatistics colStats) {
    if (colStats == null) {
      return false;
    }
    return colStats.getCountDistint() >= 0;
  }

  /**
   * Estimates the cost of a bloom filter as the row count of the semijoin SELECT operator.
   *
   * @param sel the SELECT operator of the semijoin branch
   * @return the operator's row count, or -1 when it has no statistics
   */
  private static double getBloomFilterCost(
      SelectOperator sel) {
    final Statistics stats = sel.getStatistics();
    if (stats == null) {
      return -1;
    }
    // Some other things that could be added here to model cost:
    // Cost of computing/sending partial BloomFilter results? BloomFilterSize * # mappers
    // For reduce-side join, add the cost of the semijoin table scan/dependent tablescans?
    return stats.getNumRows();
  }

  /**
   * Estimates the cardinality of the combined key domain of a semijoin from column statistics.
   *
   * @param selColStat stats of the semijoin key column at the SELECT operator
   * @param selColSourceStat stats of the same column at its origin operator, may be unusable
   * @param tsColStat stats of the target column on the table scan side
   * @return the estimated domain cardinality, or -1 when the statistics do not allow an estimate
   */
  private static long getCombinedKeyDomainCardinality(
      ColStatistics selColStat,
      ColStatistics selColSourceStat,
      ColStatistics tsColStat) {
    // Both the semijoin side and the scan side need a usable NDV; otherwise give up right away.
    if (!(canUseNDV(selColStat) && canUseNDV(tsColStat))) {
      return -1;
    }

    final long sourceNdv = canUseNDV(selColSourceStat) ? selColSourceStat.getCountDistint() : -1;
    final boolean semiJoinKeyIsPK = StatsUtils.inferForeignKey(selColStat, tsColStat);

    long keyDomainCardinality = -1;
    // Without a source NDV there is nothing to base the estimate on in either case.
    if (sourceNdv >= 0) {
      if (semiJoinKeyIsPK) {
        // PK/FK relationship: NDV of selColSourceStat is a superset of what is in tsColStat,
        // so the source column NDV is the most accurate domain cardinality.
        keyDomainCardinality = sourceNdv;
      } else {
        // If semijoin keys and ts keys are completely unrelated, the cardinality of both sets
        // could be obtained by adding both cardinalities. Would there be an average case?
        keyDomainCardinality = sourceNdv + tsColStat.getCountDistint();

        // Don't exceed the range if we have one.
        if (StatsUtils.hasDiscreteRange(selColStat)
            && StatsUtils.hasDiscreteRange(tsColStat)) {
          // Try using the cardinality from the value range.
          final ColStatistics.Range merged =
              StatsUtils.combineRange(selColStat.getRange(), tsColStat.getRange());
          final long rangeSize;
          if (merged == null) {
            rangeSize = StatsUtils.getRangeDelta(selColStat.getRange())
                + StatsUtils.getRangeDelta(tsColStat.getRange());
          } else {
            rangeSize = StatsUtils.getRangeDelta(merged);
          }
          keyDomainCardinality = Math.min(keyDomainCardinality, rangeSize);
        }
      }
    }

    if (LOG.isDebugEnabled()) {
      LOG.debug("Computing key domain cardinality, keyDomainCardinality=" + keyDomainCardinality
          + ", semiJoinKeyIsPK=" + semiJoinKeyIsPK
          + ", selColStat=" + selColStat
          + ", selColSourceStat=" + selColSourceStat
          + ", tsColStat=" + tsColStat);
    }

    return keyDomainCardinality;
  }

  /**
   * Computes the selectivity of a bloom filter for one semijoin column pair as
   * (key cardinality of the semijoin side) / (cardinality of the combined key domain).
   * Row counts are the defaults; column statistics refine them when both expressions
   * resolve to plain columns.
   *
   * @param sel the SELECT operator of the semijoin branch; its statistics must be present
   * @param selExpr the semijoin side key expression
   * @param filStats statistics of the target side; must be present
   * @param tsExpr the target side key expression
   * @return the estimated selectivity
   */
  private static double getBloomFilterSelectivity(
      SelectOperator sel, ExprNodeDesc selExpr,
      Statistics filStats, ExprNodeDesc tsExpr) {
    final Statistics selStats = sel.getStatistics();
    assert selStats != null;
    assert filStats != null;

    // Defaults derived from row counts, used when column stats are not available.
    long selKeyCardinality = selStats.getNumRows();
    final long tsKeyCardinality = filStats.getNumRows();
    long keyDomainCardinality = selKeyCardinality + tsKeyCardinality;

    final ExprNodeColumnDesc selCol = ExprNodeDescUtils.getColumnExpr(selExpr);
    final ExprNodeColumnDesc tsCol = ExprNodeDescUtils.getColumnExpr(tsExpr);
    if (selCol != null && tsCol != null) {
      // Check if there are column stats available for these columns
      final ColStatistics semiJoinSideStat = selStats.getColumnStatisticsFromColName(selCol.getColumn());
      final ColStatistics targetSideStat = filStats.getColumnStatisticsFromColName(tsCol.getColumn());
      if (canUseNDV(semiJoinSideStat)) {
        selKeyCardinality = semiJoinSideStat.getCountDistint();
      }

      // Stats of the original table column behind selCol, if reachable: they describe the NDV
      // of the column before any filtering and are therefore more accurate for the domain.
      ColStatistics originStat = null;
      if (semiJoinSideStat != null) {
        final ExprNodeDescUtils.ColumnOrigin origin = ExprNodeDescUtils.findColumnOrigin(selCol, sel);
        if (origin != null) {
          final Statistics originOpStats = origin.op.getStatistics();
          if (originOpStats != null) {
            originStat = originOpStats.getColumnStatisticsFromColName(origin.col.getColumn());
          }
        }
      }

      final long domainFromColStats =
          getCombinedKeyDomainCardinality(semiJoinSideStat, originStat, targetSideStat);
      if (domainFromColStats >= 0) {
        keyDomainCardinality = domainFromColStats;
      }
    }

    if (LOG.isDebugEnabled()) {
      LOG.debug("BloomFilter selectivity for " + selCol + " to " + tsCol + ", selKeyCardinality=" + selKeyCardinality
          + ", tsKeyCardinality=" + tsKeyCardinality + ", keyDomainCardinality=" + keyDomainCardinality);
    }
    // Selectivity: key cardinality of semijoin / domain cardinality
    return selKeyCardinality / (double) keyDomainCardinality;
  }

  /**
   * Computes the benefit of applying the bloom filter.
   * 

* The benefit is defined as the number of tuples that are filtered/removed from the bloom. *

*/ private static double getBloomFilterBenefit(SelectOperator sel, List selExpr, Statistics filStats, List tsExpr) { if (sel.getStatistics() == null || filStats == null) { LOG.debug("No stats available to compute BloomFilter benefit"); return -1; } // Find the semijoin column with the smallest number of matches and keep its selectivity double selectivity = 1.0; for (int i = 0; i < tsExpr.size(); i++) { selectivity = Math.min(selectivity, getBloomFilterSelectivity(sel, selExpr.get(i), filStats, tsExpr.get(i))); } // Decrease the min selectivity by 5% for each additional column in the semijoin. // Consider the following semijoins: // SJ1(author.name, author.age); // SJ2(author.name). // Intuitively even if the min selectivity of both is 0.8 the semijoin with two columns (SJ1) // will match less tuples than the semijoin with one column (SJ2). selectivity -= selectivity * (tsExpr.size() - 1) * 0.05; // Selectivity cannot be less than 0.0 selectivity = Math.max(0.0, selectivity); // Benefit (rows filtered from ts): (1 - selectivity) * # ts rows return filStats.getNumRows() * (1 - selectivity); } private static double computeBloomFilterNetBenefit( SelectOperator sel, List selExpr, Statistics filStats, List tsExpr) { double netBenefit = 0; double benefit = getBloomFilterBenefit(sel, selExpr, filStats, tsExpr); if (benefit > 0 && filStats != null) { double cost = getBloomFilterCost(sel); if (cost > 0) { long filDataSize = filStats.getNumRows(); netBenefit = Math.max(benefit - cost, 0) / filDataSize; LOG.debug("BloomFilter benefit=" + benefit + ", cost=" + cost + ", tsDataSize=" + filDataSize + ", netBenefit=" + (benefit - cost)); } } LOG.debug("netBenefit=" + netBenefit); return netBenefit; } /** * Sort semijoin filters depending on the benefit (computed depending on selectivity and cost) * that they provide. We create three blocks: first all normal predicates, second between clauses * for the min/max dynamic values, and finally the in bloom filter predicates. 
The intuition is * that evaluating the between clause will be cheaper than evaluating the bloom filter predicates. * Hence, after this method runs, normal predicates come first (possibly sorted by Calcite), * then we will have sorted between clauses, and finally sorted in bloom filter clauses. */ private static void sortSemijoinFilters(OptimizeTezProcContext procCtx, ListMultimap globalReductionFactorMap) throws SemanticException { for (Entry> e : globalReductionFactorMap.asMap().entrySet()) { FilterOperator filterOp = e.getKey(); Collection semijoinInfos = e.getValue(); ExprNodeDesc pred = filterOp.getConf().getPredicate(); if (FunctionRegistry.isOpAnd(pred)) { LinkedHashSet allPreds = new LinkedHashSet<>(pred.getChildren()); List betweenPreds = new ArrayList<>(); List inBloomFilterPreds = new ArrayList<>(); // We check whether we can find semijoin predicates for (SemijoinOperatorInfo roi : semijoinInfos) { for (ExprNodeDesc expr : pred.getChildren()) { if (FunctionRegistry.isOpBetween(expr) && expr.getChildren().get(2) instanceof ExprNodeDynamicValueDesc) { // BETWEEN in SJ String dynamicValueIdFromExpr = ((ExprNodeDynamicValueDesc) expr.getChildren().get(2)) .getDynamicValue().getId(); List dynamicValueIdsFromMap = procCtx.parseContext.getRsToRuntimeValuesInfoMap() .get(roi.rsOperator).getDynamicValueIDs(); for (String dynamicValueIdFromMap : dynamicValueIdsFromMap) { if (dynamicValueIdFromExpr.equals(dynamicValueIdFromMap)) { betweenPreds.add(expr); allPreds.remove(expr); break; } } } else if (FunctionRegistry.isOpInBloomFilter(expr) && expr.getChildren().get(1) instanceof ExprNodeDynamicValueDesc) { // IN_BLOOM_FILTER in SJ String dynamicValueIdFromExpr = ((ExprNodeDynamicValueDesc) expr.getChildren().get(1)) .getDynamicValue().getId(); List dynamicValueIdsFromMap = procCtx.parseContext.getRsToRuntimeValuesInfoMap() .get(roi.rsOperator).getDynamicValueIDs(); for (String dynamicValueIdFromMap : dynamicValueIdsFromMap) { if 
(dynamicValueIdFromExpr.equals(dynamicValueIdFromMap)) { inBloomFilterPreds.add(expr); allPreds.remove(expr); break; } } } } } List newAndArgs = new ArrayList<>(allPreds); // First rest of predicates newAndArgs.addAll(betweenPreds); // Then sorted between predicates newAndArgs.addAll(inBloomFilterPreds); // Finally, sorted in bloom predicates ExprNodeDesc andExpr = ExprNodeGenericFuncDesc.newInstance( FunctionRegistry.getFunctionInfo("and").getGenericUDF(), newAndArgs); filterOp.getConf().setPredicate(andExpr); } } } private void removeSemijoinOptimizationByBenefit(OptimizeTezProcContext procCtx) throws SemanticException { Map map = procCtx.parseContext.getRsToSemiJoinBranchInfo(); if (map.isEmpty()) { // Nothing to do return; } // Scale down stats for tables with DPP Map adjustedStatsMap = new HashMap<>(); List semijoinRsToRemove = new ArrayList<>(); double semijoinReductionThreshold = procCtx.conf.getFloatVar( HiveConf.ConfVars.TEZ_DYNAMIC_SEMIJOIN_REDUCTION_THRESHOLD); // Using SortedSet to make iteration order deterministic final Comparator rsOpComp = (ReduceSinkOperator o1, ReduceSinkOperator o2) -> (o1.toString().compareTo(o2.toString())); SortedSet semiJoinRsOps = new TreeSet<>(rsOpComp); semiJoinRsOps.addAll(map.keySet()); ListMultimap globalReductionFactorMap = ArrayListMultimap.create(); while (!semiJoinRsOps.isEmpty()) { // We will gather the SJs to keep in the plan in the following map Map reductionFactorMap = new HashMap<>(); SortedSet semiJoinRsOpsNewIter = new TreeSet<>(rsOpComp); for (ReduceSinkOperator rs : semiJoinRsOps) { SemiJoinBranchInfo sjInfo = map.get(rs); if (sjInfo.getIsHint() || !sjInfo.getShouldRemove()) { // Semijoin created using hint or marked useful, skip it continue; } // rs is semijoin optimization branch, which should look like -SEL-GB1-RS1-GB2-RS2 SelectOperator sel = OperatorUtils.ancestor(rs, SelectOperator.class, 0, 0, 0, 0); // Check the ndv/rows from the SEL vs the destination tablescan the semijoin opt is going to. 
TableScanOperator ts = sjInfo.getTsOp(); RuntimeValuesInfo rti = procCtx.parseContext.getRsToRuntimeValuesInfoMap().get(rs); List targetColumns = rti.getTargetColumns(); // In semijoin branches the SEL operator has the following forms: // SEL[c1] - single column semijoin reduction // SEL[c1, c2,..., ck, hash(hash(hash(c1, c2),...),ck)] - multi column semijoin reduction // The source columns in the above cases are c1, c2,...,ck. // We need to exclude the hash(...) expression, if it is present. List sourceColumns = sel.getConf().getColList().subList(0, targetColumns.size()); if (LOG.isDebugEnabled()) { LOG.debug("Computing BloomFilter cost/benefit for " + OperatorUtils.getOpNamePretty(rs) + " - " + OperatorUtils.getOpNamePretty(ts) + " " + targetColumns + " "); } FilterOperator filterOperator = (FilterOperator) ts.getChildOperators().get(0); Statistics filterStats = adjustedStatsMap.get(filterOperator); if (filterStats == null && filterOperator.getStatistics() != null) { filterStats = filterOperator.getStatistics().clone(); adjustedStatsMap.put(filterOperator, filterStats); } double reductionFactor = computeBloomFilterNetBenefit(sel, sourceColumns, filterStats, targetColumns); if (reductionFactor < semijoinReductionThreshold) { // This semijoin optimization should be removed. 
Do it after we're done iterating semijoinRsToRemove.add(rs); } else { // This semijoin qualifies, add it to the result set if (filterStats != null) { ImmutableSet.Builder colNames = ImmutableSet.builder(); for (ExprNodeDesc tsExpr : targetColumns) { Set allReferencedColumns = ExprNodeDescUtils.findAllColumnDescs(tsExpr); for (ExprNodeColumnDesc col : allReferencedColumns) { colNames.add(col.getColumn()); } } // We check whether there was already another SJ over this TS that was selected // in previous iteration SemijoinOperatorInfo prevResult = reductionFactorMap.get(filterOperator); if (prevResult != null) { if (prevResult.reductionFactor < reductionFactor) { // We should pick up new SJ as its reduction factor is greater than the previous one // that we found. We add the previous RS where SJ was originating to RS ops for new // iteration reductionFactorMap.put(filterOperator, new SemijoinOperatorInfo(rs, filterOperator, filterStats, colNames.build(), reductionFactor)); semiJoinRsOpsNewIter.add(prevResult.rsOperator); if (LOG.isDebugEnabled()) { LOG.debug("Adding " + OperatorUtils.getOpNamePretty(prevResult.rsOperator) + " for re-iteration"); } } else { // We should pick up old SJ. 
We just need to add new RS where SJ was originating // to RS ops for new iteration semiJoinRsOpsNewIter.add(rs); if (LOG.isDebugEnabled()) { LOG.debug("Adding " + OperatorUtils.getOpNamePretty(rs) + " for re-iteration"); } } } else { // Another SJ did not exist for this TS, hence just add it to SJs to keep reductionFactorMap.put(filterOperator, new SemijoinOperatorInfo(rs, filterOperator, filterStats, colNames.build(), reductionFactor)); } } } } for (SemijoinOperatorInfo roi : reductionFactorMap.values()) { // This semijoin will be kept // We are going to adjust the filter statistics long newNumRows = (long) (1.0 - roi.reductionFactor) * roi.filterStats.getNumRows(); if (LOG.isDebugEnabled()) { LOG.debug("Old stats for {}: {}", roi.filterOperator, roi.filterStats); LOG.debug("Number of rows reduction: {}/{}", newNumRows, roi.filterStats.getNumRows()); } StatsUtils.updateStats(roi.filterStats, newNumRows, true, roi.filterOperator, roi.colNames); if (LOG.isDebugEnabled()) { LOG.debug("New stats for {}: {}", roi.filterOperator, roi.filterStats); } adjustedStatsMap.put(roi.filterOperator, roi.filterStats); globalReductionFactorMap.put(roi.filterOperator, roi); } semiJoinRsOps = semiJoinRsOpsNewIter; } for (ReduceSinkOperator rs : semijoinRsToRemove) { TableScanOperator ts = map.get(rs).getTsOp(); if (LOG.isDebugEnabled()) { LOG.debug("Reduction factor not satisfied for " + OperatorUtils.getOpNamePretty(rs) + "-" + OperatorUtils.getOpNamePretty(ts) + ". Removing semijoin optimization."); } GenTezUtils.removeBranch(rs); GenTezUtils.removeSemiJoinOperator(procCtx.parseContext, rs, ts); } if (!globalReductionFactorMap.isEmpty()) { sortSemijoinFilters(procCtx, globalReductionFactorMap); } } /** * Internal class to encapsulate information needed to evaluate stats * about a SJ that will be kept in the tree. 
*/ private class SemijoinOperatorInfo { final ReduceSinkOperator rsOperator; final FilterOperator filterOperator; final ImmutableSet colNames; final Statistics filterStats; final double reductionFactor; private SemijoinOperatorInfo(ReduceSinkOperator rsOperator, FilterOperator filterOperator, Statistics filterStats, Collection colNames, double reductionFactor) { this.rsOperator = rsOperator; this.filterOperator = filterOperator; this.colNames = ImmutableSet.copyOf(colNames); this.filterStats = filterStats; this.reductionFactor = reductionFactor; } } private void markSemiJoinForDPP(OptimizeTezProcContext procCtx) throws SemanticException { // Stores the Tablescan operators processed to avoid redoing them. Map map = procCtx.parseContext.getRsToSemiJoinBranchInfo(); for (ReduceSinkOperator rs : map.keySet()) { SemiJoinBranchInfo sjInfo = map.get(rs); TableScanOperator ts = sjInfo.getTsOp(); if (sjInfo.getIsHint() || !sjInfo.getShouldRemove()) { continue; } // A TS can have multiple branches due to DPP Or Semijoin Opt. // Use DFS to traverse all the branches until RS or DPP is hit. Deque> deque = new LinkedList<>(); deque.add(ts); while (!deque.isEmpty()) { Operator op = deque.pollLast(); if (op instanceof AppMasterEventOperator && ((AppMasterEventOperator) op).getConf() instanceof DynamicPruningEventDesc) { // DPP. Now look up nDVs on both sides to see the selectivity. // -SEL-GB1-RS1-GB2-RS2 SelectOperator selOp = OperatorUtils.ancestor(rs, SelectOperator.class, 0, 0, 0, 0); try { // Get nDVs on Semijoin edge side Statistics stats = selOp.getStatistics(); if (stats == null) { // No stats found on semijoin edge, do nothing break; } String selCol = ExprNodeDescUtils.extractColName( selOp.getConf().getColList().get(0)); ColStatistics colStatisticsSJ = stats .getColumnStatisticsFromColName(selCol); if (colStatisticsSJ == null) { // No column stats found for semijoin edge break; } long nDVs = colStatisticsSJ.getCountDistint(); if (nDVs > 0) { // Lookup nDVs on TS side. 
RuntimeValuesInfo rti = procCtx.parseContext .getRsToRuntimeValuesInfoMap().get(rs); // TODO Handle multi column semi-joins as part of HIVE-23934 ExprNodeDesc tsExpr = rti.getTargetColumns().get(0); FilterOperator fil = (FilterOperator) (ts.getChildOperators().get(0)); Statistics filStats = fil.getStatistics(); if (filStats == null) { // No stats found on target, do nothing break; } String colName = ExprNodeDescUtils.extractColName(tsExpr); ColStatistics colStatisticsTarget = filStats .getColumnStatisticsFromColName(colName); if (colStatisticsTarget == null) { // No column stats found on target break; } long nDVsOfTS = colStatisticsTarget.getCountDistint(); double nDVsOfTSFactored = nDVsOfTS * procCtx.conf.getFloatVar( ConfVars.TEZ_DYNAMIC_SEMIJOIN_REDUCTION_FOR_DPP_FACTOR); if ((long)nDVsOfTSFactored > nDVs) { if (LOG.isDebugEnabled()) { LOG.debug("nDVs = " + nDVs + ", nDVsOfTS = " + nDVsOfTS + " and nDVsOfTSFactored = " + nDVsOfTSFactored + "Adding semijoin branch from ReduceSink " + rs + " to TS " + sjInfo.getTsOp()); } sjInfo.setShouldRemove(false); } } } catch (NullPointerException e) { // Do nothing if (LOG.isDebugEnabled()) { LOG.debug("Caught NPE in markSemiJoinForDPP from ReduceSink " + rs + " to TS " + sjInfo.getTsOp()); } } break; } if (op instanceof TerminalOperator) { // Done with this branch continue; } deque.addAll(op.getChildOperators()); } } } private void bucketingVersionSanityCheck(OptimizeTezProcContext procCtx) throws SemanticException { // Fetch all the FileSinkOperators. Set fsOpsAll = new HashSet<>(); for (TableScanOperator ts : procCtx.parseContext.getTopOps().values()) { Set fsOps = OperatorUtils.findOperators( ts, FileSinkOperator.class); fsOpsAll.addAll(fsOps); } Map, Integer> processedOperators = new IdentityHashMap<>(); for (FileSinkOperator fsOp : fsOpsAll) { // Look for direct parent ReduceSinkOp // If there are more than 1 parent, bail out. 
Operator parent = fsOp; List> parentOps = parent.getParentOperators(); while (parentOps != null && parentOps.size() == 1) { parent = parentOps.get(0); if (!(parent instanceof ReduceSinkOperator)) { parentOps = parent.getParentOperators(); continue; } // Found the target RSOp 0 int bucketingVersion = fsOp.getConf().getTableInfo().getBucketingVersion(); if (fsOp.getConf().getTableInfo().getBucketingVersion() == -1) { break; } if (fsOp.getConf().getTableInfo().getBucketingVersion() != fsOp.getConf().getBucketingVersion()) { throw new RuntimeException("FsOp bucketingVersions is inconsistent with its tableinfo"); } if (processedOperators.containsKey(parent) && processedOperators.get(parent) != bucketingVersion) { throw new SemanticException(String.format( "Operator (%s) is already processed and is using bucketingVersion(%d); so it can't be changed to %d ", parent, processedOperators.get(parent), bucketingVersion)); } processedOperators.put(parent, bucketingVersion); break; } } } }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy