All Downloads are FREE. Search and download functionality uses the official Maven repository.

org.apache.hadoop.hive.ql.optimizer.correlation.QueryPlanTreeTransformation Maven / Gradle / Ivy

There is a newer version: 4.0.0
Show newest version
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hive.ql.optimizer.correlation;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hive.ql.exec.DemuxOperator;
import org.apache.hadoop.hive.ql.exec.GroupByOperator;
import org.apache.hadoop.hive.ql.exec.Operator;
import org.apache.hadoop.hive.ql.exec.OperatorFactory;
import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator;
import org.apache.hadoop.hive.ql.exec.Utilities;
import org.apache.hadoop.hive.ql.optimizer.correlation.CorrelationOptimizer.CorrelationNodeProcCtx;
import org.apache.hadoop.hive.ql.parse.ParseContext;
import org.apache.hadoop.hive.ql.parse.SemanticException;
import org.apache.hadoop.hive.ql.plan.DemuxDesc;
import org.apache.hadoop.hive.ql.plan.GroupByDesc;
import org.apache.hadoop.hive.ql.plan.MuxDesc;
import org.apache.hadoop.hive.ql.plan.OperatorDesc;
import org.apache.hadoop.hive.ql.plan.TableDesc;

/**
 * QueryPlanTreeTransformation contains static methods used to transform
 * the query plan tree (operator tree) based on the correlation we have
 * detected by Correlation Optimizer.
 */
public class QueryPlanTreeTransformation {
  private static final Log LOG = LogFactory.getLog(QueryPlanTreeTransformation.class.getName());

  private static void setNewTag(IntraQueryCorrelation correlation,
      List> childrenOfDemux,
      ReduceSinkOperator rsop, Map bottomRSToNewTag)
          throws SemanticException {
    int newTag = bottomRSToNewTag.get(rsop);
    int oldTag = rsop.getConf().getTag();
    if (oldTag == -1) {
      // if this child of DemuxOperator does not use tag, we just set the oldTag to 0.
      oldTag = 0;
    }
    Operator child = CorrelationUtilities.getSingleChild(rsop, true);
    if (!childrenOfDemux.contains(child)) {
      childrenOfDemux.add(child);
    }
    int childIndex = childrenOfDemux.indexOf(child);
    correlation.setNewTag(newTag, oldTag, childIndex);
    rsop.getConf().setTag(newTag);
  }

  /**
   * Based on the correlation, we transform the query plan tree (operator tree).
   * In here, we first create DemuxOperator and all bottom ReduceSinkOperators
   * (bottom means near TableScanOperaotr) in the correlation will be be
   * the parents of the DemuxOperaotr. We also reassign tags to those
   * ReduceSinkOperators. Then, we use MuxOperators to replace ReduceSinkOperators
   * which are not bottom ones in this correlation.
   * Example: The original operator tree is ...
   *      JOIN2
   *      /    \
   *     RS4   RS5
   *    /        \
   *   GBY1     JOIN1
   *    |       /    \
   *   RS1     RS2   RS3
   * If GBY1, JOIN1, and JOIN2 can be executed in the same reducer
   * (optimized by Correlation Optimizer).
   * The new operator tree will be ...
   *      JOIN2
   *        |
   *       MUX
   *      /   \
   *    GBY1  JOIN1
   *      \    /
   *       DEMUX
   *      /  |  \
   *     /   |   \
   *    /    |    \
   *   RS1   RS2   RS3
   * @param pCtx
   * @param corrCtx
   * @param correlation
   * @throws SemanticException
   */
  protected static void applyCorrelation(
      ParseContext pCtx,
      CorrelationNodeProcCtx corrCtx,
      IntraQueryCorrelation correlation)
      throws SemanticException {

    final List bottomReduceSinkOperators =
        correlation.getBottomReduceSinkOperators();
    final int numReducers = correlation.getNumReducers();
    List> childrenOfDemux =
        new ArrayList>();
    List> parentRSsOfDemux =
        new ArrayList>();
    Map childIndexToOriginalNumParents =
        new HashMap();
    List keysSerializeInfos = new ArrayList();
    List valuessSerializeInfos = new ArrayList();
    Map bottomRSToNewTag =
        new HashMap();
    int newTag = 0;
    for (ReduceSinkOperator rsop: bottomReduceSinkOperators) {
      rsop.getConf().setNumReducers(numReducers);
      bottomRSToNewTag.put(rsop, newTag);
      parentRSsOfDemux.add(rsop);
      keysSerializeInfos.add(rsop.getConf().getKeySerializeInfo());
      valuessSerializeInfos.add(rsop.getConf().getValueSerializeInfo());
      Operator child = CorrelationUtilities.getSingleChild(rsop, true);
      if (!childrenOfDemux.contains(child)) {
        childrenOfDemux.add(child);
        int childIndex = childrenOfDemux.size() - 1;
        childIndexToOriginalNumParents.put(childIndex, child.getNumParent());
      }
      newTag++;
    }

    for (ReduceSinkOperator rsop: bottomReduceSinkOperators) {
      setNewTag(correlation, childrenOfDemux, rsop, bottomRSToNewTag);
    }

    // Create the DemuxOperaotr
    DemuxDesc demuxDesc =
        new DemuxDesc(
            correlation.getNewTagToOldTag(),
            correlation.getNewTagToChildIndex(),
            childIndexToOriginalNumParents,
            keysSerializeInfos,
            valuessSerializeInfos);
    Operator demuxOp = OperatorFactory.get(demuxDesc);
    demuxOp.setChildOperators(childrenOfDemux);
    demuxOp.setParentOperators(parentRSsOfDemux);
    for (Operator child: childrenOfDemux) {
      List> parentsWithMultipleDemux =
          new ArrayList>();
      boolean hasBottomReduceSinkOperators = false;
      boolean hasNonBottomReduceSinkOperators = false;
      for (int i = 0; i < child.getParentOperators().size(); i++) {
        Operator p = child.getParentOperators().get(i);
        assert p instanceof ReduceSinkOperator;
        ReduceSinkOperator rsop = (ReduceSinkOperator)p;
        if (bottomReduceSinkOperators.contains(rsop)) {
          hasBottomReduceSinkOperators = true;
          parentsWithMultipleDemux.add(demuxOp);
        } else {
          hasNonBottomReduceSinkOperators = true;
          parentsWithMultipleDemux.add(rsop);
        }
      }
      if (hasBottomReduceSinkOperators && hasNonBottomReduceSinkOperators) {
        child.setParentOperators(parentsWithMultipleDemux);
      } else {
        child.setParentOperators(Utilities.makeList(demuxOp));
      }
    }
    for (Operator parent: parentRSsOfDemux) {
      parent.setChildOperators(Utilities.makeList(demuxOp));
    }

    // replace all ReduceSinkOperators which are not at the bottom of
    // this correlation to MuxOperators
    Set handledRSs = new HashSet();
    for (ReduceSinkOperator rsop : correlation.getAllReduceSinkOperators()) {
      if (!bottomReduceSinkOperators.contains(rsop)) {
        if (handledRSs.contains(rsop)) {
          continue;
        }
        Operator childOP =
            CorrelationUtilities.getSingleChild(rsop, true);
        if (childOP instanceof GroupByOperator) {
          CorrelationUtilities.removeReduceSinkForGroupBy(
              rsop, (GroupByOperator)childOP, pCtx, corrCtx);
          List> parentsOfMux =
              new ArrayList>();
          Operator parentOp =
              CorrelationUtilities.getSingleParent(childOP, true);
          parentsOfMux.add(parentOp);
          Operator mux = OperatorFactory.get(
              new MuxDesc(parentsOfMux));
          mux.setChildOperators(Utilities.makeList(childOP));
          mux.setParentOperators(parentsOfMux);
          childOP.setParentOperators(Utilities.makeList(mux));
          parentOp.setChildOperators(Utilities.makeList(mux));
        } else {
          List> parentsOfMux =
              new ArrayList>();
          List> siblingOPs =
              CorrelationUtilities.findSiblingOperators(rsop);
          for (Operator op: siblingOPs) {
            if (op instanceof DemuxOperator) {
              parentsOfMux.add(op);
            } else if (op instanceof ReduceSinkOperator){
              GroupByOperator pGBYm =
                  CorrelationUtilities.getSingleParent(op, GroupByOperator.class);
              if (pGBYm != null && pGBYm.getConf().getMode() == GroupByDesc.Mode.HASH) {
                // We get a semi join at here.
                // This map-side GroupByOperator needs to be removed
                CorrelationUtilities.removeOperator(
                    pGBYm, op, CorrelationUtilities.getSingleParent(pGBYm, true), pCtx);
              }
              handledRSs.add((ReduceSinkOperator)op);
              parentsOfMux.add(CorrelationUtilities.getSingleParent(op, true));
            } else {
              throw new SemanticException("An slibing of ReduceSinkOperator is nethier a " +
                  "DemuxOperator nor a ReduceSinkOperator");
            }
          }
          MuxDesc muxDesc = new MuxDesc(siblingOPs);
          Operator mux = OperatorFactory.get(muxDesc);
          mux.setChildOperators(Utilities.makeList(childOP));
          mux.setParentOperators(parentsOfMux);

          for (Operator op: parentsOfMux) {
            if (op instanceof DemuxOperator) {
              // op is a DemuxOperator and it directly connects to childOP.
              // We will add this MuxOperator between DemuxOperator
              // and childOP.
              if (op.getChildOperators().contains(childOP)) {
                op.replaceChild(childOP, mux);
              }
            } else {
              // op is not a DemuxOperator, so it should have
              // a single child.
              op.setChildOperators(Utilities.makeList(mux));
            }
          }
          childOP.setParentOperators(Utilities.makeList(mux));
        }
      }
    }
    for (ReduceSinkOperator rsop: handledRSs) {
      rsop.setChildOperators(null);
      rsop.setParentOperators(null);
    }
  }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy