// NOTE(review): the following lines were non-Java boilerplate from the Maven
// artifact-browsing page this file was scraped from; converted to comments so
// the file parses. Artifact: org.apache.hadoop.hive.ql.optimizer.correlation.CorrelationUtilities
// (Maven / Gradle / Ivy). A newer version of the artifact (4.0.0) is available.
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hive.ql.optimizer.correlation;


import java.io.Serializable;
import java.lang.reflect.Array;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;

import org.apache.hadoop.hive.ql.exec.ColumnInfo;
import org.apache.hadoop.hive.ql.exec.FilterOperator;
import org.apache.hadoop.hive.ql.exec.ForwardOperator;
import org.apache.hadoop.hive.ql.exec.GroupByOperator;
import org.apache.hadoop.hive.ql.exec.JoinOperator;
import org.apache.hadoop.hive.ql.exec.Operator;
import org.apache.hadoop.hive.ql.exec.OperatorFactory;
import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator;
import org.apache.hadoop.hive.ql.exec.RowSchema;
import org.apache.hadoop.hive.ql.exec.ScriptOperator;
import org.apache.hadoop.hive.ql.exec.SelectOperator;
import org.apache.hadoop.hive.ql.exec.TableScanOperator;
import org.apache.hadoop.hive.ql.exec.Utilities;
import org.apache.hadoop.hive.ql.exec.Utilities.ReduceField;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.parse.ParseContext;
import org.apache.hadoop.hive.ql.parse.SemanticException;
import org.apache.hadoop.hive.ql.plan.AggregationDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDescUtils;
import org.apache.hadoop.hive.ql.plan.GroupByDesc;
import org.apache.hadoop.hive.ql.plan.JoinCondDesc;
import org.apache.hadoop.hive.ql.plan.JoinDesc;
import org.apache.hadoop.hive.ql.plan.OperatorDesc;
import org.apache.hadoop.hive.ql.plan.SelectDesc;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator;

/**
 * Utilities for both CorrelationOptimizer and ReduceSinkDeDuplication.
 *
 */
public final class CorrelationUtilities {

  /**
   * Returns true if {@code expr} is semantically equal (per
   * {@link ExprNodeDesc#isSame}) to any non-null expression in {@code columns}.
   *
   * @param expr the expression to look for
   * @param columns candidate expressions (entries may be null)
   * @return true if an equivalent expression exists in the list
   */
  protected static boolean isExisted(ExprNodeDesc expr, List<ExprNodeDesc> columns) {
    for (ExprNodeDesc thisExpr : columns) {
      if (thisExpr != null && thisExpr.isSame(expr)) {
        return true;
      }
    }
    return false;
  }

  /**
   * Finds the output column name whose mapped expression is equivalent
   * (per {@link ExprNodeDesc#isSame}) to {@code expr}.
   *
   * @param opColumnExprMap map from output column name to the expression it is computed from
   * @param expr the expression to look up
   * @return the matching column name, or null if no equivalent expression is found
   */
  protected static String getColumnName(
      Map<String, ExprNodeDesc> opColumnExprMap, ExprNodeDesc expr) {
    for (Entry<String, ExprNodeDesc> entry : opColumnExprMap.entrySet()) {
      ExprNodeDesc thisExpr = entry.getValue();
      if (thisExpr != null && thisExpr.isSame(expr)) {
        return entry.getKey();
      }
    }
    return null;
  }

  /**
   * Checks whether the map-side GroupByOperator directly feeding the given
   * ReduceSinkOperator (if there is one) has grouping sets present.
   *
   * @param cRS the ReduceSinkOperator whose single parent is inspected
   * @return true if the single parent is a GroupByOperator using grouping sets
   * @throws SemanticException propagated from the parent lookup
   */
  protected static boolean hasGroupingSet(ReduceSinkOperator cRS) throws SemanticException {
    GroupByOperator mapSideGBY = getSingleParent(cRS, GroupByOperator.class);
    return mapSideGBY != null && mapSideGBY.getConf().isGroupingSetsPresent();
  }

  /**
   * Returns the single parent of an operator, if it has exactly one.
   *
   * @param operator the input operator
   * @param throwException whether to throw an exception when the input operator
   *        does not have exactly one parent
   * @return the single parent, or null when the input operator has zero or
   *         multiple parents and throwException is false
   * @throws SemanticException if throwException is true and the operator does not
   *         have exactly one parent
   */
  protected static Operator<?> getSingleParent(Operator<?> operator,
      boolean throwException) throws SemanticException {
    List<Operator<? extends OperatorDesc>> parents = operator.getParentOperators();
    if (parents != null && parents.size() == 1) {
      return parents.get(0);
    }
    if (throwException) {
      if (parents == null) {
        throw new SemanticException("Operator " + operator.getName() + " (ID: " +
            operator.getIdentifier() + ") does not have any parent, but we expect 1 parent.");
      } else if (parents.size() > 1) {
        throw new SemanticException("Operator " + operator.getName() + " (ID: " +
            operator.getIdentifier() + ") has " + parents.size() +
            " parents, but we expect 1 parent.");
      }
    }
    return null;
  }

  /** Returns the single parent of {@code operator}, or null if it does not have exactly one. */
  protected static Operator<?> getSingleParent(Operator<?> operator) throws SemanticException {
    return getSingleParent(operator, false);
  }

  /**
   * Returns the single child of an operator, if it has exactly one.
   *
   * @param operator the input operator
   * @param throwException whether to throw an exception when the input operator
   *        does not have exactly one child
   * @return the single child, or null when the input operator has zero or
   *         multiple children and throwException is false
   * @throws SemanticException if throwException is true and the operator does not
   *         have exactly one child
   */
  protected static Operator<?> getSingleChild(Operator<?> operator,
      boolean throwException) throws SemanticException {
    List<Operator<? extends OperatorDesc>> children = operator.getChildOperators();
    if (children != null && children.size() == 1) {
      return children.get(0);
    }
    if (throwException) {
      // messages previously said "parent(s)" here — fixed to refer to children
      if (children == null) {
        throw new SemanticException("Operator " + operator.getName() + " (ID: " +
            operator.getIdentifier() + ") does not have any child, but we expect 1 child.");
      } else if (children.size() > 1) {
        throw new SemanticException("Operator " + operator.getName() + " (ID: " +
            operator.getIdentifier() + ") has " + children.size() +
            " children, but we expect 1 child.");
      }
    }
    return null;
  }

  /** Returns the single child of {@code operator}, or null if it does not have exactly one. */
  protected static Operator<?> getSingleChild(Operator<?> operator) throws SemanticException {
    return getSingleChild(operator, false);
  }

  /**
   * Returns the single child of {@code operator} if it is an instance of {@code type}.
   *
   * @param operator the input operator
   * @param type the expected child operator class
   * @return the single child cast to {@code type}, or null if there is not exactly
   *         one child or it is of a different type
   * @throws SemanticException propagated from the child lookup
   */
  protected static <T> T getSingleChild(Operator<?> operator, Class<T> type)
      throws SemanticException {
    Operator<?> child = getSingleChild(operator);
    // type.cast avoids an unchecked cast; isInstance guards against null/mismatch
    return type.isInstance(child) ? type.cast(child) : null;
  }

  /**
   * Returns the single parent of {@code operator} if it is an instance of {@code type}.
   *
   * @param operator the input operator
   * @param type the expected parent operator class
   * @return the single parent cast to {@code type}, or null if there is not exactly
   *         one parent or it is of a different type
   * @throws SemanticException propagated from the parent lookup
   */
  protected static <T> T getSingleParent(Operator<?> operator, Class<T> type)
      throws SemanticException {
    Operator<?> parent = getSingleParent(operator);
    // type.cast avoids an unchecked cast; isInstance guards against null/mismatch
    return type.isInstance(parent) ? type.cast(parent) : null;
  }

  /**
   * Returns the operator at which a group-by pattern starts: the parent when it
   * is a map-side GroupByOperator (which is skipped), otherwise the RS itself.
   *
   * @param cRS the ReduceSinkOperator being examined
   * @throws SemanticException propagated from the parent lookup
   */
  protected static Operator<?> getStartForGroupBy(ReduceSinkOperator cRS)
      throws SemanticException {
    Operator<?> parent = getSingleParent(cRS);
    return parent instanceof GroupByOperator ? parent : cRS;  // skip map-aggr GBY
  }


  /**
   * Computes, for every parent (tag) of the join, whether its rows can be
   * assumed to stay sorted through the join; see {@link #isSortedTag}.
   *
   * @param joinOp the join whose parents are examined
   * @return one flag per join tag, true when that tag's side is sorted
   */
  protected static boolean[] getSortedTags(JoinOperator joinOp) {
    int numTags = joinOp.getParentOperators().size();
    boolean[] sorted = new boolean[numTags];
    for (int i = 0; i < numTags; i++) {
      sorted[i] = isSortedTag(joinOp, i);
    }
    return sorted;
  }

  // for left outer joins, left alias is sorted but right alias might be not
  // (nulls, etc.). vice versa.
  /**
   * Returns whether the join output for the given parent (tag) can be assumed
   * sorted. A tag on the null-generating side of an outer join condition is
   * not sorted.
   *
   * @param joinOp the join whose conditions are inspected
   * @param tag the parent index to check
   * @return false if any outer-join condition can generate nulls for this tag
   */
  protected static boolean isSortedTag(JoinOperator joinOp, int tag) {
    for (JoinCondDesc cond : joinOp.getConf().getConds()) {
      switch (cond.getType()) {
        case JoinDesc.LEFT_OUTER_JOIN:
          // right side of a left outer join may receive null rows
          if (cond.getRight() == tag) {
            return false;
          }
          continue;
        case JoinDesc.RIGHT_OUTER_JOIN:
          // left side of a right outer join may receive null rows
          if (cond.getLeft() == tag) {
            return false;
          }
          continue;
        case JoinDesc.FULL_OUTER_JOIN:
          // both sides of a full outer join may receive null rows
          // (no continue needed: this is the last case, loop advances anyway)
          if (cond.getLeft() == tag || cond.getRight() == tag) {
            return false;
          }
      }
    }
    // inner-join-only conditions (or conditions not touching this tag) keep order
    return true;
  }

  /**
   * Finds the parent (tag) whose key expression matches the child expression
   * {@code cexpr} once backtracked from {@code child} to that parent. Only
   * tags flagged as sorted are considered.
   *
   * @param cexpr the child-side expression to resolve
   * @param pexprs the per-tag parent key expressions (parallel to parents)
   * @param child the operator where cexpr is evaluated
   * @param parents the candidate parent operators, indexed by tag
   * @param sorted per-tag flags from {@link #getSortedTags}; unsorted tags are skipped
   * @return the first matching tag, or -1 if none matches
   * @throws SemanticException propagated from expression backtracking
   */
  protected static int indexOf(ExprNodeDesc cexpr, ExprNodeDesc[] pexprs, Operator child,
      Operator[] parents, boolean[] sorted) throws SemanticException {
    for (int tag = 0; tag < parents.length; tag++) {
      if (sorted[tag] &&
          pexprs[tag].isSame(ExprNodeDescUtils.backtrack(cexpr, child, parents[tag]))) {
        return tag;
      }
    }
    return -1;
  }

  protected static > T findPossibleParent(Operator start, Class target,
      boolean trustScript) throws SemanticException {
    T[] parents = findPossibleParents(start, target, trustScript);
    return parents != null && parents.length == 1 ? parents[0] : null;
  }

  @SuppressWarnings("unchecked")
  protected static > T[] findPossibleParents(
      Operator start, Class target,
      boolean trustScript) throws SemanticException {
    Operator cursor = getSingleParent(start);
    for (; cursor != null; cursor = getSingleParent(cursor)) {
      if (target.isAssignableFrom(cursor.getClass())) {
        T[] array = (T[]) Array.newInstance(target, 1);
        array[0] = (T) cursor;
        return array;
      }
      if (cursor instanceof JoinOperator) {
        return findParents((JoinOperator) cursor, target);
      }
      if (cursor instanceof ScriptOperator && !trustScript) {
        return null;
      }
      if (!(cursor instanceof SelectOperator
          || cursor instanceof FilterOperator
          || cursor instanceof ForwardOperator
          || cursor instanceof ScriptOperator
          || cursor instanceof ReduceSinkOperator)) {
        return null;
      }
    }
    return null;
  }

  @SuppressWarnings("unchecked")
  protected static > T[] findParents(JoinOperator join, Class target)
      throws SemanticException {
    List> parents = join.getParentOperators();
    T[] result = (T[]) Array.newInstance(target, parents.size());
    for (int tag = 0; tag < result.length; tag++) {
      Operator cursor = parents.get(tag);
      for (; cursor != null; cursor = getSingleParent(cursor)) {
        if (target.isAssignableFrom(cursor.getClass())) {
          result[tag] = (T) cursor;
          break;
        }
      }
      if (result[tag] == null) {
        throw new IllegalStateException("failed to find " + target.getSimpleName()
            + " from " + join + " on tag " + tag);
      }
    }
    return result;
  }

  /**
   * Search the query plan tree from startPoint to the bottom. If there is no ReduceSinkOperator
   * between startPoint and the corresponding TableScanOperator, return the corresponding
   * TableScanOperator. Otherwise, return null.
   * @param startPoint the operator which the search will start at
   * @return the TableScanOperator traced from startPoint. Null, if the search encounters any
   * ReduceSinkOperator.
   */
  protected static Set<TableScanOperator> findTableScanOperators(Operator<?> startPoint) {
    if (startPoint instanceof ReduceSinkOperator) {
      assert startPoint.getNumParent() == 1; // for now
      startPoint = startPoint.getParentOperators().get(0);
    }
    return findTableScanOperators(startPoint, new LinkedHashSet<TableScanOperator>());
  }

  /**
   * Recursive helper: walks upward from {@code current}, collecting
   * TableScanOperators, stopping at any ReduceSinkOperator boundary.
   *
   * @param current the operator being visited
   * @param found the accumulator of discovered table scans
   * @return the same {@code found} set, for chaining
   */
  private static Set<TableScanOperator> findTableScanOperators(Operator<?> current,
      Set<TableScanOperator> found) {
    if (current instanceof TableScanOperator) {
      found.add((TableScanOperator) current);
      return found;
    }
    // stop at shuffle boundaries or plan roots
    if (current instanceof ReduceSinkOperator || current.getNumParent() == 0) {
      return found;
    }
    for (Operator<? extends OperatorDesc> parent : current.getParentOperators()) {
      findTableScanOperators(parent, found);
    }
    return found;
  }


  /**
   * Find all sibling ReduceSinkOperators (which have the same child operator of op) of op (op
   * included).
   * @throws SemanticException if op does not have exactly one child, or if any
   *         sibling is not a ReduceSinkOperator
   */
  public static List<ReduceSinkOperator> findSiblingReduceSinkOperators(ReduceSinkOperator op)
      throws SemanticException {
    List<ReduceSinkOperator> siblingRSs = new ArrayList<ReduceSinkOperator>();
    Operator<?> child = getSingleChild(op, true);
    for (Operator<? extends OperatorDesc> parent : child.getParentOperators()) {
      if (parent instanceof ReduceSinkOperator) {
        siblingRSs.add((ReduceSinkOperator) parent);
      } else {
        // fixed typos in the original message ("An sibling ... ReduceSinkOperatpr")
        throw new SemanticException("A sibling of a ReduceSinkOperator is not a " +
            "ReduceSinkOperator.");
      }
    }
    return siblingRSs;
  }

  /**
   * Find all sibling operators (which have the same child operator of op) of op (op
   * included).
   * @throws SemanticException
   */
  public static List> findSiblingOperators(
      Operator op)
      throws SemanticException {
    Operator child = getSingleChild(op, true);
    return child.getParentOperators();
  }

  /**
   * Replaces the given ReduceSinkOperator with a SelectOperator projecting the
   * RS value columns, preserving the RS's children, and records the RS as removed.
   *
   * @param childRS the ReduceSinkOperator to replace
   * @param context the parse context
   * @param procCtx records the removed operator(s)
   * @return the SelectOperator that took childRS's place
   * @throws SemanticException if childRS does not have exactly one parent
   */
  protected static SelectOperator replaceReduceSinkWithSelectOperator(ReduceSinkOperator childRS,
      ParseContext context, AbstractCorrelationProcCtx procCtx) throws SemanticException {
    RowSchema inputRS = childRS.getSchema();
    SelectDesc select = new SelectDesc(childRS.getConf().getValueCols(),
        childRS.getConf().getOutputValueColumnNames());

    // detach childRS from its parent and hang the new SEL there instead
    Operator<?> parent = getSingleParent(childRS);
    parent.getChildOperators().clear();

    SelectOperator sel = (SelectOperator) OperatorFactory.getAndMakeChild(
            select, new RowSchema(inputRS.getSignature()), parent);

    sel.setColumnExprMap(childRS.getColumnExprMap());

    // transfer childRS's children to the new SEL
    sel.setChildOperators(childRS.getChildOperators());
    for (Operator<? extends OperatorDesc> ch : childRS.getChildOperators()) {
      ch.replaceParent(childRS, sel);
    }

    // a trailing pass-through SEL (e.g. from SortedDynPartitionOptimizer) may now be redundant
    removeChildSelIfApplicable(getSingleChild(childRS), sel, context, procCtx);
    childRS.setChildOperators(null);
    childRS.setParentOperators(null);
    procCtx.addRemovedOperator(childRS);
    return sel;
  }

  //TODO: ideally this method should be removed in future, as in we need not to rely on removing
  // this select operator which likely is introduced by SortedDynPartitionOptimizer.
  // NonblockingdedupOptimizer should be able to merge this select Operator with its
  // parent. But, that is not working at the moment. See: dynpart_sort_optimization2.q

  /**
   * Removes {@code child} when it is a pass-through SelectOperator that only
   * forwards plain VALUE.* column references (no column-expr map, no table
   * alias, no partition/virtual columns), splicing {@code sel} to child's child.
   *
   * @param child the candidate operator below the newly created SEL
   * @param sel the SelectOperator that replaced the ReduceSinkOperator
   * @param context the parse context
   * @param procCtx records the removed operator
   * @throws SemanticException propagated from the child lookup
   */
  private static void removeChildSelIfApplicable(Operator<?> child, SelectOperator sel,
      ParseContext context, AbstractCorrelationProcCtx procCtx) throws SemanticException {

    if (!(child instanceof SelectOperator)) {
      return;
    }
    if (child.getColumnExprMap() != null) {
      return;
    }

    SelectOperator selOp = (SelectOperator) child;

    // only remove when every projected column is a plain VALUE.* reference
    for (ExprNodeDesc desc : selOp.getConf().getColList()) {
      if (!(desc instanceof ExprNodeColumnDesc)) {
        return;
      }
      ExprNodeColumnDesc col = (ExprNodeColumnDesc) desc;
      if (!col.getColumn().startsWith(ReduceField.VALUE.toString() + ".")
          || col.getTabAlias() != null || col.getIsPartitionColOrVirtualCol()) {
        return;
      }
    }

    removeOperator(child, getSingleChild(child), sel, context);
    procCtx.addRemovedOperator(child);
  }

  /**
   * Removes the ReduceSinkOperator {@code cRS} feeding the reduce-side
   * GroupByOperator {@code cGBYr}, converting cGBYr into a COMPLETE-mode
   * group-by that consumes the output of cRS's parent directly.
   * Two plan shapes are handled:
   *   pRS-cGBYm-cRS-cGBYr (map aggregation)    --> pRS-cGBYr(COMPLETE)
   *   pRS-cRS-cGBYr       (no map aggregation) --> pRS-cGBYr(COMPLETE)
   *
   * @param cRS the ReduceSinkOperator to remove
   * @param cGBYr the reduce-side GroupByOperator that is kept
   * @param context the parse context
   * @param procCtx records the removed operators
   * @throws SemanticException propagated from expression backtracking / plan walks
   */
  protected static void removeReduceSinkForGroupBy(ReduceSinkOperator cRS, GroupByOperator cGBYr,
      ParseContext context, AbstractCorrelationProcCtx procCtx) throws SemanticException {

    Operator parent = getSingleParent(cRS);

    if (parent instanceof GroupByOperator) {
      // pRS-cGBYm-cRS-cGBYr (map aggregation) --> pRS-cGBYr(COMPLETE)
      // copies desc of cGBYm to cGBYr and remove cGBYm and cRS
      GroupByOperator cGBYm = (GroupByOperator) parent;

      // backtrack keys two hops: cGBYr -> cRS, then cRS -> cGBYm
      cGBYr.getConf().setKeys(ExprNodeDescUtils.backtrack(ExprNodeDescUtils.backtrack(cGBYr
              .getConf().getKeys(), cGBYr, cRS), cRS, cGBYm));
      cGBYr.getConf().setAggregators(cGBYm.getConf().getAggregators());
      // the single remaining GBY must do the full aggregation in one pass
      for (AggregationDesc aggr : cGBYm.getConf().getAggregators()) {
        aggr.setMode(GenericUDAFEvaluator.Mode.COMPLETE);
      }
      cGBYr.setColumnExprMap(cGBYm.getColumnExprMap());
      cGBYr.setSchema(cGBYm.getSchema());
    } else {
      // pRS-cRS-cGBYr (no map aggregation) --> pRS-cGBYr(COMPLETE)
      // revert expressions of cGBYr to that of cRS
      cGBYr.getConf().setKeys(ExprNodeDescUtils.backtrack(cGBYr.getConf().getKeys(), cGBYr, cRS));
      for (AggregationDesc aggr : cGBYr.getConf().getAggregators()) {
        aggr.setParameters(ExprNodeDescUtils.backtrack(aggr.getParameters(), cGBYr, cRS));
      }

      Map oldMap = cGBYr.getColumnExprMap();
      RowSchema oldRS = cGBYr.getSchema();

      // rebuild column->expr map and row schema in output-column order,
      // with each expression rewritten to reference cRS's input
      Map newMap = new HashMap();
      ArrayList newRS = new ArrayList();

      List outputCols = cGBYr.getConf().getOutputColumnNames();
      for (int i = 0; i < outputCols.size(); i++) {
        String colName = outputCols.get(i);
        ColumnInfo colInfo = oldRS.getColumnInfo(colName);
        newRS.add(colInfo);
        ExprNodeDesc colExpr = ExprNodeDescUtils.backtrack(oldMap.get(colName), cGBYr, cRS);
        if (colExpr != null) {
          newMap.put(colInfo.getInternalName(), colExpr);
        }
      }
      cGBYr.setColumnExprMap(newMap);
      cGBYr.setSchema(new RowSchema(newRS));
    }
    cGBYr.getConf().setMode(GroupByDesc.Mode.COMPLETE);

    // detach cRS so its parent feeds cGBYr directly
    removeOperator(cRS, cGBYr, parent, context);
    procCtx.addRemovedOperator(cRS);

    if (parent instanceof GroupByOperator) {
      // the map-side GBY has been merged into cGBYr; detach it too
      removeOperator(parent, cGBYr, getSingleParent(parent), context);
      // NOTE(review): this records cGBYr (which is kept) rather than the removed
      // map-side GBY `parent` — looks like it should be addRemovedOperator(parent);
      // verify against upstream history before changing.
      procCtx.addRemovedOperator(cGBYr);
    }
  }

  /**
   * Precondition check: throws if the given operator is null.
   * NOTE(review): despite the name, this is not a predicate — it returns nothing
   * and throws on null; callers use it as an argument guard.
   *
   * @param operator the operator to validate
   * @throws SemanticException if the input operator is null
   */
  protected static void isNullOperator(Operator operator) throws SemanticException {
    if (operator == null) {
      throw new SemanticException("Operator is null.");
    }
  }

  /**
   * Inserts {@code newOperator} between {@code parent} and {@code child},
   * rewiring the parent/child links on all three operators.
   *
   * @param newOperator the operator to be inserted between parent and child
   * @param parent must be the single parent of {@code child}
   * @param child must be the single child of {@code parent}
   * @throws SemanticException if any operator is null, or if parent and child
   *         are not each other's only parent/child
   */
  protected static void insertOperatorBetween(
      Operator<? extends OperatorDesc> newOperator,
      Operator<? extends OperatorDesc> parent,
      Operator<? extends OperatorDesc> child)
          throws SemanticException {
    isNullOperator(newOperator);
    isNullOperator(parent);
    isNullOperator(child);

    // sanity checks: parent and child must be directly and exclusively linked
    if (parent != getSingleParent(child)) {
      throw new SemanticException("Operator " + parent.getName() + " (ID: " +
          parent.getIdentifier() + ") is not the only parent of Operator " +
          child.getName() + " (ID: " + child.getIdentifier() + ")");
    }
    if (child != getSingleChild(parent)) {
      throw new SemanticException("Operator " + child.getName() + " (ID: " +
          child.getIdentifier() + ") is not the only child of Operator " +
          parent.getName() + " (ID: " + parent.getIdentifier() + ")");
    }

    newOperator.setParentOperators(Utilities.makeList(parent));
    newOperator.setChildOperators(Utilities.makeList(child));

    child.setParentOperators(Utilities.makeList(newOperator));
    parent.setChildOperators(Utilities.makeList(newOperator));
  }

  /**
   * Removes {@code target} from the plan, linking its single parent directly to
   * its single child. Asserts (when -ea) that it has exactly one of each.
   *
   * @param target the operator to remove
   * @param context the parse context
   */
  public static void removeOperator(Operator<?> target, ParseContext context) {
    assert target.getNumParent() == 1 && target.getNumChild() == 1;
    removeOperator(target,
        target.getChildOperators().get(0), target.getParentOperators().get(0), context);
  }

  /**
   * Removes {@code target} from the plan by pointing each of its parents at
   * {@code child} and each of its children at {@code parent}, then clearing
   * target's own links.
   *
   * @param target the operator to remove
   * @param child the operator that replaces target as child of target's parents
   * @param parent the operator that replaces target as parent of target's children
   * @param context the parse context
   */
  protected static void removeOperator(Operator<? extends OperatorDesc> target,
      Operator<? extends OperatorDesc> child, Operator<? extends OperatorDesc> parent,
      ParseContext context) {
    for (Operator<? extends OperatorDesc> aparent : target.getParentOperators()) {
      aparent.replaceChild(target, child);
    }
    for (Operator<? extends OperatorDesc> achild : target.getChildOperators()) {
      achild.replaceParent(target, parent);
    }
    target.setChildOperators(null);
    target.setParentOperators(null);
  }
}




// © 2015 - 2024 Weber Informatics LLC (footer of the scraping site, kept as a comment)