All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.hadoop.hive.ql.optimizer.ColumnPrunerProcCtx Maven / Gradle / Ivy

There is a newer version: 4.0.1
Show newest version
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hive.ql.optimizer;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.apache.hadoop.hive.ql.exec.ColumnInfo;
import org.apache.hadoop.hive.ql.exec.CommonJoinOperator;
import org.apache.hadoop.hive.ql.exec.FileSinkOperator;
import org.apache.hadoop.hive.ql.exec.FilterOperator;
import org.apache.hadoop.hive.ql.exec.Operator;
import org.apache.hadoop.hive.ql.exec.OperatorFactory;
import org.apache.hadoop.hive.ql.exec.RowSchema;
import org.apache.hadoop.hive.ql.exec.SelectOperator;
import org.apache.hadoop.hive.ql.exec.UnionOperator;
import org.apache.hadoop.hive.ql.exec.Utilities;
import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx;
import org.apache.hadoop.hive.ql.parse.ParseContext;
import org.apache.hadoop.hive.ql.parse.SemanticException;
import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.ql.plan.OperatorDesc;
import org.apache.hadoop.hive.ql.plan.SelectDesc;

/**
 * This class implements the processor context for Column Pruner.
 */
public class ColumnPrunerProcCtx implements NodeProcessorCtx {

  private final ParseContext pctx;

  private final Map, List> prunedColLists;

  private final Map>> joinPrunedColLists;

  private final Map> unionPrunedColLists;

  public ColumnPrunerProcCtx(ParseContext pctx) {
    this.pctx = pctx;
    prunedColLists = new HashMap, List>();
    joinPrunedColLists = new HashMap>>();
    unionPrunedColLists = new HashMap<>();
  }

  public ParseContext getParseContext() {
    return pctx;
  }

  public Map>> getJoinPrunedColLists() {
    return joinPrunedColLists;
  }

  public Map> getUnionPrunedColLists() {
    return unionPrunedColLists;
  }

  /**
   * @return the prunedColLists
   */
  public List getPrunedColList(Operator op) {
    return prunedColLists.get(op);
  }

  public Map, List> getPrunedColLists() {
    return prunedColLists;
  }

  /**
   * Creates the list of internal column names(these names are used in the
   * RowResolver and are different from the external column names) that are
   * needed in the subtree. These columns eventually have to be selected from
   * the table scan.
   *
   * @param curOp
   *          The root of the operator subtree.
   * @return List of the internal column names.
   * @throws SemanticException
   */
  public List genColLists(Operator curOp)
      throws SemanticException {
    if (curOp.getChildOperators() == null) {
      return null;
    }
    List colList = null;
    for (Operator child : curOp.getChildOperators()) {
      List prunList = null;
      if (child instanceof CommonJoinOperator) {
        int tag = child.getParentOperators().indexOf(curOp);
        prunList = joinPrunedColLists.get(child).get((byte) tag);
      } else if (child instanceof UnionOperator) {
        List positions = unionPrunedColLists.get(child);
        if (positions != null) {
          prunList = new ArrayList<>();
          RowSchema oldRS = curOp.getSchema();
          for (Integer pos : positions) {
            ColumnInfo colInfo = oldRS.getSignature().get(pos);
            prunList.add(colInfo.getInternalName());
          }
        }
      } else if (child instanceof FileSinkOperator) {
        prunList = new ArrayList<>();
        RowSchema oldRS = curOp.getSchema();
        for (ColumnInfo colInfo : oldRS.getSignature()) {
          prunList.add(colInfo.getInternalName());
        }
      } else {
        prunList = prunedColLists.get(child);
      }
      if (prunList == null) {
        continue;
      }
      if (colList == null) {
        colList = new ArrayList(prunList);
      } else {
        colList = Utilities.mergeUniqElems(colList, prunList);
      }
    }
    return colList;
  }

  /**
   * Creates the list of internal column names(these names are used in the
   * RowResolver and are different from the external column names) that are
   * needed in the subtree. These columns eventually have to be selected from
   * the table scan.
   *
   * @param curOp
   *          The root of the operator subtree.
   * @param child
   *          The consumer.
   * @return List of the internal column names.
   * @throws SemanticException
   */
  public List genColLists(Operator curOp,
          Operator child)
      throws SemanticException {
    if (curOp.getChildOperators() == null) {
      return null;
    }
    if (child instanceof CommonJoinOperator) {
      int tag = child.getParentOperators().indexOf(curOp);
      return joinPrunedColLists.get(child).get((byte) tag);
    } else if (child instanceof UnionOperator) {
      List positions = unionPrunedColLists.get(child);
      List prunList = new ArrayList<>();
      if (positions != null && positions.size() > 0) {
        RowSchema oldRS = curOp.getSchema();
        for (Integer pos : positions) {
          ColumnInfo colInfo = oldRS.getSignature().get(pos);
          prunList.add(colInfo.getInternalName());
        }
      }
      return prunList;
    } else {
      return prunedColLists.get(child);
    }
  }

  /**
   * Creates the list of internal column names from select expressions in a
   * select operator. This function is used for the select operator instead of
   * the genColLists function (which is used by the rest of the operators).
   *
   * @param op
   *          The select operator.
   * @return List of the internal column names.
   */
  public List getColsFromSelectExpr(SelectOperator op) {
    List cols = new ArrayList();
    SelectDesc conf = op.getConf();
    if(conf.isSelStarNoCompute()) {
      for (ColumnInfo colInfo : op.getSchema().getSignature()) {
        cols.add(colInfo.getInternalName());
      }
    }
    else {
      List exprList = conf.getColList();
        for (ExprNodeDesc expr : exprList) {
          cols = Utilities.mergeUniqElems(cols, expr.getCols());
        }
    }
    return cols;
  }

  /**
   * Creates the list of internal column names for select * expressions.
   *
   * @param op
   *          The select operator.
   * @param colList
   *          The list of internal column names returned by the children of the
   *          select operator.
   * @return List of the internal column names.
   */
  public List getSelectColsFromChildren(SelectOperator op,
      List colList) {
    List cols = new ArrayList();
    SelectDesc conf = op.getConf();

    if (colList != null  && conf.isSelStarNoCompute()) {
      cols.addAll(colList);
      return cols;
    }

    List selectExprs = conf.getColList();

    // The colList is the output columns used by child operators, they are
    // different
    // from input columns of the current operator. we need to find out which
    // input columns are used.
    List outputColumnNames = conf.getOutputColumnNames();
    for (int i = 0; i < outputColumnNames.size(); i++) {
      if (colList == null || colList.contains(outputColumnNames.get(i))) {
        ExprNodeDesc expr = selectExprs.get(i);
        cols = Utilities.mergeUniqElems(cols, expr.getCols());
      }
    }

    return cols;
  }

  /**
   * Create the list of internal columns for select tag of LV
   */
  public List getSelectColsFromLVJoin(RowSchema rs,
      List colList) throws SemanticException {
    List columns = new ArrayList();
    for (String col : colList) {
      if (rs.getColumnInfo(col) != null) {
        columns.add(col);
      }
    }
    return columns;
  }

  /**
   * If the input filter operator has direct child(ren) which are union operator,
   * and the filter's column is not the same as union's
   * create select operator between them. The select operator has same number of columns as
   * pruned child operator.
   *
   * @param curOp
   *          The filter operator which need to handle children.
   * @throws SemanticException
   */
  public void handleFilterUnionChildren(Operator curOp)
      throws SemanticException {
    if (curOp.getChildOperators() == null || !(curOp instanceof FilterOperator)) {
      return;
    }
    List parentPrunList = prunedColLists.get(curOp);
    if(parentPrunList == null || parentPrunList.size() == 0) {
      return;
    }
    FilterOperator filOp = (FilterOperator)curOp;
    List prunList = null;
    List[] childToParentIndex = null;

    for (Operator child : curOp.getChildOperators()) {
      if (child instanceof UnionOperator) {
        prunList = genColLists(child);
        if (prunList == null || prunList.size() == 0 || parentPrunList.size() == prunList.size()) {
          continue;
        }

        ArrayList exprs = new ArrayList();
        ArrayList outputColNames = new ArrayList();
        Map colExprMap = new HashMap();
        ArrayList outputRS = new ArrayList();
        for (ColumnInfo colInfo : child.getSchema().getSignature()) {
          if (!prunList.contains(colInfo.getInternalName())) {
            continue;
          }
          ExprNodeDesc colDesc = new ExprNodeColumnDesc(colInfo.getType(),
              colInfo.getInternalName(), colInfo.getTabAlias(), colInfo.getIsVirtualCol());
          exprs.add(colDesc);
          outputColNames.add(colInfo.getInternalName());
          ColumnInfo newCol = new ColumnInfo(colInfo.getInternalName(), colInfo.getType(),
                  colInfo.getTabAlias(), colInfo.getIsVirtualCol(), colInfo.isHiddenVirtualCol());
          newCol.setAlias(colInfo.getAlias());
          outputRS.add(newCol);
          colExprMap.put(colInfo.getInternalName(), colDesc);
        }
        SelectDesc select = new SelectDesc(exprs, outputColNames, false);
        curOp.removeChild(child);
        SelectOperator sel = (SelectOperator) OperatorFactory.getAndMakeChild(
            select, new RowSchema(outputRS), curOp);
        OperatorFactory.makeChild(sel, child);
        sel.setColumnExprMap(colExprMap);

      }

    }
  }

}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy