// Source listing: org.apache.hadoop.hive.ql.optimizer.ColumnPrunerProcCtx (Maven / Gradle / Ivy)
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hive.ql.optimizer;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.hadoop.hive.ql.exec.ColumnInfo;
import org.apache.hadoop.hive.ql.exec.CommonJoinOperator;
import org.apache.hadoop.hive.ql.exec.Operator;
import org.apache.hadoop.hive.ql.exec.RowSchema;
import org.apache.hadoop.hive.ql.exec.SelectOperator;
import org.apache.hadoop.hive.ql.exec.Utilities;
import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx;
import org.apache.hadoop.hive.ql.parse.ParseContext;
import org.apache.hadoop.hive.ql.parse.SemanticException;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.ql.plan.OperatorDesc;
import org.apache.hadoop.hive.ql.plan.SelectDesc;
/**
* This class implements the processor context for Column Pruner.
*/
public class ColumnPrunerProcCtx implements NodeProcessorCtx {
private final ParseContext pctx;
private final Map, List> prunedColLists;
private final Map>> joinPrunedColLists;
public ColumnPrunerProcCtx(ParseContext pctx) {
this.pctx = pctx;
prunedColLists = new HashMap, List>();
joinPrunedColLists = new HashMap>>();
}
public ParseContext getParseContext() {
return pctx;
}
public Map>> getJoinPrunedColLists() {
return joinPrunedColLists;
}
/**
* @return the prunedColLists
*/
public List getPrunedColList(Operator extends OperatorDesc> op) {
return prunedColLists.get(op);
}
public Map, List> getPrunedColLists() {
return prunedColLists;
}
/**
* Creates the list of internal column names(these names are used in the
* RowResolver and are different from the external column names) that are
* needed in the subtree. These columns eventually have to be selected from
* the table scan.
*
* @param curOp
* The root of the operator subtree.
* @return List of the internal column names.
* @throws SemanticException
*/
public List genColLists(Operator extends OperatorDesc> curOp)
throws SemanticException {
if (curOp.getChildOperators() == null) {
return null;
}
List colList = null;
for (Operator extends OperatorDesc> child : curOp.getChildOperators()) {
List prunList;
if (child instanceof CommonJoinOperator) {
int tag = child.getParentOperators().indexOf(curOp);
prunList = joinPrunedColLists.get(child).get((byte) tag);
} else {
prunList = prunedColLists.get(child);
}
if (prunList == null) {
continue;
}
if (colList == null) {
colList = new ArrayList(prunList);
} else {
colList = Utilities.mergeUniqElems(colList, prunList);
}
}
return colList;
}
/**
* Creates the list of internal column names from select expressions in a
* select operator. This function is used for the select operator instead of
* the genColLists function (which is used by the rest of the operators).
*
* @param op
* The select operator.
* @return List of the internal column names.
*/
public List getColsFromSelectExpr(SelectOperator op) {
List cols = new ArrayList();
SelectDesc conf = op.getConf();
if(conf.isSelStarNoCompute()) {
for (ColumnInfo colInfo : op.getSchema().getSignature()) {
cols.add(colInfo.getInternalName());
}
}
else {
List exprList = conf.getColList();
for (ExprNodeDesc expr : exprList) {
cols = Utilities.mergeUniqElems(cols, expr.getCols());
}
}
return cols;
}
/**
* Creates the list of internal column names for select * expressions.
*
* @param op
* The select operator.
* @param colList
* The list of internal column names returned by the children of the
* select operator.
* @return List of the internal column names.
*/
public List getSelectColsFromChildren(SelectOperator op,
List colList) {
List cols = new ArrayList();
SelectDesc conf = op.getConf();
if (colList != null && conf.isSelStarNoCompute()) {
cols.addAll(colList);
return cols;
}
List selectExprs = conf.getColList();
// The colList is the output columns used by child operators, they are
// different
// from input columns of the current operator. we need to find out which
// input columns are used.
List outputColumnNames = conf.getOutputColumnNames();
for (int i = 0; i < outputColumnNames.size(); i++) {
if (colList == null || colList.contains(outputColumnNames.get(i))) {
ExprNodeDesc expr = selectExprs.get(i);
cols = Utilities.mergeUniqElems(cols, expr.getCols());
}
}
return cols;
}
/**
* Create the list of internal columns for select tag of LV
*/
public List getSelectColsFromLVJoin(RowSchema rs,
List colList) throws SemanticException {
List columns = new ArrayList();
for (String col : colList) {
if (rs.getColumnInfo(col) != null) {
columns.add(col);
}
}
return columns;
}
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy