/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hive.ql.optimizer.index;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.exec.ColumnInfo;
import org.apache.hadoop.hive.ql.exec.FunctionRegistry;
import org.apache.hadoop.hive.ql.exec.GroupByOperator;
import org.apache.hadoop.hive.ql.exec.Operator;
import org.apache.hadoop.hive.ql.exec.OperatorUtils;
import org.apache.hadoop.hive.ql.exec.RowSchema;
import org.apache.hadoop.hive.ql.exec.SelectOperator;
import org.apache.hadoop.hive.ql.exec.TableScanOperator;
import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx;
import org.apache.hadoop.hive.ql.metadata.Hive;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.metadata.Table;
import org.apache.hadoop.hive.ql.optimizer.ColumnPrunerProcFactory;
import org.apache.hadoop.hive.ql.parse.ParseContext;
import org.apache.hadoop.hive.ql.parse.SemanticException;
import org.apache.hadoop.hive.ql.plan.AggregationDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.ql.plan.GroupByDesc;
import org.apache.hadoop.hive.ql.plan.OperatorDesc;
import org.apache.hadoop.hive.ql.plan.TableScanDesc;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator;
import org.apache.hadoop.hive.serde2.SerDeException;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.StructField;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;

/**
 * RewriteQueryUsingAggregateIndexCtx stores the context for
 * {@link RewriteQueryUsingAggregateIndex}, which rewrites the operator plan
 * to scan the aggregate index table instead of the base table.
 *
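 * For example (table and column names below are illustrative), with an
 * aggregate index on column k of table t, a query such as
 *   select k, count(k) from t group by k
 * can be answered from the index table as
 *   select k, sum(`_count_of_k`) from t_index group by k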
 */
public final class RewriteQueryUsingAggregateIndexCtx implements NodeProcessorCtx {

  private static final Log LOG =
      LogFactory.getLog(RewriteQueryUsingAggregateIndexCtx.class.getName());

  private RewriteQueryUsingAggregateIndexCtx(ParseContext parseContext, Hive hiveDb,
      RewriteCanApplyCtx canApplyCtx) {
    this.parseContext = parseContext;
    this.hiveDb = hiveDb;
    this.canApplyCtx = canApplyCtx;
    this.indexTableName = canApplyCtx.getIndexTableName();
    this.alias = canApplyCtx.getAlias();
    this.aggregateFunction = canApplyCtx.getAggFunction();
    this.indexKey = canApplyCtx.getIndexKey();
  }

  public static RewriteQueryUsingAggregateIndexCtx getInstance(ParseContext parseContext,
      Hive hiveDb, RewriteCanApplyCtx canApplyCtx) {
    return new RewriteQueryUsingAggregateIndexCtx(parseContext, hiveDb, canApplyCtx);
  }

  private final Hive hiveDb;
  private final ParseContext parseContext;
  private RewriteCanApplyCtx canApplyCtx;
  // We need the GenericUDAFEvaluator for the GenericUDAF function "sum"
  private GenericUDAFEvaluator eval = null;
  private final String indexTableName;
  private final String alias;
  private final String aggregateFunction;
  private ExprNodeColumnDesc aggrExprNode = null;
  private String indexKey;

  public ParseContext getParseContext() {
    return parseContext;
  }

  public Hive getHiveDb() {
    return hiveDb;
  }

  public String getIndexName() {
    return indexTableName;
  }

  public GenericUDAFEvaluator getEval() {
    return eval;
  }

  public void setEval(GenericUDAFEvaluator eval) {
    this.eval = eval;
  }

  public void setAggrExprNode(ExprNodeColumnDesc aggrExprNode) {
    this.aggrExprNode = aggrExprNode;
  }

  public ExprNodeColumnDesc getAggrExprNode() {
    return aggrExprNode;
  }

  public String getAlias() {
    return alias;
  }

  public String getAggregateFunction() {
    return aggregateFunction;
  }

  public String getIndexKey() {
    return indexKey;
  }

  public void setIndexKey(String indexKey) {
    this.indexKey = indexKey;
  }

  public void invokeRewriteQueryProc() throws SemanticException {
    this.replaceTableScanProcess(canApplyCtx.getTableScanOperator());
    // The SelectOperator rewrite needs aggrExprNode, which is computed by
    // replaceGroupByOperatorProcess, so the group-by rewrite must run first.
    for (int index = 0; index < canApplyCtx.getGroupByOperators().size(); index++) {
      this.replaceGroupByOperatorProcess(canApplyCtx.getGroupByOperators().get(index), index);
    }
    for (SelectOperator selectOperator : canApplyCtx.getSelectOperators()) {
      this.replaceSelectOperatorProcess(selectOperator);
    }
  }

  /**
   * This method replaces the original TableScanOperator with a new
   * TableScanOperator and metadata that scans over the index table
   * rather than over the original base table.
   */
  private void replaceTableScanProcess(TableScanOperator scanOperator) throws SemanticException {
    RewriteQueryUsingAggregateIndexCtx rewriteQueryCtx = this;
    String alias = rewriteQueryCtx.getAlias();

    // Need to remove the original TableScanOperators from these data structures
    // and add new ones
    Map<String, Operator<? extends OperatorDesc>> topOps =
        rewriteQueryCtx.getParseContext().getTopOps();

    // remove the original TableScanOperator
    topOps.remove(alias);

    String indexTableName = rewriteQueryCtx.getIndexName();
    Table indexTableHandle = null;
    try {
      indexTableHandle = rewriteQueryCtx.getHiveDb().getTable(indexTableName);
    } catch (HiveException e) {
      LOG.error("Error while getting the table handle for index table.");
      LOG.error(org.apache.hadoop.util.StringUtils.stringifyException(e));
      throw new SemanticException(e.getMessage(), e);
    }

    // construct a new descriptor for the index table scan
    TableScanDesc indexTableScanDesc = new TableScanDesc(indexTableHandle);
    indexTableScanDesc.setGatherStats(false);
    String k = indexTableName + Path.SEPARATOR;
    indexTableScanDesc.setStatsAggPrefix(k);
    scanOperator.setConf(indexTableScanDesc);

    // Construct the new RowResolver for the new TableScanOperator; it exposes
    // only the index key column
    ArrayList<ColumnInfo> sigRS = new ArrayList<ColumnInfo>();
    try {
      StructObjectInspector rowObjectInspector = (StructObjectInspector) indexTableHandle
          .getDeserializer().getObjectInspector();
      StructField field = rowObjectInspector.getStructFieldRef(rewriteQueryCtx.getIndexKey());
      sigRS.add(new ColumnInfo(field.getFieldName(), TypeInfoUtils.getTypeInfoFromObjectInspector(
          field.getFieldObjectInspector()), indexTableName, false));
    } catch (SerDeException e) {
      LOG.error("Error while creating the RowResolver for new TableScanOperator.");
      LOG.error(org.apache.hadoop.util.StringUtils.stringifyException(e));
      throw new SemanticException(e.getMessage(), e);
    }
    RowSchema rs = new RowSchema(sigRS);

    // Set row resolver for new table
    String newAlias = indexTableName;
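    // For a table scan inside a subquery the alias has the form
    // "subquery:table"; keep the prefix and swap in the index table name,
    // e.g. "sq1:t" becomes "sq1:t_index" (names here are illustrative)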
    int index = alias.lastIndexOf(":");
    if (index >= 0) {
      newAlias = alias.substring(0, index) + ":" + indexTableName;
    }

    // Scan operator now points to the index table
    scanOperator.getConf().setAlias(newAlias);
    scanOperator.setAlias(indexTableName);
    topOps.put(newAlias, scanOperator);
    rewriteQueryCtx.getParseContext().setTopOps(
        (HashMap<String, Operator<? extends OperatorDesc>>) topOps);

    ColumnPrunerProcFactory.setupNeededColumns(scanOperator, rs,
        Arrays.asList(rewriteQueryCtx.getIndexKey()));
  }

  /**
   * This method replaces the original SelectOperator with a new
   * SelectOperator that additionally carries the aggregate column
   * (_count_of_indexed_key_column).
   */
  private void replaceSelectOperatorProcess(SelectOperator operator) throws SemanticException {
    RewriteQueryUsingAggregateIndexCtx rewriteQueryCtx = this;
    // we need to set the colList, outputColumnNames, colExprMap, and
    // rowSchema only for the SelectOperator that precedes the GroupByOperator:
    // count(indexed_key_column) needs to be replaced by
    // sum(`_count_of_indexed_key_column`)
    List<ExprNodeDesc> selColList = operator.getConf().getColList();
    selColList.add(rewriteQueryCtx.getAggrExprNode());

    List<String> selOutputColNames = operator.getConf().getOutputColumnNames();
    selOutputColNames.add(rewriteQueryCtx.getAggrExprNode().getColumn());

    operator.getColumnExprMap().put(rewriteQueryCtx.getAggrExprNode().getColumn(),
        rewriteQueryCtx.getAggrExprNode());

    RowSchema selRS = operator.getSchema();
    List<ColumnInfo> selRSSignature = selRS.getSignature();
    // Need to create a new type for the Column[_count_of_indexed_key_column] node
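    // count(...) produces a bigint in Hive, and sum over the stored count
    // column is also returned as a bigint, hence the bigint type below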
    PrimitiveTypeInfo pti = TypeInfoFactory.getPrimitiveTypeInfo("bigint");
    pti.setTypeName("bigint");
    ColumnInfo newCI = new ColumnInfo(rewriteQueryCtx.getAggregateFunction(), pti, "", false);
    selRSSignature.add(newCI);
    selRS.setSignature((ArrayList<ColumnInfo>) selRSSignature);
    operator.setSchema(selRS);
  }

  /**
   * We need to replace the count(indexed_column_key) GenericUDAF aggregation
   * function of the group-by construct with the "sum" GenericUDAF. This method
   * generates an operator tree for a sample query that creates a GroupByOperator
   * with the sum aggregation function, and uses that GroupByOperator's
   * information to replace the original GroupByOperator's aggregation
   * information. It replaces the AggregationDesc (aggregation descriptor) of
   * the old GroupByOperator with the new AggregationDesc of the new
   * GroupByOperator.
   */
  private void replaceGroupByOperatorProcess(GroupByOperator operator, int index)
      throws SemanticException {
    RewriteQueryUsingAggregateIndexCtx rewriteQueryCtx = this;

    // We need to replace the GroupByOperator that comes before the ReduceSinkOperator
    if (index == 0) {
      // this query contains the sum aggregation GenericUDAF
      String selReplacementCommand = "select sum(`" + rewriteQueryCtx.getAggregateFunction() + "`)"
          + " from " + rewriteQueryCtx.getIndexName() + " group by "
          + rewriteQueryCtx.getIndexKey() + " ";
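      // For an index on a column k of table t, the generated command would be,
      // illustratively: select sum(`_count_of_k`) from t_index group by k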
      // retrieve the operator tree for the query, and the required GroupByOperator from it
      Operator<?> newOperatorTree = RewriteParseContextGenerator.generateOperatorTree(
          rewriteQueryCtx.getParseContext().getConf(),
          selReplacementCommand);

      // we get our new GroupByOperator here
      GroupByOperator newGbyOperator = OperatorUtils.findLastOperatorUpstream(
          newOperatorTree, GroupByOperator.class);
      if (newGbyOperator == null) {
        throw new SemanticException("Error replacing GroupBy operator.");
      }

      // we need this information to set the correct colList, outputColumnNames
      // in SelectOperator
      ExprNodeColumnDesc aggrExprNode = null;

      // Construct the new AggregationDesc to get rid of the current
      // internal names and replace them with new internal names
      // as required by the operator tree
      GroupByDesc newConf = newGbyOperator.getConf();
      List<AggregationDesc> newAggrList = newConf.getAggregators();
      if (newAggrList != null && newAggrList.size() > 0) {
        for (AggregationDesc aggregationDesc : newAggrList) {
          rewriteQueryCtx.setEval(aggregationDesc.getGenericUDAFEvaluator());
          aggrExprNode = (ExprNodeColumnDesc) aggregationDesc.getParameters().get(0);
          rewriteQueryCtx.setAggrExprNode(aggrExprNode);
        }
      }

      // Now the GroupByOperator has the new AggregationList:
      // sum(`_count_of_indexed_key`) instead of count(indexed_key)
      GroupByDesc oldConf = operator.getConf();
      oldConf.setAggregators((ArrayList<AggregationDesc>) newAggrList);
      operator.setConf(oldConf);
    } else {
      // we just need to reset the GenericUDAFEvaluator and its name for this
      // GroupByOperator whose parent is the ReduceSinkOperator
      GroupByDesc childConf = operator.getConf();
      List<AggregationDesc> childAggrList = childConf.getAggregators();
      if (childAggrList != null && childAggrList.size() > 0) {
        for (AggregationDesc aggregationDesc : childAggrList) {
          List<ExprNodeDesc> paraList = aggregationDesc.getParameters();
          List<ObjectInspector> parametersOIList = new ArrayList<ObjectInspector>();
          for (ExprNodeDesc expr : paraList) {
            parametersOIList.add(expr.getWritableObjectInspector());
          }
          GenericUDAFEvaluator evaluator = FunctionRegistry.getGenericUDAFEvaluator("sum",
              parametersOIList, false, false);
          aggregationDesc.setGenericUDAFEvaluator(evaluator);
          aggregationDesc.setGenericUDAFName("sum");
        }
      }
    }
  }
}