org.apache.hadoop.hive.ql.plan.PTFDeserializer Maven / Gradle / Ivy
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hive.ql.plan;
import java.util.ArrayDeque;
import java.util.ArrayList;
import java.util.Deque;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.common.JavaUtils;
import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator;
import org.apache.hadoop.hive.ql.exec.PTFPartition;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.parse.LeadLagInfo;
import org.apache.hadoop.hive.ql.parse.WindowingExprNodeEvaluatorFactory;
import org.apache.hadoop.hive.ql.parse.WindowingSpec.WindowType;
import org.apache.hadoop.hive.ql.plan.ptf.BoundaryDef;
import org.apache.hadoop.hive.ql.plan.ptf.OrderExpressionDef;
import org.apache.hadoop.hive.ql.plan.ptf.PTFExpressionDef;
import org.apache.hadoop.hive.ql.plan.ptf.PTFInputDef;
import org.apache.hadoop.hive.ql.plan.ptf.PTFQueryInputDef;
import org.apache.hadoop.hive.ql.plan.ptf.PartitionedTableFunctionDef;
import org.apache.hadoop.hive.ql.plan.ptf.ShapeDetails;
import org.apache.hadoop.hive.ql.plan.ptf.WindowFrameDef;
import org.apache.hadoop.hive.ql.plan.ptf.WindowFunctionDef;
import org.apache.hadoop.hive.ql.plan.ptf.WindowTableFunctionDef;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFLeadLag;
import org.apache.hadoop.hive.ql.udf.ptf.TableFunctionEvaluator;
import org.apache.hadoop.hive.ql.udf.ptf.TableFunctionResolver;
import org.apache.hadoop.hive.ql.udf.ptf.WindowingTableFunction.WindowingTableFunctionResolver;
import org.apache.hadoop.hive.serde2.AbstractSerDe;
import org.apache.hadoop.hive.serde2.SerDeUtils;
import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.StructField;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
import org.apache.hadoop.util.ReflectionUtils;
@SuppressWarnings("deprecation")
public class PTFDeserializer {
PTFDesc ptfDesc;
StructObjectInspector inputOI;
Configuration hConf;
LeadLagInfo llInfo;
public PTFDeserializer(PTFDesc ptfDesc, StructObjectInspector inputOI, Configuration hConf) {
super();
this.ptfDesc = ptfDesc;
ptfDesc.setCfg(hConf);
this.inputOI = inputOI;
this.hConf = hConf;
llInfo = new LeadLagInfo();
ptfDesc.setLlInfo(llInfo);
}
public void initializePTFChain(PartitionedTableFunctionDef tblFnDef) throws HiveException {
Deque ptfChain = new ArrayDeque();
PTFInputDef currentDef = tblFnDef;
while (currentDef != null) {
ptfChain.push(currentDef);
currentDef = currentDef.getInput();
}
while (!ptfChain.isEmpty()) {
currentDef = ptfChain.pop();
if (currentDef instanceof PTFQueryInputDef) {
initialize((PTFQueryInputDef) currentDef, inputOI);
} else if (currentDef instanceof WindowTableFunctionDef) {
initializeWindowing((WindowTableFunctionDef) currentDef);
} else {
initialize((PartitionedTableFunctionDef) currentDef);
}
}
PTFDeserializer.alterOutputOIForStreaming(ptfDesc);
}
public void initializeWindowing(WindowTableFunctionDef def) throws HiveException {
ShapeDetails inpShape = def.getInput().getOutputShape();
/*
* 1. setup resolve, make connections
*/
TableFunctionEvaluator tEval = def.getTFunction();
WindowingTableFunctionResolver tResolver =
(WindowingTableFunctionResolver) constructResolver(def.getResolverClassName());
tResolver.initialize(ptfDesc, def, tEval);
/*
* 2. initialize WFns.
*/
for (WindowFunctionDef wFnDef : def.getWindowFunctions()) {
if (wFnDef.getArgs() != null) {
for (PTFExpressionDef arg : wFnDef.getArgs()) {
initialize(arg, inpShape);
}
}
if (wFnDef.getWindowFrame() != null) {
WindowFrameDef wFrmDef = wFnDef.getWindowFrame();
initialize(wFrmDef, inpShape);
}
setupWdwFnEvaluator(wFnDef);
}
ArrayList aliases = new ArrayList();
ArrayList fieldOIs = new ArrayList();
for (WindowFunctionDef wFnDef : def.getWindowFunctions()) {
aliases.add(wFnDef.getAlias());
if (wFnDef.isPivotResult()) {
fieldOIs.add(((ListObjectInspector) wFnDef.getOI()).getListElementObjectInspector());
} else {
fieldOIs.add(wFnDef.getOI());
}
}
PTFDeserializer.addInputColumnsToList(inpShape, aliases, fieldOIs);
StructObjectInspector wdwOutOI = ObjectInspectorFactory.getStandardStructObjectInspector(
aliases, fieldOIs);
tResolver.setWdwProcessingOutputOI(wdwOutOI);
initialize(def.getOutputShape(), wdwOutOI);
tResolver.initializeOutputOI();
}
protected void initialize(PTFQueryInputDef def, StructObjectInspector OI) throws HiveException {
ShapeDetails outShape = def.getOutputShape();
initialize(outShape, OI);
}
protected void initialize(PartitionedTableFunctionDef def) throws HiveException {
ShapeDetails inpShape = def.getInput().getOutputShape();
/*
* 1. initialize args
*/
if (def.getArgs() != null) {
for (PTFExpressionDef arg : def.getArgs()) {
initialize(arg, inpShape);
}
}
/*
* 2. setup resolve, make connections
*/
TableFunctionEvaluator tEval = def.getTFunction();
// TableFunctionResolver tResolver = FunctionRegistry.getTableFunctionResolver(def.getName());
TableFunctionResolver tResolver = constructResolver(def.getResolverClassName());
tResolver.initialize(ptfDesc, def, tEval);
/*
* 3. give Evaluator chance to setup for RawInput execution; setup RawInput shape
*/
if (tEval.isTransformsRawInput()) {
tResolver.initializeRawInputOI();
initialize(def.getRawInputShape(), tEval.getRawInputOI());
} else {
def.setRawInputShape(inpShape);
}
inpShape = def.getRawInputShape();
/*
* 4. give Evaluator chance to setup for Output execution; setup Output shape.
*/
tResolver.initializeOutputOI();
initialize(def.getOutputShape(), tEval.getOutputOI());
}
static void setupWdwFnEvaluator(WindowFunctionDef def) throws HiveException {
List args = def.getArgs();
List argOIs = new ArrayList();
ObjectInspector[] funcArgOIs = null;
if (args != null) {
for (PTFExpressionDef arg : args) {
argOIs.add(arg.getOI());
}
funcArgOIs = new ObjectInspector[args.size()];
funcArgOIs = argOIs.toArray(funcArgOIs);
}
GenericUDAFEvaluator wFnEval = def.getWFnEval();
ObjectInspector OI = wFnEval.init(GenericUDAFEvaluator.Mode.COMPLETE, funcArgOIs);
def.setWFnEval(wFnEval);
def.setOI(OI);
}
protected void initialize(WindowFrameDef winFrame, ShapeDetails inpShape) throws HiveException {
if (winFrame.getWindowType() == WindowType.RANGE) {
for (OrderExpressionDef exprDef : winFrame.getOrderDef().getExpressions()) {
initialize(exprDef, inpShape);
}
}
}
protected void initialize(PTFExpressionDef eDef, ShapeDetails inpShape) throws HiveException {
ExprNodeDesc exprNode = eDef.getExprNode();
ExprNodeEvaluator exprEval = WindowingExprNodeEvaluatorFactory.get(llInfo, exprNode);
ObjectInspector oi = initExprNodeEvaluator(exprEval, exprNode, inpShape);
eDef.setExprEvaluator(exprEval);
eDef.setOI(oi);
}
private ObjectInspector initExprNodeEvaluator(ExprNodeEvaluator exprEval,
ExprNodeDesc exprNode,
ShapeDetails inpShape)
throws HiveException {
ObjectInspector outOI;
outOI = exprEval.initialize(inpShape.getOI());
/*
* if there are any LeadLag functions in this Expression Tree: - setup a
* duplicate Evaluator for the 1st arg of the LLFuncDesc - initialize it
* using the InputInfo provided for this Expr tree - set the duplicate
* evaluator on the LLUDF instance.
*/
List llFuncExprs = llInfo.getLLFuncExprsInTopExpr(exprNode);
if (llFuncExprs != null) {
for (ExprNodeGenericFuncDesc llFuncExpr : llFuncExprs) {
ExprNodeDesc firstArg = llFuncExpr.getChildren().get(0);
ExprNodeEvaluator dupExprEval = WindowingExprNodeEvaluatorFactory.get(llInfo, firstArg);
dupExprEval.initialize(inpShape.getOI());
GenericUDFLeadLag llFn = (GenericUDFLeadLag) llFuncExpr.getGenericUDF();
llFn.setExprEvaluator(dupExprEval);
}
}
return outOI;
}
protected void initialize(ShapeDetails shp, StructObjectInspector OI) throws HiveException {
String serdeClassName = shp.getSerdeClassName();
Properties serDeProps = new Properties();
Map serdePropsMap = new LinkedHashMap();
addOIPropertiestoSerDePropsMap(OI, serdePropsMap);
for (String serdeName : serdePropsMap.keySet()) {
serDeProps.setProperty(serdeName, serdePropsMap.get(serdeName));
}
try {
AbstractSerDe serDe = ReflectionUtils.newInstance(hConf.getClassByName(serdeClassName).
asSubclass(AbstractSerDe.class), hConf);
SerDeUtils.initializeSerDe(serDe, hConf, serDeProps, null);
shp.setSerde(serDe);
StructObjectInspector outOI = PTFPartition.setupPartitionOutputOI(serDe, OI);
shp.setOI(outOI);
} catch (Exception se) {
throw new HiveException(se);
}
}
private static void addInputColumnsToList(ShapeDetails shape,
ArrayList fieldNames, ArrayList fieldOIs)
{
StructObjectInspector OI = shape.getOI();
for (StructField f : OI.getAllStructFieldRefs())
{
fieldNames.add(f.getFieldName());
fieldOIs.add(f.getFieldObjectInspector());
}
}
private TableFunctionResolver constructResolver(String className) throws HiveException {
try {
@SuppressWarnings("unchecked")
Class extends TableFunctionResolver> rCls = (Class extends TableFunctionResolver>)
JavaUtils.loadClass(className);
return ReflectionUtils.newInstance(rCls, null);
} catch (Exception e) {
throw new HiveException(e);
}
}
@SuppressWarnings({"unchecked"})
public static void addOIPropertiestoSerDePropsMap(StructObjectInspector OI,
Map serdePropsMap) {
if (serdePropsMap == null) {
return;
}
ArrayList extends Object>[] tInfo = getTypeMap(OI);
ArrayList columnNames = (ArrayList) tInfo[0];
ArrayList fields = (ArrayList) tInfo[1];
StringBuilder cNames = new StringBuilder();
StringBuilder cTypes = new StringBuilder();
for (int i = 0; i < fields.size(); i++)
{
cNames.append(i > 0 ? "," : "");
cTypes.append(i > 0 ? "," : "");
cNames.append(columnNames.get(i));
cTypes.append(fields.get(i).getTypeName());
}
serdePropsMap.put(org.apache.hadoop.hive.serde.serdeConstants.LIST_COLUMNS,
cNames.toString());
serdePropsMap.put(org.apache.hadoop.hive.serde.serdeConstants.LIST_COLUMN_TYPES,
cTypes.toString());
}
private static ArrayList extends Object>[] getTypeMap(
StructObjectInspector oi) {
StructTypeInfo t = (StructTypeInfo) TypeInfoUtils
.getTypeInfoFromObjectInspector(oi);
ArrayList fnames = t.getAllStructFieldNames();
ArrayList fields = t.getAllStructFieldTypeInfos();
return new ArrayList>[]
{fnames, fields};
}
/*
* If the final PTF in a PTFChain can stream its output, then set the OI of its OutputShape
* to the OI returned by the TableFunctionEvaluator.
*/
public static void alterOutputOIForStreaming(PTFDesc ptfDesc) {
PartitionedTableFunctionDef tDef = ptfDesc.getFuncDef();
TableFunctionEvaluator tEval = tDef.getTFunction();
if ( tEval.canIterateOutput() ) {
tDef.getOutputShape().setOI(tEval.getOutputOI());
}
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy