All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.hadoop.hive.ql.udf.ptf.MatchPath Maven / Gradle / Ivy

There is a newer version: 1.21.1
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hive.ql.udf.ptf;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.apache.hadoop.hive.ql.exec.ColumnInfo;
import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator;
import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluatorFactory;
import org.apache.hadoop.hive.ql.exec.PTFPartition;
import org.apache.hadoop.hive.ql.exec.PTFPartition.PTFPartitionIterator;
import org.apache.hadoop.hive.ql.exec.Utilities;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.parse.ASTNode;
import org.apache.hadoop.hive.ql.parse.PTFTranslator;
import org.apache.hadoop.hive.ql.parse.RowResolver;
import org.apache.hadoop.hive.ql.parse.SemanticAnalyzer;
import org.apache.hadoop.hive.ql.parse.SemanticException;
import org.apache.hadoop.hive.ql.parse.TypeCheckCtx;
import org.apache.hadoop.hive.ql.parse.TypeCheckProcFactory;
import org.apache.hadoop.hive.ql.parse.WindowingSpec.WindowExpressionSpec;
import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.ql.plan.PTFDesc;
import org.apache.hadoop.hive.ql.plan.ptf.PTFExpressionDef;
import org.apache.hadoop.hive.ql.plan.ptf.PTFInputDef;
import org.apache.hadoop.hive.ql.plan.ptf.PartitionedTableFunctionDef;
import org.apache.hadoop.hive.serde2.objectinspector.ConstantObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters.Converter;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.StandardListObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.StructField;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;

/**
 * Returns rows that meet a specified pattern. Use symbols to specify a list of expressions
 * to match.
 * Pattern is used to specify a Path. The results list can contain expressions based on
 * the input columns and also the matched Path.
 * <ol>
 * <li><b>pattern:</b> pattern for the Path. Path is a 'dot' separated list of symbols.
 * Each element is treated as a symbol. Elements that end in '*' or '+' are interpreted with
 * the usual meaning of zero or more, one or more respectively. For e.g.
 * "LATE.EARLY*.ONTIMEOREARLY" implies a sequence of flights
 * where the first occurrence was LATE, followed by zero or more EARLY flights,
 * followed by a ONTIME or EARLY flight.
 * <li><b>symbols</b> specify a list of name, expression pairs. For e.g.
 * 'LATE', arrival_delay &gt; 0, 'EARLY', arrival_delay &lt; 0 , 'ONTIME', arrival_delay == 0.
 * These symbols can be used in the Pattern defined above.
 * <li><b>resultSelectList</b> specified as a select list.
 * The expressions in the selectList are evaluated in the context where all the
 * input columns are available, plus the attribute
 * "tpath" is available. Path is a collection of rows that represents the matching Path.
 * </ol>
*/
public class MatchPath extends TableFunctionEvaluator {
  /** Path pattern, a dot-separated list of symbols, taken from the first argument. */
  private transient String patternStr;
  /** Symbol names and their boolean expressions, taken from the middle arguments. */
  private transient SymbolsInfo symInfo;
  /** Result select list, taken from the last argument. */
  private transient String resultExprStr;
  /** Matcher built from {@link #patternStr} and {@link #symInfo}. */
  private transient SymbolFunction syFn;
  private ResultExprInfo resultExprInfo;
  /*
   * the names of the Columns of the input to MatchPath. Used to setup the tpath Struct column.
   */
  private HashMap<String, String> inputColumnNamesMap;

  /**
   * Tries to match the pattern starting at every row of the partition. For each starting row
   * that matches, evaluates the result expressions against that row plus its matched path and
   * appends the resulting row to {@code outP}.
   *
   * @param pItr iterator over the rows of the input partition
   * @param outP partition that receives the output rows
   * @throws HiveException if matching or expression evaluation fails
   */
  @Override
  public void execute(PTFPartitionIterator<Object> pItr, PTFPartition outP)
      throws HiveException {
    while (pItr.hasNext()) {
      Object iRow = pItr.next();

      SymbolFunctionResult syFnRes = SymbolFunction.match(syFn, iRow, pItr);
      if (syFnRes.matches) {
        // number of rows in the matched path, counted from the current row
        int sz = syFnRes.nextRow - (pItr.getIndex() - 1);
        Object selectListInput = MatchPath.getSelectListInput(iRow,
            tableDef.getInput().getOutputShape().getOI(), pItr, sz);
        ArrayList<Object> oRow = new ArrayList<>();
        for (ExprNodeEvaluator resExprEval : resultExprInfo.resultExprEvals) {
          oRow.add(resExprEval.evaluate(selectListInput));
        }
        outP.append(oRow);
      }
    }
  }

  /**
   * Always throws a {@link SemanticException} that describes the expected MatchPath signature
   * followed by the given message.
   *
   * @param message description of the specific problem
   * @throws SemanticException always
   */
  static void throwErrorWithSignature(String message) throws SemanticException {
    throw new SemanticException(String.format(
        "MatchPath signature is: SymbolPattern, one or more SymbolName, " +
        "expression pairs, the result expression as a select list. Error %s",
        message));
  }

  public HashMap<String, String> getInputColumnNames() {
    return inputColumnNamesMap;
  }

  public void setInputColumnNames(HashMap<String, String> inputColumnNamesMap) {
    this.inputColumnNamesMap = inputColumnNamesMap;
  }

  /**
   * Resolver for {@link MatchPath}: validates the function arguments and sets up the
   * output ObjectInspector.
   */
  public static class MatchPathResolver extends TableFunctionResolver {

    @Override
    protected TableFunctionEvaluator createEvaluator(PTFDesc ptfDesc,
        PartitionedTableFunctionDef tDef) {
      return new MatchPath();
    }

    /**
     * <ul>
     * <li> check structure of Arguments:
     * <ol>
     * <li> First arg should be a String
     * <li> then there should be an even number of Arguments:
     * String, expression; expression should be Convertible to Boolean.
     * <li> finally there should be a String.
     * </ol>
     * <li> convert pattern into a NNode chain.
     * <li> convert symbol args into a Symbol Map.
     * <li> parse selectList into SelectList struct. The inputOI used to translate
     * these expressions should be based on the
     * columns in the Input, the 'path.attr'
     * </ul>
     */
    @Override
    public void setupOutputOI() throws SemanticException {
      MatchPath evaluator = (MatchPath) getEvaluator();
      PartitionedTableFunctionDef tDef = evaluator.getTableDef();

      List<PTFExpressionDef> args = tDef.getArgs();
      int argsNum = args == null ? 0 : args.size();

      if (argsNum < 4) {
        throwErrorWithSignature("at least 4 arguments required");
      }

      validateAndSetupPatternStr(evaluator, args);
      validateAndSetupSymbolInfo(evaluator, args, argsNum);
      validateAndSetupResultExprStr(evaluator, args, argsNum);
      setupSymbolFunctionChain(evaluator);

      /*
       * setup OI for input to resultExpr select list
       */
      RowResolver selectListInputRR = MatchPath.createSelectListRR(evaluator, tDef.getInput());

      /*
       * parse ResultExpr Str and setup OI.
       */
      ResultExpressionParser resultExprParser =
          new ResultExpressionParser(evaluator.resultExprStr, selectListInputRR);
      try {
        resultExprParser.translate();
      } catch (HiveException he) {
        throw new SemanticException(he);
      }
      evaluator.resultExprInfo = resultExprParser.getResultExprInfo();
      StructObjectInspector OI = evaluator.resultExprInfo.resultOI;

      setOutputOI(OI);
    }

    /**
     * Collects the columns referenced by the result expressions and by the symbol expressions.
     */
    @Override
    public List<String> getReferencedColumns() throws SemanticException {
      MatchPath matchPath = (MatchPath) evaluator;
      List<String> columns = new ArrayList<>();
      for (ExprNodeDesc exprNode : matchPath.resultExprInfo.resultExprNodes) {
        Utilities.mergeUniqElems(columns, exprNode.getCols());
      }
      for (ExprNodeDesc exprNode : matchPath.symInfo.symbolExprsDecs) {
        Utilities.mergeUniqElems(columns, exprNode.getCols());
      }
      return columns;
    }

    /*
     * true only if the inspector is a constant inspector of primitive category STRING.
     * Shared by the pattern, symbol name and result expression validations.
     */
    private static boolean isConstantStringInspector(ObjectInspector oi) {
      return ObjectInspectorUtils.isConstantObjectInspector(oi)
          && oi.getCategory() == ObjectInspector.Category.PRIMITIVE
          && ((PrimitiveObjectInspector) oi).getPrimitiveCategory()
              == PrimitiveObjectInspector.PrimitiveCategory.STRING;
    }

    /*
     * validate and setup patternStr
     */
    private void validateAndSetupPatternStr(MatchPath evaluator,
        List<PTFExpressionDef> args) throws SemanticException {
      PTFExpressionDef symboPatternArg = args.get(0);
      ObjectInspector symbolPatternArgOI = symboPatternArg.getOI();

      if (!isConstantStringInspector(symbolPatternArgOI)) {
        throwErrorWithSignature("Currently the symbol Pattern must be a Constant String.");
      }

      evaluator.patternStr = ((ConstantObjectInspector) symbolPatternArgOI)
          .getWritableConstantValue().toString();
    }

    /*
     * validate and setup SymbolInfo
     */
    private void validateAndSetupSymbolInfo(MatchPath evaluator,
        List<PTFExpressionDef> args,
        int argsNum) throws SemanticException {
      // everything between the pattern (first arg) and the result expression (last arg)
      int symbolArgsSz = argsNum - 2;
      if (symbolArgsSz % 2 != 0) {
        throwErrorWithSignature("Symbol Name, Expression need to be specified in pairs: " +
            "there are odd number of symbol args");
      }

      evaluator.symInfo = new SymbolsInfo(symbolArgsSz / 2);
      for (int i = 1; i <= symbolArgsSz; i += 2) {
        PTFExpressionDef symbolNameArg = args.get(i);
        ObjectInspector symbolNameArgOI = symbolNameArg.getOI();

        if (!isConstantStringInspector(symbolNameArgOI)) {
          throwErrorWithSignature(
              String.format("Currently a Symbol Name(%s) must be a Constant String",
                  symbolNameArg.getExpressionTreeString()));
        }
        String symbolName = ((ConstantObjectInspector) symbolNameArgOI)
            .getWritableConstantValue().toString();

        PTFExpressionDef symolExprArg = args.get(i + 1);
        ObjectInspector symolExprArgOI = symolExprArg.getOI();
        if ((symolExprArgOI.getCategory() != ObjectInspector.Category.PRIMITIVE) ||
            ((PrimitiveObjectInspector) symolExprArgOI).getPrimitiveCategory() !=
                PrimitiveObjectInspector.PrimitiveCategory.BOOLEAN) {
          throwErrorWithSignature(String.format("Currently a Symbol Expression(%s) " +
              "must be a boolean expression", symolExprArg.getExpressionTreeString()));
        }
        evaluator.symInfo.add(symbolName, symolExprArg);
      }
    }

    /*
     * validate and setup resultExprStr
     */
    private void validateAndSetupResultExprStr(MatchPath evaluator,
        List<PTFExpressionDef> args,
        int argsNum) throws SemanticException {
      PTFExpressionDef resultExprArg = args.get(argsNum - 1);
      ObjectInspector resultExprArgOI = resultExprArg.getOI();

      if (!isConstantStringInspector(resultExprArgOI)) {
        throwErrorWithSignature("Currently the result Expr parameter must be a Constant String.");
      }

      evaluator.resultExprStr = ((ConstantObjectInspector) resultExprArgOI)
          .getWritableConstantValue().toString();
    }

    /*
     * setup SymbolFunction chain.
     */
    private void setupSymbolFunctionChain(MatchPath evaluator) throws SemanticException {
      SymbolParser syP = new SymbolParser(evaluator.patternStr,
          evaluator.symInfo.symbolExprsNames,
          evaluator.symInfo.symbolExprsEvaluators, evaluator.symInfo.symbolExprsOIs);
      syP.parse();
      evaluator.syFn = syP.getSymbolFunction();
    }

    @Override
    public boolean transformsRawInput() {
      return false;
    }

    /**
     * Repeats the argument validation and symbol chain setup, then re-initializes the evaluators
     * of the result expression nodes already held in the evaluator's {@link ResultExprInfo}
     * and sets the output ObjectInspector from them.
     *
     * @throws HiveException wrapping any {@link SemanticException} raised during setup
     */
    @Override
    public void initializeOutputOI() throws HiveException {
      try {
        MatchPath evaluator = (MatchPath) getEvaluator();
        PartitionedTableFunctionDef tDef = evaluator.getTableDef();

        List<PTFExpressionDef> args = tDef.getArgs();
        int argsNum = args.size();

        validateAndSetupPatternStr(evaluator, args);
        validateAndSetupSymbolInfo(evaluator, args, argsNum);
        validateAndSetupResultExprStr(evaluator, args, argsNum);
        setupSymbolFunctionChain(evaluator);

        /*
         * setup OI for input to resultExpr select list
         */
        StructObjectInspector selectListInputOI = MatchPath.createSelectListOI(
            evaluator, tDef.getInput());
        ResultExprInfo resultExprInfo = evaluator.resultExprInfo;
        ArrayList<ObjectInspector> selectListExprOIs = new ArrayList<>();
        resultExprInfo.resultExprEvals = new ArrayList<>();

        for (int i = 0; i < resultExprInfo.resultExprNodes.size(); i++) {
          ExprNodeDesc selectColumnExprNode = resultExprInfo.resultExprNodes.get(i);
          ExprNodeEvaluator selectColumnExprEval =
              ExprNodeEvaluatorFactory.get(selectColumnExprNode);
          ObjectInspector selectColumnOI = selectColumnExprEval.initialize(selectListInputOI);
          resultExprInfo.resultExprEvals.add(selectColumnExprEval);
          selectListExprOIs.add(selectColumnOI);
        }

        resultExprInfo.resultOI = ObjectInspectorFactory.getStandardStructObjectInspector(
            resultExprInfo.resultExprNames, selectListExprOIs);
        setOutputOI(resultExprInfo.resultOI);
      } catch (SemanticException se) {
        throw new HiveException(se);
      }
    }

    @Override
    public ArrayList<String> getOutputColumnNames() {
      MatchPath evaluator = (MatchPath) getEvaluator();
      return evaluator.resultExprInfo.getResultExprNames();
    }
  }

  public ResultExprInfo getResultExprInfo() {
    return resultExprInfo;
  }

  public void setResultExprInfo(ResultExprInfo resultExprInfo) {
    this.resultExprInfo = resultExprInfo;
  }

  /**
   * Parallel lists describing the symbols: entry {@code i} of each list belongs to the
   * {@code i}-th symbol that was added.
   */
  static class SymbolsInfo {
    int sz;
    ArrayList<ExprNodeDesc> symbolExprsDecs;
    ArrayList<ExprNodeEvaluator> symbolExprsEvaluators;
    ArrayList<ObjectInspector> symbolExprsOIs;
    ArrayList<String> symbolExprsNames;

    SymbolsInfo(int sz) {
      this.sz = sz;
      symbolExprsEvaluators = new ArrayList<>(sz);
      symbolExprsOIs = new ArrayList<>(sz);
      symbolExprsNames = new ArrayList<>(sz);
      symbolExprsDecs = new ArrayList<>(sz);
    }

    void add(String name, PTFExpressionDef arg) {
      symbolExprsNames.add(name);
      symbolExprsEvaluators.add(arg.getExprEvaluator());
      symbolExprsOIs.add(arg.getOI());
      symbolExprsDecs.add(arg.getExprNode());
    }
  }

  /**
   * Names and expression nodes of the result select list, plus the transient evaluators and
   * output ObjectInspector built from them.
   */
  public static class ResultExprInfo {
    ArrayList<String> resultExprNames;
    ArrayList<ExprNodeDesc> resultExprNodes;
    private transient ArrayList<ExprNodeEvaluator> resultExprEvals;
    private transient StructObjectInspector resultOI;

    public ArrayList<String> getResultExprNames() {
      return resultExprNames;
    }

    public void setResultExprNames(ArrayList<String> resultExprNames) {
      this.resultExprNames = resultExprNames;
    }

    public ArrayList<ExprNodeDesc> getResultExprNodes() {
      return resultExprNodes;
    }

    public void setResultExprNodes(ArrayList<ExprNodeDesc> resultExprNodes) {
      this.resultExprNodes = resultExprNodes;
    }
  }

  /**
   * Base class of the pattern matchers. Each instance owns a single
   * {@link SymbolFunctionResult} that its {@code match} implementation fills in and returns.
   */
  public static abstract class SymbolFunction {
    SymbolFunctionResult result;

    public SymbolFunction() {
      result = new SymbolFunctionResult();
    }

    /**
     * Runs {@code syFn} against {@code row} and then, whether or not matching threw, resets the
     * iterator to the index it had for {@code row} (the index on entry minus one).
     */
    public static SymbolFunctionResult match(SymbolFunction syFn, Object row,
        PTFPartitionIterator<Object> pItr) throws HiveException {
      int resetToIdx = pItr.getIndex() - 1;
      try {
        return syFn.match(row, pItr);
      } finally {
        pItr.resetToIndex(resetToIdx);
      }
    }

    protected abstract SymbolFunctionResult match(Object row, PTFPartitionIterator<Object> pItr)
        throws HiveException;

    protected abstract boolean isOptional();
  }

  /**
   * Matches a single row by evaluating the symbol's expression against it.
   */
  public static class Symbol extends SymbolFunction {
    ExprNodeEvaluator symbolExprEval;
    Converter converter;

    public Symbol(ExprNodeEvaluator symbolExprEval, ObjectInspector symbolOI) {
      this.symbolExprEval = symbolExprEval;
      converter = ObjectInspectorConverters.getConverter(
          symbolOI,
          PrimitiveObjectInspectorFactory.javaBooleanObjectInspector);
    }

    @Override
    protected SymbolFunctionResult match(Object row, PTFPartitionIterator<Object> pItr)
        throws HiveException {
      Object val = converter.convert(symbolExprEval.evaluate(row));
      // An expression that evaluates to NULL is treated as "no match" instead of
      // failing with a NullPointerException when unboxing.
      result.matches = Boolean.TRUE.equals(val);
      result.nextRow = pItr.getIndex();
      return result;
    }

    @Override
    protected boolean isOptional() {
      return false;
    }
  }

  /**
   * Zero-or-more wrapper around another SymbolFunction; always reports a match.
   */
  public static class Star extends SymbolFunction {
    SymbolFunction symbolFn;

    public Star(SymbolFunction symbolFn) {
      this.symbolFn = symbolFn;
    }

    @Override
    protected SymbolFunctionResult match(Object row, PTFPartitionIterator<Object> pItr)
        throws HiveException {
      result.matches = true;
      SymbolFunctionResult rowResult = symbolFn.match(row, pItr);

      // keep consuming rows for as long as the wrapped function matches
      while (rowResult.matches && pItr.hasNext()) {
        row = pItr.next();
        rowResult = symbolFn.match(row, pItr);
      }

      result.nextRow = pItr.getIndex();
      if (pItr.hasNext()) {
        result.nextRow -= 1;
      }
      return result;
    }

    @Override
    protected boolean isOptional() {
      return true;
    }
  }

  /**
   * One-or-more wrapper around another SymbolFunction; fails if the first row does not match.
   */
  public static class Plus extends SymbolFunction {
    SymbolFunction symbolFn;

    public Plus(SymbolFunction symbolFn) {
      this.symbolFn = symbolFn;
    }

    @Override
    protected SymbolFunctionResult match(Object row, PTFPartitionIterator<Object> pItr)
        throws HiveException {
      SymbolFunctionResult rowResult = symbolFn.match(row, pItr);

      if (!rowResult.matches) {
        result.matches = false;
        result.nextRow = pItr.getIndex() - 1;
        return result;
      }

      result.matches = true;
      // keep consuming rows for as long as the wrapped function matches
      while (rowResult.matches && pItr.hasNext()) {
        row = pItr.next();
        rowResult = symbolFn.match(row, pItr);
      }

      result.nextRow = pItr.getIndex() - 1;
      return result;
    }

    @Override
    protected boolean isOptional() {
      return false;
    }
  }

  /**
   * Sequence of SymbolFunctions that have to match one after the other.
   */
  public static class Chain extends SymbolFunction {
    ArrayList<SymbolFunction> components;

    public Chain(ArrayList<SymbolFunction> components) {
      this.components = components;
    }

    /*
     * Iterate over the Symbol Functions in the Chain:
     * - If we are not at the end of the Iterator (i.e. row != null )
     *   - match the current componentFn
     *   - if it returns false, then return false
     *   - otherwise set row to the next row from the Iterator.
     * - if we are at the end of the Iterator
     *   - skip any optional Symbol Fns (star patterns) at the end.
     *   - but if we come to a non optional Symbol Fn, return false.
     * - if we match all Fns in the chain return true.
     */
    @Override
    protected SymbolFunctionResult match(Object row, PTFPartitionIterator<Object> pItr)
        throws HiveException {
      SymbolFunctionResult componentResult = null;
      for (SymbolFunction sFn : components) {
        if (row != null) {
          componentResult = sFn.match(row, pItr);
          if (!componentResult.matches) {
            result.matches = false;
            result.nextRow = componentResult.nextRow;
            return result;
          }
          row = pItr.resetToIndex(componentResult.nextRow);
        } else {
          if (!sFn.isOptional()) {
            result.matches = false;
            result.nextRow = componentResult.nextRow;
            return result;
          }
        }
      }

      result.matches = true;
      result.nextRow = componentResult.nextRow;
      return result;
    }

    @Override
    protected boolean isOptional() {
      return false;
    }
  }

  public static class SymbolFunctionResult {
    /*
     * does the row match the pattern represented by this SymbolFunction
     */
    public boolean matches;
    /*
     * what is the index of the row beyond the set of rows that match this pattern.
     */
    public int nextRow;
  }

  /**
   * Turns the pattern string into a {@link Chain} of SymbolFunctions. Symbol names are
   * lower-cased both when they are registered and when they are looked up.
   */
  public static class SymbolParser {
    String patternStr;
    String[] symbols;
    /** lower-cased symbol name -> { ExprNodeEvaluator, ObjectInspector } */
    HashMap<String, Object[]> symbolExprEvalMap;
    ArrayList<SymbolFunction> symbolFunctions;
    Chain symbolFnChain;

    public SymbolParser(String patternStr, ArrayList<String> symbolNames,
        ArrayList<ExprNodeEvaluator> symbolExprEvals, ArrayList<ObjectInspector> symbolExprOIs) {
      this.patternStr = patternStr;
      symbolExprEvalMap = new HashMap<>();
      int sz = symbolNames.size();
      for (int i = 0; i < sz; i++) {
        String symbolName = symbolNames.get(i);
        ExprNodeEvaluator symbolExprEval = symbolExprEvals.get(i);
        ObjectInspector symbolExprOI = symbolExprOIs.get(i);
        symbolExprEvalMap.put(symbolName.toLowerCase(),
            new Object[] {symbolExprEval, symbolExprOI});
      }
    }

    public SymbolFunction getSymbolFunction() {
      return symbolFnChain;
    }

    /**
     * Splits the pattern on '.', wraps each symbol in {@link Star} or {@link Plus} when it ends
     * in '*' or '+', and stores the resulting {@link Chain}.
     *
     * @throws SemanticException if the pattern names a symbol that was not registered
     */
    public void parse() throws SemanticException {
      symbols = patternStr.split("\\.");
      symbolFunctions = new ArrayList<>();

      for (String symbol : symbols) {
        boolean isStar = symbol.endsWith("*");
        boolean isPlus = symbol.endsWith("+");

        // strip the trailing quantifier before looking the symbol up
        symbol = (isStar || isPlus) ? symbol.substring(0, symbol.length() - 1) : symbol;
        Object[] symbolDetails = symbolExprEvalMap.get(symbol.toLowerCase());
        if (symbolDetails == null) {
          throw new SemanticException(String.format("Unknown Symbol %s", symbol));
        }

        ExprNodeEvaluator symbolExprEval = (ExprNodeEvaluator) symbolDetails[0];
        ObjectInspector symbolExprOI = (ObjectInspector) symbolDetails[1];
        SymbolFunction sFn = new Symbol(symbolExprEval, symbolExprOI);

        if (isStar) {
          sFn = new Star(sFn);
        } else if (isPlus) {
          sFn = new Plus(sFn);
        }
        symbolFunctions.add(sFn);
      }
      symbolFnChain = new Chain(symbolFunctions);
    }
  }

  /*
   * ResultExpression is a Select List with the following variation:
   * - the select keyword is optional. The parser checks if the expression doesn't start with
   *   select; if not it prefixes it.
   * - Window Fn clauses are not permitted.
   * - expressions can operate on the input columns plus the psuedo column 'path'
   *   which is an array of structs. The shape of the struct is
   *   the same as the input.
   */
  public static class ResultExpressionParser {
    String resultExprString;

    RowResolver selectListInputRowResolver;
    TypeCheckCtx selectListInputTypeCheckCtx;
    StructObjectInspector selectListInputOI;

    ArrayList<WindowExpressionSpec> selectSpec;

    ResultExprInfo resultExprInfo;

    public ResultExpressionParser(String resultExprString,
        RowResolver selectListInputRowResolver) {
      this.resultExprString = resultExprString;
      this.selectListInputRowResolver = selectListInputRowResolver;
    }

    /**
     * Parses and validates the result expression string and builds the
     * {@link ResultExprInfo} returned by {@link #getResultExprInfo()}.
     */
    public void translate() throws SemanticException, HiveException {
      setupSelectListInputInfo();
      fixResultExprString();
      parse();
      validateSelectExpr();
      buildSelectListEvaluators();
    }

    public ResultExprInfo getResultExprInfo() {
      return resultExprInfo;
    }

    private void buildSelectListEvaluators() throws SemanticException, HiveException {
      resultExprInfo = new ResultExprInfo();
      resultExprInfo.resultExprEvals = new ArrayList<>();
      resultExprInfo.resultExprNames = new ArrayList<>();
      resultExprInfo.resultExprNodes = new ArrayList<>();
      //result
      ArrayList<ObjectInspector> selectListExprOIs = new ArrayList<>();
      int i = 0;
      for (WindowExpressionSpec expr : selectSpec) {
        String selectColName = expr.getAlias();
        ASTNode selectColumnNode = expr.getExpression();
        ExprNodeDesc selectColumnExprNode =
            ResultExpressionParser.buildExprNode(selectColumnNode,
                selectListInputTypeCheckCtx);
        ExprNodeEvaluator selectColumnExprEval =
            ExprNodeEvaluatorFactory.get(selectColumnExprNode);
        ObjectInspector selectColumnOI = selectColumnExprEval.initialize(selectListInputOI);

        selectColName = getColumnName(selectColName, selectColumnExprNode, i);

        resultExprInfo.resultExprEvals.add(selectColumnExprEval);
        selectListExprOIs.add(selectColumnOI);
        resultExprInfo.resultExprNodes.add(selectColumnExprNode);
        resultExprInfo.resultExprNames.add(selectColName);
        i++;
      }

      resultExprInfo.resultOI = ObjectInspectorFactory.getStandardStructObjectInspector(
          resultExprInfo.resultExprNames, selectListExprOIs);
    }

    private void setupSelectListInputInfo() throws SemanticException {
      selectListInputTypeCheckCtx = new TypeCheckCtx(selectListInputRowResolver);
      selectListInputTypeCheckCtx.setUnparseTranslator(null);
      /*
       * create SelectListOI
       */
      selectListInputOI = PTFTranslator.getStandardStructOI(selectListInputRowResolver);
    }

    /*
     * prefix the expression with "select " unless it already starts with it (any case).
     */
    private void fixResultExprString() {
      String r = resultExprString.trim();
      // regionMatches returns false when r is shorter than "select"
      if (!r.regionMatches(true, 0, "select", 0, 6)) {
        r = "select " + r;
      }
      resultExprString = r;
    }

    private void parse() throws SemanticException {
      selectSpec = SemanticAnalyzer.parseSelect(resultExprString);
    }

    private void validateSelectExpr() throws SemanticException {
      for (WindowExpressionSpec expr : selectSpec) {
        PTFTranslator.validateNoLeadLagInValueBoundarySpec(expr.getExpression());
      }
    }

    /*
     * the alias if there is one, else the column name for a plain column reference,
     * else a generated "matchpath_col_<idx>" name.
     */
    private String getColumnName(String alias, ExprNodeDesc exprNode, int colIdx) {
      if (alias != null) {
        return alias;
      } else if (exprNode instanceof ExprNodeColumnDesc) {
        ExprNodeColumnDesc colDesc = (ExprNodeColumnDesc) exprNode;
        return colDesc.getColumn();
      }
      return "matchpath_col_" + colIdx;
    }

    /**
     * Translates an AST expression into an {@link ExprNodeDesc}.
     *
     * @throws SemanticException with the type-check error (or a generic message when there is
     *         none) if no descriptor was produced for {@code expr}
     */
    public static ExprNodeDesc buildExprNode(ASTNode expr,
        TypeCheckCtx typeCheckCtx) throws SemanticException {
      // todo: use SemanticAnalyzer::genExprNodeDesc
      // currently SA not available to PTFTranslator.
      Map<ASTNode, ExprNodeDesc> map = TypeCheckProcFactory
          .genExprNode(expr, typeCheckCtx);
      ExprNodeDesc desc = map.get(expr);
      if (desc == null) {
        String errMsg = typeCheckCtx.getError();
        if (errMsg == null) {
          errMsg = "Error in parsing ";
        }
        throw new SemanticException(errMsg);
      }
      return desc;
    }
  }

  public static final String PATHATTR_NAME = "tpath";

  /*
   * add array<struct> to the list of columns
   */
  protected static RowResolver createSelectListRR(MatchPath evaluator,
      PTFInputDef inpDef) throws SemanticException {
    RowResolver rr = new RowResolver();
    RowResolver inputRR = inpDef.getOutputShape().getRr();

    evaluator.inputColumnNamesMap = new HashMap<>();
    ArrayList<String> inputColumnNames = new ArrayList<>();

    ArrayList<ObjectInspector> inpColOIs = new ArrayList<>();

    for (ColumnInfo inpCInfo : inputRR.getColumnInfos()) {
      ColumnInfo cInfo = new ColumnInfo(inpCInfo);
      String colAlias = cInfo.getAlias();

      String[] tabColAlias = inputRR.reverseLookup(inpCInfo.getInternalName());
      if (tabColAlias != null) {
        colAlias = tabColAlias[1];
      }
      ASTNode inExpr = PTFTranslator.getASTNode(inpCInfo, inputRR);
      if (inExpr != null) {
        rr.putExpression(inExpr, cInfo);
        colAlias = inExpr.toStringTree().toLowerCase();
      } else {
        colAlias = colAlias == null ? cInfo.getInternalName() : colAlias;
        rr.put(cInfo.getTabAlias(), colAlias, cInfo);
      }

      evaluator.inputColumnNamesMap.put(cInfo.getInternalName(), colAlias);
      inputColumnNames.add(colAlias);
      inpColOIs.add(cInfo.getObjectInspector());
    }

    StandardListObjectInspector pathAttrOI =
        ObjectInspectorFactory.getStandardListObjectInspector(
            ObjectInspectorFactory.getStandardStructObjectInspector(inputColumnNames,
                inpColOIs));

    ColumnInfo pathColumn = new ColumnInfo(PATHATTR_NAME,
        TypeInfoUtils.getTypeInfoFromObjectInspector(pathAttrOI),
        null,
        false, false);
    rr.put(null, PATHATTR_NAME, pathColumn);

    return rr;
  }

  /*
   * build the OI for the select list input: the input fields that have an entry in
   * inputColumnNamesMap, followed by the tpath list-of-struct column.
   */
  protected static StructObjectInspector createSelectListOI(MatchPath evaluator,
      PTFInputDef inpDef) {
    StructObjectInspector inOI = inpDef.getOutputShape().getOI();
    ArrayList<String> inputColumnNames = new ArrayList<>();
    ArrayList<String> selectListNames = new ArrayList<>();
    ArrayList<ObjectInspector> fieldOIs = new ArrayList<>();
    for (StructField f : inOI.getAllStructFieldRefs()) {
      String inputColName = evaluator.inputColumnNamesMap.get(f.getFieldName());
      if (inputColName != null) {
        inputColumnNames.add(inputColName);
        selectListNames.add(f.getFieldName());
        fieldOIs.add(f.getFieldObjectInspector());
      }
    }

    StandardListObjectInspector pathAttrOI =
        ObjectInspectorFactory.getStandardListObjectInspector(
            ObjectInspectorFactory.getStandardStructObjectInspector(inputColumnNames,
                fieldOIs));

    ArrayList<ObjectInspector> selectFieldOIs = new ArrayList<>();
    selectFieldOIs.addAll(fieldOIs);
    selectFieldOIs.add(pathAttrOI);
    selectListNames.add(MatchPath.PATHATTR_NAME);
    return ObjectInspectorFactory.getStandardStructObjectInspector(
        selectListNames, selectFieldOIs);
  }

  /**
   * Builds the row the result expressions are evaluated against: the fields of
   * {@code currRow} copied to standard objects, followed by the matched path.
   */
  public static Object getSelectListInput(Object currRow, ObjectInspector rowOI,
      PTFPartitionIterator<Object> pItr, int sz) throws HiveException {
    ArrayList<Object> oRow = new ArrayList<>();
    List<?> currRowAsStdObject = (List<?>) ObjectInspectorUtils
        .copyToStandardObject(currRow, rowOI);
    oRow.addAll(currRowAsStdObject);
    oRow.add(getPath(currRow, rowOI, pItr, sz));
    return oRow;
  }

  /**
   * Copies {@code currRow} and up to {@code sz - 1} following rows from the iterator into a
   * list, then resets the iterator to the index it had for {@code currRow}.
   */
  public static ArrayList<Object> getPath(Object currRow, ObjectInspector rowOI,
      PTFPartitionIterator<Object> pItr, int sz) throws HiveException {
    int idx = pItr.getIndex() - 1;
    ArrayList<Object> path = new ArrayList<>();
    path.add(ObjectInspectorUtils.copyToStandardObject(currRow, rowOI));
    int pSz = 1;

    while (pSz < sz && pItr.hasNext()) {
      currRow = pItr.next();
      path.add(ObjectInspectorUtils.copyToStandardObject(currRow, rowOI));
      pSz++;
    }
    pItr.resetToIndex(idx);
    return path;
  }
}