Please wait. This can take some minutes ...
Many resources are needed to download a project. Please understand that we have to compensate our server costs. Thank you in advance.
Project price only 1 $
You can buy this project and download/modify it how often you want.
org.apache.hadoop.hive.ql.optimizer.calcite.translator.RexNodeConverter Maven / Gradle / Ivy
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hive.ql.optimizer.calcite.translator;
import java.math.BigDecimal;
import java.math.BigInteger;
import java.sql.Timestamp;
import java.util.ArrayList;
import java.util.Calendar;
import java.util.Date;
import java.util.GregorianCalendar;
import java.util.LinkedHashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import org.apache.calcite.avatica.util.TimeUnit;
import org.apache.calcite.plan.RelOptCluster;
import org.apache.calcite.rel.RelNode;
import org.apache.calcite.rel.type.RelDataType;
import org.apache.calcite.rel.type.RelDataTypeFactory;
import org.apache.calcite.rex.RexBuilder;
import org.apache.calcite.rex.RexCall;
import org.apache.calcite.rex.RexNode;
import org.apache.calcite.rex.RexUtil;
import org.apache.calcite.sql.SqlIntervalQualifier;
import org.apache.calcite.sql.SqlOperator;
import org.apache.calcite.sql.fun.SqlCastFunction;
import org.apache.calcite.sql.parser.SqlParserPos;
import org.apache.calcite.sql.type.SqlTypeName;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hive.common.type.Decimal128;
import org.apache.hadoop.hive.common.type.HiveChar;
import org.apache.hadoop.hive.common.type.HiveDecimal;
import org.apache.hadoop.hive.common.type.HiveIntervalDayTime;
import org.apache.hadoop.hive.common.type.HiveIntervalYearMonth;
import org.apache.hadoop.hive.common.type.HiveVarchar;
import org.apache.hadoop.hive.ql.exec.FunctionRegistry;
import org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException;
import org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException.UnsupportedFeature;
import org.apache.hadoop.hive.ql.parse.ParseUtils;
import org.apache.hadoop.hive.ql.parse.RowResolver;
import org.apache.hadoop.hive.ql.parse.SemanticException;
import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDescUtils;
import org.apache.hadoop.hive.ql.plan.ExprNodeFieldDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFBaseBinary;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFBaseCompare;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFBaseNumeric;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFBridge;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFToBinary;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFToChar;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFToDate;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFToDecimal;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFToVarchar;
import org.apache.hadoop.hive.serde2.objectinspector.ConstantObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils.PrimitiveGrouping;
import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableList.Builder;
import com.google.common.collect.ImmutableMap;
public class RexNodeConverter {
private static final Log LOG = LogFactory.getLog(RexNodeConverter.class);
private static class InputCtx {
private final RelDataType calciteInpDataType;
private final ImmutableMap hiveNameToPosMap;
private final RowResolver hiveRR;
private final int offsetInCalciteSchema;
private InputCtx(RelDataType calciteInpDataType, ImmutableMap hiveNameToPosMap,
RowResolver hiveRR, int offsetInCalciteSchema) {
this.calciteInpDataType = calciteInpDataType;
this.hiveNameToPosMap = hiveNameToPosMap;
this.hiveRR = hiveRR;
this.offsetInCalciteSchema = offsetInCalciteSchema;
}
};
private final RelOptCluster cluster;
private final ImmutableList inputCtxs;
private final boolean flattenExpr;
public RexNodeConverter(RelOptCluster cluster, RelDataType inpDataType,
ImmutableMap nameToPosMap, int offset, boolean flattenExpr) {
this.cluster = cluster;
this.inputCtxs = ImmutableList.of(new InputCtx(inpDataType, nameToPosMap, null, offset));
this.flattenExpr = flattenExpr;
}
public RexNodeConverter(RelOptCluster cluster, List inpCtxLst, boolean flattenExpr) {
this.cluster = cluster;
this.inputCtxs = ImmutableList. builder().addAll(inpCtxLst).build();
this.flattenExpr = flattenExpr;
}
public RexNode convert(ExprNodeDesc expr) throws SemanticException {
if (expr instanceof ExprNodeGenericFuncDesc) {
return convert((ExprNodeGenericFuncDesc) expr);
} else if (expr instanceof ExprNodeConstantDesc) {
return convert((ExprNodeConstantDesc) expr);
} else if (expr instanceof ExprNodeColumnDesc) {
return convert((ExprNodeColumnDesc) expr);
} else if (expr instanceof ExprNodeFieldDesc) {
return convert((ExprNodeFieldDesc) expr);
} else {
throw new RuntimeException("Unsupported Expression");
}
// TODO: handle ExprNodeColumnListDesc
}
private RexNode convert(final ExprNodeFieldDesc fieldDesc) throws SemanticException {
RexNode rexNode = convert(fieldDesc.getDesc());
if (rexNode instanceof RexCall) {
// regular case of accessing nested field in a column
return cluster.getRexBuilder().makeFieldAccess(rexNode, fieldDesc.getFieldName(), true);
} else {
// This may happen for schema-less tables, where columns are dynamically
// supplied by serdes.
throw new CalciteSemanticException("Unexpected rexnode : "
+ rexNode.getClass().getCanonicalName(), UnsupportedFeature.Schema_less_table);
}
}
private RexNode convert(final ExprNodeGenericFuncDesc func) throws SemanticException {
ExprNodeDesc tmpExprNode;
RexNode tmpRN;
List childRexNodeLst = new LinkedList();
Builder argTypeBldr = ImmutableList. builder();
// TODO: 1) Expand to other functions as needed 2) What about types other than primitive.
TypeInfo tgtDT = null;
GenericUDF tgtUdf = func.getGenericUDF();
boolean isNumeric = (tgtUdf instanceof GenericUDFBaseBinary
&& func.getTypeInfo().getCategory() == Category.PRIMITIVE
&& (PrimitiveGrouping.NUMERIC_GROUP == PrimitiveObjectInspectorUtils.getPrimitiveGrouping(
((PrimitiveTypeInfo) func.getTypeInfo()).getPrimitiveCategory())));
boolean isCompare = !isNumeric && tgtUdf instanceof GenericUDFBaseCompare;
if (isNumeric) {
tgtDT = func.getTypeInfo();
assert func.getChildren().size() == 2;
// TODO: checking 2 children is useless, compare already does that.
} else if (isCompare && (func.getChildren().size() == 2)) {
tgtDT = FunctionRegistry.getCommonClassForComparison(func.getChildren().get(0)
.getTypeInfo(), func.getChildren().get(1).getTypeInfo());
}
for (ExprNodeDesc childExpr : func.getChildren()) {
tmpExprNode = childExpr;
if (tgtDT != null
&& TypeInfoUtils.isConversionRequiredForComparison(tgtDT, childExpr.getTypeInfo())) {
if (isCompare) {
// For compare, we will convert requisite children
tmpExprNode = ParseUtils.createConversionCast(childExpr, (PrimitiveTypeInfo) tgtDT);
} else if (isNumeric) {
// For numeric, we'll do minimum necessary cast - if we cast to the type
// of expression, bad things will happen.
PrimitiveTypeInfo minArgType = ExprNodeDescUtils.deriveMinArgumentCast(childExpr, tgtDT);
tmpExprNode = ParseUtils.createConversionCast(childExpr, minArgType);
} else {
throw new AssertionError("Unexpected " + tgtDT + " - not a numeric op or compare");
}
}
argTypeBldr.add(TypeConverter.convert(tmpExprNode.getTypeInfo(), cluster.getTypeFactory()));
tmpRN = convert(tmpExprNode);
childRexNodeLst.add(tmpRN);
}
// See if this is an explicit cast.
RexNode expr = null;
RelDataType retType = null;
expr = handleExplicitCast(func, childRexNodeLst);
if (expr == null) {
// This is not a cast; process the function.
retType = TypeConverter.convert(func.getTypeInfo(), cluster.getTypeFactory());
SqlOperator calciteOp = SqlFunctionConverter.getCalciteOperator(func.getFuncText(),
func.getGenericUDF(), argTypeBldr.build(), retType);
expr = cluster.getRexBuilder().makeCall(calciteOp, childRexNodeLst);
} else {
retType = expr.getType();
}
// TODO: Cast Function in Calcite have a bug where it infer type on cast throws
// an exception
if (flattenExpr && (expr instanceof RexCall)
&& !(((RexCall) expr).getOperator() instanceof SqlCastFunction)) {
RexCall call = (RexCall) expr;
expr = cluster.getRexBuilder().makeCall(retType, call.getOperator(),
RexUtil.flatten(call.getOperands(), call.getOperator()));
}
return expr;
}
private boolean castExprUsingUDFBridge(GenericUDF gUDF) {
boolean castExpr = false;
if (gUDF != null && gUDF instanceof GenericUDFBridge) {
String udfClassName = ((GenericUDFBridge) gUDF).getUdfClassName();
if (udfClassName != null) {
int sp = udfClassName.lastIndexOf('.');
// TODO: add method to UDFBridge to say if it is a cast func
if (sp >= 0 & (sp + 1) < udfClassName.length()) {
udfClassName = udfClassName.substring(sp + 1);
if (udfClassName.equals("UDFToBoolean") || udfClassName.equals("UDFToByte")
|| udfClassName.equals("UDFToDouble") || udfClassName.equals("UDFToInteger")
|| udfClassName.equals("UDFToLong") || udfClassName.equals("UDFToShort")
|| udfClassName.equals("UDFToFloat") || udfClassName.equals("UDFToString"))
castExpr = true;
}
}
}
return castExpr;
}
private RexNode handleExplicitCast(ExprNodeGenericFuncDesc func, List childRexNodeLst)
throws CalciteSemanticException {
RexNode castExpr = null;
if (childRexNodeLst != null && childRexNodeLst.size() == 1) {
GenericUDF udf = func.getGenericUDF();
if ((udf instanceof GenericUDFToChar) || (udf instanceof GenericUDFToVarchar)
|| (udf instanceof GenericUDFToDecimal) || (udf instanceof GenericUDFToDate)
|| (udf instanceof GenericUDFToBinary) || castExprUsingUDFBridge(udf)) {
castExpr = cluster.getRexBuilder().makeAbstractCast(
TypeConverter.convert(func.getTypeInfo(), cluster.getTypeFactory()),
childRexNodeLst.get(0));
}
}
return castExpr;
}
private InputCtx getInputCtx(ExprNodeColumnDesc col) throws SemanticException {
InputCtx ctxLookingFor = null;
if (inputCtxs.size() == 1) {
ctxLookingFor = inputCtxs.get(0);
} else {
String tableAlias = col.getTabAlias();
String colAlias = col.getColumn();
int noInp = 0;
for (InputCtx ic : inputCtxs) {
if (tableAlias == null || ic.hiveRR.hasTableAlias(tableAlias)) {
if (ic.hiveRR.getPosition(colAlias) >= 0) {
ctxLookingFor = ic;
noInp++;
}
}
}
if (noInp > 1)
throw new RuntimeException("Ambigous column mapping");
}
return ctxLookingFor;
}
protected RexNode convert(ExprNodeColumnDesc col) throws SemanticException {
InputCtx ic = getInputCtx(col);
int pos = ic.hiveNameToPosMap.get(col.getColumn());
return cluster.getRexBuilder().makeInputRef(
ic.calciteInpDataType.getFieldList().get(pos).getType(), pos + ic.offsetInCalciteSchema);
}
private static final BigInteger MIN_LONG_BI = BigInteger.valueOf(Long.MIN_VALUE),
MAX_LONG_BI = BigInteger.valueOf(Long.MAX_VALUE);
protected RexNode convert(ExprNodeConstantDesc literal) throws CalciteSemanticException {
RexBuilder rexBuilder = cluster.getRexBuilder();
RelDataTypeFactory dtFactory = rexBuilder.getTypeFactory();
PrimitiveTypeInfo hiveType = (PrimitiveTypeInfo) literal.getTypeInfo();
RelDataType calciteDataType = TypeConverter.convert(hiveType, dtFactory);
PrimitiveCategory hiveTypeCategory = hiveType.getPrimitiveCategory();
ConstantObjectInspector coi = literal.getWritableObjectInspector();
Object value = ObjectInspectorUtils.copyToStandardJavaObject(coi.getWritableConstantValue(),
coi);
RexNode calciteLiteral = null;
// TODO: Verify if we need to use ConstantObjectInspector to unwrap data
switch (hiveTypeCategory) {
case BOOLEAN:
calciteLiteral = rexBuilder.makeLiteral(((Boolean) value).booleanValue());
break;
case BYTE:
calciteLiteral = rexBuilder.makeExactLiteral(new BigDecimal((Byte) value), calciteDataType);
break;
case SHORT:
calciteLiteral = rexBuilder.makeExactLiteral(new BigDecimal((Short) value), calciteDataType);
break;
case INT:
calciteLiteral = rexBuilder.makeExactLiteral(new BigDecimal((Integer) value));
break;
case LONG:
calciteLiteral = rexBuilder.makeBigintLiteral(new BigDecimal((Long) value));
break;
// TODO: is Decimal an exact numeric or approximate numeric?
case DECIMAL:
if (value instanceof HiveDecimal) {
value = ((HiveDecimal) value).bigDecimalValue();
} else if (value instanceof Decimal128) {
value = ((Decimal128) value).toBigDecimal();
}
if (value == null) {
// We have found an invalid decimal value while enforcing precision and
// scale. Ideally,
// we would replace it with null here, which is what Hive does. However,
// we need to plumb
// this thru up somehow, because otherwise having different expression
// type in AST causes
// the plan generation to fail after CBO, probably due to some residual
// state in SA/QB.
// For now, we will not run CBO in the presence of invalid decimal
// literals.
throw new CalciteSemanticException("Expression " + literal.getExprString()
+ " is not a valid decimal", UnsupportedFeature.Invalid_decimal);
// TODO: return createNullLiteral(literal);
}
BigDecimal bd = (BigDecimal) value;
BigInteger unscaled = bd.unscaledValue();
if (unscaled.compareTo(MIN_LONG_BI) >= 0 && unscaled.compareTo(MAX_LONG_BI) <= 0) {
calciteLiteral = rexBuilder.makeExactLiteral(bd);
} else {
// CBO doesn't support unlimited precision decimals. In practice, this
// will work...
// An alternative would be to throw CboSemanticException and fall back
// to no CBO.
RelDataType relType = cluster.getTypeFactory().createSqlType(SqlTypeName.DECIMAL,
bd.scale(), unscaled.toString().length());
calciteLiteral = rexBuilder.makeExactLiteral(bd, relType);
}
break;
case FLOAT:
calciteLiteral = rexBuilder.makeApproxLiteral(new BigDecimal((Float) value), calciteDataType);
break;
case DOUBLE:
calciteLiteral = rexBuilder.makeApproxLiteral(new BigDecimal((Double) value), calciteDataType);
break;
case CHAR:
if (value instanceof HiveChar) {
value = ((HiveChar) value).getValue();
}
calciteLiteral = rexBuilder.makeLiteral((String) value);
break;
case VARCHAR:
if (value instanceof HiveVarchar) {
value = ((HiveVarchar) value).getValue();
}
calciteLiteral = rexBuilder.makeLiteral((String) value);
break;
case STRING:
calciteLiteral = rexBuilder.makeLiteral((String) value);
break;
case DATE:
Calendar cal = new GregorianCalendar();
cal.setTime((Date) value);
calciteLiteral = rexBuilder.makeDateLiteral(cal);
break;
case TIMESTAMP:
Calendar c = null;
if (value instanceof Calendar) {
c = (Calendar)value;
} else {
c = Calendar.getInstance();
c.setTimeInMillis(((Timestamp)value).getTime());
}
calciteLiteral = rexBuilder.makeTimestampLiteral(c, RelDataType.PRECISION_NOT_SPECIFIED);
break;
case INTERVAL_YEAR_MONTH:
// Calcite year-month literal value is months as BigDecimal
BigDecimal totalMonths = BigDecimal.valueOf(((HiveIntervalYearMonth) value).getTotalMonths());
calciteLiteral = rexBuilder.makeIntervalLiteral(totalMonths,
new SqlIntervalQualifier(TimeUnit.YEAR, TimeUnit.MONTH, new SqlParserPos(1,1)));
break;
case INTERVAL_DAY_TIME:
// Calcite day-time interval is millis value as BigDecimal
// Seconds converted to millis
BigDecimal secsValueBd = BigDecimal.valueOf(((HiveIntervalDayTime) value).getTotalSeconds() * 1000);
// Nanos converted to millis
BigDecimal nanosValueBd = BigDecimal.valueOf(((HiveIntervalDayTime) value).getNanos(), 6);
calciteLiteral = rexBuilder.makeIntervalLiteral(secsValueBd.add(nanosValueBd),
new SqlIntervalQualifier(TimeUnit.DAY, TimeUnit.SECOND, new SqlParserPos(1,1)));
break;
case VOID:
calciteLiteral = cluster.getRexBuilder().makeLiteral(null,
cluster.getTypeFactory().createSqlType(SqlTypeName.NULL), true);
break;
case BINARY:
case UNKNOWN:
default:
throw new RuntimeException("UnSupported Literal");
}
return calciteLiteral;
}
private RexNode createNullLiteral(ExprNodeDesc expr) throws CalciteSemanticException {
return cluster.getRexBuilder().makeNullLiteral(
TypeConverter.convert(expr.getTypeInfo(), cluster.getTypeFactory()).getSqlTypeName());
}
public static RexNode convert(RelOptCluster cluster, ExprNodeDesc joinCondnExprNode,
List inputRels, LinkedHashMap relToHiveRR,
Map> relToHiveColNameCalcitePosMap, boolean flattenExpr)
throws SemanticException {
List inputCtxLst = new ArrayList();
int offSet = 0;
for (RelNode r : inputRels) {
inputCtxLst.add(new InputCtx(r.getRowType(), relToHiveColNameCalcitePosMap.get(r), relToHiveRR
.get(r), offSet));
offSet += r.getRowType().getFieldCount();
}
return (new RexNodeConverter(cluster, inputCtxLst, flattenExpr)).convert(joinCondnExprNode);
}
}