org.apache.calcite.piglet.PigRelSqlUdfs Maven / Gradle / Ivy
Show all versions of calcite-piglet Show documentation
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to you under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.calcite.piglet;
import org.apache.calcite.adapter.java.JavaTypeFactory;
import org.apache.calcite.linq4j.function.Functions;
import org.apache.calcite.rel.RelNode;
import org.apache.calcite.rel.type.RelDataType;
import org.apache.calcite.rel.type.RelDataTypeFactory;
import org.apache.calcite.rel.type.RelDataTypeFactoryImpl;
import org.apache.calcite.rel.type.RelDataTypeField;
import org.apache.calcite.rex.RexNode;
import org.apache.calcite.schema.ScalarFunction;
import org.apache.calcite.schema.impl.ScalarFunctionImpl;
import org.apache.calcite.sql.SqlCallBinding;
import org.apache.calcite.sql.SqlOperandCountRange;
import org.apache.calcite.sql.SqlOperator;
import org.apache.calcite.sql.type.MultisetSqlType;
import org.apache.calcite.sql.type.OperandTypes;
import org.apache.calcite.sql.type.SqlOperandCountRanges;
import org.apache.calcite.sql.type.SqlOperandMetadata;
import org.apache.calcite.sql.type.SqlOperandTypeChecker;
import org.apache.calcite.sql.type.SqlReturnTypeInference;
import org.apache.calcite.sql.type.SqlTypeFamily;
import org.apache.calcite.sql.type.SqlTypeName;
import org.apache.calcite.sql.validate.SqlUserDefinedFunction;
import org.apache.pig.FuncSpec;
import org.apache.pig.data.BagFactory;
import org.apache.pig.data.DataBag;
import org.apache.pig.data.Tuple;
import org.apache.pig.data.TupleFactory;
import com.google.common.collect.ImmutableList;
import java.lang.reflect.Method;
import java.math.BigDecimal;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import static org.apache.calcite.piglet.PigTypes.TYPE_FACTORY;
/**
* User-defined functions ({@link SqlUserDefinedFunction UDFs})
* needed for Pig-to-{@link RelNode} translation.
*/
public class PigRelSqlUdfs {
// Private no-op constructor: code outside this class cannot instantiate it.
private PigRelSqlUdfs() {
}
// Defines ScalarFunctions from their implementations. Each one is created
// from this class and the name of the implementing method ("buildTuple",
// "buildBag", "projectMultiset").
private static final ScalarFunction PIG_TUPLE_FUNC =
ScalarFunctionImpl.create(PigRelSqlUdfs.class, "buildTuple");
private static final ScalarFunction PIG_BAG_FUNC =
ScalarFunctionImpl.create(PigRelSqlUdfs.class, "buildBag");
private static final ScalarFunction MULTISET_PROJECTION_FUNC =
ScalarFunctionImpl.create(PigRelSqlUdfs.class, "projectMultiset");
/**
 * Multiset projection projects a subset of columns from the component type
 * of a multiset type. The result is still a multiset, but its component
 * type only has a subset of the columns of the original component type.
 *
 * <p>For example, given a multiset type
 * {@code M = [(A: int, B: double, C: varchar)]},
 * a projection
 * {@code MULTISET_PROJECTION(M, A, C)}
 * gives a new multiset
 * {@code N = [(A: int, C: varchar)]}.
 *
 * <p>The operator takes its return type inference from
 * {@code multisetProjectionInfer()} and its operand checker from
 * {@code multisetProjectionCheck()}.
 */
static final SqlUserDefinedFunction MULTISET_PROJECTION =
new PigUserDefinedFunction("MULTISET_PROJECTION",
multisetProjectionInfer(), multisetProjectionCheck(),
MULTISET_PROJECTION_FUNC);
/**
 * Creates the {@code PIG_TUPLE} operator, which builds a Pig Tuple from a
 * list of relational operands.
 *
 * <p>The operand type families and operand types of the operator are taken
 * from {@code operands}; its parameters are named {@code arg0},
 * {@code arg1}, and so on.
 *
 * @param operands Relational operands
 * @return Pig Tuple SqlUDF
 */
static SqlUserDefinedFunction createPigTupleUDF(ImmutableList<RexNode> operands) {
  // The last argument is the "is this parameter optional" predicate;
  // it answers false for every ordinal.
  final SqlOperandMetadata operandMetadata =
      OperandTypes.operandMetadata(getTypeFamilies(operands),
          typeFactory -> getRelDataTypes(operands), i -> "arg" + i,
          i -> false);
  return new PigUserDefinedFunction("PIG_TUPLE",
      infer(PigRelSqlUdfs.PIG_TUPLE_FUNC), operandMetadata,
      PigRelSqlUdfs.PIG_TUPLE_FUNC);
}
/**
 * Creates the {@code PIG_BAG} operator, which builds a Pig DataBag from a
 * list of relational operands.
 *
 * <p>The operand type families and operand types of the operator are taken
 * from {@code operands}; its parameters are named {@code arg0},
 * {@code arg1}, and so on.
 *
 * @param operands Relational operands
 * @return Pig DataBag SqlUDF
 */
static SqlUserDefinedFunction createPigBagUDF(ImmutableList<RexNode> operands) {
  // The last argument is the "is this parameter optional" predicate;
  // it answers false for every ordinal.
  final SqlOperandMetadata operandMetadata =
      OperandTypes.operandMetadata(getTypeFamilies(operands),
          typeFactory -> getRelDataTypes(operands), i -> "arg" + i,
          i -> false);
  return new PigUserDefinedFunction("PIG_BAG",
      infer(PigRelSqlUdfs.PIG_BAG_FUNC), operandMetadata,
      PigRelSqlUdfs.PIG_BAG_FUNC);
}
/**
 * Wraps a Pig UDF as a generic SqlUDF operator.
 *
 * <p>The operator declares a single parameter of family {@code ANY} whose
 * type is {@code inputType}, and always reports {@code returnType} as its
 * result type.
 *
 * @param udfName Name of the UDF
 * @param method Method "exec" for implementing the UDF
 * @param funcSpec Pig Funcspec
 * @param inputType Argument type for the input
 * @param returnType Function return data type
 * @return SqlUDF operator implemented by {@code method}
 */
static SqlUserDefinedFunction createGeneralPigUdf(String udfName,
    Method method, FuncSpec funcSpec, RelDataType inputType,
    RelDataType returnType) {
  // One parameter, named "arg0", never optional.
  final SqlOperandMetadata metadata =
      OperandTypes.operandMetadata(
          ImmutableList.of(SqlTypeFamily.ANY),
          factory -> ImmutableList.of(inputType),
          ordinal -> "arg" + ordinal,
          ordinal -> false);
  // The result type does not depend on the call being bound.
  final SqlReturnTypeInference fixedReturnType = binding -> returnType;
  final ScalarFunction implementation = ScalarFunctionImpl.createUnsafe(method);
  return new PigUserDefinedFunction(udfName, fixedReturnType, metadata,
      implementation, funcSpec);
}
/**
 * Returns a {@link SqlReturnTypeInference} for multiset projection operator.
 *
 * <p>Operand 0 is cast to a multiset type; operands 1..n are read as literal
 * integers and used as zero-based indexes into the field list of the
 * multiset's component type. With a single index the result is a multiset of
 * that field's type (or the source type itself when the source has only one
 * field); with several indexes the result is a multiset of a struct made of
 * the selected fields.
 */
private static SqlReturnTypeInference multisetProjectionInfer() {
  return opBinding -> {
    final MultisetSqlType source = (MultisetSqlType) opBinding.getOperandType(0);
    final List<RelDataTypeField> fields = source.getComponentType().getFieldList();
    final int operandCount = opBinding.getOperandCount();
    // Project a multiset of single column
    if (operandCount == 2) {
      final int fieldNo = opBinding.getOperandLiteralValue(1, Integer.class);
      if (fields.size() == 1) {
        // Corner case: source with only single column, nothing to do.
        assert fieldNo == 0;
        return source;
      } else {
        return TYPE_FACTORY.createMultisetType(fields.get(fieldNo).getType(), -1);
      }
    }
    // Construct a multiset of records of the input argument types
    final List<String> destNames = new ArrayList<>();
    final List<RelDataType> destTypes = new ArrayList<>();
    for (int i = 1; i < operandCount; i++) {
      final int fieldNo = opBinding.getOperandLiteralValue(i, Integer.class);
      final RelDataTypeField field = fields.get(fieldNo);
      destNames.add(field.getName());
      destTypes.add(field.getType());
    }
    return TYPE_FACTORY.createMultisetType(
        TYPE_FACTORY.createStructType(destTypes, destNames), -1);
  };
}
/**
 * Returns the {@link SqlOperandMetadata} that checks the operands of the
 * multiset projection operator.
 *
 * <p>A call passes the check when it has at least two operands, operand 0 is
 * a multiset, and every following operand is a literal whose value is a
 * {@link BigDecimal} and whose integer value lies between 0 and the index of
 * the last field of the multiset's component type.
 */
private static SqlOperandMetadata multisetProjectionCheck() {
  // This should not really be a UDF. A SQL UDF has a fixed number of named
  // parameters, and this does not. But let's pretend that it has two
  // parameters of type 'ANY'
  final int paramCount = 2;
  return new SqlOperandMetadata() {
    @Override public boolean checkOperandTypes(
        SqlCallBinding callBinding, boolean throwOnFailure) {
      // NOTE(review): throwOnFailure is not consulted here; every failure is
      // reported by returning false. Confirm that callers expect this.
      final int operandCount = callBinding.getOperandCount();
      // Need at least two arguments
      if (operandCount < 2) {
        return false;
      }
      // The first argument should be a multiset
      final RelDataType firstType = callBinding.getOperandType(0);
      if (!(firstType instanceof MultisetSqlType)) {
        return false;
      }
      // All the subsequent arguments should be appropriate integers
      final MultisetSqlType source = (MultisetSqlType) firstType;
      final int maxFieldNo = source.getComponentType().getFieldCount() - 1;
      for (int i = 1; i < operandCount; i++) {
        if (!(callBinding.getOperandLiteralValue(i, Comparable.class)
            instanceof BigDecimal)) {
          return false;
        }
        final int fieldNo =
            callBinding.getOperandLiteralValue(i, Integer.class);
        // Field number should be between 0 and maxFieldNo
        if (fieldNo < 0 || fieldNo > maxFieldNo) {
          return false;
        }
      }
      return true;
    }

    @Override public SqlOperandCountRange getOperandCountRange() {
      return SqlOperandCountRanges.from(2);
    }

    @Override public String getAllowedSignatures(SqlOperator op, String opName) {
      return opName + "(...)";
    }

    @Override public List<RelDataType> paramTypes(
        RelDataTypeFactory typeFactory) {
      return Functions.generate(paramCount,
          i -> typeFactory.createSqlType(SqlTypeName.ANY));
    }

    @Override public List<String> paramNames() {
      return Functions.generate(paramCount, i -> "arg" + i);
    }

    @Override public boolean isFixedParameters() {
      return true;
    }
  };
}
/**
 * Helper method to return a list of SqlTypeFamily for a given list of
 * relational operands.
 *
 * <p>An operand whose SQL type name has no family is mapped to
 * {@link SqlTypeFamily#ANY}.
 *
 * @param operands List of relational operands
 * @return List of SqlTypeFamily objects, one per operand, in operand order
 */
private static List<SqlTypeFamily> getTypeFamilies(ImmutableList<RexNode> operands) {
  final List<SqlTypeFamily> ret = new ArrayList<>(operands.size());
  for (RexNode operand : operands) {
    final SqlTypeFamily family = operand.getType().getSqlTypeName().getFamily();
    ret.add(family != null ? family : SqlTypeFamily.ANY);
  }
  return ret;
}
/**
 * Helper method to return a list of RelDataType for a given list of
 * relational operands.
 *
 * @param operands List of relational operands
 * @return List of RelDataTypes, one per operand, in operand order
 */
private static List<RelDataType> getRelDataTypes(ImmutableList<RexNode> operands) {
  final List<RelDataType> ret = new ArrayList<>(operands.size());
  for (RexNode operand : operands) {
    ret.add(operand.getType());
  }
  return ret;
}
/**
 * Builds a {@link SqlReturnTypeInference} whose answer is the return type
 * of the given function, regardless of the operator binding it is asked
 * about.
 *
 * @param function ScalarFunction
 * @return SqlReturnTypeInference
 */
private static SqlReturnTypeInference infer(final ScalarFunction function) {
  // The type is looked up each time the inference is invoked, not once here.
  final SqlReturnTypeInference inference = binding -> {
    return getRelDataType(function);
  };
  return inference;
}
/**
 * Gets the return data type for a given function.
 *
 * <p>A function whose return type is the Java class {@code Object} is
 * reported as nullable {@code ANY}; any other return type is passed through
 * the type factory's {@code toSql} conversion.
 *
 * @param function ScalarFunction
 * @return returned data type
 */
private static RelDataType getRelDataType(ScalarFunction function) {
  final JavaTypeFactory factory = TYPE_FACTORY;
  final RelDataType declared = function.getReturnType(factory);
  final boolean returnsPlainObject =
      declared instanceof RelDataTypeFactoryImpl.JavaType
          && Object.class
              == ((RelDataTypeFactoryImpl.JavaType) declared).getJavaClass();
  if (!returnsPlainObject) {
    return factory.toSql(declared);
  }
  final RelDataType anyType = factory.createSqlType(SqlTypeName.ANY);
  return factory.createTypeWithNullability(anyType, true);
}
/**
 * Implementation for PIG_TUPLE functions. Wraps the given objects, in
 * order, into a Pig Tuple obtained from the shared {@link TupleFactory}.
 *
 * @param elements Array of element objects
 * @return Pig Tuple
 */
public static Tuple buildTuple(Object... elements) {
  final TupleFactory factory = TupleFactory.getInstance();
  final List<Object> fields = Arrays.asList(elements);
  return factory.newTuple(fields);
}
/**
* Implementation for PIG_BAG functions. Builds a Pig DataBag from
* the corresponding input
*
* @param elements Input that contains a bag
* @return Pig Tuple
*/
public static Tuple buildBag(Object... elements) {
final TupleFactory tupleFactory = TupleFactory.getInstance();
final BagFactory bagFactory = BagFactory.getInstance();
// Convert each row into a Tuple
List tupleList = new ArrayList<>();
if (elements != null) {
// The first input contains a list of rows for the bag
final List bag = (elements[0] instanceof List)
? (List) elements[0]
: Collections.singletonList(elements[0]);
for (Object row : bag) {
tupleList.add(tupleFactory.newTuple(Arrays.asList(row)));
}
}
// Then build a bag from the tuple list
DataBag resultBag = bagFactory.newDefaultBag(tupleList);
// The returned result is a new Tuple with the newly constructed DataBag
// as the first item.
List