// org.apache.pig.builtin.HiveUDAF Maven / Gradle / Ivy
// The newest version!
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.pig.builtin;
import java.io.IOException;
import java.util.Iterator;
import java.util.List;
import org.apache.hadoop.hive.ql.exec.UDAF;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFBridge;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator.AggregationBuffer;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator.Mode;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFParameterInfo;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFResolver;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFResolver2;
import org.apache.hadoop.hive.ql.udf.generic.SimpleGenericUDAFParameterInfo;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
import org.apache.pig.Algebraic;
import org.apache.pig.EvalFunc;
import org.apache.pig.ResourceSchema;
import org.apache.pig.ResourceSchema.ResourceFieldSchema;
import org.apache.pig.data.DataBag;
import org.apache.pig.data.Tuple;
import org.apache.pig.data.TupleFactory;
import org.apache.pig.impl.logicalLayer.schema.Schema;
import org.apache.pig.impl.logicalLayer.schema.Schema.FieldSchema;
import org.apache.pig.impl.util.hive.HiveUtils;
/**
* Makes a Hive UDAF or GenericUDAF usable as a Pig aggregate function.
* Example:
* <pre>
* define avg HiveUDAF('avg');
* A = load 'mydata' as (name:chararray, num:double);
* B = group A by name;
* C = foreach B generate group, avg(A.num);
* </pre>
*/
public class HiveUDAF extends HiveUDFBase implements Algebraic {
private boolean inited = false;
private String funcName;
private String params;
private GenericUDAFResolver udaf;
static class SchemaAndEvaluatorInfo {
private TypeInfo inputTypeInfo;
private TypeInfo outputTypeInfo;
private TypeInfo intermediateOutputTypeInfo;
private ObjectInspector[] inputObjectInspectorAsArray;
private ObjectInspector[] intermediateInputObjectInspectorAsArray;
private StructObjectInspector inputObjectInspector;
private ObjectInspector intermediateInputObjectInspector;
private ObjectInspector intermediateOutputObjectInspector;
private ObjectInspector outputObjectInspector;
private GenericUDAFEvaluator evaluator;
/**
 * Derives the Hive {@link TypeInfo} for the function's input.
 *
 * <p>The field examined is the first field of the schema nested inside the first field of
 * {@code inputSchema} (presumably the tuple inside the input bag; confirm against callers).
 *
 * @param inputSchema Pig schema of the function input
 * @return the Hive type information computed by {@code HiveUtils.getTypeInfo}
 * @throws IOException if the schema lookup or the type conversion fails
 */
private static TypeInfo getInputTypeInfo(Schema inputSchema) throws IOException {
    Schema nestedSchema = inputSchema.getField(0).schema;
    return HiveUtils.getTypeInfo(new ResourceFieldSchema(nestedSchema.getField(0)));
}
private static ObjectInspector[] getInputObjectInspectorAsArray(TypeInfo inputTypeInfo,
ConstantObjectInspectInfo constantsInfo) throws IOException {
StructObjectInspector inputObjectInspector = (StructObjectInspector)HiveUtils.createObjectInspector(inputTypeInfo);
ObjectInspector[] arguments = new ObjectInspector[inputObjectInspector.getAllStructFieldRefs().size()];
for (int i=0;i {
// Set to true by exec() once schemaAndEvaluatorInfo.init(...) has run, so init happens only once.
private boolean inited = false;
// Name passed to instantiateUDAF(...) when exec() initialises the evaluator.
private String funcName;
// Result of ConstantObjectInspectInfo.parse(params); stays null when the one-argument constructor is used.
ConstantObjectInspectInfo constantsInfo;
// Holds the evaluator and the object inspectors that exec() reads.
private SchemaAndEvaluatorInfo schemaAndEvaluatorInfo = new SchemaAndEvaluatorInfo();
// Factory used by exec() to create the result tuple.
private static TupleFactory tf = TupleFactory.getInstance();
/**
 * Creates the function for the named Hive aggregate without constant parameters;
 * {@code constantsInfo} is not assigned by this constructor.
 *
 * @param funcName name later passed to {@code instantiateUDAF} on the first {@code exec} call
 */
public Initial(String funcName) {
this.funcName = funcName;
}
/**
 * Creates the function for the named Hive aggregate together with constant parameters.
 *
 * @param funcName name later passed to {@code instantiateUDAF} on the first {@code exec} call
 * @param params   text handed to {@code ConstantObjectInspectInfo.parse}
 * @throws IOException if parsing {@code params} fails
 */
public Initial(String funcName, String params) throws IOException {
    this.constantsInfo = ConstantObjectInspectInfo.parse(params);
    this.funcName = funcName;
}
/**
 * Aggregates the bag found in the first field of {@code input} and returns the partial result.
 *
 * <p>On the first call the evaluator is initialised in {@link Mode#PARTIAL1} from the
 * function's input schema. Each tuple of the bag is unpacked into its field values through
 * the input object inspector and fed to the evaluator's {@code iterate}. The value returned
 * by {@code terminatePartial} is converted to a Pig value and wrapped in a one-field tuple.
 *
 * @param input tuple whose field 0 is the {@link DataBag} to aggregate
 * @return a tuple whose only field is the converted partial aggregate
 * @throws IOException wrapping any exception raised during initialisation or evaluation
 */
@Override
public Tuple exec(Tuple input) throws IOException {
    try {
        if (!inited) {
            schemaAndEvaluatorInfo.init(getInputSchema(), instantiateUDAF(funcName), Mode.PARTIAL1, constantsInfo);
            inited = true;
        }
        DataBag b = (DataBag) input.get(0);
        // A new buffer is requested on every call, so each call aggregates only its own bag.
        AggregationBuffer agg = schemaAndEvaluatorInfo.evaluator.getNewAggregationBuffer();
        // The iterator must be typed: a raw Iterator returns Object, which cannot be assigned to Tuple.
        for (Iterator<Tuple> it = b.iterator(); it.hasNext();) {
            Tuple t = it.next();
            List<?> inputs = schemaAndEvaluatorInfo.inputObjectInspector.getStructFieldsDataAsList(t);
            schemaAndEvaluatorInfo.evaluator.iterate(agg, inputs.toArray());
        }
        Object returnValue = schemaAndEvaluatorInfo.evaluator.terminatePartial(agg);
        Tuple result = tf.newTuple();
        result.append(HiveUtils.convertHiveToPig(returnValue, schemaAndEvaluatorInfo.intermediateOutputObjectInspector, null));
        return result;
    } catch (Exception e) {
        throw new IOException(e);
    }
}
}
/**
 * Evaluation stage that merges partial aggregates and emits a new partial aggregate.
 *
 * <p>The evaluator is initialised lazily in {@link Mode#PARTIAL2} on the first
 * {@link #exec(Tuple)} call.
 */
public static class Intermediate extends EvalFunc<Tuple> {
    // Set to true once schemaAndEvaluatorInfo.init(...) has run, so init happens only once.
    private boolean inited = false;
    // Name passed to instantiateUDAF(...) when the evaluator is initialised.
    private String funcName;
    // Result of ConstantObjectInspectInfo.parse(params); stays null with the one-argument constructor.
    ConstantObjectInspectInfo constantsInfo;
    // Holds the evaluator and the object inspectors that exec() reads.
    private SchemaAndEvaluatorInfo schemaAndEvaluatorInfo = new SchemaAndEvaluatorInfo();
    // Factory used to create the result tuple.
    private static TupleFactory tf = TupleFactory.getInstance();

    /**
     * Creates the function for the named Hive aggregate without constant parameters.
     *
     * @param funcName name later passed to {@code instantiateUDAF}
     */
    public Intermediate(String funcName) {
        this.funcName = funcName;
    }

    /**
     * Creates the function for the named Hive aggregate together with constant parameters.
     *
     * @param funcName name later passed to {@code instantiateUDAF}
     * @param params   text handed to {@code ConstantObjectInspectInfo.parse}
     * @throws IOException if parsing {@code params} fails
     */
    public Intermediate(String funcName, String params) throws IOException {
        this.funcName = funcName;
        constantsInfo = ConstantObjectInspectInfo.parse(params);
    }

    /**
     * Merges the partial aggregates held in the bag at field 0 of {@code input}.
     *
     * <p>The first field of every tuple in the bag is passed to the evaluator's
     * {@code merge}. The value returned by {@code terminatePartial} is converted to a
     * Pig value and wrapped in a one-field tuple.
     *
     * @param input tuple whose field 0 is a {@link DataBag} of partial aggregates
     * @return a tuple whose only field is the converted, merged partial aggregate
     * @throws IOException wrapping any exception raised during initialisation or evaluation
     */
    @Override
    public Tuple exec(Tuple input) throws IOException {
        try {
            if (!inited) {
                schemaAndEvaluatorInfo.init(getInputSchema(), instantiateUDAF(funcName), Mode.PARTIAL2, constantsInfo);
                inited = true;
            }
            DataBag b = (DataBag) input.get(0);
            // A new buffer is requested on every call, so each call merges only its own bag.
            AggregationBuffer agg = schemaAndEvaluatorInfo.evaluator.getNewAggregationBuffer();
            // The iterator must be typed: a raw Iterator returns Object, which cannot be assigned to Tuple.
            for (Iterator<Tuple> it = b.iterator(); it.hasNext();) {
                Tuple t = it.next();
                schemaAndEvaluatorInfo.evaluator.merge(agg, t.get(0));
            }
            Object returnValue = schemaAndEvaluatorInfo.evaluator.terminatePartial(agg);
            Tuple result = tf.newTuple();
            result.append(HiveUtils.convertHiveToPig(returnValue, schemaAndEvaluatorInfo.intermediateOutputObjectInspector, null));
            return result;
        } catch (Exception e) {
            throw new IOException(e);
        }
    }
}
static public class Final extends EvalFunc