org.apache.hadoop.hive.ql.udf.generic.GenericUDAFMax Maven / Gradle / Ivy
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hive.ql.udf.generic;
import java.util.ArrayDeque;
import java.util.Deque;
import com.facebook.presto.hive.$internal.org.apache.commons.logging.Log;
import com.facebook.presto.hive.$internal.org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hive.ql.exec.Description;
import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.parse.SemanticException;
import org.apache.hadoop.hive.ql.parse.WindowingSpec.BoundarySpec;
import org.apache.hadoop.hive.ql.plan.ptf.BoundaryDef;
import org.apache.hadoop.hive.ql.plan.ptf.WindowFrameDef;
import org.apache.hadoop.hive.ql.udf.UDFType;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator.AggregationBuffer;
import org.apache.hadoop.hive.ql.util.JavaDataModel;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils.ObjectInspectorCopyOption;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
@Description(name = "max", value = "_FUNC_(expr) - Returns the maximum value of expr")
public class GenericUDAFMax extends AbstractGenericUDAFResolver {
static final Log LOG = LogFactory.getLog(GenericUDAFMax.class.getName());
@Override
public GenericUDAFEvaluator getEvaluator(TypeInfo[] parameters)
throws SemanticException {
if (parameters.length != 1) {
throw new UDFArgumentTypeException(parameters.length - 1,
"Exactly one argument is expected.");
}
ObjectInspector oi = TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(parameters[0]);
if (!ObjectInspectorUtils.compareSupported(oi)) {
throw new UDFArgumentTypeException(parameters.length - 1,
"Cannot support comparison of map<> type or complex type containing map<>.");
}
return new GenericUDAFMaxEvaluator();
}
@UDFType(distinctLike=true)
public static class GenericUDAFMaxEvaluator extends GenericUDAFEvaluator {
private transient ObjectInspector inputOI;
private transient ObjectInspector outputOI;
@Override
public ObjectInspector init(Mode m, ObjectInspector[] parameters)
throws HiveException {
assert (parameters.length == 1);
super.init(m, parameters);
inputOI = parameters[0];
// Copy to Java object because that saves object creation time.
// Note that on average the number of copies is log(N) so that's not
// very important.
outputOI = ObjectInspectorUtils.getStandardObjectInspector(inputOI,
ObjectInspectorCopyOption.JAVA);
return outputOI;
}
/** class for storing the current max value */
static class MaxAgg extends AbstractAggregationBuffer {
Object o;
}
@Override
public AggregationBuffer getNewAggregationBuffer() throws HiveException {
MaxAgg result = new MaxAgg();
return result;
}
@Override
public void reset(AggregationBuffer agg) throws HiveException {
MaxAgg myagg = (MaxAgg) agg;
myagg.o = null;
}
boolean warned = false;
@Override
public void iterate(AggregationBuffer agg, Object[] parameters)
throws HiveException {
assert (parameters.length == 1);
merge(agg, parameters[0]);
}
@Override
public Object terminatePartial(AggregationBuffer agg) throws HiveException {
return terminate(agg);
}
@Override
public void merge(AggregationBuffer agg, Object partial)
throws HiveException {
if (partial != null) {
MaxAgg myagg = (MaxAgg) agg;
int r = ObjectInspectorUtils.compare(myagg.o, outputOI, partial, inputOI);
if (myagg.o == null || r < 0) {
myagg.o = ObjectInspectorUtils.copyToStandardObject(partial, inputOI,
ObjectInspectorCopyOption.JAVA);
}
}
}
@Override
public Object terminate(AggregationBuffer agg) throws HiveException {
MaxAgg myagg = (MaxAgg) agg;
return myagg.o;
}
@Override
public GenericUDAFEvaluator getWindowingEvaluator(WindowFrameDef wFrmDef) {
BoundaryDef start = wFrmDef.getStart();
BoundaryDef end = wFrmDef.getEnd();
return new MaxStreamingFixedWindow(this, start.getAmt(), end.getAmt());
}
}
/*
* Based on the Paper by Daniel Lemire: Streaming Max-Min filter using no more
* than 3 comparisons per elem.
*
* 1. His algorithm works on fixed size windows up to the current row. For row
* 'i' and window 'w' it computes the min/max for window (i-w, i). 2. The core
* idea is to keep a queue of (max, idx) tuples. A tuple in the queue
* represents the max value in the range (prev tuple.idx, idx). Using the
* queue data structure and following 2 operations it is easy to see that
* maxes can be computed: - on receiving the ith row; drain the queue from the
* back of any entries whose value is less than the ith entry; add the ith
* value as a tuple in the queue (i-val, i) - on the ith step, check if the
* element at the front of the queue has reached its max range of influence;
* i.e. frontTuple.idx + w > i. If yes we can remove it from the queue. - on
* the ith step o/p the front of the queue as the max for the ith entry.
*
* Here we modify the algorithm: 1. to handle window's that are of the form
* (i-p, i+f), where p is numPreceding,f = numFollowing - we start outputing
* rows only after receiving f rows. - the formula for 'influence range' of an
* idx accounts for the following rows. 2. optimize for the case when
* numPreceding is Unbounded. In this case only 1 max needs to be tarcked at
* any given time.
*/
static class MaxStreamingFixedWindow extends
GenericUDAFStreamingEvaluator
© 2015 - 2025 Weber Informatics LLC | Privacy Policy