hivemall.tools.array.ArraySumUDAF Maven / Gradle / Ivy
The newest version!
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package hivemall.tools.array;
import hivemall.utils.lang.ArrayUtils;
import java.util.Arrays;
import java.util.List;
import javax.annotation.Nonnull;
import org.apache.hadoop.hive.ql.exec.Description;
import org.apache.hadoop.hive.ql.exec.UDAF;
import org.apache.hadoop.hive.ql.exec.UDAFEvaluator;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.serde2.io.DoubleWritable;
//@formatter:off
/**
 * Hive UDAF {@code array_sum(array)}: sums the input arrays position by position and
 * returns one array holding the per-position totals.
 *
 * <p>All non-empty input arrays must have the same number of elements; a mismatch is
 * reported as a {@link HiveException}. {@code null} arrays, empty arrays and {@code null}
 * elements are skipped and therefore contribute nothing to the totals.
 *
 * <p>Element types are spelled out as generic type arguments ({@code List<Double>},
 * {@code List<DoubleWritable>}). With raw {@code List} the element arithmetic below does
 * not compile, because {@code List.get} would return {@code Object}.
 * NOTE(review): Hive presumably also derives the SQL argument/result types reflectively
 * from these generic signatures - confirm against the Hive UDAF bridge.
 */
@SuppressWarnings("deprecation")
@Description(name = "array_sum", value = "_FUNC_(array) - Returns an array"
        + " in which each element is summed up",
        extended = "WITH input as (\n" +
                " select array(1.0, 2.0, 3.0) as nums\n" +
                " UNION ALL\n" +
                " select array(2.0, 3.0, 4.0) as nums\n" +
                ")\n" +
                "select\n" +
                " array_sum(nums)\n" +
                "from\n" +
                " input;\n" +
                "\n" +
                "[\"3.0\",\"5.0\",\"7.0\"]"
)
// @formatter:on
public final class ArraySumUDAF extends UDAF {

    public ArraySumUDAF() {}

    /**
     * Evaluator that accumulates the element-wise sum in a lazily created
     * {@link PartialResult}.
     */
    public static class Evaluator implements UDAFEvaluator {

        /** Running aggregate; stays {@code null} until the first non-empty input is seen. */
        private PartialResult partial;

        public Evaluator() {}

        /** Resets the evaluator by discarding any accumulated state. */
        @Override
        public void init() {
            this.partial = null;
        }

        /**
         * Adds one input array to the running sums.
         *
         * @param tuple the input array; {@code null} and empty arrays are ignored
         * @return always {@code true}
         * @throws HiveException if {@code tuple} has a different number of elements than
         *         the arrays accumulated so far
         */
        public boolean iterate(List<Double> tuple) throws HiveException {
            if (tuple == null || tuple.isEmpty()) {// nothing to add (empty is a special case)
                return true;
            }
            if (partial == null) {
                // the first non-empty array fixes the expected number of elements
                this.partial = new PartialResult(tuple.size());
            }
            partial.update(tuple);
            return true;
        }

        /**
         * Returns the current partial aggregate.
         *
         * @return the accumulated state, or {@code null} if nothing has been accumulated
         */
        public PartialResult terminatePartial() {
            return partial;
        }

        /**
         * Folds another partial aggregate into this evaluator.
         *
         * @param other the partial aggregate to merge; {@code null} is ignored
         * @return always {@code true}
         * @throws HiveException if {@code other} has a different number of elements than
         *         the state accumulated so far
         */
        public boolean merge(PartialResult other) throws HiveException {
            if (other == null) {
                return true;
            }
            if (partial == null) {
                // start from zeros of the same length, then add 'other' below
                this.partial = new PartialResult(other._size);
            }
            partial.merge(other);
            return true;
        }

        /**
         * Produces the final result.
         *
         * @return a fixed-size list with one {@link DoubleWritable} per position, or
         *         {@code null} if nothing has been accumulated
         */
        public List<DoubleWritable> terminate() {
            if (partial == null) {
                return null;
            }
            final int size = partial._size;
            final List<Double> sum = partial._sum;
            final DoubleWritable[] ary = new DoubleWritable[size];
            for (int i = 0; i < size; i++) {
                ary[i] = new DoubleWritable(sum.get(i).doubleValue());
            }
            return Arrays.asList(ary);
        }
    }

    /**
     * Partial aggregate: the expected number of elements and the running per-position sums.
     */
    public static class PartialResult {

        /** Number of elements every accumulated array must have. */
        int _size;

        // note that primitive array cannot be serialized by JDK serializer
        /** Running sum for each position; holds {@code _size} entries once initialized. */
        List<Double> _sum;

        /** Creates an uninitialized instance; {@code _size} and {@code _sum} are left unset. */
        public PartialResult() {}

        /**
         * Creates an aggregate for arrays of {@code size} elements.
         *
         * @param size the expected number of elements; asserted to be positive
         * @throws HiveException declared for callers; not thrown by the visible code
         */
        PartialResult(int size) throws HiveException {
            assert (size > 0) : size;
            this._size = size;
            // presumably yields a mutable List<Double> of 'size' zeros - confirm in ArrayUtils
            this._sum = ArrayUtils.toList(new double[size]);
        }

        /**
         * Adds each non-null element of {@code tuple} to the sum at the same position.
         *
         * @param tuple the array to add; must contain exactly {@code _size} elements
         * @throws HiveException if the number of elements differs from {@code _size}
         */
        void update(@Nonnull final List<Double> tuple) throws HiveException {
            if (tuple.size() != _size) {// a corner case
                throw new HiveException(
                    "Mismatch in the number of elements at tuple: " + tuple);
            }
            final List<Double> sum = _sum;
            for (int i = 0, len = _size; i < len; i++) {
                final Double v = tuple.get(i);
                if (v != null) {// null elements leave the running sum untouched
                    sum.set(i, Double.valueOf(sum.get(i).doubleValue() + v.doubleValue()));
                }
            }
        }

        /**
         * Adds the sums of {@code other} to this aggregate, position by position.
         *
         * @param other the aggregate to add; must have the same {@code _size}
         * @throws HiveException if the two aggregates differ in their number of elements
         */
        void merge(PartialResult other) throws HiveException {
            if (other._size != _size) {
                throw new HiveException("Mismatch in the number of elements");
            }
            final List<Double> sum = _sum;
            final List<Double> otherSum = other._sum;
            for (int i = 0, len = _size; i < len; i++) {
                sum.set(i, Double.valueOf(sum.get(i).doubleValue() + otherSum.get(i).doubleValue()));
            }
        }
    }
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy