hivemall.ftvec.trans.QuantifiedFeaturesUDTF Maven / Gradle / Ivy
The newest version!
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package hivemall.ftvec.trans;
import hivemall.utils.hadoop.HiveUtils;
import hivemall.utils.lang.Identifier;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import org.apache.hadoop.hive.ql.exec.Description;
import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDTF;
import org.apache.hadoop.hive.serde2.io.DoubleWritable;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.BooleanObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils;
/**
 * Hive UDTF {@code quantified_features(boolean output, col1, col2, ...)}.
 *
 * <p>Every call receives an output flag followed by one value per column. Columns whose
 * ObjectInspector is numeric (per {@code HiveUtils.isNumberOI}) are read as doubles; every other
 * column is converted with {@code toString()} and mapped to an integer through a per-column
 * {@link Identifier}. When the flag is {@code true}, a single row holding one double per column
 * is forwarded. When the flag is {@code false}, the row is only fed to the identifiers of the
 * non-numeric columns and nothing is forwarded.
 *
 * <p>The forwarded list is backed by the same {@link DoubleWritable} instances on every call, so
 * a forwarded row is overwritten by the next one.
 */
@Description(name = "quantified_features",
        value = "_FUNC_(boolean output, col1, col2, ...) - Returns an identified features in a dense array")
public final class QuantifiedFeaturesUDTF extends GenericUDTF {

    /** Inspector for the leading boolean "output" argument. */
    private BooleanObjectInspector boolOI;
    /** Per-column inspectors; an entry is non-null only for numeric columns. */
    private PrimitiveObjectInspector[] doubleOIs;
    /** Per-column identifiers; an entry is non-null only for non-numeric columns. */
    private Identifier[] identifiers;
    /** Reusable output cells, one per column, initialised to NaN. */
    private DoubleWritable[] columnValues;

    // lazy instantiation to avoid org.apache.hive.com.esotericsoftware.kryo.KryoException: java.lang.NullPointerException
    private transient Object[] forwardObjs;

    /**
     * Validates the argument list and prepares the per-column state.
     *
     * @param argOIs inspectors for the output flag followed by the feature columns
     * @return a struct inspector with a single field {@code features} of type
     *         {@code array<double>}
     * @throws UDFArgumentException if fewer than two arguments are given (the helper calls on
     *         {@code HiveUtils} presumably also reject unsupported argument types)
     */
    @Override
    public StructObjectInspector initialize(ObjectInspector[] argOIs) throws UDFArgumentException {
        final int size = argOIs.length;
        if (size < 2) {
            throw new UDFArgumentException(
                "quantified_features takes at least two arguments: " + size);
        }
        this.boolOI = HiveUtils.asBooleanOI(argOIs[0]);

        final int outputSize = size - 1;
        this.doubleOIs = new PrimitiveObjectInspector[outputSize];
        this.identifiers = new Identifier[outputSize];
        this.columnValues = new DoubleWritable[outputSize];
        for (int i = 0; i < outputSize; i++) {
            final ObjectInspector argOI = argOIs[i + 1];
            if (HiveUtils.isNumberOI(argOI)) {
                doubleOIs[i] = HiveUtils.asDoubleCompatibleOI(argOI);
            } else {
                identifiers[i] = new Identifier();
            }
            columnValues[i] = new DoubleWritable(Double.NaN);
        }
        this.forwardObjs = null;

        // The output struct always has exactly one field, regardless of the column count.
        final List<String> fieldNames = new ArrayList<String>(1);
        final List<ObjectInspector> fieldOIs = new ArrayList<ObjectInspector>(1);
        fieldNames.add("features");
        fieldOIs.add(ObjectInspectorFactory.getStandardListObjectInspector(
            PrimitiveObjectInspectorFactory.writableDoubleObjectInspector));
        return ObjectInspectorFactory.getStandardStructObjectInspector(fieldNames, fieldOIs);
    }

    /**
     * Consumes one input row.
     *
     * @param args the output flag followed by one value per feature column
     * @throws HiveException if the output flag is null, or if the row is to be emitted and any
     *         of its column values is null
     */
    @Override
    public void process(Object[] args) throws HiveException {
        final Object flag = args[0];
        if (flag == null) {
            // Reading a null through the inspector would otherwise surface as a bare NPE.
            throw new HiveException(
                "The output flag (1st argument) must not be null: " + Arrays.toString(args));
        }

        final int outputSize = args.length - 1;
        if (boolOI.get(flag)) {
            final DoubleWritable[] values = this.columnValues;
            if (forwardObjs == null) {
                this.forwardObjs = new Object[] {Arrays.asList(values)};
            }
            // updating columnValues simultaneously changes forwardObjs
            for (int i = 0; i < outputSize; i++) {
                final Object arg = args[i + 1];
                if (arg == null) {
                    // Applies to numeric and categorical columns alike.
                    throw new HiveException("Found Null in the input: " + Arrays.toString(args));
                }
                final Identifier identifier = identifiers[i];
                if (identifier == null) {
                    values[i].set(PrimitiveObjectInspectorUtils.getDouble(arg, doubleOIs[i]));
                } else {
                    values[i].set(identifier.valueOf(arg.toString()));
                }
            }
            forward(forwardObjs);
        } else {// load only
            // Feed non-null categorical values to their identifiers without emitting a row;
            // numeric columns are ignored here.
            for (int i = 0; i < outputSize; i++) {
                final Identifier identifier = identifiers[i];
                if (identifier == null) {
                    continue;
                }
                final Object arg = args[i + 1];
                if (arg != null) {
                    identifier.valueOf(arg.toString());
                }
            }
        }
    }

    /** Drops all per-query state held by this instance. */
    @Override
    public void close() throws HiveException {
        this.boolOI = null;
        this.doubleOIs = null;
        this.identifiers = null;
        this.columnValues = null;
        this.forwardObjs = null;
    }
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy