// hivemall.anomaly.ChangeFinderUDF Maven / Gradle / Ivy
// The newest version!
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package hivemall.anomaly;
import hivemall.UDFWithOptions;
import hivemall.annotations.Since;
import hivemall.utils.collections.DoubleRingBuffer;
import hivemall.utils.hadoop.HiveUtils;
import hivemall.utils.lang.Preconditions;
import hivemall.utils.lang.Primitives;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import javax.annotation.Nonnull;
import javax.annotation.Nullable;
import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.Options;
import org.apache.hadoop.hive.ql.exec.Description;
import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.udf.UDFType;
import org.apache.hadoop.hive.serde2.io.DoubleWritable;
import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.io.BooleanWritable;
@Description(name = "changefinder",
value = "_FUNC_(double|array x [, const string options])"
+ " - Returns outlier/change-point scores and decisions using ChangeFinder."
+ " It will return a tuple = 2, "K must be greater than 1: " + _params.k);
Preconditions.checkArgument(_params.r1 > 0.d && _params.r1 < 1.d,
"r1 must be in range (0,1): " + _params.r1);
Preconditions.checkArgument(_params.r2 > 0.d && _params.r2 < 1.d,
"r2 must be in range (0,1): " + _params.r2);
Preconditions.checkArgument(_params.T1 >= 2, "T1 must be greater than 1: " + _params.T1);
Preconditions.checkArgument(_params.T2 >= 2, "T2 must be greater than 1: " + _params.T2);
return cl;
}
/**
 * Validates the call signature, parses the optional option string, selects the scorer for the
 * first argument, and builds the reusable output row.
 *
 * <p>The returned struct always starts with {@code outlier_score} and
 * {@code changepoint_score} (both doubles). {@code is_outlier} is appended only when an outlier
 * threshold other than {@code -1} was configured, and {@code is_changepoint} is appended only
 * when, in addition, a change-point threshold other than {@code -1} was configured.
 *
 * @param argOIs inspectors for the call arguments: the observation, then optionally a constant
 *        option string
 * @return a struct inspector describing the row returned by {@code evaluate}
 * @throws UDFArgumentException if the argument count is not 1 or 2, or if the first argument is
 *         neither a list nor a number
 */
@Override
public ObjectInspector initialize(@Nonnull ObjectInspector[] argOIs)
        throws UDFArgumentException {
    if (argOIs.length < 1 || argOIs.length > 2) {
        throw new UDFArgumentException(
            "_FUNC_(double|array x [, const string options]) takes 1 or 2 arguments: "
                    + Arrays.toString(argOIs));
    }

    this._params = new Parameters();
    if (argOIs.length == 2) {
        String options = HiveUtils.getConstString(argOIs[1]);
        processOptions(options);
    }

    ObjectInspector argOI0 = argOIs[0];
    if (HiveUtils.isListOI(argOI0)) {
        ListObjectInspector listOI = HiveUtils.asListOI(argOI0);
        this._changeFinder = new ChangeFinder2D(_params, listOI);
    } else if (HiveUtils.isNumberOI(argOI0)) {
        PrimitiveObjectInspector xOI = HiveUtils.asDoubleCompatibleOI(argOI0);
        this._changeFinder = new ChangeFinder1D(_params, xOI);
    } else {
        // Reject unsupported input types here; otherwise _changeFinder stays null and the
        // problem only shows up as a NullPointerException on the first evaluate() call.
        throw new UDFArgumentException(
            "_FUNC_ expects a number or an array as the first argument but got: "
                    + argOI0.getTypeName());
    }

    // Scratch buffer the scorer writes into: [0] = outlier score, [1] = change-point score.
    this._scores = new double[2];

    final Object[] result;
    final ArrayList<String> fieldNames = new ArrayList<String>(4);
    final ArrayList<ObjectInspector> fieldOIs = new ArrayList<ObjectInspector>(4);
    fieldNames.add("outlier_score");
    fieldOIs.add(PrimitiveObjectInspectorFactory.writableDoubleObjectInspector);
    fieldNames.add("changepoint_score");
    fieldOIs.add(PrimitiveObjectInspectorFactory.writableDoubleObjectInspector);

    // -1 marks "no threshold configured"; boolean columns exist only for configured thresholds.
    if (_params.outlierThreshold != -1d) {
        fieldNames.add("is_outlier");
        fieldOIs.add(PrimitiveObjectInspectorFactory.writableBooleanObjectInspector);
        this._isOutlier = new BooleanWritable(false);
        if (_params.changepointThreshold != -1d) {
            fieldNames.add("is_changepoint");
            fieldOIs.add(PrimitiveObjectInspectorFactory.writableBooleanObjectInspector);
            result = new Object[4];
            this._isChangepoint = new BooleanWritable(false);
            result[3] = _isChangepoint;
        } else {
            result = new Object[3];
        }
        result[2] = _isOutlier;
    } else {
        result = new Object[2];
    }

    // The writables are allocated once and mutated in place by evaluate().
    this._outlierScore = new DoubleWritable(0d);
    result[0] = _outlierScore;
    this._changepointScore = new DoubleWritable(0d);
    result[1] = _changepointScore;
    this._result = result;

    return ObjectInspectorFactory.getStandardStructObjectInspector(fieldNames, fieldOIs);
}
/**
 * Feeds one observation to the scorer and refreshes the shared output row.
 *
 * <p>When the first argument evaluates to {@code null} the scorer is not updated and the row is
 * returned exactly as it was left by the previous call.
 *
 * @param args deferred call arguments; only {@code args[0]} is read here
 * @return the shared result array (the same instance on every call)
 * @throws HiveException if fetching the argument or updating the scorer fails
 */
@Override
public Object[] evaluate(@Nonnull DeferredObject[] args) throws HiveException {
    final Object observation = args[0].get();
    if (observation != null) {
        _changeFinder.update(observation, _scores);

        final double outlier = _scores[0];
        final double changepoint = _scores[1];
        _outlierScore.set(outlier);
        _changepointScore.set(changepoint);

        // The flag holders are non-null only when the matching threshold was configured;
        // the change-point flag is only considered when the outlier flag exists.
        if (_isOutlier != null) {
            _isOutlier.set(outlier >= _params.outlierThreshold);
            if (_isChangepoint != null) {
                _isChangepoint.set(changepoint >= _params.changepointThreshold);
            }
        }
    }
    return _result;
}
/**
 * Drops the references to the output row and to the writable holders stored in it.
 *
 * <p>Only the five fields assigned below are cleared; the scorer, the score scratch buffer and
 * the parsed parameters are left as they are.
 *
 * @throws IOException declared by the overridden method; nothing in this body throws it
 */
@Override
public void close() throws IOException {
    // Flag holders first, then the score holders, finally the row that pointed at them.
    _isChangepoint = null;
    _isOutlier = null;
    _changepointScore = null;
    _outlierScore = null;
    _result = null;
}
/**
 * Renders this call for display.
 *
 * <p>The children are formatted with {@link Arrays#toString(Object[])}, so the output has the
 * form {@code changefinder([a, b])}, including the square brackets.
 *
 * @param children display strings of the call arguments
 * @return the function name followed by the bracketed argument list in parentheses
 */
@Override
public String getDisplayString(String[] children) {
    final StringBuilder display = new StringBuilder("changefinder(");
    display.append(Arrays.toString(children));
    display.append(")");
    return display.toString();
}
/**
 * Mutable bag of ChangeFinder settings, created with defaults and then overwritten by option
 * parsing.
 *
 * <p>The option validation in this file requires {@code k}, {@code T1} and {@code T2} to be at
 * least 2 and {@code r1}, {@code r2} to lie strictly between 0 and 1. A threshold of
 * {@code -1d} is the value {@code initialize} treats as "no threshold configured".
 */
static final class Parameters {
    // NOTE(review): presumably the model order used by the scorer - confirm against ChangeFinder1D/2D.
    int k;
    // NOTE(review): r1/r2 and T1/T2 look like per-stage rate and window-size settings - confirm.
    double r1;
    double r2;
    int T1;
    int T2;
    double outlierThreshold;
    double changepointThreshold;
    LossFunction lossFunc1;
    LossFunction lossFunc2;

    /** Creates a parameter set populated with the default values. */
    Parameters() {
        this.k = 7;
        this.r1 = 0.02d;
        this.r2 = 0.02d;
        this.T1 = 7;
        this.T2 = 7;
        this.outlierThreshold = -1d;
        this.changepointThreshold = -1d;
        this.lossFunc1 = LossFunction.hellinger;
        this.lossFunc2 = LossFunction.hellinger;
    }

    /**
     * Uses the same loss function for both {@code lossFunc1} and {@code lossFunc2}.
     *
     * @param func loss function to assign to both slots
     */
    void set(@Nonnull LossFunction func) {
        this.lossFunc2 = func;
        this.lossFunc1 = func;
    }
}
/**
 * Scorer that is fed one observation per call and reports its scores through a caller-supplied
 * array.
 *
 * <p>In this file the UDF picks an implementation in {@code initialize} (one for list inputs,
 * one for numeric inputs) and drives it from {@code evaluate}.
 */
public interface ChangeFinder {
/**
 * Consumes one observation and writes the resulting scores into {@code outScores}.
 *
 * <p>The caller in this file passes a two-element array and afterwards reads index 0 as the
 * outlier score and index 1 as the change-point score.
 *
 * @param arg the raw, non-null argument object obtained from Hive for the current row
 * @param outScores output buffer the implementation fills in
 * @throws HiveException if the observation cannot be processed
 */
void update(@Nonnull Object arg, @Nonnull double[] outScores) throws HiveException;
}
/**
 * Averages the contents of a score buffer: the sum of every slot returned by
 * {@code getRing()} divided by {@code size()}.
 *
 * <p>The division is done in floating point, so a {@code size()} of 0 produces NaN or an
 * infinity rather than an exception.
 *
 * <p>NOTE(review): if {@code getRing()} exposes the whole backing array while {@code size()}
 * counts only filled slots, this relies on unfilled slots holding 0 - confirm against
 * {@code DoubleRingBuffer}.
 *
 * @param scores buffer of recent scores
 * @return the slot sum divided by the buffer size
 */
static double smoothing(@Nonnull final DoubleRingBuffer scores) {
    final double[] ring = scores.getRing();
    double total = 0.d;
    // Accumulate in slot order so the floating-point result is reproducible.
    for (int i = 0; i < ring.length; i++) {
        total += ring[i];
    }
    return total / scores.size();
}
/** Loss functions that can be selected by name. */
public enum LossFunction {
    logloss, hellinger;

    /**
     * Looks up a constant by name, ignoring case.
     *
     * @param name requested loss function name; {@code null} matches nothing
     * @return the constant whose name equals {@code name} case-insensitively
     * @throws IllegalArgumentException if no constant matches, including for {@code null}
     */
    static LossFunction resolve(@Nullable final String name) {
        // values() yields the constants in declaration order: logloss, then hellinger.
        for (final LossFunction candidate : values()) {
            if (candidate.name().equalsIgnoreCase(name)) {
                return candidate;
            }
        }
        throw new IllegalArgumentException("Unsupported LossFunction: " + name);
    }
}
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy