All Downloads are FREE. Search and download functionalities are using the official Maven repository.

hivemall.anomaly.ChangeFinderUDF Maven / Gradle / Ivy

The newest version!
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package hivemall.anomaly;

import hivemall.UDFWithOptions;
import hivemall.annotations.Since;
import hivemall.utils.collections.DoubleRingBuffer;
import hivemall.utils.hadoop.HiveUtils;
import hivemall.utils.lang.Preconditions;
import hivemall.utils.lang.Primitives;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;

import javax.annotation.Nonnull;
import javax.annotation.Nullable;

import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.Options;
import org.apache.hadoop.hive.ql.exec.Description;
import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.udf.UDFType;
import org.apache.hadoop.hive.serde2.io.DoubleWritable;
import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.io.BooleanWritable;

@Description(name = "changefinder",
        value = "_FUNC_(double|array x [, const string options])"
                + " - Returns outlier/change-point scores and decisions using ChangeFinder."
                + " It will return a tuple = 2, "K must be greater than 1: " + _params.k);
        Preconditions.checkArgument(_params.r1 > 0.d && _params.r1 < 1.d,
            "r1 must be in range (0,1): " + _params.r1);
        Preconditions.checkArgument(_params.r2 > 0.d && _params.r2 < 1.d,
            "r2 must be in range (0,1): " + _params.r2);
        Preconditions.checkArgument(_params.T1 >= 2, "T1 must be greater than 1: " + _params.T1);
        Preconditions.checkArgument(_params.T2 >= 2, "T2 must be greater than 1: " + _params.T2);

        return cl;
    }

    @Override
    public ObjectInspector initialize(@Nonnull ObjectInspector[] argOIs)
            throws UDFArgumentException {
        if (argOIs.length < 1 || argOIs.length > 2) {
            throw new UDFArgumentException(
                "_FUNC_(double|array x [, const string options]) takes 1 or 2 arguments: "
                        + Arrays.toString(argOIs));
        }

        this._params = new Parameters();
        if (argOIs.length == 2) {
            String options = HiveUtils.getConstString(argOIs[1]);
            processOptions(options);
        }

        ObjectInspector argOI0 = argOIs[0];
        if (HiveUtils.isListOI(argOI0)) {
            ListObjectInspector listOI = HiveUtils.asListOI(argOI0);
            this._changeFinder = new ChangeFinder2D(_params, listOI);
        } else if (HiveUtils.isNumberOI(argOI0)) {
            PrimitiveObjectInspector xOI = HiveUtils.asDoubleCompatibleOI(argOI0);
            this._changeFinder = new ChangeFinder1D(_params, xOI);
        }

        this._scores = new double[2];

        final Object[] result;
        final ArrayList fieldNames = new ArrayList();
        final ArrayList fieldOIs = new ArrayList();
        fieldNames.add("outlier_score");
        fieldOIs.add(PrimitiveObjectInspectorFactory.writableDoubleObjectInspector);
        fieldNames.add("changepoint_score");
        fieldOIs.add(PrimitiveObjectInspectorFactory.writableDoubleObjectInspector);
        if (_params.outlierThreshold != -1d) {
            fieldNames.add("is_outlier");
            fieldOIs.add(PrimitiveObjectInspectorFactory.writableBooleanObjectInspector);
            this._isOutlier = new BooleanWritable(false);
            if (_params.changepointThreshold != -1d) {
                fieldNames.add("is_changepoint");
                fieldOIs.add(PrimitiveObjectInspectorFactory.writableBooleanObjectInspector);
                result = new Object[4];
                this._isChangepoint = new BooleanWritable(false);
                result[3] = _isChangepoint;
            } else {
                result = new Object[3];
            }
            result[2] = _isOutlier;
        } else {
            result = new Object[2];
        }
        this._outlierScore = new DoubleWritable(0d);
        result[0] = _outlierScore;
        this._changepointScore = new DoubleWritable(0d);
        result[1] = _changepointScore;
        this._result = result;

        return ObjectInspectorFactory.getStandardStructObjectInspector(fieldNames, fieldOIs);
    }

    @Override
    public Object[] evaluate(@Nonnull DeferredObject[] args) throws HiveException {
        Object x = args[0].get();
        if (x == null) {
            return _result;
        }

        _changeFinder.update(x, _scores);

        double outlierScore = _scores[0];
        double changepointScore = _scores[1];
        _outlierScore.set(outlierScore);
        _changepointScore.set(changepointScore);
        if (_isOutlier != null) {
            _isOutlier.set(outlierScore >= _params.outlierThreshold);
            if (_isChangepoint != null) {
                _isChangepoint.set(changepointScore >= _params.changepointThreshold);
            }
        }

        return _result;
    }

    /**
     * Releases the per-query row holders so they can be garbage-collected
     * once the UDF instance is done.
     */
    @Override
    public void close() throws IOException {
        this._isChangepoint = null;
        this._isOutlier = null;
        this._changepointScore = null;
        this._outlierScore = null;
        this._result = null;
    }

    /** Renders this UDF call for EXPLAIN output. */
    @Override
    public String getDisplayString(String[] children) {
        final StringBuilder buf = new StringBuilder("changefinder(");
        buf.append(Arrays.toString(children));
        return buf.append(')').toString();
    }

    /**
     * Mutable bag of ChangeFinder hyper-parameters, populated from the UDF's
     * option string. A threshold of -1 is a sentinel meaning "not configured",
     * which suppresses the corresponding boolean output column.
     */
    static final class Parameters {
        // order of the SDAR model (must be >= 2)
        int k = 7;
        // forgetting factors for stage 1 (outlier) and stage 2 (change-point), in (0,1)
        double r1 = 0.02d;
        double r2 = 0.02d;
        // smoothing-window sizes for the two stages (must be >= 2)
        int T1 = 7;
        int T2 = 7;
        // decision thresholds; -1 disables is_outlier / is_changepoint outputs
        double outlierThreshold = -1d;
        double changepointThreshold = -1d;
        // scoring loss used at each stage
        LossFunction lossFunc1 = LossFunction.hellinger;
        LossFunction lossFunc2 = LossFunction.hellinger;

        Parameters() {}

        /** Applies the same loss function to both stages at once. */
        void set(@Nonnull LossFunction func) {
            this.lossFunc1 = func;
            this.lossFunc2 = func;
        }

    }

    /**
     * Strategy interface implemented by the 1D and 2D ChangeFinder variants.
     * Implementations consume one input row and write two scores into
     * {@code outScores}: index 0 the outlier score, index 1 the change-point
     * score (slot meanings per the reads in {@code evaluate} above).
     */
    public interface ChangeFinder {
        void update(@Nonnull Object arg, @Nonnull double[] outScores) throws HiveException;
    }

    /**
     * Averages the scores held in the ring buffer.
     * Sums the entire backing array but divides by {@code size()}; presumably
     * unfilled slots are zero so the mean over the filled prefix is correct —
     * NOTE(review): verify against DoubleRingBuffer's contract.
     */
    static double smoothing(@Nonnull final DoubleRingBuffer scores) {
        final double[] ring = scores.getRing();
        double total = 0.d;
        for (int i = 0; i < ring.length; i++) {
            total += ring[i];
        }
        return total / scores.size();
    }

    /** Loss functions selectable via the UDF option string. */
    public enum LossFunction {
        logloss, hellinger;

        /**
         * Resolves a name case-insensitively to its constant.
         * A null or unknown name yields {@link IllegalArgumentException}.
         */
        static LossFunction resolve(@Nullable final String name) {
            for (LossFunction func : values()) {
                if (func.name().equalsIgnoreCase(name)) {
                    return func;
                }
            }
            throw new IllegalArgumentException("Unsupported LossFunction: " + name);
        }
    }

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy