All downloads are free. Search and download functionality uses the official Maven repository.

hivemall.anomaly.SingularSpectrumTransformUDF Maven / Gradle / Ivy

The newest version!
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package hivemall.anomaly;

import hivemall.UDFWithOptions;
import hivemall.annotations.Since;
import hivemall.utils.hadoop.HiveUtils;
import hivemall.utils.lang.Preconditions;
import hivemall.utils.lang.Primitives;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;

import javax.annotation.Nonnull;
import javax.annotation.Nullable;

import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.Options;
import org.apache.hadoop.hive.ql.exec.Description;
import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.udf.UDFType;
import org.apache.hadoop.hive.serde2.io.DoubleWritable;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.io.BooleanWritable;

/**
 * Change-point detection based on Singular Spectrum Transformation (SST).
 *
 * References:
 * 
    *
  • T. Ide and K. Inoue, "Knowledge Discovery from Heterogeneous Dynamic Systems using * Change-Point Correlations", SDM'05.
  • *
  • T. Ide and K. Tsuda, "Change-point detection using Krylov subspace learning", SDM'07.
  • *
*/ @Description(name = "sst", value = "_FUNC_(double|array x [, const string options])" + " - Returns change-point scores and decisions using Singular Spectrum Transformation (SST)." + " It will return a tuple ") @UDFType(deterministic = false, stateful = true) @Since(version = "0.5-rc.1") public final class SingularSpectrumTransformUDF extends UDFWithOptions { private transient Parameters _params; private transient SingularSpectrumTransform _sst; private transient double[] _scores; private transient Object[] _result; private transient DoubleWritable _changepointScore; @Nullable private transient BooleanWritable _isChangepoint; public SingularSpectrumTransformUDF() {} // Visible for testing Parameters getParameters() { return _params; } @Override protected Options getOptions() { Options opts = new Options(); opts.addOption("w", "window", true, "Number of samples which affects change-point score [default: 30]"); opts.addOption("n", "n_past", true, "Number of past windows for change-point scoring [default: equal to `w` = 30]"); opts.addOption("m", "n_current", true, "Number of current windows for change-point scoring [default: equal to `w` = 30]"); opts.addOption("g", "current_offset", true, "Offset of the current windows from the updating sample [default: `-w` = -30]"); opts.addOption("r", "n_component", true, "Number of singular vectors (i.e. 
principal components) [default: 3]"); opts.addOption("k", "n_dim", true, "Number of dimensions for the Krylov subspaces [default: 5 (`2*r` if `r` is even, `2*r-1` otherwise)]"); opts.addOption("score", "scorefunc", true, "Score function [default: svd, ika]"); opts.addOption("th", "threshold", true, "Score threshold (inclusive) for determining change-point existence [default: -1, do not output decision]"); return opts; } @Override protected CommandLine processOptions(String optionValues) throws UDFArgumentException { CommandLine cl = parseOptions(optionValues); this._params.w = Primitives.parseInt(cl.getOptionValue("w"), _params.w); this._params.n = Primitives.parseInt(cl.getOptionValue("n"), _params.w); this._params.m = Primitives.parseInt(cl.getOptionValue("m"), _params.w); this._params.g = Primitives.parseInt(cl.getOptionValue("g"), -1 * _params.w); this._params.r = Primitives.parseInt(cl.getOptionValue("r"), _params.r); this._params.k = Primitives.parseInt(cl.getOptionValue("k"), (_params.r % 2 == 0) ? 
(2 * _params.r) : (2 * _params.r - 1)); this._params.scoreFunc = ScoreFunction.resolve(cl.getOptionValue("scorefunc", ScoreFunction.svd.name())); if ((_params.w != _params.n || _params.w != _params.m) && _params.scoreFunc == ScoreFunction.ika) { throw new UDFArgumentException("IKA-based efficient SST requires w = n = m"); } this._params.changepointThreshold = Primitives.parseDouble(cl.getOptionValue("th"), _params.changepointThreshold); Preconditions.checkArgument(_params.w >= 2, UDFArgumentException.class, "w must be greater than 1: " + _params.w); Preconditions.checkArgument(_params.r >= 1, UDFArgumentException.class, "r must be greater than 0: " + _params.r); Preconditions.checkArgument(_params.k >= 1, UDFArgumentException.class, "k must be greater than 0: " + _params.k); Preconditions.checkArgument(_params.k >= _params.r, UDFArgumentException.class, "k must be equals to or greater than r: k=" + _params.k + ", r" + _params.r); Preconditions.checkArgument( _params.changepointThreshold > 0.d && _params.changepointThreshold < 1.d, UDFArgumentException.class, "changepointThreshold must be in range (0, 1): " + _params.changepointThreshold); return cl; } @Override public ObjectInspector initialize(@Nonnull ObjectInspector[] argOIs) throws UDFArgumentException { if (argOIs.length < 1 || argOIs.length > 2) { throw new UDFArgumentException( "_FUNC_(double|array x [, const string options]) takes 1 or 2 arguments: " + Arrays.toString(argOIs)); } this._params = new Parameters(); if (argOIs.length == 2) { String options = HiveUtils.getConstString(argOIs[1]); processOptions(options); } ObjectInspector argOI0 = argOIs[0]; PrimitiveObjectInspector xOI = HiveUtils.asDoubleCompatibleOI(argOI0); this._sst = new SingularSpectrumTransform(_params, xOI); this._scores = new double[1]; final Object[] result; final ArrayList fieldNames = new ArrayList(); final ArrayList fieldOIs = new ArrayList(); fieldNames.add("changepoint_score"); 
fieldOIs.add(PrimitiveObjectInspectorFactory.writableDoubleObjectInspector); if (_params.changepointThreshold != -1d) { fieldNames.add("is_changepoint"); fieldOIs.add(PrimitiveObjectInspectorFactory.writableBooleanObjectInspector); result = new Object[2]; this._isChangepoint = new BooleanWritable(false); result[1] = _isChangepoint; } else { result = new Object[1]; } this._changepointScore = new DoubleWritable(0.d); result[0] = _changepointScore; this._result = result; return ObjectInspectorFactory.getStandardStructObjectInspector(fieldNames, fieldOIs); } @Override public Object[] evaluate(@Nonnull DeferredObject[] args) throws HiveException { Object x = args[0].get(); if (x == null) { return _result; } _sst.update(x, _scores); double changepointScore = _scores[0]; _changepointScore.set(changepointScore); if (_isChangepoint != null) { _isChangepoint.set(changepointScore >= _params.changepointThreshold); } return _result; } @Override public void close() throws IOException { this._result = null; this._changepointScore = null; this._isChangepoint = null; } @Override public String getDisplayString(String[] children) { return "sst(" + Arrays.toString(children) + ")"; } static final class Parameters { int w = 30; int n = 30; int m = 30; int g = -30; int r = 3; int k = 5; @Nonnull ScoreFunction scoreFunc = ScoreFunction.svd; double changepointThreshold = -1.d; Parameters() {} void set(@Nonnull ScoreFunction func) { this.scoreFunc = func; } } public interface SingularSpectrumTransformInterface { void update(@Nonnull Object arg, @Nonnull double[] outScores) throws HiveException; } public enum ScoreFunction { svd, ika; static ScoreFunction resolve(@Nullable final String name) throws UDFArgumentException { if (svd.name().equalsIgnoreCase(name)) { return svd; } else if (ika.name().equalsIgnoreCase(name)) { return ika; } else { throw new UDFArgumentException("Unsupported ScoreFunction: " + name); } } } }




© 2015 - 2025 Weber Informatics LLC | Privacy Policy