All downloads are free. The search and download functionality uses the official Maven repository.

moa.clusterers.outliers.Angiulli.ApproxSTORM Maven / Gradle / Ivy

Go to download

Massive On-line Analysis (MOA) is an environment for massive data mining. MOA provides a framework for data stream mining and includes tools for evaluation and a collection of machine learning algorithms. It is related to the WEKA project and is also written in Java, while scaling to more demanding problems.

The newest version!
/*
 *    ApproxSTORM.java
 *    Copyright (C) 2013 Aristotle University of Thessaloniki, Greece
 *    @author D. Georgiadis, A. Gounaris, A. Papadopoulos, K. Tsichlas, Y. Manolopoulos
 *
 *    Licensed under the Apache License, Version 2.0 (the "License");
 *    you may not use this file except in compliance with the License.
 *    You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 *    Unless required by applicable law or agreed to in writing, software
 *    distributed under the License is distributed on an "AS IS" BASIS,
 *    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *    See the License for the specific language governing permissions and
 *    limitations under the License.
 *    
 *    
 */

package moa.clusterers.outliers.Angiulli;

import java.util.ArrayList;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Random;
import java.util.Set;
import java.util.Vector;
import moa.clusterers.outliers.Angiulli.ISBIndex.ISBNode;
import moa.clusterers.outliers.Angiulli.ISBIndex.ISBSearchResult;
import com.github.javacliparser.FloatOption;
import com.github.javacliparser.IntOption;
import com.yahoo.labs.samoa.instances.Instance;


public class ApproxSTORM extends STORMBase {
    public class ISBNodeAppr extends ISBNode {
        public Long count_after, count_before;
        public double fract_before;
        
        public ISBNodeAppr(Instance inst, StreamObj obj, Long id, int k) {
            super(inst, obj, id);
            m_k = k;
            count_after = 0L;
            count_before = 0L;
            fract_before = 0;
        }
    }
    
    public FloatOption radiusOption = new FloatOption("radius", 'r', "Search radius.", 0.1);
    public IntOption kOption = new IntOption("k", 't', "Parameter k.", 50);
    public IntOption queryFreqOption = new IntOption("queryFreq", 'q', "Query frequency.", 1);
    public FloatOption pOption = new FloatOption("p", 'p', "Parameter p.", 0.1);
    
    Set safe_inliers; // list of safe inliers
    int m_FractWindowSize;  
    Random m_Random;
    
    @Override
    public String getObjectInfo(Object obj) {
        if (obj == null) return null;
        
        ISBNodeAppr node = (ISBNodeAppr) obj;
        
        ArrayList infoTitle = new ArrayList();
        ArrayList infoValue = new ArrayList();
        StringBuilder sb = new StringBuilder();

        // show node position
        for (int i = 0; i < node.obj.dimensions(); i++) {
            infoTitle.add("Dim" + (i+1));
            infoValue.add(String.format("%.3f", node.obj.get(i)));
        }
        
        // show node properties
        infoTitle.add("id");
        infoValue.add(String.format("%d", node.id));
        infoTitle.add("count_after");
        infoValue.add(String.format("%d", node.count_after));
        infoTitle.add("count_before");
        infoValue.add(String.format("%d", node.count_before));
        
        sb.append("");
        sb.append("");
        int i = 0;
        while(i < infoTitle.size() && i < infoValue.size()){
            sb.append("");
            i++;
        }
        sb.append("
"+infoTitle.get(i)+":"+infoValue.get(i)+"
"); sb.append(""); return sb.toString(); } public ApproxSTORM() { // System.out.println("DistanceOutliersAppr: created"); } @Override public void Init() { super.Init(); m_WindowSize = windowSizeOption.getValue(); m_radius = radiusOption.getValue(); m_k = kOption.getValue(); m_QueryFreq = queryFreqOption.getValue(); m_FractWindowSize = (int) (pOption.getValue() * m_WindowSize); Println("Init DistanceOutliersAppr:"); Println(" window_size: " + m_WindowSize); Println(" radius: " + m_radius); Println(" k: " + m_k); Println(" query_freq: " + m_QueryFreq); m_Random = new Random(); objId = FIRST_OBJ_ID; // init object identifier // create fifo windowNodes = new Vector(); // create ISB ISB = new ISBIndex(m_radius, m_k); // create safe_inliers list safe_inliers = new HashSet(); // init statistics m_nBothInlierOutlier = 0; m_nOnlyInlier = 0; m_nOnlyOutlier = 0; } void AddSafeInlier(ISBNode node) { safe_inliers.add(node); } ISBNode GetSafeInlier(int idx) { ISBNode node = null; Iterator it = safe_inliers.iterator(); while (idx >= 0) { node = (ISBNodeAppr)it.next(); idx--; } return node; } boolean IsSafeInlier(ISBNodeAppr node) { return node.count_after >= m_k; } void PrintSafeInliers() { Print("Safe inliers: "); ISBNode node; Iterator it = safe_inliers.iterator(); while (it.hasNext()) { node = (ISBNode) it.next(); Print(node.id + " "); } Println(" "); } void RemoveNode(ISBNode node) { // remove node from ISB ISB.Remove(node); // remove from fifo windowNodes.remove(node); // remove node from safe_inliers safe_inliers.remove(node); // remove from outliers RemoveExpiredOutlier(new Outlier(node.inst, node.id, node)); // update statistics UpdateStatistics(node); } void RemoveSafeInlier(ISBNode node) { // remove node from ISB ISB.Remove(node); // remove node from safe_inliers safe_inliers.remove(node); } void DeleteExpiredNode() { if (windowNodes.size() <= 0) return; // get oldest node ISBNode node = windowNodes.get(0); // check if node has expired if (node.id < GetWindowStart()) { 
if (bTrace) { Print("Delete expired node: "); PrintNode(node); } // remove node RemoveNode(node); } } @Override protected void ProcessNewStreamObj(Instance inst) { if (bShowProgress) ShowProgress("Processed " + objId + " stream objects."); // PrintInstance(inst); double[] values = getInstanceValues(inst); StreamObj obj = new StreamObj(values); if (bTrace) Println("\n- - - - - - - - - - - -\n"); // delete a node if it has expired DeleteExpiredNode(); // create new ISB node ISBNodeAppr nodeNew = new ISBNodeAppr(inst, obj, objId, m_k); if (bTrace) { Print("New obj: "); PrintNode(nodeNew); } // update object identifier objId++; // init nodeNew nodeNew.count_after = 1L; nodeNew.count_before = 0L; // perform range query search if (bTrace) Println("Perform range query seach:"); nRangeQueriesExecuted++; Vector nodes = ISB.RangeSearch(nodeNew, m_radius); // process each returned node int nSafeInliers; Long count_si_before = 0L; for (ISBSearchResult res : nodes) { ISBNodeAppr n = (ISBNodeAppr) res.node; if (bTrace) { Printf(" Found at d=%.2f: ", res.distance); PrintNode(res.node); } n.count_after++; if (IsSafeInlier(n)) { if (bTrace) Println(" Safe inlier: id=" + n.id); AddSafeInlier(n); count_si_before++; } nSafeInliers = safe_inliers.size(); if (nSafeInliers > m_FractWindowSize) { // get a random safe inlier: 0 <= idx < nSafeInliers int idx = m_Random.nextInt(nSafeInliers); ISBNode si = GetSafeInlier(idx); if (bTrace) Println(" Remove random safe inlier: id=" + si.id); // remove node from ISB and safe-inliers-list RemoveSafeInlier(si); } nodeNew.count_before++; } // Set fract_before of curr_node which is determined as the ratio // between the number of preceding neighbors of curr_node in ISB // which are safe inliers and the total number of safe inliers in // ISB, at the arrival time of curr_node. 
nSafeInliers = safe_inliers.size(); if (nSafeInliers > 0) { nodeNew.fract_before = (double)count_si_before / (double)nSafeInliers; } else { if (bTrace) Println("Set fract before: no safe inliers yet, set 0."); nodeNew.fract_before = 0; } if (bTrace) { Println("Node: "); Println(" count_after=" + nodeNew.count_after); Println(" count_before=" + nodeNew.count_before); Printf(" fract_before=%.3f\n", nodeNew.fract_before); Println("Insert new node to ISB."); } // insert node to ISB ISB.Insert(nodeNew); // insert node at window windowNodes.add(nodeNew); if (bTrace) { PrintWindow(); PrintSafeInliers(); } if (CanSearch()) { // invoke query function to detect outliers SearchOutliers(); } else { // update statistics outlierness of new node UpdateNodeStatistics(nodeNew); } } void SearchOutliers() { if (bTrace) Println("Invoke query: "); ISBNodeAppr node; // process each node in the ISB (also in window) for (int i = 0; i < windowNodes.size(); i++) { node = (ISBNodeAppr) windowNodes.get(i); if (bTrace) { Print(" Process node: "); PrintNode(node); } UpdateNodeType(node); } } void UpdateNodeType(ISBNodeAppr node) { double succ_neighs, prec_neighs; // get number of succeeding neighbors succ_neighs = node.count_after; if (bTrace) Println(" succ_neighs: " + succ_neighs); // get number of preceding neighbors prec_neighs = node.fract_before * (double)Math.abs((node.id + m_WindowSize) - GetWindowEnd()); if (bTrace) Println(" prec_neighs: " + prec_neighs); // check if node is an outlier if (succ_neighs + prec_neighs < m_k) { SaveOutlier(node); if (bTrace) { Print("*** Outlier: "); PrintNode(node); } } else { RemoveOutlier(node); } } void UpdateNodeStatistics(ISBNodeAppr node) { double succ_neighs = node.count_after; double prec_neighs = node.fract_before * (double)Math.abs((node.id + m_WindowSize) - GetWindowEnd()); if (succ_neighs + prec_neighs < m_k) { node.nOutlier++; // update statistics } else { node.nInlier++; // update statistics } } }




© 2015 - 2025 Weber Informatics LLC | Privacy Policy