
moa.clusterers.outliers.MCOD.ISBIndex Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of moa Show documentation
Show all versions of moa Show documentation
Massive On-line Analysis (MOA) is an environment for massive data mining. MOA
provides a framework for data stream mining and includes tools for evaluation
and a collection of machine learning algorithms. It is related to the WEKA project
and is also written in Java, while scaling to more demanding problems.
/*
* ISBIndex.java
* Copyright (C) 2013 Aristotle University of Thessaloniki, Greece
* @author D. Georgiadis, A. Gounaris, A. Papadopoulos, K. Tsichlas, Y. Manolopoulos
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*
*/
package moa.clusterers.outliers.MCOD;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeSet;
import java.util.Vector;
import com.yahoo.labs.samoa.instances.Instance;
public class ISBIndex {
public static class ISBNode implements Comparable {
public static enum NodeType { OUTLIER, INLIER_MC, INLIER_PD }
public Instance inst;
public StreamObj obj;
public Long id;
public MicroCluster mc;
public Set Rmc;
public int count_after;
public NodeType nodeType;
private ArrayList nn_before;
// statistics
public int nOutlier;
public int nInlier;
public ISBNode(Instance inst, StreamObj obj, Long id) {
this.inst = inst;
this.obj = obj;
this.id = id;
// init statistics
nOutlier = 0;
nInlier = 0;
// init other fields
InitNode();
}
public void InitNode() {
this.mc = null;
this.Rmc = new TreeSet();
this.count_after = 1;
this.nodeType = NodeType.INLIER_PD;
this.nn_before = new ArrayList();
}
@Override
public int compareTo(ISBNode t) {
if (this.id > t.id)
return +1;
else if (this.id < t.id)
return -1;
return 0;
}
public void AddPrecNeigh(ISBNode node) {
int pos = Collections.binarySearch(nn_before, node);
if (pos < 0) {
// item does not exist, so add it to the right position
nn_before.add(-(pos + 1), node);
}
}
public void RemovePrecNeigh(ISBNode node) {
int pos = Collections.binarySearch(nn_before, node);
if (pos >= 0) {
// item exists
nn_before.remove(pos);
}
}
public ISBNode GetMinPrecNeigh(Long sinceId) {
if (nn_before.size() > 0) {
int startPos;
ISBNode dummy = new ISBNode(null, null, sinceId);
int pos = Collections.binarySearch(nn_before, dummy);
if (pos < 0) {
// item does not exist, should insert at position startPos
startPos = -(pos + 1);
} else {
// item exists at startPos
startPos = pos;
}
if (startPos < nn_before.size()) {
return nn_before.get(startPos);
}
}
return null;
}
public int CountPrecNeighs(Long sinceId) {
if (nn_before.size() > 0) {
// get number of neighs with id >= sinceId
int startPos;
ISBNode dummy = new ISBNode(null, null, sinceId);
int pos = Collections.binarySearch(nn_before, dummy);
if (pos < 0) {
// item does not exist, should insert at position startPos
startPos = -(pos + 1);
} else {
// item exists at startPos
startPos = pos;
}
if (startPos < nn_before.size()) {
return nn_before.size() - startPos;
}
}
return 0;
}
public List Get_nn_before() {
return nn_before;
}
}
MTreeStreamObjects mtree;
Map> mapNodes;
double m_radius;
int m_k; // k nearest neighbors
public ISBIndex(double radius, int k) {
mtree = new MTreeStreamObjects();
mapNodes = new HashMap>();
m_radius = radius;
m_k = k;
}
Vector GetAllNodes() {
Vector v = new Vector();
Iterator it = mapNodes.entrySet().iterator();
while (it.hasNext()) {
Map.Entry pairs = (Map.Entry) it.next();
Set setNodes = (Set) pairs.getValue();
for (ISBNode n : setNodes) {
v.add(n);
}
}
return v;
}
public static class ISBSearchResult {
public ISBNode node;
public double distance;
public ISBSearchResult(ISBNode n, double distance) {
this.node = n;
this.distance = distance;
}
}
public Vector RangeSearch(ISBNode node, double radius) {
Vector results = new Vector();
StreamObj obj;
double d;
MTreeStreamObjects.Query query = mtree.getNearestByRange(node.obj, radius);
for (MTreeStreamObjects.ResultItem q : query) {
// get next obj found within range
obj = q.data;
// get distance of obj from query
d = q.distance;
// get all nodes referencing obj
Vector nodes = MapGetNodes(obj);
for (int i = 0; i < nodes.size(); i++)
results.add(new ISBSearchResult(nodes.get(i), d));
}
return results;
}
public void Insert(ISBNode node) {
// insert object of node at mtree
mtree.add(node.obj);
// insert node at map
MapInsert(node);
}
public void Remove(ISBNode node) {
// remove from map
MapDelete(node);
// check if stream object at mtree is still being referenced
if (MapCountObjRefs(node.obj) <= 0) {
// delete stream object from mtree
mtree.remove(node.obj);
}
}
Vector MapGetNodes(StreamObj obj) {
int h = obj.hashCode();
Vector v = new Vector();
if (mapNodes.containsKey(h)) {
Set s = mapNodes.get(h);
ISBNode node;
Iterator i = s.iterator();
while (i.hasNext()) {
node = i.next();
if (node.obj.equals(obj))
v.add(node);
}
}
return v;
}
int MapCountObjRefs(StreamObj obj) {
int h = obj.hashCode();
int iCount = 0;
if (mapNodes.containsKey(h)) {
Set s = mapNodes.get(h);
ISBNode n;
Iterator i = s.iterator();
while (i.hasNext()) {
n = i.next();
if (n.obj.equals(obj))
iCount++;
}
}
return iCount;
}
void MapInsert(ISBNode node) {
int h = node.obj.hashCode();
Set s;
if (mapNodes.containsKey(h)) {
s = mapNodes.get(h);
s.add(node);
}
else {
s = new HashSet();
s.add(node);
mapNodes.put(h, s);
}
}
void MapDelete(ISBNode node) {
int h = node.obj.hashCode();
if (mapNodes.containsKey(h)) {
Set s = mapNodes.get(h);
s.remove(node);
if (s.isEmpty()) { // ### added
mapNodes.remove(h);
}
}
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy