All downloads are free. Search and download functionality uses the official Maven repository.

moa.clusterers.denstream.WithDBSCAN Maven / Gradle / Ivy

Go to download

Massive On-line Analysis (MOA) is an environment for massive data mining. MOA provides a framework for data stream mining and includes tools for evaluation and a collection of machine learning algorithms. It is related to the WEKA project and is also written in Java, while scaling to more demanding problems.

There is a newer version: 2024.07.0
Show newest version
/**
 * Subspace MOA [DenStream_DBSCAN.java]
 * 
 * DenStream with DBSCAN as the macro-clusterer.
 * 
 * @author Stephan Wels ([email protected])
 * @editor Yunsu Kim
 * Data Management and Data Exploration Group, RWTH Aachen University
 *
 *    Licensed under the Apache License, Version 2.0 (the "License");
 *    you may not use this file except in compliance with the License.
 *    You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 *    Unless required by applicable law or agreed to in writing, software
 *    distributed under the License is distributed on an "AS IS" BASIS,
 *    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *    See the License for the specific language governing permissions and
 *    limitations under the License.
 *    
 *    
 */

package moa.clusterers.denstream;

import java.util.ArrayList;

import moa.cluster.Cluster;
import moa.cluster.Clustering;
import moa.clusterers.AbstractClusterer;
import moa.clusterers.macro.dbscan.DBScan;
import moa.core.Measurement;
import moa.options.FloatOption;
import moa.options.IntOption;
import weka.core.DenseInstance;
import weka.core.Instance;

/**
 * DenStream stream clusterer that uses DBSCAN as the macro-clusterer.
 *
 * <p>Incoming instances are first collected in an initialisation buffer. Once
 * {@code initPoints} instances have arrived, a DBSCAN-style pass over the
 * buffer creates the first potential micro-clusters. Every later instance is
 * merged into the nearest potential micro-cluster, else into the nearest
 * outlier micro-cluster, else it starts a new outlier micro-cluster.
 * Micro-clusters whose weight has dropped too low are removed periodically.
 *
 * <p>{@link #getClusteringResult()} runs {@link DBScan} over the potential
 * micro-clusters; {@link #getMicroClusteringResult()} returns them directly.
 */
public class WithDBSCAN extends AbstractClusterer {

	private static final long serialVersionUID = 1L;

	public IntOption horizonOption = new IntOption("horizon", 'h',
			"Range of the window.", 1000);
	public FloatOption epsilonOption = new FloatOption("epsilon", 'e',
			"Defines the epsilon neighbourhood", 0.02, 0, 1);
	// public IntOption minPointsOption = new IntOption("minPoints", 'p',
	// "Minimal number of points cluster has to contain.", 10);

	public FloatOption betaOption = new FloatOption("beta", 'b', "", 0.2, 0,
			1);
	public FloatOption muOption = new FloatOption("mu", 'm', "", 1, 0,
			Double.MAX_VALUE);
	public IntOption initPointsOption = new IntOption("initPoints", 'i',
			"Number of points to use for initialization.", 1000);

	public FloatOption offlineOption = new FloatOption("offline", 'o',
			"offline multiplier for epsilion.", 2, 2, 20);

	public FloatOption lambdaOption = new FloatOption("lambda", 'l', "",
			0.25,
			0, 1);

	public IntOption speedOption = new IntOption("processingSpeed", 's',
			"Number of incoming points per time unit.", 100, 1, 1000);

	// Only referenced by the disabled horizon-based lambda formula.
	private double weightThreshold = 0.01;
	double lambda;
	double epsilon;
	int minPoints;
	double mu;
	double beta;

	/** Potential micro-clusters; these are what the clusterer reports. */
	Clustering p_micro_cluster;
	/** Outlier micro-clusters; promoted once their weight exceeds beta * mu. */
	Clustering o_micro_cluster;
	/** Instances collected before the initial DBSCAN pass has run. */
	ArrayList<DenPoint> initBuffer;

	boolean initialized;
	/** Logical time; advances once every {@code processingSpeed} instances. */
	private long timestamp = 0;
	/** Shared time object handed to every micro-clusters on creation. */
	Timestamp currentTimestamp;
	/** Pruning period: clean-up runs whenever {@code timestamp % tp == 0}. */
	long tp;

	/* #point variables */
	protected int numInitPoints;
	/** Running count of trained instances (never reset between time units). */
	protected int numProcessedPerUnit;
	protected int processingSpeed;
	// TODO Some variables to prevent duplicated processes

	/**
	 * Copy of an incoming instance plus the "covered" flag used by the
	 * initial DBSCAN pass. Declared static so that buffered points do not
	 * hold a hidden reference to the enclosing clusterer.
	 */
	private static class DenPoint extends DenseInstance {

		private static final long serialVersionUID = 1L;

		/** True once the point has been assigned during initialisation. */
		protected boolean covered;

		/**
		 * @param nextInstance instance to copy; its dataset is carried over
		 * @param timestamp    arrival time; currently not stored
		 */
		public DenPoint(Instance nextInstance, long timestamp) {
			super(nextInstance);
			this.setDataset(nextInstance.dataset());
		}
	}

	/**
	 * Re-reads all options and discards every micro-cluster, the
	 * initialisation buffer and the logical clock.
	 */
	@Override
	public void resetLearningImpl() {
		// Restart the clock so the long counter and the shared Timestamp
		// object agree right after a reset.
		timestamp = 0;
		currentTimestamp = new Timestamp();
		currentTimestamp.setTimestamp(timestamp);
//		lambda = -Math.log(weightThreshold) / Math.log(2)
//						/ (double) horizonOption.getValue();
		lambda = lambdaOption.getValue();

		epsilon = epsilonOption.getValue();
		minPoints = (int) muOption.getValue();// minPointsOption.getValue();
		// NOTE(review): mu is truncated to an integer here although the
		// option is a float; kept as-is, confirm whether this is intended.
		mu = (int) muOption.getValue();
		beta = betaOption.getValue();

		initialized = false;
		p_micro_cluster = new Clustering();
		o_micro_cluster = new Clustering();
		initBuffer = new ArrayList<DenPoint>();

		tp = computePruningPeriod(lambda, beta, mu);

		numProcessedPerUnit = 0;
		processingSpeed = speedOption.getValue();
	}

	/**
	 * Computes {@code round(1/lambda * ln(beta*mu / (beta*mu - 1))) + 1},
	 * clamped to the range {@code [1, Long.MAX_VALUE]}.
	 *
	 * <p>The expression is NaN whenever {@code 0 < beta*mu < 1} (which
	 * includes the default options) and infinite when {@code beta*mu == 1}
	 * or {@code lambda == 0}. {@code Math.round} maps NaN to 0 and saturates
	 * infinities, so an unguarded {@code + 1} could overflow or produce a
	 * non-positive period that breaks the {@code timestamp % tp} check.
	 */
	private static long computePruningPeriod(double lambda, double beta,
			double mu) {
		double threshold = beta * mu;
		double period = 1 / lambda * Math.log(threshold / (threshold - 1));
		long rounded = Math.round(period); // NaN -> 0, +/-Infinity saturate
		if (rounded < 0) {
			return 1;
		}
		if (rounded == Long.MAX_VALUE) {
			return Long.MAX_VALUE; // rounded + 1 would overflow
		}
		return rounded + 1;
	}

	/**
	 * Builds the first potential micro-clusters from the initialisation
	 * buffer. Each uncovered point with more than {@code minPoints} uncovered
	 * neighbours within {@code epsilon} seeds a micro-cluster that is then
	 * grown with {@code expandCluster}.
	 */
	public void initialDBScan() {
		for (int p = 0; p < initBuffer.size(); p++) {
			DenPoint point = initBuffer.get(p);
			if (point.covered) {
				continue;
			}
			// Mark first so the point is not listed as its own neighbour.
			point.covered = true;
			ArrayList<Integer> neighbourhood = getNeighbourhoodIDs(point,
					initBuffer, epsilon);
			if (neighbourhood.size() > minPoints) {
				MicroCluster mc = new MicroCluster(point,
						point.numAttributes(), timestamp, lambda,
						currentTimestamp);
				expandCluster(mc, initBuffer, neighbourhood);
				p_micro_cluster.add(mc);
			} else {
				// Not dense enough: leave it available to other seeds.
				point.covered = false;
			}
		}
	}

	/**
	 * Processes one instance: advances the logical clock, then either
	 * buffers the instance (before initialisation) or merges it into the
	 * micro-cluster structure and runs the periodic clean-up.
	 *
	 * @param inst the instance to learn from
	 */
	@Override
	public void trainOnInstanceImpl(Instance inst) {
		DenPoint point = new DenPoint(inst, timestamp);
		numProcessedPerUnit++;

		/* Controlling the stream speed */
		if (numProcessedPerUnit % processingSpeed == 0) {
			timestamp++;
			currentTimestamp.setTimestamp(timestamp);
		}

		// ////////////////
		// Initialization//
		// ////////////////
		if (!initialized) {
			initBuffer.add(point);
			if (initBuffer.size() >= initPointsOption.getValue()) {
				initialDBScan();
				initialized = true;
			}
			return;
		}

		// ////////////
		// Merging(p)//
		// ////////////
		boolean merged = false;
		if (p_micro_cluster.getClustering().size() != 0) {
			MicroCluster nearest = nearestCluster(point, p_micro_cluster);
			merged = insertIfWithinEpsilon(nearest, point);
		}
		if (!merged && (o_micro_cluster.getClustering().size() != 0)) {
			MicroCluster nearest = nearestCluster(point, o_micro_cluster);
			merged = insertIfWithinEpsilon(nearest, point);
			// Promote the outlier micro-cluster once it is heavy enough.
			if (merged && nearest.getWeight() > beta * mu) {
				o_micro_cluster.getClustering().remove(nearest);
				p_micro_cluster.getClustering().add(nearest);
			}
		}
		if (!merged) {
			double[] values = point.toDoubleArray();
			o_micro_cluster.getClustering().add(
					new MicroCluster(values, values.length, timestamp,
							lambda, currentTimestamp));
		}

		// //////////////////////////
		// Periodic cluster removal//
		// //////////////////////////
		if (timestamp % tp == 0) {
			pruneMicroClusters();
		}
	}

	/**
	 * Inserts the point into {@code target} only if a trial insertion into a
	 * copy keeps the radius at or below {@code epsilon}.
	 *
	 * @return true if the point was inserted into {@code target}
	 */
	private boolean insertIfWithinEpsilon(MicroCluster target, DenPoint point) {
		MicroCluster trial = target.copy();
		trial.insert(point, timestamp);
		if (trial.getRadius(timestamp) <= epsilon) {
			target.insert(point, timestamp);
			return true;
		}
		return false;
	}

	/**
	 * Removes potential micro-clusters whose weight is below
	 * {@code beta * mu} and outlier micro-clusters whose weight is below the
	 * age-dependent limit
	 * {@code (2^(-lambda*(t - t0 + tp)) - 1) / (2^(-lambda*tp) - 1)}.
	 */
	private void pruneMicroClusters() {
		ArrayList<MicroCluster> fadedPotential = new ArrayList<MicroCluster>();
		for (Cluster c : p_micro_cluster.getClustering()) {
			MicroCluster mc = (MicroCluster) c;
			if (mc.getWeight() < beta * mu) {
				fadedPotential.add(mc);
			}
		}
		// Removal is deferred so the clustering is not modified mid-iteration.
		for (MicroCluster mc : fadedPotential) {
			p_micro_cluster.getClustering().remove(mc);
		}

		// The denominator does not depend on the cluster.
		double xsi2 = Math.pow(2, -lambda * tp) - 1;
		ArrayList<MicroCluster> fadedOutliers = new ArrayList<MicroCluster>();
		for (Cluster c : o_micro_cluster.getClustering()) {
			MicroCluster mc = (MicroCluster) c;
			long t0 = mc.getCreationTime();
			double xsi1 = Math.pow(2, (-lambda * (timestamp - t0 + tp))) - 1;
			double xsi = xsi1 / xsi2;
			if (mc.getWeight() < xsi) {
				fadedOutliers.add(mc);
			}
		}
		for (MicroCluster mc : fadedOutliers) {
			o_micro_cluster.getClustering().remove(mc);
		}
	}

	/**
	 * Adds every still-uncovered neighbour to {@code mc} and recurses from
	 * each added point whose own neighbourhood holds more than
	 * {@code minPoints} uncovered points.
	 *
	 * @param mc            micro-cluster being grown
	 * @param points        the point set the indices refer to
	 * @param neighbourhood indices into {@code points} to try to add
	 */
	private void expandCluster(MicroCluster mc, ArrayList<DenPoint> points,
			ArrayList<Integer> neighbourhood) {
		for (int p : neighbourhood) {
			DenPoint npoint = points.get(p);
			if (!npoint.covered) {
				npoint.covered = true;
				mc.insert(npoint, timestamp);
				ArrayList<Integer> neighbourhood2 = getNeighbourhoodIDs(
						npoint, points, epsilon);
				// The density test must use the new point's neighbourhood,
				// not the one handed in by the caller.
				if (neighbourhood2.size() > minPoints) {
					expandCluster(mc, points, neighbourhood2);
				}
			}
		}
	}

	/**
	 * Returns the indices of all uncovered points closer than {@code eps}
	 * (strictly) to {@code point}.
	 */
	private ArrayList<Integer> getNeighbourhoodIDs(DenPoint point,
			ArrayList<DenPoint> points, double eps) {
		ArrayList<Integer> neighbourIDs = new ArrayList<Integer>();
		double[] origin = point.toDoubleArray(); // hoisted out of the loop
		for (int p = 0; p < points.size(); p++) {
			DenPoint npoint = points.get(p);
			if (!npoint.covered) {
				double dist = distance(origin, npoint.toDoubleArray());
				if (dist < eps) {
					neighbourIDs.add(p);
				}
			}
		}
		return neighbourIDs;
	}

	/**
	 * Finds the micro-cluster whose boundary (centre distance minus radius)
	 * is closest to {@code p}.
	 *
	 * @return the nearest micro-cluster, or null if {@code cl} is empty
	 */
	private MicroCluster nearestCluster(DenPoint p, Clustering cl) {
		MicroCluster nearest = null;
		// Start at +infinity so positive distances are compared as well;
		// starting at 0 only ever accepted clusters that contain the point.
		double minDist = Double.POSITIVE_INFINITY;
		double[] coords = p.toDoubleArray();
		for (int c = 0; c < cl.size(); c++) {
			MicroCluster candidate = (MicroCluster) cl.get(c);
			double dist = distance(coords, candidate.getCenter())
					- candidate.getRadius(timestamp);
			// The first candidate is always taken so a non-empty clustering
			// never yields null, even if the distance is NaN.
			if (nearest == null || dist < minDist) {
				minDist = dist;
				nearest = candidate;
			}
		}
		return nearest;
	}

	/**
	 * Euclidean distance over the first {@code pointA.length} coordinates.
	 */
	private double distance(double[] pointA, double[] pointB) {
		double sumOfSquares = 0.0;
		for (int i = 0; i < pointA.length; i++) {
			double d = pointA[i] - pointB[i];
			sumOfSquares += d * d;
		}
		return Math.sqrt(sumOfSquares);
	}

	/**
	 * Runs DBScan over the potential micro-clusters, using
	 * {@code offline * epsilon} as the neighbourhood size.
	 */
	public Clustering getClusteringResult() {
		DBScan dbscan = new DBScan(p_micro_cluster,
				offlineOption.getValue() * epsilon, minPoints);
		return dbscan.getClustering(p_micro_cluster);
	}

	@Override
	public boolean implementsMicroClusterer() {
		return true;
	}

	/** Returns the current potential micro-clusters. */
	@Override
	public Clustering getMicroClusteringResult() {
		return p_micro_cluster;
	}

	@Override
	protected Measurement[] getModelMeasurementsImpl() {
		throw new UnsupportedOperationException("Not supported yet.");
	}

	@Override
	public void getModelDescription(StringBuilder out, int indent) {
	}

	public boolean isRandomizable() {
		return true;
	}

	public double[] getVotesForInstance(Instance inst) {
		return null;
	}

	/**
	 * Renders the class name followed by the horizon, epsilon, beta, mu,
	 * lambda and initPoints options in CLI form.
	 */
	public String getParameterString() {
		StringBuilder sb = new StringBuilder();
		sb.append(this.getClass().getSimpleName()).append(" ");

		sb.append("-").append(horizonOption.getCLIChar()).append(" ");
		sb.append(horizonOption.getValueAsCLIString()).append(" ");

		sb.append("-").append(epsilonOption.getCLIChar()).append(" ");
		sb.append(epsilonOption.getValueAsCLIString()).append(" ");

		sb.append("-").append(betaOption.getCLIChar()).append(" ");
		sb.append(betaOption.getValueAsCLIString()).append(" ");

		sb.append("-").append(muOption.getCLIChar()).append(" ");
		sb.append(muOption.getValueAsCLIString()).append(" ");

		sb.append("-").append(lambdaOption.getCLIChar()).append(" ");
		sb.append(lambdaOption.getValueAsCLIString()).append(" ");

		sb.append("-").append(initPointsOption.getCLIChar()).append(" ");
		// NO " " at the end! results in errors on windows systems
		sb.append(initPointsOption.getValueAsCLIString());

		return sb.toString();
	}

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy