All Downloads are FREE. Search and download functionalities are using the official Maven repository.

moa.clusterers.streamkm.BucketManager Maven / Gradle / Ivy

Go to download

Massive On-line Analysis (MOA) is an environment for massive data mining. MOA provides a framework for data stream mining and includes tools for evaluation and a collection of machine learning algorithms. It is related to the WEKA project and is also written in Java, while scaling to more demanding problems.

There is a newer version: 2024.07.0
Show newest version
package moa.clusterers.streamkm;


/**
 *
 * @author Marcel R. Ackermann, Christiane Lammersen, Marcus Maertens, Christoph Raupach,
 *         Christian Sohler, Kamil Swierkot
 */
public class BucketManager  {

	protected class Bucket {
		int cursize;
		Point[] points;
		Point[] spillover;
		
		public Bucket(int d, int maxsize){
			this.cursize = 0;
			this.points = new Point[maxsize];
			this.spillover = new Point[maxsize];
			for(int i=0; i= this.maxBucketsize) {
			//printf("Bucket 0 full \n");
			//start spillover process
			int curbucket  = 0;
			int nextbucket = 1;

			//check if the next bucket is empty
			if(this.buckets[nextbucket].cursize == 0){
				//copy the bucket	
				int i;
				for(i=0; i n is a power of 2 and we return the contents of the last bucket

	Case2: the last bucket is not full
	=> we compute a coreset of all nonempty buckets 

	this operation should only be called after the streaming process is finished
	**/
	/**
	 * Produces the final coreset from the buckets held by this manager.
	 *
	 * <p>If the last bucket's {@code cursize} equals {@code maxBucketsize}, that
	 * bucket's {@code points} array is returned as is. Otherwise the points of the
	 * first non-empty bucket are taken as the running coreset, and every later
	 * non-empty bucket is merged into it through
	 * {@code treeCoreset.unionTreeCoreset}; each merge writes its output into the
	 * {@code spillover} array of the bucket being merged, which then becomes the
	 * running coreset. If no bucket holds any points, a freshly allocated array of
	 * {@code d} null entries is returned.
	 *
	 * <p>The returned array is one of the manager's internal arrays (not a copy)
	 * whenever at least one bucket is non-empty.
	 *
	 * @param d forwarded to {@code unionTreeCoreset} and used as the length of the
	 *          fallback array; presumably the dimension of the points (confirm
	 *          against callers)
	 * @return the array holding the resulting coreset
	 */
	Point[] getCoresetFromManager(int d){
		// Fallback result: only survives when every bucket turns out to be empty.
		Point[] merged = new Point[d];
		final int lastBucket = this.numberOfBuckets - 1;

		// Case 1: the last bucket is full, so its contents are returned directly.
		if (this.buckets[lastBucket].cursize == this.maxBucketsize) {
			return this.buckets[lastBucket].points;
		}

		// Case 2: locate the first non-empty bucket and start from its points.
		int start = 0;
		while (start < this.numberOfBuckets && this.buckets[start].cursize == 0) {
			start++;
		}
		if (start < this.numberOfBuckets) {
			merged = this.buckets[start].points;
		}

		// Fold each later non-empty bucket into the running coreset.
		for (int k = start + 1; k < this.numberOfBuckets; k++) {
			if (this.buckets[k].cursize == 0) {
				continue;
			}
			// The merge result lands in bucket k's spillover array.
			this.treeCoreset.unionTreeCoreset(
				this.maxBucketsize, this.maxBucketsize, this.maxBucketsize, d,
				this.buckets[k].points, merged,
				this.buckets[k].spillover, this.clustererRandom);
			merged = this.buckets[k].spillover;
		}
		return merged;
	}
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy