moa.clusterers.streamkm.BucketManager Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of moa Show documentation
Show all versions of moa Show documentation
Massive On-line Analysis is an environment for massive data mining. MOA
provides a framework for data stream mining and includes tools for evaluation
and a collection of machine learning algorithms. MOA is related to the WEKA project
and is also written in Java, while scaling to more demanding problems.
package moa.clusterers.streamkm;
/**
*
* @author Marcel R. Ackermann, Christiane Lammersen, Marcus Maertens, Christoph Raupach,
*         Christian Sohler, Kamil Swierkot
*/
public class BucketManager {
protected class Bucket {
int cursize;
Point[] points;
Point[] spillover;
public Bucket(int d, int maxsize){
this.cursize = 0;
this.points = new Point[maxsize];
this.spillover = new Point[maxsize];
for(int i=0; i= this.maxBucketsize) {
//printf("Bucket 0 full \n");
//start spillover process
int curbucket = 0;
int nextbucket = 1;
//check if the next bucket is empty
if(this.buckets[nextbucket].cursize == 0){
//copy the bucket
int i;
for(i=0; i n is a power of 2 and we return the contents of the last bucket
Case2: the last bucket is not full
=> we compute a coreset of all nonempty buckets
this operation should only be called after the streaming process is finished
**/
Point[] getCoresetFromManager(int d){
Point[] coreset = new Point[d];
int i = 0;
if(this.buckets[this.numberOfBuckets-1].cursize == this.maxBucketsize){
coreset = this.buckets[this.numberOfBuckets-1].points;
} else {
//find the first nonempty bucket
for(i=0; i < this.numberOfBuckets; i++){
if(this.buckets[i].cursize != 0){
coreset = this.buckets[i].points;
break;
}
}
//as long as there is a nonempty bucket compute a coreset
int j;
for(j=i+1; j < this.numberOfBuckets; j++){
if(this.buckets[j].cursize != 0){
//output the coreset into the spillover of bucket j
this.treeCoreset.unionTreeCoreset(this.maxBucketsize,this.maxBucketsize,
this.maxBucketsize,d,
this.buckets[j].points,coreset,
this.buckets[j].spillover, this.clustererRandom);
coreset = this.buckets[j].spillover;
}
}
}
return coreset;
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy