All Downloads are FREE. Search and download functionalities are using the official Maven repository.

stream.counter.CountSketch Maven / Gradle / Ivy

package stream.counter;

import java.util.Set;

import stream.counter.hashing.SimpleHashFactory;
import stream.learner.Learner;

/**
 * 

* {@link Learner}-part of the implementation of the CountSketch algorithm from * the paper 'Finding frequent items in data streams' written by 'Charikar, M., * Chen, K., and Farach-colton, M. (2002)'. *

* * @author Marcin Skirzynski (main work), Benedikt Kulmann (modifications) * @see CountSketchModel * * @param */ public class CountSketch implements Counter { private static final long serialVersionUID = 1L; protected CountSketchModel model; /** *

* Constructor of the CountSketch algorithm. This construction can take * quite a long time since the construction of the hashfunctions is rather * time-consuming. *

* * @param domain * The (estim.) domain, i.e. how many different items are * expected * @param numberOfHashFunctions * The number of hashfunctions which determine a bucket * @param numberOfBuckets * The number of buckets where a counter will be maintained * @param k * parameter for the top-k variant. If you want to disable the * top-k overhead, than set k to 0 or lower */ public CountSketch(int domain, int numberOfHashFunctions, int numberOfBuckets, int k) { model = new CountSketchModel(domain, numberOfHashFunctions, numberOfBuckets, k, new SimpleHashFactory()); } /** * Same as {@link #CountSketch(int, int, int, int)} but with disabled top-k. */ public CountSketch(int domain, int numberOfHashFunctions, int numberOfBuckets) { this(domain, numberOfHashFunctions, numberOfBuckets, 0); } protected CountSketch() { // a class extending this class might want to use another model type... } public void init() { } /** *

* Counts the item by passing it to the internal data strucutre. *

* *

* If a k greater than zero was set, the top-k map will be maintained also. *

* * @param item * The item to count */ @Override public void count(T item) { boolean kGreaterZero = model.updateData(item); if (!kGreaterZero) { return; } if (model.isTopItem(item)) { model.incrementCount(item); } else if (model.notYetKItems()) { model.insertTopItem(item, 1L); } else { /** * Remove the item with the lowest frequency if the new item has a * higher frequency. */ CountEntry lowFreqItem = model.getItemWithLowestCount(); long estimatedFreq = model.estimateFrequency(item); if (lowFreqItem.frequency < estimatedFreq) { model.removeTopItem(lowFreqItem.item); model.insertTopItem(item, estimatedFreq); } } System.out.println("number of elements: " + model.topItems.size()); } /** * @see stream.counter.Counter#getTotalCount() */ @Override public Long getTotalCount() { return model.getTotalCount(); } /** * @see stream.counter.Counter#keySet() */ @Override public Set keySet() { return model.keySet(); } /** * @see stream.counter.Counter#getCount(java.lang.Object) */ @Override public Long getCount(T element) { return model.getCount(element); } }




© 2015 - 2025 Weber Informatics LLC | Privacy Policy