All Downloads are FREE. Search and download functionalities are using the official Maven repository.

stream.counter.StickySamplingModel Maven / Gradle / Ivy

package stream.counter;

import java.util.ArrayList;
import java.util.Collection;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;

import stream.learner.Model;

/**
 * {@link Model}-part of the implementation of the "Sticky Sampling" algorithm
 * as described in the paper "Approximate Frequency Counts over Data Streams"
 * written by Gurmeet Singh Manku and Rajeev Motwani
 * 
 * @author Benedikt Kulmann
 * @see StickySampling
 */
public final class StickySamplingModel implements StaticFrequentItemModel {

	private static final long serialVersionUID = 1L;

	/**
	 * 

* Threshold whether an element is frequent or not. *

*/ private final double support; /** *

* Epsilon around support. *

* *

* Rule of thumb: 10% of support *

*/ private final double error; /** *

* The data structure which holds all counting information. *

*/ private final Map> dataStructure; /** * The total count of all counted elements in the stream so far. */ private long elementsCounted; /** * Creates a new instance of the StickySamplingModel, used by * {@link StickySampling}

* * @param support * Threshold whether an element of the data structure is * frequent. Has to be out of (0,1). * @param error * An epsilon for the threshold. Has to be out of (0,1). */ public StickySamplingModel(double support, double error) { if (support <= 0 || support >= 1) { throw new IllegalArgumentException("Support has to be > 0 and < 1."); } if (error <= 0 || error >= 1) { throw new IllegalArgumentException("Error has to be > 0 and < 1."); } this.support = support; this.error = error; this.elementsCounted = 0; this.dataStructure = new ConcurrentHashMap>(); } /** *

* Removes the {@link CountEntry} associated with the provided item from the * internal data structure. *

* * @param itemToRemove * The item whose {@link CountEntry} shall be removed */ void removeItem(T itemToRemove) { dataStructure.remove(itemToRemove); } /** *

* Returns whether the internal data structure contains a counter for the * provided item. *

* * @param item * The item in question * @return True if the internal data structure contains a counter for the * provided item, false otherwise. */ boolean containsItem(T item) { return dataStructure.containsKey(item); } /** *

* Increment the count frequency of the provided item by 1. *

* * @param item * The item whose frequency shall be incremented by 1. */ void incrementCount(T item) { dataStructure.get(item).frequency++; elementsCounted++; } /** *

* Decrements the count frequency of the provided item by 1. Used within the * {@link StickySampling#adaptNewSamplingRate()} method of the algorithm. *

* * @param item * The item whose count frequency shall be decremented by 1. */ void decrementCount(T item) { dataStructure.get(item).frequency--; } /** *

* Returns whether the count frequency of the provided item corresponds to 0 * (i.e. frequency == 0 or item doesn't exist within the internal data * structure). *

* * @param item * The item in question. * @return true if the count frequency of the provided item corresponds to 0 */ boolean frequencyIsZero(T item) { return !dataStructure.containsKey(item) || dataStructure.get(item).frequency == 0; } /** * Inserts the provided item into the internal data structure with an * initial count of 1. * * @param item * The item which shall be inserted into the internal data * structure */ void insertEntry(T item) { dataStructure.put(item, new CountEntry(item, 1)); elementsCounted++; } /** * {@inheritDoc} */ @Override public Long getTotalCount() { return elementsCounted; } /** * {@inheritDoc} */ @Override public Collection getFrequentItems() { Collection frequentItems = new ArrayList(); for (CountEntry entry : dataStructure.values()) { if (isFrequent(entry.frequency)) { frequentItems.add(entry.item); } } return frequentItems; } /** *

* Returns whether the provided frequency is a frequent one in terms of * sticky sampling. *

* * @param frequency * The frequency which shall be tested * @return True if the frequency would classify an item as frequent in terms * of sticky sampling, false otherwise */ public boolean isFrequent(long frequency) { return frequency >= (support - error) * elementsCounted; } /** * {@inheritDoc} */ @Override public Set keySet() { return dataStructure.keySet(); } /** * {@inheritDoc} */ @Override public String toString() { StringBuilder sb = new StringBuilder("StickySamplingModel["); for (T key : keySet()) { sb.append(dataStructure.get(key)).append(";"); } sb.append("]"); return sb.toString(); } /** * @see stream.counter.Counter#count(java.lang.Object) */ @Override public void count(T element) { this.incrementCount(element); } /** * @see stream.counter.Counter#getCount(java.lang.Object) */ @Override public Long getCount(T element) { CountEntry entry = dataStructure.get(element); if (entry == null) return 0L; return entry.frequency; } }




© 2015 - 2025 Weber Informatics LLC | Privacy Policy