
org.mydotey.quantile.kll.KllQuantileEstimator Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of quantile-estimator Show documentation
Show all versions of quantile-estimator Show documentation
Implementations of quantile algorithms for streaming data
The newest version!
package org.mydotey.quantile.kll;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import org.mydotey.quantile.QuantileEstimator;
/**
 * Streaming quantile estimator that keeps its samples in a stack of
 * {@link Compactor} buffers.
 *
 * <p>New values enter the buffer at level 0. When a buffer reaches its capacity
 * it is sorted, every other item is discarded at random parity, and the
 * survivors are promoted to the next level. An item stored at level {@code i}
 * therefore counts with weight {@code 2^i} when quantiles and ranks are
 * computed.
 *
 * <p>This class performs no synchronization of its own.
 *
 * <p>NOTE(review): the type arguments on {@code QuantileEstimator} and
 * {@code KllQuantileEstimatorConfig} are reconstructed from how they are used
 * in this class; confirm them against the declarations of those types.
 *
 * @param <T> type of the observed values; ordered by the comparator supplied
 *            by the configuration
 *
 * @author koqizhao
 *
 * Mar 30, 2018
 *
 * see also: https://github.com/edoliberty/streaming-quantiles
 *
 */
public class KllQuantileEstimator<T> implements QuantileEstimator<T> {

    private final KllQuantileEstimatorConfig<T> _config;
    /** Buffers ordered by level; index 0 receives the raw input. */
    private final List<Compactor> _compactors;
    /** Number of levels, kept equal to {@code _compactors.size()}. */
    private int _h;
    /** Number of items currently retained across all levels. */
    private int _size;
    /** Sum of the capacities of all levels; reaching it triggers a compression. */
    private int _maxSize;

    /**
     * Creates an empty estimator with a single level.
     *
     * @param config source of the comparator and of the {@code k} / {@code c}
     *               parameters used to size the levels
     * @throws NullPointerException if {@code config} is null
     */
    public KllQuantileEstimator(KllQuantileEstimatorConfig<T> config) {
        Objects.requireNonNull(config, "config is null");
        _config = config;
        _compactors = new ArrayList<>();
        grow();
    }

    /**
     * Records one observation, compressing once if the retained item count
     * has reached the total capacity.
     *
     * @param value the observed value
     */
    @Override
    public void add(T value) {
        _compactors.get(0).add(value);
        _size++;
        if (_size >= _maxSize)
            compress();
    }

    /**
     * Looks up the retained item for each requested quantile.
     *
     * <p>Each quantile maps to the first item (in comparator order) whose
     * cumulative weight fraction is not below it; quantiles above every
     * fraction map to the largest retained item. The quantiles may be given in
     * any order.
     *
     * @param quantiles the quantiles to resolve
     * @return a map from each requested quantile to its item, or {@code null}
     *         when no item has been retained yet
     */
    @Override
    public Map<Double, T> get(List<Double> quantiles) {
        List<ItemAndQuantile> distribution = cdf();
        if (distribution.isEmpty())
            return null;

        // The single forward pass below only works on ascending quantiles, so
        // walk a sorted copy instead of trusting the caller's ordering.
        List<Double> ascending = new ArrayList<>(quantiles);
        Collections.sort(ascending);

        Map<Double, T> results = new HashMap<>();
        int q = 0;
        int d = 0;
        while (q < ascending.size() && d < distribution.size()) {
            Double wanted = ascending.get(q);
            ItemAndQuantile candidate = distribution.get(d);
            if (candidate.quantile < wanted) {
                d++;
                continue;
            }
            results.put(wanted, candidate.item);
            q++;
        }

        // Anything left lies beyond the last cumulative fraction.
        if (q < ascending.size()) {
            T largest = distribution.get(distribution.size() - 1).item;
            for (; q < ascending.size(); q++)
                results.put(ascending.get(q), largest);
        }
        return results;
    }

    /**
     * Appends a new top level and recomputes the total capacity, since every
     * existing level's capacity depends on the number of levels.
     */
    protected void grow() {
        _compactors.add(new Compactor());
        _h = _compactors.size();
        int maxSize = 0;
        for (int i = 0; i < _h; i++) {
            maxSize += capacity(i);
        }
        _maxSize = maxSize;
    }

    /**
     * Computes the capacity of one level as {@code ceil(c^depth * k) + 1},
     * where {@code depth} is the distance of the level from the top.
     *
     * @param height index of the level, 0 being the input level
     * @return the item count at which that level is compacted
     */
    protected int capacity(int height) {
        int depth = _h - 1 - height;
        return (int) Math.ceil(Math.pow(_config.getC(), depth) * _config.getK()) + 1;
    }

    /**
     * Compacts the lowest level that has reached its capacity, promoting the
     * survivors one level up (adding a level first when needed), then
     * recounts the retained items. At most one level is compacted per call.
     */
    protected void compress() {
        for (int level = 0; level < _compactors.size(); level++) {
            Compactor candidate = _compactors.get(level);
            if (candidate.size() < capacity(level))
                continue;
            if (level + 1 >= _h)
                grow();
            _compactors.get(level + 1).addAll(candidate.compact());
            _size = retainedCount();
            return;
        }
    }

    /**
     * Builds the weighted cumulative distribution of the retained items.
     *
     * @return the retained items in comparator order, each paired with the
     *         fraction of total weight at or before it; empty when nothing is
     *         retained
     */
    protected List<ItemAndQuantile> cdf() {
        List<ItemAndWeight> weighted = sortedWeightedItems();

        // Accumulate in long: the total weight approximates the number of
        // values ever added and can exceed the int range.
        long totalWeight = 0;
        for (ItemAndWeight entry : weighted)
            totalWeight += entry.weight;

        List<ItemAndQuantile> cdf = new ArrayList<>(weighted.size());
        long cumulative = 0;
        for (ItemAndWeight entry : weighted) {
            cumulative += entry.weight;
            cdf.add(new ItemAndQuantile(entry.item, (double) cumulative / totalWeight));
        }
        return cdf;
    }

    /**
     * Folds the items retained by another estimator into this one, level by
     * level, then compresses until the retained count is below capacity.
     *
     * @param other the estimator whose retained items are copied in; it is
     *              not modified
     * @throws NullPointerException if {@code other} is null
     */
    protected void merge(KllQuantileEstimator<T> other) {
        Objects.requireNonNull(other, "other is null");
        while (_h < other._h)
            grow();
        for (int level = 0; level < other._h; level++)
            _compactors.get(level).addAll(other._compactors.get(level));
        _size = retainedCount();
        while (_size >= _maxSize)
            compress();
    }

    /**
     * Estimates how many observed values are less than or equal to
     * {@code value} by summing the weights of the retained items that are.
     *
     * @param value the value to rank
     * @return the weighted count, capped at {@link Integer#MAX_VALUE}
     */
    protected int rank(T value) {
        long total = 0;
        for (int level = 0; level < _compactors.size(); level++) {
            Compactor compactor = _compactors.get(level);
            int weight = levelWeight(level);
            for (int j = 0; j < compactor.size(); j++) {
                if (_config.getComparator().compare(compactor.get(j), value) <= 0)
                    total += weight;
            }
        }
        return (int) Math.min(total, Integer.MAX_VALUE);
    }

    /**
     * Lists the retained items in comparator order, each paired with its
     * cumulative weight (its estimated rank).
     *
     * @return items with cumulative weights, each capped at
     *         {@link Integer#MAX_VALUE}
     */
    protected List<ItemAndWeight> ranks() {
        List<ItemAndWeight> weighted = sortedWeightedItems();
        List<ItemAndWeight> ranksList = new ArrayList<>(weighted.size());
        long cumulative = 0;
        for (ItemAndWeight entry : weighted) {
            cumulative += entry.weight;
            // Cap rather than wrap so ranks stay monotonic on very long streams.
            ranksList.add(new ItemAndWeight(entry.item, (int) Math.min(cumulative, Integer.MAX_VALUE)));
        }
        return ranksList;
    }

    /** Pairs every retained item with the weight of its level and sorts the pairs. */
    private List<ItemAndWeight> sortedWeightedItems() {
        List<ItemAndWeight> weighted = new ArrayList<>();
        for (int level = 0; level < _compactors.size(); level++) {
            int weight = levelWeight(level);
            for (T item : _compactors.get(level))
                weighted.add(new ItemAndWeight(item, weight));
        }
        Collections.sort(weighted);
        return weighted;
    }

    /** Counts the items currently held across all levels. */
    private int retainedCount() {
        int count = 0;
        for (Compactor compactor : _compactors)
            count += compactor.size();
        return count;
    }

    /** Returns {@code 2^level} exactly, saturating at {@link Integer#MAX_VALUE}. */
    private static int levelWeight(int level) {
        return level < Integer.SIZE - 1 ? 1 << level : Integer.MAX_VALUE;
    }

    /**
     * Buffer holding the items of one level.
     */
    protected class Compactor extends ArrayList<T> {

        private static final long serialVersionUID = 1L;

        /**
         * Sorts the buffer and removes items pairwise from the top, keeping
         * one item of each pair. A single random draw decides whether the
         * lower or the upper item of every pair survives. With an odd item
         * count the smallest item stays in this buffer.
         *
         * @return the surviving items, largest first
         */
        public List<T> compact() {
            sort(KllQuantileEstimator.this._config.getComparator());
            List<T> survivors = new ArrayList<>(size() / 2);
            boolean keepLower = Math.random() < 0.5;
            while (size() >= 2) {
                T upper = remove(size() - 1);
                T lower = remove(size() - 1);
                survivors.add(keepLower ? lower : upper);
            }
            return survivors;
        }

    }

    /**
     * A retained item together with an integer weight. Ordered by item using
     * the configured comparator, then by weight.
     */
    protected class ItemAndWeight implements Comparable<ItemAndWeight> {

        public T item;
        public int weight;

        public ItemAndWeight(T item, int weight) {
            this.item = item;
            this.weight = weight;
        }

        /**
         * Compares by item first and by weight second; a {@code null}
         * argument sorts before any instance.
         */
        @Override
        public int compareTo(ItemAndWeight o) {
            if (o == null)
                return 1;
            int byItem = KllQuantileEstimator.this._config.getComparator().compare(item, o.item);
            if (byItem != 0)
                return byItem;
            return Integer.compare(weight, o.weight);
        }

    }

    /**
     * A retained item together with the cumulative weight fraction at or
     * before it.
     */
    protected class ItemAndQuantile {

        public T item;
        public double quantile;

        public ItemAndQuantile(T item, double quantile) {
            this.item = item;
            this.quantile = quantile;
        }

        @Override
        public String toString() {
            return String.format("{ item: %s, quantile: %s }", item, quantile);
        }

    }

}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy