com.clearspring.analytics.stream.ConcurrentStreamSummary Maven / Gradle / Ivy
/*
* Copyright (C) 2011 Clearspring Technologies, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.clearspring.analytics.stream;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicLong;
import java.util.concurrent.atomic.AtomicReference;
/**
* Based on the Space-Saving algorithm and the Stream-Summary
* data structure as described in:
* Efficient Computation of Frequent and Top-k Elements in Data Streams
* by Metwally, Agrawal, and Abbadi
*
* Ideally used in multithreaded applications, otherwise see {@link StreamSummary}
*
* @param type of data in the stream to be summarized
* @author Eric Vlaanderen
*/
public class ConcurrentStreamSummary implements ITopK {
private final int capacity;
private final ConcurrentHashMap> itemMap;
private final AtomicReference> minVal;
private final AtomicLong size;
private final AtomicBoolean reachCapacity;
public ConcurrentStreamSummary(final int capacity) {
this.capacity = capacity;
this.minVal = new AtomicReference>();
this.size = new AtomicLong(0);
this.itemMap = new ConcurrentHashMap>(capacity);
this.reachCapacity = new AtomicBoolean(false);
}
@Override
public boolean offer(final T element) {
return offer(element, 1);
}
@Override
public boolean offer(final T element, final int incrementCount) {
long val = incrementCount;
ScoredItem value = new ScoredItem(element, incrementCount);
ScoredItem oldVal = itemMap.putIfAbsent(element, value);
if (oldVal != null) {
val = oldVal.addAndGetCount(incrementCount);
} else if (reachCapacity.get() || size.incrementAndGet() > capacity) {
reachCapacity.set(true);
ScoredItem oldMinVal = minVal.getAndSet(value);
itemMap.remove(oldMinVal.getItem());
while (oldMinVal.isNewItem()) {
// Wait for the oldMinVal so its error and value are completely up to date.
// no thread.sleep here due to the overhead of calling it - the waiting time will be microseconds.
}
long count = oldMinVal.getCount();
value.addAndGetCount(count);
value.setError(count);
}
value.setNewItem(false);
minVal.set(getMinValue());
return val != incrementCount;
}
private ScoredItem getMinValue() {
ScoredItem minVal = null;
for (ScoredItem entry : itemMap.values()) {
if (minVal == null || (!entry.isNewItem() && entry.getCount() < minVal.getCount())) {
minVal = entry;
}
}
return minVal;
}
@Override
public String toString() {
StringBuilder sb = new StringBuilder();
sb.append("[");
for (ScoredItem entry : itemMap.values()) {
sb.append("(" + entry.getCount() + ": " + entry.getItem() + ", e: " + entry.getError() + "),");
}
sb.deleteCharAt(sb.length() - 1);
sb.append("]");
return sb.toString();
}
@Override
public List peek(final int k) {
List toReturn = new ArrayList(k);
List> values = peekWithScores(k);
for (ScoredItem value : values) {
toReturn.add(value.getItem());
}
return toReturn;
}
public List> peekWithScores(final int k) {
List> values = new ArrayList>();
for (Map.Entry> entry : itemMap.entrySet()) {
ScoredItem value = entry.getValue();
values.add(new ScoredItem(value.getItem(), value.getCount(), value.getError()));
}
Collections.sort(values);
values = values.size() > k ? values.subList(0, k) : values;
return values;
}
}
© 2015 - 2024 Weber Informatics LLC | Privacy Policy