nl.vpro.util.BatchedReceiverSpliterator Maven / Gradle / Ivy
package nl.vpro.util;
import lombok.ToString;
import lombok.extern.slf4j.Slf4j;
import java.util.Iterator;
import java.util.Spliterator;
import java.util.function.BiFunction;
import java.util.function.Consumer;
import java.util.function.Supplier;
import com.google.common.annotations.Beta;
/**
* Given some API which supplies only 'batched' retrieval (so with offset and max/batchsize parameters),
* access such an API as an iterator to visit all elements.
*
* If an API provides access to huge set of elements, they often do it with some paging mechanism, or by some 'resumption token' formalism. With {@link BatchedReceiverSpliterator} this can be morphed into a simple {@link Spliterator}.
*
* Paging
* The 'batchGetter' argument should be a {@link BiFunction}, returning an iterator for the page described by given offset and batch size
*
* {@code
* Spliterator i = BatchedReceiverSpliterator.builder()
* .batchGetter((offset, max) ->
* apiClient.getPage(offset, max).iterator()
* )
* .batchSize(6)
* .build();
* i.forEachRemaining(string -> {
* ...
* });
* }
* Resumption token formalism
* You simply provide a {@link Supplier}. A lambda would probably not suffice because you might need the previous result the get the next one. E.g. this (using olingo code)
*
* {@code
* public Iterator iterate(URIBuilder ub) {
* return BatchedReceiver.builder()
* .batchGetter(new Supplier>() {
* ClientEntitySet result;
* @Override
* public Iterator get() {
* if (result != null) {
* result = query(result.getNext());
* } else {
* result = query(ub);
* }
* return result.getEntities().iterator();
* }
* })
* .build();
* }
* }
*
*
* @author Michiel Meeuwissen
* @since 2.12
*
* TODO: Unfinished the idea is to port this for {@link BatchedReceiver}.
*/
@ToString
@Slf4j
@Beta
public class BatchedReceiverSpliterator implements Spliterator {
final Integer batchSize;
final BiFunction> batchGetter;
long subCount = 0;
long offset;
Iterator subIterator;
Boolean hasNext;
T next;
@lombok.Builder(builderClassName = "Builder", buildMethodName = "_build")
public BatchedReceiverSpliterator(
Integer batchSize,
Long offset,
BiFunction> _batchGetter) {
this.batchSize = batchSize;
this.batchGetter = _batchGetter;
this.offset = offset == null ? 0L : offset;
}
@Override
public boolean tryAdvance(Consumer super T> action) {
findNext();
if (hasNext) {
action.accept(next);
return true;
} else {
return false;
}
}
@Override
public Spliterator trySplit() {
return null;
}
@Override
public long estimateSize() {
return Long.MAX_VALUE;
}
@Override
public int characteristics() {
return Spliterator.IMMUTABLE;
}
private enum BatchType {
BIFUNCTION,
SUPPLIER
}
public static class Builder {
private BatchType batchType = null;
/**
*
* @param batchGetter A function to get the next batch, the parameters are the current necessary offset, and batch size
*/
public Builder batchGetter(BiFunction> batchGetter) {
batchType = BatchType.BIFUNCTION;
return _batchGetter(batchGetter);
}
/**
* @param batchGetter For 'resumption token' like functionality, the offset and max argument can be irrelevant.
*/
public Builder batchGetter(final Supplier> batchGetter) {
batchType = BatchType.SUPPLIER;
return _batchGetter((offset, max) -> batchGetter.get());
}
public BatchedReceiverSpliterator build() {
if (_batchGetter == null) {
throw new IllegalStateException("No batch getter defined");
}
if (batchType == BatchType.BIFUNCTION && batchSize == null) {
log.debug("Specified a bifunction, and nobatch size. The batch size is implicetely set to 100");
batchSize(100);
}
if (batchType == BatchType.SUPPLIER && batchSize != null) {
log.warn("Specified a supplier, and a batch size. The batch size is ignored");
batchSize(null);
}
return _build();
}
}
protected void findNext() {
if (hasNext == null) {
if (subIterator == null) {
subIterator = batchGetter.apply(offset, batchSize);
subCount = 0;
if (subIterator == null) {
hasNext = false;
return;
}
}
if (subIterator.hasNext()) {
next = subIterator.next();
subCount++;
hasNext = true;
} else {
offset += subCount;
if (batchSize == null || subCount == batchSize) {
subIterator = batchGetter.apply(offset, batchSize);
} else {
subIterator = null;
}
subCount = 0;
if (subIterator == null) {
hasNext = false;
return;
}
hasNext = subIterator.hasNext();
if (hasNext) {
next = subIterator.next();
subCount++;
}
}
}
}
}