io.datakernel.datastream.processor.StreamSorter Maven / Gradle / Ivy
/*
* Copyright (C) 2015 SoftIndex LLC.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.datakernel.datastream.processor;
import io.datakernel.async.process.AsyncCollector;
import io.datakernel.datastream.*;
import io.datakernel.promise.Promise;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.Iterator;
import java.util.List;
import java.util.NoSuchElementException;
import java.util.function.Function;
/**
* Represent {@link StreamTransformer} which receives data and saves it in collection, when it
* receive end of stream it sorts it and streams to destination.
*
* @param type of keys
* @param type of objects
*/
public final class StreamSorter implements StreamTransformer {
private final AsyncCollector extends List> temporaryStreamsCollector;
private final StreamSorterStorage storage;
private final Function keyFunction;
private final Comparator keyComparator;
private final Comparator itemComparator;
private final boolean distinct;
private final int itemsInMemory;
private Input input;
private StreamSupplier output;
private StreamConsumer outputConsumer;
// region creators
private StreamSorter(StreamSorterStorage storage,
Function keyFunction, Comparator keyComparator, boolean distinct,
int itemsInMemory) {
this.storage = storage;
this.keyFunction = keyFunction;
this.keyComparator = keyComparator;
this.itemComparator = (item1, item2) -> {
K key1 = keyFunction.apply(item1);
K key2 = keyFunction.apply(item2);
return keyComparator.compare(key1, key2);
};
this.distinct = distinct;
this.itemsInMemory = itemsInMemory;
this.input = new Input();
this.output = new ForwardingStreamSupplier(StreamSupplier.ofPromise(
(this.temporaryStreamsCollector = AsyncCollector.create(new ArrayList<>()))
.run(input.getEndOfStream())
.get()
.map(streamIds -> {
input.list.sort(itemComparator);
Iterator iterator = !distinct ?
input.list.iterator() :
new DistinctIterator<>(input.list, keyFunction, keyComparator);
StreamSupplier listSupplier = StreamSupplier.ofIterator(iterator);
if (streamIds.isEmpty()) {
return listSupplier;
} else {
StreamMerger streamMerger = StreamMerger.create(keyFunction, keyComparator, distinct);
listSupplier.streamTo(streamMerger.newInput());
streamIds.forEach(streamId ->
StreamSupplier.ofPromise(storage.read(streamId))
.streamTo(streamMerger.newInput()));
return streamMerger
.getOutput()
.withLateBinding();
}
})
)) {
@Override
public void setConsumer(StreamConsumer consumer) {
super.setConsumer(consumer);
outputConsumer = consumer;
}
};
}
private static final class DistinctIterator implements Iterator {
private final ArrayList sortedList;
private final Function keyFunction;
private final Comparator keyComparator;
int i = 0;
private DistinctIterator(ArrayList sortedList, Function keyFunction, Comparator keyComparator) {
this.sortedList = sortedList;
this.keyFunction = keyFunction;
this.keyComparator = keyComparator;
}
@Override
public boolean hasNext() {
return i < sortedList.size();
}
@Override
public T next() {
T next = sortedList.get(i++);
K nextKey = keyFunction.apply(next);
while (i < sortedList.size()) {
if (keyComparator.compare(nextKey, keyFunction.apply(sortedList.get(i))) == 0) {
i++;
continue;
}
break;
}
return next;
}
}
/**
* Creates a new instance of StreamSorter
*
* @param storage storage for storing elements which was no placed to RAM
* @param keyFunction function for searching key
* @param keyComparator comparator for comparing key
* @param distinct if it is true it means that in result will be not objects with same key
* @param itemsInMemorySize size of elements which can be saved in RAM before sorting
*/
public static StreamSorter create(StreamSorterStorage storage,
Function keyFunction, Comparator keyComparator, boolean distinct,
int itemsInMemorySize) {
return new StreamSorter<>(storage, keyFunction, keyComparator, distinct, itemsInMemorySize);
}
// endregion
private final class Input extends AbstractStreamConsumer implements StreamDataAcceptor {
private ArrayList list = new ArrayList<>();
@Override
protected void onStarted() {
getSupplier().resume(this);
}
@Override
public void accept(T item) {
list.add(item);
if (list.size() >= itemsInMemory) {
list.sort(itemComparator);
Iterator iterator = !distinct ?
input.list.iterator() :
new DistinctIterator<>(input.list, keyFunction, keyComparator);
writeToTemporaryStorage(iterator)
.whenResult($ -> suspendOrResume());
suspendOrResume();
list = new ArrayList<>(itemsInMemory);
}
}
private Promise writeToTemporaryStorage(Iterator sortedList) {
return temporaryStreamsCollector.addPromise(
storage.newPartitionId()
.then(partitionId -> storage.write(partitionId)
.then(consumer -> StreamSupplier.ofIterator(sortedList).streamTo(consumer)
.map($ -> partitionId))),
List::add);
}
private void suspendOrResume() {
if (temporaryStreamsCollector.getActivePromises() > 2) {
getSupplier().suspend();
} else {
getSupplier().resume(this);
}
}
@Override
protected Promise onEndOfStream() {
return outputConsumer.getAcknowledgement();
}
@Override
protected void onError(Throwable e) {
// do nothing
}
}
@Override
public StreamConsumer getInput() {
return input;
}
@Override
public StreamSupplier getOutput() {
return output;
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy