All Downloads are FREE. Search and download functionalities are using the official Maven repository.

io.datakernel.datastream.processor.StreamSorter Maven / Gradle / Ivy

/*
 * Copyright (C) 2015 SoftIndex LLC.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package io.datakernel.datastream.processor;

import io.datakernel.async.process.AsyncCollector;
import io.datakernel.datastream.*;
import io.datakernel.promise.Promise;

import java.util.ArrayList;
import java.util.Comparator;
import java.util.Iterator;
import java.util.List;
import java.util.NoSuchElementException;
import java.util.function.Function;

/**
 * Represent {@link StreamTransformer} which receives data and saves it in collection, when it
 * receive end of stream it sorts it and streams to destination.
 *
 * @param  type of keys
 * @param  type of objects
 */
public final class StreamSorter implements StreamTransformer {
	private final AsyncCollector> temporaryStreamsCollector;
	private final StreamSorterStorage storage;
	private final Function keyFunction;
	private final Comparator keyComparator;
	private final Comparator itemComparator;
	private final boolean distinct;
	private final int itemsInMemory;

	private Input input;
	private StreamSupplier output;
	private StreamConsumer outputConsumer;

	// region creators
	private StreamSorter(StreamSorterStorage storage,
			Function keyFunction, Comparator keyComparator, boolean distinct,
			int itemsInMemory) {
		this.storage = storage;
		this.keyFunction = keyFunction;
		this.keyComparator = keyComparator;
		this.itemComparator = (item1, item2) -> {
			K key1 = keyFunction.apply(item1);
			K key2 = keyFunction.apply(item2);
			return keyComparator.compare(key1, key2);
		};
		this.distinct = distinct;
		this.itemsInMemory = itemsInMemory;

		this.input = new Input();

		this.output = new ForwardingStreamSupplier(StreamSupplier.ofPromise(
				(this.temporaryStreamsCollector = AsyncCollector.create(new ArrayList<>()))
						.run(input.getEndOfStream())
						.get()
						.map(streamIds -> {
							input.list.sort(itemComparator);
							Iterator iterator = !distinct ?
									input.list.iterator() :
									new DistinctIterator<>(input.list, keyFunction, keyComparator);
							StreamSupplier listSupplier = StreamSupplier.ofIterator(iterator);
							if (streamIds.isEmpty()) {
								return listSupplier;
							} else {
								StreamMerger streamMerger = StreamMerger.create(keyFunction, keyComparator, distinct);
								listSupplier.streamTo(streamMerger.newInput());
								streamIds.forEach(streamId ->
										StreamSupplier.ofPromise(storage.read(streamId))
												.streamTo(streamMerger.newInput()));
								return streamMerger
										.getOutput()
										.withLateBinding();
							}
						})
		)) {
			@Override
			public void setConsumer(StreamConsumer consumer) {
				super.setConsumer(consumer);
				outputConsumer = consumer;
			}
		};
	}

	private static final class DistinctIterator implements Iterator {
		private final ArrayList sortedList;
		private final Function keyFunction;
		private final Comparator keyComparator;
		int i = 0;

		private DistinctIterator(ArrayList sortedList, Function keyFunction, Comparator keyComparator) {
			this.sortedList = sortedList;
			this.keyFunction = keyFunction;
			this.keyComparator = keyComparator;
		}

		@Override
		public boolean hasNext() {
			return i < sortedList.size();
		}

		@Override
		public T next() {
			T next = sortedList.get(i++);
			K nextKey = keyFunction.apply(next);
			while (i < sortedList.size()) {
				if (keyComparator.compare(nextKey, keyFunction.apply(sortedList.get(i))) == 0) {
					i++;
					continue;
				}
				break;
			}
			return next;
		}
	}

	/**
	 * Creates a new instance of StreamSorter
	 *
	 * @param storage           storage for storing elements which was no placed to RAM
	 * @param keyFunction       function for searching key
	 * @param keyComparator     comparator for comparing key
	 * @param distinct          if it is true it means that in result will be not objects with same key
	 * @param itemsInMemorySize size of elements which can be saved in RAM before sorting
	 */
	public static  StreamSorter create(StreamSorterStorage storage,
			Function keyFunction, Comparator keyComparator, boolean distinct,
			int itemsInMemorySize) {
		return new StreamSorter<>(storage, keyFunction, keyComparator, distinct, itemsInMemorySize);
	}
	// endregion

	private final class Input extends AbstractStreamConsumer implements StreamDataAcceptor {
		private ArrayList list = new ArrayList<>();

		@Override
		protected void onStarted() {
			getSupplier().resume(this);
		}

		@Override
		public void accept(T item) {
			list.add(item);
			if (list.size() >= itemsInMemory) {
				list.sort(itemComparator);
				Iterator iterator = !distinct ?
						input.list.iterator() :
						new DistinctIterator<>(input.list, keyFunction, keyComparator);
				writeToTemporaryStorage(iterator)
						.whenResult($ -> suspendOrResume());
				suspendOrResume();
				list = new ArrayList<>(itemsInMemory);
			}
		}

		private Promise writeToTemporaryStorage(Iterator sortedList) {
			return temporaryStreamsCollector.addPromise(
					storage.newPartitionId()
							.then(partitionId -> storage.write(partitionId)
									.then(consumer -> StreamSupplier.ofIterator(sortedList).streamTo(consumer)
											.map($ -> partitionId))),
					List::add);
		}

		private void suspendOrResume() {
			if (temporaryStreamsCollector.getActivePromises() > 2) {
				getSupplier().suspend();
			} else {
				getSupplier().resume(this);
			}
		}

		@Override
		protected Promise onEndOfStream() {
			return outputConsumer.getAcknowledgement();
		}

		@Override
		protected void onError(Throwable e) {
			// do nothing
		}
	}

	@Override
	public StreamConsumer getInput() {
		return input;
	}

	@Override
	public StreamSupplier getOutput() {
		return output;
	}

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy