// org.apache.flink.runtime.operators.sort.MergeIterator
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.flink.runtime.operators.sort;
import org.apache.flink.api.common.typeutils.TypeComparator;
import org.apache.flink.api.common.typeutils.TypeSerializer;
import org.apache.flink.util.MutableObjectIterator;
import java.io.IOException;
import java.util.Comparator;
import java.util.List;
/**
* An iterator that returns a sorted merge of the sequences of elements from a
* set of iterators, assuming those sequences are ordered themselves.
* The iterators to be merged are kept internally as a heap, making each access
* to the next smallest element logarithmic in complexity, with respect to the
* number of streams to be merged.
* The order among the elements is established using the methods from the
* {@link TypeComparator} class, specifically {@link TypeComparator#setReference(Object)}
* and {@link TypeComparator#compareToReference(TypeComparator)}.
*/
public class MergeIterator implements MutableObjectIterator {
private final PartialOrderPriorityQueue> heap; // heap over the head elements of the stream
/**
* @param iterators
* @param comparator
* @throws IOException
*/
public MergeIterator(List> iterators, TypeComparator comparator) throws IOException {
this.heap = new PartialOrderPriorityQueue>(new HeadStreamComparator(), iterators.size());
for (MutableObjectIterator iterator : iterators) {
this.heap.add(new HeadStream(iterator, comparator.duplicate()));
}
}
/**
* Gets the next smallest element, with respect to the definition of order implied by
* the {@link TypeSerializer} provided to this iterator.
*
* @param reuse Object that may be reused.
* @return The next element if the iterator has another element, null otherwise.
*
* @see org.apache.flink.util.MutableObjectIterator#next(java.lang.Object)
*/
@Override
public E next(E reuse) throws IOException {
/* There are three ways to handle object reuse:
* 1) reuse and return the given object
* 2) ignore the given object and return a new object
* 3) exchange the given object for an existing object
*
* The first option is not available here as the return value has
* already been deserialized from the heap's top iterator. The second
* option avoids object reuse. The third option is implemented below
* by passing the given object to the heap's top iterator into which
* the next value will be deserialized.
*/
if (this.heap.size() > 0) {
// get the smallest element
final HeadStream top = this.heap.peek();
E result = top.getHead();
// read an element
if (!top.nextHead(reuse)) {
this.heap.poll();
} else {
this.heap.adjustTop();
}
return result;
}
else {
return null;
}
}
/**
* Gets the next smallest element, with respect to the definition of order implied by
* the {@link TypeSerializer} provided to this iterator.
*
* @return The next element if the iterator has another element, null otherwise.
*
* @see org.apache.flink.util.MutableObjectIterator#next()
*/
@Override
public E next() throws IOException {
if (this.heap.size() > 0) {
// get the smallest element
final HeadStream top = this.heap.peek();
E result = top.getHead();
// read an element
if (!top.nextHead()) {
this.heap.poll();
} else {
this.heap.adjustTop();
}
return result;
}
else {
return null;
}
}
// ============================================================================================
// Internal Classes that wrap the sorted input streams
// ============================================================================================
private static final class HeadStream {
private final MutableObjectIterator iterator;
private final TypeComparator comparator;
private E head;
public HeadStream(MutableObjectIterator iterator, TypeComparator comparator) throws IOException {
this.iterator = iterator;
this.comparator = comparator;
if (!nextHead()) {
throw new IllegalStateException();
}
}
public E getHead() {
return this.head;
}
public boolean nextHead(E reuse) throws IOException {
if ((this.head = this.iterator.next(reuse)) != null) {
this.comparator.setReference(this.head);
return true;
}
else {
return false;
}
}
public boolean nextHead() throws IOException {
if ((this.head = this.iterator.next()) != null) {
this.comparator.setReference(this.head);
return true;
}
else {
return false;
}
}
}
// --------------------------------------------------------------------------------------------
private static final class HeadStreamComparator implements Comparator> {
@Override
public int compare(HeadStream o1, HeadStream o2) {
return o2.comparator.compareToReference(o1.comparator);
}
}
}