Many resources are needed to download a project. Please understand that we have to cover our server costs. Thank you in advance. The project price is only $1.
You can buy this project and download/modify it as often as you want.
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.flink.runtime.operators.sort;
import java.io.File;
import java.io.IOException;
import java.util.ArrayDeque;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Queue;
import java.util.concurrent.ArrayBlockingQueue;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.LinkedBlockingQueue;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.flink.api.common.typeutils.TypeComparator;
import org.apache.flink.api.common.typeutils.TypeSerializer;
import org.apache.flink.api.common.typeutils.TypeSerializerFactory;
import org.apache.flink.core.memory.MemorySegment;
import org.apache.flink.runtime.io.disk.ChannelReaderInputViewIterator;
import org.apache.flink.runtime.io.disk.iomanager.FileIOChannel;
import org.apache.flink.runtime.io.disk.iomanager.BlockChannelReader;
import org.apache.flink.runtime.io.disk.iomanager.BlockChannelWriter;
import org.apache.flink.runtime.io.disk.iomanager.ChannelReaderInputView;
import org.apache.flink.runtime.io.disk.iomanager.ChannelWriterOutputView;
import org.apache.flink.runtime.io.disk.iomanager.IOManager;
import org.apache.flink.runtime.io.disk.iomanager.FileIOChannel.ID;
import org.apache.flink.runtime.jobgraph.tasks.AbstractInvokable;
import org.apache.flink.runtime.memory.MemoryAllocationException;
import org.apache.flink.runtime.memory.MemoryManager;
import org.apache.flink.runtime.util.EmptyMutableObjectIterator;
import org.apache.flink.util.MutableObjectIterator;
/**
* The {@link UnilateralSortMerger} is a full-fledged sorter. It implements a multi-way merge sort. Internally,
* the logic is factored into three threads (read, sort, spill) which communicate through a set of blocking queues,
* forming a closed loop. Memory is allocated using the {@link MemoryManager} interface. Thus the component will
* not exceed the provided memory limits.
*/
public class UnilateralSortMerger implements Sorter {
// ------------------------------------------------------------------------
// Constants
// ------------------------------------------------------------------------
/** Logging. */
private static final Logger LOG = LoggerFactory.getLogger(UnilateralSortMerger.class);
/** Fixed-length records with a length below this threshold will be sorted in place, if possible. */
private static final int THRESHOLD_FOR_IN_PLACE_SORTING = 32;
/** The minimal number of buffers to use by the writers. */
protected static final int MIN_NUM_WRITE_BUFFERS = 2;
/** The maximal number of buffers to use by the writers. */
protected static final int MAX_NUM_WRITE_BUFFERS = 4;
/** The minimum number of segments that are required for the sort to operate. */
protected static final int MIN_NUM_SORT_MEM_SEGMENTS = 10;
// ------------------------------------------------------------------------
// Threads
// ------------------------------------------------------------------------
/** The thread that reads the input channels into buffers and passes them on to the merger. */
private final ThreadBase readThread;
/** The thread that merges the buffer handed from the reading thread. */
private final ThreadBase sortThread;
/** The thread that handles spilling to secondary storage. */
private final ThreadBase spillThread;
// ------------------------------------------------------------------------
// Memory
// ------------------------------------------------------------------------
/** The memory segments used first for sorting and later for reading/pre-fetching
* during the external merge. */
protected final List sortReadMemory;
/** The memory segments used to stage data to be written. */
protected final List writeMemory;
/** The memory manager through which memory is allocated and released. */
protected final MemoryManager memoryManager;
// ------------------------------------------------------------------------
// Miscellaneous Fields
// ------------------------------------------------------------------------
/**
* The handler for large records that do not go through the in-memory sorter as a whole, but
* directly go to disk.
*/
private final LargeRecordHandler largeRecordHandler;
/**
* Collection of all currently open channels, to be closed and deleted during cleanup.
*/
private final HashSet openChannels;
/**
* Collection of all temporary files created and to be removed when closing the sorter.
*/
private final HashSet channelsToDeleteAtShutdown;
/**
* The monitor which guards the iterator field.
*/
protected final Object iteratorLock = new Object();
/**
* The iterator to be returned by the sort-merger. This variable is null, while receiving and merging is still in
* progress and it will be set once we have < merge factor sorted sub-streams that will then be streamed sorted.
*/
protected volatile MutableObjectIterator iterator;
/**
* The exception that is set, if the iterator cannot be created.
*/
protected volatile IOException iteratorException;
/**
* Flag indicating that the sorter was closed.
*/
protected volatile boolean closed;
/**
* Whether to reuse objects during deserialization.
*/
protected final boolean objectReuseEnabled;
// ------------------------------------------------------------------------
// Constructor & Shutdown
// ------------------------------------------------------------------------
/**
* Creates a sorter that allocates its memory from the memory manager and lets the sorter pick
* the number of sort buffers.
*
* Delegates to the constructor that takes an explicit {@code numSortBuffers}, passing {@code -1}
* for it. A value below one makes the main constructor choose the number of sort buffers itself.
*
* @param memoryManager The memory manager through which memory is allocated and released.
* @param ioManager The I/O manager handed on to the spilling thread and the large record handler.
* @param input The input that is handed to the reading thread.
* @param parentTask The parent task; must not be null.
* @param serializerFactory The factory for the record serializer; must not be null.
* @param comparator The comparator for the records; must not be null.
* @param memoryFraction The fraction of memory, converted into a page count by the memory manager.
* @param maxNumFileHandles The maximal merge fan-in; must be at least two.
* @param startSpillingFraction The fraction of the sort memory after which spilling is triggered.
* @param handleLargeRecords Whether memory is set aside for a large record handler.
* @param objectReuseEnabled Whether to reuse objects during deserialization.
* @throws IOException Declared by the constructor this one delegates to.
* @throws MemoryAllocationException Declared by the constructor this one delegates to.
*/
public UnilateralSortMerger(MemoryManager memoryManager, IOManager ioManager,
MutableObjectIterator input, AbstractInvokable parentTask,
TypeSerializerFactory serializerFactory, TypeComparator comparator,
double memoryFraction, int maxNumFileHandles, float startSpillingFraction,
boolean handleLargeRecords, boolean objectReuseEnabled)
throws IOException, MemoryAllocationException
{
// -1 means "no explicit number of sort buffers"
this(memoryManager, ioManager, input, parentTask, serializerFactory, comparator,
memoryFraction, -1, maxNumFileHandles, startSpillingFraction, handleLargeRecords, objectReuseEnabled);
}
/**
* Creates a sorter that allocates its memory from the memory manager and uses the given number
* of sort buffers.
*
* Delegates to the protected constructor with the same parameters plus {@code noSpillingMemory},
* for which it passes {@code false}.
*
* @param memoryManager The memory manager through which memory is allocated and released.
* @param ioManager The I/O manager handed on to the spilling thread and the large record handler.
* @param input The input that is handed to the reading thread.
* @param parentTask The parent task; must not be null.
* @param serializerFactory The factory for the record serializer; must not be null.
* @param comparator The comparator for the records; must not be null.
* @param memoryFraction The fraction of memory, converted into a page count by the memory manager.
* @param numSortBuffers The number of sort buffers; a value below one lets the sorter decide.
* @param maxNumFileHandles The maximal merge fan-in; must be at least two.
* @param startSpillingFraction The fraction of the sort memory after which spilling is triggered.
* @param handleLargeRecords Whether memory is set aside for a large record handler.
* @param objectReuseEnabled Whether to reuse objects during deserialization.
* @throws IOException Declared by the constructor this one delegates to.
* @throws MemoryAllocationException Declared by the constructor this one delegates to.
*/
public UnilateralSortMerger(MemoryManager memoryManager, IOManager ioManager,
MutableObjectIterator input, AbstractInvokable parentTask,
TypeSerializerFactory serializerFactory, TypeComparator comparator,
double memoryFraction, int numSortBuffers, int maxNumFileHandles,
float startSpillingFraction, boolean handleLargeRecords, boolean objectReuseEnabled)
throws IOException, MemoryAllocationException
{
// 'false' is the noSpillingMemory flag: memory for spilling is reserved
this(memoryManager, ioManager, input, parentTask, serializerFactory, comparator,
memoryFraction, numSortBuffers, maxNumFileHandles, startSpillingFraction, false, handleLargeRecords,
objectReuseEnabled);
}
/**
* Creates a sorter that works on an already allocated list of memory pages.
*
* Delegates to the protected constructor with the same parameters plus {@code noSpillingMemory},
* for which it passes {@code false}.
*
* @param memoryManager The memory manager through which the memory is released.
* @param memory The memory pages the sorter works with.
* @param ioManager The I/O manager handed on to the spilling thread and the large record handler.
* @param input The input that is handed to the reading thread.
* @param parentTask The parent task; must not be null.
* @param serializerFactory The factory for the record serializer; must not be null.
* @param comparator The comparator for the records; must not be null.
* @param numSortBuffers The number of sort buffers; a value below one lets the sorter decide.
* @param maxNumFileHandles The maximal merge fan-in; must be at least two.
* @param startSpillingFraction The fraction of the sort memory after which spilling is triggered.
* @param handleLargeRecords Whether memory is set aside for a large record handler.
* @param objectReuseEnabled Whether to reuse objects during deserialization.
* @throws IOException Declared by the constructor this one delegates to.
*/
public UnilateralSortMerger(MemoryManager memoryManager, List memory,
IOManager ioManager,
MutableObjectIterator input, AbstractInvokable parentTask,
TypeSerializerFactory serializerFactory, TypeComparator comparator,
int numSortBuffers, int maxNumFileHandles,
float startSpillingFraction, boolean handleLargeRecords, boolean objectReuseEnabled)
throws IOException
{
// 'false' is the noSpillingMemory flag: memory for spilling is reserved
this(memoryManager, memory, ioManager, input, parentTask, serializerFactory, comparator,
numSortBuffers, maxNumFileHandles, startSpillingFraction, false, handleLargeRecords,
objectReuseEnabled);
}
/**
* Creates a sorter that allocates its memory pages from the memory manager and exposes the
* {@code noSpillingMemory} switch.
*
* The number of pages is obtained from {@code memoryManager.computeNumberOfPages(memoryFraction)}
* and the pages are allocated on behalf of {@code parentTask} before delegating to the main
* constructor.
*
* @param memoryManager The memory manager through which memory is allocated and released.
* @param ioManager The I/O manager handed on to the spilling thread and the large record handler.
* @param input The input that is handed to the reading thread.
* @param parentTask The parent task; must not be null.
* @param serializerFactory The factory for the record serializer; must not be null.
* @param comparator The comparator for the records; must not be null.
* @param memoryFraction The fraction of memory, converted into a page count by the memory manager.
* @param numSortBuffers The number of sort buffers; a value below one lets the sorter decide.
* @param maxNumFileHandles The maximal merge fan-in; must be at least two.
* @param startSpillingFraction The fraction of the sort memory after which spilling is triggered.
* @param noSpillingMemory If true, no write buffers are reserved for spilling.
* @param handleLargeRecords Whether memory is set aside for a large record handler.
* @param objectReuseEnabled Whether to reuse objects during deserialization.
* @throws IOException Declared by the constructor this one delegates to.
* @throws MemoryAllocationException Declared for the page allocation through the memory manager.
*/
protected UnilateralSortMerger(MemoryManager memoryManager,
IOManager ioManager,
MutableObjectIterator input, AbstractInvokable parentTask,
TypeSerializerFactory serializerFactory, TypeComparator comparator,
double memoryFraction, int numSortBuffers, int maxNumFileHandles,
float startSpillingFraction, boolean noSpillingMemory, boolean handleLargeRecords,
boolean objectReuseEnabled)
throws IOException, MemoryAllocationException
{
// the pages are allocated here, as part of the argument list of the delegating call
this(memoryManager, memoryManager.allocatePages(parentTask, memoryManager.computeNumberOfPages(memoryFraction)),
ioManager, input, parentTask, serializerFactory, comparator,
numSortBuffers, maxNumFileHandles, startSpillingFraction, noSpillingMemory, handleLargeRecords,
objectReuseEnabled);
}
/**
* Main constructor: validates the arguments, partitions the given memory, creates the worker
* threads and starts them.
*
* The given pages are split into sort buffers, write buffers (unless {@code noSpillingMemory} is
* set) and buffers for the large record handler (if {@code handleLargeRecords} is set). The
* reading, sorting and spilling threads are obtained from the factory methods of this class and
* started through {@link #startThreads()} as the last step.
*
* @param memoryManager The memory manager; must not be null.
* @param memory The memory pages to work with. The list is kept by reference as the sort/read
* memory; pages for writing and for large records are removed from its end.
* @param ioManager The I/O manager; may only be null if {@code noSpillingMemory} is true.
* @param input The input that is handed to the reading thread.
* @param parentTask The parent task; must not be null.
* @param serializerFactory The factory for the record serializer; must not be null.
* @param comparator The comparator for the records; must not be null. Each sort buffer and the
* large record handler receive their own duplicate.
* @param numSortBuffers The number of sort buffers. If below one, two buffers are used when the
* sort memory exceeds 100 * 1024 * 1024 bytes, otherwise one.
* @param maxNumFileHandles The maximal merge fan-in; must be at least two. It is reduced if the
* memory does not suffice for a merge with that fan-in.
* @param startSpillingFraction Multiplied with the sort memory size to obtain the byte threshold
* that is passed to the reading thread.
* @param noSpillingMemory If true, no write buffers are reserved for spilling.
* @param handleLargeRecords Whether memory is set aside for a large record handler.
* @param objectReuseEnabled Whether to reuse objects during deserialization.
* @throws NullPointerException If a required argument is null.
* @throws IllegalArgumentException If {@code maxNumFileHandles} is below two or fewer than
* {@code MIN_NUM_WRITE_BUFFERS + MIN_NUM_SORT_MEM_SEGMENTS} pages are provided.
* @throws IOException Declared by this constructor. NOTE(review): the throwing call is not
* identifiable from this block - confirm.
*/
protected UnilateralSortMerger(MemoryManager memoryManager, List memory,
IOManager ioManager,
MutableObjectIterator input, AbstractInvokable parentTask,
TypeSerializerFactory serializerFactory, TypeComparator comparator,
int numSortBuffers, int maxNumFileHandles,
float startSpillingFraction, boolean noSpillingMemory, boolean handleLargeRecords,
boolean objectReuseEnabled)
throws IOException
{
// sanity checks
// (the non-short-circuit '|' evaluates every operand; all operands are plain null checks)
if (memoryManager == null | (ioManager == null && !noSpillingMemory) | serializerFactory == null | comparator == null) {
throw new NullPointerException();
}
if (parentTask == null) {
throw new NullPointerException("Parent Task must not be null.");
}
if (maxNumFileHandles < 2) {
throw new IllegalArgumentException("Merger cannot work with less than two file handles.");
}
this.memoryManager = memoryManager;
this.objectReuseEnabled = objectReuseEnabled;
// adjust the memory quotas to the page size
final int numPagesTotal = memory.size();
if (numPagesTotal < MIN_NUM_WRITE_BUFFERS + MIN_NUM_SORT_MEM_SEGMENTS) {
throw new IllegalArgumentException("Too little memory provided to sorter to perform task. " +
"Required are at least " + (MIN_NUM_WRITE_BUFFERS + MIN_NUM_SORT_MEM_SEGMENTS) +
" pages. Current page size is " + memoryManager.getPageSize() + " bytes.");
}
// determine how many buffers to use for writing
final int numWriteBuffers;
final int numLargeRecordBuffers;
if (noSpillingMemory && !handleLargeRecords) {
numWriteBuffers = 0;
numLargeRecordBuffers = 0;
}
else {
// one consumer of write buffers for spilling, two for the large record handler;
// at least one of them is present in this branch, so numConsumers is never zero
int numConsumers = (noSpillingMemory ? 0 : 1) + (handleLargeRecords ? 2 : 0);
// determine how many buffers we have when we do a full merge with maximal fan-in
final int minBuffersForMerging = maxNumFileHandles + numConsumers * MIN_NUM_WRITE_BUFFERS;
if (minBuffersForMerging > numPagesTotal) {
// not enough pages for the requested fan-in: use minimal write buffers and lower the fan-in
numWriteBuffers = noSpillingMemory ? 0 : MIN_NUM_WRITE_BUFFERS;
numLargeRecordBuffers = handleLargeRecords ? 2*MIN_NUM_WRITE_BUFFERS : 0;
maxNumFileHandles = numPagesTotal - numConsumers * MIN_NUM_WRITE_BUFFERS;
if (LOG.isDebugEnabled()) {
LOG.debug("Reducing maximal merge fan-in to " + maxNumFileHandles + " due to limited memory availability during merge");
}
}
else {
// we are free to choose. make sure that we do not eat up too much memory for writing
final int fractionalAuxBuffers = numPagesTotal / (numConsumers * 100);
if (fractionalAuxBuffers >= MAX_NUM_WRITE_BUFFERS) {
numWriteBuffers = noSpillingMemory ? 0 : MAX_NUM_WRITE_BUFFERS;
numLargeRecordBuffers = handleLargeRecords ? 2*MAX_NUM_WRITE_BUFFERS : 0;
}
else {
numWriteBuffers = noSpillingMemory ? 0 :
Math.max(MIN_NUM_WRITE_BUFFERS, fractionalAuxBuffers); // at least the lower bound
numLargeRecordBuffers = handleLargeRecords ?
Math.max(2*MIN_NUM_WRITE_BUFFERS, fractionalAuxBuffers) // at least the lower bound
: 0;
}
}
}
final int sortMemPages = numPagesTotal - numWriteBuffers - numLargeRecordBuffers;
// widen to long before multiplying, so the byte count cannot overflow an int
final long sortMemory = ((long) sortMemPages) * memoryManager.getPageSize();
// decide how many sort buffers to use
if (numSortBuffers < 1) {
if (sortMemory > 100 * 1024 * 1024) {
numSortBuffers = 2;
}
else {
numSortBuffers = 1;
}
}
final int numSegmentsPerSortBuffer = sortMemPages / numSortBuffers;
if (LOG.isDebugEnabled()) {
LOG.debug(String.format("Instantiating sorter with %d pages of sorting memory (="
+ "%d bytes total) divided over %d sort buffers (%d pages per buffer). Using %d"
+ " buffers for writing sorted results and merging maximally %d streams at once. "
+ "Using %d memory segments for large record spilling.",
sortMemPages, sortMemory, numSortBuffers, numSegmentsPerSortBuffer, numWriteBuffers,
maxNumFileHandles, numLargeRecordBuffers));
}
// the given list is used directly (not copied); pages are taken from its end below
this.sortReadMemory = memory;
this.writeMemory = new ArrayList(numWriteBuffers);
final TypeSerializer serializer = serializerFactory.getSerializer();
// move some pages from the sort memory to the write memory
if (numWriteBuffers > 0) {
for (int i = 0; i < numWriteBuffers; i++) {
this.writeMemory.add(this.sortReadMemory.remove(this.sortReadMemory.size() - 1));
}
}
// move some pages from the sort memory to the large record handler, if one is requested
if (numLargeRecordBuffers > 0) {
List mem = new ArrayList();
for (int i = 0; i < numLargeRecordBuffers; i++) {
mem.add(this.sortReadMemory.remove(this.sortReadMemory.size() - 1));
}
this.largeRecordHandler = new LargeRecordHandler(serializer, comparator.duplicate(),
ioManager, memoryManager, mem, parentTask, maxNumFileHandles);
}
else {
this.largeRecordHandler = null;
}
// circular queues pass buffers between the threads
final CircularQueues circularQueues = new CircularQueues();
// allocate the sort buffers and fill empty queue with them
final Iterator segments = this.sortReadMemory.iterator();
for (int i = 0; i < numSortBuffers; i++)
{
// grab some memory
final List sortSegments = new ArrayList(numSegmentsPerSortBuffer);
// the last sort buffer takes all remaining segments, so the remainder of the division is not lost
for (int k = (i == numSortBuffers - 1 ? Integer.MAX_VALUE : numSegmentsPerSortBuffer); k > 0 && segments.hasNext(); k--) {
sortSegments.add(segments.next());
}
final TypeComparator comp = comparator.duplicate();
final InMemorySorter buffer;
// instantiate a fix-length in-place sorter, if possible, otherwise the out-of-place sorter
if (comp.supportsSerializationWithKeyNormalization() &&
serializer.getLength() > 0 && serializer.getLength() <= THRESHOLD_FOR_IN_PLACE_SORTING)
{
buffer = new FixedLengthRecordSorter(serializerFactory.getSerializer(), comp, sortSegments);
} else {
buffer = new NormalizedKeySorter(serializerFactory.getSerializer(), comp, sortSegments);
}
// add to empty queue
CircularElement element = new CircularElement(i, buffer, sortSegments);
circularQueues.empty.add(element);
}
// exception handling
ExceptionHandler exceptionHandler = new ExceptionHandler() {
public void handleException(IOException exception) {
// forward exception
// (only while the sorter is not closed: publish the error to waiting callers, then close)
if (!closed) {
setResultIteratorException(exception);
close();
}
}
};
// create sets that track the channels we need to clean up when closing the sorter
this.channelsToDeleteAtShutdown = new HashSet(64);
this.openChannels = new HashSet(64);
// start the thread that reads the input channels
// (the threads are only created here; they are started by startThreads() at the end)
this.readThread = getReadingThread(exceptionHandler, input, circularQueues, largeRecordHandler,
parentTask, serializer, ((long) (startSpillingFraction * sortMemory)));
// start the thread that sorts the buffers
this.sortThread = getSortingThread(exceptionHandler, circularQueues, parentTask);
// start the thread that handles spilling to secondary storage
this.spillThread = getSpillingThread(exceptionHandler, circularQueues, parentTask,
memoryManager, ioManager, serializerFactory, comparator, this.sortReadMemory, this.writeMemory,
maxNumFileHandles);
// propagate the context class loader to the spawned threads
// (the factory methods may return null, hence the null checks)
ClassLoader contextLoader = Thread.currentThread().getContextClassLoader();
if (contextLoader != null) {
if (this.readThread != null) {
this.readThread.setContextClassLoader(contextLoader);
}
if (this.sortThread != null) {
this.sortThread.setContextClassLoader(contextLoader);
}
if (this.spillThread != null) {
this.spillThread.setContextClassLoader(contextLoader);
}
}
startThreads();
}
/**
 * Launches the worker threads of this sort-merger in the order reading, sorting, spilling.
 * A thread that was not created (its field is {@code null}) is skipped.
 */
protected void startThreads() {
    final ThreadBase[] workers = { this.readThread, this.sortThread, this.spillThread };
    for (ThreadBase worker : workers) {
        if (worker != null) {
            worker.start();
        }
    }
}
/**
 * Shuts down all the threads initiated by this sort/merger. Also releases all previously allocated
 * memory, if it has not yet been released by the threads, and closes and deletes all channels
 * (removing the temporary files).
 *
 * <p>The threads are asked to shut down and this method then waits for them to exit. If the calling
 * thread is interrupted while waiting, the method stops waiting, still performs the complete cleanup
 * and restores the thread's interrupt flag before returning. In that case it is not guaranteed how
 * long the worker threads continue to exist and occupy resources afterwards.
 *
 * <p>Only the first call has an effect; later calls return immediately. Failures during cleanup are
 * logged and never propagated, so that every cleanup step is attempted.
 *
 * @see java.io.Closeable#close()
 */
@Override
public void close() {
    // check if the sorter has been closed before
    synchronized (this) {
        if (this.closed) {
            return;
        }
        // mark as closed
        this.closed = true;
    }

    // remembers an interruption of the joins below, so that the flag can be restored
    // once the cleanup is done (restoring it earlier could disturb the cleanup itself)
    boolean interrupted = false;

    // from here on, the code is in a try block, because even though errors might be thrown in
    // this block, we need to make sure that all the memory is released.
    try {
        // if the result iterator has not been obtained yet, set the exception
        synchronized (this.iteratorLock) {
            if (this.iteratorException == null) {
                this.iteratorException = new IOException("The sorter has been closed.");
                this.iteratorLock.notifyAll();
            }
        }

        // stop all the threads
        if (this.readThread != null) {
            try {
                this.readThread.shutdown();
            } catch (Throwable t) {
                LOG.error("Error shutting down reader thread: " + t.getMessage(), t);
            }
        }
        if (this.sortThread != null) {
            try {
                this.sortThread.shutdown();
            } catch (Throwable t) {
                LOG.error("Error shutting down sorter thread: " + t.getMessage(), t);
            }
        }
        if (this.spillThread != null) {
            try {
                this.spillThread.shutdown();
            } catch (Throwable t) {
                LOG.error("Error shutting down spilling thread: " + t.getMessage(), t);
            }
        }

        // wait for the threads to exit
        try {
            if (this.readThread != null) {
                this.readThread.join();
            }
            if (this.sortThread != null) {
                this.sortThread.join();
            }
            if (this.spillThread != null) {
                this.spillThread.join();
            }
        }
        catch (InterruptedException iex) {
            interrupted = true;
            LOG.debug("Closing of sort/merger was interrupted. " +
                    "The reading/sorting/spilling threads may still be working.", iex);
        }
    }
    finally {
        // RELEASE ALL MEMORY. If the threads and channels are still running, this should cause
        // exceptions, because their memory segments are freed
        try {
            if (!this.writeMemory.isEmpty()) {
                this.memoryManager.release(this.writeMemory);
            }
            this.writeMemory.clear();
        }
        catch (Throwable t) {
            LOG.warn("Error releasing the write memory while closing the sorter.", t);
        }

        try {
            if (!this.sortReadMemory.isEmpty()) {
                this.memoryManager.release(this.sortReadMemory);
            }
            this.sortReadMemory.clear();
        }
        catch (Throwable t) {
            LOG.warn("Error releasing the sort/read memory while closing the sorter.", t);
        }

        // we have to loop this, because it may fail with a concurrent modification exception
        while (!this.openChannels.isEmpty()) {
            try {
                for (Iterator<FileIOChannel> channels = this.openChannels.iterator(); channels.hasNext(); ) {
                    final FileIOChannel channel = channels.next();
                    // remove first, so that a failing channel is not retried forever
                    channels.remove();
                    channel.closeAndDelete();
                }
            }
            catch (Throwable t) {
                LOG.debug("Error closing and deleting an open channel while closing the sorter.", t);
            }
        }

        // we have to loop this, because it may fail with a concurrent modification exception
        while (!this.channelsToDeleteAtShutdown.isEmpty()) {
            try {
                for (Iterator<FileIOChannel.ID> channels = this.channelsToDeleteAtShutdown.iterator(); channels.hasNext(); ) {
                    final FileIOChannel.ID channel = channels.next();
                    // remove first, so that a failing deletion is not retried forever
                    channels.remove();
                    try {
                        final File f = new File(channel.getPath());
                        if (f.exists() && !f.delete()) {
                            LOG.warn("Could not delete temporary file {} while closing the sorter.", f);
                        }
                    } catch (Throwable t) {
                        LOG.debug("Error deleting a temporary file while closing the sorter.", t);
                    }
                }
            }
            catch (Throwable t) {
                LOG.debug("Error while removing temporary files of the sorter.", t);
            }
        }

        try {
            if (this.largeRecordHandler != null) {
                this.largeRecordHandler.close();
            }
        } catch (Throwable t) {
            LOG.warn("Error closing the large record handler while closing the sorter.", t);
        }

        // hand the interruption on to the caller instead of silently consuming it
        if (interrupted) {
            Thread.currentThread().interrupt();
        }
    }
}
// ------------------------------------------------------------------------
// Factory Methods
// ------------------------------------------------------------------------
/**
 * Factory method for the reading thread. The thread is only created here, not started.
 *
 * @param exceptionHandler
 *        The handler for exceptions in the thread.
 * @param reader
 *        The input from which the thread reads.
 * @param queues
 *        The queues through which the thread communicates with the other threads.
 * @param largeRecordHandler
 *        The handler for large records; {@code null} if large records are not handled separately.
 * @param parentTask
 *        The parent task, handed on to the thread.
 * @param serializer
 *        The serializer; it is used here to create the record instance given to the thread.
 * @param startSpillingBytes
 *        The number of bytes after which the reader thread will send the notification to
 *        start the spilling.
 *
 * @return The not yet started reading thread.
 */
protected ThreadBase getReadingThread(ExceptionHandler exceptionHandler,
        MutableObjectIterator reader, CircularQueues queues,
        LargeRecordHandler largeRecordHandler, AbstractInvokable parentTask,
        TypeSerializer serializer, long startSpillingBytes)
{
    final ThreadBase readingThread = new ReadingThread(
            exceptionHandler,
            reader,
            queues,
            largeRecordHandler,
            serializer.createInstance(),
            parentTask,
            startSpillingBytes);
    return readingThread;
}
/**
 * Factory method for the sorting thread. The thread is only created here, not started.
 *
 * @param exceptionHandler The handler for exceptions in the thread.
 * @param queues The queues through which the thread communicates with the other threads.
 * @param parentTask The parent task, handed on to the thread.
 *
 * @return The not yet started sorting thread.
 */
protected ThreadBase getSortingThread(ExceptionHandler exceptionHandler, CircularQueues queues,
        AbstractInvokable parentTask)
{
    final ThreadBase sortingThread = new SortingThread(exceptionHandler, queues, parentTask);
    return sortingThread;
}
/**
 * Factory method for the spilling thread. The thread is only created here, not started.
 *
 * @param exceptionHandler The handler for exceptions in the thread.
 * @param queues The queues through which the thread communicates with the other threads.
 * @param parentTask The parent task, handed on to the thread.
 * @param memoryManager The memory manager, handed on to the thread.
 * @param ioManager The I/O manager, handed on to the thread.
 * @param serializerFactory The factory from which the thread's serializer is obtained.
 * @param comparator The comparator, handed on to the thread.
 * @param sortReadMemory The memory segments used for sorting and reading.
 * @param writeMemory The memory segments used to stage data to be written.
 * @param maxFileHandles The maximal number of file handles, handed on to the thread.
 *
 * @return The not yet started spilling thread.
 */
protected ThreadBase getSpillingThread(ExceptionHandler exceptionHandler, CircularQueues queues,
        AbstractInvokable parentTask, MemoryManager memoryManager, IOManager ioManager,
        TypeSerializerFactory serializerFactory, TypeComparator comparator,
        List sortReadMemory, List writeMemory, int maxFileHandles)
{
    final ThreadBase spillingThread = new SpillingThread(
            exceptionHandler,
            queues,
            parentTask,
            memoryManager,
            ioManager,
            serializerFactory.getSerializer(),
            comparator,
            sortReadMemory,
            writeMemory,
            maxFileHandles);
    return spillingThread;
}
// ------------------------------------------------------------------------
// Result Iterator
// ------------------------------------------------------------------------
/**
 * Returns the result iterator, blocking until either the iterator or an exception has been set.
 * A recorded exception takes precedence over an iterator.
 *
 * @return The result iterator.
 * @throws InterruptedException If the calling thread is interrupted while waiting.
 * @throws RuntimeException If an exception was recorded instead of a result; the recorded
 *         exception is attached as the cause.
 */
@Override
public MutableObjectIterator getIterator() throws InterruptedException {
    synchronized (this.iteratorLock) {
        while (true) {
            // a recorded failure wins over a result
            final IOException failure = this.iteratorException;
            if (failure != null) {
                throw new RuntimeException("Error obtaining the sorted input: " + failure.getMessage(),
                        failure);
            }
            if (this.iterator != null) {
                return this.iterator;
            }
            // neither is available yet: wait for a notification on the lock
            this.iteratorLock.wait();
        }
    }
}
/**
 * Publishes the result iterator and wakes up all threads waiting on the iterator lock.
 * The call has no effect if an exception has already been recorded.
 *
 * @param iterator The result iterator to set.
 */
protected final void setResultIterator(MutableObjectIterator iterator) {
    synchronized (this.iteratorLock) {
        if (this.iteratorException != null) {
            // an exception was recorded earlier; the iterator is not set
            return;
        }
        this.iterator = iterator;
        this.iteratorLock.notifyAll();
    }
}
/**
 * Records an exception for the result iterator and wakes up all threads waiting on the
 * iterator lock. Only the first recorded exception is kept; later calls have no effect.
 *
 * @param ioex The exception to be reported to the threads that wait for the result iterator.
 */
protected final void setResultIteratorException(IOException ioex) {
    synchronized (this.iteratorLock) {
        if (this.iteratorException != null) {
            // keep the exception that was recorded first
            return;
        }
        this.iteratorException = ioex;
        this.iteratorLock.notifyAll();
    }
}
// ------------------------------------------------------------------------
// Inter-Thread Communication
// ------------------------------------------------------------------------
/**
* The element that is passed as marker for the end of data.
*/
private static final CircularElement