Many resources are needed to download a project. Please understand that we have to compensate our server costs. Thank you in advance. Project price only 1 $
You can buy this project and download/modify it how often you want.
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.flink.runtime.operators.sort;
import java.io.File;
import java.io.IOException;
import java.util.ArrayDeque;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Queue;
import java.util.concurrent.ArrayBlockingQueue;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.LinkedBlockingQueue;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.flink.api.common.typeutils.TypeComparator;
import org.apache.flink.api.common.typeutils.TypeSerializer;
import org.apache.flink.api.common.typeutils.TypeSerializerFactory;
import org.apache.flink.core.memory.MemorySegment;
import org.apache.flink.runtime.io.disk.ChannelReaderInputViewIterator;
import org.apache.flink.runtime.io.disk.iomanager.FileIOChannel;
import org.apache.flink.runtime.io.disk.iomanager.BlockChannelReader;
import org.apache.flink.runtime.io.disk.iomanager.BlockChannelWriter;
import org.apache.flink.runtime.io.disk.iomanager.ChannelReaderInputView;
import org.apache.flink.runtime.io.disk.iomanager.ChannelWriterOutputView;
import org.apache.flink.runtime.io.disk.iomanager.IOManager;
import org.apache.flink.runtime.io.disk.iomanager.FileIOChannel.ID;
import org.apache.flink.runtime.jobgraph.tasks.AbstractInvokable;
import org.apache.flink.runtime.memory.MemoryAllocationException;
import org.apache.flink.runtime.memory.MemoryManager;
import org.apache.flink.runtime.util.EmptyMutableObjectIterator;
import org.apache.flink.util.MutableObjectIterator;
/**
* The {@link UnilateralSortMerger} is a full fledged sorter. It implements a multi-way merge sort. Internally,
* the logic is factored into three threads (read, sort, spill) which communicate through a set of blocking queues,
* forming a closed loop. Memory is allocated using the {@link MemoryManager} interface. Thus the component will
* not exceed the provided memory limits.
*/
public class UnilateralSortMerger implements Sorter {
// ------------------------------------------------------------------------
// Constants
// ------------------------------------------------------------------------
/** Logging. */
private static final Logger LOG = LoggerFactory.getLogger(UnilateralSortMerger.class);
/** Fix length records with a length below this threshold will be in-place sorted, if possible. */
private static final int THRESHOLD_FOR_IN_PLACE_SORTING = 32;
/** The minimal number of buffers to use by the writers. */
protected static final int MIN_NUM_WRITE_BUFFERS = 2;
/** The maximal number of buffers to use by the writers. */
protected static final int MAX_NUM_WRITE_BUFFERS = 4;
/** The minimum number of segments that are required for the sort to operate. */
protected static final int MIN_NUM_SORT_MEM_SEGMENTS = 10;
// ------------------------------------------------------------------------
// Threads
// ------------------------------------------------------------------------
/** The thread that reads the input channels into buffers and passes them on to the merger. */
private final ThreadBase readThread;
/** The thread that merges the buffer handed from the reading thread. */
private final ThreadBase sortThread;
/** The thread that handles spilling to secondary storage. */
private final ThreadBase spillThread;
// ------------------------------------------------------------------------
// Memory
// ------------------------------------------------------------------------
/** The memory segments used first for sorting and later for reading/pre-fetching
* during the external merge. */
protected final List sortReadMemory;
/** The memory segments used to stage data to be written. */
protected final List writeMemory;
/** The memory manager through which memory is allocated and released. */
protected final MemoryManager memoryManager;
// ------------------------------------------------------------------------
// Miscellaneous Fields
// ------------------------------------------------------------------------
/**
* The handler for large records, that do not go through the in-memory sorter as a whole, but
* directly go to disk. This field is {@code null} when no buffers were reserved for large
* record handling.
*/
private final LargeRecordHandler largeRecordHandler;
/**
* Collection of all currently open channels, to be closed and deleted during cleanup.
*/
private final HashSet openChannels;
/**
* Collection of all temporary files created and to be removed when closing the sorter.
*/
private final HashSet channelsToDeleteAtShutdown;
/**
* The monitor which guards the iterator field.
*/
protected final Object iteratorLock = new Object();
/**
* The iterator to be returned by the sort-merger. This variable is null while receiving and merging is still in
* progress, and it will be set once fewer than merge-factor sorted sub-streams remain, which will then be
* streamed sorted.
*/
protected volatile MutableObjectIterator iterator;
/**
* The exception that is set, if the iterator cannot be created.
*/
protected volatile IOException iteratorException;
/**
* Flag indicating that the sorter was closed.
*/
protected volatile boolean closed;
/**
* Whether to reuse objects during deserialization.
*/
protected final boolean objectReuseEnabled;
/**
* All in-memory sorters created by the constructor (one per sort buffer). They are kept here
* so that {@link #close()} can dispose each of them.
*/
private final Collection> inMemorySorters;
// ------------------------------------------------------------------------
// Constructor & Shutdown
// ------------------------------------------------------------------------
/**
 * Creates a sorter that obtains its memory from the memory manager and leaves the choice of
 * the number of sort buffers to the sorter.
 *
 * <p>Delegates to the constructor taking an explicit {@code numSortBuffers}, passing
 * {@code -1} for it.
 *
 * @param memoryManager The memory manager through which memory is allocated and released.
 * @param ioManager The I/O manager, forwarded unchanged.
 * @param input The input to be sorted.
 * @param parentTask The parent task, forwarded unchanged.
 * @param serializerFactory The factory for the record serializer.
 * @param comparator The comparator for the records.
 * @param memoryFraction The fraction of memory to use, forwarded unchanged.
 * @param maxNumFileHandles The maximal merge fan-in.
 * @param startSpillingFraction The fraction of the sort memory after which spilling starts.
 * @param handleLargeRecords Whether buffers are reserved for handling large records.
 * @param objectReuseEnabled Whether to reuse objects during deserialization.
 * @throws IOException Declared by the constructor this one delegates to.
 * @throws MemoryAllocationException Declared by the constructor this one delegates to.
 */
public UnilateralSortMerger(
        MemoryManager memoryManager,
        IOManager ioManager,
        MutableObjectIterator input,
        AbstractInvokable parentTask,
        TypeSerializerFactory serializerFactory,
        TypeComparator comparator,
        double memoryFraction,
        int maxNumFileHandles,
        float startSpillingFraction,
        boolean handleLargeRecords,
        boolean objectReuseEnabled) throws IOException, MemoryAllocationException {
    this(
        memoryManager,
        ioManager,
        input,
        parentTask,
        serializerFactory,
        comparator,
        memoryFraction,
        -1, // numSortBuffers: a value below 1 lets the sorter decide
        maxNumFileHandles,
        startSpillingFraction,
        handleLargeRecords,
        objectReuseEnabled);
}
/**
 * Creates a sorter that obtains its memory from the memory manager and uses the given number
 * of sort buffers.
 *
 * <p>Delegates to the protected constructor that additionally takes {@code noSpillingMemory},
 * passing {@code false} for it.
 *
 * @param memoryManager The memory manager through which memory is allocated and released.
 * @param ioManager The I/O manager, forwarded unchanged.
 * @param input The input to be sorted.
 * @param parentTask The parent task, forwarded unchanged.
 * @param serializerFactory The factory for the record serializer.
 * @param comparator The comparator for the records.
 * @param memoryFraction The fraction of memory to use, forwarded unchanged.
 * @param numSortBuffers The number of sort buffers; a value below 1 lets the sorter decide.
 * @param maxNumFileHandles The maximal merge fan-in.
 * @param startSpillingFraction The fraction of the sort memory after which spilling starts.
 * @param handleLargeRecords Whether buffers are reserved for handling large records.
 * @param objectReuseEnabled Whether to reuse objects during deserialization.
 * @throws IOException Declared by the constructor this one delegates to.
 * @throws MemoryAllocationException Declared by the constructor this one delegates to.
 */
public UnilateralSortMerger(
        MemoryManager memoryManager,
        IOManager ioManager,
        MutableObjectIterator input,
        AbstractInvokable parentTask,
        TypeSerializerFactory serializerFactory,
        TypeComparator comparator,
        double memoryFraction,
        int numSortBuffers,
        int maxNumFileHandles,
        float startSpillingFraction,
        boolean handleLargeRecords,
        boolean objectReuseEnabled) throws IOException, MemoryAllocationException {
    this(
        memoryManager,
        ioManager,
        input,
        parentTask,
        serializerFactory,
        comparator,
        memoryFraction,
        numSortBuffers,
        maxNumFileHandles,
        startSpillingFraction,
        false, // noSpillingMemory
        handleLargeRecords,
        objectReuseEnabled);
}
/**
 * Creates a sorter that works on an already allocated list of memory pages.
 *
 * <p>Delegates to the protected constructor that additionally takes {@code noSpillingMemory},
 * passing {@code false} for it.
 *
 * @param memoryManager The memory manager through which memory is released.
 * @param memory The memory pages the sorter works with.
 * @param ioManager The I/O manager, forwarded unchanged.
 * @param input The input to be sorted.
 * @param parentTask The parent task, forwarded unchanged.
 * @param serializerFactory The factory for the record serializer.
 * @param comparator The comparator for the records.
 * @param numSortBuffers The number of sort buffers; a value below 1 lets the sorter decide.
 * @param maxNumFileHandles The maximal merge fan-in.
 * @param startSpillingFraction The fraction of the sort memory after which spilling starts.
 * @param handleLargeRecords Whether buffers are reserved for handling large records.
 * @param objectReuseEnabled Whether to reuse objects during deserialization.
 * @throws IOException Declared by the constructor this one delegates to.
 */
public UnilateralSortMerger(
        MemoryManager memoryManager,
        List memory,
        IOManager ioManager,
        MutableObjectIterator input,
        AbstractInvokable parentTask,
        TypeSerializerFactory serializerFactory,
        TypeComparator comparator,
        int numSortBuffers,
        int maxNumFileHandles,
        float startSpillingFraction,
        boolean handleLargeRecords,
        boolean objectReuseEnabled) throws IOException {
    this(
        memoryManager,
        memory,
        ioManager,
        input,
        parentTask,
        serializerFactory,
        comparator,
        numSortBuffers,
        maxNumFileHandles,
        startSpillingFraction,
        false, // noSpillingMemory
        handleLargeRecords,
        objectReuseEnabled);
}
/**
 * Allocates the sorter's memory pages from the memory manager and delegates to the
 * constructor that works on a list of pages.
 *
 * <p>The number of pages is {@code memoryManager.computeNumberOfPages(memoryFraction)}; the
 * pages are allocated with {@code parentTask} as the owner argument. Note that
 * {@code memoryManager} is dereferenced here, before the null checks of the target
 * constructor run.
 *
 * @param memoryManager The memory manager through which memory is allocated and released.
 * @param ioManager The I/O manager, forwarded unchanged.
 * @param input The input to be sorted.
 * @param parentTask The parent task; also passed to the page allocation.
 * @param serializerFactory The factory for the record serializer.
 * @param comparator The comparator for the records.
 * @param memoryFraction The fraction of memory from which the number of pages is computed.
 * @param numSortBuffers The number of sort buffers; a value below 1 lets the sorter decide.
 * @param maxNumFileHandles The maximal merge fan-in.
 * @param startSpillingFraction The fraction of the sort memory after which spilling starts.
 * @param noSpillingMemory Whether to reserve no write buffers for spilling.
 * @param handleLargeRecords Whether buffers are reserved for handling large records.
 * @param objectReuseEnabled Whether to reuse objects during deserialization.
 * @throws IOException Declared by the constructor this one delegates to.
 * @throws MemoryAllocationException Declared here; presumably raised by the page allocation.
 */
protected UnilateralSortMerger(
        MemoryManager memoryManager,
        IOManager ioManager,
        MutableObjectIterator input,
        AbstractInvokable parentTask,
        TypeSerializerFactory serializerFactory,
        TypeComparator comparator,
        double memoryFraction,
        int numSortBuffers,
        int maxNumFileHandles,
        float startSpillingFraction,
        boolean noSpillingMemory,
        boolean handleLargeRecords,
        boolean objectReuseEnabled) throws IOException, MemoryAllocationException {
    this(
        memoryManager,
        // the pages that back the sorter
        memoryManager.allocatePages(
            parentTask,
            memoryManager.computeNumberOfPages(memoryFraction)),
        ioManager,
        input,
        parentTask,
        serializerFactory,
        comparator,
        numSortBuffers,
        maxNumFileHandles,
        startSpillingFraction,
        noSpillingMemory,
        handleLargeRecords,
        objectReuseEnabled);
}
/**
 * Creates a sorter on the given memory pages that uses the default in-memory sorter factory.
 *
 * <p>Delegates to the constructor taking an {@code InMemorySorterFactory}, supplying a
 * {@code DefaultInMemorySorterFactory} built from the serializer factory, the comparator and
 * {@code THRESHOLD_FOR_IN_PLACE_SORTING}.
 *
 * @param memoryManager The memory manager through which memory is released.
 * @param memory The memory pages the sorter works with.
 * @param ioManager The I/O manager, forwarded unchanged.
 * @param input The input to be sorted.
 * @param parentTask The parent task, forwarded unchanged.
 * @param serializerFactory The factory for the record serializer.
 * @param comparator The comparator for the records.
 * @param numSortBuffers The number of sort buffers; a value below 1 lets the sorter decide.
 * @param maxNumFileHandles The maximal merge fan-in.
 * @param startSpillingFraction The fraction of the sort memory after which spilling starts.
 * @param noSpillingMemory Whether to reserve no write buffers for spilling.
 * @param handleLargeRecords Whether buffers are reserved for handling large records.
 * @param objectReuseEnabled Whether to reuse objects during deserialization.
 * @throws IOException Declared by the constructor this one delegates to.
 */
protected UnilateralSortMerger(
        MemoryManager memoryManager,
        List memory,
        IOManager ioManager,
        MutableObjectIterator input,
        AbstractInvokable parentTask,
        TypeSerializerFactory serializerFactory,
        TypeComparator comparator,
        int numSortBuffers,
        int maxNumFileHandles,
        float startSpillingFraction,
        boolean noSpillingMemory,
        boolean handleLargeRecords,
        boolean objectReuseEnabled) throws IOException {
    this(
        memoryManager, memory, ioManager, input, parentTask,
        serializerFactory, comparator,
        numSortBuffers, maxNumFileHandles, startSpillingFraction,
        noSpillingMemory, handleLargeRecords, objectReuseEnabled,
        // default factory for the in-memory sort buffers
        new DefaultInMemorySorterFactory<>(serializerFactory, comparator, THRESHOLD_FOR_IN_PLACE_SORTING));
}
/**
* Main constructor that all other constructors delegate to. It validates the arguments, splits
* the given memory pages into write buffers, large-record buffers and sort buffers, creates the
* in-memory sorters, creates the reading, sorting and spilling threads through the
* {@code get...Thread} factory methods, and finally starts them via {@link #startThreads()}.
*
* <p>The {@code memory} list is retained as {@code sortReadMemory} and is modified: the pages
* for writing and for large record handling are removed from its end.
*
* @param memoryManager The memory manager; must not be null.
* @param memory The memory pages to work with; at least
*        {@code MIN_NUM_WRITE_BUFFERS + MIN_NUM_SORT_MEM_SEGMENTS} pages are required.
* @param ioManager The I/O manager; may only be null if {@code noSpillingMemory} is true.
* @param input The input that is handed to the reading thread.
* @param parentTask The parent task; must not be null.
* @param serializerFactory The factory for the record serializer; must not be null.
* @param comparator The comparator for the records; must not be null.
* @param numSortBuffers The number of sort buffers. If smaller than 1, two buffers are used
*        when the sort memory exceeds 100 MiB, and one otherwise.
* @param maxNumFileHandles The maximal merge fan-in; must be at least 2. It is reduced if the
*        available pages do not suffice for a merge with that fan-in.
* @param startSpillingFraction The fraction of the sort memory (in bytes) that is handed to the
*        reading thread as its start-spilling threshold.
* @param noSpillingMemory If true, no write buffers are reserved for spilling.
* @param handleLargeRecords If true, buffers are reserved for a {@code LargeRecordHandler}.
* @param objectReuseEnabled Whether to reuse objects during deserialization.
* @param inMemorySorterFactory The factory that creates one in-memory sorter per sort buffer.
* @throws NullPointerException If one of the required arguments is null.
* @throws IllegalArgumentException If {@code maxNumFileHandles} is below 2 or too few memory
*         pages were provided.
* @throws IOException Declared by this constructor; presumably raised while setting up the
*         components created here.
*/
protected UnilateralSortMerger(
MemoryManager memoryManager,
List memory,
IOManager ioManager,
MutableObjectIterator input,
AbstractInvokable parentTask,
TypeSerializerFactory serializerFactory,
TypeComparator comparator,
int numSortBuffers,
int maxNumFileHandles,
float startSpillingFraction,
boolean noSpillingMemory,
boolean handleLargeRecords,
boolean objectReuseEnabled,
InMemorySorterFactory inMemorySorterFactory) throws IOException {
// sanity checks
// (the I/O manager is only required when memory for spilling is reserved)
if (memoryManager == null || (ioManager == null && !noSpillingMemory) || serializerFactory == null || comparator == null) {
throw new NullPointerException();
}
if (parentTask == null) {
throw new NullPointerException("Parent Task must not be null.");
}
if (maxNumFileHandles < 2) {
throw new IllegalArgumentException("Merger cannot work with less than two file handles.");
}
this.memoryManager = memoryManager;
this.objectReuseEnabled = objectReuseEnabled;
// adjust the memory quotas to the page size
final int numPagesTotal = memory.size();
if (numPagesTotal < MIN_NUM_WRITE_BUFFERS + MIN_NUM_SORT_MEM_SEGMENTS) {
throw new IllegalArgumentException("Too little memory provided to sorter to perform task. " +
"Required are at least " + (MIN_NUM_WRITE_BUFFERS + MIN_NUM_SORT_MEM_SEGMENTS) +
" pages. Current page size is " + memoryManager.getPageSize() + " bytes.");
}
// determine how many buffers to use for writing
final int numWriteBuffers;
final int numLargeRecordBuffers;
if (noSpillingMemory && !handleLargeRecords) {
// neither spilling nor large record handling needs buffers
numWriteBuffers = 0;
numLargeRecordBuffers = 0;
}
else {
// spilling counts as one buffer consumer, large record handling as two;
// this branch is only reached with at least one consumer
int numConsumers = (noSpillingMemory ? 0 : 1) + (handleLargeRecords ? 2 : 0);
// determine how many buffers we have when we do a full merge with maximal fan-in
final int minBuffersForMerging = maxNumFileHandles + numConsumers * MIN_NUM_WRITE_BUFFERS;
if (minBuffersForMerging > numPagesTotal) {
// not enough pages for the requested fan-in: use the minimal number of write
// buffers and lower the fan-in to what the remaining pages allow
numWriteBuffers = noSpillingMemory ? 0 : MIN_NUM_WRITE_BUFFERS;
numLargeRecordBuffers = handleLargeRecords ? 2*MIN_NUM_WRITE_BUFFERS : 0;
maxNumFileHandles = numPagesTotal - numConsumers * MIN_NUM_WRITE_BUFFERS;
if (LOG.isDebugEnabled()) {
LOG.debug("Reducing maximal merge fan-in to " + maxNumFileHandles + " due to limited memory availability during merge");
}
}
else {
// we are free to choose. make sure that we do not eat up too much memory for writing
// (one percent of all pages per consumer, clamped to the min/max bounds below)
final int fractionalAuxBuffers = numPagesTotal / (numConsumers * 100);
if (fractionalAuxBuffers >= MAX_NUM_WRITE_BUFFERS) {
numWriteBuffers = noSpillingMemory ? 0 : MAX_NUM_WRITE_BUFFERS;
numLargeRecordBuffers = handleLargeRecords ? 2*MAX_NUM_WRITE_BUFFERS : 0;
}
else {
numWriteBuffers = noSpillingMemory ? 0 :
Math.max(MIN_NUM_WRITE_BUFFERS, fractionalAuxBuffers); // at least the lower bound
numLargeRecordBuffers = handleLargeRecords ?
Math.max(2*MIN_NUM_WRITE_BUFFERS, fractionalAuxBuffers) // at least the lower bound
: 0;
}
}
}
// everything that is not reserved for writing or large records is sort memory
final int sortMemPages = numPagesTotal - numWriteBuffers - numLargeRecordBuffers;
final long sortMemory = ((long) sortMemPages) * memoryManager.getPageSize();
// decide how many sort buffers to use
if (numSortBuffers < 1) {
if (sortMemory > 100 * 1024 * 1024) {
numSortBuffers = 2;
}
else {
numSortBuffers = 1;
}
}
final int numSegmentsPerSortBuffer = sortMemPages / numSortBuffers;
if (LOG.isDebugEnabled()) {
LOG.debug(String.format("Instantiating sorter with %d pages of sorting memory (="
+ "%d bytes total) divided over %d sort buffers (%d pages per buffer). Using %d"
+ " buffers for writing sorted results and merging maximally %d streams at once. "
+ "Using %d memory segments for large record spilling.",
sortMemPages, sortMemory, numSortBuffers, numSegmentsPerSortBuffer, numWriteBuffers,
maxNumFileHandles, numLargeRecordBuffers));
}
// the given list itself is kept; pages are taken out of it below
this.sortReadMemory = memory;
this.writeMemory = new ArrayList(numWriteBuffers);
final TypeSerializer serializer = serializerFactory.getSerializer();
// move some pages from the sort memory to the write memory
if (numWriteBuffers > 0) {
for (int i = 0; i < numWriteBuffers; i++) {
this.writeMemory.add(this.sortReadMemory.remove(this.sortReadMemory.size() - 1));
}
}
// move some pages from the sort memory to the large record handler, if one is used
if (numLargeRecordBuffers > 0) {
List mem = new ArrayList();
for (int i = 0; i < numLargeRecordBuffers; i++) {
mem.add(this.sortReadMemory.remove(this.sortReadMemory.size() - 1));
}
this.largeRecordHandler = new LargeRecordHandler(serializer, comparator.duplicate(),
ioManager, memoryManager, mem, parentTask, maxNumFileHandles);
}
else {
this.largeRecordHandler = null;
}
// circular queues pass buffers between the threads
final CircularQueues circularQueues = new CircularQueues();
inMemorySorters = new ArrayList<>(numSortBuffers);
// allocate the sort buffers and fill empty queue with them
final Iterator segments = this.sortReadMemory.iterator();
for (int i = 0; i < numSortBuffers; i++)
{
// grab some memory
// (the last sort buffer takes all segments that are left over)
final List sortSegments = new ArrayList(numSegmentsPerSortBuffer);
for (int k = (i == numSortBuffers - 1 ? Integer.MAX_VALUE : numSegmentsPerSortBuffer); k > 0 && segments.hasNext(); k--) {
sortSegments.add(segments.next());
}
final InMemorySorter inMemorySorter = inMemorySorterFactory.create(sortSegments);
inMemorySorters.add(inMemorySorter);
// add to empty queue
CircularElement element = new CircularElement(i, inMemorySorter, sortSegments);
circularQueues.empty.add(element);
}
// exception handling
// (the handler publishes the exception to threads waiting for the result iterator and
// closes the sorter, unless the sorter is already closed)
ExceptionHandler exceptionHandler = new ExceptionHandler() {
public void handleException(IOException exception) {
// forward exception
if (!closed) {
setResultIteratorException(exception);
close();
}
}
};
// create sets that track the channels we need to clean up when closing the sorter
this.channelsToDeleteAtShutdown = new HashSet(64);
this.openChannels = new HashSet(64);
// create the thread that reads the input channels (started at the end of the constructor)
this.readThread = getReadingThread(exceptionHandler, input, circularQueues, largeRecordHandler,
parentTask, serializer, ((long) (startSpillingFraction * sortMemory)));
// create the thread that sorts the buffers (started at the end of the constructor)
this.sortThread = getSortingThread(exceptionHandler, circularQueues, parentTask);
// create the thread that handles spilling to secondary storage (started at the end of the constructor)
this.spillThread = getSpillingThread(exceptionHandler, circularQueues, parentTask,
memoryManager, ioManager, serializerFactory, comparator, this.sortReadMemory, this.writeMemory,
maxNumFileHandles);
// propagate the context class loader to the spawned threads
// (each thread reference is checked for null before it is used)
ClassLoader contextLoader = Thread.currentThread().getContextClassLoader();
if (contextLoader != null) {
if (this.readThread != null) {
this.readThread.setContextClassLoader(contextLoader);
}
if (this.sortThread != null) {
this.sortThread.setContextClassLoader(contextLoader);
}
if (this.spillThread != null) {
this.spillThread.setContextClassLoader(contextLoader);
}
}
startThreads();
}
/**
 * Starts the reading, sorting and spilling threads of this sort-merger, in that order.
 * Threads that are {@code null} are skipped.
 */
protected void startThreads() {
    final ThreadBase[] workers = { this.readThread, this.sortThread, this.spillThread };
    for (ThreadBase worker : workers) {
        if (worker != null) {
            worker.start();
        }
    }
}
/**
 * Shuts down all the threads initiated by this sort/merger. Also releases all previously allocated
 * memory, if it has not yet been released by the threads, and closes and deletes all channels (removing
 * the temporary files).
 *
 * <p>The threads are asked to shut down and this method then waits for them to exit. If the calling
 * thread is interrupted while waiting, the method stops waiting, logs the interruption and proceeds
 * directly to releasing the resources; in that case the threads may still be running afterwards.
 *
 * <p>Only the first invocation performs the shutdown; further invocations return immediately.
 * The cleanup is best-effort: every cleanup step is guarded individually, so that a failure in
 * one step (for example while disposing an in-memory sorter) does not prevent the remaining
 * memory and files from being released. Failures during cleanup are logged at debug level.
 *
 * @see java.io.Closeable#close()
 */
@Override
public void close() {
    // check if the sorter has been closed before
    synchronized (this) {
        if (this.closed) {
            return;
        }
        // mark as closed
        this.closed = true;
    }

    // from here on, the code is in a try block, because even though errors might be thrown in this block,
    // we need to make sure that all the memory is released.
    try {
        // if the result iterator has not been obtained yet, set the exception
        synchronized (this.iteratorLock) {
            if (this.iteratorException == null) {
                this.iteratorException = new IOException("The sorter has been closed.");
                this.iteratorLock.notifyAll();
            }
        }

        // stop all the threads
        if (this.readThread != null) {
            try {
                this.readThread.shutdown();
            } catch (Throwable t) {
                LOG.error("Error shutting down reader thread: " + t.getMessage(), t);
            }
        }
        if (this.sortThread != null) {
            try {
                this.sortThread.shutdown();
            } catch (Throwable t) {
                LOG.error("Error shutting down sorter thread: " + t.getMessage(), t);
            }
        }
        if (this.spillThread != null) {
            try {
                this.spillThread.shutdown();
            } catch (Throwable t) {
                LOG.error("Error shutting down spilling thread: " + t.getMessage(), t);
            }
        }

        // wait for the threads to exit
        try {
            if (this.readThread != null) {
                this.readThread.join();
            }
            if (this.sortThread != null) {
                this.sortThread.join();
            }
            if (this.spillThread != null) {
                this.spillThread.join();
            }
        }
        catch (InterruptedException iex) {
            // the interruption is only logged and the interrupt status is not restored, so the
            // cleanup below runs without a pending interrupt.
            // NOTE(review): confirm whether callers need the interrupt status restored afterwards.
            LOG.debug("Closing of sort/merger was interrupted. " +
                    "The reading/sorting/spilling threads may still be working.", iex);
        }
    }
    finally {
        // Dispose all in memory sorter in order to clear memory references. Each dispose is guarded
        // separately: an exception thrown here must neither skip the remaining sorters nor the
        // release of memory and temporary files below.
        for (InMemorySorter inMemorySorter : inMemorySorters) {
            try {
                inMemorySorter.dispose();
            }
            catch (Throwable t) {
                LOG.debug("Error disposing in-memory sorter while closing the sorter.", t);
            }
        }

        // RELEASE ALL MEMORY. If the threads and channels are still running, this should cause
        // exceptions, because their memory segments are freed
        try {
            if (!this.writeMemory.isEmpty()) {
                this.memoryManager.release(this.writeMemory);
            }
            this.writeMemory.clear();
        }
        catch (Throwable t) {
            LOG.debug("Error releasing write memory while closing the sorter.", t);
        }

        try {
            if (!this.sortReadMemory.isEmpty()) {
                this.memoryManager.release(this.sortReadMemory);
            }
            this.sortReadMemory.clear();
        }
        catch (Throwable t) {
            LOG.debug("Error releasing sort/read memory while closing the sorter.", t);
        }

        // we have to loop this, because it may fail with a concurrent modification exception
        while (!this.openChannels.isEmpty()) {
            try {
                for (Iterator channels = this.openChannels.iterator(); channels.hasNext(); ) {
                    final FileIOChannel channel = channels.next();
                    // remove first, so that a failing channel is not retried forever
                    channels.remove();
                    channel.closeAndDelete();
                }
            }
            catch (Throwable t) {
                LOG.debug("Error closing and deleting an open channel while closing the sorter.", t);
            }
        }

        // we have to loop this, because it may fail with a concurrent modification exception
        while (!this.channelsToDeleteAtShutdown.isEmpty()) {
            try {
                for (Iterator channels = this.channelsToDeleteAtShutdown.iterator(); channels.hasNext(); ) {
                    final FileIOChannel.ID channel = channels.next();
                    // remove first, so that a failing file is not retried forever
                    channels.remove();
                    try {
                        final File f = new File(channel.getPath());
                        if (f.exists()) {
                            f.delete();
                        }
                    }
                    catch (Throwable t) {
                        LOG.debug("Error deleting a temporary file while closing the sorter.", t);
                    }
                }
            }
            catch (Throwable t) {
                LOG.debug("Error iterating the temporary files to delete while closing the sorter.", t);
            }
        }

        try {
            if (this.largeRecordHandler != null) {
                this.largeRecordHandler.close();
            }
        }
        catch (Throwable t) {
            LOG.debug("Error closing the large record handler while closing the sorter.", t);
        }
    }
}
// ------------------------------------------------------------------------
// Factory Methods
// ------------------------------------------------------------------------
/**
 * Factory method for the reading thread, i.e. the thread that takes the records from the input
 * and puts them into the buffers in which they are sorted.
 *
 * <p>The thread is only created here, it is not started.
 *
 * @param exceptionHandler
 *        The handler for exceptions in the thread.
 * @param reader
 *        The reader from which the thread reads.
 * @param queues
 *        The queues through which the thread communicates with the other threads.
 * @param largeRecordHandler
 *        The handler for large records, handed to the thread unchanged.
 * @param parentTask
 *        The task at which the thread registers itself (for profiling purposes).
 * @param serializer
 *        The serializer for the records; used here to create the record instance that is
 *        handed to the thread.
 * @param startSpillingBytes
 *        The number of bytes after which the reader thread will send the notification to
 *        start the spilling.
 *
 * @return The (not yet started) thread that reads data from an input, writes it into sort
 *         buffers and puts them into a queue.
 */
protected ThreadBase getReadingThread(ExceptionHandler exceptionHandler,
        MutableObjectIterator reader, CircularQueues queues,
        LargeRecordHandler largeRecordHandler, AbstractInvokable parentTask,
        TypeSerializer serializer, long startSpillingBytes)
{
    final Object recordInstance = serializer.createInstance();
    return new ReadingThread(
        exceptionHandler,
        reader,
        queues,
        largeRecordHandler,
        recordInstance,
        parentTask,
        startSpillingBytes);
}
/**
 * Factory method for the sorting thread. The thread is only created here, it is not started.
 *
 * @param exceptionHandler The handler for exceptions in the thread.
 * @param queues The queues through which the thread communicates with the other threads.
 * @param parentTask The parent task, handed to the thread unchanged.
 * @return The (not yet started) sorting thread.
 */
protected ThreadBase getSortingThread(ExceptionHandler exceptionHandler, CircularQueues queues,
        AbstractInvokable parentTask)
{
    final ThreadBase sorter = new SortingThread(exceptionHandler, queues, parentTask);
    return sorter;
}
/**
 * Factory method for the spilling thread. The thread is only created here, it is not started.
 *
 * @param exceptionHandler The handler for exceptions in the thread.
 * @param queues The queues through which the thread communicates with the other threads.
 * @param parentTask The parent task, handed to the thread unchanged.
 * @param memoryManager The memory manager, handed to the thread unchanged.
 * @param ioManager The I/O manager, handed to the thread unchanged.
 * @param serializerFactory The factory from which the thread's serializer is obtained.
 * @param comparator The comparator for the records.
 * @param sortReadMemory The memory segments used for sorting and reading.
 * @param writeMemory The memory segments used to stage data to be written.
 * @param maxFileHandles The maximal merge fan-in.
 * @return The (not yet started) spilling thread.
 */
protected ThreadBase getSpillingThread(ExceptionHandler exceptionHandler, CircularQueues queues,
        AbstractInvokable parentTask, MemoryManager memoryManager, IOManager ioManager,
        TypeSerializerFactory serializerFactory, TypeComparator comparator,
        List sortReadMemory, List writeMemory, int maxFileHandles)
{
    final TypeSerializer serializer = serializerFactory.getSerializer();
    return new SpillingThread(
        exceptionHandler,
        queues,
        parentTask,
        memoryManager,
        ioManager,
        serializer,
        comparator,
        sortReadMemory,
        writeMemory,
        maxFileHandles);
}
// ------------------------------------------------------------------------
// Result Iterator
// ------------------------------------------------------------------------
/**
 * Returns the iterator over the sorted result, blocking until either the iterator or an
 * exception has been set.
 *
 * @return The result iterator.
 * @throws InterruptedException If the calling thread is interrupted while waiting.
 * @throws RuntimeException If an exception was set instead of an iterator; the exception is
 *         attached as the cause.
 */
@Override
public MutableObjectIterator getIterator() throws InterruptedException {
    synchronized (this.iteratorLock) {
        // keep waiting until a result or a failure has been published
        while (!(this.iterator != null || this.iteratorException != null)) {
            this.iteratorLock.wait();
        }

        final IOException failure = this.iteratorException;
        if (failure == null) {
            return this.iterator;
        }
        throw new RuntimeException("Error obtaining the sorted input: " + failure.getMessage(),
            failure);
    }
}
/**
 * Publishes the result iterator and wakes up all threads that wait for it. The call has no
 * effect if an exception has already been set.
 *
 * @param iterator The result iterator to set.
 */
protected final void setResultIterator(MutableObjectIterator iterator) {
    synchronized (this.iteratorLock) {
        // an exception that was reported earlier takes precedence over the result
        if (this.iteratorException != null) {
            return;
        }
        this.iterator = iterator;
        this.iteratorLock.notifyAll();
    }
}
/**
 * Publishes an exception to all threads that wait for the result iterator and wakes them up.
 * Only the first exception is kept; later calls have no effect.
 *
 * @param ioex The exception to be reported to the threads that wait for the result iterator.
 */
protected final void setResultIteratorException(IOException ioex) {
    synchronized (this.iteratorLock) {
        // keep the exception that was reported first
        if (this.iteratorException != null) {
            return;
        }
        this.iteratorException = ioex;
        this.iteratorLock.notifyAll();
    }
}
// ------------------------------------------------------------------------
// Inter-Thread Communication
// ------------------------------------------------------------------------
/**
* The element that is passed as marker for the end of data.
*/
private static final CircularElement