com.fasterxml.sort.Sorter Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of java-merge-sort Show documentation
Show all versions of java-merge-sort Show documentation
Basic configurable disk-backed N-way merge sort
package com.fasterxml.sort;
import java.io.*;
import java.util.*;
import java.util.concurrent.atomic.AtomicBoolean;
import com.fasterxml.sort.util.SegmentedBuffer;
/**
* Main entry point for sorting functionality; object that drives
* the sorting process from pre-sort to final output.
* Instances are not thread-safe, although they are reusable.
* Since the cost of creating new instances is trivial, there is usually
* no benefit from reusing instances, other than possible convenience.
*/
public class Sorter
implements SortingState
{
/* Estimated bookkeeping cost, in bytes, of one slot in the in-memory buffer.
* Each entry (in buffer) takes about 4 bytes on 32-bit machine; but let's be
* conservative and use 8 as base, plus size of object itself (the object size
* is estimated separately by the reader and added where this constant is used).
*/
private final static long ENTRY_SLOT_SIZE = 8L;
/*
/**********************************************************************
/* Configuration
/**********************************************************************
*/
/**
* Configuration for this sorter; the sorting code reads the maximum
* memory usage and the temporary file provider from it.
*/
protected final SortConfig _config;
/**
* Factory used for reading intermediate sorted files; also used to wrap
* the source when sorting is started with an {@link InputStream}.
*/
protected DataReaderFactory _readerFactory;
/**
* Factory used for writing intermediate sorted files; also used to wrap
* the destination when sorting is started with an {@link OutputStream}.
*/
protected DataWriterFactory _writerFactory;
/**
* Comparator to use for sorting entries; left as null by the protected
* convenience constructors until one is supplied.
*/
protected Comparator _comparator;
/*
/**********************************************************************
/* State
/**********************************************************************
*/
// Phase of the current or most recent sort; null until the first sort starts.
protected SortingState.Phase _phase;
// Value reported by getNumberOfPreSortFiles(); reset to 0 when a sort starts.
protected int _presortFileCount;
// Value reported by getNumberOfSortRounds(); reset to -1 when a sort starts.
protected int _sortRoundCount;
// Value reported by getSortRound(); reset to -1 when a sort starts.
protected int _currentSortRound;
// Set to true by every cancel(...) variant.
protected final AtomicBoolean _cancelRequest = new AtomicBoolean(false);
// Exception handed to cancel(...), or null after a plain cancel().
protected Exception _cancelForException;
/*
/**********************************************************************
/* Construction
/**********************************************************************
*/
/**
* @param config Configuration for the sorter
* @param readerFactory Factory used for creating readers for pre-sorted data;
* as well as for input if an {@link InputStream} is passed as source
* @param writerFactory Factory used for creating writers for storing pre-sorted data;
* as well as for results if an {@link OutputStream} is passed as destination.
* @param comparator Comparator used for ordering entries during sorting
*/
public Sorter(SortConfig config,
        DataReaderFactory readerFactory, DataWriterFactory writerFactory,
        Comparator comparator)
{
    // No sort has been started yet, so there is no phase to report.
    this._phase = null;
    // Collaborators are stored exactly as given; nothing is validated or copied.
    this._comparator = comparator;
    this._writerFactory = writerFactory;
    this._readerFactory = readerFactory;
    this._config = config;
}
/**
* Creates a sorter that uses a newly constructed default {@link SortConfig};
* delegates to the single-argument constructor.
*/
protected Sorter() {
this(new SortConfig());
}
/**
* Creates a sorter with the given configuration and with the reader factory,
* writer factory and comparator all left as null.
*
* @param config Configuration for the sorter
*/
protected Sorter(SortConfig config) {
this(config, null, null, null);
}
/**
 * Builds a new sorter that uses the given reader factory together with this
 * sorter's configuration, writer factory and comparator. This instance is
 * left unchanged.
 *
 * @param f Reader factory for the new sorter
 * @return Newly constructed sorter
 */
protected Sorter withReaderFactory(DataReaderFactory f) {
    final Sorter copy = new Sorter(_config, f, _writerFactory, _comparator);
    return copy;
}
/**
 * Builds a new sorter that uses the given writer factory together with this
 * sorter's configuration, reader factory and comparator. This instance is
 * left unchanged.
 *
 * @param f Writer factory for the new sorter
 * @return Newly constructed sorter
 */
protected Sorter withWriterFactory(DataWriterFactory f) {
    final Sorter copy = new Sorter(_config, _readerFactory, f, _comparator);
    return copy;
}
/**
 * Builds a new sorter that uses the given comparator together with this
 * sorter's configuration and reader/writer factories. This instance is
 * left unchanged.
 *
 * @param cmp Comparator for the new sorter
 * @return Newly constructed sorter
 */
protected Sorter withComparator(Comparator cmp) {
    final Sorter copy = new Sorter(_config, _readerFactory, _writerFactory, cmp);
    return copy;
}
/*
/**********************************************************************
/* SortingState implementation
/**********************************************************************
*/
/**
* Requests cancellation without an associated exception: clears any
* previously stored exception and then sets the cancel flag.
*/
@Override
public void cancel() {
// stored exception is updated before the flag is set; keep this order
_cancelForException = null;
_cancelRequest.set(true);
}
/**
* Requests cancellation and records the given runtime exception as its cause.
*
* @param e Exception to store alongside the cancel request
*/
@Override
public void cancel(RuntimeException e) {
// stored exception is updated before the flag is set; keep this order
_cancelForException = e;
_cancelRequest.set(true);
}
/**
* Requests cancellation and records the given I/O exception as its cause.
*
* @param e Exception to store alongside the cancel request
*/
@Override
public void cancel(IOException e) {
// stored exception is updated before the flag is set; keep this order
_cancelForException = e;
_cancelRequest.set(true);
}
/**
* @return Phase recorded by the current or most recent sort; null if no
*   sort has been started on this instance
*/
@Override
public Phase getPhase() {
return _phase;
}
/**
* @return Current value of the sort round counter; it is reset to -1 when a
*   sort starts (the code that assigns the real count is outside this view)
*/
@Override
public int getNumberOfSortRounds() {
return _sortRoundCount;
}
/**
* @return Current value of the pre-sort file counter; it is reset to 0 when a
*   sort starts (the code that increments it is outside this view)
*/
@Override
public int getNumberOfPreSortFiles() {
return _presortFileCount;
}
/**
* @return Current value of the active sort round marker; it is reset to -1
*   when a sort starts (the code that advances it is outside this view)
*/
@Override
public int getSortRound() {
return _currentSortRound;
}
/**
 * @return True if the recorded phase is {@code COMPLETE}
 */
@Override
public boolean isCompleted() {
    return SortingState.Phase.COMPLETE == _phase;
}
/**
 * @return True if the recorded phase is {@code PRE_SORTING}
 */
@Override
public boolean isPreSorting() {
    return SortingState.Phase.PRE_SORTING == _phase;
}
/**
 * @return True if the recorded phase is {@code SORTING}
 */
@Override
public boolean isSorting() {
    return SortingState.Phase.SORTING == _phase;
}
/*
/**********************************************************************
/* Main sorting API
/**********************************************************************
*/
/**
* Method that will perform full sort on specified input, writing results
* into specified destination. Data conversions needed are done
* using {@link DataReaderFactory} and {@link DataWriterFactory} configured
* for this sorter.
*/
public void sort(InputStream source, OutputStream destination)
    throws IOException
{
    // Reader is constructed first, then the writer (same order as before).
    DataReader reader = _readerFactory.constructReader(source);
    DataWriter writer;
    boolean writerCreated = false;
    try {
        writer = _writerFactory.constructWriter(destination);
        writerCreated = true;
    } finally {
        if (!writerCreated) {
            // The sort never starts on this path, so nothing else would
            // release the reader; close it best-effort and let the original
            // failure propagate.
            try {
                reader.close();
            } catch (IOException ignored) { }
        }
    }
    // The reader/writer variant closes both and reports cancellation via its
    // boolean result; that result is not surfaced by this void method.
    sort(reader, writer);
}
/**
* Method that will perform full sort on input data read using given
* {@link DataReader}, and written out using specified {@link DataWriter}.
* Conversions to and from intermediate sort files is done
* using {@link DataReaderFactory} and {@link DataWriterFactory} configured
* for this sorter.
*
* @return true if sorting completed successfully; false if it was cancelled
*/
public boolean sort(DataReader inputReader, DataWriter resultWriter)
throws IOException
{
// First, pre-sort:
_phase = SortingState.Phase.PRE_SORTING;
SegmentedBuffer buffer = new SegmentedBuffer();
// Each flag is set just before the matching explicit close() call, so the
// finally block below only closes what has not been closed (or attempted) yet.
boolean inputClosed = false;
boolean resultClosed = false;
// Reset the progress values exposed through the SortingState accessors
_presortFileCount = 0;
_sortRoundCount = -1;
_currentSortRound = -1;
try {
// Fill the buffer with as much input as the configured memory limit allows
Object[] items = _readMax(inputReader, buffer, _config.getMaxMemoryUsage(), null);
if (_checkForCancel()) {
return false;
}
Arrays.sort(items, _rawComparator());
// Probe for one more entry: null means the whole input fit into the buffer
T next = inputReader.readNext();
/* Minor optimization: in case all entries might fit in
* in-memory sort buffer, avoid writing intermediate file
* and just write results directly.
*/
if (next == null) {
inputClosed = true;
inputReader.close();
_phase = SortingState.Phase.SORTING;
_writeAll(resultWriter, items);
} else { // but if more data than memory-buffer-full, do it right:
// Files holding pre-sorted chunks, in the order they were written
List presorted = new ArrayList();
presorted.add(_writePresorted(items));
items = null; // it's a big array, clear refs as early as possible
// 'next' was already consumed from the reader, so it is handed on as the
// first entry of the following chunk
_presort(inputReader, buffer, next, presorted);
inputClosed = true;
inputReader.close();
_phase = SortingState.Phase.SORTING;
// This cancel check is given the pre-sorted files (its handling of them is
// defined outside this view)
if (_checkForCancel(presorted)) {
return false;
}
merge(presorted, resultWriter);
}
resultClosed = true;
resultWriter.close();
// A cancel request seen at this point still yields 'false', although the
// results have already been written and the writer closed
if (_checkForCancel()) {
return false;
}
_phase = SortingState.Phase.COMPLETE;
} finally {
// Best-effort cleanup for early returns and failures; IOExceptions from
// close() are ignored here (presumably so they cannot replace an exception
// that is already propagating)
if (!inputClosed) {
try {
inputReader.close();
} catch (IOException e) { }
}
if (!resultClosed) {
try {
resultWriter.close();
} catch (IOException e) { }
}
}
// Reached only when no cancel check returned early and nothing was thrown
return true;
}
/*
/**********************************************************************
/* Internal methods, pre-sorting
/**********************************************************************
*/
/**
* Helper method that will fill given buffer with data read using
* given reader, obeying given memory usage constraints.
*/
private Object[] _readMax(DataReader inputReader, SegmentedBuffer buffer,
        long memoryToUse, T firstItem)
    throws IOException
{
    Object[] chunk = buffer.resetAndStart();
    int capacity = chunk.length;
    int used = 0;
    // Head-room that must remain before another entry is read: never below
    // 256 bytes, and raised to the size of the largest entry seen so far.
    long headroom = 256L;
    if (firstItem != null) {
        // Entry carried over by the caller goes into the first slot.
        // NOTE(review): its size only raises the head-room threshold; it is
        // not subtracted from the budget -- confirm that this is intended.
        chunk[used++] = firstItem;
        headroom = Math.max(ENTRY_SLOT_SIZE + inputReader.estimateSizeInBytes(firstItem), headroom);
    }
    // reduce mem amount by buffer cost too:
    long budget = memoryToUse - (ENTRY_SLOT_SIZE * capacity);
    T entry;
    while ((entry = inputReader.readNext()) != null) {
        long entrySize = inputReader.estimateSizeInBytes(entry);
        headroom = Math.max(headroom, entrySize);
        if (used >= capacity) {
            // Current segment is full: hand it to the buffer, continue in a
            // fresh one and charge the new segment's slots to the budget.
            chunk = buffer.appendCompletedChunk(chunk);
            capacity = chunk.length;
            budget -= (ENTRY_SLOT_SIZE * capacity);
            used = 0;
        }
        chunk[used++] = entry;
        budget -= entrySize;
        if (budget < headroom) {
            // Not enough room left for another entry as large as the largest seen
            break;
        }
    }
    return buffer.completeAndClearBuffer(chunk, used);
}
/**
 * Reads the remaining input in memory-sized chunks, sorts each chunk and
 * writes it to its own pre-sorted file, appending every file to the given
 * list. At least one chunk is always processed.
 *
 * @param inputReader Reader to consume until it returns null
 * @param buffer Buffer reused for collecting each chunk
 * @param nextValue Entry already read from the reader; becomes the first
 *   entry of the first chunk
 * @param presorted List that receives the pre-sorted files
 */
protected void _presort(DataReader inputReader, SegmentedBuffer buffer, T nextValue,
        List presorted)
    throws IOException
{
    for (;;) {
        Object[] chunk = _readMax(inputReader, buffer, _config.getMaxMemoryUsage(), nextValue);
        Arrays.sort(chunk, _rawComparator());
        File spill = _writePresorted(chunk);
        presorted.add(spill);
        // Probe for one more entry; it seeds the next chunk if present
        nextValue = inputReader.readNext();
        if (nextValue == null) {
            return;
        }
    }
}
protected File _writePresorted(Object[] items) throws IOException
{
File tmp = _config.getTempFileProvider().provide();
@SuppressWarnings("unchecked")
DataWriter