com.fasterxml.sort.SorterBase Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of java-merge-sort Show documentation
Show all versions of java-merge-sort Show documentation
Basic configurable disk-backed N-way merge sort
The newest version!
package com.fasterxml.sort;
import java.io.*;
import java.util.*;
import java.util.concurrent.atomic.AtomicBoolean;
import com.fasterxml.sort.util.SegmentedBuffer;
public abstract class SorterBase
implements SortingState
{
/* each entry (in buffer) takes about 4 bytes on 32-bit machine; but let's be
* conservative and use 8 as base, plus size of object itself.
*/
private final static long ENTRY_SLOT_SIZE = 8L;
/*
/**********************************************************************
/* Configuration
/**********************************************************************
*/
protected final SortConfig _config;
/**
* Factory used for reading intermediate sorted files.
*/
protected DataReaderFactory _readerFactory;
/**
* Factory used for writing intermediate sorted files.
*/
protected DataWriterFactory _writerFactory;
/**
* Comparator to use for sorting entries; initially null unless one is passed to the constructor.
*/
protected Comparator _comparator;
/*
/**********************************************************************
/* State
/**********************************************************************
*/
/** Current phase, as returned by {@link #getPhase()}; the constructor sets it to {@code null}. */
protected SortingState.Phase _phase;
/** Value returned by {@link #getNumberOfPreSortFiles()}. */
protected int _presortFileCount;
/** Value returned by {@link #getNumberOfSortRounds()}. */
protected int _sortRoundCount;
/** Value returned by {@link #getSortRound()}. */
protected int _currentSortRound;
/** Flag that every {@code cancel(...)} overload sets to {@code true}. */
protected final AtomicBoolean _cancelRequest = new AtomicBoolean(false);
/**
 * Exception handed to {@code cancel(RuntimeException)} or
 * {@code cancel(IOException)}; reset to {@code null} by the no-argument
 * {@code cancel()}.
 */
protected Exception _cancelForException;
/*
/**********************************************************************
/* Construction
/**********************************************************************
*/
/**
 * Primary constructor: stores the configuration, the two factories and the
 * comparator as given, and leaves the phase unset ({@code null}).
 *
 * @param config sort configuration
 * @param readerFactory factory for reading intermediate sorted files
 * @param writerFactory factory for writing intermediate sorted files
 * @param comparator comparator used for sorting entries
 */
protected SorterBase(SortConfig config,
        DataReaderFactory readerFactory,
        DataWriterFactory writerFactory,
        Comparator comparator)
{
    // No phase until sorting is actually started
    this._phase = null;
    this._comparator = comparator;
    this._writerFactory = writerFactory;
    this._readerFactory = readerFactory;
    this._config = config;
}
/**
 * Creates a sorter with a freshly constructed {@code SortConfig} and no
 * reader factory, writer factory or comparator.
 */
protected SorterBase() {
    // Same outcome as going through the single-argument constructor, which
    // itself passes null for the factories and the comparator.
    this(new SortConfig(), null, null, null);
}
/**
 * Creates a sorter with the given configuration; the reader factory, writer
 * factory and comparator are all passed on as {@code null}.
 *
 * @param config sort configuration
 */
protected SorterBase(SortConfig config) {
this(config, null, null, null);
}
/*
/**********************************************************************
/* SortingState implementation
/**********************************************************************
*/
/**
 * Requests cancellation without an associated exception: clears any
 * previously stored exception and raises the cancel flag.
 */
@Override
public void cancel() {
// Exception slot is written before the flag is raised.
// NOTE(review): presumably readers test the flag first and only then read
// the exception, relying on this order -- confirm against the code that
// consumes _cancelRequest.
_cancelForException = null;
_cancelRequest.set(true);
}
/**
 * Requests cancellation and records the given runtime exception alongside
 * the request.
 *
 * @param e exception to store in {@code _cancelForException}
 */
@Override
public void cancel(RuntimeException e) {
// Exception is stored before the flag is raised (order kept deliberately;
// NOTE(review): confirm how the consumer of _cancelRequest reads it).
_cancelForException = e;
_cancelRequest.set(true);
}
/**
 * Requests cancellation and records the given I/O exception alongside the
 * request.
 *
 * @param e exception to store in {@code _cancelForException}
 */
@Override
public void cancel(IOException e) {
// Exception is stored before the flag is raised (order kept deliberately;
// NOTE(review): confirm how the consumer of _cancelRequest reads it).
_cancelForException = e;
_cancelRequest.set(true);
}
/**
 * @return the phase currently recorded for this sorter; {@code null} until
 *    a phase has been assigned
 */
@Override
public Phase getPhase() {
    return this._phase;
}
/**
 * @return the currently recorded sort-round count
 */
@Override
public int getNumberOfSortRounds() {
    return this._sortRoundCount;
}
/**
 * @return the currently recorded number of pre-sort files
 */
@Override
public int getNumberOfPreSortFiles() {
    return this._presortFileCount;
}
/**
 * @return the currently recorded sort round
 */
@Override
public int getSortRound() {
    return this._currentSortRound;
}
/**
 * @return {@code true} exactly when the recorded phase is
 *    {@code SortingState.Phase.COMPLETE}
 */
@Override
public boolean isCompleted() {
    final SortingState.Phase current = _phase;
    return SortingState.Phase.COMPLETE == current;
}
/**
 * @return {@code true} exactly when the recorded phase is
 *    {@code SortingState.Phase.PRE_SORTING}
 */
@Override
public boolean isPreSorting() {
    final SortingState.Phase current = _phase;
    return SortingState.Phase.PRE_SORTING == current;
}
/**
 * @return {@code true} exactly when the recorded phase is
 *    {@code SortingState.Phase.SORTING}
 */
@Override
public boolean isSorting() {
    final SortingState.Phase current = _phase;
    return SortingState.Phase.SORTING == current;
}
/*
/**********************************************************************
/* Internal methods, pre-sorting
/**********************************************************************
*/
/**
 * Helper method that fills the given buffer with entries read from the
 * given reader until either the reader returns {@code null} or the
 * remaining (estimated) memory budget drops below the size of the largest
 * entry seen so far.
 *<p>
 * The budget is charged {@code ENTRY_SLOT_SIZE} per slot of every buffer
 * segment that is started, plus the reader's size estimate for every entry
 * stored -- including {@code firstItem}, which the caller has already read
 * and which therefore occupies memory just like the entries read here.
 *
 * @param inputReader source of entries; also supplies per-entry size estimates
 * @param buffer buffer to reset and fill
 * @param memoryToUse memory budget, in bytes, for this batch
 * @param firstItem entry that was read ahead by the caller and must become
 *    the first entry of this batch; {@code null} if there is none
 * @return whatever {@code buffer.completeAndClearBuffer} returns for the
 *    entries collected in this call
 * @throws IOException if reading from {@code inputReader} fails
 */
protected Object[] _readMax(DataReader inputReader, SegmentedBuffer buffer,
        long memoryToUse, T firstItem)
    throws IOException
{
    int ptr = 0;
    Object[] segment = buffer.resetAndStart();
    int segmentLength = segment.length;
    // Smallest amount of head-room we insist on before reading another
    // entry: at least 256 bytes, raised to the largest entry size seen.
    long minMemoryNeeded = 256L;

    if (firstItem != null) {
        segment[ptr++] = firstItem;
        long firstItemSize = inputReader.estimateSizeInBytes(firstItem);
        minMemoryNeeded = Math.max(ENTRY_SLOT_SIZE + firstItemSize, 256L);
        // The read-ahead entry is held in the buffer like any other, so it
        // has to be charged against the budget too; otherwise each batch
        // could exceed the limit by the size of its first entry.
        memoryToUse -= firstItemSize;
    }
    // reduce mem amount by buffer cost too:
    memoryToUse -= (ENTRY_SLOT_SIZE * segmentLength);

    while (true) {
        T value = inputReader.readNext();
        if (value == null) { // end of input
            break;
        }
        long size = inputReader.estimateSizeInBytes(value);
        if (size > minMemoryNeeded) {
            minMemoryNeeded = size;
        }
        if (ptr >= segmentLength) {
            // current segment is full: hand it over, continue with a new
            // one and pay for its slots
            segment = buffer.appendCompletedChunk(segment);
            segmentLength = segment.length;
            memoryToUse -= (ENTRY_SLOT_SIZE * segmentLength);
            ptr = 0;
        }
        segment[ptr++] = value;
        memoryToUse -= size;
        // stop once another entry as large as the biggest one seen might
        // not fit any more
        if (memoryToUse < minMemoryNeeded) {
            break;
        }
    }
    return buffer.completeAndClearBuffer(segment, ptr);
}
/**
 * Repeatedly reads a memory-bounded batch of entries, sorts it in memory,
 * writes it out via {@code _writePresorted} and appends the resulting file
 * to {@code presorted}, until the reader returns {@code null}.
 * At least one batch is always processed.
 *
 * @param inputReader source of entries
 * @param buffer buffer reused for every batch
 * @param nextValue entry already read by the caller; passed to
 *    {@code _readMax} as the first entry of the first batch
 * @param presorted list that receives one file per batch
 * @throws IOException if reading input or writing a batch fails
 */
protected void _presort(DataReader inputReader, SegmentedBuffer buffer, T nextValue,
        List presorted)
    throws IOException
{
    while (true) {
        final Object[] batch = _readMax(inputReader, buffer, _config.getMaxMemoryUsage(), nextValue);
        Arrays.sort(batch, _rawComparator());
        presorted.add(_writePresorted(batch));
        // Read one entry ahead: it both tells us whether more input exists
        // and seeds the next batch.
        nextValue = inputReader.readNext();
        if (nextValue == null) {
            return;
        }
    }
}
@SuppressWarnings("resource")
protected File _writePresorted(Object[] items) throws IOException
{
File tmp = _config.getTempFileProvider().provide();
@SuppressWarnings("unchecked")
DataWriter