
// Source listing of it.unimi.dsi.io.ByteDiskQueue (dsiutils library).
package it.unimi.dsi.io;
/*
* DSI utilities
*
* Copyright (C) 2013-2016 Sebastiano Vigna
*
* This library is free software; you can redistribute it and/or modify it
* under the terms of the GNU Lesser General Public License as published by the Free
* Software Foundation; either version 3 of the License, or (at your option)
* any later version.
*
* This library is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
* or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
* for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program; if not, see <http://www.gnu.org/licenses/>.
*
*/
import it.unimi.dsi.bits.Fast;
import it.unimi.dsi.fastutil.Size64;
import java.io.Closeable;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.RandomAccessFile;
import java.nio.ByteBuffer;
import java.nio.channels.FileChannel;
import java.util.NoSuchElementException;
/**
* A queue of bytes partially stored on disk.
*
* Instances of this class keep track of a queue of bytes using a circular
* (and possibly {@linkplain ByteBuffer#allocateDirect(int) direct})
* {@link ByteBuffer} that contains the head and the tail of the queue.
* The central part of the queue, if necessary, is dumped on disk. Note that {@link #clear()}
* does empty the queue, but it does not remove the dump file on disk: use {@link #trim()} to trim
* the file at the smallest possible length. The dump file can be removed only with a call to
* {@link #close()}, or when the instance is finalised.
*
*
* <p>Since file handles are a precious resource, this class provides a {@link #suspend()} method
* that releases the file handle without deleting the dump file. All calls to methods accessing the
* file handle will reinitialise it lazily (which implies that a certain number of
* enqueues/dequeues can be performed on a suspended class without actually reopening
* the dump file).
*
*
* <p>You can create a queue {@linkplain #createNew(File, int, boolean) using a new or empty file}, or
* {@linkplain #createFromFile(File, int, boolean) using an existing file}, in which case the
* content of the file will become the content of the queue. A {@link #freeze()} method closes
* a queue but leaves behind a dump file containing the current content of the queue, so that it
* can be {@linkplain #createFromFile(File, int, boolean) reopened} later.
*
*
* <p><strong>Warning</strong>: the {@link #close()} method will delete the queue file from
* disk. This implies that if a file with the same name has been created in the meanwhile, it will
* be deleted.
*/
public class ByteDiskQueue implements Closeable, Size64 {
private static final boolean DEBUG = false;
/** The file containing the disk-based part of the queue. */
private final File file;
/** Whether {@link #buffer} should be allocated using {@link ByteBuffer#allocateDirect(int)}. */
private final boolean direct;
/** The start of the {@linkplain #buffer circular buffer}. */
private int start;
/** The end of the {@linkplain #buffer circular buffer}. Might be equal
* to {@link #start} even if the queue is not empty, but in that case {@link #used} is not zero (actually, it will be
* equal to the {@linkplain ByteBuffer#capacity() capacity} of {@link #buffer}). */
private int end;
/** The number of bytes currently in the {@linkplain #buffer circular buffer}.
* Note that this number might be the {@linkplain ByteBuffer#capacity() capacity} of {@link #buffer} when {@link #start} is
* equal to {@link #end}. */
private int used;
/** The hole in the queue. If it is not -1, there are elements on
* disk that should be inserted at this position in the {@linkplain #buffer circular buffer}. */
private int hole;
/** The {@linkplain ByteBuffer#capacity() capacity} of the {@linkplain #buffer circular buffer} minus 1. */
private int mask;
/** The current read/write {@link FileChannel} open on {@link #file}; it is {@code null} while the queue is
* {@linkplain #suspend() suspended} or {@linkplain #close() closed}, but in the first case
* {@link #readPosition} and {@link #writePosition} will be nonnegative. */
private FileChannel channel;
/** The write position of {@link #channel}, or -1 if this queue has been {@linkplain #close() closed}.
* If it is equal to {@link #readPosition}, then {@link #buffer} contains all bytes in this queue. */
private long writePosition;
/** The current read position, or -1 if this queue has been closed.
* @see #writePosition */
private long readPosition;
/** The circular buffer. */
private ByteBuffer buffer;
/** Creates a new disk-based byte queue.
*
* @param file the file that will be used to dump the central part of the queue on disk.
* @param bufferSize the {@linkplain ByteBuffer#capacity() capacity} of the circular buffer (will be possibly decreased so to be a power of two).
* @param direct whether the circular {@link ByteBuffer} used by this queue should be {@linkplain ByteBuffer#allocateDirect(int) allocated directly}.
* @param useFileContent whether the queue is new or should use the content of an existing file; in the first case,
* we check that the file does not exists or has length zero.
*/
@SuppressWarnings("resource")
protected ByteDiskQueue( final File file, final int bufferSize, final boolean direct, final boolean useFileContent ) throws IOException {
this.file = file;
this.direct = direct;
channel = new RandomAccessFile( file, "rw" ).getChannel();
if ( useFileContent ) writePosition = channel.size();
else channel.truncate( 0 );
hole = readPosition == writePosition ? -1 : 0;
buffer = direct ? ByteBuffer.allocateDirect( Integer.highestOneBit( bufferSize ) ) : ByteBuffer.allocate( Integer.highestOneBit( bufferSize ) );
mask = buffer.capacity() - 1;
}
/** Returns a new, empty disk-based byte queue backed by the given file.
 *
 * @param file the file that will be used to dump the central part of the queue on disk (must not exist
 * or have length zero).
 * @param bufferSize the {@linkplain ByteBuffer#capacity() capacity} of the circular buffer (will be possibly decreased so to be a power of two).
 * @param direct whether the circular {@link ByteBuffer} used by this queue should be {@linkplain ByteBuffer#allocateDirect(int) allocated directly}.
 * @return a new empty queue.
 * @throws IOException if {@code file} has nonzero length, or if the queue cannot be created.
 */
public static ByteDiskQueue createNew( final File file, final int bufferSize, final boolean direct ) throws IOException {
	final boolean hasContent = file.length() != 0;
	if ( hasContent ) {
		throw new IOException( "File " + file + " is nonempty" );
	}
	final boolean useFileContent = false;
	return new ByteDiskQueue( file, bufferSize, direct, useFileContent );
}
/** Returns a new disk-based byte queue whose initial content is the content of an existing file.
 * The stream of bytes contained in the provided file will form the initial content of the queue.
 *
 * @param file the file that will be used to dump the central part of the queue on disk (must exist).
 * @param bufferSize the {@linkplain ByteBuffer#capacity() capacity} of the circular buffer (will be possibly decreased so to be a power of two).
 * @param direct whether the circular {@link ByteBuffer} used by this queue should be {@linkplain ByteBuffer#allocateDirect(int) allocated directly}.
 * @return a queue containing the bytes of {@code file}.
 * @throws IOException if {@code file} does not exist, or if the queue cannot be created.
 */
public static ByteDiskQueue createFromFile( final File file, final int bufferSize, final boolean direct ) throws IOException {
	if ( file.exists() ) {
		final boolean useFileContent = true;
		return new ByteDiskQueue( file, bufferSize, direct, useFileContent );
	}
	throw new IOException( "File " + file + " does not exist" );
}
/** Throws an {@link IllegalStateException} if the queue has been {@linkplain #close() closed}.
 *
 * <p>A closed queue is recognised by {@link #readPosition} being -1.
 *
 * @throws IllegalStateException if this queue has been closed.
 */
private void ensureNotClosed() {
	if ( readPosition == -1 ) throw new IllegalStateException( "This queue has been closed" );
}
/** Appends a single byte at the end of this queue.
 *
 * <p>If the circular buffer is full, its tail is dumped on disk first to make room.
 *
 * @param b the byte to be enqueued.
 * @throws IOException if dumping the tail on disk fails.
 * @throws IllegalStateException if this queue has been closed.
 */
public void enqueue( final byte b ) throws IOException {
	if ( DEBUG ) {
		System.err.println( "[start = " + start + ", end = " + end + ", hole = " + hole + "] enqueue(" + b + ")" );
	}
	ensureNotClosed();
	final boolean bufferFull = used == buffer.capacity();
	if ( bufferFull ) {
		dumpTail();
	}
	final int slot = end++;
	buffer.put( slot, b );
	// Wrap around the end of the circular buffer.
	end &= mask;
	used += 1;
}
/** Adds a vByte-coded natural number to this queue.
 *
 * <p>The number is written in groups of seven bits, most significant group first; every byte
 * but the last one has its high bit set.
 *
 * @param x the natural number to be added to the queue.
 * @throws IOException if enqueueing a byte fails.
 */
public void enqueueInt( final int x ) throws IOException {
	assert x >= 0 : x;
	if ( DEBUG ) System.err.println( "[start = " + start + ", end = " + end + ", hole = " + hole + "] enqueueInt(" + x + ")" );
	// Number of 7-bit groups; presumably between 1 and 5, as Fast.mostSignificantBit()
	// is expected to return a value between -1 and 31 for an int (verify against Fast).
	final int groups = Fast.mostSignificantBit( x ) / 7 + 1;
	// All groups but the last one carry the continuation bit.
	for( int shift = 7 * ( groups - 1 ); shift > 0; shift -= 7 ) {
		enqueue( (byte)( x >>> shift | 0x80 ) );
	}
	// With a single group x is smaller than 128, so masking is the same as a plain cast.
	enqueue( (byte)( x & 0x7F ) );
}
/** Enqueues a fragment of byte array to this queue.
 *
 * <p>The fragment is copied into the circular buffer in chunks; whenever the buffer is full,
 * its tail is dumped on disk. The arguments are validated before any byte is enqueued, and
 * enqueueing zero bytes performs no I/O.
 *
 * @param a a byte array.
 * @param offset the first byte in {@code a} to be enqueued.
 * @param length the number of bytes to enqueue.
 * @throws IndexOutOfBoundsException if {@code offset} or {@code length} are negative, or
 * {@code offset + length} is greater than the length of {@code a}.
 * @throws IllegalStateException if this queue has been closed.
 * @throws IOException if dumping the tail on disk fails.
 */
public void enqueue( final byte[] a, int offset, int length ) throws IOException {
	if ( DEBUG ) System.err.println( "[start = " + start + ", end = " + end + ", hole = " + hole + "] enqueue(" + offset + ", " + length + ")" );
	ensureNotClosed();
	// Fail before touching the queue, so that an invalid range never results in a partial enqueue.
	if ( offset < 0 || length < 0 || length > a.length - offset )
		throw new IndexOutOfBoundsException( "offset = " + offset + ", length = " + length + ", array length = " + a.length );
	while( length != 0 ) {
		if ( used == buffer.capacity() ) dumpTail();
		assert used != buffer.capacity() : used + " == " + buffer.capacity();
		final int toBePut = Math.min( length, buffer.capacity() - used );
		buffer.clear();
		buffer.position( end );
		// If end < start the free space is contiguous; otherwise it might wrap around the end of the buffer.
		if ( end < start ) buffer.put( a, offset, toBePut );
		else {
			final int firstTransfer = Math.min( toBePut, buffer.capacity() - end );
			buffer.put( a, offset, firstTransfer );
			if ( firstTransfer < toBePut ) {
				// Continue from the beginning of the buffer.
				buffer.clear();
				buffer.put( a, offset + firstTransfer, toBePut - firstTransfer );
			}
		}
		offset += toBePut;
		length -= toBePut;
		used += toBePut;
		end = end + toBePut & mask;
	}
	// Restore the full limit, as absolute accesses are checked against it.
	buffer.clear();
}
/** Enqueues all bytes of an array to this queue.
 *
 * @param a the array whose bytes have to be enqueued.
 * @throws IOException if enqueueing fails.
 */
public void enqueue( final byte[] a ) throws IOException {
	final int length = a.length;
	enqueue( a, 0, length );
}
/** Dumps on disk the tail of the circular buffer, that is, the bytes between {@link #hole} and {@link #end}.
*
* <p>The bytes are appended to the dump file at {@link #writePosition}. If there is no hole yet,
* one is created half a buffer after {@link #start}. On return, {@link #end} coincides with
* {@link #hole} and {@link #used} has been decreased by the number of bytes dumped.
*
* @throws IOException if the dump file cannot be reopened or written.
*/
private void dumpTail() throws IOException {
// Make sure the file handle is open (the queue might be suspended).
resume();
if ( DEBUG ) System.err.println( "Dumping at " + writePosition + ", start = " + start+ ", end = " + end + ", hole = " + hole );
channel.position( writePosition );
// No hole yet: everything after the first half of the buffer becomes the tail.
if ( hole == -1 ) hole = ( start + buffer.capacity() / 2 ) & mask;
buffer.clear();
buffer.position( hole );
// NOTE(review): used is decreased by the requested length, not by the value returned by
// channel.write(); this assumes that each call writes all remaining bytes -- confirm.
if ( hole < end ) {
// The tail is contiguous.
buffer.limit( end );
channel.write( buffer );
used -= end - hole;
}
else {
// The tail wraps around: first from the hole to the end of the buffer, then from 0 to end.
channel.write( buffer );
used -= buffer.capacity() - hole;
buffer.position( 0 );
buffer.limit( end );
channel.write( buffer );
used -= end;
}
buffer.clear();
writePosition = channel.position();
// The tail is now empty.
end = hole;
if ( DEBUG ) System.err.println( "start = " + start+ ", end = " + end + ", hole = " + hole + ", readPosition = " + readPosition + ", writePosition = " + writePosition );
}
/** Loads from disk the next bytes of the queue, placing them in the circular buffer right before {@link #hole}.
*
* <p>At most half a buffer is read, starting at {@link #readPosition}; {@link #start} is moved
* backwards accordingly. If after reading there is nothing left on disk, the read and write
* positions are reset to zero and the hole is removed.
*
* @throws IOException if the dump file cannot be reopened, read or written.
*/
private void loadHead() throws IOException {
assert readPosition != writePosition : readPosition + " == " + writePosition;
// There are items on disk. We must load them.
if ( DEBUG ) System.err.println( "Reading at " + readPosition + "..." );
// If there is not enough space, we dump the tail. It is large, anyway.
if ( used > buffer.capacity() / 2 ) dumpTail();
resume();
assert size64() > used : size64() + " <= " + used;
final int toRead = (int)Math.min( writePosition - readPosition, buffer.capacity() / 2 );
// The loaded bytes end exactly at the hole, possibly wrapping around the start of the buffer.
start = hole - toRead & mask;
channel.position( readPosition );
buffer.clear();
buffer.position( start );
// NOTE(review): used is increased by toRead, not by the values returned by channel.read();
// this assumes that each call fills the requested region -- confirm.
if ( start < hole ) {
// The region to fill is contiguous.
buffer.limit( hole );
channel.read( buffer );
}
else {
// The region wraps around: first from start to the end of the buffer, then from 0 to the hole.
channel.read( buffer );
buffer.position( 0 );
buffer.limit( hole );
channel.read( buffer );
}
buffer.clear();
used += toRead;
readPosition = channel.position();
if ( readPosition == writePosition ) {
// Nothing left on disk: the dump file can be reused from its beginning.
readPosition = writePosition = 0;
hole = -1;
}
assert start != hole : start + " == " + hole;
if ( DEBUG ) System.err.println( "Now at " + readPosition );
}
/** Removes and returns the first byte of this queue.
 *
 * <p>If the head of the queue is currently on disk, it is loaded first.
 *
 * @return the first available byte in this queue.
 * @throws NoSuchElementException if there are no bytes in this queue.
 * @throws IllegalStateException if this queue has been closed.
 * @throws IOException if loading the head from disk fails.
 */
public byte dequeue() throws IOException {
	ensureNotClosed();
	if ( DEBUG ) {
		System.err.print( "[start = " + start + ", end = " + end + ", hole = " + hole + "] dequeue() = " );
	}
	if ( isEmpty() ) {
		throw new NoSuchElementException();
	}
	// The in-memory head is exhausted: the next bytes are on disk.
	if ( start == hole ) {
		loadHead();
	}
	final int headIndex = start++;
	final byte first = buffer.get( headIndex );
	// Wrap around the end of the circular buffer.
	start &= mask;
	used -= 1;
	if ( DEBUG ) {
		System.err.println( first );
	}
	return first;
}
/** Dequeues a sequence of bytes from this queue into an array.
*
* <p>Bytes are copied in chunks; each chunk ends at the hole (if any) or at the end of the
* circular buffer content, and the head is loaded from disk whenever the in-memory
* head is exhausted. Note that if the queue runs out of bytes, the bytes dequeued up to
* that point have already been removed from the queue and written into {@code a}.
*
* @param a a byte array.
* @param offset the first position in {@code a} where dequeued bytes will be written.
* @param length the number of bytes to dequeue.
* @throws NoSuchElementException if there are not enough bytes in this queue.
*/
public void dequeue( final byte[] a, int offset, int length ) throws IOException {
if ( DEBUG ) System.err.println( "[start = " + start + ", end = " + end + ", hole = " + hole + "] dequeue(" + offset + ", " + length + ")" );
ensureNotClosed();
while( length != 0 ) {
if ( isEmpty() ) throw new NoSuchElementException();
else if ( start == hole ) loadHead();
assert start != hole : start + " == " + hole;
// We can transfer bytes up to the hole, if there is one, or up to the end of the content.
final int endOfTransfer = hole == -1 ? end : hole;
// If endOfTransfer == start the queue is nonempty, so the available bytes are given by used.
final int toTransfer = Math.min( length, endOfTransfer == start ? used : endOfTransfer - start & mask );
buffer.clear();
buffer.position( start );
if ( start < endOfTransfer ) buffer.get( a, offset, toTransfer );
else {
// The bytes might wrap around the end of the buffer: copy in two pieces.
final int firstTransfer = Math.min( toTransfer, buffer.capacity() - start );
buffer.get( a, offset, firstTransfer );
if ( firstTransfer < toTransfer ) {
buffer.clear();
buffer.get( a, offset + firstTransfer, toTransfer - firstTransfer );
}
}
offset += toTransfer;
length -= toTransfer;
used -= toTransfer;
start = start + toTransfer & mask;
}
buffer.clear();
}
/** Fills an array with bytes dequeued from this queue.
 *
 * @param a the array that will contain the dequeued bytes.
 * @throws NoSuchElementException if there are not enough bytes in this queue.
 * @throws IOException if dequeueing fails.
 */
public void dequeue( final byte[] a ) throws IOException {
	final int length = a.length;
	dequeue( a, 0, length );
}
/** Dequeues from this queue a vByte-coded natural number.
 *
 * <p>Bytes are dequeued until one with the high bit clear is found; the lower seven bits of
 * each byte are concatenated, most significant group first.
 *
 * @return the first available natural number in this queue.
 * @throws IOException if dequeueing a byte fails.
 */
public int dequeueInt() throws IOException {
	if ( DEBUG ) System.err.println( "[start = " + start + ", end = " + end + ", hole = " + hole + "] dequeueInt()" );
	int value = 0;
	byte current = dequeue();
	// A negative byte has its high bit set, that is, more groups follow.
	while( current < 0 ) {
		value = ( value | ( current & 0x7F ) ) << 7;
		current = dequeue();
	}
	return value | ( current & 0x7F );
}
/** Unsupported operation: this method always throws an exception.
*
* @return never returns normally.
* @throws UnsupportedOperationException always.
* @deprecated Use {@link #size64()} instead.
*/
@Deprecated
@Override
public int size() {
throw new UnsupportedOperationException();
}
/** Returns the number of bytes in this queue.
 *
 * @return the number of bytes on disk (the difference between the write and read positions)
 * plus the number of bytes in the circular buffer.
 */
@Override
public long size64() {
	final long bytesOnDisk = writePosition - readPosition;
	return bytesOnDisk + used;
}
/** Returns whether this queue contains no bytes.
 *
 * @return true if {@link #size64()} is zero.
 */
public boolean isEmpty() {
	final long size = size64();
	return size == 0;
}
/** Deallocates all disk resources associated with this queue, deleting the dump file. After calling this method, calls
 * to any methods that would modify the queue will cause an {@link IllegalStateException}.
 *
 * <p>This method is idempotent: if the queue has already been closed (or {@linkplain #freeze() frozen}),
 * it does nothing. In particular, it will not delete a file left behind by {@link #freeze()}.
 *
 * @throws IOException if closing the file handle fails; the queue is marked as closed and the dump file
 * is deleted anyway.
 */
@Override
public void close() throws IOException {
	// Already closed or frozen: there is nothing to release, and the file is no longer ours to delete.
	if ( readPosition == -1 ) return;
	try {
		// If the queue is not suspended, we close the file handle.
		if ( channel != null ) channel.close();
	}
	finally {
		channel = null;
		writePosition = readPosition = -1;
		used = 0;
		file.delete();
	}
}
/** Freezes this queue: the whole content of the queue (the head in the circular buffer, the
 * part on disk and the tail in the circular buffer) is written to a new file that replaces
 * the dump file, and the queue is marked as closed.
 *
 * <p>The content is first written to a temporary file created in the directory of the dump file.
 * If writing fails, the temporary file is deleted and the queue is left untouched. If the final
 * rename fails, the queue is closed anyway and the content remains available in the temporary
 * file named in the exception message.
 *
 * @throws IllegalStateException if this queue has been closed.
 * @throws IOException if the content cannot be written, or the temporary file cannot be renamed.
 */
public void freeze() throws IOException {
	if ( DEBUG ) System.err.println( "[start = " + start + ", end = " + end + ", hole = " + hole + ", readPosition = " + readPosition + ", writePosition = " + writePosition + "] freeze()" );
	resume();
	final File freezeFile = File.createTempFile( ByteDiskQueue.class.getSimpleName() + "-", ".freeze", file.getParentFile() );
	boolean dumped = false;
	try {
		final FileOutputStream fos = new FileOutputStream( freezeFile );
		try {
			final FileChannel freezeChannel = fos.getChannel();
			if ( hole == -1 ) writeCircular( freezeChannel, start, used ); // Everything is in memory.
			else {
				// The head goes from start to the hole; the rest of the bytes in the buffer form the tail.
				// Computing the tail length from used (rather than from hole and end) handles correctly
				// a tail filling the whole buffer, in which case hole == end.
				final int headLength = hole - start & mask;
				writeCircular( freezeChannel, start, headLength );
				for( long position = readPosition; position < writePosition; ) position += channel.transferTo( position, writePosition - position, freezeChannel );
				writeCircular( freezeChannel, hole, used - headLength );
			}
			assert size64() == freezeChannel.size() : size64() + " != " + freezeChannel.size();
		}
		finally {
			fos.close();
		}
		dumped = true;
	}
	finally {
		// Do not leave a partial freeze file behind.
		if ( ! dumped ) freezeFile.delete();
	}
	channel.close();
	channel = null;
	file.delete();
	final boolean renamed = freezeFile.renameTo( file );
	// The dump file is gone in any case: the queue is no longer usable.
	writePosition = readPosition = -1;
	used = 0;
	if ( ! renamed ) throw new IOException( "Cannot rename freeze file " + freezeFile + " to " + file );
}

/** Writes to a channel a region of the circular buffer, wrapping around the end of the buffer if necessary.
 *
 * @param target the channel that will receive the bytes.
 * @param from the index in the circular buffer of the first byte to write.
 * @param length the number of bytes to write (at most the capacity of the buffer).
 */
private void writeCircular( final FileChannel target, final int from, final int length ) throws IOException {
	if ( length == 0 ) return;
	final int firstLength = Math.min( length, buffer.capacity() - from );
	buffer.clear();
	buffer.position( from );
	buffer.limit( from + firstLength );
	while( buffer.hasRemaining() ) target.write( buffer );
	if ( firstLength < length ) {
		// The region wraps around: write the remaining bytes from the beginning of the buffer.
		buffer.clear();
		buffer.limit( length - firstLength );
		while( buffer.hasRemaining() ) target.write( buffer );
	}
	buffer.clear();
}
/** Empties this queue. The dump file is left untouched (use {@link #trim()} to shrink it).
 *
 * @throws IllegalStateException if this queue has been closed.
 */
public void clear() {
	ensureNotClosed();
	// No bytes in memory...
	used = 0;
	end = 0;
	start = 0;
	// ...and no bytes on disk.
	readPosition = 0;
	writePosition = 0;
	hole = -1;
}
/** Truncates the dump file of this queue at the current write position.
 *
 * @throws IllegalStateException if this queue has been closed.
 * @throws IOException if the dump file cannot be reopened or truncated.
 */
public void trim() throws IOException {
	ensureNotClosed();
	// Reopen the dump file if the queue is suspended.
	resume();
	final long newLength = writePosition;
	channel.truncate( newLength );
}
/** Suspends this queue, releasing the associated file handle. If the queue is already
 * suspended, does nothing. The queue will lazily resume its operations when necessary.
 *
 * @throws IllegalStateException if this queue has been closed.
 * @throws IOException if closing the file handle fails.
 */
public void suspend() throws IOException {
	ensureNotClosed();
	if ( channel != null ) {
		channel.close();
		channel = null;
	}
}
/** Resumes this queue, reopening the dump file if there is no open file handle. If the queue is already active, does nothing.
 *
 * @throws IllegalStateException if this queue has been closed.
 * @throws IOException if the dump file cannot be opened.
 * @see #suspend()
 */
@SuppressWarnings("resource")
private void resume() throws IOException {
	ensureNotClosed();
	if ( channel == null ) {
		final RandomAccessFile dumpFile = new RandomAccessFile( file, "rw" );
		channel = dumpFile.getChannel();
	}
}
/** Closes this queue, after printing a warning on standard error, if it has not been closed explicitly. */
protected void finalize() throws Throwable {
	try {
		final boolean notClosed = channel != null || writePosition != -1;
		if ( notClosed ) {
			final String className = this.getClass().getName();
			System.err.println( "WARNING: This " + className + " [" + toString() + "] should have been closed." );
			close();
		}
	}
	finally {
		super.finalize();
	}
}
/** Enlarges the buffer of this queue to a given size.
*
* <p>A new buffer is allocated and the content of the current circular buffer is copied at its
* beginning, so that afterwards {@link #start} is zero; {@link #end} and {@link #hole} are
* relocated accordingly.
*
* @param newBufferSize the required buffer size (will be possibly decreased so to be a power of two).
* If the new buffer size is smaller than the current size, nothing happens.
*/
public void enlargeBuffer( int newBufferSize ) {
if ( DEBUG ) System.err.print( "Enlarging to size " + newBufferSize + " [start = " + start+ ", end = " + end + ", hole = " + hole + "]... " );
newBufferSize = Integer.highestOneBit( newBufferSize );
// Nothing to do if the (rounded) new size does not exceed the current capacity.
if ( newBufferSize <= buffer.capacity() ) return;
ByteBuffer newByteBuffer = direct ? ByteBuffer.allocateDirect( newBufferSize ) : ByteBuffer.allocate( newBufferSize );
buffer.clear();
if ( start < end ) {
// The content is contiguous: copy it and shift all indices back by start.
buffer.position( start );
buffer.limit( end );
newByteBuffer.put( buffer );
end -= start;
if ( hole >= 0 ) hole -= start;
}
else if ( used != 0 ) {
// The content wraps around (or fills the whole buffer): copy from start to the end
// of the buffer, and then from 0 to end.
buffer.position( start );
newByteBuffer.put( buffer );
buffer.position( 0 );
buffer.limit( end );
newByteBuffer.put( buffer );
end += buffer.capacity() - start;
// A hole in the first copied piece is shifted back by start; a hole in the
// second piece is moved after the first piece.
if ( hole >= 0 )
if ( hole >= start ) hole -= start;
else hole += buffer.capacity() - start;
}
else { // This covers the case start == end, used == 0
end = 0;
if ( hole > 0 ) hole = 0;
}
start = 0;
buffer = newByteBuffer;
mask = newBufferSize - 1;
if ( DEBUG ) System.err.println( "[start = " + start+ ", end = " + end + ", hole = " + hole + "]" );
}
}