All downloads are free. The search and download features use the official Maven repository.

src.it.unimi.dsi.io.ByteDiskQueue Maven / Gradle / Ivy

Go to download

The DSI utilities are a mishmash of classes accumulated during the last twenty years in projects developed at the DSI (Dipartimento di Scienze dell'Informazione, i.e., Information Sciences Department), now DI (Dipartimento di Informatica, i.e., Informatics Department), of the Università degli Studi di Milano.

There is a newer version: 2.7.3
Show newest version
package it.unimi.dsi.io;

/*
 * DSI utilities
 *
 * Copyright (C) 2013-2020 Sebastiano Vigna
 *
 *  This library is free software; you can redistribute it and/or modify it
 *  under the terms of the GNU Lesser General Public License as published by the Free
 *  Software Foundation; either version 3 of the License, or (at your option)
 *  any later version.
 *
 *  This library is distributed in the hope that it will be useful, but
 *  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
 *  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License
 *  for more details.
 *
 *  You should have received a copy of the GNU Lesser General Public License
 *  along with this program; if not, see <http://www.gnu.org/licenses/>.
 *
 */

import java.io.Closeable;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.RandomAccessFile;
import java.nio.ByteBuffer;
import java.nio.channels.FileChannel;
import java.util.NoSuchElementException;

import it.unimi.dsi.bits.Fast;
import it.unimi.dsi.fastutil.Size64;

/**
 * A queue of bytes partially stored on disk.
 *
 * 

Instances of this class keep track of a queue of bytes using a circular * (and possibly {@linkplain ByteBuffer#allocateDirect(int) direct}) * {@link ByteBuffer} that contains the head and the tail of the queue. * The central part of the queue, if necessary, is dumped on disk. Note that {@link #clear()} * does empty the queue, but it does not remove the dump file on disk: use {@link #trim()} to trim * the file at the smallest possible length. The dump file can be removed only with a call to * {@link #close()}, or when the instance is finalised. * *

Since file handles are a precious resource, this class provides a {@link #suspend()} method * that releases the file handle without deleting the dump file. All calls to methods accessing the * file handle will reinitialise it lazily (which implies that a certain number of * enqueues/dequeues can be performed on a suspended class without actually reopening * the dump file). * *

You can create a queue {@linkplain #createNew(File, int, boolean) using a new or empty file}, or * {@linkplain #createFromFile(File, int, boolean) using an existing file}, in which case the * content of the file will become the content of the queue. A {@link #freeze()} method closes * a queue but leaves behind a dump file containing the current content of the queue, so that it * can be {@linkplain #createFromFile(File, int, boolean) reopened} later. * *

Warning: the {@link #close()} method will delete the queue file from * disk. This implies that if a file with the same name has been created in the meanwhile, it will * be deleted. */ public class ByteDiskQueue implements Closeable, Size64 { private static final boolean DEBUG = false; /** The file containing the disk-based part of the queue. */ private final File file; /** Whether {@link #buffer} should be allocated using {@link ByteBuffer#allocateDirect(int)}. */ private final boolean direct; /** The start of the {@linkplain #buffer circular buffer}. */ private int start; /** The end of the {@linkplain #buffer circular buffer}. Might be equal * to {@link #start} even if the queue is not empty, but in that case {@link #used} is not zero (actually, it will be * equal to the {@linkplain ByteBuffer#capacity() capacity} of {@link #buffer}). */ private int end; /** The number of bytes currently in the {@linkplain #buffer circular buffer}. * Note that this number might be the {@linkplain ByteBuffer#capacity() capacity} of {@link #buffer} when {@link #start} is * equal to {@link #end}. */ private int used; /** The hole in the queue. If it is not -1, there are elements on * disk that should be inserted at this position in the {@linkplain #buffer circular buffer}. */ private int hole; /** The {@linkplain ByteBuffer#capacity() capacity} of the {@linkplain #buffer circular buffer} minus 1. */ private int mask; /** The current read/write {@link FileChannel} open on {@link #file}; it is {@code null} while the queue is * {@linkplain #suspend() suspended} or {@linkplain #close() closed}, but in the first case * {@link #readPosition} and {@link #writePosition} will be nonnegative. */ private FileChannel channel; /** The write position of {@link #channel}, or -1 if this queue has been {@linkplain #close() closed}. * If it is equal to {@link #readPosition}, then {@link #buffer} contain all bytes in this queue. 
*/ private long writePosition; /** The current read position, or -1 if this queue has been closed. * @see #writePosition */ private long readPosition; /** The circular buffer. */ private ByteBuffer buffer; /** Creates a new disk-based byte queue. * * @param file the file that will be used to dump the central part of the queue on disk. * @param bufferSize the {@linkplain ByteBuffer#capacity() capacity} of the circular buffer (will be possibly decreased so to be a power of two). * @param direct whether the circular {@link ByteBuffer} used by this queue should be {@linkplain ByteBuffer#allocateDirect(int) allocated directly}. * @param useFileContent whether the queue is new or should use the content of an existing file; in the first case, * we check that the file does not exists or has length zero. */ @SuppressWarnings("resource") protected ByteDiskQueue(final File file, final int bufferSize, final boolean direct, final boolean useFileContent) throws IOException { this.file = file; this.direct = direct; channel = new RandomAccessFile(file, "rw").getChannel(); if (useFileContent) writePosition = channel.size(); else channel.truncate(0); hole = readPosition == writePosition ? -1 : 0; buffer = direct ? ByteBuffer.allocateDirect(Integer.highestOneBit(bufferSize)) : ByteBuffer.allocate(Integer.highestOneBit(bufferSize)); mask = buffer.capacity() - 1; } /** Creates a new empty disk-based byte queue. * * @param file the file that will be used to dump the central part of the queue on disk (must not exist * or have length zero). * @param bufferSize the {@linkplain ByteBuffer#capacity() capacity} of the circular buffer (will be possibly decreased so to be a power of two). * @param direct whether the circular {@link ByteBuffer} used by this queue should be {@linkplain ByteBuffer#allocateDirect(int) allocated directly}. 
*/ public static ByteDiskQueue createNew(final File file, final int bufferSize, final boolean direct) throws IOException { if (file.length() != 0) throw new IOException("File " + file + " is nonempty"); return new ByteDiskQueue(file, bufferSize, direct, false); } /** Creates a new disk-based byte queue using the content of an existing file. * The stream of bytes contained in the provided file will form the initial content of the queue. * * @param file the file that will be used to dump the central part of the queue on disk (must exist). * @param bufferSize the {@linkplain ByteBuffer#capacity() capacity} of the circular buffer (will be possibly decreased so to be a power of two). * @param direct whether the circular {@link ByteBuffer} used by this queue should be {@linkplain ByteBuffer#allocateDirect(int) allocated directly}. */ public static ByteDiskQueue createFromFile(final File file, final int bufferSize, final boolean direct) throws IOException { if (! file.exists()) throw new IOException("File " + file + " does not exist"); return new ByteDiskQueue(file, bufferSize, direct, true); } /** Throws an {@link IllegalStateException} if the queue has been {@linkplain #close() closed}. */ private void ensureNotClosed() { if (readPosition == -1) throw new IllegalStateException(); } /** Enqueues a byte to this queue. * * @param b the byte to be enqueued. */ public void enqueue(final byte b) throws IOException { if (DEBUG) System.err.println("[start = " + start + ", end = " + end + ", hole = " + hole + "] enqueue(" + b + ")"); ensureNotClosed(); if (used == buffer.capacity()) dumpTail(); buffer.put(end++, b); end &= mask; used++; } /** Adds a vByte-coded natural number to this queue. * * @param x the natural number to be added to the queue. 
*/ public void enqueueInt(final int x) throws IOException { assert x >= 0 : x; if (DEBUG) System.err.println("[start = " + start + ", end = " + end + ", hole = " + hole + "] enqueueInt(" + x + ")"); switch(Fast.mostSignificantBit(x) / 7 + 1) { case 1: enqueue((byte)x); break; case 2: enqueue((byte)(x >>> 7 | 0x80)); enqueue((byte)(x & 0x7F)); break; case 3: enqueue((byte)(x >>> 14 | 0x80)); enqueue((byte)(x >>> 7 | 0x80)); enqueue((byte)(x & 0x7F)); break; case 4: enqueue((byte)(x >>> 21 | 0x80)); enqueue((byte)(x >>> 14 | 0x80)); enqueue((byte)(x >>> 7 | 0x80)); enqueue((byte)(x & 0x7F)); break; case 5: enqueue((byte)(x >>> 28 | 0x80)); enqueue((byte)(x >>> 21 | 0x80)); enqueue((byte)(x >>> 14 | 0x80)); enqueue((byte)(x >>> 7 | 0x80)); enqueue((byte)(x & 0x7F)); } } /** Enqueues a fragment of byte array to this queue. * * @param a a byte array. * @param offset the first byte in {@code a} to be enqueued. * @param length the number of bytes to enqueue. */ public void enqueue(final byte[] a, int offset, int length) throws IOException { if (DEBUG) System.err.println("[start = " + start + ", end = " + end + ", hole = " + hole + "] enqueue(" + offset + ", " + length + ")"); ensureNotClosed(); do { if (used == buffer.capacity()) dumpTail(); assert used != buffer.capacity() : used + " == " + buffer.capacity(); final int toBePut = Math.min(length, buffer.capacity() - used); buffer.clear(); buffer.position(end); if (end < start) buffer.put(a, offset, toBePut); else { final int firstTransfer = Math.min(toBePut, buffer.capacity() - end); buffer.put(a, offset, firstTransfer); if (firstTransfer < toBePut) { buffer.clear(); buffer.put(a, offset + firstTransfer, toBePut - firstTransfer); } } offset += toBePut; length -= toBePut; used += toBePut; end = end + toBePut & mask; } while(length != 0); buffer.clear(); } /** Enqueues a byte array to this queue. * * @param a the array whose bytes have to be enqueued. 
*/ public void enqueue(final byte[] a) throws IOException { enqueue(a, 0, a.length); } private void dumpTail() throws IOException { resume(); if (DEBUG) System.err.println("Dumping at " + writePosition + ", start = " + start+ ", end = " + end + ", hole = " + hole); channel.position(writePosition); if (hole == -1) hole = (start + buffer.capacity() / 2) & mask; buffer.clear(); buffer.position(hole); if (hole < end) { buffer.limit(end); channel.write(buffer); used -= end - hole; } else { channel.write(buffer); used -= buffer.capacity() - hole; buffer.position(0); buffer.limit(end); channel.write(buffer); used -= end; } buffer.clear(); writePosition = channel.position(); end = hole; if (DEBUG) System.err.println("start = " + start+ ", end = " + end + ", hole = " + hole + ", readPosition = " + readPosition + ", writePosition = " + writePosition); } private void loadHead() throws IOException { assert readPosition != writePosition : readPosition + " == " + writePosition; // There are items on disk. We must load them. if (DEBUG) System.err.println("Reading at " + readPosition + "..."); // If there is not enough space, we dump the tail. It is large, anyway. if (used > buffer.capacity() / 2) dumpTail(); resume(); assert size64() > used : size64() + " <= " + used; final int toRead = (int)Math.min(writePosition - readPosition, buffer.capacity() / 2); start = hole - toRead & mask; channel.position(readPosition); buffer.clear(); buffer.position(start); if (start < hole) { buffer.limit(hole); channel.read(buffer); } else { channel.read(buffer); buffer.position(0); buffer.limit(hole); channel.read(buffer); } buffer.clear(); used += toRead; readPosition = channel.position(); if (readPosition == writePosition) { readPosition = writePosition = 0; hole = -1; } assert start != hole : start + " == " + hole; if (DEBUG) System.err.println("Now at " + readPosition); } /** Dequeues a byte from the queue. * * @return the first available byte in this queue. 
* @throws NoSuchElementException if there are no bytes in this queue. */ public byte dequeue() throws IOException { ensureNotClosed(); if (DEBUG) System.err.print("[start = " + start + ", end = " + end + ", hole = " + hole + "] dequeue() = "); if (isEmpty()) throw new NoSuchElementException(); if (start == hole) loadHead(); final byte result = buffer.get(start++); start &= mask; used--; if (DEBUG) System.err.println(result); return result; } /** Dequeues a sequence of bytes from this queue into an array. * * @param a a byte array. * @param offset the first position in {@code a} where dequeued bytes will be written. * @param length the number of bytes to dequeue. * @throws NoSuchElementException if there are not enough bytes in this queue. */ public void dequeue(final byte[] a, int offset, int length) throws IOException { if (DEBUG) System.err.println("[start = " + start + ", end = " + end + ", hole = " + hole + "] dequeue(" + offset + ", " + length + ")"); ensureNotClosed(); while(length != 0) { if (isEmpty()) throw new NoSuchElementException(); else if (start == hole) loadHead(); assert start != hole : start + " == " + hole; final int endOfTransfer = hole == -1 ? end : hole; final int toTransfer = Math.min(length, endOfTransfer == start ? used : endOfTransfer - start & mask); buffer.clear(); buffer.position(start); if (start < endOfTransfer) buffer.get(a, offset, toTransfer); else { final int firstTransfer = Math.min(toTransfer, buffer.capacity() - start); buffer.get(a, offset, firstTransfer); if (firstTransfer < toTransfer) { buffer.clear(); buffer.get(a, offset + firstTransfer, toTransfer - firstTransfer); } } offset += toTransfer; length -= toTransfer; used -= toTransfer; start = start + toTransfer & mask; } buffer.clear(); } /** Dequeues a sequence of bytes from this queue into an array. * * @param a the array that will contain the dequeued bytes. * @throws NoSuchElementException if there are not enough bytes in this queue. 
*/ public void dequeue(final byte[] a) throws IOException { dequeue(a, 0, a.length); } /** Dequeues from this queue a vByte-coded natural number. * * @return the first available natural number in this queue. */ public int dequeueInt() throws IOException { if (DEBUG) System.err.println("[start = " + start + ", end = " + end + ", hole = " + hole + "] dequeueInt()"); for(int x = 0; ;) { final byte b = dequeue(); x |= b & 0x7F; if (b >= 0) return x; x <<= 7; } } @Deprecated @Override public int size() { throw new UnsupportedOperationException(); } @Override public long size64() { return writePosition - readPosition + used; } public boolean isEmpty() { return size64() == 0; } /** Deallocates all disk resources associated with this queue. After calling this method, calls * to any methods that would modify the queue will cause an {@link IllegalStateException}. */ @Override public void close() throws IOException { // If the queue is not suspended, we close the file handles. if (channel != null) { channel.close(); channel = null; } writePosition = readPosition = -1; used = 0; file.delete(); } public void freeze() throws IOException { if (DEBUG) System.err.println("[start = " + start + ", end = " + end + ", hole = " + hole + ", readPosition = " + readPosition + ", writePosition = " + writePosition + "] freeze()"); resume(); final File freezeFile = File.createTempFile(ByteDiskQueue.class.getSimpleName() + "-", ".freeze", file.getParentFile()); final FileOutputStream fos = new FileOutputStream(freezeFile); final FileChannel freezeChannel = fos.getChannel(); // Dump head buffer.clear(); buffer.position(start); if (hole == -1) { if (start < end) { buffer.limit(end); freezeChannel.write(buffer); } else if (used != 0) { freezeChannel.write(buffer); buffer.position(0); buffer.limit(end); freezeChannel.write(buffer); } } else { if (start < hole) { buffer.limit(hole); freezeChannel.write(buffer); } else if (start != hole){ freezeChannel.write(buffer); buffer.position(0); 
buffer.limit(hole); freezeChannel.write(buffer); } for(long position = readPosition; position < writePosition;) position += channel.transferTo(position, writePosition - position, freezeChannel); // Dump tail buffer.clear(); buffer.position(hole); if (hole < end) { buffer.limit(end); freezeChannel.write(buffer); } else if (hole != end) { freezeChannel.write(buffer); buffer.position(0); buffer.limit(end); freezeChannel.write(buffer); } } assert size64() == freezeChannel.size() : size64() + " != " + freezeChannel.size(); fos.close(); channel.close(); file.delete(); if (! freezeFile.renameTo(file)) throw new IOException("Cannot rename freeze file " + freezeFile + " to " + file); channel = null; writePosition = readPosition = -1; used = 0; } /** Clears the queue. Note that we do not modify the dump file (use {@link #trim()} to that purpose). */ public void clear() { ensureNotClosed(); writePosition = readPosition = start = end = used = 0; hole = -1; } /** Trims the queue dump file at the current write position. */ public void trim() throws IOException { ensureNotClosed(); resume(); channel.truncate(writePosition); } /** Suspends this queue, releasing the associated file handles. If the deque is already * suspended, does nothing. The queue will lazily resume its operations when necessary. */ public void suspend() throws IOException { ensureNotClosed(); if (channel == null) return; channel.close(); channel = null; } /** Resumes this queue, restoring the associated file handles. If the queue is already active, does nothing. 
* * @see #suspend() */ @SuppressWarnings("resource") private void resume() throws IOException { ensureNotClosed(); if (channel != null) return; channel = new RandomAccessFile(file, "rw").getChannel(); } @SuppressWarnings("deprecation") @Override protected void finalize() throws Throwable { try { if (channel != null || writePosition != -1) { System.err.println("WARNING: This " + this.getClass().getName() + " [" + toString() + "] should have been closed."); close(); } } finally { super.finalize(); } } /** Enlarge the size of the buffer of this queue to a given size. * * @param newBufferSize the required buffer size (will be possibly decreased so to be a power of two). * If the new buffer size is smaller than the current size, nothing happens. */ public void enlargeBuffer(int newBufferSize) { if (DEBUG) System.err.print("Enlarging to size " + newBufferSize + " [start = " + start+ ", end = " + end + ", hole = " + hole + "]... "); newBufferSize = Integer.highestOneBit(newBufferSize); if (newBufferSize <= buffer.capacity()) return; final ByteBuffer newByteBuffer = direct ? ByteBuffer.allocateDirect(newBufferSize) : ByteBuffer.allocate(newBufferSize); buffer.clear(); if (start < end) { buffer.position(start); buffer.limit(end); newByteBuffer.put(buffer); end -= start; if (hole >= 0) hole -= start; } else if (used != 0) { buffer.position(start); newByteBuffer.put(buffer); buffer.position(0); buffer.limit(end); newByteBuffer.put(buffer); end += buffer.capacity() - start; if (hole >= 0) if (hole >= start) hole -= start; else hole += buffer.capacity() - start; } else { // This covers the case start == end, used == 0 end = 0; if (hole > 0) hole = 0; } start = 0; buffer = newByteBuffer; mask = newBufferSize - 1; if (DEBUG) System.err.println("[start = " + start+ ", end = " + end + ", hole = " + hole + "]"); } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy