// it.unimi.dsi.io.SegmentedInputStream - Maven / Gradle / Ivy
// Show all versions of dsi-utils - Show documentation
package it.unimi.dsi.io;
/*
* DSI utilities
*
* Copyright (C) 2006-2009 Sebastiano Vigna
*
* This library is free software; you can redistribute it and/or modify it
* under the terms of the GNU Lesser General Public License as published by the Free
* Software Foundation; either version 2.1 of the License, or (at your option)
* any later version.
*
* This library is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
* or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
* for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*
*/
import it.unimi.dsi.Util;
import it.unimi.dsi.fastutil.bytes.ByteArrays;
import it.unimi.dsi.fastutil.io.MeasurableInputStream;
import it.unimi.dsi.fastutil.objects.ObjectArrayList;
import java.io.IOException;
import java.io.InputStream;
import java.util.Arrays;
import java.util.NoSuchElementException;
import org.apache.log4j.Logger;
/** Exhibits a single {@link InputStream} as a number of streams divided into {@link java.io.InputStream#reset() reset()}-separated
* segments.
*
* An instance of this class wraps a given input stream (usually a replicable one, such as
* a {@link java.io.FileInputStream}) and exposes its contents as a number of separated input
* streams. Each such stream, called a block, is defined by a start and a stop position (gaps
* between blocks are possible). Inside each block we can have one or more segments: each
* segment is again a separate input stream, but calling {@link SegmentedInputStream#reset()} moves
* from one segment to the following one, whereas calling {@link SegmentedInputStream#close()}
* moves from a block to the following one.
*
*
* <p>An instance of this class is enriched with blocks by calling {@link #addBlock(long[])}. This
* can also be done on-the-fly, while the underlying input stream is being scanned.
*
* @author Alessio Orlandi
* @author Luca Natali
* @author Sebastiano Vigna
*/
public class SegmentedInputStream extends MeasurableInputStream {
    private static final boolean DEBUG = false;
    private static final Logger LOGGER = Util.getLogger( SegmentedInputStream.class );

    /** Underlying input stream. */
    private final InputStream in;
    /** Relative position within the current segment (a long, so that segments of 2GiB or more are tracked without overflow). */
    private long relativePos;
    /** Byte length of the current segment (a long, so that segments of 2GiB or more are tracked without overflow). */
    private long segmentLen;
    /** List of known blocks. */
    private final ObjectArrayList<SegmentBlock> blocks;
    /** The start marker of the current segment. */
    private long currentStartMarker;
    /** The stop marker of the current segment. */
    private long currentStopMarker;
    /** Index in {@link #blocks}, -1 when no blocks are in. */
    private int currentBlock;
    /** Whether we actually closed the whole thing. */
    private boolean closed;

    /** A block: a nondecreasing list of delimiters, each consecutive pair of which defines a segment. */
    private static class SegmentBlock {
        /** Segment delimiters, nonnegative and nondecreasing. */
        final long[] delimiter;
        /** The segment we're currently reading (-1 before the first call to {@link #nextSegment()}). */
        int currSegment;

        /** Creates a new block with given delimiters.
         *
         * @param delimiter a list of segment delimiters.
         * @throws IllegalArgumentException if {@code delimiter} is empty, or its elements
         * are negative or decreasing.
         */
        public SegmentBlock( long... delimiter ) throws IllegalArgumentException {
            if ( delimiter.length == 0 ) throw new IllegalArgumentException();
            // Delimiters are checked to be nondecreasing below, so it is sufficient to test the first one.
            if ( delimiter[ 0 ] < 0 ) throw new IllegalArgumentException( "Delimiters cannot be negative: " + Arrays.toString( delimiter ) );
            for ( int i = 0; i < delimiter.length - 1; i++ )
                if ( delimiter[ i ] > delimiter[ i + 1 ] ) throw new IllegalArgumentException( "Segment " + ( i + 1 ) + " is inconsistent as it starts after the next one: " + Arrays.toString( delimiter ) );
            this.delimiter = delimiter;
            this.currSegment = -1;
        }

        @Override
        public String toString() {
            return "[segments=" + Arrays.toString( delimiter ) + ", curr= " + currSegment + "]";
        }

        /**
         * Skips to the next segment. Now {@link #currentStartMarker()} and {@link #currentStopMarker()} can be used.
         *
         * @throws NoSuchElementException if this block has no more segments.
         */
        public void nextSegment() {
            if ( ! hasMoreSegments() ) throw new NoSuchElementException();
            currSegment++;
        }

        /** Checks whether there is a segment after the current one.
         *
         * @return true if {@link #nextSegment()} can be called.
         */
        public boolean hasMoreSegments() {
            // A segment needs both a start and a stop delimiter, hence the -2.
            return currSegment < delimiter.length - 2;
        }

        /** Start marker of the current segment (block start if the first segment is selected). */
        public long currentStartMarker() {
            return delimiter[ currSegment ];
        }

        /** Stop marker of the current segment (block stop if the last segment is selected) */
        public long currentStopMarker() {
            return delimiter[ currSegment + 1 ];
        }
    }

    /** Throws an {@link IllegalStateException} if no block has been added yet. */
    private void ensureBlocksNotEmpty() {
        if ( blocks.isEmpty() ) throw new IllegalStateException( "You must add at least one block before reading or closing a segmented stream" );
    }

    /** Throws an {@link IllegalStateException} if this stream has been closed. */
    private void ensureNotClosed() {
        if ( closed ) throw new IllegalStateException( "This segmented input stream has been closed" );
    }

    /** Creates a segmented input stream with no markers.
     *
     * @param in the underlying input stream.
     * @throws NullPointerException if {@code in} is null.
     */
    public SegmentedInputStream( final InputStream in ) {
        if ( in == null ) throw new NullPointerException();
        this.in = in;
        this.blocks = new ObjectArrayList<SegmentBlock>();
        this.currentBlock = -1;
    }

    /** Creates a stream with one block in.
     *
     * @param in the underlying input stream.
     * @param delimiter an array of segment delimiters.
     */
    public SegmentedInputStream( final InputStream in, final long... delimiter ) throws NullPointerException, IOException, IllegalStateException {
        this( in );
        addBlock( delimiter );
    }

    /** Checks whether the current segment has been entirely consumed.
     *
     * @return true if the relative position has reached the length of the current segment
     * (so no more bytes can be read from it), false otherwise.
     * @throws IllegalStateException if no block has been added, or this stream has been closed.
     */
    private boolean eofInBlock() {
        ensureBlocksNotEmpty();
        ensureNotClosed();
        return relativePos >= segmentLen;
    }

    /** Skips exactly the given number of bytes on the underlying stream.
     *
     * <p>{@link InputStream#skip(long)} is allowed to skip fewer bytes than requested, so
     * the call is repeated until the whole amount has been skipped or no progress is made.
     *
     * @param toSkip the (positive) number of bytes to skip.
     * @throws IllegalStateException if the underlying stream could not skip the requested amount.
     */
    private void skipUnderlying( final long toSkip ) throws IOException {
        long skipped = 0;
        while ( skipped < toSkip ) {
            final long delta = in.skip( toSkip - skipped );
            if ( delta <= 0 ) break; // No progress: give up rather than looping forever.
            skipped += delta;
        }
        if ( skipped != toSkip ) throw new IllegalStateException( "Should have skipped " + toSkip + " bytes, got " + skipped );
    }

    /** Skips the underlying input stream to the next segment of the current block.
     *
     * <p>If the current block has no more segments, this method does nothing.
     */
    private void nextSegment() throws IOException {
        ensureNotClosed();
        final SegmentBlock block = blocks.get( currentBlock );
        if ( ! block.hasMoreSegments() ) return;
        block.nextSegment();
        // Position reached so far, expressed in the same coordinates as the delimiters.
        final long absPos = currentStartMarker + relativePos;
        currentStartMarker = block.currentStartMarker();
        currentStopMarker = block.currentStopMarker();
        final long toSkip = currentStartMarker - absPos;
        if ( toSkip > 0 ) skipUnderlying( toSkip );
        relativePos = 0;
        segmentLen = currentStopMarker - currentStartMarker;
        if ( DEBUG ) LOGGER.debug( "New segment for block # " + currentBlock );
    }

    /** Skips to the first segment of the next block.
     *
     * @throws NoSuchElementException if there are no more blocks.
     */
    public void nextBlock() throws IOException {
        if ( ! hasMoreBlocks() ) throw new NoSuchElementException();
        currentBlock++;
        if ( DEBUG ) LOGGER.debug( "Moving to block # " + currentBlock );
        nextSegment();
    }

    /** Checks whether there are more blocks.
     *
     * @return true if there are more blocks.
     */
    public boolean hasMoreBlocks() {
        return currentBlock < blocks.size() - 1;
    }

    /** Adds a new block defined by its array of segment delimiters.
     *
     * <p>The block has length defined by the difference between the last and first
     * delimiter.
     *
     * <p>This method performs the initial call to {@link #nextBlock()} when the first block
     * is put in.
     *
     * @param delimiter a list of segment delimiters.
     * @throws IllegalArgumentException if {@code delimiter} is empty, or its elements
     * are negative or decreasing.
     * @throws IllegalStateException if this stream has been closed.
     */
    public void addBlock( final long... delimiter ) throws IllegalArgumentException, IOException {
        ensureNotClosed();
        blocks.add( new SegmentBlock( delimiter ) );
        if ( DEBUG ) LOGGER.debug( "Adding a new block with delimiters " + Arrays.toString( delimiter ) );
        if ( currentBlock == -1 ) nextBlock();
    }

    /** Reads a byte from the current segment.
     *
     * @return the next byte, or -1 if the current segment (or the underlying stream) is exhausted.
     */
    @Override
    public int read() throws IOException {
        ensureNotClosed();
        if ( eofInBlock() ) return -1;
        final int r = in.read();
        // Advance only if a byte was actually read, so that the position stays consistent on a premature EOF.
        if ( r != -1 ) relativePos++;
        return r;
    }

    /** Reads up to {@code len} bytes from the current segment, never crossing its stop marker.
     *
     * @return the number of bytes read, or -1 if the current segment (or the underlying stream) is exhausted.
     */
    @Override
    public int read( final byte b[], final int off, final int len ) throws IOException {
        ensureNotClosed();
        ByteArrays.ensureOffsetLength( b, off, len );
        if ( len == 0 ) return 0; // Requested by InputStream.
        if ( eofInBlock() ) return -1;
        // The minimum is bounded by len, so the cast to int is safe.
        final int toRead = (int)Math.min( segmentLen - relativePos, len );
        final int actuallyRead = in.read( b, off, toRead );
        // A -1 from the underlying stream must not move the position backwards.
        if ( actuallyRead > 0 ) relativePos += actuallyRead;
        return actuallyRead;
    }

    /** Skips up to {@code n} bytes of the current segment, never crossing its stop marker.
     *
     * @return the number of bytes actually skipped.
     */
    @Override
    public long skip( final long n ) throws IOException {
        ensureNotClosed();
        if ( eofInBlock() ) return 0;
        final long toSkip = Math.max( Math.min( segmentLen - relativePos, n ), 0 );
        final long skipped = in.skip( toSkip );
        relativePos += skipped;
        return skipped;
    }

    /** Returns the number of bytes available without blocking, limited to what remains in the current segment. */
    @Override
    public int available() throws IOException {
        ensureNotClosed();
        if ( eofInBlock() ) return 0;
        // The minimum is bounded by in.available(), which is an int, so the cast is safe.
        return (int)Math.min( in.available(), segmentLen - relativePos );
    }

    /** Returns the length in bytes of the current segment. */
    @Override
    public long length() throws IOException {
        ensureNotClosed();
        return segmentLen;
    }

    /** Returns the position relative to the start of the current segment. */
    @Override
    public long position() throws IOException {
        ensureNotClosed();
        return relativePos;
    }

    /** Skips to the next block, closing this segmented input stream if there are no more blocks.
     *
     * @throws IllegalStateException if no block has been added.
     */
    @Override
    public void close() throws IOException {
        ensureBlocksNotEmpty();
        if ( closed ) return;
        if ( hasMoreBlocks() ) {
            nextBlock();
            return;
        }
        if ( DEBUG ) LOGGER.debug( "Closing the underlying input stream of this segmented input stream" );
        closed = true;
        in.close();
    }

    /** Moves into the next segment of the current block. */
    @Override
    public void reset() throws IOException {
        ensureNotClosed();
        nextSegment();
    }
}