All Downloads are FREE. Search and download functionalities are using the official Maven repository.

loci.common.RandomAccessInputStream Maven / Gradle / Ivy

The newest version!
/*
 * #%L
 * Common package for I/O and related utilities
 * %%
 * Copyright (C) 2005 - 2016 Open Microscopy Environment:
 *   - Board of Regents of the University of Wisconsin-Madison
 *   - Glencoe Software, Inc.
 *   - University of Dundee
 * %%
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 * 
 * 1. Redistributions of source code must retain the above copyright notice,
 *    this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 *    this list of conditions and the following disclaimer in the documentation
 *    and/or other materials provided with the distribution.
 * 
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 * #L%
 */

package loci.common;

import java.io.Closeable;
import java.io.DataInput;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.esotericsoftware.kryo.Kryo;
import com.esotericsoftware.kryo.KryoSerializable;
import com.esotericsoftware.kryo.io.Input;
import com.esotericsoftware.kryo.io.Output;

/**
 * Top-level class for reading from various data sources.
 *
 * @author Melissa Linkert melissa at glencoesoftware.com
 * @author Curtis Rueden ctrueden at wisc.edu
 */
public class RandomAccessInputStream extends InputStream implements DataInput, Closeable, KryoSerializable {

  // -- Constants --

  /**
   * Maximum size of the buffer used by the DataInputStream (1048576 bytes).
   * Not referenced by any method visible in this class.
   */
  protected static final int MAX_OVERHEAD = 1048576;

  /** Logger for this class. */
  private static final Logger LOGGER =
    LoggerFactory.getLogger(RandomAccessInputStream.class);

  /**
   * Block size to use when searching through the stream.
   */
  protected static final int DEFAULT_BLOCK_SIZE = 256 * 1024; // 256 KB

  /** Maximum number of bytes to search when searching through the stream. */
  protected static final int MAX_SEARCH_SIZE = 512 * 1024 * 1024; // 512 MB

  /**
   * Various bitmasks for the 0000xxxx side of a byte.
   * Element {@code i} keeps the {@code i} least-significant bits.
   */
  private static final int[] BACK_MASK = {
    0x00, // 00000000
    0x01, // 00000001
    0x03, // 00000011
    0x07, // 00000111
    0x0F, // 00001111
    0x1F, // 00011111
    0x3F, // 00111111
    0x7F  // 01111111
  };

  /**
   * Various bitmasks for the xxxx0000 side of a byte.
   * Element {@code i} has the {@code i} most-significant bits set.
   */
  private static final int[] FRONT_MASK = {
    0x0000, // 00000000
    0x0080, // 10000000
    0x00C0, // 11000000
    0x00E0, // 11100000
    0x00F0, // 11110000
    0x00F8, // 11111000
    0x00FC, // 11111100
    0x00FE  // 11111110
  };

  // -- Fields --

  /**
   * Handle that the seek and read methods of this class delegate to.
   * {@link #close()} may set it to null.
   */
  protected IRandomAccess raf;

  /** The file name associated with the handle; may be null. */
  protected String file;

  /**
   * Overridden (truncated) stream length. While negative, {@link #length()}
   * reports the length of the underlying handle instead.
   */
  protected long length = -1;

  /**
   * File pointer recorded by {@link #mark(int)}; negative when no mark is
   * set, in which case {@link #reset()} fails.
   */
  protected long markedPos = -1;

  /**
   * Name of the charset used when converting bytes to strings.
   * Defaults to {@link Constants#ENCODING}.
   */
  protected String encoding = Constants.ENCODING;

  /**
   * Bit offset within the current byte, maintained by
   * {@link #skipBits(long)} and {@link #readBits(int)}.
   */
  private int currentBit;

  // -- Constructors --

  /**
   * Opens a stream on the named resource. The name is resolved to a handle
   * through {@link Location#getHandle(String)} and is also remembered as
   * the file name of this stream.
   *
   * @param file a name that {@link Location#getHandle(String)} can resolve
   * @throws IOException if the name is invalid
   */
  public RandomAccessInputStream(String file) throws IOException
  {
    this(Location.getHandle(file), file);
  }

  /**
   * Opens a stream on the named resource, requesting a specific buffer size
   * from {@link Location} when the handle is created.
   *
   * @param file a name that can be resolved by {@link Location}
   * @param bufferSize the size of the caching buffer in bytes
   * @throws IOException if the name is invalid
   */
  public RandomAccessInputStream(String file, int bufferSize)
    throws IOException
  {
    this(Location.getHandle(file, false, true, bufferSize), file);
  }

  /**
   * Wraps an existing handle in a random access stream. No file name is
   * associated with the resulting stream.
   *
   * @param handle the {@link IRandomAccess} to be wrapped
   * @throws IOException if the handle is invalid
   */
  public RandomAccessInputStream(IRandomAccess handle) throws IOException
  {
    this(handle, null);
  }

  /**
   * Wraps an existing handle in a random access stream and records the file
   * path it belongs to. The handle is switched to big-endian order and the
   * stream is positioned at offset 0.
   *
   * @param handle the {@link IRandomAccess} to be wrapped
   * @param file the name associated with the handle. Can be null.
   * @throws IOException if the handle is invalid
   */
  public RandomAccessInputStream(IRandomAccess handle, String file)
    throws IOException
  {
    if (LOGGER.isTraceEnabled()) {
      LOGGER.trace("RandomAccessInputStream {} OPEN", hashCode());
    }
    this.raf = handle;
    this.raf.setOrder(ByteOrder.BIG_ENDIAN);
    this.file = file;
    // start at the beginning of the stream
    seek(0);
    // negative length: length() reports the handle's own length
    this.length = -1;
  }

  /**
   * Wraps an in-memory byte array in a random access stream by way of a
   * {@link ByteArrayHandle}.
   *
   * @param array the bytes to expose through the stream
   * @throws IOException if the {@link ByteArrayHandle} cannot be created
   */
  public RandomAccessInputStream(byte[] array) throws IOException
  {
    this(new ByteArrayHandle(array));
  }

  // -- RandomAccessInputStream API methods --

  /**
   * Changes the charset used by this stream when bytes are converted to
   * strings.
   *
   * @param encoding the name of a standard charset to use when
   *                 working with strings
   * @see loci.common.Constants#ENCODING
   * @see java.nio.charset.Charset
   */
  public void setEncoding(String encoding)
  {
    this.encoding = encoding;
  }

  /**
   * Moves the file pointer of the underlying handle to the given offset.
   *
   * @param pos the new byte offset
   * @throws IOException if the seek fails
   */
  public void seek(long pos) throws IOException
  {
    raf.seek(pos);
  }

  /**
   * Reports the stream length: the overridden length when one is in effect
   * (non-negative), otherwise the length of the underlying handle.
   *
   * @return the number of bytes in the file.
   * @throws IOException if the length cannot be retrieved
   */
  public long length() throws IOException {
    if (length >= 0) {
      return length;
    }
    return raf.length();
  }

  /**
   * Sets the length of the stream.
   * The new length must be less than the real length of the stream.
   * This allows us to work with a truncated view of a file, without modifying
   * the file itself.
   *
   * Passing in a negative value will reset the length to the stream's real
   * length. A value greater than or equal to the real length is ignored and
   * leaves the current length unchanged.
   *
   * @param newLength the new stream length as defined above
   * @throws IOException if the original stream length cannot be retrieved
   */
  public void setLength(long newLength) throws IOException {
    // Compare against the handle's real length rather than length(): once a
    // truncation is active, length() returns the truncated value, which
    // would make it impossible to widen the view again without resetting.
    if (newLength < raf.length()) {
      this.length = newLength;
    }
  }

  /**
   * Asks the underlying handle for its current position.
   *
   * @return the current (absolute) file pointer.
   * @throws IOException if the current pointer cannot be retrieved
   */
  public long getFilePointer() throws IOException
  {
    final long pointer = raf.getFilePointer();
    return pointer;
  }

  /**
   * Closes the underlying handle and clears the mark. Nothing is closed when
   * {@link Location#getMappedFile(String)} returns a non-null value for this
   * stream's file name.
   */
  @Override
  public void close() throws IOException {
    if (LOGGER.isTraceEnabled()) {
      LOGGER.trace("RandomAccessInputStream {} CLOSE", hashCode());
    }
    final boolean mapped = Location.getMappedFile(file) != null;
    if (!mapped) {
      if (raf != null) {
        raf.close();
      }
      raf = null;
      markedPos = -1;
    }
  }

  /**
   * Selects the byte order of the underlying handle. Does nothing when the
   * handle is null.
   *
   * @param little true if the stream ordering should be little-endian
   */
  public void order(boolean little) {
    if (raf == null) {
      return;
    }
    final ByteOrder requested;
    if (little) {
      requested = ByteOrder.LITTLE_ENDIAN;
    }
    else {
      requested = ByteOrder.BIG_ENDIAN;
    }
    raf.setOrder(requested);
  }

  /**
   * Reports the byte order currently set on the underlying handle.
   *
   * @return true if the stream ordering is little-endian
   */
  public boolean isLittleEndian() {
    final ByteOrder current = raf.getOrder();
    return ByteOrder.LITTLE_ENDIAN == current;
  }

  /**
   * Reads a string ending with one of the characters in the given string.
   *
   * @param lastChars each character is a possible terminator
   * @return The string from the initial position through the end of the
   *   terminating sequence, or through the end of the stream if no
   *   terminating sequence is found.
   * @throws IOException If the maximum search length (512 MB) is exceeded.
   * @see #findString(String...)
   */
  public String readString(String lastChars) throws IOException {
    if (lastChars.length() == 1) return findString(lastChars);
    String[] terminators = new String[lastChars.length()];
    for (int i=0; i MAX_SEARCH_SIZE;
    if (tooLong) maxLen = MAX_SEARCH_SIZE;
    boolean match = false;
    int maxTermLen = 0;
    for (String term : terminators) {
      int len = term.length();
      if (len > maxTermLen) maxTermLen = len;
    }

    InputStreamReader in = new InputStreamReader(this, encoding);
    char[] buf = new char[blockSize];
    long loc = 0;
    while (loc < maxLen && getFilePointer() < length() - 1) {
      // if we're not saving the string, drop any old, unnecessary output
      if (!saveString) {
        int outLen = out.length();
        if (outLen >= maxTermLen) {
          int dropIndex = outLen - maxTermLen + 1;
          String last = out.substring(dropIndex, outLen);
          out.setLength(0);
          out.append(last);
          bytesDropped += dropIndex;
        }
      }

      // read block from stream
      int r = in.read(buf, 0, blockSize);
      if (r <= 0) throw new IOException("Cannot read from stream: " + r);

      // append block to output
      out.append(buf, 0, r);

      // check output, returning smallest possible string
      int min = Integer.MAX_VALUE, tagLen = 0;
      for (String t : terminators) {
        int len = t.length();
        int start = (int) (loc - bytesDropped - len);
        int value = out.indexOf(t, start < 0 ? 0 : start);
        if (value >= 0 && value < min) {
          match = true;
          min = value;
          tagLen = len;
        }
      }

      if (match) {
        // reset stream to proper location
        seek(startPos + bytesDropped + min + tagLen);

        // trim output string
        if (saveString) {
          out.setLength(min + tagLen);
          return out.toString();
        }
        return null;
      }

      loc += r;
    }

    // no match
    if (tooLong) throw new IOException("Maximum search length reached.");
    return saveString ? out.toString() : null;
  }

  /**
   * Advances the bit position by the given number of bits. Whole bytes are
   * skipped in the stream; the remainder is kept as the bit offset within
   * the current byte.
   *
   * @param bits Number of bits to skip
   * @throws IllegalArgumentException if bits is negative
   * @throws IOException if an error occurs while skipping
   */
  public void skipBits(long bits) throws IOException {
    if (bits < 0) {
      throw new IllegalArgumentException("Bits to skip cannot be negative");
    }

    // measure the skip from the start of the current byte
    final long totalBits = bits + currentBit;
    final long wholeBytes = totalBits / 8;
    currentBit = (int) (totalBits % 8);
    if (wholeBytes > 0) {
      skipBytes(wholeBytes);
    }
  }

  /**
   * Returns an int value representing the value of the bits read from
   * the byte array, from the current position. Bits are extracted from the
   * "left side" or high side of the byte.

* The current position is modified by this call.

* Bits are pushed into the int from the right, endianness is not * considered by the method on its own. So, if 5 bits were read from the * buffer "10101", the int would be the integer representation of * 000...0010101 on the target machine.

* In general, this also means the result will be positive unless a full * 32 bits are read.

* Requesting more than 32 bits is allowed, but only up to 32 bits worth of * data will be returned (the last 32 bits read).

* * @param bitsToRead the number of bits to read from the bit buffer * @return the value of the bits read * @throws IllegalArgumentException if bits is negative * @throws IOException if an error occurs while skipping */ public int readBits(int bitsToRead) throws IOException { if (bitsToRead < 0) { throw new IllegalArgumentException("Bits to read cannot be negative"); } if (bitsToRead == 0) { return 0; } int toStore = 0; while (bitsToRead != 0 && getFilePointer() < length()) { if (currentBit < 0 || currentBit > 7) { throw new IllegalArgumentException("byte=" + getFilePointer() + ", bit=" + currentBit); } int bitsLeft = 8 - currentBit; if (bitsToRead >= bitsLeft) { toStore <<= bitsLeft; bitsToRead -= bitsLeft; int cb = readByte(); if (currentBit == 0) { // we can read in a whole byte, so we'll do that. toStore += cb & 0xff; } else { // otherwise, only read the appropriate number of bits off the back // side of the byte, in order to "finish" the current byte in the // buffer. toStore += cb & BACK_MASK[bitsLeft]; currentBit = 0; } } else { // We will be able to finish using the current byte. // read the appropriate number of bits off the front side of the byte, // then push them into the int. toStore = toStore << bitsToRead; int cb = readByte() & 0xff; seek(getFilePointer() - 1); toStore += (cb & (0x00FF - FRONT_MASK[currentBit])) >> (bitsLeft - bitsToRead); currentBit += bitsToRead; bitsToRead = 0; } } return toStore; } /** * Checks if the current position is on a byte boundary, that is the next * bit in the byte array is the first bit in a byte. * * @return true if bit is on byte boundary, false otherwise. */ public boolean isBitOnByteBoundary() { return currentBit % 8 == 0; } // -- DataInput API methods -- /** Read an input byte and return true if the byte is nonzero. */ @Override public boolean readBoolean() throws IOException { return raf.readBoolean(); } /** Read one byte and return it. 
*/ @Override public byte readByte() throws IOException { return raf.readByte(); } /** Read an input char. */ @Override public char readChar() throws IOException { return raf.readChar(); } /** Read eight bytes and return a double value. */ @Override public double readDouble() throws IOException { return raf.readDouble(); } /** Read four bytes and return a float value. */ @Override public float readFloat() throws IOException { return raf.readFloat(); } /** Read four input bytes and return an int value. */ @Override public int readInt() throws IOException { return raf.readInt(); } /** * Read four input bytes and return an unsigned value. * * @return the next 4 bytes in the stream as a long * @throws IOException if there is an error during reading */ public long readUnsignedInt() throws IOException { return readInt() & 0xffffffffL; } /** Read the next line of text from the input stream. */ @Override public String readLine() throws IOException { String line = findString("\n"); return line.length() == 0 ? null : line; } /** * Read a string of arbitrary length, terminated by a null char. * * @return the shortest null-terminated string from the current pointer * @throws IOException if there is an error during reading */ public String readCString() throws IOException { String line = findString("\0"); return line.length() == 0 ? null : line; } /** * Reads a byte array of the given length byte by byte. Returns a string * using the set encoding. * * @param n The length of the array. * @return See above * @throws IOException Thrown if an error occurred while reading the data. 
* @see #setEncoding(String) */ public String readByteToString(int n) throws IOException { n = (int) Math.min(available(), n); byte[] bytes = new byte[n]; readFully(bytes); StringBuffer newString = new StringBuffer(); for (byte b : bytes) { int v = b & 0xff; if (v > 0x7f) { newString.append(Character.toChars(v)); } else { newString.append((char) b); } } String s = newString.toString(); return new String(s.getBytes(encoding), encoding); } /** * Read a string of up to length n. * * @param n the number of bytes to read * @return a string representing the read bytes, using the default encoding * @throws IOException if an error occurred during reading * @see #setEncoding(String) */ public String readString(int n) throws IOException { int avail = available(); if (n > avail) n = avail; byte[] b = new byte[n]; readFully(b); return new String(b, encoding); } /** Read eight input bytes and return a long value. */ @Override public long readLong() throws IOException { return raf.readLong(); } /** Read two input bytes and return a short value. */ @Override public short readShort() throws IOException { return raf.readShort(); } /** Read an input byte and zero extend it appropriately. */ @Override public int readUnsignedByte() throws IOException { return raf.readUnsignedByte(); } /** Read two bytes and return an int in the range 0 through 65535. */ @Override public int readUnsignedShort() throws IOException { return raf.readUnsignedShort(); } /** Read a string that has been encoded using a modified UTF-8 format. */ @Override public String readUTF() throws IOException { return raf.readUTF(); } /** Skip n bytes within the stream. */ @Override public int skipBytes(int n) throws IOException { return raf.skipBytes(n); } /** Skip n bytes within the stream. */ public long skipBytes(long n) throws IOException { return raf.skipBytes(n); } /** Read bytes from the stream into the given array. 
*/ @Override public int read(byte[] array) throws IOException { int rtn = raf.read(array); if (rtn == 0 && raf.getFilePointer() >= raf.length() - 1) rtn = -1; return rtn; } /** * Read n bytes from the stream into the given array at the specified offset. */ @Override public int read(byte[] array, int offset, int n) throws IOException { int rtn = raf.read(array, offset, n); if (rtn == 0 && raf.getFilePointer() >= raf.length() - 1) rtn = -1; return rtn; } /** * Read bytes from the stream into the given buffer. * * @param buf the {@link ByteBuffer} to fill. buf.capacity() * determines the number of bytes to read * @return the number of bytes read * @throws IOException if an error occurred during reading */ public int read(ByteBuffer buf) throws IOException { return raf.read(buf); } /** * Read n bytes from the stream into the given buffer at the specified offset. * * @param buf the {@link ByteBuffer} to fill * @param offset the offset to the first byte in the buffer * @param n the number of bytes to read * @return the number of bytes actually read * @throws IOException if an error occurred during reading */ public int read(ByteBuffer buf, int offset, int n) throws IOException { return raf.read(buf, offset, n); } /** Read bytes from the stream into the given array. */ @Override public void readFully(byte[] array) throws IOException { raf.readFully(array); } /** * Read n bytes from the stream into the given array at the specified offset. 
*/ @Override public void readFully(byte[] array, int offset, int n) throws IOException { raf.readFully(array, offset, n); } // -- InputStream API methods -- @Override public int read() throws IOException { int b = readByte(); if (b == -1 && (getFilePointer() >= length())) return 0; return b; } @Override public int available() throws IOException { long remain = length() - getFilePointer(); if (remain > Integer.MAX_VALUE) remain = Integer.MAX_VALUE; return (int) remain; } @Override public void mark(int readLimit) { try { markedPos = getFilePointer(); } catch (IOException exc) { LOGGER.warn("Cannot set mark", exc); } } @Override public boolean markSupported() { return true; } @Override public void reset() throws IOException { if (markedPos < 0) throw new IOException("No mark set"); seek(markedPos); } // -- Externalizable API methods -- @Override public void read(Kryo kryo, Input in) { raf = (IRandomAccess) kryo.readClassAndObject(in); file = kryo.readObjectOrNull(in, String.class); if (file != null) { try { raf = Location.getHandle(file); } catch (IOException e) { LOGGER.warn("Failed to reopen file", e); } } length = kryo.readObject(in, Long.class); markedPos = kryo.readObject(in, Long.class); encoding = kryo.readObject(in, String.class); } @Override public void write(Kryo kryo, Output out) { kryo.writeClassAndObject(out, raf); kryo.writeObjectOrNull(out, file, String.class); kryo.writeObject(out, length); kryo.writeObject(out, markedPos); kryo.writeObject(out, encoding); } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy