org.scijava.io.handle.DataHandle Maven / Gradle / Ivy

Show more of this group Show more artifacts with this name
Show all versions of scijava-common Show documentation
SciJava Common is a shared library for SciJava software. It provides a plugin framework, with an extensible mechanism for service discovery, backed by its own annotation processor, so that plugins can be loaded dynamically. It is used by downstream projects in the SciJava ecosystem, such as ImageJ and SCIFIO.
There is a newer version: 2.99.0
Show newest version
/*
 * #%L
 * SciJava Common shared library for SciJava software.
 * %%
 * Copyright (C) 2009 - 2017 Board of Regents of the University of
 * Wisconsin-Madison, Broad Institute of MIT and Harvard, Max Planck
 * Institute of Molecular Cell Biology and Genetics, University of
 * Konstanz, and KNIME GmbH.
 * %%
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 * 
 * 1. Redistributions of source code must retain the above copyright notice,
 *    this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 *    this list of conditions and the following disclaimer in the documentation
 *    and/or other materials provided with the distribution.
 * 
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 * #L%
 */

package org.scijava.io.handle;

import java.io.Closeable;
import java.io.DataInput;
import java.io.DataInputStream;
import java.io.DataOutput;
import java.io.EOFException;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.Date;

import org.scijava.io.location.Location;
import org.scijava.plugin.WrapperPlugin;

/**
 * A data handle is a plugin which provides both streaming and random
 * access to bytes at a {@link Location} (e.g., files or arrays).
 * 
 * @author Curtis Rueden
 * @see DataHandleInputStream
 * @see DataHandleOutputStream
 */
public interface DataHandle extends WrapperPlugin,
	DataInput, DataOutput, Closeable
{

	/** Byte orders in which multi-byte values can be read and written. */
	public enum ByteOrder {
		LITTLE_ENDIAN, BIG_ENDIAN
	}

	/** Default block size to use when searching through the stream. */
	int DEFAULT_BLOCK_SIZE = 256 * 1024; // 256 KB

	/** Default bound on bytes to search when searching through the stream. */
	int MAX_SEARCH_SIZE = 512 * 1024 * 1024; // 512 MB

	/** Gets whether reading from this handle is supported. */
	boolean isReadable();

	/** Gets whether writing to this handle is supported. */
	boolean isWritable();

	/**
	 * Tests whether this handle's location actually exists at the source.
	 * 
	 * @return True if the location exists; false if not.
	 * @throws IOException If something goes wrong with the existence check.
	 */
	boolean exists() throws IOException;

	/**
	 * Gets the last modified timestamp of the location.
	 * <p>
	 * The default implementation does not support this feature and always
	 * returns null.
	 * </p>
	 * 
	 * @return The last modified timestamp, or null if the handle does not support
	 *         this feature or if the location does not exist.
	 * @throws IOException If something goes wrong with the last modified check.
	 */
	default Date lastModified() throws IOException {
		return null;
	}

	/**
	 * Gets a "fast" checksum which succinctly represents the contents of the data
	 * stream. The term "fast" here refers to the idea that the checksum be
	 * retrievable quickly, without actually performing a thorough computation
	 * across the entire data stream. Typically, such a thing is feasible because
	 * the checksum was calculated a priori; e.g., artifacts deployed to remote
	 * Maven repositories are always deployed with corresponding checksum files.
	 * <p>
	 * No guarantee is made about the exact nature of the checksum (e.g., SHA-1 or
	 * MD5), only that the value is deterministic for this particular location
	 * with its current contents. In other words: if a checksum differs from a
	 * previous inquiry, you can be sure the contents have changed; conversely, if
	 * the checksum is still the same, the contents are highly likely to be
	 * unchanged.
	 * </p>
	 * <p>
	 * The default implementation does not support this feature and always
	 * returns null.
	 * </p>
	 * 
	 * @return The checksum, or null if the handle does not support this feature.
	 * @throws IOException If something goes wrong when accessing the checksum.
	 */
	default String checksum() throws IOException {
		return null;
	}

	/** Returns the current offset in the stream. */
	long offset() throws IOException;

	/**
	 * Sets the stream offset, measured from the beginning of the stream, at which
	 * the next read or write occurs.
	 */
	void seek(long pos) throws IOException;

	/**
	 * Returns the length of the data in bytes.
	 * 
	 * @return The length, or -1 if the length is unknown.
	 */
	long length() throws IOException;

	/**
	 * Sets the new length of the handle.
	 * 
	 * @param length New length.
	 * @throws IOException If there is an error changing the handle's length.
	 */
	void setLength(long length) throws IOException;

	/**
	 * Gets the number of bytes which can be read from, or written to, the
	 * data handle, bounded by the specified number of bytes.
	 * <p>
	 * In the case of reading, attempting to read the returned number of bytes is
	 * guaranteed not to throw {@link EOFException}. However, be aware that the
	 * following methods might still process fewer bytes than indicated
	 * by this method:
	 * </p>
	 * <ul>
	 * <li>{@link #read(byte[])}</li>
	 * <li>{@link #read(byte[], int, int)}</li>
	 * <li>{@link #skip(long)}</li>
	 * <li>{@link #skipBytes(int)}</li>
	 * </ul>
	 * <p>
	 * In the case of writing, attempting to write the returned number of bytes is
	 * guaranteed not to expand the length of the handle; i.e., the write will
	 * only overwrite bytes already within the handle's bounds.
	 * </p>
	 * <p>
	 * The default implementation returns the smaller of {@code count} and
	 * {@code length() - offset()}; the result is therefore negative when
	 * {@code count} is negative or the offset lies beyond the reported length.
	 * </p>
	 *
	 * @param count Desired number of bytes to read/write.
	 * @return The actual number of bytes which could be read/written,
	 *         which might be less than the requested value.
	 * @throws IOException If something goes wrong with the check.
	 */
	default long available(final long count) throws IOException {
		final long remain = length() - offset();
		return Math.min(remain, count);
	}

	/**
	 * Ensures that the handle has sufficient bytes available to read.
	 * 
	 * @param count Number of bytes to read.
	 * @see #available(long)
	 * @throws EOFException If there are insufficient bytes available.
	 * @throws IOException If the handle is write-only, or something goes wrong
	 *           with the check.
	 */
	default void ensureReadable(final long count) throws IOException {
		if (!isReadable()) throw new IOException("This handle is write-only.");
		if (available(count) < count) throw new EOFException();
	}

	/**
	 * Ensures that the handle has the correct length to be written to, and
	 * extends it as required.
	 * 
	 * @param count Number of bytes to write.
	 * @return {@code true} if the handle's length was sufficient, or
	 *         {@code false} if the handle's length required an extension.
	 * @throws IOException If the handle is read-only, or something goes wrong
	 *           with the check, or there is an error changing the handle's
	 *           length.
	 */
	default boolean ensureWritable(final long count) throws IOException {
		if (!isWritable()) throw new IOException("This handle is read-only.");
		final long minLength = offset() + count;
		if (length() < minLength) {
			setLength(minLength);
			return false;
		}
		return true;
	}

	/** Returns the byte order of the stream. */
	ByteOrder getOrder();

	/**
	 * Sets the byte order of the stream.
	 *
	 * @param order Order to set.
	 */
	void setOrder(ByteOrder order);

	/**
	 * Returns true iff the stream's order is {@link ByteOrder#BIG_ENDIAN}.
	 *
	 * @see #getOrder()
	 */
	default boolean isBigEndian() {
		return getOrder() == ByteOrder.BIG_ENDIAN;
	}

	/**
	 * Returns true iff the stream's order is {@link ByteOrder#LITTLE_ENDIAN}.
	 *
	 * @see #getOrder()
	 */
	default boolean isLittleEndian() {
		return getOrder() == ByteOrder.LITTLE_ENDIAN;
	}

	/**
	 * Sets the endianness of the stream.
	 *
	 * @param little If true, sets the order to {@link ByteOrder#LITTLE_ENDIAN};
	 *          otherwise, sets the order to {@link ByteOrder#BIG_ENDIAN}.
	 * @see #setOrder(ByteOrder)
	 */
	default void setLittleEndian(final boolean little) {
		setOrder(little ? ByteOrder.LITTLE_ENDIAN : ByteOrder.BIG_ENDIAN);
	}

	/** Gets the native encoding of the stream. */
	String getEncoding();

	/** Sets the native encoding of the stream. */
	void setEncoding(String encoding);

	/**
	 * Reads a string of arbitrary length, terminated by a null char.
	 * <p>
	 * This delegates to {@link #findString(String...)}, so a returned string
	 * runs through the end of the terminator when one was found.
	 * </p>
	 *
	 * @return The string read, or null if the search produced an empty string.
	 * @throws IOException If something goes wrong while reading.
	 */
	default String readCString() throws IOException {
		final String line = findString("\0");
		return line.length() == 0 ? null : line;
	}

	/**
	 * Reads a string of up to length n, decoded using {@link #getEncoding()}.
	 * <p>
	 * The number of bytes consumed is {@code available(n)}, so fewer than
	 * {@code n} bytes are read when the handle has fewer remaining.
	 * </p>
	 *
	 * @param n Maximum number of bytes to read.
	 * @return The decoded string.
	 * @throws IOException If something goes wrong while reading.
	 */
	default String readString(final int n) throws IOException {
		final int r = (int) available(n);
		final byte[] b = new byte[r];
		readFully(b);
		return new String(b, getEncoding());
	}

	/**
	 * Reads a string ending with one of the characters in the given string.
	 * 
	 * @param lastChars The characters, each of which terminates the string.
	 * @see #findString(String...)
	 */
	default String readString(final String lastChars) throws IOException {
		if (lastChars.length() == 1) return findString(lastChars);
		// split into one single-character terminator per char
		final String[] terminators = new String[lastChars.length()];
		for (int i = 0; i < terminators.length; i++) {
			terminators[i] = lastChars.substring(i, i + 1);
		}
		return findString(terminators);
	}

	/**
	 * Reads a string ending with one of the given terminating substrings.
	 * 
	 * @param terminators The strings for which to search.
	 * @return The string from the initial position through the end of the
	 *         terminating sequence, or through the end of the stream if no
	 *         terminating sequence is found.
	 */
	default String findString(final String... terminators) throws IOException {
		return findString(true, DEFAULT_BLOCK_SIZE, terminators);
	}

	/**
	 * Reads or skips a string ending with one of the given terminating
	 * substrings.
	 *
	 * @param saveString Whether to collect the string from the current offset to
	 *          the terminating bytes, and return it. If false, returns null.
	 * @param terminators The strings for which to search.
	 * @throws IOException If saveString flag is set and the maximum search length
	 *           (512 MB) is exceeded.
	 * @return The string from the initial position through the end of the
	 *         terminating sequence, or through the end of the stream if no
	 *         terminating sequence is found, or null if saveString flag is unset.
	 */
	default String findString(final boolean saveString,
		final String... terminators) throws IOException
	{
		return findString(saveString, DEFAULT_BLOCK_SIZE, terminators);
	}

	/**
	 * Reads a string ending with one of the given terminating substrings, using
	 * the specified block size for buffering.
	 *
	 * @param blockSize The block size to use when reading bytes in chunks.
	 * @param terminators The strings for which to search.
	 * @return The string from the initial position through the end of the
	 *         terminating sequence, or through the end of the stream if no
	 *         terminating sequence is found.
	 */
	default String findString(final int blockSize, final String... terminators)
		throws IOException
	{
		return findString(true, blockSize, terminators);
	}

	/**
	 * Reads or skips a string ending with one of the given terminating
	 * substrings, using the specified block size for buffering.
	 *
	 * @param saveString Whether to collect the string from the current offset to
	 *          the terminating bytes, and return it. If false, returns null.
	 * @param blockSize The block size to use when reading bytes in chunks.
	 * @param terminators The strings for which to search.
	 * @throws IOException If saveString flag is set and the maximum search length
	 *           (512 MB) is exceeded.
	 * @return The string from the initial position through the end of the
	 *         terminating sequence, or through the end of the stream if no
	 *         terminating sequence is found, or null if saveString flag is unset.
	 */
	default String findString(final boolean saveString, final int blockSize,
		final String... terminators) throws IOException
	{
		final StringBuilder out = new StringBuilder();
		final long startPos = offset();
		// number of leading chars discarded from 'out' (only when !saveString)
		long bytesDropped = 0;
		final long inputLen = length();
		long maxLen = inputLen - startPos;
		// the search bound only applies when the result must be kept in memory
		final boolean tooLong = saveString && maxLen > MAX_SEARCH_SIZE;
		if (tooLong) maxLen = MAX_SEARCH_SIZE;
		boolean match = false;
		int maxTermLen = 0;
		for (final String term : terminators) {
			final int len = term.length();
			if (len > maxTermLen) maxTermLen = len;
		}

		// NB: The reader wraps this handle, so it is deliberately not closed here.
		@SuppressWarnings("resource")
		final InputStreamReader in = new InputStreamReader(
			new DataHandleInputStream<>(this), getEncoding());
		final char[] buf = new char[blockSize];
		// number of chars appended to 'out' by previous iterations
		long loc = 0;
		// NB: The loop stops once fewer than two bytes remain past offset().
		// NOTE(review): the reader may buffer ahead of what has been returned,
		// which would advance offset() early - confirm this cannot end the
		// search prematurely for small block sizes.
		while (loc < maxLen && offset() < length() - 1) {
			// if we're not saving the string, drop any old, unnecessary output
			if (!saveString) {
				final int outLen = out.length();
				if (outLen >= maxTermLen) {
					// keep only the tail which could still begin a terminator
					final int dropIndex = outLen - maxTermLen + 1;
					final String last = out.substring(dropIndex, outLen);
					out.setLength(0);
					out.append(last);
					bytesDropped += dropIndex;
				}
			}

			// read block from stream
			final int r = in.read(buf, 0, blockSize);
			if (r <= 0) throw new IOException("Cannot read from stream: " + r);

			// append block to output
			out.append(buf, 0, r);

			// check output, returning smallest possible string
			int min = Integer.MAX_VALUE, tagLen = 0;
			for (final String t : terminators) {
				final int len = t.length();
				// back up by the terminator length to catch block-spanning matches
				final int start = (int) (loc - bytesDropped - len);
				final int value = out.indexOf(t, start < 0 ? 0 : start);
				if (value >= 0 && value < min) {
					match = true;
					min = value;
					tagLen = len;
				}
			}

			if (match) {
				// reset stream to proper location
				// NOTE(review): char counts are used as byte offsets here, which
				// presumably assumes a single-byte encoding - confirm.
				seek(startPos + bytesDropped + min + tagLen);

				// trim output string
				if (saveString) {
					out.setLength(min + tagLen);
					return out.toString();
				}
				return null;
			}

			loc += r;
		}

		// no match
		if (tooLong) throw new IOException("Maximum search length reached.");
		return saveString ? out.toString() : null;
	}

	/**
	 * Writes the provided string, followed by a newline character.
	 *
	 * @param string The string to write.
	 * @throws IOException If an I/O error occurs.
	 */
	default void writeLine(final String string) throws IOException {
		writeBytes(string);
		writeBytes("\n");
	}

	// -- InputStream look-alikes --

	/**
	 * Reads the next byte of data from the stream.
	 *
	 * @return the next byte of data as a value from 0 to 255, or -1 if the end
	 *         of the stream is reached.
	 * @throws IOException if an I/O error occurs.
	 */
	default int read() throws IOException {
		return offset() < length() ? readByte() & 0xff : -1;
	}

	/**
	 * Reads up to b.length bytes of data from the stream into an array of bytes.
	 *
	 * @return the total number of bytes read into the buffer.
	 * @see #read(byte[], int, int)
	 */
	default int read(final byte[] b) throws IOException {
		return read(b, 0, b.length);
	}

	/**
	 * Reads up to {@code len} bytes of data from the stream into an array of
	 * bytes.
	 *
	 * @return the total number of bytes read into the buffer. A negative value
	 *         is treated as end of stream by {@link #readFully(byte[], int, int)}.
	 */
	int read(byte[] b, int off, int len) throws IOException;

	/**
	 * Skips over and discards {@code n} bytes of data from the stream. The
	 * {@code skip} method may, for a variety of reasons, end up skipping over
	 * some smaller number of bytes, possibly {@code 0}. This may result from any
	 * of a number of conditions; reaching end of file before {@code n} bytes have
	 * been skipped is only one possibility. The actual number of bytes skipped is
	 * returned. If {@code n} is negative, no bytes are skipped.
	 *
	 * @param n the number of bytes to be skipped.
	 * @return the actual number of bytes skipped.
	 * @throws IOException if an I/O error occurs.
	 */
	default long skip(final long n) throws IOException {
		final long skip = available(n);
		if (skip <= 0) return 0;
		seek(offset() + skip);
		return skip;
	}

	// -- DataInput methods --

	@Override
	default void readFully(final byte[] b) throws IOException {
		readFully(b, 0, b.length);
	}

	@Override
	default void readFully(final byte[] b, final int off, final int len)
		throws IOException
	{
		// NB: Adapted from java.io.DataInputStream.readFully(byte[], int, int).
		if (len < 0) throw new IndexOutOfBoundsException();
		int n = 0;
		while (n < len) {
			final int count = read(b, off + n, len - n);
			if (count < 0) throw new EOFException();
			n += count;
		}
	}

	@Override
	default int skipBytes(final int n) throws IOException {
		// NB: Cast here is safe since the value of n bounds the result to an int.
		final int skip = (int) available(n);
		// NB: Same guard as skip(long): nothing to skip means no seek at all.
		if (skip <= 0) return 0;
		seek(offset() + skip);
		return skip;
	}

	@Override
	default boolean readBoolean() throws IOException {
		return readByte() != 0;
	}

	@Override
	default int readUnsignedByte() throws IOException {
		return readByte() & 0xff;
	}

	/** Reads two bytes as a short, honoring the handle's byte order. */
	@Override
	default short readShort() throws IOException {
		// ch0 is the most significant byte, whatever the order on the stream
		final int ch0;
		final int ch1;
		if (isBigEndian()) {
			ch0 = read();
			ch1 = read();
		}
		else {
			ch1 = read();
			ch0 = read();
		}
		// read() yields -1 at end of stream, which makes the OR negative
		if ((ch0 | ch1) < 0) throw new EOFException();
		return (short) ((ch0 << 8) + (ch1 << 0));
	}

	@Override
	default int readUnsignedShort() throws IOException {
		return readShort() & 0xffff;
	}

	@Override
	default char readChar() throws IOException {
		return (char) readShort();
	}

	/** Reads four bytes as an int, honoring the handle's byte order. */
	@Override
	default int readInt() throws IOException {
		// ch0 is the most significant byte, whatever the order on the stream
		final int ch0;
		final int ch1;
		final int ch2;
		final int ch3;
		if (isBigEndian()) {
			ch0 = read();
			ch1 = read();
			ch2 = read();
			ch3 = read();
		}
		else {
			ch3 = read();
			ch2 = read();
			ch1 = read();
			ch0 = read();
		}
		// read() yields -1 at end of stream, which makes the OR negative
		if ((ch0 | ch1 | ch2 | ch3) < 0) throw new EOFException();
		return ((ch0 << 24) + (ch1 << 16) + (ch2 << 8) + (ch3 << 0));
	}

	/** Reads eight bytes as a long, honoring the handle's byte order. */
	@Override
	default long readLong() throws IOException {
		// ch0 is the most significant byte, whatever the order on the stream
		final int ch0;
		final int ch1;
		final int ch2;
		final int ch3;
		final int ch4;
		final int ch5;
		final int ch6;
		final int ch7;
		if (isBigEndian()) {
			ch0 = read();
			ch1 = read();
			ch2 = read();
			ch3 = read();
			ch4 = read();
			ch5 = read();
			ch6 = read();
			ch7 = read();
		}
		else {
			ch7 = read();
			ch6 = read();
			ch5 = read();
			ch4 = read();
			ch3 = read();
			ch2 = read();
			ch1 = read();
			ch0 = read();
		}
		// read() yields -1 at end of stream, which makes the OR negative
		if ((ch0 | ch1 | ch2 | ch3 | ch4 | ch5 | ch6 | ch7) < 0) {
			throw new EOFException();
		}
		// Mask each byte and widen it to long BEFORE shifting, so that every
		// term is handled uniformly and no shift is performed in int width.
		return ((long) (ch0 & 255) << 56) | //
			((long) (ch1 & 255) << 48) | //
			((long) (ch2 & 255) << 40) | //
			((long) (ch3 & 255) << 32) | //
			((long) (ch4 & 255) << 24) | //
			((long) (ch5 & 255) << 16) | //
			((long) (ch6 & 255) << 8) | //
			((long) (ch7 & 255) << 0);
	}

	@Override
	default float readFloat() throws IOException {
		return Float.intBitsToFloat(readInt());
	}

	@Override
	default double readDouble() throws IOException {
		return Double.longBitsToDouble(readLong());
	}

	@Override
	default String readLine() throws IOException {
		// NB: Adapted from java.io.RandomAccessFile.readLine().

		// NB: A local accumulator needs no synchronization, hence StringBuilder.
		final StringBuilder input = new StringBuilder();
		int c = -1;
		boolean eol = false;

		while (!eol) {
			switch (c = read()) {
				case -1:
				case '\n':
					eol = true;
					break;
				case '\r':
					eol = true;
					// consume the '\n' of a "\r\n" pair; otherwise rewind
					final long cur = offset();
					if (read() != '\n') seek(cur);
					break;
				default:
					input.append((char) c);
					break;
			}
		}

		// end of stream with nothing read means there is no line to return
		if (c == -1 && input.length() == 0) {
			return null;
		}
		return input.toString();
	}

	@Override
	default String readUTF() throws IOException {
		return DataInputStream.readUTF(this);
	}

	// -- DataOutput methods --

	@Override
	default void write(final byte[] b) throws IOException {
		write(b, 0, b.length);
	}

	@Override
	default void writeBoolean(final boolean v) throws IOException {
		write(v ? 1 : 0);
	}

	@Override
	default void writeByte(final int v) throws IOException {
		write(v);
	}

	/** Writes the two low-order bytes of v, honoring the handle's byte order. */
	@Override
	default void writeShort(final int v) throws IOException {
		if (isBigEndian()) {
			write((v >>> 8) & 0xFF);
			write((v >>> 0) & 0xFF);
		}
		else {
			write((v >>> 0) & 0xFF);
			write((v >>> 8) & 0xFF);
		}
	}

	/** Writes the two low-order bytes of v, honoring the handle's byte order. */
	@Override
	default void writeChar(final int v) throws IOException {
		if (isBigEndian()) {
			write((v >>> 8) & 0xFF);
			write((v >>> 0) & 0xFF);
		}
		else {
			write((v >>> 0) & 0xFF);
			write((v >>> 8) & 0xFF);
		}
	}

	/** Writes the four bytes of v, honoring the handle's byte order. */
	@Override
	default void writeInt(final int v) throws IOException {
		if (isBigEndian()) {
			write((v >>> 24) & 0xFF);
			write((v >>> 16) & 0xFF);
			write((v >>> 8) & 0xFF);
			write((v >>> 0) & 0xFF);
		}
		else {
			write((v >>> 0) & 0xFF);
			write((v >>> 8) & 0xFF);
			write((v >>> 16) & 0xFF);
			write((v >>> 24) & 0xFF);
		}
	}

	/** Writes the eight bytes of v, honoring the handle's byte order. */
	@Override
	default void writeLong(final long v) throws IOException {
		if (isBigEndian()) {
			write((byte) (v >>> 56));
			write((byte) (v >>> 48));
			write((byte) (v >>> 40));
			write((byte) (v >>> 32));
			write((byte) (v >>> 24));
			write((byte) (v >>> 16));
			write((byte) (v >>> 8));
			write((byte) (v >>> 0));
		}
		else {
			write((byte) (v >>> 0));
			write((byte) (v >>> 8));
			write((byte) (v >>> 16));
			write((byte) (v >>> 24));
			write((byte) (v >>> 32));
			write((byte) (v >>> 40));
			write((byte) (v >>> 48));
			write((byte) (v >>> 56));
		}
	}

	@Override
	default void writeFloat(final float v) throws IOException {
		writeInt(Float.floatToIntBits(v));
	}

	@Override
	default void writeDouble(final double v) throws IOException {
		writeLong(Double.doubleToLongBits(v));
	}

	/**
	 * Writes the given string encoded as UTF-8.
	 * <p>
	 * NB: The encoding is fixed; the value of {@link #getEncoding()} is not
	 * consulted here.
	 * </p>
	 */
	@Override
	default void writeBytes(final String s) throws IOException {
		write(s.getBytes("UTF-8"));
	}

	/**
	 * Writes every character of the given string, two bytes per character, as
	 * if by {@link #writeChar(int)}; the handle's byte order is thus honored,
	 * keeping the output readable via {@link #readChar()}.
	 */
	@Override
	default void writeChars(final String s) throws IOException {
		final int len = s.length();
		for (int i = 0; i < len; i++) {
			writeChar(s.charAt(i));
		}
	}

	@Override
	default void writeUTF(final String str) throws IOException {
		DataHandles.writeUTF(str, this);
	}
}