// panda.io.stream.CharSequenceInputStream (Maven / Gradle / Ivy)
//
// Go to download
package panda.io.stream;

import java.io.IOException;
import java.io.InputStream;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.charset.CharacterCodingException;
import java.nio.charset.Charset;
import java.nio.charset.CharsetEncoder;
import java.nio.charset.CoderResult;
import java.nio.charset.CodingErrorAction;

import panda.io.Streams;

/**
 * {@link InputStream} implementation that can read from String, StringBuffer, StringBuilder or
 * CharBuffer.
 * <p>
 * Characters are encoded on demand into an internal byte buffer. Malformed input and unmappable
 * characters are replaced by the encoder's replacement bytes instead of being reported as errors.
 * <p>
 * Note: Supports {@link #mark(int)} and {@link #reset()}. Only those two methods are
 * synchronized; the read methods are not.
 */
public class CharSequenceInputStream extends InputStream {

	/** Default size (in bytes) of the internal encode buffer. */
	private static final int BUFFER_SIZE = 2048;

	/** Sentinel stored in the mark fields while no mark has been set. */
	private static final int NO_MARK = -1;

	private final CharsetEncoder encoder;
	private final CharBuffer cbuf;
	private final ByteBuffer bbuf;

	private int markCbuf; // position in cbuf at the time of mark(), or NO_MARK
	private int markBbuf; // position in bbuf at the time of mark(), or NO_MARK

	/**
	 * Constructor.
	 * 
	 * @param cs the input character sequence
	 * @param charset the character set to use
	 * @param bufferSize the requested size of the internal byte buffer. The buffer actually
	 *            allocated is never smaller than twice the encoder's maximum bytes per char, so
	 *            that a complete surrogate pair always fits.
	 * @throws IllegalArgumentException if the buffer is not large enough to hold a complete
	 *             character
	 */
	public CharSequenceInputStream(final CharSequence cs, final Charset charset, final int bufferSize) {
		super();
		this.encoder = charset.newEncoder().onMalformedInput(CodingErrorAction.REPLACE)
			.onUnmappableCharacter(CodingErrorAction.REPLACE);
		// Ensure that buffer is long enough to hold a complete character
		final float maxBytesPerChar = encoder.maxBytesPerChar();
		if (bufferSize < maxBytesPerChar) {
			throw new IllegalArgumentException("Buffer size " + bufferSize + " is less than maxBytesPerChar "
					+ maxBytesPerChar);
		}
		// maxBytesPerChar is per UTF-16 code unit, but a surrogate pair is encoded as one unit and
		// may need more than that (UTF-8: 3 per char, 4 per pair). If the buffer cannot hold a
		// whole pair the encoder overflows without consuming anything and the read loops never
		// make progress, so enforce a floor of two chars' worth of bytes.
		final int minCapacity = (int) Math.ceil(2 * maxBytesPerChar);
		this.bbuf = ByteBuffer.allocate(Math.max(bufferSize, minCapacity));
		this.bbuf.flip(); // start out "empty" in read mode (position == limit == 0)
		this.cbuf = CharBuffer.wrap(cs);
		this.markCbuf = NO_MARK;
		this.markBbuf = NO_MARK;
	}

	/**
	 * Constructor, calls {@link #CharSequenceInputStream(CharSequence, Charset, int)}.
	 * 
	 * @param cs the input character sequence
	 * @param charset the character set name to use
	 * @param bufferSize the buffer size to use.
	 * @throws IllegalArgumentException if the buffer is not large enough to hold a complete
	 *             character
	 */
	public CharSequenceInputStream(final CharSequence cs, final String charset, final int bufferSize) {
		this(cs, Charset.forName(charset), bufferSize);
	}

	/**
	 * Constructor, calls {@link #CharSequenceInputStream(CharSequence, Charset, int)} with a buffer
	 * size of 2048.
	 * 
	 * @param cs the input character sequence
	 * @param charset the character set to use
	 * @throws IllegalArgumentException if the buffer is not large enough to hold a complete
	 *             character
	 */
	public CharSequenceInputStream(final CharSequence cs, final Charset charset) {
		this(cs, charset, BUFFER_SIZE);
	}

	/**
	 * Constructor, calls {@link #CharSequenceInputStream(CharSequence, String, int)} with a buffer
	 * size of 2048.
	 * 
	 * @param cs the input character sequence
	 * @param charset the character set name to use
	 * @throws IllegalArgumentException if the buffer is not large enough to hold a complete
	 *             character
	 */
	public CharSequenceInputStream(final CharSequence cs, final String charset) {
		this(cs, charset, BUFFER_SIZE);
	}

	/**
	 * Fills the byte output buffer from the input char buffer.
	 * <p>
	 * On return the byte buffer is in read mode (flipped), holding any bytes that were still
	 * unread plus the newly encoded ones.
	 * 
	 * @throws CharacterCodingException an error encoding data
	 */
	private void fillBuffer() throws CharacterCodingException {
		this.bbuf.compact(); // switch to write mode, keeping unread bytes at the front
		final CoderResult result = this.encoder.encode(this.cbuf, this.bbuf, true);
		if (result.isError()) {
			result.throwException();
		}
		this.bbuf.flip(); // back to read mode
	}

	/**
	 * Reads up to {@code len} bytes of encoded data into {@code b}, starting at {@code off}.
	 * 
	 * @param b the destination array
	 * @param off the start offset in {@code b}
	 * @param len the maximum number of bytes to read
	 * @return the number of bytes read, 0 if {@code len} is 0, or EOF if no more data is available
	 * @throws NullPointerException if {@code b} is null
	 * @throws IndexOutOfBoundsException if {@code off} or {@code len} is negative, or
	 *             {@code len} is greater than {@code b.length - off}
	 * @throws IOException if an encoding error occurs
	 */
	@Override
	public int read(final byte[] b, int off, int len) throws IOException {
		if (b == null) {
			throw new NullPointerException("Byte array is null");
		}
		// Validate everything up front, before any stream state is touched.
		// "len > b.length - off" is the overflow-safe form of "off + len > b.length".
		if (off < 0 || len < 0 || len > b.length - off) {
			throw new IndexOutOfBoundsException("Array Size=" + b.length + ", offset=" + off + ", length=" + len);
		}
		if (len == 0) {
			return 0; // must return 0 for zero length read
		}
		if (!this.bbuf.hasRemaining() && !this.cbuf.hasRemaining()) {
			return Streams.EOF;
		}
		int bytesRead = 0;
		while (len > 0) {
			if (this.bbuf.hasRemaining()) {
				final int chunk = Math.min(this.bbuf.remaining(), len);
				this.bbuf.get(b, off, chunk);
				off += chunk;
				len -= chunk;
				bytesRead += chunk;
			}
			else {
				// Only refill when the byte buffer is completely drained; reset() relies on that.
				fillBuffer();
				if (!this.bbuf.hasRemaining() && !this.cbuf.hasRemaining()) {
					break;
				}
			}
		}
		return bytesRead == 0 && !this.cbuf.hasRemaining() ? Streams.EOF : bytesRead;
	}

	/**
	 * Reads the next encoded byte.
	 * 
	 * @return the byte as a value in the range 0-255, or EOF if no more data is available
	 * @throws IOException if an encoding error occurs
	 */
	@Override
	public int read() throws IOException {
		for (;;) {
			if (this.bbuf.hasRemaining()) {
				return this.bbuf.get() & 0xFF;
			}
			// Only refill when the byte buffer is completely drained; reset() relies on that.
			fillBuffer();
			if (!this.bbuf.hasRemaining() && !this.cbuf.hasRemaining()) {
				return Streams.EOF;
			}
		}
	}

	/**
	 * Reads up to {@code b.length} bytes, see {@link #read(byte[], int, int)}.
	 * 
	 * @param b the destination array
	 * @return the number of bytes read, or EOF if no more data is available
	 * @throws IOException if an encoding error occurs
	 */
	@Override
	public int read(final byte[] b) throws IOException {
		return read(b, 0, b.length);
	}

	/**
	 * Skips up to {@code n} bytes by reading and discarding them one at a time.
	 * 
	 * @param n the number of bytes to skip; values less than 1 skip nothing
	 * @return the number of bytes actually skipped
	 * @throws IOException if an encoding error occurs
	 */
	@Override
	public long skip(long n) throws IOException {
		/*
		 * This could be made more efficient by using position to skip within the current buffer.
		 */
		long skipped = 0;
		while (n > 0 && available() > 0) {
			if (read() == Streams.EOF) {
				break; // do not count a byte that was never delivered
			}
			n--;
			skipped++;
		}
		return skipped;
	}

	/**
	 * Return an estimate of the number of bytes remaining in the byte stream.
	 * 
	 * @return the count of bytes that can be read without blocking (or returning EOF).
	 * @throws IOException if an error occurs (probably not possible)
	 */
	@Override
	public int available() throws IOException {
		// The cached entries are in bbuf; since encoding always creates at least one byte
		// per character, we can add the two to get a better estimate (e.g. if bbuf is empty)
		// Note that the previous implementation (2.4) could return zero even though there were
		// encoded bytes still available.
		return this.bbuf.remaining() + this.cbuf.remaining();
	}

	/**
	 * Does nothing: this stream holds no resources that need releasing.
	 */
	@Override
	public void close() throws IOException {
	}

	/**
	 * {@inheritDoc}
	 * 
	 * @param readlimit max read limit (ignored)
	 */
	@Override
	public synchronized void mark(final int readlimit) {
		// The buffers' own mark/reset cannot be used: bbuf is re-used (compact() discards its
		// mark), so the two positions are recorded here and reset() re-encodes to reach them.
		this.markCbuf = this.cbuf.position();
		this.markBbuf = this.bbuf.position();
	}

	/**
	 * Repositions the stream to the position recorded by the last call to {@link #mark(int)}.
	 * The mark stays valid afterwards, so this method may be called repeatedly. Does nothing if
	 * {@link #mark(int)} has never been called.
	 * 
	 * @throws IOException if an encoding error occurs while re-encoding up to the mark
	 */
	@Override
	public synchronized void reset() throws IOException {
		/*
		 * This is not the most efficient implementation, as it re-encodes from the beginning. Since
		 * the bbuf is re-used, in general it's necessary to re-encode the data. It should be
		 * possible to apply some optimisations however: + use mark/reset on the cbuf and bbuf. This
		 * would only work if the buffer had not been (re)filled since the mark. The code would have
		 * to catch InvalidMarkException - does not seem possible to check if mark is valid
		 * otherwise. + Try saving the state of the cbuf before each fillBuffer; it might be
		 * possible to restart from there.
		 */
		if (this.markCbuf == NO_MARK) {
			return;
		}
		// if cbuf is at 0, we have not started reading anything, so skip re-encoding
		if (this.cbuf.position() != 0) {
			this.encoder.reset();
			this.cbuf.rewind();
			this.bbuf.rewind();
			this.bbuf.limit(0); // rewind does not clear the buffer
			while (this.cbuf.position() < this.markCbuf) {
				// empty the buffer (we only refill when empty during normal processing), so
				// each fill consumes exactly the same chars as it did the first time round
				this.bbuf.rewind();
				this.bbuf.limit(0);
				fillBuffer();
			}
		}
		if (this.cbuf.position() != this.markCbuf) {
			throw new IllegalStateException("Unexpected CharBuffer postion: actual=" + cbuf.position()
					+ " expected=" + this.markCbuf);
		}
		this.bbuf.position(this.markBbuf);
		// The mark is deliberately kept: both buffers are now exactly where they were when
		// mark() was called, so a later reset() can return here again.
	}

	/**
	 * @return {@code true}, as {@link #mark(int)} and {@link #reset()} are supported
	 */
	@Override
	public boolean markSupported() {
		return true;
	}

}