com.amazon.ion.impl.UnifiedInputStreamX Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of ion-java Show documentation
Show all versions of ion-java Show documentation
A Java implementation of the Amazon Ion data notation.
/*
* Copyright 2007-2019 Amazon.com, Inc. or its affiliates. All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License").
* You may not use this file except in compliance with the License.
* A copy of the License is located at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* or in the "license" file accompanying this file. This file is distributed
* on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
* express or implied. See the License for the specific language governing
* permissions and limitations under the License.
*/
package com.amazon.ion.impl;
import com.amazon.ion.impl.IonReaderTextRawTokensX.IonReaderTextTokenException;
import com.amazon.ion.impl.UnifiedSavePointManagerX.SavePoint;
import java.io.Closeable;
import java.io.IOException;
import java.io.InputStream;
import java.io.Reader;
/**
* This is a local stream abstraction, and implementation, that
* allows the calling code to operate over final methods even
* when the original data source is an interface, such as a
* {@link Reader}.
*
* When passed a user's data buffer it simply operates on the
* entire buffer directly.
*
* When the input source is a stream it creates its own local
* buffers, using {@link UnifiedInputBufferX} and {@link UnifiedDataPageX}.
* These allocate pages and load them, a page at a time, from
* the stream using the highest bandwidth stream interface available.
*
* In this class the unread is only allowed to unread characters
* that it actually read. This is checked, when possible, when
* the local {@link #_debug} is true. This is checked when
* the unread is back into data that is present in the current
* buffer. When the unread crosses a buffer boundary the unread
* value is simply written into the space at the beginning of the
* block, which is reserved for that purpose. On all system
* allocated pages the user data starts at an offset in from
* the front of the buffer. The offset is set by {@link #UNREAD_LIMIT}.
* This is not necessary at either the beginning of the input
* or for a user supplied buffer since the entire input is a
* single buffer.
*
*/
abstract class UnifiedInputStreamX
implements Closeable
{
/** Value returned by the single-character read methods once the input is exhausted. */
public static final int EOF = -1;
// compile-time switch: selects the small page size below and enables the
// checks in verify_matched_unread
private static final boolean _debug = false;
// NOTE(review): per the class comment this is the offset reserved at the front
// of system allocated pages for unread characters - confirm in UnifiedDataPageX
static final int UNREAD_LIMIT = 10;
// page size passed to makePageBuffer by the stream backed subclasses
static int DEFAULT_PAGE_SIZE;
static {
if (_debug) {
// presumably kept tiny so that page boundaries are crossed often while debugging
DEFAULT_PAGE_SIZE = 32;
}
else {
DEFAULT_PAGE_SIZE = 32*1024;
}
}
//
// member variables
//
// true once the input is exhausted or the stream has been closed;
// cleared again by unread() and make_page_current()
boolean _eof;
// true when the data lives in _bytes, false when it lives in _chars
boolean _is_byte_data;
// true when the source is a Reader or InputStream, so load() may fetch more data
boolean _is_stream;
// the page buffer that owns the data pages of this stream
UnifiedInputBufferX _buffer;
// largest value unread() accepts, taken from _buffer.maxValue() in init()
int _max_char_value;
// index in the current array of the next value read() will return
int _pos;
// index in the current array where readable data ends; -1 after refill_is_eof()
int _limit;
// only 1 of these will be filled in depending on whether this is a byte
// source or a character source
Reader _reader;
InputStream _stream;
byte[] _bytes;
char[] _chars;
// save point bookkeeping, created in init()
UnifiedSavePointManagerX _save_points;
//
// Static factories: each overload picks the subclass that matches the
// kind of input source (character data vs. byte data, array vs. stream).
//

/** Creates a stream over every character of {@code chars}. */
public static UnifiedInputStreamX makeStream(CharSequence chars) {
    return makeStream(chars, 0, chars.length());
}

/** Creates a stream over {@code length} characters of {@code chars} starting at {@code offset}. */
public static UnifiedInputStreamX makeStream(CharSequence chars, int offset, int length) {
    UnifiedInputStreamX stream = new FromCharArray(chars, offset, length);
    return stream;
}

/** Creates a stream over the whole of {@code chars}. */
public static UnifiedInputStreamX makeStream(char[] chars) {
    return makeStream(chars, 0, chars.length);
}

/** Creates a stream over {@code length} characters of {@code chars} starting at {@code offset}. */
public static UnifiedInputStreamX makeStream(char[] chars, int offset, int length) {
    UnifiedInputStreamX stream = new FromCharArray(chars, offset, length);
    return stream;
}

/**
 * Creates a stream that pulls its characters from {@code reader}.
 *
 * @throws IOException if the subclass constructor fails while reading from {@code reader}
 */
public static UnifiedInputStreamX makeStream(Reader reader) throws IOException {
    UnifiedInputStreamX stream = new FromCharStream(reader);
    return stream;
}

/** Creates a stream over the whole of {@code buffer}. */
public static UnifiedInputStreamX makeStream(byte[] buffer) {
    return makeStream(buffer, 0, buffer.length);
}

/** Creates a stream over {@code length} bytes of {@code buffer} starting at {@code offset}. */
public static UnifiedInputStreamX makeStream(byte[] buffer, int offset, int length) {
    UnifiedInputStreamX stream = new FromByteArray(buffer, offset, length);
    return stream;
}

/**
 * Creates a stream that pulls its bytes from {@code stream}.
 *
 * @throws IOException if the subclass constructor fails while reading from {@code stream}
 */
public static UnifiedInputStreamX makeStream(InputStream stream) throws IOException {
    UnifiedInputStreamX result = new FromByteStream(stream);
    return result;
}
/** @return the {@code _stream} field; it is only assigned by the byte stream subclass */
public final InputStream getInputStream() {
    return _stream;
}

/** @return the {@code _reader} field; it is only assigned by the character stream subclass */
public final Reader getReader() {
    return _reader;
}

/** @return the byte array currently being read, as held in {@code _bytes} */
public final byte[] getByteArray() {
    return _bytes;
}

/** @return the char array currently being read, as held in {@code _chars} */
public final char[] getCharArray() {
    return _chars;
}
/**
* Finishes construction: clears the EOF flag, caches the buffer's maximum
* value and creates the save point manager for this stream. Every subclass
* constructor calls this after it has assigned {@code _buffer}.
*/
private final void init() {
// _state = UIS_STATE.STATE_READING;
_eof = false;
_max_char_value = _buffer.maxValue();
_save_points = new UnifiedSavePointManagerX(this);
}
/**
* Marks this stream as being at EOF and clears the page buffer.
* The stream backed subclasses override this to also close their source.
*
* @throws IOException declared by {@link Closeable#close()}
*/
public void close()
throws IOException
{
_eof = true;
_buffer.clear();
}
/**
* @return true once the EOF flag is set, which happens when a refill finds
* no more data or when the stream has been closed
*/
public final boolean isEOF() {
return _eof; // (_state == UIS_STATE.STATE_EOF);
}
/**
 * Reports where this stream currently is in the input source by asking
 * the current page to translate the local offset {@code _pos}.
 *
 * @return current "file" position, or 0 when there is no current page
 */
public long getPosition() {
    UnifiedDataPageX page = _buffer.getCurrentPage();
    if (page == null) {
        return 0;
    }
    return page.getFilePosition(_pos);
}
/*
* save point handling - most of the heavy lifting is handled
* by the PageBuffer. The local offset in the current page
* is added by these routines (from the local member _pos)
* when needed by the save point handling.
*
* these covers also handle keeping _save_point_active and
* _save_point_limit up to date as they handle their normal
* work.
*
* savepoints were not intended to overlap (save in a save
* point) so while there isn't any obvious or intentional
* reason this won't work, it's not planned for nor tested
* and, therefore, likely to have problems. There is no
* need for overlapping save point for the parsing case.
*/
/**
 * Hands out a save point from this stream's save point manager.
 *
 * @return the save point returned by the manager
 */
public final SavePoint savePointAllocate() {
    return _save_points.savePointAllocate();
}
/**
 * Restores the page index, position and limit recorded as "previous"
 * in the save point and makes that page current again.
 *
 * @param sp save point whose previous state is restored
 */
protected final void save_point_reset_to_prev(SavePoint sp)
{
    int prevIdx = sp.getPrevIdx();
    // arguments are evaluated left to right: page lookup, then position, then limit
    make_page_current(_buffer.getPage(prevIdx), prevIdx, sp.getPrevPos(), sp.getPrevLimit());
}
/**
 * Switches reading over to the given page: adopts its backing array,
 * installs the position and limit, and registers it as the buffer's
 * current page. A position beyond the limit leaves the stream at EOF.
 *
 * @param curr  page to read from
 * @param idx   index of {@code curr} in the page buffer
 * @param pos   offset in the page's array of the next value to read
 * @param limit offset in the page's array where readable data ends
 */
protected final void make_page_current(UnifiedDataPageX curr, int idx, int pos, int limit)
{
    _limit = limit;
    _pos = pos;
    _eof = false;

    if (!is_byte_data()) {
        _chars = curr.getCharBuffer();
    }
    else {
        _bytes = curr.getByteBuffer();
    }

    _buffer.setCurrentPage(idx, curr);

    // nothing readable in this page: flag EOF (this also resets _limit to -1)
    if (limit < pos) {
        refill_is_eof();
    }
}
/** @return true when this stream reads from {@code _bytes}, false when it reads from {@code _chars} */
private final boolean is_byte_data() {
return _is_byte_data;
}
/**
* Pushes one previously read value back so that the next read returns it.
* An argument of -1 is ignored. If the stream was at EOF the EOF flag is
* cleared, and a limit of -1 is restored to the current position, before
* the position is moved back by one.
*
* @param c the value to push back; -1 is a no-op
* @throws IllegalArgumentException if {@code c} is below -1 or above the
* buffer's maximum value
*/
public final void unread(int c)
{
if (c == -1) {
return;
}
else if (c < 0 || c > _max_char_value) {
throw new IllegalArgumentException();
}
if (_eof) {
_eof = false;
// refill_is_eof() leaves _limit at -1; make the unread value readable again
if (_limit == -1) {
_limit = _pos;
}
}
_pos--;
if (_pos >= 0) {
UnifiedDataPageX curr = _buffer.getCurrentPage();
if (_pos < curr.getStartingOffset()) {
// here we've backed up past the beginning of the current
// buffer. This can only happen when this is a system
// managed buffer, not a user supplied buffer (which has
// only one page). Or when the user has backed up past
// the actual beginning of the input - which is an error.
curr.inc_unread_count();
if (is_byte_data()) {
_bytes[_pos] = (byte)c;
}
else {
_chars[_pos] = (char)c;
}
}
else {
// we only allow unreading the character that was actually
// read - and when we can, we verify this. We will miss
// cases when the character was in the preceding buffer
// (handled above) when we just have to believe them.
// TODO: add test only code that checks the previous buffer
// case.
verify_matched_unread(c);
}
}
else {
// _pos has gone negative, so there is no slot in the local array;
// the value is handed to the page buffer at the current position.
// We don't seem to check for that elsewhere.
_buffer.putCharAt(getPosition(), c);
}
}
/**
 * Debug-only check that the value being unread equals the value stored
 * at the (already decremented) position. Does nothing unless
 * {@code _debug} is true and assertions are enabled.
 *
 * @param c the value being unread
 */
private final void verify_matched_unread(int c) {
    if (!_debug) {
        return;
    }
    if (!is_byte_data()) {
        assert(_chars[_pos] == (char)c);
    }
    else {
        assert(_bytes[_pos] == (byte)c);
    }
}
/**
 * Backs up over a carriage return that immediately precedes the current
 * position, if there is one inside the current page.
 *
 * After a new line has been unread, the value before it may be the '\r'
 * of a "\r\n" pair (the '\r' having been consumed by the read). In that
 * case the position must move back one more slot; otherwise a later
 * unread would overwrite the '\r' and so edit the buffer. When the
 * preceding value is not in the current page nothing is done, since the
 * next read/unread then behaves as for a lone new line anyway.
 *
 * @return true if the position was moved back over a '\r', false otherwise
 */
public final boolean unread_optional_cr()
{
    UnifiedDataPageX curr = _buffer.getCurrentPage();

    // we can only look at the preceding value when it is real data in this page
    if (_pos <= curr.getStartingOffset()) {
        return false;
    }

    int preceding = is_byte_data() ? (_bytes[_pos - 1] & 0xff) : _chars[_pos - 1];
    if (preceding != '\r') {
        return false;
    }

    _pos--;
    // report the unread; previously the result was always false
    return true;
}
/**
 * Reads the next value from the input.
 *
 * @return the next byte (as an unsigned value) or char, or {@link #EOF}
 * @throws IOException if refilling from the underlying source fails
 */
public final int read() throws IOException {
    if (_pos < _limit) {
        // both bytes and chars might be null if this is empty input
        // otherwise we should have 1, and only 1, of these buffers set
        assert((_bytes == null) ^ (_chars == null));
        if (_is_byte_data) {
            return _bytes[_pos++] & 0xff;
        }
        return _chars[_pos++];
    }
    // current data is used up: take the slow path
    return read_helper();
}
/**
 * Slow path of {@link #read()}, used when the current data is used up.
 * Tries a refill and returns the first value of the new data.
 *
 * @return the next value, or {@link #EOF} if the stream is (or becomes) exhausted
 * @throws IOException if the refill fails
 */
protected final int read_helper() throws IOException
{
    // the refill is only attempted when we are not already at EOF
    if (_eof || refill_helper()) {
        return EOF;
    }
    if (is_byte_data()) {
        return _bytes[_pos++] & 0xff;
    }
    return _chars[_pos++];
}
/**
 * Calls {@code refill()}, stores the limit it returns and sets the EOF
 * flag if that produced no readable data.
 *
 * @return true if the stream is now at EOF, false if data is available
 * @throws IOException if the refill fails
 */
private final boolean refill_helper() throws IOException
{
    _limit = refill();
    // done in refill: _pos = _buffer.getCurrentPage().getOriginalStartingOffset();
    boolean exhausted = (_pos >= _limit);
    if (exhausted) {
        _eof = true;
    }
    return exhausted;
}
/**
 * Moves the read position forward by {@code skipDistance} values,
 * refilling as often as needed.
 *
 * @param skipDistance number of values to skip
 * @throws IOException if the input ends before the whole distance has
 *         been skipped, or if a refill fails
 */
public final void skip(int skipDistance) throws IOException
{
    int outstanding;

    if (_limit - _pos >= skipDistance) {
        // fast path: the whole distance is inside the current data
        _pos += skipDistance;
        outstanding = 0;
    }
    else {
        outstanding = skipDistance;
        while (outstanding > 0) {
            int step = Math.min(_limit - _pos, outstanding);
            _pos += step;
            outstanding -= step;
            if (outstanding > 0 && refill_helper()) {
                break;
            }
        }
    }

    if (outstanding > 0) {
        throw new IOException("unexpected EOF encountered during skip of "
                            + skipDistance
                            + " at position "
                            + getPosition());
    }
}
/**
 * Copies up to {@code length} bytes into {@code dst}, starting at
 * {@code offset}. Only the data that is currently available plus what a
 * single successful copy loop yields is returned; see the loop exit below.
 *
 * NB this method does not follow the contract of InputStream.read, it will return 0 at EOF.
 * It is unclear what the implication to the rest of the system to make it 'conform'.
 *
 * @param dst    destination array
 * @param offset first index in {@code dst} to write to
 * @param length maximum number of bytes to copy
 * @return number of bytes copied, 0 at EOF
 * @throws IOException if this is a character source or a refill fails
 */
public final int read(byte[] dst, int offset, int length) throws IOException
{
    if (!is_byte_data()) {
        throw new IOException("byte read is not support over character sources");
    }

    int outstanding = length;
    while (outstanding > 0 && !isEOF()) {
        int chunk = Math.min(_limit - _pos, outstanding);
        System.arraycopy(_bytes, _pos, dst, offset, chunk);
        _pos += chunk;
        offset += chunk;
        outstanding -= chunk;

        // keep going only if more is wanted, the current data is used up
        // and the refill did not hit EOF
        boolean keepGoing = outstanding != 0 && _pos >= _limit && !refill_helper();
        if (!keepGoing) {
            break;
        }
    }
    return length - outstanding;
}
private int read_utf8(int c) throws IOException
{
int len = IonUTF8.getUTF8LengthFromFirstByte(c);
for (int ii=1; ii 0)); // xor: either we're at eof or we have data to read
return _limit;
}
/**
 * Installs {@code new_page} at {@code new_page_idx} in the page buffer
 * (unless that slot already holds this very page) and makes it the page
 * being read, with the page's own buffer limit as the read limit.
 *
 * @param new_page_idx slot in the page buffer for the page
 * @param new_page     the page to read from, never null
 * @param pos          offset in the page's array of the next value to read
 */
private void set_current_page(int new_page_idx, UnifiedDataPageX new_page, int pos)
{
    assert(new_page != null && new_page_idx >= 0 && new_page_idx <= _buffer.getPageCount() + 1);

    UnifiedDataPageX occupant = (new_page_idx < _buffer.getPageCount())
                              ? _buffer.getPage(new_page_idx)
                              : null;
    if (occupant != new_page) {
        _buffer.setPage(new_page_idx, new_page, true);
    }
    make_page_current(new_page, new_page_idx, pos, new_page.getBufferLimit());
}
/**
 * Puts the stream into the EOF state: sets the EOF flag and resets the
 * limit to -1.
 *
 * @return the new limit, which is always -1
 */
private int refill_is_eof() {
    _eof = true;
    return (_limit = -1);
}
/** @return true when this stream is backed by a Reader or InputStream, so load() can fetch more data */
private final boolean can_fill_new_page() {
return _is_stream;
}
/**
 * Loads data from the underlying stream or reader into {@code curr}.
 * Array backed streams have nothing to load from, so they report 0.
 *
 * @param curr          page to load into
 * @param start_pos     passed through to the page's load method
 * @param file_position passed through to the page's load method
 * @return the value returned by the page's load method, or 0 when this
 *         stream is not backed by a Reader or InputStream
 * @throws IOException if the page's load method fails
 */
protected final int load(UnifiedDataPageX curr, int start_pos, long file_position) throws IOException
{
    if (!can_fill_new_page()) {
        return 0;
    }
    if (is_byte_data()) {
        return curr.load(_stream, start_pos, file_position);
    }
    return curr.load(_reader, start_pos, file_position);
}
//
// specialized subclasses that provide an appropriate constructor
// and refill method tailored to efficiently use the data source
//
/**
* Stream over character data that is completely in memory. Both
* constructors follow the same steps: flag the stream as character data
* with no backing stream, build the page buffer, make page 0 current with
* the position at {@code offset} and the limit at {@code offset+length},
* then run the shared {@code init()}.
*/
private static class FromCharArray extends UnifiedInputStreamX
{
/**
* @param chars character sequence to read
* @param offset index of the first character to read
* @param length number of characters to read
*/
FromCharArray(CharSequence chars, int offset, int length)
{
_is_byte_data = false;
_is_stream = false;
_buffer = UnifiedInputBufferX.makePageBuffer(chars, offset, length);
UnifiedDataPageX curr = _buffer.getCurrentPage();
make_page_current(curr, 0, offset, offset+length);
super.init();
}
/**
* @param charArray array to read
* @param offset index of the first character to read
* @param length number of characters to read
*/
FromCharArray(char[] charArray, int offset, int length)
{
_is_byte_data = false;
_is_stream = false;
_buffer = UnifiedInputBufferX.makePageBuffer(charArray, offset, length);
UnifiedDataPageX curr = _buffer.getCurrentPage();
make_page_current(curr, 0, offset, offset+length);
super.init();
}
}
/**
 * Stream over character data pulled from a {@link Reader}. Pages of
 * {@code DEFAULT_PAGE_SIZE} are filled through {@code refill()}.
 */
private static class FromCharStream extends UnifiedInputStreamX
{
    /**
     * @param reader source of the characters; closed by {@link #close()}
     * @throws IOException if the initial refill fails
     */
    FromCharStream(Reader reader) throws IOException
    {
        _is_byte_data = false;
        _is_stream = true;
        _reader = reader;
        _buffer = UnifiedInputBufferX.makePageBuffer(UnifiedInputBufferX.BufferType.CHARS, DEFAULT_PAGE_SIZE);
        super.init();
        _limit = refill();
    }

    /**
     * Releases the local buffers and closes the underlying reader.
     *
     * @throws IOException if closing the reader (or the base close) fails
     */
    @Override
    public void close()
        throws IOException
    {
        try {
            super.close();
        }
        finally {
            // the reader must be released even if the buffer cleanup failed
            _reader.close();
        }
    }
}
// FIXME: PERF_TEST was: private
/**
* Stream over byte data that is completely in memory.
*/
static class FromByteArray extends UnifiedInputStreamX
{
/**
* Flags the stream as byte data with no backing stream, builds the page
* buffer, makes page 0 current with the position at {@code offset} and
* the limit at {@code offset+length}, then runs the shared {@code init()}.
*
* @param bytes array to read
* @param offset index of the first byte to read
* @param length number of bytes to read
*/
FromByteArray(byte[] bytes, int offset, int length)
{
_is_byte_data = true;
_is_stream = false;
_buffer = UnifiedInputBufferX.makePageBuffer(bytes, offset, length);
UnifiedDataPageX curr = _buffer.getCurrentPage();
make_page_current(curr, 0, offset, offset+length);
super.init();
}
}
/**
 * Stream over byte data pulled from an {@link InputStream}. Pages of
 * {@code DEFAULT_PAGE_SIZE} are filled through {@code refill()}.
 */
private static class FromByteStream extends UnifiedInputStreamX
{
    /**
     * @param stream source of the bytes; closed by {@link #close()}
     * @throws IOException if the initial refill fails
     */
    FromByteStream(InputStream stream) throws IOException
    {
        _is_byte_data = true;
        _is_stream = true;
        _stream = stream;
        _buffer = UnifiedInputBufferX.makePageBuffer(UnifiedInputBufferX.BufferType.BYTES, DEFAULT_PAGE_SIZE);
        super.init();
        _limit = refill();
    }

    /**
     * Releases the local buffers and closes the underlying input stream.
     *
     * @throws IOException if closing the input stream (or the base close) fails
     */
    @Override
    public void close()
        throws IOException
    {
        try {
            super.close();
        }
        finally {
            // the input stream must be released even if the buffer cleanup failed
            _stream.close();
        }
    }
}
}