com.amazon.ion.impl.IonReaderBinaryRawX Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of ion-java Show documentation
Show all versions of ion-java Show documentation
A Java implementation of the Amazon Ion data notation.
/*
* Copyright 2007-2019 Amazon.com, Inc. or its affiliates. All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License").
* You may not use this file except in compliance with the License.
* A copy of the License is located at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* or in the "license" file accompanying this file. This file is distributed
* on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
* express or implied. See the License for the specific language governing
* permissions and limitations under the License.
*/
package com.amazon.ion.impl;
import static com.amazon.ion.SystemSymbols.ION_1_0_SID;
import com.amazon.ion.Decimal;
import com.amazon.ion.IonException;
import com.amazon.ion.IonReader;
import com.amazon.ion.IonType;
import com.amazon.ion.SymbolTable;
import com.amazon.ion.Timestamp;
import com.amazon.ion.Timestamp.Precision;
import com.amazon.ion.impl.UnifiedSavePointManagerX.SavePoint;
import com.amazon.ion.impl._Private_ScalarConversions.AS_TYPE;
import com.amazon.ion.impl._Private_ScalarConversions.ValueVariant;
import com.amazon.ion.impl.bin.utf8.ByteBufferPool;
import com.amazon.ion.impl.bin.utf8.PoolableByteBuffer;
import com.amazon.ion.impl.bin.utf8.Utf8StringDecoder;
import com.amazon.ion.impl.bin.utf8.Utf8StringDecoderPool;
import java.io.IOException;
import java.math.BigDecimal;
import java.math.BigInteger;
import java.math.MathContext;
import java.nio.ByteBuffer;
/**
* Low-level reader base class for reading Ion binary
* input sources. It uses the UnifiedInputStream, just
* as the updated (July 2009) text reader does. The
* routines in this impl only include those needed to handle
* field ids, annotation ids, and access to the value
* headers. In particular hasNext, next, stepIn and stepOut
* are handled here.
*
* Scalar values are handled by IonReaderBinarySystem and
* symbol tables (as well as field names and annotations as
* strings) are handled by IonBinaryReaderUser.
*/
abstract class IonReaderBinaryRawX
implements IonReader
{
// Initial length (in longs) of _container_stack; push() doubles the array on demand.
// NOTE(review): the old "a multiple of 3" remark looks stale - each stack entry
// is POS_STACK_STEP (2) longs wide, see push().
static final int DEFAULT_CONTAINER_STACK_SIZE = 12; // also a multiple of POS_STACK_STEP
// Initial length of _annotation_ids; load_annotation_append() doubles it on demand.
static final int DEFAULT_ANNOTATION_SIZE = 10;
// Sentinel stored in _local_remaining meaning "no enclosing length limit applies".
static final int NO_LIMIT = Integer.MIN_VALUE;
/**
* Positions of the raw reader relative to the value being read; drives the
* state machine in has_next_helper_raw().
*/
protected enum State {
S_INVALID,
S_BEFORE_FIELD, // only true in structs; the next thing to read is a field id
S_BEFORE_TID, // the next thing to read is a type descriptor byte
S_BEFORE_VALUE, // the header has been read, the value body (_value_len bytes) has not
S_AFTER_VALUE, // the current value has been fully consumed (or has no body)
S_EOF // read_type_id() reported end of input
}
// current position of the state machine (see State)
State _state;
// the underlying input; all reads and skips go through it
UnifiedInputStreamX _input;
// bytes left before the current limit (container or value), or NO_LIMIT
long _local_remaining;
// true once end of input (or end of the current container) was seen
boolean _eof;
// true when has_next_helper_raw() must run before a value is available
boolean _has_next_needed;
ValueVariant _v;
// type of the current value, null when no value is loaded
IonType _value_type;
boolean _value_is_null;
boolean _value_is_true; // cached boolean value (since we step on the length)
/**
* {@link SymbolTable#UNKNOWN_SYMBOL_ID} means "not on a struct field"
* since otherwise we always know the SID.
*/
int _value_field_id;
// type code of the current value, -1 when cleared (see clear_value())
int _value_tid;
// length in bytes of the current value's body
long _value_len;
// input position of the first byte of the current value's body
long _value_start;
// lob bytes not yet handed out by readBytes(); valid once _value_lob_is_ready
int _value_lob_remaining;
boolean _value_lob_is_ready;
// input position of the current value's type descriptor, and the total
// length of header plus body (both set in read_type_id())
long _position_start;
long _position_len;
// save point spanning the raw annotation bytes of the current value
SavePoint _annotations;
// annotation SIDs decoded by load_annotations(); first _annotation_count are valid
int[] _annotation_ids;
int _annotation_count;
// local stack for stepIn() and stepOut()
boolean _is_in_struct;
boolean _struct_is_ordered;
int _parent_tid;
// index of the first free slot in _container_stack
int _container_top;
long[] _container_stack; // pairs of: position, (local_remaining << LIMIT_SHIFT | type) - see push()
// Pooled decoder for UTF-8 strings.
private final Utf8StringDecoder utf8Decoder = Utf8StringDecoderPool.getInstance().getOrCreate();
// Calling read() to pull in the next byte of a string requires an EOF check to be performed for each byte.
// This reusable buffer allows us to call read(utf8InputBuffer) instead, letting us pay the cost of an EOF check
// once per buffer rather than once per byte.
private final PoolableByteBuffer pooledUtf8InputBuffer = ByteBufferPool.getInstance().getOrCreate();
/**
* Does no work itself; init_raw(UnifiedInputStreamX) is what assigns the
* input and allocates the working arrays.
*/
protected IonReaderBinaryRawX() {
}
/**
 * Facets are not supported by the raw binary reader.
 *
 * @param facetType the facet interface being requested; ignored.
 * @param <T> the requested facet type.
 * @return This implementation always returns null.
 */
public <T> T asFacet(Class<T> facetType)
{
    return null;
}
/**
 * Binds this reader to {@code uis}, allocates the per-reader working
 * storage and then resets the reader to its start-of-stream state.
 *
 * @param uis the input to read from; also supplies the annotation save point.
 */
protected final void init_raw(UnifiedInputStreamX uis) {
    _input = uis;
    _annotations = uis.savePointAllocate();

    _v = new ValueVariant();
    _annotation_ids = new int[DEFAULT_ANNOTATION_SIZE];
    _container_stack = new long[DEFAULT_CONTAINER_STACK_SIZE];

    re_init_raw();

    // no value header has been read yet
    _position_start = -1;
}
/**
 * Resets all per-stream and per-value state so the reader starts again at
 * the top level, before the first type descriptor. The arrays and the
 * input allocated by init_raw() are left in place.
 */
final void re_init_raw() {
    // stream level state
    _state = State.S_BEFORE_TID; // this is where we always start
    _local_remaining = NO_LIMIT;
    _has_next_needed = true;
    _eof = false;

    // current value
    _value_type = null;
    _value_field_id = SymbolTable.UNKNOWN_SYMBOL_ID;
    _value_is_null = false;
    _value_is_true = false;
    _value_len = 0;
    _value_start = 0;
    _value_lob_is_ready = false;
    _value_lob_remaining = 0;
    _annotation_count = 0;

    // container stack
    // NOTE(review): the previous code first stored tidDATAGRAM into
    // _parent_tid and then overwrote it with 0; only the effective
    // value is assigned here - confirm 0 is the intended one.
    _parent_tid = 0;
    _container_top = 0;
    _is_in_struct = false;
    _struct_is_ordered = false;
}
/**
 * Closes the underlying input, then closes the pooled UTF-8 decoder and
 * the pooled input buffer (presumably handing them back to their pools).
 * The pooled objects are closed even when closing the input fails, so a
 * failing stream cannot leak them.
 *
 * @throws IOException if closing the underlying input fails.
 */
public void close()
    throws IOException
{
    try {
        _input.close();
    }
    finally {
        try {
            utf8Decoder.close();
        }
        finally {
            pooledUtf8InputBuffer.close();
        }
    }
}
// Layout of one _container_stack entry (POS_STACK_STEP longs):
// slot POS_OFFSET holds the position to resume at after stepOut(),
// slot TYPE_LIMIT_OFFSET holds the saved limit and parent type id packed together.
static private final int POS_OFFSET = 0;
static private final int TYPE_LIMIT_OFFSET = 1;
// All type IDs can fit in one byte.
static private final long TYPE_MASK = 0xff;
// The limit is restricted to 7 bytes (see readVarUIntOrEOF(boolean longAllowed)), leaving space for one byte
// to hold the type ID. Shift the limit 8 bits left to make room for the type ID byte.
static private final int LIMIT_SHIFT = 8;
// number of longs per stack entry
static private final int POS_STACK_STEP = 2;
/**
 * Pushes one entry onto the container stack, doubling the backing array
 * first when the new entry would reach its end.
 *
 * @param type the parent type id to restore on stepOut (low 8 bits are kept).
 * @param position the input position to resume at on stepOut.
 * @param local_remaining the limit to restore on stepOut.
 */
private final void push(int type, long position, long local_remaining)
{
    int capacity = _container_stack.length;
    if (_container_top + POS_STACK_STEP >= capacity) {
        long[] grown = new long[capacity * 2];
        System.arraycopy(_container_stack, 0, grown, 0, capacity);
        _container_stack = grown;
    }

    // limit in the upper bits, type id in the lowest byte
    long packed = (local_remaining << LIMIT_SHIFT) | (type & TYPE_MASK);

    int base = _container_top;
    _container_stack[base + POS_OFFSET] = position;
    _container_stack[base + TYPE_LIMIT_OFFSET] = packed;
    _container_top = base + POS_STACK_STEP;
}
/** @return the resume position stored in the top container stack entry. */
private final long get_top_position() {
    assert(_container_top > 0);
    int entry = _container_top - POS_STACK_STEP;
    return _container_stack[entry + POS_OFFSET];
}
/**
 * @return the parent type id stored in the top container stack entry;
 * reports an error when it lies outside 0..tidDATAGRAM.
 */
private final int get_top_type() {
    assert(_container_top > 0);
    int entry = _container_top - POS_STACK_STEP;
    long packed = _container_stack[entry + TYPE_LIMIT_OFFSET];
    int type = (int)(packed & TYPE_MASK);
    boolean in_range = (type >= 0) && (type <= _Private_IonConstants.tidDATAGRAM);
    if (!in_range) {
        throwErrorAt("invalid type id in parent stack");
    }
    return type;
}
/** @return the limit stored (above the type byte) in the top container stack entry. */
private final long get_top_local_remaining() {
    assert(_container_top > 0);
    int entry = _container_top - POS_STACK_STEP;
    long packed = _container_stack[entry + TYPE_LIMIT_OFFSET];
    return packed >> LIMIT_SHIFT;
}
/** Discards the top container stack entry. */
private final void pop() {
    assert(_container_top > 0);
    _container_top = _container_top - POS_STACK_STEP;
}
/**
 * Loads the next value header if that has not been done yet.
 *
 * @return true when a value is available, false once end of input (or of
 * the current container) has been reached.
 */
public boolean hasNext()
{
    if (_eof) {
        return false;
    }
    if (_has_next_needed) {
        try {
            has_next_helper_raw();
        }
        catch (IOException e) {
            error(e);
        }
    }
    return !_eof;
}
/**
 * Moves to the next value, loading its header unless a preceding
 * hasNext() call already did so.
 *
 * @return the type of the value now current, or null at end of input.
 */
public IonType next()
{
    if (_eof) {
        return null;
    }

    boolean header_pending = _has_next_needed;
    if (header_pending) {
        try {
            has_next_helper_raw();
        }
        catch (IOException e) {
            error(e);
        }
    }

    // the value is being handed out, so the following call must advance
    _has_next_needed = true;

    // this should only be null here if we're at eof
    assert( _value_type != null || _eof == true);
    return _value_type;
}
//from IonConstants
//public static final byte[] BINARY_VERSION_MARKER_1_0 =
// { (byte) 0xE0,
// (byte) 0x01,
// (byte) 0x00,
// (byte) 0xEA };
// Type code and low nibble extracted from the first byte of the binary version
// marker; has_next_helper_raw() uses them to tell a version marker apart from
// an annotation wrapper.
private static final int BINARY_VERSION_MARKER_TID = _Private_IonConstants.getTypeCode(_Private_IonConstants.BINARY_VERSION_MARKER_1_0[0] & 0xff);
private static final int BINARY_VERSION_MARKER_LEN = _Private_IonConstants.getLowNibble(_Private_IonConstants.BINARY_VERSION_MARKER_1_0[0] & 0xff);
/**
* Runs the raw state machine until either a value header has been loaded
* (_value_tid is no longer -1) or _eof has been set. Any unread body of the
* previous value is skipped first, NOP padding is skipped, a binary version
* marker is surfaced as a symbol value and an annotation wrapper is unwrapped
* to the value it contains. Always clears _has_next_needed on exit.
*
* @throws IOException if reading or skipping input fails.
*/
private final void has_next_helper_raw() throws IOException
{
// forget the previous value; this sets _value_tid to -1 which keeps the loop going
clear_value();
while (_value_tid == -1 && !_eof) {
switch (_state) {
case S_BEFORE_FIELD:
assert _value_field_id == SymbolTable.UNKNOWN_SYMBOL_ID;
_value_field_id = read_field_id();
if (_value_field_id == UnifiedInputStreamX.EOF) {
// FIXME why is EOF ever okay in the middle of a struct?
assert UnifiedInputStreamX.EOF == SymbolTable.UNKNOWN_SYMBOL_ID;
_eof = true;
break;
}
// fall through to try to read the type id right now
case S_BEFORE_TID:
_state = State.S_BEFORE_VALUE; // read_type_id may change this for null and bool values
_value_tid = read_type_id();
if (_value_tid == UnifiedInputStreamX.EOF) {
_state = State.S_EOF;
_eof = true;
break;
}
if (_value_tid == _Private_IonConstants.tidNopPad) {
// skips size of pad and resets State machine
skip(_value_len);
// clear_value() puts _value_tid back to -1 so the loop reads another header
clear_value();
break;
}
else if (_value_tid == _Private_IonConstants.tidTypedecl) {
assert (_value_tid == (BINARY_VERSION_MARKER_TID & 0xff)); // the bvm tid happens to be type decl
if (_value_len == BINARY_VERSION_MARKER_LEN ) {
if (getDepth() != 0) {
// In Ion text, we can interpret an IVM in the wrong position as an ordinary Symbol,
// but in Ion binary, the BVM is unambiguously an IVM rather than a Symbol, and it
// is not allowed in any container type.
throw newErrorAt("Encountered IVM type code E0 below the top level");
}
// this isn't valid for any type descriptor except the first byte
// of a 4 byte version marker - so lets read the rest
load_version_marker();
_value_type = IonType.SYMBOL;
}
else {
// if it's not a bvm then it's an ordinary annotated value
// The next call changes our positions to that of the
// wrapped value, but we need to remember the overall
// wrapper position.
long wrapperStart = _position_start;
long wrapperLen = _position_len;
_value_type = load_annotation_start_with_value_type();
// Wrapper and wrapped value should finish together!
long wrapperFinish = wrapperStart + wrapperLen;
long wrappedValueFinish = _position_start + _position_len;
if (wrapperFinish != wrappedValueFinish) {
throw newErrorAt(String.format("Wrapper length mismatch: wrapper %s wrapped value %s", wrapperFinish, wrappedValueFinish));
}
// report the position of the whole wrapper rather than of the wrapped value
_position_start = wrapperStart;
_position_len = wrapperLen;
}
}
else {
// if it's not a typedesc then we just get the IonType and we're done
_value_type = get_iontype_from_tid(_value_tid);
}
break;
case S_BEFORE_VALUE:
// the previous value's body was never consumed; step over it
skip(_value_len);
// fall through to "after value"
case S_AFTER_VALUE:
// choose what to expect next; the loop then comes around to read it
if (isInStruct()) {
_state = State.S_BEFORE_FIELD;
}
else {
_state = State.S_BEFORE_TID;
}
break;
case S_EOF:
break;
default:
error("internal error: raw binary reader in invalid state!");
}
}
// we always want to exit here
_has_next_needed = false;
return;
}
/**
 * Consumes the bytes of the binary version marker that follow its first
 * byte, reporting an error on any mismatch, and then sets the current
 * value up to look like the symbol $ion_1_0.
 *
 * @throws IOException if reading the input fails.
 */
private final void load_version_marker() throws IOException
{
    final byte[] marker = _Private_IonConstants.BINARY_VERSION_MARKER_1_0;
    int idx = 1; // byte 0 was already consumed as the type descriptor
    while (idx < marker.length) {
        int expected = marker[idx] & 0xff;
        int actual = read();
        if (actual != expected) {
            throwErrorAt("invalid binary image");
        }
        idx++;
    }

    // so it's a 4 byte version marker - make it look like
    // the symbol $ion_1_0 ...
    _value_tid = _Private_IonConstants.tidSymbol;
    _value_start = 0;
    _value_len = 0; // so skip will go the right place - here
    _v.setValue(ION_1_0_SID);
    _v.setAuthoritativeType(AS_TYPE.int_value);
    // _value_type = IonType.SYMBOL; the caller does this so it's easier to see

    _value_field_id = SymbolTable.UNKNOWN_SYMBOL_ID;
    _value_is_null = false;
    _value_lob_is_ready = false;
    _annotations.clear();

    _state = State.S_AFTER_VALUE;
}
/**
 * Called after an annotation wrapper header has been read. Records the
 * span of the annotation bytes in the _annotations save point, steps over
 * them and reads the type descriptor of the wrapped value.
 *
 * @return the Ion type of the wrapped value.
 * @throws IOException if reading the input fails.
 */
private final IonType load_annotation_start_with_value_type() throws IOException
{
    // The annotation bytes are not decoded here; the save point pins the
    // input buffers until it is cleared, not later than the next call
    // to hasNext().
    int annotation_bytes = readVarUInt();
    _annotations.start(getPosition(), 0);
    skip(annotation_bytes);
    _annotations.markEnd();

    // this reads the wrapped value's type id and also replaces the
    // length (wrapper length + annotations) that was there before the call
    _value_tid = read_type_id();

    if (_value_tid == _Private_IonConstants.tidNopPad) {
        throwErrorAt("NOP padding is not allowed within annotation wrappers.");
    }
    if (_value_tid == UnifiedInputStreamX.EOF) {
        throwErrorAt("unexpected EOF encountered where a type descriptor byte was expected");
    }
    if (_value_tid == _Private_IonConstants.tidTypedecl) {
        throwErrorAt("An annotation wrapper may not contain another annotation wrapper.");
    }

    IonType wrapped_type = get_iontype_from_tid(_value_tid);
    assert( wrapped_type != null );
    return wrapped_type;
}
/**
* Decodes the annotation ids of the current value into _annotation_ids, if
* the _annotations save point still holds them, and clears the save point
* afterwards so a later call returns the count already computed.
*
* @return the number of annotations on the current value.
* @throws IllegalStateException if the reader is not positioned on a value.
*/
protected final int load_annotations() {
switch (_state) {
case S_BEFORE_VALUE:
case S_AFTER_VALUE:
if (_annotations.isDefined()) {
// read from the saved annotation span; the regular limit is put aside meanwhile
long local_remaining_save = _local_remaining;
_input._save_points.savePointPushActive(_annotations, getPosition(), 0);
_local_remaining = NO_LIMIT; // limit will be handled by the save point
_annotation_count = 0;
try {
do {
int a = readVarUIntOrEOF();
if (a == UnifiedInputStreamX.EOF) {
break;
}
load_annotation_append(a);
} while (!isEOF());
}
catch (IOException e) {
error(e);
}
// NOTE(review): not in a finally block - if error(e) throws, the save point
// stays active and _local_remaining stays NO_LIMIT; confirm that is acceptable.
_input._save_points.savePointPopActive(_annotations);
_local_remaining = local_remaining_save;
_annotations.clear();
}
// else the count stays zero (or it was previously set)
break;
default:
throw new IllegalStateException("annotations require the value to be ready");
}
return _annotation_count;
}
/**
 * Appends one annotation id to _annotation_ids, doubling the array when
 * it is full.
 *
 * @param a the annotation symbol id to store.
 */
private final void load_annotation_append(int a)
{
    int capacity = _annotation_ids.length;
    if (_annotation_count >= capacity) {
        int[] grown = new int[capacity * 2];
        System.arraycopy(_annotation_ids, 0, grown, 0, capacity);
        _annotation_ids = grown;
    }
    _annotation_ids[_annotation_count] = a;
    _annotation_count++;
}
/**
 * Forgets the current value: type, type id (set to -1), null flag, lob
 * readiness, field id, cached scalar and annotations.
 */
private final void clear_value()
{
    _value_tid = -1;
    _value_type = null;
    _value_field_id = SymbolTable.UNKNOWN_SYMBOL_ID;
    _value_is_null = false;
    _value_lob_is_ready = false;

    _annotations.clear();
    _annotation_count = 0;

    _v.clear();
}
/**
 * Reads the VarUInt field id that precedes a struct member.
 *
 * @return the field SID, or -1 if at EOF.
 * @throws IOException if reading the input fails.
 */
private final int read_field_id() throws IOException
{
    return readVarUIntOrEOF();
}
/**
* Reads one type descriptor byte plus any trailing VarUInt length and
* records the result in _value_tid, _value_len, _value_start,
* _position_start and _position_len. For nulls and booleans, whose content
* lives in the descriptor itself, it also sets _value_is_null or
* _value_is_true and moves _state to S_AFTER_VALUE. For NOP padding it
* resets _state to expect the next field or type descriptor.
*
* @return the type code, tidNopPad for NOP padding, or
* UnifiedInputStreamX.EOF when no byte could be read (in which case none of
* the fields above are updated).
* @throws IOException if reading the input fails.
*/
private final int read_type_id() throws IOException
{
long start_of_tid = _input.getPosition();
// assume a one byte header; corrected below when a length field follows
long start_of_value = start_of_tid + 1;
int td = read();
if (td < 0) {
return UnifiedInputStreamX.EOF;
}
// split the descriptor into its type code and its low (length) nibble
int tid = _Private_IonConstants.getTypeCode(td);
long len = _Private_IonConstants.getLowNibble(td);
// NOP Padding
if (tid == _Private_IonConstants.tidNull && len != _Private_IonConstants.lnIsNull) {
if (len == _Private_IonConstants.lnIsVarLen) {
len = readVarUInt();
}
_state = _is_in_struct ? State.S_BEFORE_FIELD : State.S_BEFORE_TID;
tid = _Private_IonConstants.tidNopPad; // override typeId to use Pad marker
}
else if (len == _Private_IonConstants.lnIsVarLen) {
// For now, only allow *container* lengths to exceed Integer.MAX_VALUE.
// Note: for annotated scalars that exceed the limit, this method will be called again for the wrapped
// value's type ID, and will fail.
boolean isLongLengthAllowed = (
tid == _Private_IonConstants.tidTypedecl // Annotation wrapper
|| tid == _Private_IonConstants.tidList
|| tid == _Private_IonConstants.tidSexp
|| tid == _Private_IonConstants.tidStruct
);
len = readVarUInt(isLongLengthAllowed);
start_of_value = _input.getPosition();
}
else if (tid == _Private_IonConstants.tidNull) {
// the null type with the null nibble (the NOP pad case was handled above)
_value_is_null = true;
len = 0;
_state = State.S_AFTER_VALUE;
}
else if (len == _Private_IonConstants.lnIsNull) {
// a typed null: no body follows
_value_is_null = true;
len = 0;
_state = State.S_AFTER_VALUE;
}
else if (tid == _Private_IonConstants.tidBoolean) {
// for booleans the nibble is the value itself
switch ((int) len) {
case _Private_IonConstants.lnBooleanFalse:
_value_is_true = false;
break;
case _Private_IonConstants.lnBooleanTrue:
_value_is_true = true;
break;
default:
throwErrorAt("invalid length nibble in boolean value: "+len);
break;
}
len = 0;
_state = State.S_AFTER_VALUE;
}
else if (tid == _Private_IonConstants.tidStruct) {
if ((_struct_is_ordered = (len == 1))) {
// special case of an ordered struct, it gets the
// otherwise impossible to have length of 1
len = readVarUInt(true);
if (len == 0) {
throwErrorAt("Structs flagged as having ordered keys must contain at least one key/value pair.");
}
start_of_value = _input.getPosition();
}
}
_value_tid = tid;
_value_len = len;
// TODO Keeping track of _value_start is only necessary because top-level
// symbol values are treated differently than all other values: they
// are read during has_next_helper_user in order to compare them
// against the IVM (symbol 2). This behavior is actually contrary to
// the spec, and leads to the reader position being advanced PAST
// the value BEFORE a *Value() method is even called. Once that is fixed,
// _value_start can be removed, and _input._pos can be used to find the
// start of the value at the current valid position.
// amazon-ion/ion-java/issues/88 tracks the fix for bringing IVM handling up to
// spec.
_value_start = start_of_value;
// total span of the value: header bytes plus body
_position_len = len + (start_of_value - start_of_tid);
_position_start = start_of_tid;
return tid;
}
/**
 * Maps a binary type code to its IonType.
 *
 * @param tid the type code taken from a type descriptor byte.
 * @return the matching IonType; null for an annotation wrapper, whose
 * real type is only known once the wrapped value is read. An error is
 * raised for an unrecognized code.
 */
private final IonType get_iontype_from_tid(int tid)
{
    switch (tid) {
    case _Private_IonConstants.tidNull:      // 0
        return IonType.NULL;
    case _Private_IonConstants.tidBoolean:   // 1
        return IonType.BOOL;
    case _Private_IonConstants.tidPosInt:    // 2
    case _Private_IonConstants.tidNegInt:    // 3
        return IonType.INT;
    case _Private_IonConstants.tidFloat:     // 4
        return IonType.FLOAT;
    case _Private_IonConstants.tidDecimal:   // 5
        return IonType.DECIMAL;
    case _Private_IonConstants.tidTimestamp: // 6
        return IonType.TIMESTAMP;
    case _Private_IonConstants.tidSymbol:    // 7
        return IonType.SYMBOL;
    case _Private_IonConstants.tidString:    // 8
        return IonType.STRING;
    case _Private_IonConstants.tidClob:      // 9
        return IonType.CLOB;
    case _Private_IonConstants.tidBlob:      // 10 A
        return IonType.BLOB;
    case _Private_IonConstants.tidList:      // 11 B
        return IonType.LIST;
    case _Private_IonConstants.tidSexp:      // 12 C
        return IonType.SEXP;
    case _Private_IonConstants.tidStruct:    // 13 D
        return IonType.STRUCT;
    case _Private_IonConstants.tidTypedecl:  // 14 E
        return null; // we don't know yet
    default:
        throw newErrorAt("unrecognized value type encountered: "+tid);
    }
}
/**
 * Enters the current container value. The position and limit to resume
 * with after the container are pushed on the container stack, and the
 * reader is then limited to the container's own length.
 *
 * @throws IllegalStateException if there is no current value or it is
 * not a struct, list or sexp.
 */
public void stepIn()
{
    if (_eof || _value_type == null) {
        throw new IllegalStateException();
    }
    boolean is_container = _value_type == IonType.STRUCT
                        || _value_type == IonType.LIST
                        || _value_type == IonType.SEXP;
    if (!is_container) {
        throw new IllegalStateException();
    }

    // a null container has no body, any other has not been consumed yet
    assert( _state == (_value_is_null ? State.S_AFTER_VALUE : State.S_BEFORE_VALUE) );

    // where value processing takes up again when we step out
    long resume_position = getPosition() + _value_len;
    long outer_remaining = _local_remaining;
    if (outer_remaining != NO_LIMIT) {
        // a negative result is clamped; we'll see an EOF down the road
        // TODO: should we error now?
        outer_remaining = Math.max(0L, outer_remaining - _value_len);
    }
    push(_parent_tid, resume_position, outer_remaining);

    boolean entering_struct = (_value_tid == _Private_IonConstants.tidStruct);
    _is_in_struct = entering_struct;
    _local_remaining = _value_len;
    _state = entering_struct ? State.S_BEFORE_FIELD : State.S_BEFORE_TID;
    _parent_tid = _value_tid;

    clear_value();
    _has_next_needed = true;
}
/**
* Leaves the current container: pops the entry pushed by stepIn(), restores
* the parent's type and state, skips any unread remainder of the container
* and finally restores the parent's limit.
*
* @throws IllegalStateException if the reader is at the top level.
*/
public void stepOut()
{
if (getDepth() < 1) {
throw new IllegalStateException(IonMessages.CANNOT_STEP_OUT);
}
// first we get the top values, then we
// pop them all off in one fell swoop.
long next_position = get_top_position();
long local_remaining = get_top_local_remaining();
int parent_tid = get_top_type();
pop();
// the EOF that was seen (if any) belonged to the container being left
_eof = false;
_parent_tid = parent_tid;
// later, only after we've skipped to our new location: _local_remaining = local_remaining;
if (_parent_tid == _Private_IonConstants.tidStruct) {
_is_in_struct = true;
_state = State.S_BEFORE_FIELD;
}
else {
_is_in_struct = false;
_state = State.S_BEFORE_TID;
}
_has_next_needed = true;
clear_value();
long curr_position = getPosition();
if (next_position > curr_position) {
// part of the container is unread; skip() still runs under the container's own limit here
try {
long distance = next_position - curr_position;
int max_skip = Integer.MAX_VALUE - 1; // -1 just in case
while (distance > max_skip) {
skip(max_skip);
distance -= max_skip;
}
if (distance > 0) {
skip(distance);
}
}
catch (IOException e) {
error(e);
}
}
else if (next_position < curr_position) {
// more was read than the container's length allows
String message = "invalid position during stepOut, current position "
+ curr_position
+ " next value at "
+ next_position;
error(message);
}
assert(next_position == getPosition());
_local_remaining = local_remaining;
}
/**
 * Returns the number of bytes of the current blob or clob that have not
 * yet been read. The first call on a value initializes the count from
 * the value's length (0 for a null lob).
 *
 * @return the remaining lob byte count.
 * @throws IllegalStateException if the reader is not positioned on a
 * BLOB or CLOB value; this includes having no current value at all,
 * which previously surfaced as a NullPointerException from the switch.
 */
public int byteSize()
{
    // the comparison also rejects a null _value_type
    if (_value_type != IonType.BLOB && _value_type != IonType.CLOB) {
        throw new IllegalStateException("only valid for LOB values");
    }
    if (!_value_lob_is_ready) {
        _value_lob_remaining = _value_is_null ? 0 : (int) _value_len;
        _value_lob_is_ready = true;
    }
    return _value_lob_remaining;
}
/**
 * Copies the current lob into a freshly allocated array.
 *
 * @return the lob bytes, or null when the value is a null lob.
 * @throws IllegalStateException if the current value is not a lob.
 */
public byte[] newBytes()
{
    int len = byteSize(); // does our validation for us
    if (_value_is_null) {
        return null;
    }
    byte[] bytes = new byte[len];
    getBytes(bytes, 0, len);
    return bytes;
}
/**
 * Reads lob bytes into {@code buffer}, never more than {@code len} and
 * never more than what byteSize() reports as remaining.
 *
 * @param buffer destination array.
 * @param offset index in {@code buffer} of the first byte to store.
 * @param len maximum number of bytes to read.
 * @return the value returned by readBytes for the bounded request.
 */
public int getBytes(byte[] buffer, int offset, int len)
{
    int available = byteSize(); // again validation
    int wanted = Math.min(available, len);
    return readBytes(buffer, offset, wanted);
}
/**
 * Reads up to {@code len} bytes of the current value into {@code buffer}
 * and updates the lob bookkeeping: when nothing remains the state moves
 * to S_AFTER_VALUE, otherwise _value_len is set to what remains.
 *
 * @param buffer destination array.
 * @param offset index in {@code buffer} of the first byte to store.
 * @param len maximum number of bytes to read.
 * @return the number of bytes read; 0 when {@code len} is 0.
 * @throws IllegalArgumentException if {@code offset} or {@code len} is negative.
 */
public int readBytes(byte[] buffer, int offset, int len)
{
    if (offset < 0 || len < 0) {
        throw new IllegalArgumentException();
    }
    if (len < 1) {
        return 0;
    }

    int count;
    try {
        count = read(buffer, offset, len);
        _value_lob_remaining -= count;
    }
    catch (IOException e) {
        count = -1;
        error(e);
    }

    if (_value_lob_remaining != 0) {
        _value_len = _value_lob_remaining;
    }
    else {
        _state = State.S_AFTER_VALUE;
    }
    return count;
}
/** @return the number of containers currently stepped into (0 at top level). */
public int getDepth()
{
    int entries = _container_top / POS_STACK_STEP;
    return entries;
}
/**
 * @return the type of the current value, or null when no value is loaded.
 * No check is made that next() has been called.
 */
public IonType getType()
{
    IonType current = _value_type;
    return current;
}
/** @return true while the container most recently stepped into is a struct. */
public boolean isInStruct()
{
    boolean in_struct = _is_in_struct;
    return in_struct;
}
/** @return true when the current value's header marked it as null. */
public boolean isNullValue()
{
    boolean is_null = _value_is_null;
    return is_null;
}
//
// helper read routines - these were lifted
// from SimpleByteBuffer.SimpleByteReader
//
/**
 * Reads one byte, honoring the local limit.
 *
 * @return the byte read from the input, or UnifiedInputStreamX.EOF when
 * the local limit is exhausted.
 * @throws IOException if reading the input fails.
 */
private final int read() throws IOException
{
    if (_local_remaining == NO_LIMIT) {
        return _input.read();
    }
    if (_local_remaining < 1) {
        return UnifiedInputStreamX.EOF;
    }
    _local_remaining--;
    return _input.read();
}
/**
 * Reads up to {@code len} bytes into {@code dst}, honoring the local
 * limit: a request larger than the limit is shortened to it, and a
 * request made when the limit is exhausted is an unexpected EOF.
 *
 * @param dst destination array.
 * @param start index in {@code dst} of the first byte to store.
 * @param len maximum number of bytes to read.
 * @return the count reported by the underlying input.
 * @throws IllegalArgumentException if {@code dst} is null or the range
 * {@code start..start+len} does not lie within it.
 * @throws IOException if reading the input fails.
 */
private final int read(byte[] dst, int start, int len) throws IOException
{
    // "len > dst.length - start" is the overflow-safe form of
    // "start + len > dst.length": the sum of two large ints can wrap
    // negative and slip past the check. It also rejects start > dst.length.
    if (dst == null || start < 0 || len < 0 || len > dst.length - start) {
        throw new IllegalArgumentException();
    }
    int read;
    if (_local_remaining == NO_LIMIT) {
        read = _input.read(dst, start, len);
    }
    else {
        if (len > _local_remaining) {
            if (_local_remaining < 1) {
                throwUnexpectedEOFException();
            }
            len = (int) _local_remaining;
        }
        read = _input.read(dst, start, len);
        _local_remaining -= read;
    }
    return read;
}
/**
 * Calls {@link #read(byte[], int, int)} repeatedly until exactly
 * {@code len} bytes have been stored. A read that delivers nothing is
 * reported as an unexpected EOF.
 *
 * @param buf The buffer to read to.
 * @param offset The index in the buffer at which to store the first byte.
 * @param len The length of the data to read.
 * @throws IOException if reading the input fails.
 */
public void readAll(byte[] buf, int offset, int len) throws IOException
{
    int position = offset;
    int outstanding = len;
    while (outstanding > 0)
    {
        int got = read(buf, position, outstanding);
        if (got <= 0)
        {
            throwUnexpectedEOFException();
        }
        position += got;
        outstanding -= got;
    }
}
/**
 * @return true when nothing more can be read: with no local limit this
 * asks the input, otherwise it is true once the limit is used up.
 */
private final boolean isEOF() {
    if (_local_remaining == NO_LIMIT) {
        return _input.isEOF();
    }
    return _local_remaining <= 0;
}
/** @return the current position as reported by the underlying input. */
private final long getPosition() {
    return _input.getPosition();
}
/**
 * Skips {@code len} bytes of input, honoring the local limit: a request
 * larger than the limit is shortened to it, and a request made when the
 * limit is exhausted is an unexpected EOF. The input is asked to skip in
 * pieces of at most Integer.MAX_VALUE bytes.
 *
 * @param len number of bytes to skip; must not be negative.
 * @throws IllegalArgumentException if {@code len} is negative.
 * @throws IOException if skipping fails.
 */
private final void skip(long len) throws IOException
{
    if (len < 0) {
        throw new IllegalArgumentException();
    }

    if (_local_remaining != NO_LIMIT) {
        if (len > _local_remaining) {
            if (_local_remaining < 1) {
                throwUnexpectedEOFException();
            }
            len = _local_remaining;
        }
        _local_remaining -= len;
    }

    long left = len;
    while (left > 0) {
        int chunk = (int) Math.min(Integer.MAX_VALUE, left);
        _input.skip(chunk);
        left -= chunk;
    }
}
/**
 * Reads {@code len} bytes as a big-endian unsigned integer.
 *
 * @param len number of bytes to read, 0 through 8; 0 yields the value 0.
 * @return the accumulated value.
 * @throws IonException if {@code len} is outside 0..8.
 * @throws IOException if reading the input fails.
 */
protected final long readULong(int len) throws IOException
{
    if (len < 0 || len > 8) {
        throw new IonException("value too large for Java long");
    }
    long value = 0;
    for (int remaining = len; remaining > 0; remaining--) {
        int b = read();
        if (b < 0) throwUnexpectedEOFException();
        value = (value << 8) | b;
    }
    return value;
}
/**
 * Reads {@code len} bytes as the big-endian magnitude of an integer.
 *
 * @param len number of magnitude bytes; 0 or less yields zero.
 * @param is_negative whether the result carries a negative sign.
 * @return the value read.
 * @throws IOException if reading the input fails.
 */
protected final BigInteger readBigInteger(int len, boolean is_negative) throws IOException
{
    if (len <= 0) {
        return BigInteger.ZERO;
    }
    byte[] magnitude = new byte[len];
    readAll(magnitude, 0, len);
    return new BigInteger(is_negative ? -1 : 1, magnitude);
}
/**
 * Reads a VarInt from the input.
 *
 * @return the decoded value.
 * @throws IOException if reading the input fails.
 */
protected final int readVarInt() throws IOException
{
    int firstByte = read();
    return readVarInt(firstByte);
}
/**
 * Reads a VarInt, returning null to mean -0 (the single byte 0xC0).
 *
 * @return the decoded value, or null for negative zero.
 * @throws IOException if reading the input fails.
 */
protected final Integer readVarInteger() throws IOException
{
    int firstByte = read();
    boolean is_negative_zero = (firstByte == 0xC0);
    if (is_negative_zero) {
        return null;
    }
    return Integer.valueOf(readVarInt(firstByte));
}
/**
* Reads a VarInt after its first byte was read. The first byte carries the sign, and -0 is represented
* differently depending on which protected API was called, so the callers read that byte themselves.
* At most five octets are accepted; the result must fit in a Java int.
*
* @param firstByte the first VarInt octet, already read by the caller (negative if the read hit EOF)
* @return the decoded value
* @throws IOException if reading the input fails
*/
private int readVarInt(int firstByte) throws IOException {
// VarInt uses the high-order bit of the last octet as a marker; some (but not all) 5-byte VarInts can fit
// into a Java int.
// To validate overflows we accumulate the VarInt in a long and then check if it can be represented by an int
//
// see https://amazon-ion.github.io/ion-docs/docs/binary.html#varuint-and-varint-fields
long retValue = 0;
int b = firstByte;
boolean isNegative = false;
for (;;) {
if (b < 0) throwUnexpectedEOFException();
// bit 0x40 of the first octet is the sign
if ((b & 0x40) != 0) {
isNegative = true;
}
// the first octet contributes only its low 6 bits
retValue = (b & 0x3F);
if ((b & 0x80) != 0) break;
if ((b = read()) < 0) throwUnexpectedEOFException();
retValue = (retValue << 7) | (b & 0x7F);
if ((b & 0x80) != 0) break;
// for the rest, they're all the same
if ((b = read()) < 0) throwUnexpectedEOFException();
retValue = (retValue << 7) | (b & 0x7F);
if ((b & 0x80) != 0) break;
// for the rest, they're all the same
if ((b = read()) < 0) throwUnexpectedEOFException();
retValue = (retValue << 7) | (b & 0x7F);
if ((b & 0x80) != 0) break;
// for the rest, they're all the same
if ((b = read()) < 0) throwUnexpectedEOFException();
retValue = (retValue << 7) | (b & 0x7F);
if ((b & 0x80) != 0) break;
// Don't support anything above a 5-byte VarInt for now, see https://github.com/amazon-ion/ion-java/issues/146
throwVarIntOverflowException(5);
}
if (isNegative) {
retValue = -retValue;
}
// the value is only accepted when the narrowing cast round-trips
int retValueAsInt = (int) retValue;
if (retValue != ((long) retValueAsInt)) {
throwVarIntOverflowException(4);
}
return retValueAsInt;
}
/**
 * Attempts to read a VarUInt that must fit in an int.
 *
 * @return the value of the VarUInt, or -1 if EOF has been reached.
 * @throws IOException if thrown when reading from the stream.
 */
protected final int readVarUIntOrEOF() throws IOException
{
    long value = readVarUIntOrEOF(false);
    return (int) value;
}
/**
* Attempts to read a VarUInt of at most seven octets. EOF is only a normal result before the first
* octet; running out of input after that is reported as an unexpected EOF.
* @param longAllowed true if values over Integer.MAX_VALUE are allowed. If false, such values will raise an error.
* @return the value of the VarUInt, or -1 if EOF has been reached.
* @throws IOException if thrown when reading from the stream.
*/
protected final long readVarUIntOrEOF(boolean longAllowed) throws IOException
{
// VarUInt uses the high-order bit of the last octet as a marker; some (but not all) 5-byte VarUInt can fit
// into a Java int.
// To validate overflows we accumulate the VarInt in a long and then check if it can be represented by an int
//
// see https://amazon-ion.github.io/ion-docs/docs/binary.html#varuint-and-varint-fields
long retvalue = 0;
int b;
for (;;) { // fake loop to create a "goto done"
// nothing has been read yet, so EOF here is reported to the caller rather than thrown
if ((b = read()) < 0) {
return UnifiedInputStreamX.EOF;
}
// each octet contributes 7 bits; bit 0x80 marks the final octet
retvalue = (retvalue << 7) | (b & 0x7F);
if ((b & 0x80) != 0) break;
if ((b = read()) < 0) throwUnexpectedEOFException();
retvalue = (retvalue << 7) | (b & 0x7F);
if ((b & 0x80) != 0) break;
if ((b = read()) < 0) throwUnexpectedEOFException();
retvalue = (retvalue << 7) | (b & 0x7F);
if ((b & 0x80) != 0) break;
if ((b = read()) < 0) throwUnexpectedEOFException();
retvalue = (retvalue << 7) | (b & 0x7F);
if ((b & 0x80) != 0) break;
if ((b = read()) < 0) throwUnexpectedEOFException();
retvalue = (retvalue << 7) | (b & 0x7F);
if ((b & 0x80) != 0) break;
if ((b = read()) < 0) throwUnexpectedEOFException();
retvalue = (retvalue << 7) | (b & 0x7F);
if ((b & 0x80) != 0) break;
if ((b = read()) < 0) throwUnexpectedEOFException();
retvalue = (retvalue << 7) | (b & 0x7F);
if ((b & 0x80) != 0) break;
// Don't support anything above a 7-byte VarUInt for now, see https://github.com/amazon-ion/ion-java/issues/146
throwVarIntOverflowException(7);
}
if (!longAllowed) {
// the value is only accepted when the narrowing cast round-trips
int retValueAsInt = (int) retvalue;
if (retvalue != ((long) retValueAsInt)) {
throwVarIntOverflowException(4);
}
return retValueAsInt;
}
return retvalue;
}
/**
 * Reads a VarUInt that must fit in an int, raising an error if EOF is
 * encountered.
 *
 * @return the value of the VarUInt.
 * @throws IOException if thrown when reading from the stream.
 */
protected final int readVarUInt() throws IOException
{
    long value = readVarUInt(false);
    return (int) value;
}
/**
 * Reads a VarUInt, treating EOF as an error.
 *
 * @param longAllowed true if values over Integer.MAX_VALUE are allowed. If false, such values will raise an error.
 * @return the value of the VarUInt.
 * @throws IOException if thrown when reading from the stream.
 */
protected final long readVarUInt(boolean longAllowed) throws IOException
{
    long value = readVarUIntOrEOF(longAllowed);
    boolean hit_eof = (value == UnifiedInputStreamX.EOF);
    if (hit_eof) {
        throwUnexpectedEOFException();
    }
    return value;
}
/**
 * Reads a float of the given encoded length.
 *
 * @param len 0 (positive zero), 4 (32 bit float) or 8 (64 bit float).
 * @return the value widened to double.
 * @throws IOException if {@code len} is any other value, or if reading fails.
 */
protected final double readFloat(int len) throws IOException
{
    switch (len) {
    case 0:
        // special case, return pos zero
        return 0.0d;
    case 4: {
        long bits = this.readULong(len);
        return (double) Float.intBitsToFloat((int) (bits & 0xffffffffL));
    }
    case 8: {
        long bits = this.readULong(len);
        return Double.longBitsToDouble(bits);
    }
    default:
        throw new IOException("Length of float read must be 0, 4, or 8");
    }
}
/**
* Reads a decimal of {@code len} bytes: a VarInt exponent followed by the
* coefficient in sign-and-magnitude form, occupying the rest of the value.
* While reading, the local limit is narrowed to {@code len}; afterwards it
* is set to the enclosing limit minus {@code len}.
*
* @param len encoded length of the decimal; 0 denotes the value 0.
* @return the decoded Decimal, which may be a negative zero.
* @throws IOException if reading the input fails.
*/
protected final Decimal readDecimal(int len) throws IOException
{
MathContext mathContext = MathContext.UNLIMITED;
Decimal bd;
// we only write out the '0' value as the nibble 0
if (len == 0) {
bd = Decimal.valueOf(0, mathContext);
}
else {
// otherwise we do it the hard way ....
// remember what the enclosing limit will be once this value is consumed
long save_limit = NO_LIMIT;
if (_local_remaining != NO_LIMIT) {
save_limit = _local_remaining - len;
}
_local_remaining = len;
int exponent = readVarInt();
BigInteger value;
int signum;
// whatever is left of the value after the exponent is the coefficient
if (_local_remaining > 0)
{
byte[] bits = new byte[(int) _local_remaining];
readAll(bits, 0, (int) _local_remaining);
signum = 1;
// the top bit of the first coefficient byte is the sign
if (bits[0] < 0)
{
// value is negative, clear the sign
bits[0] &= 0x7F;
signum = -1;
}
value = new BigInteger(signum, bits);
}
else {
signum = 0;
value = BigInteger.ZERO;
}
// Ion stores exponent, BigDecimal uses the negation "scale"
int scale = -exponent;
// a zero magnitude with the sign bit set is negative zero
if (value.signum() == 0 && signum == -1)
{
assert value.equals(BigInteger.ZERO);
bd = Decimal.negativeZero(scale, mathContext);
}
else
{
bd = Decimal.valueOf(value, scale, mathContext);
}
_local_remaining = save_limit;
}
return bd;
}
/**
* Reads a timestamp of {@code len} bytes: a VarInt offset followed by the
* VarUInt year and, as far as the length allows, month, day, hour and
* minute (always together), second, and a decimal fraction of a second.
* The precision of the result follows from the last component present.
* While reading, the local limit is narrowed to {@code len}; afterwards it
* is set to the enclosing limit minus {@code len}.
*
* @param len encoded length of the timestamp.
* @return the decoded Timestamp, or null when {@code len} is less than 1.
* @throws IOException if reading the input fails.
* @see IonBinary.Reader#readTimestampValue
*/
protected final Timestamp readTimestamp(int len) throws IOException
{
if (len < 1) {
// nothing to do here - and the timestamp will be NULL
return null;
}
int year = 0, month = 0, day = 0, hour = 0, minute = 0, second = 0;
BigDecimal frac = null;
// remember what the enclosing limit will be once this value is consumed
long save_limit = NO_LIMIT;
if (_local_remaining != NO_LIMIT) {
save_limit = _local_remaining - len;
}
_local_remaining = len; // > 0
// first up is the offset, which requires a special int reader
// to return the -0 as a null Integer
Integer offset = readVarInteger();
// now we'll read the struct values from the input stream
// year is from 0001 to 9999
// or 0x1 to 0x270F or 14 bits - 1 or 2 bytes
year = readVarUInt();
Precision p = Precision.YEAR; // our lowest significant option
// now we look for months
if (_local_remaining > 0) {
month = readVarUInt();
p = Precision.MONTH;
// now we look for days
if (_local_remaining > 0) {
day = readVarUInt();
p = Precision.DAY;
// now we look for hours and minutes
if (_local_remaining > 0) {
hour = readVarUInt();
minute = readVarUInt();
p = Precision.MINUTE;
if (_local_remaining > 0) {
second = readVarUInt();
p = Precision.SECOND;
if (_local_remaining > 0) {
// whatever is left is the fractional part of the second, stored as a decimal
frac = readDecimal((int) _local_remaining);
if (frac.compareTo(BigDecimal.ZERO) < 0 || frac.compareTo(BigDecimal.ONE) >= 0) {
throwErrorAt(
"The fractional seconds value in a timestamp must be greater than or "
+ "equal to zero and less than one."
);
}
}
}
}
}
}
// restore our outer limit(s)
_local_remaining = save_limit;
// now we let timestamp put it all together
try {
Timestamp val =
Timestamp.createFromUtcFields(p, year, month, day, hour,
minute, second, frac, offset);
return val;
}
catch (IllegalArgumentException e)
{
// Rewrap to the expected type.
throw newErrorAt("Invalid timestamp encoding: " + e.getMessage());
}
}
/**
 * Reads {@code numberOfBytes} bytes of UTF-8 data and decodes them into a String.
 * <p>
 * Strings that fit into the pooled input buffer are handed to
 * {@link #readStringWithReusableBuffer}. Longer strings are decoded in
 * chunks through that same buffer.
 *
 * @param numberOfBytes length of the encoded string in bytes
 * @return the decoded string
 * @throws IOException if reading the underlying input fails
 */
protected final String readString(int numberOfBytes) throws IOException
{
    final ByteBuffer inputBuffer = pooledUtf8InputBuffer.getBuffer();

    // Fast path: the whole string fits in the reusable buffer, so the
    // chunking loop and its bookkeeping can be skipped entirely.
    if (inputBuffer.capacity() >= numberOfBytes) {
        return readStringWithReusableBuffer(numberOfBytes, inputBuffer);
    }

    utf8Decoder.prepareDecode(numberOfBytes);

    final long outerRemaining =
        (_local_remaining == NO_LIMIT) ? NO_LIMIT : _local_remaining - numberOfBytes;
    _local_remaining = numberOfBytes;

    // Each pass: top up the buffer behind any carried-over bytes, decode what
    // is there, then slide the undecoded tail (a partial character) back to
    // the front so the next read can complete it.
    final byte[] backing = inputBuffer.array();
    int bytesLeft = numberOfBytes;
    int carried = 0;
    while (bytesLeft > 0) {
        final int room = backing.length - carried;
        final int wanted = Math.min(bytesLeft, room);
        final int got = read(backing, carried, wanted);
        if (got <= 0) {
            // UnifiedInputStreamX doesn't adhere to InputStream's API. See the comments on
            // UnifiedInputStreamX#read() for more information.
            throwUnexpectedEOFException();
        }
        bytesLeft -= got;

        inputBuffer.position(0);
        inputBuffer.limit(carried + got);
        final boolean lastChunk = bytesLeft <= 0;
        utf8Decoder.partialDecode(inputBuffer, lastChunk);

        carried = inputBuffer.remaining();
        if (carried > 0) {
            System.arraycopy(backing, inputBuffer.position(), backing, 0, carried);
        }
    }

    _local_remaining = outerRemaining;
    return utf8Decoder.finishDecode();
}
/**
 * Reads a string in one shot through the supplied buffer.
 * The caller must ensure the buffer's backing array can hold
 * {@code numberOfBytes} bytes.
 *
 * @param numberOfBytes length of the encoded string in bytes
 * @param utf8InputBuffer array-backed buffer that receives the raw bytes
 * @return the decoded string
 * @throws IOException if reading the underlying input fails
 */
private String readStringWithReusableBuffer(int numberOfBytes, ByteBuffer utf8InputBuffer) throws IOException {
    // narrow the local limit to just this string while its bytes are read
    final long outerRemaining =
        (_local_remaining == NO_LIMIT) ? NO_LIMIT : _local_remaining - numberOfBytes;
    _local_remaining = numberOfBytes;

    final byte[] backing = utf8InputBuffer.array();
    readAll(backing, 0, numberOfBytes);

    _local_remaining = outerRemaining;

    // expose exactly the bytes just read to the decoder
    utf8InputBuffer.position(0);
    utf8InputBuffer.limit(numberOfBytes);
    return utf8Decoder.decode(utf8InputBuffer, numberOfBytes);
}
/**
 * Reports that the input ended in the middle of a value, via
 * {@link #throwErrorAt(String)}.
 */
private final void throwUnexpectedEOFException() throws IOException {
    final String message = "unexpected EOF in value";
    throwErrorAt(message);
}
/**
 * Reports a variable-length int that is too long, via
 * {@link #throwErrorAt(String)}.
 *
 * @param byteLimit the byte count quoted in the error message
 */
private final void throwVarIntOverflowException(int byteLimit) throws IOException {
    final String message = "int in stream is too long to fit in " + byteLimit + " bytes.";
    throwErrorAt(message);
}
/**
 * Builds, but does not throw, an {@link IonException} whose message is
 * {@code msg} followed by the value of {@code getPosition()}.
 *
 * @param msg description of the problem
 * @return a new exception carrying the position-annotated message
 */
protected IonException newErrorAt(String msg) {
    return new IonException(msg + " at position " + getPosition());
}
/**
 * Throws the exception produced by {@link #newErrorAt(String)} for {@code msg}.
 *
 * @param msg description of the problem
 */
protected void throwErrorAt(String msg) {
    final IonException failure = newErrorAt(msg);
    throw failure;
}
/**
 * Throws an {@link IonException} carrying {@code msg} as given, with no
 * position suffix.
 *
 * @param msg description of the problem
 */
protected void error(String msg) {
    final IonException failure = new IonException(msg);
    throw failure;
}
/**
 * Throws an {@link IonException} constructed from the given exception.
 *
 * @param e the exception to wrap
 */
protected void error(Exception e) {
    final IonException wrapped = new IonException(e);
    throw wrapped;
}
}