com.amazon.ion.impl.IonReaderTextRawX Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of ion-java Show documentation
Show all versions of ion-java Show documentation
A Java implementation of the Amazon Ion data notation.
/*
* Copyright 2007-2019 Amazon.com, Inc. or its affiliates. All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License").
* You may not use this file except in compliance with the License.
* A copy of the License is located at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* or in the "license" file accompanying this file. This file is distributed
* on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
* express or implied. See the License for the specific language governing
* permissions and limitations under the License.
*/
package com.amazon.ion.impl;
import static com.amazon.ion.SymbolTable.UNKNOWN_SYMBOL_ID;
import static com.amazon.ion.impl.IonTokenConstsX.TOKEN_CLOSE_BRACE;
import static com.amazon.ion.impl.IonTokenConstsX.TOKEN_CLOSE_PAREN;
import static com.amazon.ion.impl.IonTokenConstsX.TOKEN_CLOSE_SQUARE;
import com.amazon.ion.IonException;
import com.amazon.ion.IonTextReader;
import com.amazon.ion.IonType;
import com.amazon.ion.SymbolTable;
import com.amazon.ion.SymbolToken;
import com.amazon.ion.UnknownSymbolException;
import com.amazon.ion.impl.UnifiedSavePointManagerX.SavePoint;
import com.amazon.ion.impl._Private_ScalarConversions.AS_TYPE;
import com.amazon.ion.impl._Private_ScalarConversions.ValueVariant;
import java.io.IOException;
import java.math.BigInteger;
import java.util.Iterator;
/**
* Reader implementation that reads the token stream and validates
* the Ion grammar. This does not care about system values. It
* does not materialize values or convert them. It does mark values
* in the UnifiedInputStream if they might be field names or annotations
* since it does populate these properties directly. Otherwise it
* accepts the TextRawToken's assessment of the type of the next
* token, which is based on as few characters as possible, typically
* 1 but generally less than 5.
*
* This is called by the {@link IonReaderTextSystemX}, which in turn is most
* often called by the {@link IonReaderTextUserX}. One of these two (system
* reader or user reader) should be invoked by the user for reading text Ion
* data. This class is not intended for general use.
*
* This reader can skip values and in doing so it does not
* materialize the contents and it does not validate the contents.
* TODO amazon-ion/ion-java/issues/7 We may want to make validation on skip optional.
*
* This manages the value buffer (_v ValueVariant) and the lob
* content (_lob_*) which is cached in some cases. Its main
* job however is recognizing the correct order of the input
* tokens. This is done in parse_to_next_value (called by hasNext).
*
* The current state is represented by an int (whose value should
* be one of the values of the STATE_* constants). The legal
* transitions are stored in TransitionActions and TransitionActions2.
* The first (TransitionActions) is a two dimensional array whose
* dimensions are state and input token. The value stored is an
* int that represents the action to be taken (ACTION_*). The
* second copy of this data (TransitionActions2) is a one dimensional
* array built from the first and manually dereferenced in the
* parse_to_next_value method. This turns out to be a significant
* performance gain (about 3% of total file parse time). Logically these are the same.
*
*/
abstract class IonReaderTextRawX
implements IonTextReader
{
/**
 * Returns the current value as a {@link BigInteger}. Declared abstract here,
 * so the conversion itself is supplied by a subclass.
 */
public abstract BigInteger bigIntegerValue();
// static final boolean _object_parser = false;
// Debug switch; no use of it is visible in this part of the file.
static final boolean _debug = false;
// NOTE(review): presumably the initial capacity of the container state stack - confirm at the field declaration.
private static final int DEFAULT_STACK_DEPTH = 10;
// NOTE(review): looks like a sentinel for "size not known"; its users are outside this part of the file.
protected static final int UNKNOWN_SIZE = -1;
// Initial length of the _annotations array allocated in init_once(); append_annotation doubles it on demand.
private static final int DEFAULT_ANNOTATION_COUNT = 5;
// Parser states. Each value is used as the first index into the transition
// action table (see makeTransitionActionArray), so they must stay dense and
// within 0..STATE_MAX.
static final int STATE_BEFORE_ANNOTATION_DATAGRAM = 0;
static final int STATE_BEFORE_ANNOTATION_CONTAINED = 1;
static final int STATE_BEFORE_ANNOTATION_SEXP = 2;
static final int STATE_BEFORE_FIELD_NAME = 3;
static final int STATE_BEFORE_VALUE_CONTENT = 4;
static final int STATE_BEFORE_VALUE_CONTENT_SEXP = 5;
static final int STATE_IN_LONG_STRING = 6;
static final int STATE_IN_CLOB_DOUBLE_QUOTED_CONTENT = 7;
static final int STATE_IN_CLOB_TRIPLE_QUOTED_CONTENT = 8;
static final int STATE_IN_BLOB_CONTENT = 9;
static final int STATE_AFTER_VALUE_CONTENTS = 10;
static final int STATE_EOF = 11;
// Highest state value; the transition table is allocated with STATE_MAX + 1 rows.
static final int STATE_MAX = 11;
/**
 * Maps a STATE_* constant to its symbolic name for use in diagnostics.
 *
 * @param state one of the STATE_* values
 * @return the constant's name, or the empty string for an unrecognized value
 */
private final String get_state_name(int state) {
    final String label;
    switch (state) {
    case STATE_BEFORE_ANNOTATION_DATAGRAM:
        label = "STATE_BEFORE_ANNOTATION_DATAGRAM";
        break;
    case STATE_BEFORE_ANNOTATION_CONTAINED:
        label = "STATE_BEFORE_ANNOTATION_CONTAINED";
        break;
    case STATE_BEFORE_ANNOTATION_SEXP:
        label = "STATE_BEFORE_ANNOTATION_SEXP";
        break;
    case STATE_BEFORE_FIELD_NAME:
        label = "STATE_BEFORE_FIELD_NAME";
        break;
    case STATE_BEFORE_VALUE_CONTENT:
        label = "STATE_BEFORE_VALUE_CONTENT";
        break;
    case STATE_BEFORE_VALUE_CONTENT_SEXP:
        label = "STATE_BEFORE_VALUE_CONTENT_SEXP";
        break;
    case STATE_IN_LONG_STRING:
        label = "STATE_IN_LONG_STRING";
        break;
    case STATE_IN_CLOB_DOUBLE_QUOTED_CONTENT:
        label = "STATE_IN_CLOB_DOUBLE_QUOTED_CONTENT";
        break;
    case STATE_IN_CLOB_TRIPLE_QUOTED_CONTENT:
        label = "STATE_IN_CLOB_TRIPLE_QUOTED_CONTENT";
        break;
    case STATE_IN_BLOB_CONTENT:
        label = "STATE_IN_BLOB_CONTENT";
        break;
    case STATE_AFTER_VALUE_CONTENTS:
        label = "STATE_AFTER_VALUE_CONTENTS";
        break;
    case STATE_EOF:
        label = "STATE_EOF";
        break;
    default:
        // unknown states are reported as an empty name rather than an error
        label = "";
        break;
    }
    return label;
}
// Parser actions. These are the values stored in the transition action
// table and dispatched on by the switch in parse_to_next_value.
// ACTION_NOT_DEFINED is 0, which is also the default content of a freshly
// allocated int array, so any (state, token) pair that is never assigned
// an action reads back as "not defined".
static final int ACTION_NOT_DEFINED = 0;
static final int ACTION_LOAD_FIELD_NAME = 1;
static final int ACTION_LOAD_ANNOTATION = 2;
static final int ACTION_START_STRUCT = 3;
static final int ACTION_START_LIST = 4;
static final int ACTION_START_SEXP = 5;
static final int ACTION_START_LOB = 6;
// note: the value 7 is not assigned to any action
static final int ACTION_LOAD_SCALAR = 8;
static final int ACTION_PLUS_INF = 9;
static final int ACTION_MINUS_INF = 10;
static final int ACTION_EAT_COMMA = 11; // if this is unnecessary (because load_scalar handle it) we don't need "after_value"
static final int ACTION_FINISH_CONTAINER = 12;
static final int ACTION_FINISH_LOB = 13;
static final int ACTION_FINISH_DATAGRAM = 14;
static final int ACTION_EOF = 15;
// One more than the highest action value above.
static final int ACTION_count = 16;
/**
 * Maps an ACTION_* constant to a printable name.
 * Note that ACTION_NOT_DEFINED is reported as "ACTION_DO_NOTHING".
 *
 * @param action one of the ACTION_* values
 * @return the name for the action, or the empty string for an unrecognized value
 */
@SuppressWarnings("unused")
private final String get_action_name(int action) {
    final String label;
    switch (action) {
    case ACTION_NOT_DEFINED:
        label = "ACTION_DO_NOTHING";
        break;
    case ACTION_LOAD_FIELD_NAME:
        label = "ACTION_LOAD_FIELD_NAME";
        break;
    case ACTION_LOAD_ANNOTATION:
        label = "ACTION_LOAD_ANNOTATION";
        break;
    case ACTION_START_STRUCT:
        label = "ACTION_START_STRUCT";
        break;
    case ACTION_START_LIST:
        label = "ACTION_START_LIST";
        break;
    case ACTION_START_SEXP:
        label = "ACTION_START_SEXP";
        break;
    case ACTION_START_LOB:
        label = "ACTION_START_LOB";
        break;
    case ACTION_LOAD_SCALAR:
        label = "ACTION_LOAD_SCALAR";
        break;
    case ACTION_PLUS_INF:
        label = "ACTION_PLUS_INF";
        break;
    case ACTION_MINUS_INF:
        label = "ACTION_MINUS_INF";
        break;
    case ACTION_EAT_COMMA:
        label = "ACTION_EAT_COMMA";
        break;
    case ACTION_FINISH_CONTAINER:
        label = "ACTION_FINISH_CONTAINER";
        break;
    case ACTION_FINISH_LOB:
        label = "ACTION_FINISH_LOB";
        break;
    case ACTION_FINISH_DATAGRAM:
        label = "ACTION_FINISH_DATAGRAM";
        break;
    case ACTION_EOF:
        label = "ACTION_EOF";
        break;
    default:
        label = "";
        break;
    }
    return label;
}
// Two dimensional transition table indexed as [state][token]; each cell holds
// the ACTION_* value to perform. Built once, at class initialization.
static final int[][] TransitionActions = makeTransitionActionArray();
static final int[][] makeTransitionActionArray()
{
int[][] actions = new int[STATE_MAX + 1][IonTokenConstsX.TOKEN_MAX + 1];
actions[STATE_BEFORE_ANNOTATION_DATAGRAM][IonTokenConstsX.TOKEN_EOF] = ACTION_FINISH_DATAGRAM;
actions[STATE_BEFORE_ANNOTATION_DATAGRAM][IonTokenConstsX.TOKEN_UNKNOWN_NUMERIC] = ACTION_LOAD_SCALAR;
actions[STATE_BEFORE_ANNOTATION_DATAGRAM][IonTokenConstsX.TOKEN_INT] = ACTION_LOAD_SCALAR;
actions[STATE_BEFORE_ANNOTATION_DATAGRAM][IonTokenConstsX.TOKEN_BINARY] = ACTION_LOAD_SCALAR;
actions[STATE_BEFORE_ANNOTATION_DATAGRAM][IonTokenConstsX.TOKEN_HEX] = ACTION_LOAD_SCALAR;
actions[STATE_BEFORE_ANNOTATION_DATAGRAM][IonTokenConstsX.TOKEN_DECIMAL] = ACTION_LOAD_SCALAR;
actions[STATE_BEFORE_ANNOTATION_DATAGRAM][IonTokenConstsX.TOKEN_FLOAT] = ACTION_LOAD_SCALAR;
actions[STATE_BEFORE_ANNOTATION_DATAGRAM][IonTokenConstsX.TOKEN_FLOAT_INF] = ACTION_PLUS_INF;
actions[STATE_BEFORE_ANNOTATION_DATAGRAM][IonTokenConstsX.TOKEN_FLOAT_MINUS_INF] = ACTION_MINUS_INF;
actions[STATE_BEFORE_ANNOTATION_DATAGRAM][IonTokenConstsX.TOKEN_TIMESTAMP] = ACTION_LOAD_SCALAR;
actions[STATE_BEFORE_ANNOTATION_DATAGRAM][IonTokenConstsX.TOKEN_STRING_DOUBLE_QUOTE]= ACTION_LOAD_SCALAR;
actions[STATE_BEFORE_ANNOTATION_DATAGRAM][IonTokenConstsX.TOKEN_STRING_TRIPLE_QUOTE]= ACTION_LOAD_SCALAR;
actions[STATE_BEFORE_ANNOTATION_DATAGRAM][IonTokenConstsX.TOKEN_SYMBOL_IDENTIFIER] = ACTION_LOAD_ANNOTATION;
actions[STATE_BEFORE_ANNOTATION_DATAGRAM][IonTokenConstsX.TOKEN_SYMBOL_QUOTED] = ACTION_LOAD_ANNOTATION;
actions[STATE_BEFORE_ANNOTATION_DATAGRAM][IonTokenConstsX.TOKEN_OPEN_PAREN] = ACTION_START_SEXP;
actions[STATE_BEFORE_ANNOTATION_DATAGRAM][IonTokenConstsX.TOKEN_OPEN_BRACE] = ACTION_START_STRUCT;
actions[STATE_BEFORE_ANNOTATION_DATAGRAM][IonTokenConstsX.TOKEN_OPEN_SQUARE] = ACTION_START_LIST;
actions[STATE_BEFORE_ANNOTATION_DATAGRAM][IonTokenConstsX.TOKEN_OPEN_DOUBLE_BRACE] = ACTION_START_LOB;
// both before_annotation and after_annotation are essentially the same as
// BOF (after_annotation can't accept EOF as valid however)
for (int ii=0; ii" value states have to be handled
// specially, they can only scan forward to the end of
// the content on next, or read content for the user otherwise
actions[STATE_IN_CLOB_DOUBLE_QUOTED_CONTENT][IonTokenConstsX.TOKEN_CLOSE_BRACE] = ACTION_FINISH_LOB;
actions[STATE_IN_CLOB_TRIPLE_QUOTED_CONTENT][IonTokenConstsX.TOKEN_CLOSE_BRACE] = ACTION_FINISH_LOB;
actions[STATE_IN_BLOB_CONTENT][IonTokenConstsX.TOKEN_CLOSE_BRACE] = ACTION_FINISH_LOB;
// the eof action exists because finishing an unread value can place the scanner just before
// the input stream eof and set the current state to eof - in which case we just need to return eof
for (int ii=0; ii T asFacet(Class facetType)
{
return null;
}
//========================================================================
/**
 * Allocates the reusable buffers of this reader: the annotation array
 * (DEFAULT_ANNOTATION_COUNT slots) and the current value text buffer.
 */
protected final void init_once() {
    _annotations = new SymbolToken[DEFAULT_ANNOTATION_COUNT];
    _current_value_buffer = new StringBuilder();
}
/**
 * Initializes this reader over {@code iis}, starting the position
 * bookkeeping at line 1, column 1.
 *
 * @param iis    the input to read from
 * @param parent the type of the container the input is read as
 */
protected final void init(UnifiedInputStreamX iis, IonType parent)
{
    final long first_line = 1;
    final long first_column = 1;
    init(iis, parent, first_line, first_column);
}
/**
 * Initializes this reader over {@code iis}: creates the token scanner,
 * allocates the save point used for the current value, selects the
 * starting parse state for {@code parent} and pushes {@code parent} as the
 * outermost container.
 *
 * @param iis          the input to read from
 * @param parent       the type of the container the input is read as; must not be null
 * @param start_line   line number handed to the scanner as its starting line
 * @param start_column column number handed to the scanner as its starting column
 */
protected final void init(UnifiedInputStreamX iis
                         ,IonType parent
                         ,long start_line
                         ,long start_column
) {
    assert(parent != null);

    _scanner = new IonReaderTextRawTokensX(iis, start_line, start_column);
    _value_start_line = start_line;
    _value_start_column = start_column;

    _current_value_save_point = iis.savePointAllocate();
    _lob_loaded = LOB_STATE.EMPTY;

    // the first state depends only on what kind of container we start in
    set_state(get_state_at_container_start(parent));
    _eof = false;

    push_container_state(parent);
}
/**
 * Resets every piece of per-parse state and then initializes this reader
 * again over {@code iis}, recording {@code parent} as the nesting parent.
 *
 * @param iis          the input to read from
 * @param parent       the type of the container the input is read as
 * @param start_line   line number handed to the scanner as its starting line
 * @param start_column column number handed to the scanner as its starting column
 */
protected final void re_init(UnifiedInputStreamX iis
                            ,IonType parent
                            ,long start_line
                            ,long start_column
) {
    // state machine and container stack
    _state = 0;
    _has_next_called = false;
    _container_state_top = 0;
    _container_is_struct = false;
    _container_prohibits_commas = false;

    // description of the current value
    _value_type = null;
    _null_type = null;
    _value_keyword = 0;
    _value_start_offset = 0;
    _field_name = null;
    _field_name_sid = UNKNOWN_SYMBOL_ID;
    _annotation_count = 0;

    // cached text of the current value
    _current_value_buffer_loaded = false;
    _current_value_save_point_loaded = false;

    // lob bookkeeping
    _lob_token = 0;
    _lob_value_set = false;
    _lob_value_position = 0;
    _lob_actual_len = 0;
    _lob_bytes = null;

    init(iis, parent, start_line, start_column);

    _nesting_parent = parent;
    if (parent == IonType.STRUCT) {
        _container_is_struct = true;
    }
}
/**
 * Closes this reader by closing its token scanner.
 *
 * @throws IOException if the scanner fails to close
 */
public void close() throws IOException
{
    _scanner.close();
}
/**
 * Records the parser's new state.
 *
 * @param new_state one of the STATE_* values
 */
private final void set_state(int new_state)
{
    this._state = new_state;
}
/**
 * @return the parser's current state as its STATE_* int value
 */
private final int get_state_int()
{
    return this._state;
}
/**
 * @return the symbolic name of the parser's current state
 */
private final String get_state_name()
{
    return get_state_name(get_state_int());
}
/**
 * Discards any cached text for the current value: empties the value
 * buffer and clears the value save point, each only if it is marked as
 * loaded.
 */
protected final void clear_current_value_buffer()
{
    if (_current_value_buffer_loaded) {
        _current_value_buffer_loaded = false;
        _current_value_buffer.setLength(0);
    }

    if (_current_value_save_point_loaded) {
        _current_value_save_point.clear();
        _current_value_save_point_loaded = false;
    }
}
/**
 * Marks the current value as a (possibly typed) null.
 * <p>
 * The value type is taken from the {@code null_type} argument. Earlier
 * this method read the {@code _null_type} field instead, which only gave
 * the right answer because the caller happened to pass that same field.
 *
 * @param null_type the Ion type of the null, e.g. {@code IonType.INT} for {@code null.int}
 */
private final void current_value_is_null(IonType null_type)
{
    clear_current_value_buffer();
    _value_type = null_type;
    _v.setValueToNull(null_type);
    _v.setAuthoritativeType(AS_TYPE.null_value);
}
/**
 * Marks the current value as the given boolean: drops any cached text,
 * sets the value type to BOOL and stores the value in the value variant.
 *
 * @param value the boolean the current value represents
 */
private final void current_value_is_bool(boolean value)
{
    clear_current_value_buffer();

    _value_type = IonType.BOOL;

    _v.setValue(value);
    _v.setAuthoritativeType(AS_TYPE.boolean_value);
}
/**
 * Remembers the text and symbol id of {@code sym} as the current field name.
 *
 * @param sym the field name token; must not be null
 */
private final void set_fieldname(SymbolToken sym)
{
    final String name_text = sym.getText();
    final int    name_sid  = sym.getSid();

    _field_name     = name_text;
    _field_name_sid = name_sid;
}
/**
 * Forgets the current field name: the symbol id goes back to
 * UNKNOWN_SYMBOL_ID and the text to null.
 */
private final void clear_fieldname()
{
    _field_name_sid = UNKNOWN_SYMBOL_ID;
    _field_name = null;
}
/**
 * Adds {@code sym} to the end of the current value's annotation list,
 * doubling the backing array first when it is full.
 *
 * @param sym the annotation to add (empty text is checked by the caller)
 */
private final void append_annotation(SymbolToken sym)
{
    final int capacity = _annotations.length;
    if (_annotation_count >= capacity) {
        SymbolToken[] grown = new SymbolToken[capacity * 2];
        System.arraycopy(_annotations, 0, grown, 0, capacity);
        _annotations = grown;
    }
    _annotations[_annotation_count++] = sym;
}
/**
 * Empties the annotation list by resetting its count; the backing array
 * is kept for reuse.
 */
private final void clear_annotation_list()
{
    this._annotation_count = 0;
}
/**
 * Looks ahead to see whether another value is available. As a side
 * effect this may finish a value that was only partially consumed (for
 * example a collection whose annotations were read but which the user
 * did not step into) and position the parser on the next value.
 *
 * @return true if more data remains, false on eof
 */
public boolean hasNext()
{
    return has_next_raw_value();
}
/**
 * Advances the parser to the next value unless that has already been
 * done since the last call to next() or eof has been reached.
 *
 * @return true if a value is available, false on eof
 * @throws IonException wrapping any IOException raised while scanning
 */
protected final boolean has_next_raw_value()
{
    if (_has_next_called || _eof) {
        // nothing to do: either we already looked ahead or there is nothing left
        return !_eof;
    }

    try {
        finish_value(null);
        clear_value();
        parse_to_next_value();
    }
    catch (IOException e) {
        throw new IonException(e);
    }
    _has_next_called = true;

    return !_eof;
}
/**
 * Moves to the next value and returns its type.
 * This calls hasNext() to make sure the value has been properly started,
 * since hasNext() prepares a value as a side effect of determining
 * whether one is pending. If the type is still undetermined while the
 * scanner has an unfinished token, the token contents are loaded so
 * the type can be settled.
 * Once this returns a type, the value's contents can be accessed through
 * the other public APIs.
 *
 * @return type of the next value, or null if there is none
 * @throws IonException wrapping any IOException raised while loading the token
 */
public IonType next()
{
    if (!hasNext()) {
        return null;
    }

    final boolean type_pending = (_value_type == null) && _scanner.isUnfinishedToken();
    if (type_pending) {
        try {
            token_contents_load(_scanner.getToken());
        }
        catch (IOException e) {
            throw new IonException(e);
        }
    }

    // the look-ahead has now been consumed by the caller
    _has_next_called = false;
    return _value_type;
}
/**
 * Finishes the current value while capturing it in the current value
 * save point. Does nothing when the save point is already loaded.
 *
 * @throws IOException if the scanner fails while finishing the value
 */
private final void finish_and_save_value() throws IOException
{
    if (_current_value_save_point_loaded) {
        return;
    }
    _scanner.save_point_start(_current_value_save_point);
    finish_value(_current_value_save_point);
    _current_value_save_point_loaded = true;
}
/**
 * Has the scanner finish its unfinished token, if there is one, and moves
 * the parser to the state that follows a value. The look-ahead flag is
 * always reset.
 *
 * @param sp save point handed to the scanner, or null for none; it is
 *           not handed on when the current value is a STRUCT, SEXP or LIST
 * @throws IOException if the scanner fails while finishing the token
 */
private final void finish_value(SavePoint sp) throws IOException
{
    if (_scanner.isUnfinishedToken()) {
        SavePoint target = sp;
        if (target != null) {
            final boolean is_container = (_value_type == IonType.STRUCT)
                                      || (_value_type == IonType.SEXP)
                                      || (_value_type == IonType.LIST);
            if (is_container) {
                target = null;
            }
        }
        _scanner.finish_token(target);
        set_state(get_state_after_value());
    }
    _has_next_called = false;
}
/**
 * Resets everything that describes the current value: its type, lob
 * bookkeeping, cached text, annotations, field name, value variant and
 * start offset.
 */
private final void clear_value()
{
    _null_type = null;
    _value_type = null;

    if (_lob_value_set) {
        _lob_value_position = 0;
        _lob_value_set = false;
    }
    if (!LOB_STATE.EMPTY.equals(_lob_loaded)) {
        _lob_bytes = null;
        _lob_actual_len = -1;
        _lob_loaded = LOB_STATE.EMPTY;
    }

    clear_current_value_buffer();
    clear_annotation_list();
    clear_fieldname();
    _v.clear();

    _value_start_offset = -1;
}
/**
 * Sets the "is struct" and "prohibits commas" flags to match the given
 * container type. Only structs are structs; sexps and datagrams do not
 * use commas between values.
 *
 * @param t the container type now in effect
 * @throws IllegalArgumentException if {@code t} is not a container type
 */
private final void set_container_flags(IonType t)
{
    final boolean is_struct;
    final boolean no_commas;

    switch (t) {
    case STRUCT:
        is_struct = true;
        no_commas = false;
        break;
    case LIST:
        is_struct = false;
        no_commas = false;
        break;
    case SEXP:
    case DATAGRAM:
        is_struct = false;
        no_commas = true;
        break;
    default:
        throw new IllegalArgumentException("type must be a container, not a "+t.toString());
    }

    _container_is_struct = is_struct;
    _container_prohibits_commas = no_commas;
}
/**
 * Determines the parser state that follows a completed value, based on
 * the type of the current container. When this reader has a nesting
 * parent and is at depth 0, the answer is always STATE_EOF.
 *
 * @return the STATE_* value to move to
 * @throws IonException if the current container type is not a container
 */
private int get_state_after_value()
{
    final int following;

    switch (getContainerType()) {
    case DATAGRAM:
        following = STATE_BEFORE_ANNOTATION_DATAGRAM;
        break;
    case SEXP:
        following = STATE_BEFORE_ANNOTATION_SEXP;
        break;
    case LIST:
    case STRUCT:
        following = STATE_AFTER_VALUE_CONTENTS;
        break;
    default:
        throw new IonException("invalid container type encountered during parsing "
                             + getContainerType()
                             + _scanner.input_position());
    }

    final boolean at_top_of_nested_value = (_nesting_parent != null) && (getDepth() == 0);
    return at_top_of_nested_value ? STATE_EOF : following;
}
/**
 * Determines the state to move to once the annotations of a value have
 * been read, based on the current state and, when the current state is
 * STATE_AFTER_VALUE_CONTENTS, on the type of the container on top of
 * the stack.
 *
 * @return STATE_BEFORE_VALUE_CONTENT or STATE_BEFORE_VALUE_CONTENT_SEXP
 * @throws IonException if the current state or container is not one an
 *         annotation can appear in
 */
private final int get_state_after_annotation()
{
    switch (get_state_int()) {
    case STATE_BEFORE_ANNOTATION_DATAGRAM:
    case STATE_BEFORE_ANNOTATION_CONTAINED:
        return STATE_BEFORE_VALUE_CONTENT;

    case STATE_BEFORE_ANNOTATION_SEXP:
        return STATE_BEFORE_VALUE_CONTENT_SEXP;

    case STATE_AFTER_VALUE_CONTENTS: {
        final IonType enclosing = top_state();
        switch (enclosing) {
        case SEXP:
            return STATE_BEFORE_VALUE_CONTENT_SEXP;
        case STRUCT:
        case LIST:
        case DATAGRAM:
            return STATE_BEFORE_VALUE_CONTENT;
        default:
            throw new IonException("invalid container type encountered during parsing "
                                 + enclosing
                                 + _scanner.input_position());
        }
    }

    default:
        throw new IonException("invalid state encountered during parsing before the value "
                             + get_state_name()
                             + _scanner.input_position());
    }
}
/**
 * @return the state that follows a finished container, computed for the
 *         container currently on top of the container stack
 */
private final int get_state_after_container()
{
    return get_state_after_container(top_state());
}
/**
 * Verifies that {@code token} is the closing token matching the container
 * on top of the stack and returns the state that follows that container.
 *
 * @param token the closing token that was read
 * @return the STATE_* value to move to
 * @throws IonException if the token does not match the container, or if
 *         the top of the stack is a datagram or not a container
 */
private final int get_state_after_container(int token)
{
    final IonType closing = top_state();
    final int expected_token;

    switch (closing) {
    case LIST:
        expected_token = TOKEN_CLOSE_SQUARE;
        break;
    case SEXP:
        expected_token = TOKEN_CLOSE_PAREN;
        break;
    case STRUCT:
        expected_token = TOKEN_CLOSE_BRACE;
        break;
    case DATAGRAM:
        // We shouldn't get here. Fall through.
    default:
        throw new IonException("invalid container type encountered during parsing "
                             + closing
                             + _scanner.input_position());
    }

    check_container_close(closing, expected_token, token);
    return get_state_after_container(closing);
}
/**
 * Determines the state that follows a finished container.
 *
 * @param container the type of the container concerned; null is treated
 *        as the datagram level
 * @return the STATE_* value to move to; for a non-null container this is
 *         STATE_EOF when this reader has a nesting parent and is at depth 0
 * @throws IonException if {@code container} is not a container type
 */
private final int get_state_after_container(IonType container)
{
    if (container == null) {
        return STATE_BEFORE_ANNOTATION_DATAGRAM;
    }

    final int following;
    switch (container) {
    case DATAGRAM:
        following = STATE_BEFORE_ANNOTATION_DATAGRAM;
        break;
    case SEXP:
        following = STATE_BEFORE_ANNOTATION_SEXP;
        break;
    case STRUCT:
    case LIST:
        following = STATE_AFTER_VALUE_CONTENTS;
        break;
    default:
        throw new IonException("invalid container type encountered during parsing "
                             + container
                             + _scanner.input_position());
    }

    if (_nesting_parent != null && getDepth() == 0) {
        return STATE_EOF;
    }
    return following;
}
/**
 * Verifies that a container was closed by the token that belongs to it.
 *
 * @param container     the container being closed, used in the error message
 * @param expectedToken the closing token this container requires
 * @param actualToken   the closing token that was read
 * @throws IonException if the two tokens differ
 */
private final void check_container_close(IonType container, int expectedToken, int actualToken)
{
    if (actualToken == expectedToken) {
        return;
    }
    throw new IonException(container.toString().toLowerCase() + " closed by "
                         + IonTokenConstsX.describeToken(actualToken)
                         + _scanner.input_position());
}
/**
 * Determines the state the parser starts in just inside a container.
 *
 * @param container the container being entered; null is treated as the
 *        datagram level
 * @return the STATE_* value to start in
 * @throws IonException if {@code container} is not a container type
 */
private final int get_state_at_container_start(IonType container)
{
    if (container == null) {
        return STATE_BEFORE_ANNOTATION_DATAGRAM;
    }

    switch (container) {
    case DATAGRAM:
        return STATE_BEFORE_ANNOTATION_DATAGRAM;
    case SEXP:
        return STATE_BEFORE_ANNOTATION_SEXP;
    case LIST:
        return STATE_BEFORE_ANNOTATION_CONTAINED;
    case STRUCT:
        // the first thing inside a struct is a field name
        return STATE_BEFORE_FIELD_NAME;
    default:
        throw new IonException("invalid container type encountered during parsing "
                             + container
                             + _scanner.input_position());
    }
}
/**
 * Builds the symbol token for a field name or annotation from its text.
 * Quoted symbols are taken literally. An identifier is first checked
 * against the keywords: false, true, null and nan are rejected, the
 * symbol id form yields a token with no text and the decoded sid, and
 * anything else is an ordinary symbol with unknown sid.
 *
 * @param context description of what is being parsed, used in the error message
 * @param sb      the text of the token
 * @param t       the token type
 * @return the symbol token
 * @throws IOException declared for callers; no I/O is visible in this method
 */
private final SymbolToken parseSymbolToken(String context,
                                           StringBuilder sb,
                                           int t)
    throws IOException
{
    if (t != IonTokenConstsX.TOKEN_SYMBOL_IDENTIFIER) {
        return new SymbolTokenImpl(sb.toString(), UNKNOWN_SYMBOL_ID);
    }

    switch (IonTokenConstsX.keyword(sb, 0, sb.length())) {
    case IonTokenConstsX.KEYWORD_FALSE:
    case IonTokenConstsX.KEYWORD_TRUE:
    case IonTokenConstsX.KEYWORD_NULL:
    case IonTokenConstsX.KEYWORD_NAN:
        // keywords are not ok unless they're quoted
        parse_error("Cannot use unquoted keyword " +
                    sb.toString() + " as " + context);
        // parse_error is expected to throw; there is deliberately no
        // break here, so control would continue into the next case
    case IonTokenConstsX.KEYWORD_sid:
        return new SymbolTokenImpl(null, IonTokenConstsX.decodeSid(sb));
    default:
        return new SymbolTokenImpl(sb.toString(), UNKNOWN_SYMBOL_ID);
    }
}
/**
 * Drives the parser's state machine forward until the start of the next
 * value has been recognized or the end of the current container or input
 * has been reached.
 * <p>
 * The starting offset, line and column of the scanner are recorded first,
 * then tokens are read and, for each one, the action for the pair
 * (current state, token) is looked up in TransitionActions2 and carried
 * out. Actions that only consume syntax (field names, annotations, commas)
 * read another token and loop; actions that start a value set
 * {@code _value_type} and return; actions that end the container or the
 * input set {@code _eof} and return.
 *
 * @throws IOException if the scanner fails while reading tokens
 */
protected final void parse_to_next_value() throws IOException
{
int t;
int action, temp_state;
boolean trailing_whitespace = false; // TODO: there's a better way to do this
StringBuilder sb;
// FIXME: check depth and type before doing anything further
// if we're on a collection and at the correct depth
// we need to skip over the contents of the collection
// before doing any more parsing
// we'll need a token to get started here
// we'll also remember where we were when we started if the
// user later wants to get a span over this value. In the
// case where we just before a comma, after the comma we'll
// reset this offset since for the span the comma isn't part
// of the span when it's hoisted
_value_start_offset = _scanner.getStartingOffset();
_value_start_line = _scanner.getLineNumber();
_value_start_column = _scanner.getLineOffset();
t = _scanner.nextToken();
for (;;) {
// flattened [state][token] lookup: row = state, column = token
int idx = get_state_int() * IonTokenConstsX.TOKEN_count + t;
action = TransitionActions2[idx];
// this used to be (but the 2d array is 9072ms vs 8786ms
// timing, 3% of total file parse time!):
// action = TransitionActions[get_state_int()][t];
switch (action) {
case ACTION_NOT_DEFINED:
{
// TODO why would we get here?
// No transition is defined for this token in this state. This is
// tolerated only when the token is the closing token of the
// nesting parent's container type; in that case it is treated as eof.
boolean span_eof = false;
if (_nesting_parent != null) {
switch (_nesting_parent) {
case LIST:
if (t == IonTokenConstsX.TOKEN_CLOSE_SQUARE) {
span_eof = true;
}
break;
case SEXP:
if (t == IonTokenConstsX.TOKEN_CLOSE_PAREN){
span_eof = true;
}
break;
case STRUCT:
if (t == IonTokenConstsX.TOKEN_CLOSE_BRACE) {
span_eof = true;
}
break;
default:
break;
}
}
if (span_eof != true) {
String message = "invalid syntax [state:"
+ get_state_name()
+ " on token:"
+IonTokenConstsX.getTokenName(t)
+"]";
parse_error(message);
}
set_state(STATE_EOF);
_eof = true;
return;
}
case ACTION_EOF:
set_state(STATE_EOF);
_eof = true;
return;
case ACTION_LOAD_FIELD_NAME:
{
if (!is_in_struct_internal()) {
throw new IllegalStateException("field names have to be in structs");
}
//finish_value(_current_value_save_point);
finish_and_save_value();
sb = token_contents_load(t);
SymbolToken sym = parseSymbolToken("a field name", sb, t);
set_fieldname(sym);
clear_current_value_buffer();
// the field name must be followed by a colon, which is consumed here
t = _scanner.nextToken();
if (t != IonTokenConstsX.TOKEN_COLON) {
String message = "field name must be followed by a colon, not a "
+ IonTokenConstsX.getTokenName(t);
parse_error(message);
}
_scanner.tokenIsFinished();
set_state(STATE_BEFORE_ANNOTATION_CONTAINED);
t = _scanner.nextToken();
break;
}
case ACTION_LOAD_ANNOTATION:
{
// a symbol is only an annotation if it is followed by "::",
// so load its text and then look for the double colon
sb = token_contents_load(t);
trailing_whitespace = _scanner.skip_whitespace();
if (!_scanner.skipDoubleColon()) {
// unnecessary: set_current_value(sp);
// this will "loop around" to ACTION_LOAD_SCALAR
// since this is necessarily a symbol of one
// sort of another
temp_state = get_state_after_annotation();
set_state(temp_state);
break;
}
// We have an annotation!
SymbolToken sym = parseSymbolToken("an annotation", sb, t);
append_annotation(sym);
clear_current_value_buffer();
// Consumed the annotation, move on.
// note: that peekDoubleColon() consumed the two colons
// so nextToken won't see them
t = _scanner.nextToken();
switch(t) {
case IonTokenConstsX.TOKEN_SYMBOL_IDENTIFIER:
case IonTokenConstsX.TOKEN_SYMBOL_QUOTED:
// This may be another annotation, so stay in this state
// and come around the horn again to check it out.
break;
default:
// we leave the error handling to the transition
temp_state = get_state_after_annotation();
set_state(temp_state);
break;
}
break;
}
case ACTION_START_STRUCT:
_value_type = IonType.STRUCT;
temp_state = STATE_BEFORE_FIELD_NAME;
set_state(temp_state);
return;
case ACTION_START_LIST:
_value_type = IonType.LIST;
temp_state = STATE_BEFORE_ANNOTATION_CONTAINED;
set_state(temp_state);
return;
case ACTION_START_SEXP:
_value_type = IonType.SEXP;
temp_state = STATE_BEFORE_ANNOTATION_SEXP;
set_state(temp_state);
return;
case ACTION_START_LOB:
// what follows the "{{" decides between the two clob forms and a blob
switch (_scanner.peekLobStartPunctuation()) {
case IonTokenConstsX.TOKEN_STRING_DOUBLE_QUOTE:
set_state(STATE_IN_CLOB_DOUBLE_QUOTED_CONTENT);
_lob_token = IonTokenConstsX.TOKEN_STRING_DOUBLE_QUOTE;
_value_type = IonType.CLOB;
break;
case IonTokenConstsX.TOKEN_STRING_TRIPLE_QUOTE:
set_state(STATE_IN_CLOB_TRIPLE_QUOTED_CONTENT);
_lob_token = IonTokenConstsX.TOKEN_STRING_TRIPLE_QUOTE;
_value_type = IonType.CLOB;
break;
default:
set_state(STATE_IN_BLOB_CONTENT);
_lob_token = IonTokenConstsX.TOKEN_OPEN_DOUBLE_BRACE;
_value_type = IonType.BLOB;
break;
}
return;
case ACTION_LOAD_SCALAR:
if (t == IonTokenConstsX.TOKEN_SYMBOL_IDENTIFIER) {
// an identifier may be a keyword (null, true, false, nan, $<sid>)
// and so not a symbol value at all
sb = token_contents_load(t);
_value_keyword = IonTokenConstsX.keyword(sb, 0, sb.length());
switch (_value_keyword) {
case IonTokenConstsX.KEYWORD_NULL:
{
// if whitespace followed the null (as recorded by the annotation
// check above) no ".type" suffix is looked for
int kwt = trailing_whitespace ? IonTokenConstsX.KEYWORD_none : _scanner.peekNullTypeSymbol();
switch (kwt) {
case IonTokenConstsX.KEYWORD_NULL: _null_type = IonType.NULL; break;
case IonTokenConstsX.KEYWORD_BOOL: _null_type = IonType.BOOL; break;
case IonTokenConstsX.KEYWORD_INT: _null_type = IonType.INT; break;
case IonTokenConstsX.KEYWORD_FLOAT: _null_type = IonType.FLOAT; break;
case IonTokenConstsX.KEYWORD_DECIMAL: _null_type = IonType.DECIMAL; break;
case IonTokenConstsX.KEYWORD_TIMESTAMP: _null_type = IonType.TIMESTAMP; break;
case IonTokenConstsX.KEYWORD_SYMBOL: _null_type = IonType.SYMBOL; break;
case IonTokenConstsX.KEYWORD_STRING: _null_type = IonType.STRING; break;
case IonTokenConstsX.KEYWORD_BLOB: _null_type = IonType.BLOB; break;
case IonTokenConstsX.KEYWORD_CLOB: _null_type = IonType.CLOB; break;
case IonTokenConstsX.KEYWORD_LIST: _null_type = IonType.LIST; break;
case IonTokenConstsX.KEYWORD_SEXP: _null_type = IonType.SEXP; break;
case IonTokenConstsX.KEYWORD_STRUCT: _null_type = IonType.STRUCT; break;
case IonTokenConstsX.KEYWORD_none: _null_type = IonType.NULL; break; // this happens when there isn't a '.' otherwise peek throws the error or returns none
default: parse_error("invalid keyword id ("+kwt+") encountered while parsing a null");
}
// at this point we've consumed a dot '.' and it's preceding whitespace
// clear_value();
current_value_is_null(_null_type);
// set to null_type in above call: _value_type = IonType.NULL;
break;
}
case IonTokenConstsX.KEYWORD_TRUE:
_value_type = IonType.BOOL;
current_value_is_bool(true);
break;
case IonTokenConstsX.KEYWORD_FALSE:
_value_type = IonType.BOOL;
current_value_is_bool(false);
break;
case IonTokenConstsX.KEYWORD_NAN:
_value_type = IonType.FLOAT;
clear_current_value_buffer();
_v.setValue(Double.NaN);
_v.setAuthoritativeType(AS_TYPE.double_value);
break;
case IonTokenConstsX.KEYWORD_sid:
{
int sid = IonTokenConstsX.decodeSid(sb);
_v.setValue(sid);
_v.setAuthoritativeType(AS_TYPE.int_value);
}
// no break: a symbol id continues into the default case below,
// which sets the value type to SYMBOL
default:
// We don't care about any other 'keywords'
_value_type = IonType.SYMBOL;
break;
}
}
else if (t == IonTokenConstsX.TOKEN_DOT) {
// a lone dot is treated as the symbol "."
_value_type = IonType.SYMBOL;
clear_current_value_buffer();
_v.setValue(".");
_v.setAuthoritativeType(AS_TYPE.string_value);
}
else {
// if it's not a symbol we just look at the token type
_value_type = IonTokenConstsX.ion_type_of_scalar(t);
}
int state_after_scalar = get_state_after_value();
set_state(state_after_scalar);
return;
case ACTION_PLUS_INF:
_value_type = IonType.FLOAT;
clear_current_value_buffer();
_v.setValue(Double.POSITIVE_INFINITY);
_v.setAuthoritativeType(AS_TYPE.double_value);
state_after_scalar = get_state_after_value();
set_state(state_after_scalar);
return;
case ACTION_MINUS_INF:
_value_type = IonType.FLOAT;
clear_current_value_buffer();
_v.setValue(Double.NEGATIVE_INFINITY);
_v.setAuthoritativeType(AS_TYPE.double_value);
state_after_scalar = get_state_after_value();
set_state(state_after_scalar);
return;
case ACTION_EAT_COMMA:
if (_container_prohibits_commas) {
parse_error("commas aren't used to separate values in "+getContainerType().toString());
}
// after a comma a struct expects a field name, a list expects a value
int new_state = STATE_BEFORE_ANNOTATION_CONTAINED;
if (_container_is_struct) {
new_state = STATE_BEFORE_FIELD_NAME;
}
set_state(new_state);
_scanner.tokenIsFinished();
// when we eat a comma we need to reset the current
// value start used to define a span, since the comma
// isn't part of the span when it's hoisted
_value_start_offset = _scanner.getStartingOffset();
t = _scanner.nextToken();
break;
case ACTION_FINISH_CONTAINER:
// the end of the current container is reported to the caller as eof
new_state = get_state_after_container(t);
set_state(new_state);
_eof = true;
return;
case ACTION_FINISH_LOB:
state_after_scalar = get_state_after_value();
set_state(state_after_scalar);
return;
case ACTION_FINISH_DATAGRAM:
if (getDepth() != 0) {
parse_error("state failure end of datagram encounterd with a non-container stack");
}
set_state(STATE_EOF);
_eof = true;
return;
default: parse_error("unexpected token encountered: "+IonTokenConstsX.getTokenName(t));
}
}
}
/**
 * Loads the text of the current token into the current value buffer and
 * returns that buffer. There are three cases:
 * <ul>
 * <li>the buffer is already loaded: it is returned as is;</li>
 * <li>only the save point is loaded: the save point is activated, the
 *     text is re-read from it according to the token type, and the save
 *     point is deactivated again;</li>
 * <li>neither is loaded: a save point is started, the text is read from
 *     the scanner according to the token type, the save point's end is
 *     marked and the token is reported as finished.</li>
 * </ul>
 * Reading the text also sets {@code _value_type} for every token type
 * except the default branch of the save point case.
 *
 * @param token_type the type of the token whose contents are wanted
 * @return the current value buffer holding the token's text
 * @throws IOException if the scanner fails while reading
 * @throws IonException if the token type is not one whose contents can be
 *         loaded (only in the case where nothing was loaded before)
 */
protected final StringBuilder token_contents_load(int token_type) throws IOException
{
StringBuilder sb = _current_value_buffer;
boolean clob_chars_only;
int c;
if (_current_value_buffer_loaded) {
return sb;
}
else if (_current_value_save_point_loaded) {
assert(!_scanner.isUnfinishedToken() && !_current_value_save_point.isClear());
// _scanner.load_save_point_contents( _current_value_save_point, sb);
_scanner.save_point_activate(_current_value_save_point);
switch (token_type) {
// anything that isn't a symbol or a string (e.g. a numeric token) is copied as raw characters
default:
_scanner.load_raw_characters(sb);
break;
case IonTokenConstsX.TOKEN_SYMBOL_IDENTIFIER:
_scanner.load_symbol_identifier(sb);
_value_type = IonType.SYMBOL;
break;
case IonTokenConstsX.TOKEN_SYMBOL_OPERATOR:
_scanner.load_symbol_operator(sb);
_value_type = IonType.SYMBOL;
break;
case IonTokenConstsX.TOKEN_SYMBOL_QUOTED:
clob_chars_only = (IonType.CLOB == _value_type);
_scanner.load_single_quoted_string(sb, clob_chars_only);
_value_type = IonType.SYMBOL;
break;
case IonTokenConstsX.TOKEN_STRING_DOUBLE_QUOTE:
clob_chars_only = (IonType.CLOB == _value_type);
_scanner.load_double_quoted_string(sb, clob_chars_only);
_value_type = IonType.STRING;
break;
case IonTokenConstsX.TOKEN_STRING_TRIPLE_QUOTE:
clob_chars_only = (IonType.CLOB == _value_type);
_scanner.load_triple_quoted_string(sb, clob_chars_only);
_value_type = IonType.STRING;
break;
}
_scanner.save_point_deactivate(_current_value_save_point);
_current_value_buffer_loaded = true;
}
else {
// nothing cached yet: read from the scanner while recording a save point
_scanner.save_point_start(_current_value_save_point);
switch (token_type) {
case IonTokenConstsX.TOKEN_UNKNOWN_NUMERIC:
case IonTokenConstsX.TOKEN_INT:
case IonTokenConstsX.TOKEN_BINARY:
case IonTokenConstsX.TOKEN_HEX:
case IonTokenConstsX.TOKEN_FLOAT:
case IonTokenConstsX.TOKEN_DECIMAL:
case IonTokenConstsX.TOKEN_TIMESTAMP:
// the scanner reports the value type it found while loading the number
_value_type = _scanner.load_number(sb);
break;
case IonTokenConstsX.TOKEN_SYMBOL_IDENTIFIER:
_scanner.load_symbol_identifier(sb);
_value_type = IonType.SYMBOL;
break;
case IonTokenConstsX.TOKEN_SYMBOL_OPERATOR:
_scanner.load_symbol_operator(sb);
_value_type = IonType.SYMBOL;
break;
case IonTokenConstsX.TOKEN_SYMBOL_QUOTED:
clob_chars_only = (IonType.CLOB == _value_type);
c = _scanner.load_single_quoted_string(sb, clob_chars_only);
if (c == UnifiedInputStreamX.EOF) {
//String message = "EOF encountered before closing single quote";
//parse_error(message);
_scanner.unexpected_eof();
}
_value_type = IonType.SYMBOL;
break;
case IonTokenConstsX.TOKEN_STRING_DOUBLE_QUOTE:
clob_chars_only = (IonType.CLOB == _value_type);
c = _scanner.load_double_quoted_string(sb, clob_chars_only);
if (c == UnifiedInputStreamX.EOF) {
// String message = "EOF encountered before closing single quote";
// parse_error(message);
_scanner.unexpected_eof();
}
_value_type = IonType.STRING;
break;
case IonTokenConstsX.TOKEN_STRING_TRIPLE_QUOTE:
clob_chars_only = (IonType.CLOB == _value_type);
c = _scanner.load_triple_quoted_string(sb, clob_chars_only);
if (c == UnifiedInputStreamX.EOF) {
//String message = "EOF encountered before closing single quote";
//parse_error(message);
_scanner.unexpected_eof();
}
_value_type = IonType.STRING;
break;
default:
String message = "unexpected token "
+ IonTokenConstsX.getTokenName(token_type)
+ " encountered";
throw new IonException(message);
}
_current_value_save_point.markEnd();
_current_value_save_point_loaded = true;
_current_value_buffer_loaded = true;
tokenValueIsFinished();
}
return sb;
}
/**
 * Marks the current token as consumed.
 * <p>
 * The scanner is always told the token is finished. In addition, when
 * the current value type is BLOB or CLOB the parser state is advanced
 * to whatever {@code get_state_after_value()} reports.
 * <p>
 * This method is invoked from within this class; being protected it is
 * presumably also intended for subclasses.
 */
protected void tokenValueIsFinished()
{
    _scanner.tokenIsFinished();

    // IonType is an enum, so identity comparison matches equals()
    final boolean is_lob = (_value_type == IonType.BLOB)
                        || (_value_type == IonType.CLOB);
    if (is_lob) {
        set_state(get_state_after_value());
    }
}
/**
 * Pushes {@code newContainer} onto the container stack and updates the
 * container flags to match it. When the backing array is full it is
 * replaced by one of twice the length before the push.
 *
 * @param newContainer the container type being entered
 */
private final void push_container_state(IonType newContainer)
{
    final int capacity = _container_state_stack.length;
    if (capacity <= _container_state_top) {
        // no free slot left: double the array and carry the entries over
        final IonType[] grown = new IonType[capacity * 2];
        System.arraycopy(_container_state_stack, 0, grown, 0, capacity);
        _container_state_stack = grown;
    }

    set_container_flags(newContainer);
    _container_state_stack[_container_state_top++] = newContainer;
}
/**
 * Pops the top entry off the container stack, restores the container
 * flags for the entry that is now on top, clears the end-of-container
 * bookkeeping ({@code _eof}, {@code _has_next_called}) and moves the
 * parser to the state reported by {@code get_state_after_container()}.
 */
private final void pop_container_state() {
    _container_state_top -= 1;

    final IonType exposed = top_state();
    set_container_flags(exposed);

    _eof = false;
    _has_next_called = false;

    set_state(get_state_after_container());
}
/**
 * Returns the container type stored in the topmost occupied slot of the
 * container stack (index {@code _container_state_top - 1}).
 */
private final IonType top_state() {
    return _container_state_stack[_container_state_top - 1];
}
/**
 * Returns the type recorded for the current value.
 *
 * @return the current value of {@code _value_type}; may be null
 *         (for example, {@code stepIn()} resets it to null)
 */
public IonType getType() {
    return this._value_type;
}
/**
 * Reports whether the reader is inside a struct as seen by callers.
 * <p>
 * A hoisted struct does not count: when the container type is STRUCT
 * but the depth is 0, the reader is treated as being at the top level
 * and this method answers false.
 *
 * @return true only when the container type is STRUCT and the depth
 *         is greater than zero
 */
public boolean isInStruct()
{
    if (!IonType.STRUCT.equals(getContainerType())) {
        return false;
    }
    if (getDepth() > 0) {
        return true;
    }
    // depth 0 inside a struct: expected only when the nesting parent is a struct
    assert IonType.STRUCT.equals(_nesting_parent);
    return false;
}
/**
 * Internal variant of {@link #isInStruct()} that ignores depth: it
 * answers true whenever the container type is STRUCT, including a
 * hoisted container. Hoisted values still carry a field name that
 * has to be ignored, which is why the parser (really only
 * {@code parse_to_next()}) needs this view.
 *
 * @return true when the current container type is STRUCT
 */
private boolean is_in_struct_internal()
{
    return IonType.STRUCT.equals(getContainerType());
}
/**
 * Returns the type of the container the reader is currently in.
 *
 * @return the top entry of the container stack, or
 *         {@code IonType.DATAGRAM} when the stack is empty
 */
public IonType getContainerType()
{
    final int top = _container_state_top;
    return (top == 0) ? IonType.DATAGRAM
                      : _container_state_stack[top - 1];
}
/**
 * Returns the nesting depth of the reader.
 * <p>
 * The depth is the number of entries on the container stack, less one
 * when the bottom entry is the reader's root container. The root is
 * {@code IonType.DATAGRAM} when there is no nesting parent, otherwise
 * it is the nesting parent's type.
 *
 * @return the depth; 0 when the container stack is empty
 */
public int getDepth()
{
    int depth = _container_state_top;
    if (depth > 0) {
        final IonType bottom = _container_state_stack[0];
        final boolean bottom_is_root =
            (_nesting_parent == null)
                ? IonType.DATAGRAM.equals(bottom)
                : _nesting_parent.equals(bottom);
        if (bottom_is_root) {
            // the root container itself does not count as a level
            depth--;
        }
    }
    return depth;
}
/**
 * Returns the text of the current field name.
 *
 * @return the field name text, or null when there is none or when the
 *         reader is at depth 0 inside a struct (the hoisting case)
 * @throws UnknownSymbolException when no text is available but the
 *         field name symbol id is greater than zero
 */
public String getFieldName()
{
    // For hoisting
    final boolean hoisted = (getDepth() == 0) && is_in_struct_internal();
    if (hoisted) {
        return null;
    }

    final String text = _field_name;
    if (text != null) {
        return text;
    }
    if (_field_name_sid > 0) {
        throw new UnknownSymbolException(_field_name_sid);
    }
    return null;
}
/**
 * Returns the stored field name text without checking the symbol id,
 * so unlike {@link #getFieldName()} this never throws
 * {@code UnknownSymbolException}.
 *
 * @return {@code _field_name}, or null when the reader is at depth 0
 *         inside a struct (the hoisting case)
 */
final String getRawFieldName()
{
    // For hoisting
    final boolean hoisted = (getDepth() == 0) && is_in_struct_internal();
    return hoisted ? null : _field_name;
}
/**
 * Returns the symbol id of the current field name.
 *
 * @return {@code _field_name_sid}, or {@code UNKNOWN_SYMBOL_ID} when
 *         the reader is at depth 0 inside a struct (the hoisting case)
 */
public int getFieldId()
{
    // For hoisting
    final boolean hoisted = (getDepth() == 0) && is_in_struct_internal();
    if (hoisted) {
        return UNKNOWN_SYMBOL_ID;
    }
    return _field_name_sid;
}
/**
 * Returns the current field name as a symbol token built from the
 * stored text and the id reported by {@link #getFieldId()}.
 *
 * @return the token, or null when the reader is at depth 0 inside a
 *         struct (the hoisting case) or when there is neither text nor
 *         a known symbol id
 */
public SymbolToken getFieldNameSymbol()
{
    // For hoisting
    final boolean hoisted = (getDepth() == 0) && is_in_struct_internal();
    if (hoisted) {
        return null;
    }

    final String text = _field_name;
    final int id = getFieldId();
    final boolean absent = (text == null) && (id == UNKNOWN_SYMBOL_ID);
    return absent ? null : new SymbolTokenImpl(text, id);
}
/**
 * Returns an iterator over the values reported by
 * {@code getTypeAnnotations()}, built with
 * {@code _Private_Utils.stringIterator}.
 */
public Iterator iterateTypeAnnotations()
{
    final Iterator annotations =
        _Private_Utils.stringIterator(getTypeAnnotations());
    return annotations;
}
/**
 * Steps into the container value the reader is currently positioned on.
 * <p>
 * The parser state is switched to the container-start state for the
 * value's type, the type is pushed on the container stack, and the
 * current token and value are finished. If {@code _v} reports null
 * (a null container has no contents) the reader is marked as already
 * at the end of the container. Finally the current value type is
 * cleared.
 *
 * @throws IllegalStateException when there is no current value, the
 *         reader is at end of input, or the current value is not a
 *         struct, list or sexp
 * @throws IonException wrapping any {@code IOException} raised while
 *         finishing the current value
 */
public void stepIn()
{
    if (_eof || _value_type == null) {
        throw new IllegalStateException();
    }

    final boolean is_container = (_value_type == IonType.STRUCT)
                              || (_value_type == IonType.LIST)
                              || (_value_type == IonType.SEXP);
    if (!is_container) {
        throw new IllegalStateException("Unexpected value type: " + _value_type);
    }

    set_state(get_state_at_container_start(_value_type));
    push_container_state(_value_type);
    _scanner.tokenIsFinished();

    try {
        finish_value(null);
    }
    catch (IOException io) {
        throw new IonException(io);
    }

    if (_v.isNull()) {
        // there are no contents in a null container
        _eof = true;
        _has_next_called = true;
    }
    _value_type = null;

    if (_debug) System.out.println("stepInto() new depth: "+getDepth());
}
/**
 * Steps out of the current container.
 * <p>
 * The current value is finished and, unless the end of the container
 * has already been reached, the scanner skips over the remainder of
 * the struct, list or sexp. The container is then popped from the
 * stack, the token and value are finished again, and the current
 * value is cleared.
 *
 * @throws IllegalStateException when the depth is less than 1, or when
 *         the current container type is not one this method handles
 * @throws IonException wrapping any {@code IOException} raised while
 *         finishing values or skipping the rest of the container
 */
public void stepOut()
{
    if (getDepth() < 1) {
        throw new IllegalStateException(IonMessages.CANNOT_STEP_OUT);
    }
    try {
        finish_value(null);
        final IonType container = getContainerType();
        switch (container) {
        case STRUCT:
            if (!_eof) _scanner.skip_over_struct();
            break;
        case LIST:
            if (!_eof) _scanner.skip_over_list();
            break;
        case SEXP:
            if (!_eof) _scanner.skip_over_sexp();
            break;
        case DATAGRAM:
            break;
        default:
            // report the container type we switched on, not the
            // (unrelated) type of the current value
            throw new IllegalStateException("Unexpected container type: " + container);
        }
    }
    catch (IOException e) {
        throw new IonException(e);
    }
    pop_container_state();
    _scanner.tokenIsFinished();
    try {
        finish_value(null);
    }
    catch (IOException e) {
        throw new IonException(e);
    }
    clear_value();
    if (_debug) System.out.println("stepOUT() new depth: "+getDepth());
}
//
// symbol related code that is inactive in this parser
//

/**
 * Symbol tables are not handled by this parser.
 *
 * @return always null
 */
public SymbolTable getSymbolTable() {
    return null;
}
//
// helper classes
//

/**
 * Exception thrown by the {@code parse_error} methods of this reader
 * to report a problem found while parsing text input.
 */
public static class IonReaderTextParsingException extends IonException {
    private static final long serialVersionUID = 1L;

    /** @param message description of the problem */
    IonReaderTextParsingException(String message) {
        super(message);
    }

    /** @param cause the underlying exception */
    IonReaderTextParsingException(Exception cause) {
        super(cause);
    }

    /**
     * @param message description of the problem
     * @param cause the underlying exception
     */
    IonReaderTextParsingException(String message, Exception cause) {
        super(message, cause);
    }
}
/**
 * Throws an {@code IonReaderTextParsingException} whose message is
 * made of the scanner's current input position followed by
 * {@code reason}. This method never returns normally.
 *
 * @param reason description of what went wrong
 */
protected final void parse_error(String reason) {
    // NOTE(review): the Exception overload uses the prefix "Syntax error at "
    // while this one uses "Syntax error" - confirm which of the two matches
    // the text returned by input_position().
    String message = "Syntax error";
    message += _scanner.input_position();
    message += ": ";
    message += reason;
    throw new IonReaderTextParsingException(message);
}
/**
 * Throws an {@code IonReaderTextParsingException} that wraps
 * {@code e}. The message is made of the scanner's current input
 * position followed by the localized message of {@code e}. This
 * method never returns normally.
 *
 * @param e the exception that caused the parse failure
 */
protected final void parse_error(Exception e) {
    // NOTE(review): the String overload uses the prefix "Syntax error"
    // while this one uses "Syntax error at " - confirm which of the two
    // matches the text returned by input_position().
    String message = "Syntax error at ";
    message += _scanner.input_position();
    message += ": ";
    message += e.getLocalizedMessage();
    throw new IonReaderTextParsingException(message, e);
}
}