com.amazon.ion.impl.IonReaderTextUserX Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of ion-java Show documentation
Show all versions of ion-java Show documentation
A Java implementation of the Amazon Ion data notation.
/*
* Copyright 2007-2019 Amazon.com, Inc. or its affiliates. All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License").
* You may not use this file except in compliance with the License.
* A copy of the License is located at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* or in the "license" file accompanying this file. This file is distributed
* on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
* express or implied. See the License for the specific language governing
* permissions and limitations under the License.
*/
package com.amazon.ion.impl;
import static com.amazon.ion.SystemSymbols.ION_1_0;
import static com.amazon.ion.SystemSymbols.ION_SYMBOL_TABLE;
import com.amazon.ion.IonCatalog;
import com.amazon.ion.IonType;
import com.amazon.ion.OffsetSpan;
import com.amazon.ion.SeekableReader;
import com.amazon.ion.Span;
import com.amazon.ion.SpanProvider;
import com.amazon.ion.SymbolTable;
import com.amazon.ion.SymbolToken;
import com.amazon.ion.TextSpan;
import com.amazon.ion.UnknownSymbolException;
import com.amazon.ion.UnsupportedIonVersionException;
import java.util.regex.Pattern;
/**
* The text user reader adds support for symbols, and recognizes,
* consumes, and processes the system values $ion_1_0 and
* local symbol tables (tagged with $ion_symbol_table).
*
* Should this materialize and "symbolate" all the symbol
* values as they come through? - No.
*
* Probably if we want the symbol id's to be the same for this
* reader as it is for other variants. Hmmm, that's expensive
* when you don't need it (which is most of the time).
*
* This will not auto-populate a symbol table. In the event
* a symbol is a '$' symbol id symbol this will return
* that value. If the string is present in the current symbol
* table it will return the id, which would be true if the
* symbol is a system symbol or if there is a local symbol
* table in the input stream. Otherwise it returns the
* undefined symbol value.
*
*/
class IonReaderTextUserX
extends IonReaderTextSystemX
implements _Private_ReaderWriter
{
/** Matches text shaped like an Ion version marker: {@code $ion_}, digits, {@code _}, digits. */
private static final Pattern ION_VERSION_MARKER_REGEX = Pattern.compile("^\\$ion_[0-9]+_[0-9]+$");
/**
* This is the physical start-of-stream offset when this reader was created.
* It must be subtracted from the logical offsets exposed by
* {@link OffsetSpan}s.
*/
private final int _physical_start_offset;
/** Factory used to build a local symbol table when one is found at the datagram level. */
private final _Private_LocalSymbolTableFactory _lstFactory;
// IonSystem _system; now in IonReaderTextSystemX where it could be null
/** Catalog handed to {@link #_lstFactory} when a local symbol table is built. */
IonCatalog _catalog;
/** Symbol table currently in effect; initialized to the system symbol table by the constructor. */
SymbolTable _symbols;
/**
 * Creates a user-level text reader over the given input.
 * The reader starts out with the system symbol table in effect.
 *
 * @param catalog             catalog passed to {@code lstFactory} when a local
 *                            symbol table is built
 * @param lstFactory          factory used to build local symbol tables
 * @param uis                 the input to read from
 * @param physicalStartOffset physical offset of the start of the stream; it is
 *                            subtracted from value offsets when spans are created
 */
protected IonReaderTextUserX(IonCatalog catalog,
                             _Private_LocalSymbolTableFactory lstFactory,
                             UnifiedInputStreamX uis,
                             int physicalStartOffset)
{
    super(uis);

    _catalog = catalog;
    _lstFactory = lstFactory;
    _physical_start_offset = physicalStartOffset;

    // until a local symbol table shows up, only system symbols are known
    _symbols = _system_symtab;
}
/**
 * Creates a user-level text reader whose physical start offset is zero.
 *
 * @param catalog    catalog passed to {@code lstFactory} when a local
 *                   symbol table is built
 * @param lstFactory factory used to build local symbol tables
 * @param uis        the input to read from
 */
protected IonReaderTextUserX(IonCatalog catalog,
                             _Private_LocalSymbolTableFactory lstFactory,
                             UnifiedInputStreamX uis)
{
    this(catalog, lstFactory, uis, 0);
}
/**
 * Looks ahead to see whether another value is coming up and returns
 * true if so. It may have to clean up a value that is only partially
 * consumed (for example a collection whose annotations have been read
 * and loaded but which the user has chosen not to step into).
 * <p>
 * The user reader variant of hasNext also watches for system values:
 * the Ion version marker ($ion_1_0) and local symbol tables. When one
 * of these is encountered the symbol table processing is performed and
 * the value itself is "skipped".
 *
 * @return true if more data remains, false on eof
 */
@Override
public boolean hasNext()
{
    return has_next_user_value();
}
/**
* Moves forward until the reader reaches a value that is not a system value,
* or until the loop condition on {@code _has_next_called} stops it.
* <p>
* Two kinds of datagram-level values are treated as system values here:
* a struct whose first annotation is {@code $ion_symbol_table}, and an
* unannotated symbol whose text looks like an Ion version marker. Each
* symbol table put in effect this way is also pushed onto the
* symbol table stack, which is emptied at the start of every call.
*
* @return true unless {@code _eof} is set
* @throws UnsupportedIonVersionException if a version marker other than
*         {@code $ion_1_0} is encountered
*/
private final boolean has_next_user_value()
{
// clear out our previous value
clear_system_value_stack();
// changed to 'while' since consumed
// values will not be counted
while (!_has_next_called)
{
// first move to the next value regardless of whether
// it's a system value or a user value
// NOTE(review): presumably this is what sets _has_next_called; confirm in the superclass
has_next_raw_value();
// system values are only at the datagram level
// we don't care about them if they're buried
// down in some other value - note that _value_type
// will be null at eof and on as yet undetermined
// numeric types (which are never system values)
if (_value_type != null && !isNullValue() && IonType.DATAGRAM.equals(getContainerType())) {
switch (_value_type) {
case STRUCT:
// only the first annotation is inspected
if (_annotation_count > 0 && ION_SYMBOL_TABLE.equals(_annotations[0].getText())) {
// the factory is handed this reader, so it reads the struct itself
_symbols = _lstFactory.newLocalSymtab(_catalog,
this,
true);
push_symbol_table(_symbols);
// clear the flag so the loop goes around again
_has_next_called = false;
}
break;
case SYMBOL:
if (_annotation_count == 0)
{
// $ion_1_0 is read as an IVM only if it is not annotated
String version = symbolValue().getText();
if (isIonVersionMarker(version))
{
if (ION_1_0.equals(version))
{
// a symbol flagged as KEYWORD_sid leaves the symbol table
// untouched; the flag below is still cleared in that case
if (_value_keyword != IonTokenConstsX.KEYWORD_sid)
{
symbol_table_reset();
push_symbol_table(_system_symtab);
}
_has_next_called = false;
}
else
{
// shaped like a version marker, but not a version this reader supports
throw new UnsupportedIonVersionException(version);
}
}
}
break;
default:
break;
}
}
}
return (!_eof);
}
/**
 * Tells whether the given text has the shape of an Ion version marker,
 * as defined by {@code ION_VERSION_MARKER_REGEX}.
 *
 * @param text the symbol text to test; may be null
 * @return true if {@code text} is non-null and matches the pattern
 */
private static boolean isIonVersionMarker(String text)
{
    if (text == null) {
        return false;
    }
    return ION_VERSION_MARKER_REGEX.matcher(text).matches();
}
/**
 * Steps over the current value, which is expected to be a symbol,
 * and puts the system symbol table back in effect.
 */
private final void symbol_table_reset()
{
    IonType consumed = next();
    assert IonType.SYMBOL.equals(consumed);

    _symbols = _system_symtab;
}
/**
 * Rejects a symbol token that has no text and whose symbol id lies
 * beyond the maximum id of the current symbol table. A null token is
 * accepted as-is.
 *
 * @param symbol the token to check; may be null
 * @throws UnknownSymbolException if the token has no text and its id is
 *         greater than the current symbol table's max id
 */
private void validateSymbolToken(SymbolToken symbol)
{
    if (symbol == null) {
        return;
    }
    boolean textMissing = (symbol.getText() == null);
    if (textMissing && symbol.getSid() > getSymbolTable().getMaxId()) {
        throw new UnknownSymbolException(symbol.getSid());
    }
}
/**
 * Returns the annotations reported by the superclass after running
 * each one through {@code validateSymbolToken}.
 *
 * @return the annotation tokens of the current value
 * @throws UnknownSymbolException if any annotation fails validation
 */
@Override
public SymbolToken[] getTypeAnnotationSymbols()
{
    SymbolToken[] tokens = super.getTypeAnnotationSymbols();
    for (int i = 0; i < tokens.length; i++) {
        validateSymbolToken(tokens[i]);
    }
    return tokens;
}
/**
 * Returns the field name token reported by the superclass after
 * running it through {@code validateSymbolToken}.
 *
 * @return the field name token of the current value
 * @throws UnknownSymbolException if the token fails validation
 */
@Override
public final SymbolToken getFieldNameSymbol()
{
    SymbolToken token = super.getFieldNameSymbol();
    validateSymbolToken(token);
    return token;
}
/**
 * Returns the symbol token reported by the superclass after running
 * it through {@code validateSymbolToken}.
 *
 * @return the symbol token of the current value
 * @throws UnknownSymbolException if the token fails validation
 */
@Override
public final SymbolToken symbolValue()
{
    SymbolToken token = super.symbolValue();
    validateSymbolToken(token);
    return token;
}
/**
 * Returns the symbol table currently in effect for this reader.
 *
 * @return the current symbol table
 */
@Override
public SymbolTable getSymbolTable() {
    return _symbols;
}
//
// This code handles the skipped symbol table
// support - it is cloned in IonReaderTreeUserX
// and IonReaderBinaryUserX
//
// SO ANY FIXES HERE WILL BE NEEDED IN THOSE
// TWO LOCATIONS AS WELL.
//
/** Number of entries currently held in {@link #_symbol_table_stack}. */
private int _symbol_table_top = 0;
/** Symbol tables pushed while system values were consumed; push_symbol_table doubles it when full. */
private SymbolTable[] _symbol_table_stack = new SymbolTable[3]; // 3 is rare, IVM followed by a local sym tab with open content
/**
 * Empties the symbol table stack, nulling out every occupied slot
 * from the top down.
 */
private void clear_system_value_stack()
{
    while (_symbol_table_top > 0) {
        _symbol_table_stack[--_symbol_table_top] = null;
    }
}
/**
 * Pushes a symbol table onto the stack, doubling the backing array
 * first when it is full.
 *
 * @param symbols the symbol table to push; must not be null
 */
private void push_symbol_table(SymbolTable symbols)
{
    assert symbols != null;

    final int capacity = _symbol_table_stack.length;
    if (_symbol_table_top >= capacity) {
        SymbolTable[] grown = new SymbolTable[capacity * 2];
        System.arraycopy(_symbol_table_stack, 0, grown, 0, capacity);
        _symbol_table_stack = grown;
    }

    _symbol_table_stack[_symbol_table_top] = symbols;
    _symbol_table_top++;
}
/**
 * Removes and returns the most recently pushed symbol table.
 *
 * @return the symbol table on top of the stack, or null when the
 *         stack is empty
 */
@Override
public SymbolTable pop_passed_symbol_table()
{
    if (_symbol_table_top > 0) {
        final int top = --_symbol_table_top;
        SymbolTable popped = _symbol_table_stack[top];
        _symbol_table_stack[top] = null;
        return popped;
    }
    return null;
}
/**
 * Snapshot of where a reader's current value starts: the data page,
 * the enclosing container type, and the start offset, line and column.
 * Finish positions are not tracked and are always reported as -1.
 */
private static final class IonReaderTextSpan
    extends DowncastingFaceted
    implements Span, TextSpan, OffsetSpan
{
    /** Value reported for every finish position. */
    private static final long NOT_TRACKED = -1;

    private final UnifiedDataPageX _data_page;
    private final IonType _container_type;
    private final long _start_offset;
    private final long _start_line;
    private final long _start_column;

    /**
     * Captures the position of the value the reader is currently on.
     *
     * @param reader the reader whose current value position is recorded
     */
    IonReaderTextSpan(IonReaderTextUserX reader)
    {
        // TODO: convert _start_char_offset from a long and data page
        //       to be an abstract reference into the Unified* data source
        UnifiedInputStreamX source = reader._scanner.getSourceStream();

        // TODO: this page isn't safe, except where we have only a single
        //       page of buffered input. Which is the case for the time
        //       being. Later, when this is stream aware, this needs to change.
        _data_page = source._buffer.getCurrentPage();
        _container_type = reader.getContainerType();

        // stored relative to the reader's physical start offset
        _start_offset = reader._value_start_offset - reader._physical_start_offset;
        _start_line = reader._value_start_line;
        _start_column = reader._value_start_column;
    }

    /**
     * @return the start line
     * @throws IllegalStateException if the recorded line is less than 1
     */
    public long getStartLine()
    {
        if (_start_line >= 1) {
            return _start_line;
        }
        throw new IllegalStateException("not positioned on a reader");
    }

    /**
     * @return the start column
     * @throws IllegalStateException if the recorded column is negative
     */
    public long getStartColumn()
    {
        if (_start_column >= 0) {
            return _start_column;
        }
        throw new IllegalStateException("not positioned on a reader");
    }

    /** @return always -1 */
    public long getFinishLine()
    {
        return NOT_TRACKED;
    }

    /** @return always -1 */
    public long getFinishColumn()
    {
        return NOT_TRACKED;
    }

    /** @return the start offset, relative to the reader's physical start offset */
    public long getStartOffset()
    {
        return _start_offset;
    }

    /** @return always -1 */
    public long getFinishOffset()
    {
        return NOT_TRACKED;
    }

    /** @return the container type the reader was in when the span was taken */
    IonType getContainerType()
    {
        return _container_type;
    }

    /** @return the data page that was current when the span was taken */
    UnifiedDataPageX getDataPage()
    {
        return _data_page;
    }
}
/**
 * Builds a span describing the start of the value the reader is
 * currently positioned on.
 *
 * @return a new span for the current value
 * @throws IllegalStateException if the reader is not on a value
 */
public Span currentSpanImpl()
{
    if (getType() != null) {
        return new IonReaderTextSpan(this);
    }
    throw new IllegalStateException("must be on a value");
}
/**
 * Re-initializes this reader on a fresh stream that runs from the
 * span's start position to the limit of the span's data page.
 *
 * @param span a span previously produced by this kind of reader
 * @throws IllegalArgumentException if the span is not an
 *         {@code IonReaderTextSpan}
 */
private void hoistImpl(Span span)
{
    if (!(span instanceof IonReaderTextSpan)) {
        throw new IllegalArgumentException("position must match the reader");
    }
    final IonReaderTextSpan target = (IonReaderTextSpan) span;

    final UnifiedInputStreamX source = _scanner.getSourceStream();
    final UnifiedDataPageX page = target.getDataPage();

    // spans hold offsets relative to the physical start; convert back
    final int offset = (int) target._start_offset + _physical_start_offset;
    final int length = page._page_limit - offset;

    // we're going to cast this value down. Since we only support
    // in memory single buffered chars here this is ok.
    assert target.getStartOffset() <= Integer.MAX_VALUE;

    // Now - create a new stream
    // TODO: this is a pretty expensive way to do this. UnifiedInputStreamX
    //       needs to have a reset method added that can reset the position
    //       and length of the input to be some subset of the original source.
    //       This would avoid a lot of object creation (and wasted destruction).
    //       But this is a time-to-market solution here. The change can be
    //       made as support for streams is added.
    final UnifiedInputStreamX hoisted;
    if (source._is_byte_data) {
        byte[] bytes = source.getByteArray();
        assert bytes != null;
        hoisted = UnifiedInputStreamX.makeStream(bytes, offset, length);
    }
    else {
        char[] chars = source.getCharArray();
        assert chars != null;
        hoisted = UnifiedInputStreamX.makeStream(chars, offset, length);
    }

    re_init(hoisted, target.getContainerType(), target._start_line, target._start_column);
}
//========================================================================
/**
 * Returns a facet of this reader for the requested type.
 * <p>
 * {@link SpanProvider} is always available. {@link SeekableReader} is
 * offered only when the scanner reports buffered input. Any other type
 * is delegated to the superclass. A new facet object is created on
 * each call.
 *
 * @param <T>       the facet type
 * @param facetType the class object of the requested facet
 * @return a facet instance cast to {@code facetType}, or whatever the
 *         superclass returns for types not handled here
 */
@Override
public <T> T asFacet(Class<T> facetType)
{
    // The type parameter declarations are required: without <T> the
    // return type is undeclared, and a raw Class makes cast() return Object.
    if (facetType == SpanProvider.class)
    {
        return facetType.cast(new SpanProviderFacet());
    }
    if (facetType == SeekableReader.class && _scanner.isBufferedInput())
    {
        return facetType.cast(new SeekableReaderFacet());
    }
    return super.asFacet(facetType);
}
/**
 * {@link SpanProvider} facet that forwards to the enclosing reader.
 */
private class SpanProviderFacet
    implements SpanProvider
{
    /** @return the span of the enclosing reader's current value */
    public Span currentSpan()
    {
        return IonReaderTextUserX.this.currentSpanImpl();
    }
}
/**
 * {@link SeekableReader} facet that forwards hoisting to the enclosing
 * reader; span creation is inherited from {@code SpanProviderFacet}.
 */
private final class SeekableReaderFacet
    extends SpanProviderFacet
    implements SeekableReader
{
    /** @param span the span to reposition the enclosing reader on */
    public void hoist(Span span)
    {
        IonReaderTextUserX.this.hoistImpl(span);
    }
}
}