// com.threecrickets.jvm.json.nashorn.util.NashornTokener (Maven / Gradle / Ivy)
/**
* Based on code from JSON.org, modified to support Nashorn. The original code
* came with the following notice:
*
* Copyright (c) 2002 JSON.org
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* The Software shall be used for Good, not Evil.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
package com.threecrickets.jvm.json.nashorn.util;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.Reader;
import java.io.StringReader;
import com.threecrickets.jvm.json.JsonSyntaxError;
import jdk.nashorn.internal.objects.NativeArray;
import jdk.nashorn.internal.runtime.ScriptObject;
/**
* A tokener takes a source string and extracts characters and tokens from it.
*/
public class NashornTokener
{
//
// Construction
//
/**
 * Creates a tokener that pulls its characters from the given reader.
 *
 * A reader that reports mark support is used directly; any other reader is
 * wrapped in a {@link BufferedReader}.
 *
 * @param reader
 *        The character source.
 */
public NashornTokener( Reader reader )
{
	if( reader.markSupported() )
	{
		this.reader = reader;
	}
	else
	{
		this.reader = new BufferedReader( reader );
	}

	// Position counters: nothing consumed yet, first line.
	this.line = 1;
	this.character = 1;
	this.index = 0;

	// Push-back state: nothing read, nothing pushed back, not at the end.
	this.previous = 0;
	this.usePrevious = false;
	this.eof = false;
}
/**
 * Construct a tokener from a string.
 *
 * The string is wrapped in a {@link StringReader} and handed to the
 * reader-based constructor.
 *
 * @param s
 *        A source string.
 */
public NashornTokener( String s )
{
this( new StringReader( s ) );
}
//
// Attributes
//
/**
 * Tell whether the tokener has run out of input.
 *
 * @return true if the end of the source has been read and no pushed-back
 *         character is waiting to be returned by {@link #next()}.
 */
public boolean end()
{
	if( usePrevious )
	{
		// A pushed-back character is still pending.
		return false;
	}
	return eof;
}
/**
 * Check whether {@link #next()} still has characters to deliver.
 *
 * The check is done by reading one character and, if the end was not
 * reached, pushing it back. When the end has been reached the probing read
 * is not undone.
 *
 * @return true if not yet at the end of the source.
 * @throws JsonSyntaxError
 *         In case of a JSON conversion error
 */
public boolean more() throws JsonSyntaxError
{
	next();
	boolean exhausted = end();
	if( !exhausted )
	{
		back();
	}
	return !exhausted;
}
//
// Operations
//
/**
 * Push the most recently read character back, so that the following call
 * to {@link #next()} returns it again. This gives one character of
 * lookahead, e.g. to test for a digit or letter before parsing a number or
 * identifier.
 *
 * Only a single step back is possible: calling this twice in a row, or
 * before anything has been read, is rejected.
 *
 * @throws JsonSyntaxError
 *         If a character is already pushed back or nothing has been read
 *         yet
 */
public void back() throws JsonSyntaxError
{
	boolean canStepBack = !usePrevious && index > 0;
	if( !canStepBack )
	{
		throw new JsonSyntaxError( "Stepping back two steps is not supported", line, character );
	}
	eof = false;
	usePrevious = true;
	character--;
	index--;
}
/**
 * Read the next character of the source, updating the index, line and
 * character counters.
 *
 * If a character was pushed back with {@link #back()}, that character is
 * returned instead of reading from the underlying reader.
 *
 * @return The next character, or 0 if past the end of the source string.
 * @throws JsonSyntaxError
 *         If the underlying reader fails
 */
public char next() throws JsonSyntaxError
{
	final int code;
	if( !usePrevious )
	{
		int read;
		try
		{
			read = reader.read();
		}
		catch( IOException exception )
		{
			throw new JsonSyntaxError( exception.getMessage(), line, character );
		}
		if( read > 0 )
		{
			code = read;
		}
		else
		{
			// End of stream (a NUL character in the input is treated alike)
			eof = true;
			code = 0;
		}
	}
	else
	{
		// Replay the pushed-back character
		usePrevious = false;
		code = previous;
	}

	index++;
	if( previous == '\r' )
	{
		// A carriage return ended the previous line; a line feed right
		// after it belongs to the same line break.
		line++;
		if( code == '\n' )
		{
			character = 0;
		}
		else
		{
			character = 1;
		}
	}
	else if( code == '\n' )
	{
		line++;
		character = 0;
	}
	else
	{
		character++;
	}

	previous = (char) code;
	return previous;
}
/**
 * Read the next character and require it to be a specific one.
 *
 * @param c
 *        The character that is expected.
 * @return The character that was read (equal to {@code c}).
 * @throws JsonSyntaxError
 *         if the character read is not {@code c}.
 */
public char next( char c ) throws JsonSyntaxError
{
	char actual = next();
	if( actual == c )
	{
		return actual;
	}
	throw syntaxError( "Expected '" + c + "' and instead saw '" + actual + "'" );
}
/**
 * Read exactly n characters.
 *
 * @param n
 *        How many characters to read.
 * @return A string made of the n characters read; the empty string when n
 *         is 0.
 * @throws JsonSyntaxError
 *         Substring bounds error if the source ends before n characters
 *         could be read.
 */
public String next( int n ) throws JsonSyntaxError
{
	if( n == 0 )
	{
		return "";
	}
	char[] chars = new char[n];
	for( int i = 0; i < n; i++ )
	{
		chars[i] = next();
		if( end() )
		{
			throw syntaxError( "Substring bounds error" );
		}
	}
	return String.valueOf( chars );
}
/**
 * Read the next character that is not whitespace. Every character up to
 * and including the space character counts as whitespace here.
 *
 * @return The character found, or 0 if there are no more characters.
 * @throws JsonSyntaxError
 *         In case of a JSON conversion error
 */
public char nextClean() throws JsonSyntaxError
{
	char c;
	do
	{
		c = next();
	}
	while( c != 0 && c <= ' ' );
	return c;
}
/**
 * Return the characters up to the next close quote character. Backslash
 * processing is done. The formal JSON format does not allow strings in
 * single quotes, but an implementation is allowed to accept them.
 *
 * The opening quote must already have been consumed by the caller; the
 * closing quote is consumed here and is not part of the result.
 *
 * @param quote
 *        The quoting character, either {@code "} (double quote) or
 *        {@code '} (single quote).
 * @return The unescaped string content.
 * @throws JsonSyntaxError
 *         If the string is unterminated (end of source or a raw line
 *         break before the closing quote) or contains an illegal escape
 *         sequence.
 */
public String nextString( char quote ) throws JsonSyntaxError
{
	StringBuilder sb = new StringBuilder();
	for( ;; )
	{
		char c = next();
		switch( c )
		{
			case 0:
			case '\n':
			case '\r':
				throw syntaxError( "Unterminated string" );
			case '\\':
				c = next();
				switch( c )
				{
					case 'b':
						sb.append( '\b' );
						break;
					case 't':
						sb.append( '\t' );
						break;
					case 'n':
						sb.append( '\n' );
						break;
					case 'f':
						sb.append( '\f' );
						break;
					case 'r':
						sb.append( '\r' );
						break;
					case 'u':
						sb.append( unicodeEscape( next( 4 ) ) );
						break;
					case '"':
					case '\'':
					case '\\':
					case '/':
						sb.append( c );
						break;
					default:
						throw syntaxError( "Illegal escape." );
				}
				break;
			default:
				if( c == quote )
				{
					return sb.toString();
				}
				sb.append( c );
		}
	}
}

/**
 * Decode the hexadecimal digits of a {@code \}{@code uXXXX} escape.
 *
 * Each character must be a hexadecimal digit. Unlike
 * {@link Integer#parseInt(String, int)} this rejects a leading sign, and a
 * malformed escape is reported as a {@link JsonSyntaxError} rather than an
 * unchecked {@link NumberFormatException}.
 *
 * @param hex
 *        The characters following the {@code u}.
 * @return The decoded UTF-16 code unit.
 * @throws JsonSyntaxError
 *         If any character is not a hexadecimal digit.
 */
private char unicodeEscape( String hex ) throws JsonSyntaxError
{
	int codeUnit = 0;
	for( int i = 0; i < hex.length(); i++ )
	{
		int digit = Character.digit( hex.charAt( i ), 16 );
		if( digit < 0 )
		{
			throw syntaxError( "Illegal escape." );
		}
		codeUnit = ( codeUnit << 4 ) | digit;
	}
	return (char) codeUnit;
}
/**
 * Collect text up to, but not including, the given delimiter or the end of
 * the line, whichever comes first. The terminating character is left in
 * the source (unless the source ended).
 *
 * @param d
 *        A delimiter character.
 * @return The collected text, trimmed.
 * @throws JsonSyntaxError
 *         In case of a JSON conversion error
 */
public String nextTo( char d ) throws JsonSyntaxError
{
	StringBuilder text = new StringBuilder();
	char c = next();
	while( c != d && c != 0 && c != '\n' && c != '\r' )
	{
		text.append( c );
		c = next();
	}
	if( c != 0 )
	{
		// Leave the terminator for the next read
		back();
	}
	return text.toString().trim();
}
/**
 * Collect text up to, but not including, any one of the given delimiter
 * characters or the end of the line, whichever comes first. The
 * terminating character is left in the source (unless the source ended).
 *
 * @param delimiters
 *        A set of delimiter characters.
 * @return The collected text, trimmed.
 * @throws JsonSyntaxError
 *         In case of a JSON conversion error
 */
public String nextTo( String delimiters ) throws JsonSyntaxError
{
	StringBuilder text = new StringBuilder();
	for( ;; )
	{
		char c = next();
		boolean atDelimiter = delimiters.indexOf( c ) >= 0;
		boolean atLineOrSourceEnd = c == 0 || c == '\n' || c == '\r';
		if( !atDelimiter && !atLineOrSourceEnd )
		{
			text.append( c );
			continue;
		}
		if( c != 0 )
		{
			// Leave the terminator for the next read
			back();
		}
		return text.toString().trim();
	}
}
/**
 * Read the next value from the source.
 *
 * Quoted text yields a String, '{' yields the result of
 * {@link #createNativeObject()}, '[' or '(' yields the result of
 * {@link #createNativeArray()}, and any other (unquoted) text is handed to
 * the internal string-to-value conversion, which recognizes true, false,
 * null and numbers.
 *
 * @return The value read.
 * @throws JsonSyntaxError
 *         If syntax error.
 */
public Object nextValue() throws JsonSyntaxError
{
	char c = nextClean();
	if( c == '"' || c == '\'' )
	{
		return nextString( c );
	}
	if( c == '{' )
	{
		back();
		return createNativeObject();
	}
	if( c == '[' || c == '(' )
	{
		back();
		return createNativeArray();
	}

	/*
	 * Unquoted text: true, false, null, a number, or (non-standard) a bare
	 * string. Gather characters until a control character, the end of the
	 * source, or a formatting character shows up.
	 */
	StringBuilder raw = new StringBuilder();
	for( ; c >= ' ' && ",:]}/\\\"[{;=#".indexOf( c ) < 0; c = next() )
	{
		raw.append( c );
	}
	// The character that stopped the scan is not part of the value
	back();

	String text = raw.toString().trim();
	if( text.isEmpty() )
	{
		throw syntaxError( "Missing value" );
	}
	return stringToValue( text );
}
/**
 * Skip characters until the next character is the requested character. If
 * the requested character is not found, no characters are skipped: the
 * reader is reset to where it was and the complete tokener state (position
 * counters, pushed-back character and end-of-source flag) is restored.
 *
 * @param to
 *        A character to skip to.
 * @return The requested character, or zero if the requested character is
 *         not found.
 * @throws JsonSyntaxError
 *         If marking or resetting the underlying reader fails
 */
public char skipTo( char to ) throws JsonSyntaxError
{
	// Snapshot everything next() mutates, so that a failed search can be
	// undone completely. Restoring only the counters would leave eof set
	// (making end() report true although input remains) and would lose a
	// character that had been pushed back before this call.
	final int startIndex = this.index;
	final int startCharacter = this.character;
	final int startLine = this.line;
	final char startPrevious = this.previous;
	final boolean startUsePrevious = this.usePrevious;
	final boolean startEof = this.eof;

	char c;
	try
	{
		// NOTE(review): some BufferedReader implementations may size their
		// buffer from this read-ahead limit; consider a bounded limit -
		// confirm against the targeted JDK.
		reader.mark( Integer.MAX_VALUE );
		do
		{
			c = next();
			if( c == 0 )
			{
				// Not found: rewind the reader and undo all state changes
				reader.reset();
				this.index = startIndex;
				this.character = startCharacter;
				this.line = startLine;
				this.previous = startPrevious;
				this.usePrevious = startUsePrevious;
				this.eof = startEof;
				return c;
			}
		}
		while( c != to );
	}
	catch( IOException exc )
	{
		throw new JsonSyntaxError( exc.getMessage(), line, character );
	}
	// Found: leave the requested character as the next one to be read
	back();
	return c;
}
/**
 * Build a syntax error for the current position. The error is returned,
 * not thrown, so callers write {@code throw syntaxError( ... )}.
 *
 * @param message
 *        The error message; the tokener's position text (see
 *        {@link #toString()}) is appended to it.
 * @return A JsonSyntaxError carrying the message and the current line and
 *         character, suitable for throwing
 */
public JsonSyntaxError syntaxError( String message )
{
	String located = message + this;
	return new JsonSyntaxError( located, line, character );
}
/**
 * Parse a complete JSON text, which must be either an object or an array.
 *
 * @return The result of {@link #createNativeObject()} if the text starts
 *         with '{', or of {@link #createNativeArray()} if it starts with
 *         '['
 * @throws JsonSyntaxError
 *         If the text starts with anything else, or in case of a JSON
 *         conversion error
 */
public Object createNative() throws JsonSyntaxError
{
	switch( nextClean() )
	{
		case '{':
			back();
			return createNativeObject();
		case '[':
			back();
			return createNativeArray();
		default:
			throw syntaxError( "JSON text must begin with either a '{' or a '['" );
	}
}
/**
 * Parse a JSON object into a Nashorn script object.
 *
 * Besides standard JSON, '=' and '=>' are tolerated in place of ':' after
 * a key, ';' is tolerated in place of ',' between pairs, and a trailing
 * separator before the closing '}' is allowed. Keys are read with
 * {@link #nextValue()} and converted to their string form, so an unquoted
 * {@code null} key becomes the string "null".
 *
 * @return The populated script object
 * @throws JsonSyntaxError
 *         In case of a JSON conversion error
 */
public ScriptObject createNativeObject() throws JsonSyntaxError
{
	ScriptObject scriptObject = NashornNativeUtil.newObject();
	if( nextClean() != '{' )
	{
		throw syntaxError( "A JSON object text must begin with '{'" );
	}
	for( ;; )
	{
		char c = nextClean();
		switch( c )
		{
			case 0:
				throw syntaxError( "A JSON object text must end with '}'" );
			case '}':
				return scriptObject;
			default:
				back();
		}

		// nextValue() returns null for the unquoted literal null, so the
		// null-safe String.valueOf is used instead of toString().
		String key = String.valueOf( nextValue() );

		/*
		 * The key is followed by ':'. We will also tolerate '=' or '=>'.
		 */
		c = nextClean();
		if( c == '=' )
		{
			if( next() != '>' )
			{
				back();
			}
		}
		else if( c != ':' )
		{
			throw syntaxError( "Expected a ':' after a key" );
		}
		scriptObject.put( key, nextValue(), true );

		/*
		 * Pairs are separated by ','. We will also tolerate ';'.
		 */
		switch( nextClean() )
		{
			case ';':
			case ',':
				if( nextClean() == '}' )
				{
					// Trailing separator right before the end
					return scriptObject;
				}
				back();
				break;
			case '}':
				return scriptObject;
			default:
				throw syntaxError( "Expected a ',' or '}'" );
		}
	}
}
/**
 * Parse a JSON array into a Nashorn native array.
 *
 * The array may be written with square brackets or, as a non-standard
 * form, with parentheses; the closing character must match the opening
 * one. Elements are separated by ',' (';' is tolerated), an omitted
 * element (two separators in a row) is stored as null, and a trailing
 * separator before the closing character is allowed.
 *
 * @return The populated native array
 * @throws JsonSyntaxError
 *         In case of a JSON conversion error
 */
public ScriptObject createNativeArray() throws JsonSyntaxError
{
	NativeArray nativeArray = NashornNativeUtil.newArray( 0 );
	int arrayIndex = 0;
	char c = nextClean();

	// The closing character that matches the opening one
	final char closer;
	if( c == '[' )
	{
		closer = ']';
	}
	else if( c == '(' )
	{
		closer = ')';
	}
	else
	{
		throw syntaxError( "A JSON array text must start with '['" );
	}

	// Empty array: compare against the matching closer rather than a
	// hard-coded ']', so that "()" works and "(]" is not accepted.
	if( nextClean() == closer )
	{
		return nativeArray;
	}
	back();

	for( ;; )
	{
		if( nextClean() == ',' )
		{
			// Omitted element
			back();
			nativeArray.set( arrayIndex++, null, 0 );
		}
		else
		{
			back();
			nativeArray.set( arrayIndex++, nextValue(), 0 );
		}
		c = nextClean();
		switch( c )
		{
			case ';':
			case ',':
				if( nextClean() == closer )
				{
					// Trailing separator right before the end
					return nativeArray;
				}
				back();
				break;
			case ']':
			case ')':
				if( c != closer )
				{
					throw syntaxError( "Expected a '" + closer + "'" );
				}
				return nativeArray;
			default:
				throw syntaxError( "Expected a ',' or '" + closer + "'" );
		}
	}
}
//
// Object
//
/**
 * Describe the tokener's current position, in a form meant to be appended
 * to error messages.
 *
 * @return " at {index} [character {character} line {line}]"
 */
@Override
public String toString()
{
	StringBuilder text = new StringBuilder( " at " );
	text.append( index );
	text.append( " [character " ).append( character );
	text.append( " line " ).append( line );
	text.append( "]" );
	return text.toString();
}
// //////////////////////////////////////////////////////////////////////////
// Private
// Character counter within the current line; starts at 1, is reset when a
// line break is read, and is decremented by back().
private int character;
// Set when the reader reported the end of the stream; cleared by back().
private boolean eof;
// Count of characters consumed so far; decremented by back().
private int index;
// Current line number, starting at 1.
private int line;
// The character most recently returned by next(); replayed after back().
private char previous;
// The character source: the caller's reader if it supports mark,
// otherwise a BufferedReader wrapped around it.
private Reader reader;
// True after back(): the next call to next() returns 'previous' instead of
// reading from the reader.
private boolean usePrevious;
/**
 * Convert unquoted text to a value.
 *
 * Recognizes true, false and null (case-insensitively) and numbers;
 * anything else is passed to {@code NashornNativeUtil.to}. Whole numbers
 * that fit in an int become Integers, other whole numbers are passed to
 * {@code NashornNativeUtil.wrap} as Longs, and text containing '.', 'e' or
 * 'E' is parsed as a Double. The non-standard 0x- hexadecimal convention
 * and a leading plus sign are accepted.
 *
 * @param s
 *        The text to convert; must not be null.
 * @return The converted value; null for the literal null.
 */
private static Object stringToValue( String s )
{
	if( s.equals( "" ) )
	{
		return NashornNativeUtil.to( s );
	}
	if( s.equalsIgnoreCase( "true" ) )
	{
		return Boolean.TRUE;
	}
	if( s.equalsIgnoreCase( "false" ) )
	{
		return Boolean.FALSE;
	}
	if( s.equalsIgnoreCase( "null" ) )
	{
		return null;
	}

	/*
	 * If it might be a number, try converting it. If a number cannot be
	 * produced, then the value will just be a string. A JSON parser may
	 * accept non-JSON forms as long as it accepts all correct JSON forms.
	 */
	char b = s.charAt( 0 );
	if( ( b >= '0' && b <= '9' ) || b == '.' || b == '-' || b == '+' )
	{
		if( b == '0' && s.length() > 2 && ( s.charAt( 1 ) == 'x' || s.charAt( 1 ) == 'X' ) )
		{
			try
			{
				// Tal Liron's patch: allow for long hexes
				return narrowLong( Long.parseLong( s.substring( 2 ), 16 ) );
			}
			catch( NumberFormatException ignored )
			{
				// Not a hex integer after all; try the decimal forms below
			}
		}
		try
		{
			if( s.indexOf( '.' ) > -1 || s.indexOf( 'e' ) > -1 || s.indexOf( 'E' ) > -1 )
			{
				return Double.valueOf( s );
			}
			return narrowLong( Long.parseLong( s ) );
		}
		catch( NumberFormatException ignored )
		{
			// Not a number; fall through and treat the text as a string
		}
	}
	return NashornNativeUtil.to( s );
}

/**
 * Represent a whole number as an Integer when it fits in an int, and
 * otherwise pass it to {@code NashornNativeUtil.wrap} as a Long.
 *
 * @param value
 *        The number.
 * @return An Integer, or the wrapped Long.
 */
private static Object narrowLong( long value )
{
	int asInt = (int) value;
	if( asInt == value )
	{
		return Integer.valueOf( asInt );
	}
	return NashornNativeUtil.wrap( Long.valueOf( value ) );
}
}