org.apache.wink.json4j.internal.Tokenizer Maven / Gradle / Ivy
The newest version!
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.wink.json4j.internal;
import java.io.BufferedReader;
import java.io.CharArrayReader;
import java.io.IOException;
import java.io.PushbackReader;
import java.io.Reader;
import java.io.StringReader;
/**
* Tokenizes a stream into JSON tokens.
*/
public class Tokenizer {
/**
 * The reader from which the JSON string is being read. The constructors wrap
 * the caller's reader in a BufferedReader unless it is already an in-memory,
 * pushback, or buffered reader.
 */
private Reader reader;
/**
 * The current line position in the JSON string. Set to 0 by the constructors
 * and incremented by readChar() whenever the previously read character was a
 * newline.
 */
private int lineNo;
/**
 * The current column position in the JSON string. Reset to 0 by readChar()
 * after a newline and incremented for every character successfully read.
 */
private int colNo;
/**
 * The last character read from the JSON string, or -1 once the reader has
 * reported end of stream.
 */
private int lastChar;
/**
 * Whether or not the parser should be spec strict, or allow unquoted strings and comments.
 * Defaults to false (lenient) unless a constructor is given another value.
 */
private boolean strict = false;
/**
 * Creates a lenient (non-strict) tokenizer. Same as {@code Tokenizer(reader, false)}.
 *
 * @param reader The reader from which the JSON string is read.
 *
 * @throws IOException Thrown if the underlying reader fails while the first character is read.
 */
public Tokenizer(Reader reader) throws IOException {
    // Chain to the two-argument constructor so the buffering decision and the
    // line/column bookkeeping are implemented in exactly one place.
    this(reader, false);
}
/**
 * Creates a tokenizer over the given reader and reads the first character so
 * that next() can be called immediately.
 *
 * @param reader The reader from which the JSON string is read.
 * @param strict Whether or not the parser should be spec strict, or allow unquoted strings and comments.
 *
 * @throws IOException Thrown if the underlying reader fails while the first character is read.
 */
public Tokenizer(Reader reader, boolean strict) throws IOException {
    super();
    // In-memory readers gain nothing from buffering, a PushbackReader most
    // likely wraps a buffered stream already, and anything derived from
    // BufferedReader is buffered by definition. Everything else gets wrapped.
    final boolean needsNoBuffer = reader instanceof StringReader
            || reader instanceof CharArrayReader
            || reader instanceof PushbackReader
            || reader instanceof BufferedReader;
    this.reader = needsNoBuffer ? reader : new BufferedReader(reader);
    this.strict = strict;
    this.lineNo = 0;
    this.colNo = 0;
    // Priming lastChar with a newline makes the first readChar() advance the
    // line counter, so the first character is reported on line 1.
    this.lastChar = '\n';
    readChar();
}
/**
 * Method to get the next JSON token from the JSON String.
 * Leading whitespace is skipped. In non-strict mode comments are skipped as
 * well and unquoted strings are returned as string tokens; in strict mode
 * both are rejected.
 *
 * @return The next token in the stream, returning Token.TokenEOF when finished.
 *
 * @throws IOException Thrown if unexpected read error occurs or invalid character encountered in JSON string.
 */
public Token next() throws IOException {
    // Loop rather than recurse after a comment, so that input consisting of a
    // very long run of comments cannot exhaust the call stack.
    while (true) {
        // Skip whitespace with our own checker instead of Character.isWhitespace.
        while (isWhitespace((char)lastChar)) {
            readChar();
        }
        switch (lastChar) {
            // Punctuation and end of stream: consume the character and
            // return the matching shared token.
            case -1: readChar(); return Token.TokenEOF;
            case '{': readChar(); return Token.TokenBraceL;
            case '}': readChar(); return Token.TokenBraceR;
            case '[': readChar(); return Token.TokenBrackL;
            case ']': readChar(); return Token.TokenBrackR;
            case ':': readChar(); return Token.TokenColon;
            case ',': readChar(); return Token.TokenComma;
            case '"':
            case '\'':
                String stringValue = readString();
                return new Token(stringValue);
            case '-':
            case '.':
            case '0':
            case '1':
            case '2':
            case '3':
            case '4':
            case '5':
            case '6':
            case '7':
            case '8':
            case '9':
                Number numberValue = readNumber();
                return new Token(numberValue);
            case 'n':
            case 't':
            case 'f':
                String ident = readIdentifier();
                if (ident.equals("null")) return Token.TokenNull;
                if (ident.equals("true")) return Token.TokenTrue;
                if (ident.equals("false")) return Token.TokenFalse;
                if (this.strict) {
                    throw new IOException("Unexpected unquoted string literal: [" + ident + "]. Unquoted strings are not allowed in strict mode");
                }
                // Unquoted string. Still bad input from a spec perspective,
                // but allowable in non-strict mode.
                return new Token(ident);
            case '/':
                if (this.strict) {
                    throw new IOException("Unexpected character / encountered " + onLineCol() + ". Comments are not allowed in strict mode");
                }
                // Not strict: devour the comment and look for the next token.
                readComment();
                continue;
            default:
                if (this.strict) {
                    throw new IOException("Unexpected character '" + (char)lastChar + "' " + onLineCol() + ". Unquoted strings are not allowed in strict mode.");
                }
                if (isValidUnquotedChar((char)lastChar)) {
                    // Unquoted string. Bad form, but some other parsers accept it.
                    String unquotedStr = readIdentifier();
                    return new Token(unquotedStr);
                }
                throw new IOException("Unexpected character '" + (char)lastChar + "' " + onLineCol());
        }
    }
}
/**
 * Method to read out comments in the 'JSON'. JSON normally should not
 * have comments, but non-strict mode is permissive about them.
 * Called with the opening '/' as the current character. A line comment is
 * consumed up to, but not including, the terminating newline. A block
 * comment is consumed through its closing delimiter. A comment that is still
 * open at end of stream is accepted silently. If the character after the
 * slash starts neither kind of comment, the slash is simply discarded.
 *
 * @throws IOException Thrown when the underlying reader throws an error.
 */
private void readComment() throws IOException {
    readChar();
    if (lastChar == '/') {
        // Line comment: read to the newline or the end of the stream.
        while (lastChar != '\n' && lastChar != -1) {
            readChar();
        }
    } else if (lastChar == '*') {
        // Block comment: slide a two-character window over the input until it
        // holds the closing star-slash pair.
        readChar();
        if (lastChar == -1) {
            return;
        }
        int previous = lastChar;
        readChar();
        while (lastChar != -1 && !(previous == '*' && lastChar == '/')) {
            previous = lastChar;
            readChar();
        }
        if (lastChar != -1) {
            // Consume the closing '/'. Leaving it as the current character
            // would let it pair with a following '/' or '*' and be mistaken
            // for the start of another comment.
            readChar();
        }
    }
}
/**
* Method to read a string from the JSON string, converting escapes accordingly.
* @return The parsed JSON string with all escapes properly converted.
*
* @throws IOException Thrown on unterminated strings, invalid characters, bad escapes, and so on. Basically, invalid JSON.
*/
private String readString() throws IOException {
StringBuffer sb = new StringBuffer();
int delim = lastChar;
int l = lineNo;
int c = colNo;
readChar();
while ((-1 != lastChar) && (delim != lastChar)) {
StringBuffer digitBuffer;
if (lastChar != '\\') {
sb.append((char)lastChar);
readChar();
continue;
}
readChar();
switch (lastChar) {
case 'b': readChar(); sb.append('\b'); continue;
case 'f': readChar(); sb.append('\f'); continue;
case 'n': readChar(); sb.append('\n'); continue;
case 'r': readChar(); sb.append('\r'); continue;
case 't': readChar(); sb.append('\t'); continue;
case '\'': readChar(); sb.append('\''); continue;
case '"': readChar(); sb.append('"'); continue;
case '\\': readChar(); sb.append('\\'); continue;
case '/': readChar(); sb.append('/'); continue;
// hex constant
// unicode constant
case 'x':
case 'u':
digitBuffer = new StringBuffer();
int toRead = 2;
if (lastChar == 'u') toRead = 4;
for (int i=0; i= Integer.MIN_VALUE)) {
return new Integer(value.intValue());
}
else {
return value;
}
}
if (string.equals("0")) {
return new Integer(0);
} else if (string.startsWith("0") && string.length() > 1) {
Long value = Long.valueOf(sign + string.substring(1),8);
if (value.longValue() <= Integer.MAX_VALUE && (value.longValue() >= Integer.MIN_VALUE)) {
return new Integer(value.intValue());
}
else {
return value;
}
}
/**
* We have to check for the exponential and treat appropriately
* Exponentials should be treated as Doubles.
*/
if (string.indexOf("e") != -1 || string.indexOf("E") != -1) {
return Double.valueOf(sign + string);
} else {
Long value = Long.valueOf(sign + string,10);
if (value.longValue() <= Integer.MAX_VALUE && (value.longValue() >= Integer.MIN_VALUE)) {
return new Integer(value.intValue());
}
else {
return value;
}
}
} catch (NumberFormatException e) {
IOException iox = new IOException("Invalid number literal " + onLineCol(l,c));
iox.initCause(e);
throw iox;
}
}
/**
 * Method to indicate if the character read is a HEX digit or not.
 * @param c The character to check for being a HEX digit.
 * @return true for '0'-'9', 'A'-'F' and 'a'-'f'; false for anything else.
 */
private boolean isHexDigit(int c) {
    final boolean decimal = c >= '0' && c <= '9';
    final boolean upperHex = c >= 'A' && c <= 'F';
    final boolean lowerHex = c >= 'a' && c <= 'f';
    return decimal || upperHex || lowerHex;
}
/**
 * Method to indicate if the character read is an OCTAL digit or not.
 * @param c The character to check for being an OCTAL digit.
 * @return true for '0' through '7'; false for anything else.
 */
private boolean isOctalDigit(int c) {
    return c >= '0' && c <= '7';
}
/**
 * Method to indicate if the character read may be part of a number literal.
 * Despite the name this is broader than a plain digit test: besides '0'-'9'
 * it accepts the decimal point, the exponent markers 'e'/'E', the hex
 * markers 'x'/'X', and the signs '+' and '-'.
 * @param c The character to check.
 * @return true if the character is one of the characters listed above.
 */
private boolean isDigitChar(int c) {
    if (c >= '0' && c <= '9') {
        return true;
    }
    final boolean decimalPoint = c == '.';
    final boolean exponentMarker = c == 'e' || c == 'E';
    final boolean hexMarker = c == 'x' || c == 'X';
    final boolean sign = c == '+' || c == '-';
    return decimalPoint || exponentMarker || hexMarker || sign;
}
/**
 * Method to read a run of identifier characters starting at the current
 * character. In strict mode only letters are accepted, which is all that is
 * needed for 'null', 'true', and 'false'. In non-strict mode every character
 * accepted by isValidUnquotedChar is taken, so unquoted strings can be read.
 *
 * @return The characters read; empty if the current character does not qualify.
 *
 * @throws IOException Thrown when the underlying reader throws an error.
 */
private String readIdentifier() throws IOException {
    final StringBuilder text = new StringBuilder();
    while (lastChar != -1) {
        final char current = (char) lastChar;
        final boolean accepted = this.strict
                ? Character.isLetter(current)
                : isValidUnquotedChar(current);
        if (!accepted) {
            break;
        }
        text.append(current);
        readChar();
    }
    return text.toString();
}
/**
 * Method to read the next character from the string, keeping track of line/column position.
 * If the character being replaced was a newline, the line counter advances
 * and the column counter restarts first. The column counter is not advanced
 * when the reader reports end of stream (-1).
 *
 * @throws IOException Thrown when underlying reader throws an error.
 */
private void readChar() throws IOException {
    final boolean previousWasNewline = (lastChar == '\n');
    if (previousWasNewline) {
        this.lineNo++;
        this.colNo = 0;
    }
    lastChar = reader.read();
    if (lastChar != -1) {
        colNo++;
    }
}
/**
 * Method to generate a String describing the given line and column position in the JSON string.
 * @param line The line number to report.
 * @param col The column number to report.
 * @return Text of the form "on line L, column C".
 */
private String onLineCol(int line, int col) {
    final StringBuilder where = new StringBuilder("on line ");
    where.append(line);
    where.append(", column ");
    where.append(col);
    return where.toString();
}
/**
 * Method to generate a String indicating the current line and column position in the JSON string.
 * @return Text of the form "on line L, column C" for the tokenizer's current position.
 */
public String onLineCol() {
    final int currentLine = this.lineNo;
    final int currentColumn = this.colNo;
    return onLineCol(currentLine, currentColumn);
}
/**
 * High speed test for whitespace, used in place of Character.isWhitespace.
 * Accepts the control characters U+0009 through U+000D and U+001C through
 * U+001F, the space character, and the Unicode line and paragraph separators
 * (U+2028 and U+2029).
 *
 * @param c The character to test.
 * @return if the indicated character is whitespace.
 */
public boolean isWhitespace(char c) {
    switch (c) {
        case 9:      // U+0009 horizontal tab
        case 10:     // U+000A line feed
        case 11:     // U+000B vertical tab
        case 12:     // U+000C form feed
        case 13:     // U+000D carriage return
        case 28:     // U+001C file separator
        case 29:     // U+001D group separator
        case 30:     // U+001E record separator
        case 31:     // U+001F unit separator
        case ' ':    // space
        // Character.PARAGRAPH_SEPARATOR is a general-category code (value 14),
        // not a character, so using it as a case label matched U+000E and
        // never the real separators. Match the actual code points instead.
        case 0x2028: // line separator
        case 0x2029: // paragraph separator
            return true;
        default:
            return false;
    }
}
/**
 * For non strict mode, check if char is valid when not quoted.
 * Letters and digits are valid, as are the punctuation characters
 * '@', '-', '.', '$', '+', '!' and '_'.
 * @param c The character to check.
 * @return if character is valid unquoted character.
 */
public boolean isValidUnquotedChar(char c) {
    final boolean alphanumeric = Character.isLetterOrDigit(c);
    final boolean allowedPunctuation = c == '@' || c == '-' || c == '.'
            || c == '$' || c == '+' || c == '!' || c == '_';
    return alphanumeric || allowedPunctuation;
}
}
© 2015 - 2024 Weber Informatics LLC | Privacy Policy