com.crashnote.external.config.impl.Tokenizer Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of crashnote-appengine Show documentation
Show all versions of crashnote-appengine Show documentation
Reports exceptions from Java apps on Appengine to crashnote.com
/**
* Copyright (C) 2011-2012 Typesafe Inc.
*/
package com.crashnote.external.config.impl;
import java.io.IOException;
import java.io.Reader;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Queue;
import com.crashnote.external.config.ConfigException;
import com.crashnote.external.config.ConfigOrigin;
import com.crashnote.external.config.ConfigSyntax;
final class Tokenizer {
/**
 * Internal carrier for a tokenization problem. It wraps the problem
 * {@link Token} so that it can be thrown out of the pull* helpers; this
 * exception should not leave this file.
 */
private static class ProblemException extends Exception {
    private static final long serialVersionUID = 1L;

    // the token describing what went wrong
    private final Token problem;

    ProblemException(final Token problemToken) {
        this.problem = problemToken;
    }

    /** Returns the problem token this exception was constructed with. */
    Token problem() {
        return this.problem;
    }
}
/**
 * Renders a code point (or the -1 end-of-input marker) as text suitable
 * for inclusion in an error message.
 *
 * @param codepoint the character read, or -1 for end of input
 * @return "newline", "tab", "end of file", a hex description for other
 *         ISO control characters, or the character itself
 */
private static String asString(final int codepoint) {
    switch (codepoint) {
    case '\n':
        return "newline";
    case '\t':
        return "tab";
    case -1:
        return "end of file";
    default:
        final String pattern = Character.isISOControl(codepoint)
                ? "control character 0x%x"
                : "%c";
        return String.format(pattern, codepoint);
    }
}
/**
 * Tokenizes a Reader. Does not close the reader; you have to arrange to do
 * that after you're done with the returned iterator.
 *
 * @param origin origin used to label tokens; it is cast to
 *               SimpleConfigOrigin by the iterator's constructor
 * @param input  character source to tokenize
 * @param flavor syntax flavor; comments are recognized for every flavor
 *               except {@code ConfigSyntax.JSON}
 * @return an iterator over the tokens of {@code input}
 */
static Iterator<Token> tokenize(final ConfigOrigin origin, final Reader input, final ConfigSyntax flavor) {
    final boolean allowComments = flavor != ConfigSyntax.JSON;
    return new TokenIterator(origin, input, allowComments);
}
private static class TokenIterator implements Iterator {
/**
 * Collects the whitespace that follows a simple value so that, when the
 * next token is also a simple value, the whitespace between the two can be
 * handed to the parser as an unquoted-text token.
 */
private static class WhitespaceSaver {
    // whitespace seen since the last simple value; has to be saved
    // inside value concatenations
    private final StringBuilder whitespace;
    // true when the previous token may need to value-concat with the next
    private boolean lastTokenWasSimpleValue;

    WhitespaceSaver() {
        this.whitespace = new StringBuilder();
        this.lastTokenWasSimpleValue = false;
    }

    /** Records a whitespace character, but only right after a simple value. */
    void add(final int c) {
        if (!lastTokenWasSimpleValue)
            return;
        whitespace.appendCodePoint(c);
    }

    /**
     * Updates the saver for the token that was just pulled.
     *
     * @return a whitespace token to emit before {@code t}, or null if
     *         there is none
     */
    Token check(final Token t, final ConfigOrigin baseOrigin, final int lineNumber) {
        if (!isSimpleValue(t)) {
            nextIsNotASimpleValue();
            return null;
        }
        return nextIsASimpleValue(baseOrigin, lineNumber);
    }

    // The next token is not a simple value: forget any whitespace that
    // was being saved between simple values.
    private void nextIsNotASimpleValue() {
        lastTokenWasSimpleValue = false;
        whitespace.setLength(0);
    }

    // The next token IS a simple value: if the previous one was too and
    // whitespace separated them, turn that whitespace into a token so the
    // parser has the option to concatenate it.
    private Token nextIsASimpleValue(final ConfigOrigin baseOrigin,
            final int lineNumber) {
        if (!lastTokenWasSimpleValue) {
            lastTokenWasSimpleValue = true;
            whitespace.setLength(0);
            return null;
        }
        // lastTokenWasSimpleValue stays true from here on
        if (whitespace.length() == 0)
            return null;
        final Token saved = Tokens.newUnquotedText(
                lineOrigin(baseOrigin, lineNumber), whitespace.toString());
        whitespace.setLength(0); // reset
        return saved;
    }
}
// origin of the whole input; per-line origins are derived from it
private final SimpleConfigOrigin origin;
// character source; never closed by this class
private final Reader input;
// pushed-back characters (and -1 end markers), most recent first
private final LinkedList<Integer> buffer;
// 1-based number of the line currently being tokenized
private int lineNumber;
// origin carrying the current line number
private ConfigOrigin lineOrigin;
// tokens already produced but not yet returned by next()
private final Queue<Token> tokens;
private final WhitespaceSaver whitespaceSaver;
// whether '#' and '//' start a comment
private final boolean allowComments;
/**
 * Creates an iterator positioned on line 1 with the START token already
 * queued.
 *
 * @param origin        base origin; must be a SimpleConfigOrigin (it is cast)
 * @param input         character source to read from
 * @param allowComments whether '#' and '//' comments are recognized
 */
TokenIterator(final ConfigOrigin origin, final Reader input, final boolean allowComments) {
    this.origin = (SimpleConfigOrigin) origin;
    this.input = input;
    this.allowComments = allowComments;
    this.buffer = new LinkedList<Integer>();
    this.lineNumber = 1;
    this.lineOrigin = this.origin.setLineNumber(this.lineNumber);
    this.tokens = new LinkedList<Token>();
    this.tokens.add(Tokens.START);
    this.whitespaceSaver = new WhitespaceSaver();
}
// Returns the next character without any whitespace skipping: a
// pushed-back character if one is pending, otherwise the next one read
// from the input (-1 at end of input). Read failures are rethrown as
// ConfigException.IO.
private int nextCharRaw() {
    if (!buffer.isEmpty()) {
        return buffer.pop();
    }
    try {
        return input.read();
    } catch (IOException e) {
        throw new ConfigException.IO(origin, "read error: "
                + e.getMessage(), e);
    }
}
// Pushes a character back so that the next nextCharRaw() returns it.
// At most three characters may be pending at once; a further push is
// treated as a bug.
private void putBack(final int c) {
    final int pending = buffer.size();
    if (pending >= 3) {
        throw new ConfigException.BugOrBroken(
                "bug: putBack() three times, undesirable look-ahead");
    }
    buffer.push(c);
}
// Returns whether c counts as whitespace; the decision is delegated
// entirely to ConfigImplUtil.isWhitespace.
static boolean isWhitespace(final int c) {
return ConfigImplUtil.isWhitespace(c);
}
// Like isWhitespace, except that '\n' is never reported as whitespace.
static boolean isWhitespaceNotNewline(final int c) {
    if (c == '\n') {
        return false;
    }
    return ConfigImplUtil.isWhitespace(c);
}
// Tells whether c (already consumed) begins a comment: either '#', or a
// '/' that is immediately followed by a second '/'. Always false at end
// of input or when comments are disabled. Any look-ahead character is
// pushed back, so nothing beyond c is consumed.
private boolean startOfComment(final int c) {
    if (c == -1 || !allowComments) {
        return false;
    }
    if (c == '#') {
        return true;
    }
    if (c != '/') {
        return false;
    }
    final int lookAhead = nextCharRaw();
    // we want to predictably NOT consume any chars
    putBack(lookAhead);
    return lookAhead == '/';
}
// Returns the next character that is not non-newline whitespace (so a
// newline is returned, not skipped), or -1 at end of input. Every skipped
// whitespace character is offered to the saver.
private int nextCharAfterWhitespace(final WhitespaceSaver saver) {
    int c = nextCharRaw();
    while (c != -1 && isWhitespaceNotNewline(c)) {
        saver.add(c);
        c = nextCharRaw();
    }
    return c;
}
// Family of factories for ProblemException. The instance overloads use
// the current lineOrigin; the static ones take an explicit origin. All of
// them end up in the five-argument form, with "" for a missing 'what',
// false for a missing suggestQuotes and null for a missing cause.

private ProblemException problem(final String message) {
    return problem(lineOrigin, "", message, false, null);
}

private ProblemException problem(final String what, final String message) {
    return problem(lineOrigin, what, message, false, null);
}

private ProblemException problem(final String what, final String message, final boolean suggestQuotes) {
    return problem(lineOrigin, what, message, suggestQuotes, null);
}

private ProblemException problem(final String what, final String message, final Throwable cause) {
    return problem(lineOrigin, what, message, false, cause);
}

private ProblemException problem(final String what, final String message, final boolean suggestQuotes,
        final Throwable cause) {
    return problem(lineOrigin, what, message, suggestQuotes, cause);
}

private static ProblemException problem(final ConfigOrigin origin, final String what,
        final String message,
        final Throwable cause) {
    return problem(origin, what, message, false, cause);
}

// Builds the exception around a problem token; a null 'what' or
// 'message' is a programming error.
private static ProblemException problem(final ConfigOrigin origin, final String what, final String message,
        final boolean suggestQuotes, final Throwable cause) {
    final boolean missingText = (what == null) || (message == null);
    if (missingText) {
        throw new ConfigException.BugOrBroken(
                "internal error, creating bad ProblemException");
    }
    final Token problemToken = Tokens.newProblem(origin, what, message, suggestQuotes, cause);
    return new ProblemException(problemToken);
}

private static ProblemException problem(final ConfigOrigin origin, final String message) {
    return problem(origin, "", message, false, null);
}
// Derives an origin for the given line from baseOrigin, which must be a
// SimpleConfigOrigin (it is cast).
private static ConfigOrigin lineOrigin(final ConfigOrigin baseOrigin,
        final int lineNumber) {
    final SimpleConfigOrigin simple = (SimpleConfigOrigin) baseOrigin;
    return simple.setLineNumber(lineNumber);
}
// Reads a comment up to (not including) the end of the line or input.
// ONE char has always been consumed by the caller, either the # or the
// first /, but not both slashes; for '/' the second slash is consumed
// here. The terminating newline or -1 is pushed back.
private Token pullComment(final int firstChar) {
    if (firstChar == '/') {
        final int secondSlash = nextCharRaw();
        if (secondSlash != '/')
            throw new ConfigException.BugOrBroken("called pullComment but // not seen");
    }

    final StringBuilder text = new StringBuilder();
    int c = nextCharRaw();
    while (c != -1 && c != '\n') {
        text.appendCodePoint(c);
        c = nextCharRaw();
    }
    putBack(c);
    return Tokens.newComment(lineOrigin, text.toString());
}
// chars JSON allows a number to start with; pullNextToken hands any
// token starting with one of these to pullNumber
static final String firstNumberChars = "0123456789-";
// chars JSON allows to be part of a number; pullNumber keeps consuming
// while the next char is one of these
static final String numberChars = "0123456789eE+-.";
// chars that stop an unquoted string; pullNextToken also reports them as
// reserved when one appears where a token should start
static final String notInUnquotedText = "$\"{}[]:=,+#`^?!@*&\\";
// The rules here are intended to maximize convenience while
// avoiding confusion with real valid JSON. Basically anything
// that parses as JSON is treated the JSON way and otherwise
// we assume it's a string and let the parser sort it out.
//
// Reads characters until end of input, a reserved character, whitespace
// or the start of a comment, and pushes that terminating character back.
private Token pullUnquotedText() {
    final ConfigOrigin startOrigin = lineOrigin;
    final StringBuilder text = new StringBuilder();
    int c;
    for (;;) {
        c = nextCharRaw();
        final boolean endOfText = c == -1
                || notInUnquotedText.indexOf(c) >= 0
                || isWhitespace(c)
                || startOfComment(c);
        if (endOfText)
            break;
        text.appendCodePoint(c);

        // we parse true/false/null tokens as such no matter
        // what is after them, as long as they are at the
        // start of the unquoted token.
        final int length = text.length();
        if (length == 4) {
            final String soFar = text.toString();
            if (soFar.equals("true"))
                return Tokens.newBoolean(startOrigin, true);
            if (soFar.equals("null"))
                return Tokens.newNull(startOrigin);
        } else if (length == 5) {
            if (text.toString().equals("false"))
                return Tokens.newBoolean(startOrigin, false);
        }
    }

    // put back the char that ended the unquoted text
    putBack(c);
    return Tokens.newUnquotedText(startOrigin, text.toString());
}
// Reads the rest of a number whose first character has already been
// consumed. Text containing '.', 'e' or 'E' becomes a double token,
// anything else a long token; text that does not parse is reported as a
// problem.
private Token pullNumber(final int firstChar) throws ProblemException {
    final StringBuilder text = new StringBuilder();
    text.appendCodePoint(firstChar);
    boolean looksLikeDouble = false;
    int c;
    for (c = nextCharRaw(); c != -1 && numberChars.indexOf(c) >= 0; c = nextCharRaw()) {
        switch (c) {
        case '.':
        case 'e':
        case 'E':
            looksLikeDouble = true;
            break;
        default:
            break;
        }
        text.appendCodePoint(c);
    }
    // the last character we looked at wasn't part of the number, put it
    // back
    putBack(c);

    final String s = text.toString();
    try {
        if (!looksLikeDouble) {
            // this should throw if the integer is too large for Long
            return Tokens.newLong(lineOrigin, Long.parseLong(s), s);
        }
        // force floating point representation
        return Tokens.newDouble(lineOrigin, Double.parseDouble(s), s);
    } catch (NumberFormatException e) {
        throw problem(s, "Invalid number: '" + s + "'", true /* suggestQuotes */, e);
    }
}
// Decodes one backslash escape inside a quoted string (the backslash
// itself has already been consumed) and appends the resulting character
// to sb.
//
// Throws ProblemException when the input ends inside the escape, when the
// escape letter is unknown, or when a \\u escape is not followed by
// exactly four hex digits.
private void pullEscapeSequence(final StringBuilder sb) throws ProblemException {
    final int escaped = nextCharRaw();
    if (escaped == -1)
        throw problem("End of input but backslash in string had nothing after it");

    switch (escaped) {
    case '"':
        sb.append('"');
        break;
    case '\\':
        sb.append('\\');
        break;
    case '/':
        sb.append('/');
        break;
    case 'b':
        sb.append('\b');
        break;
    case 'f':
        sb.append('\f');
        break;
    case 'n':
        sb.append('\n');
        break;
    case 'r':
        sb.append('\r');
        break;
    case 't':
        sb.append('\t');
        break;
    case 'u': {
        final char[] a = new char[4];
        for (int i = 0; i < 4; ++i) {
            final int c = nextCharRaw();
            if (c == -1)
                throw problem("End of input but expecting 4 hex digits for \\uXXXX escape");
            a[i] = (char) c;
        }
        final String digits = new String(a);
        // Check every character individually instead of relying on
        // Integer.parseInt: parseInt accepts a leading sign, so "-123"
        // would yield a negative value that appendCodePoint rejects with
        // an uncaught IllegalArgumentException, and "+041" would be
        // accepted although it is not four hex digits.
        int value = 0;
        for (int i = 0; i < 4; ++i) {
            final int digit = Character.digit(a[i], 16);
            if (digit < 0) {
                throw problem(digits, String.format(
                        "Malformed hex digits after \\u escape in string: '%s'", digits),
                        new NumberFormatException("For input string: \"" + digits + "\""));
            }
            value = (value << 4) | digit;
        }
        // four hex digits always fit in a single char
        sb.append((char) value);
    }
        break;
    default:
        throw problem(
                asString(escaped),
                String.format(
                        "backslash followed by '%s', this is not a valid escape sequence (quoted strings use JSON escaping, so use double-backslash \\\\ for literal backslash)",
                        asString(escaped)));
    }
}
// Appends the body of a triple-quoted string to sb. We are after the
// opening triple quote and need to consume the close triple. A run of
// more than three quotes keeps the extra leading quotes as content; the
// character following the closing quotes is pushed back.
//
// Throws ProblemException if the input ends before the string is closed.
private void appendTripleQuotedString(final StringBuilder sb) throws ProblemException {
    int consecutiveQuotes = 0;
    for (;;) {
        final int c = nextCharRaw();

        if (c == '"') {
            consecutiveQuotes += 1;
        } else if (consecutiveQuotes >= 3) {
            // the last three quotes end the string and the others are
            // kept.
            sb.setLength(sb.length() - 3);
            putBack(c);
            break;
        } else {
            consecutiveQuotes = 0;
            if (c == -1) {
                throw problem("End of input but triple-quoted string was still open");
            } else if (c == '\n') {
                // newlines inside the string never reach pullNextToken,
                // so the line counter has to be advanced here to keep the
                // origins of later tokens and problems accurate
                lineNumber += 1;
                lineOrigin = origin.setLineNumber(lineNumber);
            }
        }

        sb.appendCodePoint(c);
    }
}
// Reads a quoted string whose opening quote has already been consumed and
// returns it as a string token. An empty string immediately followed by a
// third quote is treated as the start of a triple-quoted string.
//
// Throws ProblemException on end of input, on an invalid escape, or on an
// unescaped ISO control character.
private Token pullQuotedString() throws ProblemException {
    final StringBuilder sb = new StringBuilder();
    for (;;) {
        final int c = nextCharRaw();
        if (c == -1)
            throw problem("End of input but string quote was still open");
        if (c == '"')
            break; // closing quote, done!

        if (c == '\\') {
            pullEscapeSequence(sb);
        } else if (Character.isISOControl(c)) {
            throw problem(asString(c), "JSON does not allow unescaped " + asString(c)
                    + " in quoted strings, use a backslash escape");
        } else {
            sb.appendCodePoint(c);
        }
    }

    // maybe switch to triple-quoted string, sort of hacky...
    if (sb.length() == 0) {
        final int third = nextCharRaw();
        if (third != '"') {
            putBack(third);
        } else {
            appendTripleQuotedString(sb);
        }
    }
    return Tokens.newString(lineOrigin, sb.toString());
}
// Completes a "+=" token; the initial '+' has already been consumed.
// Anything other than '=' right after it is reported as a problem.
private Token pullPlusEquals() throws ProblemException {
    final int next = nextCharRaw();
    if (next == '=') {
        return Tokens.PLUS_EQUALS;
    }
    throw problem(asString(next), "'+' not followed by =, '" + asString(next)
            + "' not allowed after '+'", true /* suggestQuotes */);
}
// Reads a ${...} or ${?...} substitution; the initial '$' has already
// been consumed. The tokens between the braces are collected verbatim,
// including whitespace tokens between adjacent simple values.
//
// Throws ProblemException if '$' is not followed by '{' or if the input
// ends before the closing '}'.
private Token pullSubstitution() throws ProblemException {
    // origin of the line the substitution starts on (kept separate from
    // the 'origin' field to avoid shadowing it)
    final ConfigOrigin startOrigin = lineOrigin;
    int c = nextCharRaw();
    if (c != '{') {
        throw problem(asString(c), "'$' not followed by {, '" + asString(c)
                + "' not allowed after '$'", true /* suggestQuotes */);
    }

    boolean optional = false;
    c = nextCharRaw();
    if (c == '?') {
        optional = true;
    } else {
        putBack(c);
    }

    final WhitespaceSaver saver = new WhitespaceSaver();
    final List<Token> expression = new ArrayList<Token>();

    for (;;) {
        final Token t = pullNextToken(saver);

        // note that we avoid validating the allowed tokens inside
        // the substitution here; we even allow nested substitutions
        // in the tokenizer. The parser sorts it out.
        if (t == Tokens.CLOSE_CURLY) {
            // end the loop, done!
            break;
        }
        if (t == Tokens.END) {
            throw problem(startOrigin,
                    "Substitution ${ was not closed with a }");
        }
        final Token whitespace = saver.check(t, startOrigin, lineNumber);
        if (whitespace != null)
            expression.add(whitespace);
        expression.add(t);
    }

    return Tokens.newSubstitution(startOrigin, optional, expression);
}
// Produces the next token from the input. Non-newline whitespace before
// it is skipped (and offered to the saver). Returns Tokens.END at end of
// input and a newline token for '\n'; otherwise dispatches on the first
// character of the token.
private Token pullNextToken(final WhitespaceSaver saver) throws ProblemException {
    final int c = nextCharAfterWhitespace(saver);
    if (c == -1) {
        return Tokens.END;
    }
    if (c == '\n') {
        // newline tokens have the just-ended line number
        final Token newline = Tokens.newLine(lineOrigin);
        lineNumber += 1;
        lineOrigin = origin.setLineNumber(lineNumber);
        return newline;
    }

    Token t = null;
    if (startOfComment(c)) {
        t = pullComment(c);
    } else {
        // punctuation and the tokens introduced by a single marker char
        switch (c) {
        case '"':
            t = pullQuotedString();
            break;
        case '$':
            t = pullSubstitution();
            break;
        case ':':
            t = Tokens.COLON;
            break;
        case ',':
            t = Tokens.COMMA;
            break;
        case '=':
            t = Tokens.EQUALS;
            break;
        case '{':
            t = Tokens.OPEN_CURLY;
            break;
        case '}':
            t = Tokens.CLOSE_CURLY;
            break;
        case '[':
            t = Tokens.OPEN_SQUARE;
            break;
        case ']':
            t = Tokens.CLOSE_SQUARE;
            break;
        case '+':
            t = pullPlusEquals();
            break;
        default:
            break;
        }

        if (t == null) {
            // not punctuation: a number, a reserved character, or the
            // start of unquoted text
            final boolean startsNumber = firstNumberChars.indexOf(c) >= 0;
            if (startsNumber) {
                t = pullNumber(c);
            } else if (notInUnquotedText.indexOf(c) >= 0) {
                throw problem(asString(c), "Reserved character '" + asString(c)
                        + "' is not allowed outside quotes", true /* suggestQuotes */);
            } else {
                // pullUnquotedText expects to read the first char itself
                putBack(c);
                t = pullUnquotedText();
            }
        }
    }

    if (t == null) {
        throw new ConfigException.BugOrBroken(
                "bug: failed to generate next token");
    }
    return t;
}
// A token is a "simple value" when it is a substitution, unquoted text,
// or a value token.
private static boolean isSimpleValue(final Token t) {
    return Tokens.isSubstitution(t)
            || Tokens.isUnquotedText(t)
            || Tokens.isValue(t);
}
// Pulls one token from the input and appends it to the queue, preceded
// by a whitespace token when the saver produces one.
private void queueNextToken() throws ProblemException {
    final Token pulled = pullNextToken(whitespaceSaver);
    final Token savedWhitespace = whitespaceSaver.check(pulled, origin, lineNumber);
    if (savedWhitespace != null) {
        tokens.add(savedWhitespace);
    }
    tokens.add(pulled);
}
// Reports whether at least one token is currently queued for next().
@Override
public boolean hasNext() {
return !tokens.isEmpty();
}
// Returns the token at the head of the queue. When that leaves the queue
// empty and the token was not END, the following token is pulled right
// away; a ProblemException raised while doing so is turned into a queued
// problem token.
@Override
public Token next() {
    final Token head = tokens.remove();
    if (head == Tokens.END || !tokens.isEmpty()) {
        return head;
    }
    try {
        queueNextToken();
    } catch (ProblemException e) {
        tokens.add(e.problem());
    }
    if (tokens.isEmpty()) {
        throw new ConfigException.BugOrBroken(
                "bug: tokens queue should not be empty here");
    }
    return head;
}
// Removal is not supported on the token stream: always throws
// UnsupportedOperationException.
@Override
public void remove() {
throw new UnsupportedOperationException(
"Does not make sense to remove items from token stream");
}
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy