// com.google.auto.value.processor.escapevelocity.Parser (Maven / Gradle / Ivy artifact: auto-value)
/*
* Copyright (C) 2015 Google, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
* in compliance with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software distributed under the License
* is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
* or implied. See the License for the specific language governing permissions and limitations under
* the License.
*/
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package com.google.auto.value.processor.escapevelocity;
import com.google.auto.value.processor.escapevelocity.DirectiveNode.SetNode;
import com.google.auto.value.processor.escapevelocity.ExpressionNode.BinaryExpressionNode;
import com.google.auto.value.processor.escapevelocity.ExpressionNode.NotExpressionNode;
import com.google.auto.value.processor.escapevelocity.ReferenceNode.IndexReferenceNode;
import com.google.auto.value.processor.escapevelocity.ReferenceNode.MemberReferenceNode;
import com.google.auto.value.processor.escapevelocity.ReferenceNode.MethodReferenceNode;
import com.google.auto.value.processor.escapevelocity.ReferenceNode.PlainReferenceNode;
import com.google.auto.value.processor.escapevelocity.TokenNode.CommentTokenNode;
import com.google.auto.value.processor.escapevelocity.TokenNode.ElseIfTokenNode;
import com.google.auto.value.processor.escapevelocity.TokenNode.ElseTokenNode;
import com.google.auto.value.processor.escapevelocity.TokenNode.EndTokenNode;
import com.google.auto.value.processor.escapevelocity.TokenNode.EofNode;
import com.google.auto.value.processor.escapevelocity.TokenNode.ForEachTokenNode;
import com.google.auto.value.processor.escapevelocity.TokenNode.IfTokenNode;
import com.google.auto.value.processor.escapevelocity.TokenNode.MacroDefinitionTokenNode;
import com.google.common.base.CharMatcher;
import com.google.common.base.Verify;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableListMultimap;
import com.google.common.collect.Iterables;
import com.google.common.primitives.Chars;
import com.google.common.primitives.Ints;
import java.io.IOException;
import java.io.LineNumberReader;
import java.io.Reader;
/**
* A parser that reads input from the given {@link Reader} and parses it to produce a
* {@link Template}.
*
* @author emcmanus@google.com (Éamonn McManus)
*/
class Parser {
// Sentinel stored in {@code c} once the input is exhausted; this is the value that
// Reader.read() returns at end of stream.
private static final int EOF = -1;
// The input, wrapped so that the current line number can be reported in nodes and errors.
private final LineNumberReader reader;
/**
 * The one-character lookahead. The invariant of this parser is that {@code c} is always the next
 * character of interest. This means that we never have to "unget" a character by reading too
 * far. For example, after we parse an integer, {@code c} will be the first character after the
 * integer, which is exactly the state we will be in when there are no more digits.
 */
private int c;
/**
 * Creates a parser over the given input and primes the lookahead character {@code c} with the
 * first character of that input.
 *
 * @param reader the source of the template text
 * @throws IOException if reading the first character fails
 */
Parser(Reader reader) throws IOException {
  LineNumberReader lineReader = new LineNumberReader(reader);
  // LineNumberReader starts counting at 0 by default; we want 1-based line numbers.
  lineReader.setLineNumber(1);
  this.reader = lineReader;
  next();
}
/**
 * Parse the input completely to produce a {@link Template}.
 *
 * <p>Parsing happens in two phases. First, we parse a sequence of "tokens", where tokens include
 * entire references such as <pre>{@code
 *    ${x.foo()[23]}
 * }</pre>or entire directives such as<pre>{@code
 *    #set ($x = $y + $z)
 * }</pre>But tokens do not span complex constructs. For example,<pre>{@code
 *    #if ($x == $y) something #end
 * }</pre>is three tokens:<pre>{@code
 *    #if ($x == $y)
 *    (literal text " something ")
 *    #end
 * }</pre>
 *
 * <p>The second phase then takes the sequence of tokens and constructs a parse tree out of it.
 * Some nodes in the parse tree will be unchanged from the token sequence, such as the <pre>{@code
 *    ${x.foo()[23]}
 *    #set ($x = $y + $z)
 * }</pre>examples above. But a construct such as the {@code #if ... #end} mentioned above will
 * become a single IfNode in the parse tree in the second phase.
 *
 * <p>The main reason for this approach is that Velocity has two kinds of lexical contexts. At the
 * top level, there can be arbitrary literal text; references like {@code ${x.foo()}}; and
 * directives like {@code #if} or {@code #set}. Inside the parentheses of a directive, however,
 * neither arbitrary text nor directives can appear, but expressions can, so we need to tokenize
 * the inside of <pre>{@code
 *    #if ($x == $a + $b)
 * }</pre>as the five tokens "$x", "==", "$a", "+", "$b". Rather than having a classical
 * parser/lexer combination, where the lexer would need to switch between these two modes, we
 * replace the lexer with an ad-hoc parser that is the first phase described above, and we
 * define a simple parser over the resultant tokens that is the second phase.
 *
 * @return the template built by the second phase from the token sequence
 * @throws IOException if reading the input fails
 */
Template parse() throws IOException {
  // Explicit element type: a raw builder would make every downstream use unchecked.
  ImmutableList.Builder<Node> tokens = ImmutableList.builder();
  Node token;
  do {
    token = parseNode();
    tokens.add(token);
    // The EofNode itself is included in the list, so the second phase sees an explicit end marker.
  } while (!(token instanceof EofNode));
  return new Reparser(tokens.build()).reparse();
}
/**
 * Returns the line number currently reported by the underlying reader. Because {@code c} is a
 * lookahead character that has already been read, this is the line as of the character in
 * {@code c}, not necessarily of the construct that was just parsed.
 */
private int lineNumber() {
  final int currentLine = this.reader.getLineNumber();
  return currentLine;
}
/**
 * Advances the lookahead: reads one character from the reader into {@code c}. Once {@code c} has
 * become {@link #EOF} it stays there, and the reader is not consulted again.
 */
private void next() throws IOException {
  if (c == EOF) {
    // Already at end of input; do not read past it.
    return;
  }
  c = reader.read();
}
/**
 * Advances past any run of space characters starting at {@code c}, leaving {@code c} on the
 * first non-space character or on {@link #EOF}. "Space" here is as defined by
 * {@link Character#isSpaceChar(int)}, which covers Unicode space separators but not control
 * characters such as tab or newline.
 */
private void skipSpace() throws IOException {
  while (true) {
    if (!Character.isSpaceChar(c)) {
      return;
    }
    next();
  }
}
/**
 * Unconditionally consumes the current character, then skips any space characters that follow,
 * so that {@code c} ends up on a non-space character or {@link #EOF}.
 */
private void nextNonSpace() throws IOException {
  next();
  // Same loop as skipSpace(), written out in place.
  while (Character.isSpaceChar(c)) {
    next();
  }
}
/**
 * Skips leading space and then requires the next character to be {@code expected}. On success,
 * {@code c} is left on the character following the expected one.
 *
 * @param expected the character that must appear next, after optional space
 * @throws ParseException (as built by {@code parseException}) if some other character is found
 */
private void expect(char expected) throws IOException {
  skipSpace();
  if (c != expected) {
    throw parseException("Expected " + expected);
  }
  next();
}
/**
 * Parses one token-level node from the reader, as part of the first parsing phase.
 * <pre>{@code
 * <template> -> <empty> |
 *               <directive> <template> |
 *               <non-directive> <template>
 * }</pre>
 *
 * @return an {@code EofNode} at end of input, otherwise the comment, directive, or non-directive
 *     node that starts at the current character
 */
private Node parseNode() throws IOException {
  if (c == EOF) {
    return new EofNode(lineNumber());
  }
  if (c != '#') {
    return parseNonDirective();
  }
  // Consume the '#'; a second '#' means a line comment, anything else a directive.
  next();
  return (c == '#') ? parseComment() : parseDirective();
}
/**
 * Parses one non-directive node: either a reference or a run of plain text.
 * <pre>{@code
 * <non-directive> -> <reference> |
 *                    <text containing neither $ nor #>
 * }</pre>
 *
 * <p>A {@code $} that is not followed by an ASCII letter or {@code {} is not a reference; it is
 * treated as the first character of plain text.
 */
private Node parseNonDirective() throws IOException {
  if (c != '$') {
    int first = c;
    next();
    return parsePlainText(first);
  }
  next();
  boolean startsReference = c == '{' || isAsciiLetter(c);
  return startsReference ? parseReference() : parsePlainText('$');
}
/**
 * Parses a single directive token from the reader. Directives can be spelled with or without
 * braces, for example {@code #if} or {@code #{if}}. We omit the brace spelling in the productions
 * here: <pre>{@code
 * <directive> -> <if-token> |
 *                <else-token> |
 *                <elseif-token> |
 *                <end-token> |
 *                <foreach-token> |
 *                <set-token> |
 *                <macro-token> |
 *                <macro-call> |
 *                <comment>
 * }</pre>
 *
 * <p>On entry, {@code c} is the character immediately after the {@code #}.
 */
private Node parseDirective() throws IOException {
  boolean braced = (c == '{');
  String directive;
  if (braced) {
    next();
    directive = parseId("Directive inside #{...}");
    expect('}');
  } else {
    directive = parseId("Directive");
  }
  Node node = parseDirectiveBody(directive);
  // Velocity skips a newline after any directive.
  // TODO(emcmanus): in fact it also skips space before the newline, which should be implemented.
  if (c == '\n') {
    next();
  }
  return node;
}

/**
 * Dispatches on the directive name to the parser for the rest of that directive. Any name that
 * is not a built-in directive is treated as a possible macro call.
 */
private Node parseDirectiveBody(String directive) throws IOException {
  if ("end".equals(directive)) {
    return new EndTokenNode(lineNumber());
  }
  if ("if".equals(directive) || "elseif".equals(directive)) {
    return parseIfOrElseIf(directive);
  }
  if ("else".equals(directive)) {
    return new ElseTokenNode(lineNumber());
  }
  if ("foreach".equals(directive)) {
    return parseForEach();
  }
  if ("set".equals(directive)) {
    return parseSet();
  }
  if ("macro".equals(directive)) {
    return parseMacroDefinition();
  }
  return parsePossibleMacroCall(directive);
}
/**
 * Parses the parenthesized condition following {@code #if} or {@code #elseif}.
 * <pre>{@code
 * <if-token> -> #if ( <condition> )
 * <elseif-token> -> #elseif ( <condition> )
 * }</pre>
 *
 * @param directive either {@code "if"} or {@code "elseif"}.
 * @return an {@code IfTokenNode} for {@code "if"}, otherwise an {@code ElseIfTokenNode}
 */
private Node parseIfOrElseIf(String directive) throws IOException {
  expect('(');
  ExpressionNode condition = parseExpression();
  expect(')');
  if (directive.equals("if")) {
    return new IfTokenNode(condition);
  }
  return new ElseIfTokenNode(condition);
}
/**
 * Parses a {@code #foreach} token from the reader. <pre>{@code
 * <foreach-token> -> #foreach ( $<id> in <expression> )
 * }</pre>
 *
 * <p>On entry, {@code c} is the character after the directive name.
 */
private Node parseForEach() throws IOException {
  expect('(');
  expect('$');
  String loopVariable = parseId("For-each variable");
  skipSpace();
  // Match the keyword "in" one character at a time; nothing is consumed if the 'i' is missing.
  if (c != 'i') {
    throw parseException("Expected 'in' for #foreach");
  }
  next();
  if (c != 'n') {
    throw parseException("Expected 'in' for #foreach");
  }
  next();
  ExpressionNode collection = parseExpression();
  expect(')');
  return new ForEachTokenNode(loopVariable, collection);
}
/**
 * Parses a {@code #set} token from the reader. <pre>{@code
 * <set-token> -> #set ( $<id> = <expression> )
 * }</pre>
 *
 * @return a {@code SetNode} pairing the variable name with the expression assigned to it
 */
private Node parseSet() throws IOException {
  expect('(');
  expect('$');
  final String variableName = parseId("#set variable");
  expect('=');
  final ExpressionNode assignedValue = parseExpression();
  expect(')');
  return new SetNode(variableName, assignedValue);
}
/**
 * Parses a {@code #macro} token from the reader. <pre>{@code
 * <macro-token> -> #macro ( <id> <macro-parameter-list> )
 * <macro-parameter-list> -> <empty> |
 *                           $<id> <macro-parameter-list>
 * }</pre>
 *
 * <p>Macro parameters are not separated by commas, though method-reference parameters are.
 *
 * @return a {@code MacroDefinitionTokenNode} carrying the macro name and its parameter names
 */
private Node parseMacroDefinition() throws IOException {
  expect('(');
  skipSpace();
  String name = parseId("Macro name");
  // Explicit element type instead of a raw builder, so build() yields ImmutableList<String>.
  ImmutableList.Builder<String> parameterNames = ImmutableList.builder();
  while (true) {
    skipSpace();
    if (c == ')') {
      next();
      break;
    }
    // Anything other than ')' must start a parameter; this also rejects EOF.
    if (c != '$') {
      throw parseException("Macro parameters should look like $name");
    }
    next();
    parameterNames.add(parseId("Macro parameter name"));
  }
  return new MacroDefinitionTokenNode(lineNumber(), name, parameterNames.build());
}
/**
 * Parses an identifier after {@code #} that is not one of the standard directives. The assumption
 * is that it is a call of a macro that is defined in the template. Macro definitions are
 * extracted from the template during the second parsing phase (and not during evaluation of the
 * template as you might expect). This means that a macro can be called before it is defined.
 * <pre>{@code
 * <macro-call> -> # <id> ( <expression-list> )
 * <expression-list> -> <empty> |
 *                      <expression> <optional-comma> <expression-list>
 * <optional-comma> -> <empty> | ,
 * }</pre>
 *
 * <p>Each argument is parsed with {@code parsePrimary}, so it is a reference or a literal.
 *
 * @param directive the identifier that followed {@code #}
 * @throws ParseException (as built by {@code parseException}) if no {@code (} follows the name
 */
private Node parsePossibleMacroCall(String directive) throws IOException {
  skipSpace();
  if (c != '(') {
    throw parseException("Unrecognized directive #" + directive);
  }
  next();
  // Explicit element type instead of a raw builder, so build() yields ImmutableList<Node>.
  ImmutableList.Builder<Node> parameterNodes = ImmutableList.builder();
  while (true) {
    skipSpace();
    if (c == ')') {
      next();
      break;
    }
    // parsePrimary throws on anything that cannot start an argument (including EOF), so this
    // loop always terminates.
    parameterNodes.add(parsePrimary());
    if (c == ',') {
      // The documentation doesn't say so, but you can apparently have an optional comma in
      // macro calls.
      next();
    }
  }
  return new DirectiveNode.MacroCallNode(lineNumber(), directive, parameterNodes.build());
}
/**
 * Parses and discards a {@code ##} comment: everything up to and including the next newline, or
 * up to end of input if there is no newline. The returned node records the line on which the
 * comment was encountered.
 */
private Node parseComment() throws IOException {
  final int startLine = lineNumber();
  while (!(c == '\n' || c == EOF)) {
    next();
  }
  // Step over the newline; at EOF this is a no-op.
  next();
  return new CommentTokenNode(startLine);
}
/**
 * Parses plain text, which continues until a {@code $}, a {@code #}, or end of input.
 *
 * @param firstChar the first character of the plain text, already consumed by the caller;
 *     {@link #c} is the second character (if the plain text is more than one character)
 * @return a constant node holding the collected text
 */
private Node parsePlainText(int firstChar) throws IOException {
  StringBuilder text = new StringBuilder().appendCodePoint(firstChar);
  while (c != EOF && c != '$' && c != '#') {
    text.appendCodePoint(c);
    next();
  }
  return new ConstantExpressionNode(lineNumber(), text.toString());
}
/**
 * Parses a reference, which is everything that can start with a {@code $}. References can
 * optionally be enclosed in braces, so {@code $x} and {@code ${x}} are the same. Braces are
 * useful when text after the reference would otherwise be parsed as part of it. For example,
 * {@code ${x}y} is a reference to the variable {@code $x}, followed by the plain text {@code y}.
 * Of course {@code $xy} would be a reference to the variable {@code $xy}.
 * <pre>{@code
 * <reference> -> $<reference-no-brace> |
 *                ${<reference-no-brace>}
 * }</pre>
 *
 * <p>On entry to this method, {@link #c} is the character immediately after the {@code $}.
 */
private ReferenceNode parseReference() throws IOException {
  if (c != '{') {
    return parseReferenceNoBrace();
  }
  next();
  ReferenceNode inner = parseReferenceNoBrace();
  expect('}');
  return inner;
}
/**
 * Parses a reference in the simple form without braces: an identifier followed by any number of
 * member, method, or index suffixes.
 * <pre>{@code
 * <reference-no-brace> -> <id><reference-suffix>
 * }</pre>
 */
private ReferenceNode parseReferenceNoBrace() throws IOException {
  String name = parseId("Reference");
  return parseReferenceSuffix(new PlainReferenceNode(lineNumber(), name));
}
/**
 * Parses the modifiers that can appear at the tail of a reference.
 * <pre>{@code
 * <reference-suffix> -> <empty> |
 *                       <reference-member> |
 *                       <reference-index>
 * }</pre>
 *
 * @param lhs the reference node representing the first part of the reference
 *     {@code $x} in {@code $x.foo} or {@code $x.foo()}, or later {@code $x.y} in {@code $x.y.z}.
 * @return {@code lhs} itself if no suffix follows, otherwise the node for the extended reference
 */
private ReferenceNode parseReferenceSuffix(ReferenceNode lhs) throws IOException {
  if (c == '.') {
    return parseReferenceMember(lhs);
  }
  if (c == '[') {
    return parseReferenceIndex(lhs);
  }
  return lhs;
}
/**
 * Parses a reference member, which is either a property reference like {@code $x.y} or a method
 * call like {@code $x.y($z)}, followed by any further suffixes.
 * <pre>{@code
 * <reference-member> -> .<reference-method-or-property><reference-suffix>
 * <reference-method-or-property> -> <id> |
 *                                   <id> ( <method-parameter-list> )
 * }</pre>
 *
 * @param lhs the reference node representing what appears to the left of the dot, like the
 *     {@code $x} in {@code $x.foo} or {@code $x.foo()}.
 */
private ReferenceNode parseReferenceMember(ReferenceNode lhs) throws IOException {
  assert c == '.';
  next();
  String memberName = parseId("Member");
  // An opening parenthesis directly after the name makes this a method call.
  ReferenceNode member =
      (c == '(')
          ? parseReferenceMethodParams(lhs, memberName)
          : new MemberReferenceNode(lhs, memberName);
  return parseReferenceSuffix(member);
}
/**
 * Parses the parameters to a method reference, like {@code $foo.bar($a, $b)}.
 * <pre>{@code
 * <method-parameter-list> -> <empty> |
 *                            <non-empty-method-parameter-list>
 * <non-empty-method-parameter-list> -> <expression> |
 *                                      <expression> , <non-empty-method-parameter-list>
 * }</pre>
 *
 * <p>On entry, {@code c} is the opening parenthesis; on exit it is the character after the
 * closing parenthesis.
 *
 * @param lhs the reference node representing what appears to the left of the dot, like the
 *     {@code $x} in {@code $x.foo()}.
 * @param id the method name, like the {@code foo} in {@code $x.foo()}.
 */
private ReferenceNode parseReferenceMethodParams(ReferenceNode lhs, String id)
    throws IOException {
  assert c == '(';
  nextNonSpace();
  // Explicit element type instead of a raw builder, so build() yields
  // ImmutableList<ExpressionNode>.
  ImmutableList.Builder<ExpressionNode> args = ImmutableList.builder();
  if (c != ')') {
    args.add(parseExpression());
    while (c == ',') {
      nextNonSpace();
      args.add(parseExpression());
    }
    if (c != ')') {
      throw parseException("Expected )");
    }
  }
  assert c == ')';
  next();
  return new MethodReferenceNode(lhs, id, args.build());
}
/**
 * Parses an index suffix to a reference, like {@code $x[$i]}, followed by any further suffixes.
 * <pre>{@code
 * <reference-index> -> [ <expression> ]
 * }</pre>
 *
 * @param lhs the reference node representing what appears to the left of the bracket, like the
 *     {@code $x} in {@code $x[$i]}.
 */
private ReferenceNode parseReferenceIndex(ReferenceNode lhs) throws IOException {
  assert c == '[';
  next();
  ExpressionNode index = parseExpression();
  if (c == ']') {
    next();
    return parseReferenceSuffix(new IndexReferenceNode(lhs, index));
  }
  throw parseException("Expected ]");
}
/**
 * The binary operators understood in expressions, each with its source symbol and its
 * precedence (a larger number binds more tightly).
 */
enum Operator {
  /**
   * A dummy operator with low precedence. When parsing subexpressions, we always stop when we
   * reach an operator of lower precedence than the "current precedence". For example, when
   * parsing {@code 1 + 2 * 3 + 4}, we'll stop parsing the subexpression {@code * 3 + 4} when
   * we reach the {@code +} because it has lower precedence than {@code *}. This dummy operator,
   * then, behaves like {@code +} when the minimum precedence is {@code *}. We also return it
   * if we're looking for an operator and don't find one. It's as if our expressions are
   * bracketed with it, like {@code STOP 1 + 2 * 3 + 4 STOP}.
   */
  STOP("", 0),

  // If a one-character operator is a prefix of a two-character operator, like < and <=, then
  // the one-character operator must come first.
  OR("||", 1),
  AND("&&", 2),
  EQUAL("==", 3),
  NOT_EQUAL("!=", 3),
  LESS("<", 4),
  LESS_OR_EQUAL("<=", 4),
  GREATER(">", 4),
  GREATER_OR_EQUAL(">=", 4),
  PLUS("+", 5),
  MINUS("-", 5),
  TIMES("*", 6),
  DIVIDE("/", 6),
  REMAINDER("%", 6);

  /** The operator as it is spelled in a template; empty for {@link #STOP}. */
  final String symbol;

  /** Binding strength; operators with a larger value are grouped first. */
  final int precedence;

  Operator(String operatorSymbol, int operatorPrecedence) {
    symbol = operatorSymbol;
    precedence = operatorPrecedence;
  }

  /** Returns the operator's symbol, so that it reads naturally in messages. */
  @Override
  public String toString() {
    return this.symbol;
  }
}
/**
 * Maps a code point to the operators that begin with that code point. For example, maps
 * {@code <} to {@code LESS} and {@code LESS_OR_EQUAL}. The values for each key keep the
 * declaration order of {@link Operator}, so a one-character operator precedes any two-character
 * operator that it is a prefix of. {@link Operator#STOP} has no symbol and is not included.
 */
private static final ImmutableListMultimap<Integer, Operator> CODE_POINT_TO_OPERATORS;
static {
  // Explicit type arguments instead of raw types, so that lookups return ImmutableList<Operator>.
  ImmutableListMultimap.Builder<Integer, Operator> builder = ImmutableListMultimap.builder();
  for (Operator operator : Operator.values()) {
    if (operator != Operator.STOP) {
      builder.put((int) operator.symbol.charAt(0), operator);
    }
  }
  CODE_POINT_TO_OPERATORS = builder.build();
}
/**
 * Parses an expression, which can occur within a directive like {@code #if} or {@code #set},
 * or within a reference like {@code $x[$a + $b]} or {@code $x.m($a + $b)}.
 * <pre>{@code
 * <expression> -> <and-expression> |
 *                 <expression> || <and-expression>
 * <and-expression> -> <equality-expression> |
 *                     <and-expression> && <equality-expression>
 * <equality-expression> -> <relational-expression> |
 *                          <equality-expression> <equality-op> <relational-expression>
 * <equality-op> -> == | !=
 * <relational-expression> -> <additive-expression> |
 *                            <relational-expression> <relation> <additive-expression>
 * <relation> -> < | <= | > | >=
 * <additive-expression> -> <multiplicative-expression> |
 *                          <additive-expression> <add-op> <multiplicative-expression>
 * <add-op> -> + | -
 * <multiplicative-expression> -> <unary-expression> |
 *                                <multiplicative-expression> <mult-op> <unary-expression>
 * <mult-op> -> * | / | %
 * }</pre>
 */
private ExpressionNode parseExpression() throws IOException {
  ExpressionNode firstOperand = parseUnaryExpression();
  // The OperatorParser constructor reads the operator that follows the first operand, so it
  // must be created only after that operand has been consumed.
  OperatorParser operatorParser = new OperatorParser();
  return operatorParser.parse(firstOperand, 1);
}
/**
 * An operator-precedence parser for the binary operations we understand. It implements an
 * <a href="http://en.wikipedia.org/wiki/Operator-precedence_parser">algorithm from Wikipedia</a>
 * that uses recursion rather than having an explicit stack of operators and values.
 */
private class OperatorParser {
  /**
   * The operator we have just scanned, in the same way that {@link #c} is the character we have
   * just read. If we were not able to scan an operator, this will be {@link Operator#STOP}.
   */
  private Operator currentOperator;

  /** Creates a parser and immediately scans the operator (if any) at the current position. */
  OperatorParser() throws IOException {
    nextOperator();
  }

  /**
   * Parse a subexpression whose left-hand side is {@code lhs} and where we only consider
   * operators with precedence at least {@code minPrecedence}.
   *
   * @param lhs the already-parsed left operand
   * @param minPrecedence the lowest operator precedence that this call will consume
   * @return the parsed subexpression
   */
  ExpressionNode parse(ExpressionNode lhs, int minPrecedence) throws IOException {
    while (currentOperator.precedence >= minPrecedence) {
      Operator operator = currentOperator;
      ExpressionNode rhs = parseUnaryExpression();
      nextOperator();
      // Any tighter-binding operator that follows belongs to the right operand, so fold it
      // into rhs before combining with lhs.
      while (currentOperator.precedence > operator.precedence) {
        rhs = parse(rhs, currentOperator.precedence);
      }
      lhs = new BinaryExpressionNode(lhs, operator, rhs);
    }
    return lhs;
  }

  /**
   * Updates {@link #currentOperator} to be an operator read from the input,
   * or {@link Operator#STOP} if there is none.
   *
   * @throws ParseException (as built by {@code parseException}) if the input has only the
   *     first character of a two-character operator, such as a lone {@code =}
   */
  private void nextOperator() throws IOException {
    skipSpace();
    // Typed as ImmutableList<Operator>: a raw list could not be iterated as Operator below.
    ImmutableList<Operator> possibleOperators = CODE_POINT_TO_OPERATORS.get(c);
    if (possibleOperators.isEmpty()) {
      currentOperator = Operator.STOP;
      return;
    }
    char firstChar = Chars.checkedCast(c);
    next();
    Operator operator = null;
    // The candidates are in Operator declaration order, so a one-character operator is seen
    // first and is then replaced if the next input character completes a two-character one.
    for (Operator possibleOperator : possibleOperators) {
      if (possibleOperator.symbol.length() == 1) {
        Verify.verify(operator == null);
        operator = possibleOperator;
      } else if (possibleOperator.symbol.charAt(1) == c) {
        next();
        operator = possibleOperator;
      }
    }
    if (operator == null) {
      // No one-character candidate and no two-character match: the only candidate is a
      // two-character operator whose second character is missing.
      throw parseException(
          "Expected " + Iterables.getOnlyElement(possibleOperators) + ", not just " + firstChar);
    }
    currentOperator = operator;
  }
}
/**
 * Parses an expression not containing any operators (except inside parentheses).
 * <pre>{@code
 * <unary-expression> -> <primary> |
 *                       ( <expression> ) |
 *                       ! <unary-expression>
 * }</pre>
 *
 * <p>Leading space is skipped on entry and trailing space on exit.
 */
private ExpressionNode parseUnaryExpression() throws IOException {
  skipSpace();
  if (c == '(') {
    nextNonSpace();
    ExpressionNode parenthesized = parseExpression();
    expect(')');
    skipSpace();
    return parenthesized;
  }
  if (c == '!') {
    next();
    ExpressionNode negated = new NotExpressionNode(parseUnaryExpression());
    skipSpace();
    return negated;
  }
  // parsePrimary skips trailing space itself.
  return parsePrimary();
}
/**
 * Parses an expression containing only literals or references, then skips trailing space.
 * <pre>{@code
 * <primary> -> <reference> |
 *              <string-literal> |
 *              <integer-literal> |
 *              <boolean-literal>
 * }</pre>
 */
private ExpressionNode parsePrimary() throws IOException {
  ExpressionNode primary;
  switch (c) {
    case '$':
      next();
      primary = parseReference();
      break;
    case '"':
      primary = parseStringLiteral();
      break;
    case '-':
      // Velocity does not have a negation operator. If we see '-' it must be the start of a
      // negative integer literal.
      next();
      primary = parseIntLiteral("-");
      break;
    default:
      if (isAsciiDigit(c)) {
        primary = parseIntLiteral("");
      } else if (isAsciiLetter(c)) {
        primary = parseBooleanLiteral();
      } else {
        throw parseException("Expected an expression");
      }
      break;
  }
  skipSpace();
  return primary;
}
/**
 * Parses a double-quoted string literal. On entry {@code c} is the opening quote; on exit it is
 * the character after the closing quote. The literal must end on the line where it starts, and
 * must not contain {@code $} or a backslash.
 *
 * @return a constant node holding the text between the quotes
 */
private ExpressionNode parseStringLiteral() throws IOException {
  assert c == '"';
  next();
  StringBuilder literal = new StringBuilder();
  for (; c != '"'; next()) {
    if (c == EOF || c == '\n') {
      throw parseException("Unterminated string constant");
    }
    if (c == '\\' || c == '$') {
      // In real Velocity, you can have a $ reference expanded inside a "" string literal.
      // There are also '' string literals where that is not so. We haven't needed that yet
      // so it's not supported.
      throw parseException(
          "Escapes or references in string constants are not currently supported");
    }
    literal.appendCodePoint(c);
  }
  // Step over the closing quote.
  next();
  return new ConstantExpressionNode(lineNumber(), literal.toString());
}
/**
 * Parses a run of ASCII digits as an integer literal.
 *
 * @param prefix text to put in front of the digits before conversion: {@code "-"} for a
 *     negative literal whose sign has already been consumed, otherwise the empty string
 * @return a constant node holding the integer value
 * @throws ParseException (as built by {@code parseException}) if the prefix and digits together
 *     cannot be converted to an {@code int}, for example a lone {@code -}
 */
private ExpressionNode parseIntLiteral(String prefix) throws IOException {
  StringBuilder digits = new StringBuilder(prefix);
  for (; isAsciiDigit(c); next()) {
    digits.appendCodePoint(c);
  }
  Integer parsed = Ints.tryParse(digits.toString());
  if (parsed != null) {
    return new ConstantExpressionNode(lineNumber(), parsed);
  }
  throw parseException("Invalid integer: " + digits);
}
/**
 * Parses a boolean literal, either {@code true} or {@code false}.
 * <pre>{@code
 * <boolean-literal> -> true |
 *                      false
 * }</pre>
 *
 * <p>Any other bare identifier in an expression is an error.
 */
private ExpressionNode parseBooleanLiteral() throws IOException {
  String word = parseId("Identifier without $");
  boolean isTrue = word.equals("true");
  if (!isTrue && !word.equals("false")) {
    throw parseException("Identifier in expression must be preceded by $ or be true or false");
  }
  return new ConstantExpressionNode(lineNumber(), isTrue);
}
// Matches the ASCII letters A-Z and a-z only.
private static final CharMatcher ASCII_LETTER =
CharMatcher.inRange('A', 'Z')
.or(CharMatcher.inRange('a', 'z'))
.precomputed();
// Matches the ASCII digits 0-9 only.
private static final CharMatcher ASCII_DIGIT =
CharMatcher.inRange('0', '9')
.precomputed();
// Matches the characters allowed inside an identifier: ASCII letters and digits, plus
// hyphen and underscore.
private static final CharMatcher ID_CHAR =
ASCII_LETTER
.or(ASCII_DIGIT)
.or(CharMatcher.anyOf("-_"))
.precomputed();
/**
 * Returns whether {@code c} is an ASCII letter. Values outside the {@code char} range,
 * including {@code EOF}, are never letters.
 */
private static boolean isAsciiLetter(int c) {
  boolean fitsInChar = c >= Character.MIN_VALUE && c <= Character.MAX_VALUE;
  return fitsInChar && ASCII_LETTER.matches((char) c);
}
/**
 * Returns whether {@code c} is an ASCII digit. Values outside the {@code char} range,
 * including {@code EOF}, are never digits.
 */
private static boolean isAsciiDigit(int c) {
  boolean fitsInChar = c >= Character.MIN_VALUE && c <= Character.MAX_VALUE;
  return fitsInChar && ASCII_DIGIT.matches((char) c);
}
/**
 * Returns whether {@code c} may appear inside an identifier. Values outside the {@code char}
 * range, including {@code EOF}, never qualify.
 */
private static boolean isIdChar(int c) {
  boolean fitsInChar = c >= Character.MIN_VALUE && c <= Character.MAX_VALUE;
  return fitsInChar && ID_CHAR.matches((char) c);
}
/**
 * Parses an identifier as specified by the Velocity Template Language (VTL) reference.
 * Identifiers are ASCII: they start with a letter, followed by letters, digits, {@code -} and
 * {@code _}.
 *
 * @param what a description of the thing being parsed, used at the start of the error message
 * @return the identifier; {@code c} is left on the first character that is not part of it
 * @throws ParseException (as built by {@code parseException}) if {@code c} is not an ASCII
 *     letter on entry
 */
private String parseId(String what) throws IOException {
  if (!isAsciiLetter(c)) {
    throw parseException(what + " should start with an ASCII letter");
  }
  StringBuilder id = new StringBuilder();
  // The first character is known to be a letter, hence an identifier character.
  do {
    id.appendCodePoint(c);
    next();
  } while (isIdChar(c));
  return id.toString();
}
/**
 * Returns an exception to be thrown describing a parse error with the given message, and
 * including information about where it occurred: the line number at the point of the error and
 * up to 20 characters of the input that follows (or {@code "EOF"} if there is none).
 *
 * <p>Building the context consumes input, so the parser must not be used further after calling
 * this method.
 *
 * @param message the description of what went wrong
 * @return the exception for the caller to throw
 * @throws IOException if reading the context characters fails
 */
private ParseException parseException(String message) throws IOException {
  // Capture the line before reading the context below: consuming characters advances the
  // reader's line counter past any newlines in that context, which would misreport the line.
  int errorLine = lineNumber();
  StringBuilder context = new StringBuilder();
  if (c == EOF) {
    context.append("EOF");
  } else {
    int count = 0;
    while (c != EOF && count < 20) {
      context.appendCodePoint(c);
      next();
      count++;
    }
    if (c != EOF) {
      // There was more input than we are showing.
      context.append("...");
    }
  }
  return new ParseException(message, errorLine, context.toString());
}
}