com.google.escapevelocity.Parser Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of junit-processor Show documentation
The newest version!
/*
 * Copyright (C) 2018 Google, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.google.escapevelocity;

import com.google.common.base.CharMatcher;
import com.google.common.base.Joiner;
import com.google.common.base.Verify;
import com.google.common.collect.ContiguousSet;
import com.google.common.collect.ForwardingSortedSet;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableListMultimap;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.ImmutableSortedSet;
import com.google.common.collect.Iterables;
import com.google.common.primitives.Chars;
import com.google.common.primitives.Ints;
import com.google.escapevelocity.DirectiveNode.BreakNode;
import com.google.escapevelocity.DirectiveNode.DefineNode;
import com.google.escapevelocity.DirectiveNode.ForEachNode;
import com.google.escapevelocity.DirectiveNode.IfNode;
import com.google.escapevelocity.DirectiveNode.SetNode;
import com.google.escapevelocity.ExpressionNode.BinaryExpressionNode;
import com.google.escapevelocity.ExpressionNode.NotExpressionNode;
import com.google.escapevelocity.ReferenceNode.IndexReferenceNode;
import com.google.escapevelocity.ReferenceNode.MemberReferenceNode;
import com.google.escapevelocity.ReferenceNode.MethodReferenceNode;
import com.google.escapevelocity.ReferenceNode.PlainReferenceNode;
import com.google.escapevelocity.StopNode.ElseIfNode;
import com.google.escapevelocity.StopNode.ElseNode;
import com.google.escapevelocity.StopNode.EndNode;
import com.google.escapevelocity.StopNode.EofNode;
import java.io.IOException;
import java.io.LineNumberReader;
import java.io.Reader;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;
import java.util.function.Supplier;

/**
 * A parser that reads input from the given {@link Reader} and parses it to produce a
 * {@link Template}.
 *
 * @author emcmanus@google.com (Éamonn McManus)
 */
class Parser {
  private static final int EOF = -1;

  private static final ImmutableSet> EOF_CLASS =
      ImmutableSet.of(EofNode.class);
  private static final ImmutableSet> END_CLASS =
      ImmutableSet.of(EndNode.class);
  private static final ImmutableSet> ELSE_ELSEIF_END_CLASSES =
      ImmutableSet.of(ElseNode.class, ElseIfNode.class, EndNode.class);

  /** Source of the template text; also supplies the line numbers recorded in nodes and errors. */
  private final LineNumberReader reader;

  /** Name of the resource being parsed, recorded in the nodes this parser creates. */
  private final String resourceName;

  /** Handed to the {@link ParseNode} created for each {@code #parse} directive. */
  private final Template.ResourceOpener resourceOpener;

  /**
   * Map from resource name to already-parsed template. This map is shared between all of the nested
   * {@link Parser} instances that result from {@code #parse} directives, so we will only ever read
   * and parse any given resource name once.
   */
  private final Map<String, Template> parseCache;

  /**
   * Macros that have been defined during this parse, keyed by macro name. This means macros
   * defined in a given {@code foo.vm} file, without regard to whatever macros might be defined in
   * another {@code bar.vm} file. If the same name is defined more than once in {@code foo.vm},
   * only the first definition has any effect.
   */
  private final Map<String, Macro> macros = new TreeMap<>();

  /**
   * The invariant of this parser is that {@code c} is always the next character of interest.
   * This means that we almost never have to "unget" a character by reading too far. For example,
   * after we parse an integer, {@code c} will be the first character after the integer, which is
   * exactly the state we will be in when there are no more digits.
   *
   * <p>Sometimes we need to read two characters ahead, and in that case we use {@link #pushback}.
   */
  private int c;

  /**
   * A single character of pushback. If this is not negative, the {@link #next()} method will
   * return it instead of reading a character.
   */
  private int pushback = -1;

  /**
   * Creates a parser over the given input and reads its first character, so that {@link #c} is
   * valid as soon as construction completes.
   *
   * @param reader the template text to parse
   * @param resourceName the name recorded in the nodes produced by this parser
   * @param resourceOpener passed on to the node created for any {@code #parse} directive
   * @param parseCache map from resource name to already-parsed template, passed on to the node
   *     created for any {@code #parse} directive
   * @throws IOException if reading the first character fails
   */
  Parser(
      Reader reader,
      String resourceName,
      Template.ResourceOpener resourceOpener,
      Map<String, Template> parseCache)
      throws IOException {
    this.reader = new LineNumberReader(reader);
    // LineNumberReader counts from 0 by default; templates are reported with 1-based lines.
    this.reader.setLineNumber(1);
    // Prime the one-character lookahead.
    next();
    this.resourceName = resourceName;
    this.resourceOpener = resourceOpener;
    this.parseCache = parseCache;
  }

  /**
   * Parse the input completely to produce a {@link Template}. We use a fairly standard
   * recursive-descent parser with ad-hoc lexing and a few hacks needed to reproduce quirks of
   * Velocity's behaviour.
   */
  Template parse() throws IOException {
    // At the top level the only acceptable way for the parse to stop is the end of the input.
    ParseResult wholeTemplate = parseToStop(EOF_CLASS, () -> "outside any construct");
    return new Template(
        Node.cons(resourceName, lineNumber(), wholeTemplate.nodes),
        ImmutableMap.copyOf(macros));
  }

  /**
   * Returns the line number currently reported by {@link #reader}. The constructor sets the
   * reader's line number to 1 before any input is read.
   */
  private int lineNumber() {
    return reader.getLineNumber();
  }

  /**
   * Gets the next character from the reader and assigns it to {@code c}. If there are no more
   * characters, sets {@code c} to {@link #EOF} if it is not already.
   */
  private void next() throws IOException {
    // Once the end of input has been seen, c stays at EOF no matter how often we are called.
    if (c == EOF) {
      return;
    }
    if (pushback >= 0) {
      // A character was pushed back earlier: deliver it and clear the slot.
      c = pushback;
      pushback = -1;
    } else {
      c = reader.read();
    }
  }

  /**
   * Saves the current character {@code c} to be read again, and sets {@code c} to the given
   * {@code c1}. Suppose the text contains {@code xy} and we have just read {@code y}.
   * So {@code c == 'y'}. Now if we execute {@code pushback('x')}, we will have
   * {@code c == 'x'} and the next call to {@link #next()} will set {@code c == 'y'}. Subsequent
   * calls to {@code next()} will continue reading from {@link #reader}. So the pushback
   * essentially puts us back in the state we were in before we read {@code y}.
   */
  private void pushback(int c1) {
    // Remember the current character; next() returns it before reading from the reader again.
    pushback = c;
    // The supplied character becomes the current one.
    c = c1;
  }

  /**
   * If {@code c} is a space character, keeps reading until {@code c} is a non-space character or
   * there are no more characters.
   */
  private void skipSpace() throws IOException {
    for (; Character.isWhitespace(c); next()) {
      // Nothing to do in the body: advancing past the whitespace is the whole job.
    }
  }

  /**
   * Gets the next character from the reader, and if it is a space character, keeps reading until
   * a non-space character is found.
   */
  private void nextNonSpace() throws IOException {
    // Unconditionally step past the current character...
    next();
    // ...and then past any run of whitespace that follows it.
    while (Character.isWhitespace(c)) {
      next();
    }
  }

  /**
   * Skips any space in the reader, and then throws an exception if the first non-space character
   * found is not the expected one. Sets {@code c} to the first character after that expected one.
   */
  private void expect(char expected) throws IOException {
    skipSpace();
    if (c != expected) {
      throw parseException("Expected " + expected);
    }
    // Step past the expected character so that c is whatever follows it.
    next();
  }

  /** The outcome of one call to {@code parseToStop}: the parsed nodes and what ended them. */
  private static class ParseResult {
    /** The nodes parsed before the stop node was reached, in source order. */
    final ImmutableList<Node> nodes;

    /** The stop node that caused parsing to stop. */
    final StopNode stop;

    ParseResult(ImmutableList<Node> nodes, StopNode stop) {
      this.nodes = nodes;
      this.stop = stop;
    }
  }

  /**
   * Parse until reaching a {@code StopNode}. The {@code StopNode} must have one of the classes in
   * {@code stopClasses}. This method is called recursively to parse nested constructs. At the
   * top level, we expect the parse to end when it reaches {@code EofNode}. In a {@code #foreach},
   * for example, we expect the parse to end when it reaches the matching {@code #end}. In an
   * {@code #if}, the parse can end with {@code #end}, {@code #else}, or {@code #elseif}. And then
   * after {@code #else} or {@code #elseif} we will call this method again to parse the next part.
   *
   * @return the nodes that were parsed, plus the {@code StopNode} that caused parsing to stop.
   */
  private ParseResult parseToStop(
      ImmutableSet> stopClasses, Supplier contextDescription)
      throws IOException {
    List nodes = new ArrayList<>();
    Node node;
    while (true) {
      node = parseNode();
      if (node instanceof StopNode) {
        break;
      }
      if (node instanceof SetNode && SetSpacing.shouldRemoveLastNodeBeforeSet(nodes)) {
        nodes.set(nodes.size() - 1, node);
      } else {
        nodes.add(node);
      }
    }
    StopNode stop = (StopNode) node;
    if (!stopClasses.contains(stop.getClass())) {
      throw parseException("Found " + stop.name() + " " + contextDescription.get());
    }
    return new ParseResult(ImmutableList.copyOf(nodes), stop);
  }

  /**
   * Skip the current character if it is a newline, then parse until reaching a {@code StopNode}.
   * This is used after directives like {@code #if}, where a newline is ignored after the final
   * {@code )} in {@code #if (condition)}.
   */
  private ParseResult skipNewlineAndParseToStop(
      ImmutableSet> stopClasses, Supplier contextDescription)
      throws IOException {
    if (c == '\n') {
      next();
    }
    return parseToStop(stopClasses, contextDescription);
  }

  /** Parses a single node from the reader. */
  private Node parseNode() throws IOException {
    if (c == EOF) {
      return new EofNode(resourceName, lineNumber());
    }
    if (c != '#') {
      return parseNonDirective();
    }
    // We are at a #. Step past it and decide what it introduces from the character after it.
    next();
    if (c == '#') {
      return parseLineComment();
    }
    if (c == '*') {
      return parseBlockComment();
    }
    if (c == '[') {
      return parseHashSquare();
    }
    if (c == '{') {
      return parseDirective();
    }
    if (c == '@') {
      return parseMacroCallWithBody();
    }
    if (isAsciiLetter(c)) {
      return parseDirective();
    }
    // For consistency with Velocity, a # that is not followed by a letter or one of the characters
    // handled above is a plain character, and #$foo is a literal # followed by the reference $foo.
    return parsePlainText('#');
  }

  /**
   * Parses what follows {@code #[}. If the next character is another {@code [}, this is a
   * {@code #[[quoted block]]#} and everything up to the closing {@code ]]#} is taken literally.
   * Otherwise the {@code #[} is just the start of some plain text.
   */
  private Node parseHashSquare() throws IOException {
    assert c == '[';
    next();
    if (c != '[') {
      // Not a quoted block after all: a literal #[ followed by whatever comes next.
      return parsePlainText(new StringBuilder("#["));
    }
    int startLine = lineNumber();
    next();
    StringBuilder text = new StringBuilder();
    for (;;) {
      if (c == EOF) {
        throw new ParseException(
            "Unterminated #[[ - did not see matching ]]#", resourceName, startLine);
      }
      // A # only closes the block when the two characters collected just before it are ]].
      int n = text.length();
      boolean closing =
          c == '#' && n >= 2 && text.charAt(n - 1) == ']' && text.charAt(n - 2) == ']';
      if (closing) {
        break;
      }
      text.append((char) c);
      next();
    }
    // Step past the closing #, then drop the ]] that was collected as part of the text.
    next();
    text.setLength(text.length() - 2);
    return new ConstantExpressionNode(resourceName, lineNumber(), text.toString());
  }

  /**
   * Parses a single non-directive node from the reader. This is either a reference, like
   * {@code $foo} or {@code $bar.baz} or {@code $foo.bar[$baz].buh()}; or it is text containing
   * neither references (no {@code $}) nor directives (no {@code #}).
   */
  private Node parseNonDirective() throws IOException {
    if (c != '$') {
      // Plain text: hand over its first character and leave c on the second.
      int first = c;
      next();
      return parsePlainText(first);
    }
    return parseDollar();
  }

  /**
   * Parses what follows a {@code $}: a reference if the {@code $} (or {@code $!}) is followed by
   * an ASCII letter or {@code {}, and otherwise plain text that begins with the {@code $} or
   * {@code $!} itself.
   */
  private Node parseDollar() throws IOException {
    assert c == '$';
    next();
    boolean silent = false;
    if (c == '!') {
      silent = true;
      next();
    }
    boolean startsReference = isAsciiLetter(c) || c == '{';
    if (startsReference) {
      return parseReference(silent);
    }
    // Not a reference, so the characters consumed so far are ordinary text.
    return silent ? parsePlainText("$!") : parsePlainText('$');
  }

  /**
   * Parses a single directive token from the reader. Directives can be spelled with or without
   * braces, for example {@code #if} or {@code #{if}}. In the case of {@code #end}, {@code #else},
   * and {@code #elseif}, we return a {@link StopNode} representing just the token itself. In other
   * cases we also parse the complete directive, for example a complete {@code #foreach...#end}.
   */
  private Node parseDirective() throws IOException {
    final String directive;
    if (c != '{') {
      directive = parseId("Directive");
    } else {
      // Braced spelling, like #{if}.
      next();
      directive = parseId("Directive inside #{...}");
      expect('}');
    }

    // These directives return their node directly, bypassing the newline skip at the bottom.
    switch (directive) {
      case "if":
        return parseIfOrElseIf("#if");
      case "foreach":
        return parseForEach();
      case "break":
        return parseBreak();
      case "macro":
        return parseMacroDefinition();
      case "evaluate":
        return parseEvaluate();
      default:
        break;
    }

    final Node node;
    switch (directive) {
      case "end":
        node = new EndNode(resourceName, lineNumber());
        break;
      case "elseif":
        node = new ElseIfNode(resourceName, lineNumber());
        break;
      case "else":
        node = new ElseNode(resourceName, lineNumber());
        break;
      case "set":
        node = parseSet();
        break;
      case "define":
        node = parseDefine();
        break;
      case "parse":
        node = parseParse();
        break;
      default:
        // Not a standard directive, so treat it as a possible macro call.
        node = parseMacroCall("#", directive);
    }
    // Velocity skips a newline after any directive. In the case of #if etc, that happened when
    // scanning of the body stopped at #end, which is why those cases returned above.
    // TODO(emcmanus): in fact it also skips space before the newline, which should be implemented.
    if (c == '\n') {
      next();
    }
    return node;
  }

  /**
   * Parses an {@code #if} construct, or an {@code #elseif} within one.
   *
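   * <pre>{@code
   * #if ( <condition> ) <true-text> #end
   * #if ( <condition> ) <true-text> #else <false-text> #end
   * #if ( <condition1> ) <text1> #elseif ( <condition2> ) <text2> #else <text3> #end
   * }</pre>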
   */
  private Node parseIfOrElseIf(String directive) throws IOException {
    int startLine = lineNumber();
    expect('(');
    ExpressionNode condition = parseExpression();
    expect(')');

    // The text after the condition runs until the first #else, #elseif or #end.
    ParseResult thenBranch =
        skipNewlineAndParseToStop(
            ELSE_ELSEIF_END_CLASSES,
            () -> "parsing " + directive + " starting on line " + startLine);
    Node truePart = Node.cons(resourceName, startLine, thenBranch.nodes);

    StopNode terminator = thenBranch.stop;
    Node falsePart;
    if (terminator instanceof EndNode) {
      // No #else at all, so there is nothing to render when the condition is false.
      falsePart = Node.emptyNode(resourceName, lineNumber());
    } else if (terminator instanceof ElseIfNode) {
      // The #elseif chain is parsed as a nested #if that forms the false part.
      falsePart = parseIfOrElseIf("#elseif");
    } else {
      // Otherwise the terminator was #else, whose text must be closed by #end.
      int elseLine = lineNumber();
      ParseResult elseBranch =
          parseToStop(END_CLASS, () -> "parsing #else starting on line " + elseLine);
      falsePart = Node.cons(resourceName, elseLine, elseBranch.nodes);
    }
    return new IfNode(resourceName, startLine, condition, truePart, falsePart);
  }

  /**
   * Parses a {@code #foreach} token from the reader.
   *
   * <pre>{@code
   * #foreach ( $<id> in <expression> ) <body> #end
   * }</pre>
   */
  private Node parseForEach() throws IOException {
    int startLine = lineNumber();
    expect('(');
    skipSpace();
    if (c != '$') {
      throw parseException("Expected variable beginning with '$' for #foreach");
    }
    // The loop variable must be a bare $name, not something like $a.b or $a[0].
    Node loopVariable = parseDollar();
    if (!(loopVariable instanceof PlainReferenceNode)) {
      throw parseException("Expected simple variable for #foreach");
    }
    String var = ((PlainReferenceNode) loopVariable).id;

    // Next comes the keyword "in", checked one character at a time.
    skipSpace();
    boolean sawIn = false;
    if (c == 'i') {
      next();
      sawIn = (c == 'n');
    }
    if (!sawIn) {
      throw parseException("Expected 'in' for #foreach");
    }
    next();

    ExpressionNode collection = parseExpression();
    expect(')');
    ParseResult loopBody =
        skipNewlineAndParseToStop(
            END_CLASS, () -> "parsing #foreach starting on line " + startLine);
    return new ForEachNode(
        resourceName,
        startLine,
        var,
        collection,
        Node.cons(resourceName, startLine, loopBody.nodes));
  }

  /**
   * Parses a {@code #break} token from the reader.
   *
   * There is an optional scope, so you can write {@code #break ($foreach)},
   * {@code #break ($foreach.parent)}, {@code #break ($parse)}, and so on. We only support
   * {@code $foreach}. If there is no scope, we will break from the nearest {@code #foreach} or
   * {@code #parse}, or, if there is none, from the whole template.
   */
  private Node parseBreak() throws IOException {
    // #break is the one directive whose parenthesized parameter is *optional*. Discarding the
    // spaces after it is safe even when no `(` follows: plain text after a #break is never
    // rendered anyway. For the same reason there is no need to skip a \n after the #break.
    skipSpace();
    final ExpressionNode scope;
    if (c != '(') {
      scope = null;
    } else {
      next();
      scope = parsePrimary();
      expect(')');
    }
    return new BreakNode(resourceName, lineNumber(), scope);
  }

  /**
   * Parses a {@code #set} token from the reader.
   *
   * <pre>{@code
   * #set ( $<id> = <expression> )
   * }</pre>
   */
  private Node parseSet() throws IOException {
    // Each expect() call skips any whitespace in front of the character it checks.
    expect('(');
    expect('$');
    // The name of the variable being assigned, without its leading $.
    String var = parseId("#set variable");
    expect('=');
    ExpressionNode expression = parseExpression();
    expect(')');
    return new SetNode(var, expression);
  }

  /**
   * Parses a {@code #define} directive from the reader.
   *
   * <pre>{@code
   * #define ( $<id> ) <body> #end
   * }</pre>
   */
  private Node parseDefine() throws IOException {
    int startLine = lineNumber();
    expect('(');
    expect('$');
    String var = parseId("#define variable");
    expect(')');
    // Everything up to the matching #end is the body that the variable will stand for.
    ParseResult parsedBody =
        skipNewlineAndParseToStop(END_CLASS, () -> "parsing #define starting on line " + startLine);
    Node body = Node.cons(resourceName, startLine, parsedBody.nodes);
    return new DefineNode(var, body);
  }

  /**
   * Parses a {@code #parse} token from the reader.
   *
   * <pre>{@code
   * #parse ( <primary> )
   * }</pre>
   *
   * When we see a {@code #parse} directive while parsing a template, all we do is record it as a
   * {@link ParseNode} in the {@link Template} we produce. We only actually open and parse the
   * resource named in the {@code #parse} when the template is later evaluated. The {@code
   * parseCache} means that we will only do this once, at least if the argument to the {@code
   * #parse} is always the same string.
   */
  private Node parseParse() throws IOException {
    int startLine = lineNumber();
    expect('(');
    ExpressionNode nameExpression = parsePrimary();
    // expect() skips whitespace itself before checking for the closing parenthesis.
    expect(')');
    return new ParseNode(resourceName, startLine, nameExpression, resourceOpener, parseCache);
  }

  /**
   * Parses a {@code #macro} token from the reader.
   *
   * <pre>{@code
   * #macro ( <id> $<param1> $<param2> <...>) <body> #end
   * }</pre>
   *
   * Macro parameters are optionally separated by commas.
   */
  private Node parseMacroDefinition() throws IOException {
    int startLine = lineNumber();
    expect('(');
    skipSpace();
    String name = parseId("Macro name");
    ImmutableList.Builder<String> parameterNames = ImmutableList.builder();
    while (true) {
      skipSpace();
      if (c == ')') {
        next();
        break;
      }
      // Parameters are optionally separated by commas.
      if (c == ',') {
        next();
        skipSpace();
      }
      if (c != '$') {
        throw parseException("Macro parameters should look like $name");
      }
      next();
      parameterNames.add(parseId("Macro parameter name"));
    }
    ParseResult parsedBody =
        skipNewlineAndParseToStop(END_CLASS, () -> "parsing #macro starting on line " + startLine);
    // Only the first definition of a given macro name has any effect; later ones are parsed
    // (so that their #end is consumed) and then discarded.
    if (!macros.containsKey(name)) {
      ImmutableList<Node> bodyNodes =
          ImmutableList.copyOf(SetSpacing.removeInitialSpaceBeforeSet(parsedBody.nodes));
      Node body = Node.cons(resourceName, startLine, bodyNodes);
      Macro macro = new Macro(startLine, name, parameterNames.build(), body);
      macros.put(name, macro);
    }
    // The definition itself contributes an empty node to the template.
    return Node.emptyNode(resourceName, lineNumber());
  }

  /**
   * {@code #directives} that Velocity supports but we currently don't, and that don't have to be
   * followed by {@code (}. If we see one of these, we should complain, rather than just ignoring it
   * the way we would for {@code #random} or whatever. If it does have to be followed by
   * {@code (} then we will treat it as an undefined macro, which is fine.
   */
  private static final ImmutableSet<String> UNSUPPORTED_VELOCITY_DIRECTIVES =
      ImmutableSet.of("stop");

  /**
   * Parses an identifier after {@code #} that is not one of the standard directives. The assumption
   * is that it is a call of a macro that is defined in the template. Macro definitions are
   * extracted from the template during parsing (and not during evaluation of the template as you
   * might expect). This means that a macro can be called before it is defined.
   *
   * <pre>{@code
   * #<id> ()
   * #<id> ( <expr1> )
   * #<id> ( <expr1> <expr2>)
   * #<id> ( <expr1> , <expr2>)
   * ...
   * }</pre>
   */
  private Node parseMacroCall(String prefix, String directive) throws IOException {
    int startLine = lineNumber();
    // Collect the text seen so far, in case this turns out to be plain text and not a call.
    StringBuilder literalText = new StringBuilder(prefix).append(directive);
    for (; Character.isWhitespace(c); next()) {
      literalText.appendCodePoint(c);
    }
    if (c != '(') {
      if (UNSUPPORTED_VELOCITY_DIRECTIVES.contains(directive)) {
        throw parseException("EscapeVelocity does not currently support #" + directive);
      }
      // Velocity allows #foo, where #foo is not a directive and is not followed by `(` (so it can't
      // be a macro call). Then it is just plain text. BUT, sometimes but not always, Velocity will
      // reject #endfoo, a string beginning with #end. So we do always reject that.
      if (directive.startsWith("end")) {
        throw parseException("Unrecognized directive #" + directive);
      }
      return parsePlainText(literalText);
    }
    next();

    ImmutableList.Builder parameterNodes = ImmutableList.builder();
    skipSpace();
    while (c != ')') {
      parameterNodes.add(parsePrimary());
      if (c == ',') {
        // The documentation doesn't say so, but you can apparently have an optional comma in
        // macro calls.
        next();
      }
      skipSpace();
    }
    // Step past the closing parenthesis.
    next();

    // Only the #@name(...) form has body content, which runs up to the matching #end.
    Node bodyContent = null;
    if (!prefix.equals("#")) {
      ParseResult parsedBody =
          skipNewlineAndParseToStop(
              END_CLASS, () -> "#@" + directive + " starting on line " + startLine);
      bodyContent = Node.cons(resourceName, startLine, parsedBody.nodes);
    }
    return new DirectiveNode.MacroCallNode(
        resourceName, lineNumber(), directive, parameterNodes.build(), bodyContent);
  }

  /**
   * Parses what follows {@code #@}. If an ASCII letter comes next, this is handled as a macro
   * call with the {@code #@} prefix; otherwise the {@code #@} is the start of plain text.
   */
  private Node parseMacroCallWithBody() throws IOException {
    assert c == '@';
    next();
    if (isAsciiLetter(c)) {
      return parseMacroCall("#@", parseId("#@"));
    }
    return parsePlainText("#@");
  }

  /**
   * Parses a line comment, which is {@code ##} followed by any number of characters
   * up to and including the next newline.
   */
  private Node parseLineComment() throws IOException {
    // Record the line the comment is on before it is consumed.
    int commentLine = lineNumber();
    while (!(c == '\n' || c == EOF)) {
      next();
    }
    // Step past the newline that ends the comment; at EOF this call changes nothing.
    next();
    return new CommentNode(resourceName, commentLine);
  }

  /**
   * Parses a block comment, which is {@code #*} followed by everything up to and
   * including the next {@code *#}.
   */
  private Node parseBlockComment() throws IOException {
    assert c == '*';
    int startLine = lineNumber();
    next();
    // The character seen just before c, used to recognise the two-character terminator *#.
    int previous = '\0';
    // Consistently with Velocity, an unclosed #* comment is not an error: it runs to the end of
    // the input.
    while (c != EOF && !(previous == '*' && c == '#')) {
      previous = c;
      next();
    }
    // Step past the closing #. At EOF this call changes nothing.
    next();
    return new CommentNode(resourceName, startLine);
  }

  /**
   * A node in the parse tree representing a comment. The only reason for recording comment nodes is
   * so that we can skip space between a comment and a following {@code #set}, to be compatible with
   * Velocity behaviour.
   */
  static class CommentNode extends Node {
    /** Creates a comment node with the given resource name and line number. */
    CommentNode(String resourceName, int lineNumber) {
      super(resourceName, lineNumber);
    }

    /** Does nothing: this override appends no text to {@code output}. */
    @Override
    void render(EvaluationContext context, StringBuilder output) {}
  }

  /**
   * Parses plain text, which is text that contains neither {@code $} nor {@code #}. The given
   * {@code firstChar} is the first character of the plain text, and {@link #c} is the second
   * (if the plain text is more than one character).
   */
  private Node parsePlainText(int firstChar) throws IOException {
    // Seed the buffer with the character already consumed, then collect the rest from c onwards.
    return parsePlainText(new StringBuilder().appendCodePoint(firstChar));
  }

  /**
   * Parses plain text, which is text that contains neither {@code $} nor {@code #}. The given
   * {@code initialChars} are the first characters of the plain text, and {@link #c} is the
   * character after those.
   */
  private Node parsePlainText(String initialChars) throws IOException {
    // Delegate to the StringBuilder overload, which appends the characters that follow.
    return parsePlainText(new StringBuilder(initialChars));
  }

  /**
   * Appends characters to {@code sb}, starting with {@link #c}, until the next {@code $},
   * {@code #}, or the end of the input, and returns the accumulated text as a constant node.
   */
  private Node parsePlainText(StringBuilder sb) throws IOException {
    // Anything other than EOF, $ and # is just some random character of text.
    while (c != EOF && c != '$' && c != '#') {
      sb.appendCodePoint(c);
      next();
    }
    return new ConstantExpressionNode(resourceName, lineNumber(), sb.toString());
  }

  /**
   * Parses a reference, which is everything that can start with a {@code $}. References can
   * optionally be enclosed in braces, so {@code $x} and {@code ${x}} are the same. Braces are
   * useful when text after the reference would otherwise be parsed as part of it. For example,
   * {@code ${x}y} is a reference to the variable {@code $x}, followed by the plain text {@code y}.
   * Of course {@code $xy} would be a reference to the variable {@code $xy}.
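   * <pre>{@code
   * <reference> -> $<maybe-silent><reference-no-brace> |
   *                $<maybe-silent>{<reference-no-brace>}
   * <maybe-silent> -> <empty> | !
   * }</pre>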
   *
   * On entry to this method, {@link #c} is the character immediately after the {@code $}, or
   * the {@code !} if there is one.
   *
   * @param silent true if this is {@code $!}.
   */
  private Node parseReference(boolean silent) throws IOException {
    if (c != '{') {
      return parseReferenceNoBrace(silent);
    }
    next();
    if (!isAsciiLetter(c)) {
      // A brace not followed by a letter doesn't start a reference; what we consumed is text.
      return parsePlainText(silent ? "$!{" : "${");
    }
    ReferenceNode braced = parseReferenceNoBrace(silent);
    expect('}');
    return braced;
  }

  /**
   * Same as {@link #parseReference}, except it really must be a reference. A {@code $} in
   * normal text doesn't start a reference if it is not followed by an identifier. But in an
   * expression, for example in {@code #if ($x == 23)}, {@code $} must be followed by an
   * identifier.
   *
   * <p>Velocity allows the {@code $!} syntax in these contexts, but it doesn't have any effect
   * since null values are allowed anyway.
   */
  private ReferenceNode parseRequiredReference() throws IOException {
    // An optional ! is accepted and ignored.
    if (c == '!') {
      next();
    }
    boolean braced = (c == '{');
    if (braced) {
      next();
    }
    ReferenceNode reference = parseReferenceNoBrace(/* silent= */ false);
    if (braced) {
      expect('}');
    }
    return reference;
  }

  /**
   * Parses a reference, in the simple form without braces.
   * <pre>{@code
   * <reference-no-brace> -> <id><reference-suffix>
   * }</pre>
   */
  private ReferenceNode parseReferenceNoBrace(boolean silent) throws IOException {
    String id = parseId("Reference");
    // Start from the bare variable; parseReferenceSuffix then handles any .member or [index]
    // that follows it.
    ReferenceNode lhs = new PlainReferenceNode(resourceName, lineNumber(), id, silent);
    return parseReferenceSuffix(lhs, silent);
  }

  /**
   * Parses the modifiers that can appear at the tail of a reference.
   * <pre>{@code
   * <reference-suffix> -> <empty> |
   *                       <reference-member> |
   *                       <reference-index>
   * }</pre>
   *
   * @param lhs the reference node representing the first part of the reference
   *     {@code $x} in {@code $x.foo} or {@code $x.foo()}, or later {@code $x.y} in {@code $x.y.z}.
   */
  private ReferenceNode parseReferenceSuffix(ReferenceNode lhs, boolean silent) throws IOException {
    if (c == '.') {
      return parseReferenceMember(lhs, silent);
    }
    if (c == '[') {
      return parseReferenceIndex(lhs, silent);
    }
    // Neither a member nor an index follows, so the reference ends here.
    return lhs;
  }

  /**
   * Parses a reference member, which is either a property reference like {@code $x.y} or a method
   * call like {@code $x.y($z)}.
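   * <pre>{@code
   * <reference-member> -> .<reference-property-or-method><reference-suffix>
   * <reference-property-or-method> -> <id> |
   *                                    <id> ( <method-parameter-list> )
   * }</pre>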
   *
   * @param lhs the reference node representing what appears to the left of the dot, like the
   *     {@code $x} in {@code $x.foo} or {@code $x.foo()}.
   */
  private ReferenceNode parseReferenceMember(ReferenceNode lhs, boolean silent) throws IOException {
    assert c == '.';
    next();
    if (!isAsciiLetter(c)) {
      // We've seen something like `$foo.!`, so it turns out it's not a member after all. Put the
      // dot back so that it is read again as ordinary input.
      pushback('.');
      return lhs;
    }
    String id = parseId("Member");
    // An opening parenthesis right after the name makes this a method call, not a property.
    ReferenceNode member =
        (c == '(')
            ? parseReferenceMethodParams(lhs, id, silent)
            : new MemberReferenceNode(lhs, id, silent);
    return parseReferenceSuffix(member, silent);
  }

  /**
   * Parses the parameters to a method reference, like {@code $foo.bar($a, $b)}.
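   * <pre>{@code
   * <method-parameter-list> -> <empty> |
   *                            <non-empty-method-parameter-list>
   * <non-empty-method-parameter-list> -> <primary> |
   *                                       <primary> , <non-empty-method-parameter-list>
   * }</pre>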
   *
   * @param lhs the reference node representing what appears to the left of the dot, like the
   *     {@code $x} in {@code $x.foo()}.
   */
  private ReferenceNode parseReferenceMethodParams(ReferenceNode lhs, String id, boolean silent)
      throws IOException {
    assert c == '(';
    nextNonSpace();
    ImmutableList.Builder args = ImmutableList.builder();
    // An immediate ) means an empty argument list. Otherwise read arguments for as long as each
    // one is followed by a comma.
    boolean moreArgs = (c != ')');
    while (moreArgs) {
      args.add(parsePrimary(/* nullAllowed= */ true));
      moreArgs = (c == ',');
      if (moreArgs) {
        nextNonSpace();
      }
    }
    if (c != ')') {
      throw parseException("Expected )");
    }
    assert c == ')';
    next();
    return new MethodReferenceNode(lhs, id, args.build(), silent);
  }

  /**
   * Parses an index suffix to a reference, like {@code $x[$i]}.
   * <pre>{@code
   * <reference-index> -> [ <primary> ]
   * }</pre>
   *
   * @param lhs the reference node representing what appears to the left of the dot, like the
   *     {@code $x} in {@code $x[$i]}.
   */
  private ReferenceNode parseReferenceIndex(ReferenceNode lhs, boolean silent) throws IOException {
    assert c == '[';
    next();
    ExpressionNode index = parsePrimary();
    if (c == ']') {
      next();
      // The indexed value may itself be followed by another member or index.
      return parseReferenceSuffix(new IndexReferenceNode(lhs, index, silent), silent);
    }
    throw parseException("Expected ]");
  }

  enum Operator {
    /**
     * A dummy operator whose precedence is lower than that of every real operator. When parsing
     * subexpressions, we always stop on reaching an operator of lower precedence than the
     * "current precedence". For example, when parsing {@code 1 + 2 * 3 + 4}, the subexpression
     * that starts at {@code * 3} ends at the second {@code +}, because {@code +} has lower
     * precedence than {@code *}. This dummy operator plays the part of that {@code +}. It is also
     * what we return when we look for an operator and don't find one, so it is as if every
     * expression had this operator on either side of it.
     */
    STOP("", 0),

    // Declaration order matters. If a one-character operator is a prefix of a two-character
    // operator, like < and <=, then the one-character operator must come first.
    OR("||", 1),
    AND("&&", 2),
    EQUAL("==", 3),
    NOT_EQUAL("!=", 3),
    LESS("<", 4),
    LESS_OR_EQUAL("<=", 4),
    GREATER(">", 4),
    GREATER_OR_EQUAL(">=", 4),
    PLUS("+", 5),
    MINUS("-", 5),
    TIMES("*", 6),
    DIVIDE("/", 6),
    REMAINDER("%", 6);

    /** The operator as written in a template; empty for {@link #STOP}. */
    final String symbol;

    /** Binding strength: a larger number binds more tightly. */
    final int precedence;

    Operator(String symbol, int precedence) {
      this.symbol = symbol;
      this.precedence = precedence;
    }

    /** Returns {@link #symbol}. */
    @Override
    public String toString() {
      return symbol;
    }

    /** True if this is an inequality operator, one of {@code < > <= >=}. */
    boolean isInequality() {
      // The four inequality operators are exactly the ones sharing the precedence of <.
      return precedence == LESS.precedence;
    }
  }

  /**
   * Maps a code point to the operators that begin with that code point. For example, maps
   * {@code <} to {@code LESS} and {@code LESS_OR_EQUAL}.
   */
  private static final ImmutableListMultimap<Integer, Operator> CODE_POINT_TO_OPERATORS;
  static {
    // Keys are the first character of each operator symbol, widened to int. Values for a key are
    // added in Operator declaration order.
    ImmutableListMultimap.Builder<Integer, Operator> builder = ImmutableListMultimap.builder();
    for (Operator operator : Operator.values()) {
      // STOP has an empty symbol, so it has no first character to index by.
      if (operator != Operator.STOP) {
        builder.put((int) operator.symbol.charAt(0), operator);
      }
    }
    CODE_POINT_TO_OPERATORS = builder.build();
  }

  /**
   * Parses an expression, which can occur within a directive like {@code #if} or {@code #set}.
   * Arbitrary expressions can't appear within a reference like {@code $x[$a + $b]} or
   * {@code $x.m($a + $b)}, consistent with Velocity.
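   * <pre>{@code
   * <expression> -> <and-expression> |
   *                 <expression> || <and-expression>
   * <and-expression> -> <equality-expression> |
   *                     <and-expression> && <equality-expression>
   * <equality-expression> -> <relational-expression> |
   *                          <equality-expression> <equality-op> <relational-expression>
   * <equality-op> -> == | !=
   * <relational-expression> -> <additive-expression> |
   *                            <relational-expression> <relation> <additive-expression>
   * <relation> -> < | <= | > | >=
   * <additive-expression> -> <multiplicative-expression> |
   *                          <additive-expression> <add-op> <multiplicative-expression>
   * <add-op> -> + | -
   * <multiplicative-expression> -> <unary-expression> |
   *                                <multiplicative-expression> <mult-op> <unary-expression>
   * <mult-op> -> * | / | %
   * }</pre>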
   */
  private ExpressionNode parseExpression() throws IOException {
    // The first operand must be consumed before the OperatorParser is created, because its
    // constructor immediately scans for the operator that follows.
    ExpressionNode firstOperand = parseUnaryExpression();
    OperatorParser operatorParser = new OperatorParser();
    // 1 is the precedence of ||, the loosest-binding real operator, so every operator is accepted.
    return operatorParser.parse(firstOperand, 1);
  }

  /**
   * An operator-precedence parser for the binary operations we understand. It implements an
   * algorithm from Wikipedia
   * that uses recursion rather than having an explicit stack of operators and values.
   */
  private class OperatorParser {
    /**
     * The operator we have just scanned, in the same way that {@code c} is the character we have
     * just read. If we were not able to scan an operator, this will be {@link Operator#STOP}.
     */
    private Operator currentOperator;

    /**
     * Creates a parser and scans the first operator, so the left-hand operand must already have
     * been consumed from the input.
     */
    OperatorParser() throws IOException {
      nextOperator();
    }

    /**
     * Parse a subexpression whose left-hand side is {@code lhs} and where we only consider
     * operators with precedence at least {@code minPrecedence}.
     *
     * @param lhs the already-parsed left operand
     * @param minPrecedence the lowest operator precedence that this call will consume
     * @return the parsed subexpression
     */
    ExpressionNode parse(ExpressionNode lhs, int minPrecedence) throws IOException {
      while (currentOperator.precedence >= minPrecedence) {
        Operator operator = currentOperator;
        ExpressionNode rhs = parseUnaryExpression();
        nextOperator();
        // A tighter-binding operator after rhs claims rhs as its own left operand, so fold that
        // subexpression into rhs before combining with lhs.
        while (currentOperator.precedence > operator.precedence) {
          rhs = parse(rhs, currentOperator.precedence);
        }
        // Operators of equal precedence are combined here on the next loop iteration, which makes
        // them left-associative.
        lhs = new BinaryExpressionNode(lhs, operator, rhs);
      }
      return lhs;
    }

    /**
     * Updates {@link #currentOperator} to be an operator read from the input,
     * or {@link Operator#STOP} if there is none.
     */
    private void nextOperator() throws IOException {
      skipSpace();
      switch (c) {
        case 'a':
          wordOperator("and", Operator.AND);
          return;
        case 'o':
          wordOperator("or", Operator.OR);
          return;
        default: // neither 'a' nor 'o': look for a symbolic operator below
      }
      // Typed list rather than a raw one, so that the loop below can iterate it as Operator.
      ImmutableList<Operator> possibleOperators = CODE_POINT_TO_OPERATORS.get(c);
      if (possibleOperators.isEmpty()) {
        currentOperator = Operator.STOP;
        return;
      }
      char firstChar = Chars.checkedCast(c);
      next();
      Operator operator = null;
      for (Operator possibleOperator : possibleOperators) {
        if (possibleOperator.symbol.length() == 1) {
          // A one-character operator must be the first candidate for its character.
          Verify.verify(operator == null);
          operator = possibleOperator;
        } else if (possibleOperator.symbol.charAt(1) == c) {
          // The second character matches too, so prefer the two-character operator.
          next();
          operator = possibleOperator;
        }
      }
      if (operator == null) {
        // Nothing matched: the only candidate was a two-character operator such as == or &&, and
        // its second character was missing.
        throw parseException(
            "Expected " + Iterables.getOnlyElement(possibleOperators) + ", not just " + firstChar);
      }
      currentOperator = operator;
    }

    /**
     * Reads an identifier and requires it to be exactly {@code symbol}, in which case
     * {@link #currentOperator} becomes {@code operator}. Any other identifier is a parse error.
     */
    private void wordOperator(String symbol, Operator operator) throws IOException {
      String id = parseId("");
      if (id.equals(symbol)) {
        currentOperator = operator;
      } else {
        throw parseException("Expected '" + symbol + "' but was '" + id + "'");
      }
    }
  }

  /**
   * Parses an expression not containing any operators (except inside parentheses).
   * <pre>{@code
   * <unary-expression> -> <primary> |
   *                       ( <expression> ) |
   *                       ! <unary-expression>
   * }</pre>
   */
  private ExpressionNode parseUnaryExpression() throws IOException {
    skipSpace();
    switch (c) {
      case '(': {
        // Parenthesized subexpression: a full expression is allowed inside.
        nextNonSpace();
        ExpressionNode parenthesized = parseExpression();
        expect(')');
        skipSpace();
        return parenthesized;
      }
      case '!': {
        // Logical negation applies to the unary expression that follows.
        next();
        ExpressionNode negated = new NotExpressionNode(parseUnaryExpression());
        skipSpace();
        return negated;
      }
      default:
        return parsePrimary();
    }
  }


  /**
   * Parses an expression containing only literals or references.
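   * <pre>{@code
   * <primary> -> <reference> |
   *              <string-literal> |
   *              <integer-literal> |
   *              <boolean-literal> |
   *              <list-literal>
   * }</pre>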
   */
  private ExpressionNode parsePrimary() throws IOException {
    // Delegates to the general form with nullAllowed switched off.
    return parsePrimary(/* nullAllowed= */ false);
  }

  private ExpressionNode parsePrimary(boolean nullAllowed) throws IOException {
    skipSpace();
    final ExpressionNode primary;
    // Dispatch on the first character of the primary.
    switch (c) {
      case '$':
        next();
        primary = parseRequiredReference();
        break;
      case '"':
        primary = parseStringLiteral('"', true);
        break;
      case '\'':
        primary = parseStringLiteral('\'', false);
        break;
      case '-':
        // Velocity does not have a negation operator. If we see '-' it must be the start of a
        // negative integer literal.
        next();
        primary = parseIntLiteral("-");
        break;
      case '[':
        primary = parseListLiteral();
        break;
      default:
        if (isAsciiDigit(c)) {
          primary = parseIntLiteral("");
        } else if (isAsciiLetter(c)) {
          primary = parseNotOrBooleanOrNullLiteral(nullAllowed);
        } else {
          throw parseException("Expected a reference or a literal");
        }
    }
    // Leave the input positioned at the first non-space character after the primary.
    skipSpace();
    return primary;
  }

  /**
   * Parses a list or range literal.
   *
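   * <pre>{@code
   * <list-literal> -> <empty-list> | <non-empty-list>
   * <empty-list> -> [ ]
   * <non-empty-list> -> [ <primary> <list-end>
   * <list-end> -> <range-end> | <remainder-of-list-literal>
   * <range-end> -> .. <primary> ]
   * <remainder-of-list-literal> -> <end-of-list> | , <primary> <remainder-of-list-literal>
   * <end-of-list> -> ]
   * }</pre>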
   */
  private ExpressionNode parseListLiteral() throws IOException {
    assert c == '[';
    nextNonSpace();
    if (c != ']') {
      // The character after the first element tells a range ([a..b]) apart from a list.
      ExpressionNode head = parsePrimary(false);
      return (c == '.') ? parseRangeLiteral(head) : parseRemainderOfListLiteral(head);
    }
    // Empty list: consume the closing bracket.
    next();
    return new ListLiteralNode(resourceName, lineNumber(), ImmutableList.of());
  }

  private ExpressionNode parseRangeLiteral(ExpressionNode first) throws IOException {
    assert c == '.';
    next();
    if (c == '.') {
      nextNonSpace();
      ExpressionNode rangeEnd = parsePrimary(false);
      if (c == ']') {
        nextNonSpace();
        return new RangeLiteralNode(resourceName, lineNumber(), first, rangeEnd);
      }
      throw parseException("Expected ] at end of range literal");
    }
    // A single dot after the first element is not valid inside a list literal.
    throw parseException("Expected two dots (..) not just one");
  }

  private ExpressionNode parseRemainderOfListLiteral(ExpressionNode first) throws IOException {
    // Typed builder rather than a raw one: every element is an ExpressionNode.
    ImmutableList.Builder<ExpressionNode> builder = ImmutableList.builder();
    builder.add(first);
    while (c == ',') {
      next();
      // parsePrimary skips space on both sides of the element.
      builder.add(parsePrimary(false));
    }
    if (c != ']') {
      throw parseException("Expected ] at end of list literal");
    }
    next();
    return new ListLiteralNode(resourceName, lineNumber(), builder.build());
  }

  /**
   * A range literal such as {@code [1..3]}. Evaluates to a sorted set containing every integer
   * between the two endpoints, inclusive.
   */
  private static class RangeLiteralNode extends ExpressionNode {
    private final ExpressionNode first;
    private final ExpressionNode last;

    RangeLiteralNode(
        String resourceName, int lineNumber, ExpressionNode first, ExpressionNode last) {
      super(resourceName, lineNumber);
      this.first = first;
      this.last = last;
    }

    @Override
    public String toString() {
      return "[" + first + ".." + last + "]";
    }

    /**
     * Evaluates both endpoints as ints and returns the integers between them, inclusive. The set
     * is in ascending order if the first endpoint is not greater than the last, and in descending
     * order otherwise.
     */
    @Override
    Object evaluate(EvaluationContext context, boolean undefinedIsFalse) {
      int from = first.intValue(context);
      int to = last.intValue(context);
      // Parameterized rather than raw, so the set and its wrapper are checked as sets of Integer.
      ImmutableSortedSet<Integer> set =
          (from <= to)
              ? ContiguousSet.closed(from, to)
              : ContiguousSet.closed(to, from).descendingSet();
      // Wrap the set only to change how it prints; everything else is forwarded unchanged.
      return new ForwardingSortedSet<Integer>() {
        @Override
        protected ImmutableSortedSet<Integer> delegate() {
          return set;
        }

        @Override
        public String toString() {
          // ContiguousSet returns [1..3] whereas Velocity uses [1, 2, 3].
          return set.asList().toString();
        }
      };
    }
  }

  private static class ListLiteralNode extends ExpressionNode {
    private final ImmutableList elements;

    ListLiteralNode(String resourceName, int lineNumber, ImmutableList elements) {
      super(resourceName, lineNumber);
      this.elements = elements;
    }

    @Override
    public String toString() {
      return "[" + Joiner.on(", ").join(elements) + "]";
    }

    @Override
    Object evaluate(EvaluationContext context, boolean undefinedIsFalse) {
      // We can't use ImmutableList because there can be nulls.
      List