com.google.auto.value.processor.escapevelocity.Parser Maven / Gradle / Ivy

Go to download
/*
 * Copyright (C) 2015 Google, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
 * in compliance with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software distributed under the License
 * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
 * or implied. See the License for the specific language governing permissions and limitations under
 * the License.
 */

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package com.google.auto.value.processor.escapevelocity;

import com.google.auto.value.processor.escapevelocity.DirectiveNode.SetNode;
import com.google.auto.value.processor.escapevelocity.ExpressionNode.BinaryExpressionNode;
import com.google.auto.value.processor.escapevelocity.ExpressionNode.NotExpressionNode;
import com.google.auto.value.processor.escapevelocity.ReferenceNode.IndexReferenceNode;
import com.google.auto.value.processor.escapevelocity.ReferenceNode.MemberReferenceNode;
import com.google.auto.value.processor.escapevelocity.ReferenceNode.MethodReferenceNode;
import com.google.auto.value.processor.escapevelocity.ReferenceNode.PlainReferenceNode;
import com.google.auto.value.processor.escapevelocity.TokenNode.CommentTokenNode;
import com.google.auto.value.processor.escapevelocity.TokenNode.ElseIfTokenNode;
import com.google.auto.value.processor.escapevelocity.TokenNode.ElseTokenNode;
import com.google.auto.value.processor.escapevelocity.TokenNode.EndTokenNode;
import com.google.auto.value.processor.escapevelocity.TokenNode.EofNode;
import com.google.auto.value.processor.escapevelocity.TokenNode.ForEachTokenNode;
import com.google.auto.value.processor.escapevelocity.TokenNode.IfTokenNode;
import com.google.auto.value.processor.escapevelocity.TokenNode.MacroDefinitionTokenNode;
import com.google.auto.value.processor.escapevelocity.TokenNode.NestedTokenNode;
import com.google.common.base.CharMatcher;
import com.google.common.base.Verify;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableListMultimap;
import com.google.common.collect.Iterables;
import com.google.common.primitives.Chars;
import com.google.common.primitives.Ints;
import java.io.IOException;
import java.io.LineNumberReader;
import java.io.Reader;

/**
 * A parser that reads input from the given {@link Reader} and parses it to produce a
 * {@link Template}.
 *
 * @author [email protected] (Éamonn McManus)
 */
class Parser {
  private static final int EOF = -1;

  private final LineNumberReader reader;
  private final String resourceName;
  private final Template.ResourceOpener resourceOpener;

  /**
   * The invariant of this parser is that {@code c} is always the next character of interest.
   * This means that we almost never have to "unget" a character by reading too far. For example,
   * after we parse an integer, {@code c} will be the first character after the integer, which is
   * exactly the state we will be in when there are no more digits.
   *
   * Sometimes we need to read two characters ahead, and in that case we use {@link #pushback}.
   */
  private int c;

  /**
   * A single character of pushback. If this is not negative, the {@link #next()} method will
   * return it instead of reading a character.
   */
  private int pushback = -1;

  Parser(Reader reader, String resourceName, Template.ResourceOpener resourceOpener)
      throws IOException {
    this.reader = new LineNumberReader(reader);
    this.reader.setLineNumber(1);
    next();
    this.resourceName = resourceName;
    this.resourceOpener = resourceOpener;
  }

  /**
   * Parse the input completely to produce a {@link Template}.
   *
   * 
Parsing happens in two phases. First, we parse a sequence of "tokens", where tokens include
   * entire references such as 
   *    ${x.foo()[23]}
   * 
or entire directives such as   *    #set ($x = $y + $z)
   * 
But tokens do not span complex constructs. For example,   *    #if ($x == $y) something #end
   * 
is three tokens:   *    #if ($x == $y)
   *    (literal text " something ")
   *   #end
   * 
   *
   * The second phase then takes the sequence of tokens and constructs a parse tree out of it.
   * Some nodes in the parse tree will be unchanged from the token sequence, such as the 
   *    ${x.foo()[23]}
   *    #set ($x = $y + $z)
   *  examples above. But a construct such as the {@code #if ... #end} mentioned above will
   * become a single IfNode in the parse tree in the second phase.
   *
   * The main reason for this approach is that Velocity has two kinds of lexical contexts. At the
   * top level, there can be arbitrary literal text; references like ${x.foo()}; and
   * directives like {@code #if} or {@code #set}. Inside the parentheses of a directive, however,
   * neither arbitrary text nor directives can appear, but expressions can, so we need to tokenize
   * the inside of 
   *    #if ($x == $a + $b)
   *  as the five tokens "$x", "==", "$a", "+", "$b". Rather than having a classical
   * parser/lexer combination, where the lexer would need to switch between these two modes, we
   * replace the lexer with an ad-hoc parser that is the first phase described above, and we
   * define a simple parser over the resultant tokens that is the second phase.
   */
  Template parse() throws IOException {
    ImmutableList tokens = parseTokens();
    return new Reparser(tokens).reparse();
  }

  private ImmutableList parseTokens() throws IOException {
    ImmutableList.Builder tokens = ImmutableList.builder();
    Node token;
    do {
      token = parseNode();
      tokens.add(token);
    } while (!(token instanceof EofNode));
    return tokens.build();
  }

  private int lineNumber() {
    return reader.getLineNumber();
  }

  /**
   * Gets the next character from the reader and assigns it to {@code c}. If there are no more
   * characters, sets {@code c} to {@link #EOF} if it is not already.
   */
  private void next() throws IOException {
    if (c != EOF) {
      if (pushback < 0) {
        c = reader.read();
      } else {
        c = pushback;
        pushback = -1;
      }
    }
  }

  /**
   * Saves the current character {@code c} to be read again, and sets {@code c} to the given
   * {@code c1}. Suppose the text contains {@code xy} and we have just read {@code y}.
   * So {@code c == 'y'}. Now if we execute {@code pushback('x')}, we will have
   * {@code c == 'x'} and the next call to {@link #next()} will set {@code c == 'y'}. Subsequent
   * calls to {@code next()} will continue reading from {@link #reader}. So the pushback
   * essentially puts us back in the state we were in before we read {@code y}.
   */
  private void pushback(int c1) {
    pushback = c;
    c = c1;
  }

  /**
   * If {@code c} is a space character, keeps reading until {@code c} is a non-space character or
   * there are no more characters.
   */
  private void skipSpace() throws IOException {
    while (Character.isWhitespace(c)) {
      next();
    }
  }

  /**
   * Gets the next character from the reader, and if it is a space character, keeps reading until
   * a non-space character is found.
   */
  private void nextNonSpace() throws IOException {
    next();
    skipSpace();
  }

  /**
   * Skips any space in the reader, and then throws an exception if the first non-space character
   * found is not the expected one. Sets {@code c} to the first character after that expected one.
   */
  private void expect(char expected) throws IOException {
    skipSpace();
    if (c == expected) {
      next();
    } else {
      throw parseException("Expected " + expected);
    }
  }

  /**
   * Parses a single node from the reader, as part of the first parsing phase.
   * {@code
   *