com.google.sitebricks.compiler.Parsing Maven / Gradle / Ivy

Go to download
package com.google.sitebricks.compiler;


import java.util.ArrayDeque;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Deque;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Pattern;

import com.google.inject.Inject;
import com.google.sitebricks.conversion.TypeConverter;
import com.google.sitebricks.rendering.Strings;

/**
 * Utility tokenizes text into expressions and raw text, and provides other
 * text parsing tools.
 *
 * @author Dhanji R. Prasanna (dhanji at gmail com)
 * @since 1.0
 */
public class Parsing {

  private static TypeConverter converter;
  
  private Parsing() {
  }

  //converts comma-separated name/value pairs into expression/variable bindings
  public static Map toBindMap(String expression) {
    if (Strings.empty(expression))
      return Collections.emptyMap();

    Deque escapes = new ArrayDeque();
    List pairs = new ArrayList();
    int index = 0;
    for (int i = 0; i < expression.length(); i++) {
      char c = expression.charAt(i);

      if (('"' == c && (escapes.isEmpty() || escapes.peek().charValue() != c))
          || '[' == c || '{' == c || '(' == c) {
        escapes.push(c);
      } else if (('"' == c && escapes.peek().charValue() == c) || ']' == c || '}' == c || ')' == c) {
        escapes.pop();
      }

      if (escapes.isEmpty() && ',' == c) {
        if (index < i)
          pairs.add(expression.substring(index, i));

        //skip comma & whitespace if any
        for (; i < expression.length() && (',' == expression.charAt(i) || ' ' == expression.charAt(i));)
          i++;

        //reset new start index
        index = i;
      }

    }

    //add last pair if needed
    if (index < expression.length()) {

      //chew up leading comma & whitespace if any
      //noinspection StatementWithEmptyBody
      for (; ',' == expression.charAt(index) || ' ' == expression.charAt(index); index++) ;

      final String pair = expression.substring(index, expression.length()).trim();

      //only consider this a pair if it has something in it!
      if (pair.length() > 1)
        pairs.add(pair);
    }

    //nice to preserve insertion order
    final Map map = new LinkedHashMap();
    for (String pair : pairs) {
      final String[] nameAndValue = pair.split("=", 2);

      //do some validation
      if (nameAndValue.length != 2)
        throw new IllegalArgumentException("Invalid parameter binding format: " + pair);

      Strings.nonEmpty(nameAndValue[0], "Cannot have an empty left hand side target parameter: " + pair);
      Strings.nonEmpty(nameAndValue[1], "Must provide a non-empty right hand side expression: " + pair);

      map.put(nameAndValue[0].trim(), nameAndValue[1].trim());
    }

    return Collections.unmodifiableMap(map);
  }


  //tokenizes text into raw text chunks interspersed with expression chunks
  public static List tokenize(String warpRawText, EvaluatorCompiler compiler) throws ExpressionCompileException {
    ArrayList tokens = new ArrayList();

    //simple state machine to iterate the text and break it up into chunks
    char[] characters = warpRawText.toCharArray();

    StringBuilder token = new StringBuilder();
    TokenizerState state = TokenizerState.READING_TEXT;
    for (int i = 0; i < characters.length; i++) {

      //test for start of an expression
      if (TokenizerState.READING_TEXT.equals(state)) {
        if ('$' == characters[i]) {
          if ('{' == characters[i + 1]) {
            //YES it is the start of an expr, so close up the existing token & start a new one
            if (token.length() > 0) {
              tokens.add(CompiledToken.text(token.toString()));
              token = new StringBuilder();
            }

            state = TokenizerState.READING_EXPRESSION;
          }
        }
      }

      //test for end of an expr
      if (TokenizerState.READING_EXPRESSION.equals(state)) {
        if ('}' == characters[i]) {
          //YES it is the end of the expr, so close it up and start a new token
          token.append(characters[i]);

          tokens.add(CompiledToken.expression(token.toString(), compiler));
          token = new StringBuilder();

          state = TokenizerState.READING_TEXT;
          continue;   //dont add the trailing } to the new text field
        }
      }

      //add characters to the token normally
      token.append(characters[i]);
    }

    //should never be in reading expr mode at this point
    if (TokenizerState.READING_EXPRESSION.equals(state))
      throw new IllegalStateException("Error. Expression was not terminated properly: " + token.toString());

    //add last token read if it has any content (is always text)
    if (token.length() > 0)
      tokens.add(CompiledToken.text(token.toString()));

    // Pack list capacity to size (saves memory).
    tokens.trimToSize();

    return tokens;
  }

  public static String stripExpression(String expr) {
    return expr.substring(2, expr.length() - 1);
  }

  public static boolean isExpression(String attribute) {
    return attribute.startsWith("${");
  }


  //dont pass null or empty string or 1 char
  public static String stripQuotes(String var) {
    return var.substring(1, var.length() - 1);
  }

  /**
   * Remember this method is not so much about verifying something is XML as it is
   * verifying that something is a NON-Xml template. In other words, read this as
   * whether or not we should *treat* something as XML, then complain that it's malformed
   * later (if necessary).
   *
   * @param template A fully loaded template as a string.
   * @return Returns true if this template should be treated as an XML template.
   *         Templates that are not XML *MUST* begin with a {@code @Meta} annotation.
   */
  public static boolean treatAsXml(String template) {
    return 0 > indexOfMeta(template);
  }

  /**
   * Converts the given token stream into a rendered output evaluating each expression
   * against the provided context object which may be a regular Java POJO with getters
   * and setters or a map of string/value pairs.
   */
  public static String render(List tokens, Map arguments) {
    StringBuilder builder = new StringBuilder();
    for (Token token : tokens) {
      builder.append(token.render(arguments));
    }

    return builder.toString();
  }

  public static int indexOfMeta(String template) {
    //do a manual character scan (coz indexOf(regex) will be O(n) runtime)
    for (int i = 0; i < template.length(); i++) {
      char c = template.charAt(i);

      //skip leading whitespace
      if (isWhitespace(c))
        continue;

      //Does this template begin with @Meta or @Meta(  --> then it is *not* XML
      if ('@' == c) {
        final char trailing = template.charAt(i + 5);

        if ("Meta".equals(template.substring(i + 1, i + 5))

            && ('(' == trailing || isWhitespace(trailing)))

          return i;
      }

      //do not go past the first non-whitespace character (short-circuit)
      return -1;
    }

    //treat everything else as XML
    return -1;
  }

  private static boolean isWhitespace(char c) {
    return ' ' == c || '\n' == c || '\r' == c || '\t' == c;
  }

  private static enum TokenizerState {
    READING_TEXT, READING_EXPRESSION
  }

  //URI test regex: (([a-zA-Z][0-9a-zA-Z+\\-\\.]*:)?/{0,2}[0-9a-zA-Z;/?:@&=+$\\.\\-_!~*'()%]+)?(#[0-9a-zA-Z;/?:@&=+$\\.\\-_!~*'()%]+)?
  //Taken from stylus studio message board http://www.stylusstudio.com/xmldev/200108/post10890.html

  private final static Pattern URI_REGEX
      = Pattern.compile("(([a-zA-Z][0-9a-zA-Z+\\-\\.]*:)?/{0,2}[0-9a-zA-Z;/?:@&=+$\\.\\-_!~*'()%]+)?(#[0-9a-zA-Z;/?:@&=+$\\.\\-_!~*'()%]+)?");

//      "(([a-zA-Z][0-9a-zA-Z+\\\\-\\\\.]*:)?/{0,2}[0-9a-zA-Z;" +
//      "/?:@&=+$\\\\.\\\\-_!~*'()%]+)?(#[0-9a-zA-Z;/?:@&=+$\\\\.\\\\-_!~*'()%]+)?");

  //TODO
  private final static Pattern TEMPLATE_URI_PATTERN = Pattern.compile("(([a-zA-Z][0-9a-zA-Z+\\\\-\\\\.]*:)?/{0,2}[0-9a-zA-Z;" +
      "/?:@&=+$\\\\.\\\\-_!~*'()%]+)?(#[0-9a-zA-Z;/?:@&=+$\\\\.\\\\-_!~*'()%]+)?");


  //less expensive method tests whether string is a valid URI
  public static boolean isValidURI(String uri) {
    return (null != uri)
        && URI_REGEX
        .matcher(uri)
        .matches();
  }
  
  public static TypeConverter getTypeConverter() {
    return Parsing.converter;
  }

  @Inject
  public static void setTypeConverter(TypeConverter converter) {
    Parsing.converter = converter;
  }
}