All downloads are free. Search and download functionality uses the official Maven repository.

au.com.integradev.delphi.preprocessor.directive.expression.ExpressionParser Maven / Gradle / Ivy

The newest version!
/*
 * Sonar Delphi Plugin
 * Copyright (C) 2019 Integrated Application Development
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 3 of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
 */
package au.com.integradev.delphi.preprocessor.directive.expression;

import static au.com.integradev.delphi.preprocessor.directive.expression.Token.TokenType.AND;
import static au.com.integradev.delphi.preprocessor.directive.expression.Token.TokenType.COMMENT;
import static au.com.integradev.delphi.preprocessor.directive.expression.Token.TokenType.DIRECTIVE;
import static au.com.integradev.delphi.preprocessor.directive.expression.Token.TokenType.DIV;
import static au.com.integradev.delphi.preprocessor.directive.expression.Token.TokenType.DIVIDE;
import static au.com.integradev.delphi.preprocessor.directive.expression.Token.TokenType.EQUALS;
import static au.com.integradev.delphi.preprocessor.directive.expression.Token.TokenType.GREATER_THAN;
import static au.com.integradev.delphi.preprocessor.directive.expression.Token.TokenType.GREATER_THAN_EQUAL;
import static au.com.integradev.delphi.preprocessor.directive.expression.Token.TokenType.IN;
import static au.com.integradev.delphi.preprocessor.directive.expression.Token.TokenType.LESS_THAN;
import static au.com.integradev.delphi.preprocessor.directive.expression.Token.TokenType.LESS_THAN_EQUAL;
import static au.com.integradev.delphi.preprocessor.directive.expression.Token.TokenType.MINUS;
import static au.com.integradev.delphi.preprocessor.directive.expression.Token.TokenType.MOD;
import static au.com.integradev.delphi.preprocessor.directive.expression.Token.TokenType.MULTIPLY;
import static au.com.integradev.delphi.preprocessor.directive.expression.Token.TokenType.NOT;
import static au.com.integradev.delphi.preprocessor.directive.expression.Token.TokenType.NOT_EQUALS;
import static au.com.integradev.delphi.preprocessor.directive.expression.Token.TokenType.OR;
import static au.com.integradev.delphi.preprocessor.directive.expression.Token.TokenType.PLUS;
import static au.com.integradev.delphi.preprocessor.directive.expression.Token.TokenType.SHL;
import static au.com.integradev.delphi.preprocessor.directive.expression.Token.TokenType.SHR;
import static au.com.integradev.delphi.preprocessor.directive.expression.Token.TokenType.XOR;
import static java.util.Objects.requireNonNullElse;

import au.com.integradev.delphi.preprocessor.TextBlockLineEndingMode;
import au.com.integradev.delphi.preprocessor.directive.expression.Token.TokenType;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Sets;
import java.util.ArrayDeque;
import java.util.ArrayList;
import java.util.Deque;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.stream.Collectors;
import javax.annotation.Nullable;
import org.apache.commons.lang3.StringUtils;

/**
 * Recursive-descent parser that turns a list of compiler-directive expression tokens into an
 * {@link Expression} tree.
 *
 * <p>Binary operators are parsed left-associatively at three precedence levels, from loosest to
 * tightest binding: relational, additive, multiplicative. Prefix (unary) operators bind tighter
 * than any binary operator. Tokens of type {@code COMMENT} are skipped wherever a token is read.
 *
 * <p>The parser keeps its cursor in instance fields, so a single instance must not be used to
 * parse from several threads at once.
 */
public class ExpressionParser {
  /** Thrown when the token stream does not form a valid expression. */
  public static class ExpressionParserError extends RuntimeException {
    /**
     * @param message description of what the parser expected
     * @param got the offending token; when {@code null}, the end-of-input sentinel (whose text is
     *     empty) is reported instead
     */
    ExpressionParserError(String message, @Nullable Token got) {
      super(message + " Got '" + requireNonNullElse(got, END_OF_INPUT).getText() + "'");
    }
  }

  /** Sentinel returned by the token accessors once every token has been consumed. */
  private static final Token END_OF_INPUT = new Token(TokenType.UNKNOWN, "");

  private static final ImmutableSet<TokenType> RELATIONAL_OPERATORS =
      Sets.immutableEnumSet(
          EQUALS, GREATER_THAN, LESS_THAN, GREATER_THAN_EQUAL, LESS_THAN_EQUAL, NOT_EQUALS, IN);

  private static final ImmutableSet<TokenType> ADD_OPERATORS =
      Sets.immutableEnumSet(PLUS, MINUS, OR, XOR);

  private static final ImmutableSet<TokenType> MULTIPLICATION_OPERATORS =
      Sets.immutableEnumSet(MULTIPLY, DIVIDE, DIV, MOD, AND, SHL, SHR);

  private static final ImmutableSet<TokenType> UNARY_OPERATORS =
      Sets.immutableEnumSet(PLUS, MINUS, NOT);

  private final TextBlockLineEndingMode textBlockLineEndingMode;

  // Parser state
  private List<Token> tokens;
  private int position;

  /**
   * @param textBlockLineEndingMode selects the line ending used to join the lines of multiline
   *     string literals
   */
  public ExpressionParser(TextBlockLineEndingMode textBlockLineEndingMode) {
    this.textBlockLineEndingMode = textBlockLineEndingMode;
  }

  /**
   * Parses an expression starting at the first token of the given list.
   *
   * <p>Parsing stops at the first token that cannot continue the expression; any tokens after that
   * point are left unread.
   *
   * @param tokens the tokens to parse
   * @return the parsed expression
   * @throws ExpressionParserError if the tokens do not form a valid expression
   */
  public Expression parse(List<Token> tokens) {
    this.tokens = tokens;
    this.position = 0;

    return parseExpression();
  }

  /**
   * Returns the next non-comment token without consuming it. Comment tokens in front of it are
   * skipped permanently.
   *
   * @return the next token, or {@link #END_OF_INPUT} if none remain
   */
  private Token peekToken() {
    while (position < tokens.size()) {
      Token token = tokens.get(position);
      if (token.getType() != COMMENT) {
        return token;
      }
      ++position;
    }
    return END_OF_INPUT;
  }

  /**
   * Returns the next non-comment token and advances past it.
   *
   * @return the consumed token, or {@link #END_OF_INPUT} if none remain
   */
  private Token getToken() {
    Token token = peekToken();
    // The sentinel is compared by identity; it is never part of the token list.
    if (token != END_OF_INPUT) {
      ++position;
    }
    return token;
  }

  /** Parses a full expression, starting at the loosest-binding precedence level. */
  private Expression parseExpression() {
    return parseRelational();
  }

  /** Parses a left-associative chain of additive operands joined by relational operators. */
  private Expression parseRelational() {
    Expression result = parseAddition();
    Token token;

    while ((token = peekToken()) != END_OF_INPUT) {
      TokenType type = token.getType();
      if (!RELATIONAL_OPERATORS.contains(type)) {
        break;
      }

      getToken();
      result = Expressions.binary(result, type, parseAddition());
    }

    return result;
  }

  /** Parses a left-associative chain of multiplicative operands joined by additive operators. */
  private Expression parseAddition() {
    Expression result = parseMultiplication();
    Token token;

    while ((token = peekToken()) != END_OF_INPUT) {
      TokenType type = token.getType();
      if (!ADD_OPERATORS.contains(type)) {
        break;
      }

      getToken();
      result = Expressions.binary(result, type, parseMultiplication());
    }

    return result;
  }

  /** Parses a left-associative chain of unary operands joined by multiplicative operators. */
  private Expression parseMultiplication() {
    Expression result = parseUnary();
    Token token;

    while ((token = peekToken()) != END_OF_INPUT) {
      TokenType type = token.getType();
      if (!MULTIPLICATION_OPERATORS.contains(type)) {
        break;
      }

      getToken();
      result = Expressions.binary(result, type, parseUnary());
    }

    return result;
  }

  /** Parses any number of stacked prefix operators followed by a primary expression. */
  private Expression parseUnary() {
    Token token = peekToken();

    if (token != END_OF_INPUT) {
      TokenType type = token.getType();
      if (UNARY_OPERATORS.contains(type)) {
        getToken();
        return Expressions.unary(type, parseUnary());
      }
    }

    return parsePrimary();
  }

  /**
   * Parses a literal, an identifier (name reference or invocation), a set literal, or a
   * parenthesized sub-expression, based on the type of the next token.
   *
   * @throws ExpressionParserError if the next token cannot start an expression
   */
  private Expression parsePrimary() {
    Token token = peekToken();
    switch (token.getType()) {
      case STRING:
      case MULTILINE_STRING:
      case INTEGER:
      case REAL:
        return parseLiteral();

      case IDENTIFIER:
        return parseIdentifier();

      case LBRACKET:
        return parseSet();

      case LPAREN:
        return parseSubExpression();

      default:
        // Do nothing
    }

    if (token.getType() == DIRECTIVE) {
      // HACK:
      // We don't handle nested directives. When we find one, we just dumbly return a False value.
      // Note that the directive token is only peeked here, not consumed.
      //
      // Problem:
      // - We separate parsing into 3 steps:
      //   - Lexing
      //   - Preprocessing
      //   - Parsing
      // - As a result we have 2 sets of lexers/parsers, an ANTLR-generated
      //   `DelphiLexer`/`DelphiParser` and a hand-rolled `CompilerDirectiveParser` with
      //   corresponding `ExpressionLexer`/`ExpressionParser`. The compiler directive parser is used
      //   as part of the preprocessing step to help generate a correct list of tokens for parsing.
      // - The preprocessor is tailored towards DelphiTokens and not the constant expression
      //   tokens generated by `ExpressionLexer`. Code-share would be quite complicated and require
      //   a lot of effort, and the result still wouldn't be fully accurate.
      //
      // To handle this accurately, we need to:
      // - Merge the 3 parsing steps into a hand-rolled "DelphiScanner":
      //   - Lex tokens on-demand instead of doing it upfront.
      //   - Evaluate compiler directives as they're lexed from the input data instead of afterward.
      //   - Perform symbol table construction simultaneously instead of afterward.
      // - When evaluating compiler directives, parse directive expressions using the normal
      //   Delphi expression parser. That will give us nested directive handling for free!
      //
      // For more information, see:
      //   https://github.com/integrated-application-development/sonar-delphi/issues/261
      //
      return Expressions.nameReference("False");
    }

    throw new ExpressionParserError("Expected expression.", token);
  }

  /**
   * Consumes one literal token and wraps it in a literal expression. String and multiline string
   * tokens are converted to their evaluated text first; other tokens keep their raw text.
   */
  private Expression parseLiteral() {
    Token token = getToken();

    String text;
    switch (token.getType()) {
      case STRING:
        text = evaluateString(token.getText());
        break;
      case MULTILINE_STRING:
        text = evaluateMultilineString(token.getText(), textBlockLineEndingMode);
        break;
      default:
        text = token.getText();
    }

    return Expressions.literal(token.getType(), text);
  }

  /**
   * Strips the first and last character (the enclosing quotes) from a string literal and collapses
   * each doubled single quote into one.
   */
  private static String evaluateString(String text) {
    String content = text.substring(1, text.length() - 1);
    return content.replace("''", "'");
  }

  /**
   * Evaluates the text of a multiline string literal.
   *
   * <p>The first and last lines of the literal are dropped. The leading whitespace of the last
   * line is taken as the indentation and removed from the start of every remaining line that
   * begins with it. The remaining lines are joined with the selected line ending.
   *
   * @param text raw text of the multiline string token
   * @param lineEndingMode {@code CR} joins with {@code "\r"}, {@code LF} with {@code "\n"}, and
   *     any other mode with {@code "\r\n"}
   * @return the evaluated string content
   */
  private static String evaluateMultilineString(
      String text, TextBlockLineEndingMode lineEndingMode) {
    Deque<String> lines = text.lines().collect(Collectors.toCollection(ArrayDeque::new));

    // Discard the line holding the opening delimiter.
    lines.removeFirst();

    // The closing line is discarded as well, but its indentation applies to all content lines.
    String last = lines.removeLast();
    String indentation = readLeadingWhitespace(last);

    String lineEnding;
    switch (lineEndingMode) {
      case CR:
        lineEnding = "\r";
        break;
      case LF:
        lineEnding = "\n";
        break;
      default:
        lineEnding = "\r\n";
    }

    return lines.stream()
        .map(line -> StringUtils.removeStart(line, indentation))
        .collect(Collectors.joining(lineEnding));
  }

  /**
   * Returns the longest prefix of {@code input} made up of characters that are either at or below
   * U+0020 or equal to U+3000 (ideographic space).
   */
  private static String readLeadingWhitespace(String input) {
    int end = 0;
    while (end < input.length()) {
      char c = input.charAt(end);
      if (c > 0x20 && c != 0x3000) {
        break;
      }
      ++end;
    }
    return input.substring(0, end);
  }

  /**
   * Parses a (possibly dot-qualified) identifier. If the name is directly followed by an opening
   * parenthesis it becomes an invocation with the parsed argument list; otherwise it becomes a
   * name reference.
   */
  private Expression parseIdentifier() {
    StringBuilder identifier = new StringBuilder();
    while (peekToken().getType() == TokenType.IDENTIFIER) {
      identifier.append(getToken().getText());

      Token peek = peekToken();
      if (peek.getType() != TokenType.DOT) {
        if (peek.getType() == TokenType.LPAREN) {
          String invocationName = identifier.toString();
          List<Expression> argumentList = parseArgumentList();
          return Expressions.invocation(invocationName, argumentList);
        }
        break;
      }

      // Consume the dot and keep it as part of the qualified name.
      identifier.append(getToken().getText());
    }
    return Expressions.nameReference(identifier.toString());
  }

  /**
   * Parses a parenthesized, comma-separated argument list. The next token is consumed as the
   * opening parenthesis without being checked; the caller has already peeked it.
   *
   * @return the argument expressions in source order; empty for {@code ()}
   * @throws ExpressionParserError if an argument is followed by anything but {@code ,} or {@code )}
   */
  private List<Expression> parseArgumentList() {
    List<Expression> arguments = new ArrayList<>();
    getToken();

    if (peekToken().getType() == TokenType.RPAREN) {
      getToken();
      return arguments;
    }

    while (true) {
      arguments.add(parseExpression());
      Token token = getToken();
      TokenType type = token.getType();
      if (type == TokenType.RPAREN) {
        return arguments;
      }
      if (type != TokenType.COMMA) {
        throw new ExpressionParserError("Expected ',' or ')' in argument list.", token);
      }
    }
  }

  /**
   * Parses a bracketed, comma-separated set literal. The next token is consumed as the opening
   * bracket without being checked; the caller has already peeked it.
   *
   * @throws ExpressionParserError if an element is followed by anything but {@code ,} or {@code ]}
   */
  private Expression parseSet() {
    getToken();

    if (peekToken().getType() == TokenType.RBRACKET) {
      getToken();
      return Expressions.emptySet();
    }

    Set<Expression> elements = new HashSet<>();

    while (true) {
      elements.add(parseExpression());
      Token token = getToken();
      TokenType type = token.getType();
      if (type == TokenType.RBRACKET) {
        return Expressions.set(elements);
      }
      if (type != TokenType.COMMA) {
        throw new ExpressionParserError("Expected ',' or ']' in set literal.", token);
      }
    }
  }

  /**
   * Parses a parenthesized sub-expression. The next token is consumed as the opening parenthesis
   * without being checked; the caller has already peeked it.
   *
   * @throws ExpressionParserError if the inner expression is not followed by {@code )}
   */
  private Expression parseSubExpression() {
    getToken();

    Expression result = parseExpression();
    Token token = getToken();

    if (token.getType() != TokenType.RPAREN) {
      throw new ExpressionParserError("Expected ')' after expression.", token);
    }

    return result;
  }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy