All Downloads are FREE. Search and download functionalities are using the official Maven repository.

au.com.integradev.delphi.preprocessor.directive.expression.ExpressionLexer Maven / Gradle / Ivy

The newest version!
/*
 * Sonar Delphi Plugin
 * Copyright (C) 2019 Integrated Application Development
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 3 of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
 */
package au.com.integradev.delphi.preprocessor.directive.expression;

import au.com.integradev.delphi.preprocessor.directive.expression.Token.TokenType;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;
import java.util.function.Predicate;
import javax.annotation.Nullable;

/**
 * Hand-written lexer that turns an expression string into a list of {@link Token}s.
 *
 * <p>Recognized input: numeric literals (decimal, {@code $} hexadecimal, {@code %} binary, and
 * reals), identifiers and word operators ({@code div}, {@code mod}, {@code and}, ...), single-line
 * and multiline string literals, {@code //} line comments, <code>{ }</code> and {@code (* *)}
 * comments (reported as directives when they start with {@code $}), symbolic operators and
 * punctuation.
 *
 * <p>The current input and read position are kept in instance fields, so a single instance must
 * not be used to lex two inputs at the same time.
 */
public class ExpressionLexer {
  /** Thrown when the input contains a character or numeric literal that cannot be tokenized. */
  public static class ExpressionLexerError extends RuntimeException {
    ExpressionLexerError(String message) {
      super(message);
    }
  }

  /** Single-character punctuation tokens. */
  private static final Map<Character, TokenType> SYNTAX_CHARACTERS =
      Map.of(
          '.', TokenType.DOT,
          ',', TokenType.COMMA,
          '(', TokenType.LPAREN,
          ')', TokenType.RPAREN,
          '[', TokenType.LBRACKET,
          ']', TokenType.RBRACKET);

  /** Single-character operators; two-character forms are assembled in {@link #readOperator()}. */
  private static final Map<Character, TokenType> OPERATOR_CHARACTERS =
      Map.of(
          '=', TokenType.EQUALS,
          '<', TokenType.LESS_THAN,
          '>', TokenType.GREATER_THAN,
          '+', TokenType.PLUS,
          '-', TokenType.MINUS,
          '*', TokenType.MULTIPLY,
          '/', TokenType.DIVIDE);

  /** Word operators, looked up case-insensitively. */
  private static final Map<String, TokenType> OPERATOR_IDENTIFIERS =
      new TreeMap<>(String.CASE_INSENSITIVE_ORDER);

  static {
    OPERATOR_IDENTIFIERS.put("div", TokenType.DIV);
    OPERATOR_IDENTIFIERS.put("mod", TokenType.MOD);
    OPERATOR_IDENTIFIERS.put("shl", TokenType.SHL);
    OPERATOR_IDENTIFIERS.put("shr", TokenType.SHR);
    OPERATOR_IDENTIFIERS.put("in", TokenType.IN);
    OPERATOR_IDENTIFIERS.put("not", TokenType.NOT);
    OPERATOR_IDENTIFIERS.put("and", TokenType.AND);
    OPERATOR_IDENTIFIERS.put("or", TokenType.OR);
    OPERATOR_IDENTIFIERS.put("xor", TokenType.XOR);
  }

  /** Sentinel returned by the peek/get helpers once the read position is past the input. */
  private static final char END_OF_INPUT = '\0';

  private String data;
  private int position;
  private final NumberReader numberReader = new NumberReader();

  /**
   * Tokenizes {@code data} from the beginning.
   *
   * <p>Lexing ends at the end of the input. It also ends, without an error, at a multiline comment
   * or directive that is never closed, because {@link #readToken()} yields {@code null} there.
   *
   * @param data the text to tokenize
   * @return the tokens in input order, including comment and directive tokens
   * @throws ExpressionLexerError if an unexpected character or malformed numeric literal is found
   */
  public List<Token> lex(String data) {
    this.data = data;
    this.position = 0;

    List<Token> result = new ArrayList<>();
    Token token;

    while ((token = readToken()) != null) {
      result.add(token);
    }

    return result;
  }

  /** Returns the character at the read position without consuming it. */
  private char peekChar() {
    return peekChar(0);
  }

  /**
   * Returns the character {@code offset} places after the read position, or {@link #END_OF_INPUT}
   * when that index is at or beyond the end of the input.
   */
  private char peekChar(int offset) {
    if (position + offset < data.length()) {
      return data.charAt(position + offset);
    }
    return END_OF_INPUT;
  }

  /** Consumes and returns the current character; the position does not move at end of input. */
  private char getChar() {
    char character = peekChar();
    if (character != END_OF_INPUT) {
      ++position;
    }
    return character;
  }

  /**
   * Skips whitespace and reads the next token, dispatching on its first character.
   *
   * @return the next token, or {@code null} at end of input or at an unterminated multiline
   *     comment
   * @throws ExpressionLexerError if the next character cannot start any token
   */
  @Nullable
  private Token readToken() {
    char character;

    // Trim out whitespace
    while ((character = peekChar()) != END_OF_INPUT) {
      if (!Character.isWhitespace(character)) {
        break;
      }
      ++position;
    }

    if (character == END_OF_INPUT) {
      return null;
    } else if (numberReader.isNumberStart(character)) {
      return readNumber();
    } else if (Character.isLetter(character) || character == '_') {
      return readIdentifier();
    } else if (character == '\'') {
      return readString();
    } else if (character == '/' && peekChar(1) == '/') {
      // Must be tested before the operator table, which also contains '/'.
      return readLineComment();
    } else if (character == '{' || (character == '(' && peekChar(1) == '*')) {
      // Must be tested before the syntax table, which also contains '('.
      return readMultilineComment();
    } else if (OPERATOR_CHARACTERS.containsKey(character)) {
      return readOperator();
    } else if (SYNTAX_CHARACTERS.containsKey(character)) {
      return readSyntaxToken();
    } else {
      throw new ExpressionLexerError("Unexpected character: '" + character + "'");
    }
  }

  /** Reads a numeric literal starting at the current position. */
  private Token readNumber() {
    return numberReader.read();
  }

  /** Reads a symbolic operator, combining {@code <>}, {@code <=} and {@code >=} into one token. */
  private Token readOperator() {
    char character = getChar();
    String characterString = Character.toString(character);

    if (character == '<' && peekChar() == '>') {
      return new Token(TokenType.NOT_EQUALS, characterString + getChar());
    } else if (character == '<' && peekChar() == '=') {
      return new Token(TokenType.LESS_THAN_EQUAL, characterString + getChar());
    } else if (character == '>' && peekChar() == '=') {
      return new Token(TokenType.GREATER_THAN_EQUAL, characterString + getChar());
    } else {
      return new Token(OPERATOR_CHARACTERS.get(character), characterString);
    }
  }

  /**
   * Reads a punctuation token. The digraphs {@code (.} and {@code .)} are reported as left and
   * right bracket tokens respectively.
   */
  private Token readSyntaxToken() {
    char character = getChar();
    String characterString = Character.toString(character);

    if (character == '(' && peekChar() == '.') {
      return new Token(TokenType.LBRACKET, characterString + getChar());
    } else if (character == '.' && peekChar() == ')') {
      return new Token(TokenType.RBRACKET, characterString + getChar());
    } else {
      return new Token(SYNTAX_CHARACTERS.get(character), characterString);
    }
  }

  /**
   * Reads a run of letters, digits and underscores. The result is a word-operator token when the
   * text matches one of {@link #OPERATOR_IDENTIFIERS} (ignoring case), otherwise an identifier.
   */
  private Token readIdentifier() {
    char character;
    StringBuilder identifier = new StringBuilder();

    while ((character = peekChar()) != END_OF_INPUT) {
      if (character == '_' || Character.isLetter(character) || Character.isDigit(character)) {
        identifier.append(getChar());
      } else {
        break;
      }
    }

    String text = identifier.toString();
    TokenType type = OPERATOR_IDENTIFIERS.getOrDefault(text, TokenType.IDENTIFIER);

    return new Token(type, text);
  }

  /** Reads a string literal, trying the multiline form first and then the single-line form. */
  private Token readString() {
    Token result = readMultilineString();
    if (result == null) {
      result = readSingleLineString();
    }
    return result;
  }

  /**
   * Reads a single-line string literal including its quotes.
   *
   * <p>A doubled quote ({@code ''}) is kept in the token text as two characters and does not end
   * the literal. Reading stops after the closing quote, or at a line break or end of input when
   * the literal is unterminated; a terminating line break is consumed but not added to the text.
   */
  private Token readSingleLineString() {
    StringBuilder value = new StringBuilder();
    value.append(getChar());

    char character;

    while ((character = getChar()) != END_OF_INPUT && !isNewLine(character)) {
      value.append(character);
      if (character == '\'') {
        if (peekChar() == '\'') {
          value.append(getChar());
        } else {
          break;
        }
      }
    }

    return new Token(TokenType.STRING, value.toString());
  }

  /**
   * Reads a multiline string literal if one starts at the current position.
   *
   * @return the multiline string token, or {@code null} (consuming nothing) when the text at the
   *     current position is not a complete multiline string
   */
  @Nullable
  private Token readMultilineString() {
    int lookahead = lookaheadMultilineString(0);
    if (lookahead == 0) {
      return null;
    }

    String value = data.substring(position, position + lookahead);
    position += lookahead;

    return new Token(TokenType.MULTILINE_STRING, value);
  }

  /**
   * Measures the string literal starting at offset {@code i} without consuming it.
   *
   * @return the offset (relative to the read position) of the first character after the literal
   */
  private int lookaheadString(int i) {
    int offset = lookaheadMultilineString(i);
    if (offset == 0) {
      offset = lookaheadSingleLineString(i);
    }
    return i + offset;
  }

  /**
   * Measures the multiline string literal starting at offset {@code i}.
   *
   * <p>A multiline string opens with an odd run of three or more quotes immediately followed by a
   * line break, and closes at the first later run of at least that many quotes.
   *
   * @return the length of the literal, or 0 if no multiline string starts at {@code i} or it is
   *     never closed
   */
  private int lookaheadMultilineString(int i) {
    int startQuotes = lookaheadSingleQuotes(i);
    if (startQuotes >= 3 && (startQuotes % 2 == 1) && isNewLine(peekChar(i + startQuotes))) {
      // Start one before the first character after the opening quotes; the loop pre-increments.
      int offset = startQuotes - 1;
      while (true) {
        switch (peekChar(i + ++offset)) {
          case '\'':
            int quotes = Math.min(startQuotes, lookaheadSingleQuotes(i + offset));
            offset += quotes;
            if (quotes == startQuotes) {
              return offset;
            }
            break;

          case END_OF_INPUT:
            return 0;

          default:
            // do nothing
        }
      }
    }
    return 0;
  }

  /** Counts the consecutive single quotes starting at offset {@code i}. */
  private int lookaheadSingleQuotes(int i) {
    int result = 0;
    while (peekChar(i++) == '\'') {
      ++result;
    }
    return result;
  }

  /**
   * Measures the single-line string literal starting at offset {@code i}, using the same rules as
   * {@link #readSingleLineString()} except that a terminating line break is not counted.
   *
   * @return the length of the literal, including its quotes
   */
  private int lookaheadSingleLineString(int i) {
    int offset = 1;

    char character;

    while ((character = peekChar(i + offset)) != END_OF_INPUT && !isNewLine(character)) {
      ++offset;
      if (character == '\'') {
        if (peekChar(i + offset) == '\'') {
          ++offset;
        } else {
          break;
        }
      }
    }

    return offset;
  }

  /** Reads a {@code //} comment up to, but not including, the next line break or end of input. */
  private Token readLineComment() {
    StringBuilder value = new StringBuilder();
    char character;

    while ((character = peekChar()) != END_OF_INPUT) {
      if (isNewLine(character)) {
        break;
      }
      value.append(character);
      ++position;
    }

    return new Token(TokenType.COMMENT, value.toString());
  }

  /**
   * Reads a <code>{ ... }</code> or {@code (* ... *)} comment. When the first character after the
   * opener is {@code $}, the token is typed as a directive instead of a comment.
   *
   * @return the comment or directive token, or {@code null} (consuming nothing) when the closing
   *     delimiter is missing
   */
  @Nullable
  private Token readMultilineComment() {
    int offset = 1;
    Token.TokenType type = TokenType.COMMENT;
    String end;

    if (peekChar() == '(') {
      ++offset;
      end = "*)";
    } else {
      end = "}";
    }

    if (peekChar(offset) == '$') {
      type = TokenType.DIRECTIVE;
    }

    int lookahead = lookaheadMultilineComment(end, offset);
    if (lookahead == 0) {
      return null;
    }

    String value = data.substring(position, position + lookahead);
    position += lookahead;

    return new Token(type, value);
  }

  /**
   * Scans a line comment body starting at offset {@code i}.
   *
   * @return the offset of the terminating line break, or of the end of input
   */
  private int lookaheadLineComment(int i) {
    while (true) {
      int character = peekChar(i);
      if (isNewLine(character) || character == END_OF_INPUT) {
        return i;
      }
      ++i;
    }
  }

  /**
   * Finds the end of a multiline comment or directive whose body starts at offset {@code i}.
   *
   * <p>When the body is an {@code $if} or {@code $elseif} directive, string literals, line
   * comments and nested multiline comments inside it are skipped as units, so a closing delimiter
   * that appears inside one of them does not end the directive.
   *
   * @param end the closing delimiter, <code>}</code> or {@code *)}
   * @param i offset (relative to the read position) of the first character after the opener
   * @return the offset just past the closing delimiter, or 0 if the comment, or a comment nested
   *     in it, is never closed
   */
  private int lookaheadMultilineComment(String end, int i) {
    char endStart = end.charAt(0);
    String directiveName = null;

    if (peekChar(i) == '$') {
      StringBuilder directiveNameBuilder = new StringBuilder();
      int character = peekChar(i + 1);

      while ((character >= 'a' && character <= 'z')
          || (character >= 'A' && character <= 'Z')
          || Character.isDigit(character)
          || character == '_') {
        ++i;
        directiveNameBuilder.append((char) character);
        character = peekChar(i + 1);
      }

      directiveName = directiveNameBuilder.toString();
    }

    boolean nestedExpression =
        "if".equalsIgnoreCase(directiveName) || "elseif".equalsIgnoreCase(directiveName);

    while (true) {
      int character = peekChar(i);

      if (character == endStart) {
        int j;
        for (j = 1; j < end.length(); ++j) {
          if (peekChar(i + j) != end.charAt(j)) {
            break;
          }
        }
        if (j == end.length()) {
          return i + j;
        }
      }

      // Each skip below leaves i on the last character of the skipped element, because the
      // increment at the bottom of the loop then moves to the first character after it.
      switch (character) {
        case '\'':
          if (nestedExpression) {
            i = lookaheadString(i) - 1;
          }
          break;

        case '/':
          if (nestedExpression && peekChar(i + 1) == '/') {
            i = lookaheadLineComment(i + 2);
          }
          break;

        case '{':
          if (nestedExpression) {
            int nestedEnd = lookaheadMultilineComment("}", i + 1);
            if (nestedEnd == 0) {
              // Unterminated nested comment: report failure instead of rescanning from offset 0.
              return 0;
            }
            i = nestedEnd - 1;
          }
          break;

        case '(':
          if (nestedExpression && peekChar(i + 1) == '*') {
            int nestedEnd = lookaheadMultilineComment("*)", i + 2);
            if (nestedEnd == 0) {
              // Unterminated nested comment: report failure instead of rescanning from offset 0.
              return 0;
            }
            i = nestedEnd - 1;
          }
          break;

        case END_OF_INPUT:
          return 0;

        default:
          // do nothing
      }

      ++i;
    }
  }

  /** Returns whether {@code c} is a carriage return or a line feed. */
  private boolean isNewLine(int c) {
    return c == '\r' || c == '\n';
  }

  /** Returns whether {@code character} is a digit or one of the letters a-f in either case. */
  private static boolean isHexDigit(char character) {
    character = Character.toLowerCase(character);
    return Character.isDigit(character) || (character >= 'a' && character <= 'f');
  }

  /** Returns whether {@code character} is {@code 0} or {@code 1}. */
  private static boolean isBinaryDigit(char character) {
    return character == '0' || character == '1';
  }

  /**
   * Reads numeric literals from the enclosing lexer's input. The state is reset by {@link
   * #init()} at the start of every {@link #read()}.
   */
  private final class NumberReader {
    TokenType type;
    StringBuilder value = new StringBuilder();
    private Predicate<Character> isDigitCharacter;
    private boolean canBeReal;

    /**
     * Resets the reader and selects the digit set from the literal's prefix: {@code $} for
     * hexadecimal, {@code %} for binary, otherwise decimal. Only decimal literals may become real.
     */
    private void init() {
      type = TokenType.INTEGER;
      value.setLength(0);
      switch (peekChar()) {
        case '$':
          isDigitCharacter = ExpressionLexer::isHexDigit;
          canBeReal = false;
          value.append(getChar());
          break;
        case '%':
          isDigitCharacter = ExpressionLexer::isBinaryDigit;
          canBeReal = false;
          value.append(getChar());
          break;
        default:
          isDigitCharacter = Character::isDigit;
          canBeReal = true;
      }
    }

    /**
     * Consumes the next piece of the literal, chosen by {@code character} (the current,
     * not-yet-consumed character).
     *
     * @return {@code true} if the literal may continue, {@code false} if it is complete
     * @throws ExpressionLexerError on a second {@code .}, a {@code .} in a hexadecimal or binary
     *     literal, or an exponent without digits
     */
    private boolean readCharacter(char character) {
      if (character == '.') {
        if (type == TokenType.REAL || !canBeReal) {
          throw new ExpressionLexerError("Unexpected '.' in numeric literal");
        }
        type = TokenType.REAL;
        value.append(getChar());
        return true;
      }

      // Checked only for decimal literals; in a hexadecimal literal 'e' is an ordinary digit.
      if (canBeReal && Character.toLowerCase(character) == 'e') {
        type = TokenType.REAL;
        value.append(getChar());
        if (peekChar() == '+' || peekChar() == '-') {
          value.append(getChar());
        }
        if (!readDigitSequence(Character::isDigit)) {
          throw new ExpressionLexerError("Expected a digit sequence to follow E");
        }
        // The exponent is always the last part of the literal.
        return false;
      }

      return readDigitSequence(isDigitCharacter);
    }

    /**
     * Consumes characters accepted by {@code isDigitCharacter}; an underscore is also accepted
     * once the literal text is non-empty.
     *
     * @return {@code true} if at least one character was consumed
     */
    private boolean readDigitSequence(Predicate<Character> isDigitCharacter) {
      boolean result = false;
      char character;
      while ((character = peekChar()) != END_OF_INPUT) {
        if (isDigitCharacter.test(character) || (value.length() > 0 && character == '_')) {
          value.append(getChar());
          result = true;
        } else {
          break;
        }
      }
      return result;
    }

    /**
     * Reads one numeric literal at the lexer's current position.
     *
     * @return an integer or real token holding the literal text, including any prefix
     * @throws ExpressionLexerError if the literal is malformed
     */
    public Token read() {
      init();
      char character;
      while ((character = peekChar()) != END_OF_INPUT) {
        if (!readCharacter(character)) {
          break;
        }
      }
      return new Token(type, value.toString());
    }

    /** Returns whether {@code character} can begin a numeric literal. */
    public boolean isNumberStart(char character) {
      return Character.isDigit(character) || character == '$' || character == '%';
    }
  }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy