All downloads are free. The search and download features use the official Maven repository.

com.lokalized.ExpressionTokenizer Maven / Gradle / Ivy

/*
 * Copyright 2017 Product Mog LLC.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.lokalized;


import javax.annotation.Nonnull;
import javax.annotation.concurrent.ThreadSafe;
import java.util.ArrayList;
import java.util.Collections;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Collectors;

import static java.lang.String.format;
import static java.util.Arrays.stream;
import static java.util.Objects.requireNonNull;

/**
 * Breaks an expression into its {@link Token} components.
 * <p>
 * Tokenization is driven by a single precompiled regular expression made of named alternatives: one for
 * whitespace, one per {@link TokenType} that has a pattern, and a final catch-all "error" alternative that
 * matches whatever none of the others could.
 * <p>
 * The class holds no mutable state; each call to {@link #extractTokens(String)} works on its own {@link Matcher}.
 *
 * @author Mark Allen
 */
@ThreadSafe
class ExpressionTokenizer {
  /**
   * Regex fragment for each recognized token type. Iteration order is the order in which alternatives are tried.
   */
  @Nonnull
  private static final Map<TokenType, String> PATTERNS_BY_TOKEN_TYPE;
  /**
   * Regex named-group identifier for each token type (the enum name with underscores removed).
   */
  @Nonnull
  private static final Map<TokenType, String> GROUP_NAMES_BY_TOKEN_TYPE;
  /**
   * The combined pattern: whitespace, then every token alternative, then the error catch-all.
   */
  @Nonnull
  private static final Pattern TOKEN_PATTERN;
  @Nonnull
  private static final String WHITESPACE_GROUP_NAME = "WHITESPACE";
  @Nonnull
  private static final String WHITESPACE_GROUP_PATTERN = "\\p{Space}";
  @Nonnull
  private static final String ERROR_GROUP_NAME = "ERROR";
  @Nonnull
  private static final String ERROR_GROUP_PATTERN = ".+";

  static {
    // Performs double-duty: keyset maintains insertion order so the regexes are always attempted in correct order.
    // Longer operators must precede their prefixes (e.g. "<=" before "<"), and keywords must precede VARIABLE.
    // Must escape \.[]{}()*+-?^$|
    Map<TokenType, String> patternsByTokenType = new LinkedHashMap<>();
    patternsByTokenType.put(TokenType.GROUP_START, "\\(");
    patternsByTokenType.put(TokenType.GROUP_END, "\\)");
    patternsByTokenType.put(TokenType.AND, "&&");
    patternsByTokenType.put(TokenType.OR, "\\|\\|");
    patternsByTokenType.put(TokenType.LESS_THAN_OR_EQUAL_TO, "<=");
    patternsByTokenType.put(TokenType.GREATER_THAN_OR_EQUAL_TO, ">=");
    patternsByTokenType.put(TokenType.LESS_THAN, "<");
    patternsByTokenType.put(TokenType.GREATER_THAN, ">");
    patternsByTokenType.put(TokenType.EQUAL_TO, "==");
    patternsByTokenType.put(TokenType.NOT_EQUAL_TO, "!=");
    patternsByTokenType.put(TokenType.CARDINALITY_ZERO, "CARDINALITY_ZERO");
    patternsByTokenType.put(TokenType.CARDINALITY_ONE, "CARDINALITY_ONE");
    patternsByTokenType.put(TokenType.CARDINALITY_TWO, "CARDINALITY_TWO");
    patternsByTokenType.put(TokenType.CARDINALITY_FEW, "CARDINALITY_FEW");
    patternsByTokenType.put(TokenType.CARDINALITY_MANY, "CARDINALITY_MANY");
    patternsByTokenType.put(TokenType.CARDINALITY_OTHER, "CARDINALITY_OTHER");
    patternsByTokenType.put(TokenType.ORDINALITY_ZERO, "ORDINALITY_ZERO");
    patternsByTokenType.put(TokenType.ORDINALITY_ONE, "ORDINALITY_ONE");
    patternsByTokenType.put(TokenType.ORDINALITY_TWO, "ORDINALITY_TWO");
    patternsByTokenType.put(TokenType.ORDINALITY_FEW, "ORDINALITY_FEW");
    patternsByTokenType.put(TokenType.ORDINALITY_MANY, "ORDINALITY_MANY");
    patternsByTokenType.put(TokenType.ORDINALITY_OTHER, "ORDINALITY_OTHER");
    patternsByTokenType.put(TokenType.MASCULINE, "MASCULINE");
    patternsByTokenType.put(TokenType.FEMININE, "FEMININE");
    patternsByTokenType.put(TokenType.NEUTER, "NEUTER");
    patternsByTokenType.put(TokenType.NUMBER, "-?(0|([1-9]\\d*))(\\.\\d+)?");
    patternsByTokenType.put(TokenType.VARIABLE, "\\p{Alnum}+");
    PATTERNS_BY_TOKEN_TYPE = Collections.unmodifiableMap(patternsByTokenType);

    // Underscore is illegal in regex group names.
    GROUP_NAMES_BY_TOKEN_TYPE = Collections.unmodifiableMap(stream(TokenType.values())
        .collect(Collectors.toMap(tokenType -> tokenType, (TokenType tokenType) -> tokenType.name().replace("_", ""))));

    StringBuilder tokenPatterns = new StringBuilder();

    // Whitespace comes first so it is consumed (and later skipped) before any token alternative is tried.
    tokenPatterns.append(format("(?<%s>%s)", WHITESPACE_GROUP_NAME, WHITESPACE_GROUP_PATTERN));

    for (Map.Entry<TokenType, String> patternByTokenType : PATTERNS_BY_TOKEN_TYPE.entrySet())
      tokenPatterns.append(format("|(?<%s>%s)", GROUP_NAMES_BY_TOKEN_TYPE.get(patternByTokenType.getKey()),
          patternByTokenType.getValue()));

    // Error alternative is last: it only matches when nothing else could.
    tokenPatterns.append(format("|(?<%s>%s)", ERROR_GROUP_NAME, ERROR_GROUP_PATTERN));

    // Compile and cache pattern for performance
    TOKEN_PATTERN = Pattern.compile(tokenPatterns.toString());
  }

  /**
   * Given an {@code expression}, scan it into a list of {@link Token} components, in the order they appear.
   * <p>
   * Whitespace between tokens is discarded. An empty or all-whitespace expression yields an empty list.
   *
   * @param expression the expression to tokenize
   * @return the tokens that comprise the expression
   * @throws NullPointerException          if {@code expression} is {@code null}
   * @throws ExpressionEvaluationException if content is encountered that does not match any known token
   */
  @Nonnull
  public List<Token> extractTokens(@Nonnull String expression) {
    requireNonNull(expression);

    List<Token> tokens = new ArrayList<>();
    Matcher matcher = TOKEN_PATTERN.matcher(expression);

    while (matcher.find()) {
      // The alternatives are mutually exclusive, so exactly one named group is non-null per match.
      if (matcher.group(WHITESPACE_GROUP_NAME) != null)
        continue;

      String errorGroup = matcher.group(ERROR_GROUP_NAME);

      if (errorGroup != null) {
        String errorMessage =
            format("Unexpected content '%s' encountered while evaluating expression '%s'.", errorGroup, expression);

        // Special message for common error of using "=" instead of "==" for equality checks.
        // "==" would already have matched as EQUAL_TO, so a leading "=" here is always a lone "=".
        if (errorGroup.startsWith("="))
          errorMessage = format("%s Did you mean '=%s'?", errorMessage, errorGroup);

        throw new ExpressionEvaluationException(errorMessage);
      }

      // Only token types that contributed a named group to the pattern may be looked up;
      // Matcher.group(String) throws IllegalArgumentException for unknown group names.
      for (TokenType tokenType : PATTERNS_BY_TOKEN_TYPE.keySet()) {
        String group = matcher.group(GROUP_NAMES_BY_TOKEN_TYPE.get(tokenType));

        if (group != null) {
          tokens.add(new Token(tokenType, group));
          break;
        }
      }
    }

    return tokens;
  }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy