All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.hazelcast.org.apache.calcite.runtime.Like Maven / Gradle / Ivy

There is a newer version: 5.4.0
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to you under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.hazelcast.org.apache.calcite.runtime;

import com.hazelcast.org.checkerframework.checker.nullness.qual.Nullable;

import java.util.Arrays;
import java.util.Locale;

/**
 * Utilities for converting SQL {@code LIKE} and {@code SIMILAR} operators
 * to regular expressions.
 *
 * <p>The class is a static utility holder: it has a private constructor and
 * keeps no state besides its constant lookup tables.
 */
public class Like {
  /** Characters that are prefixed with a backslash when emitted into a Java regex. */
  private static final String JAVA_REGEX_SPECIALS = "[]()|^-+*?{}$\\.";

  /** Characters that have a special meaning inside a SQL {@code SIMILAR} pattern. */
  private static final String SQL_SIMILAR_SPECIALS = "[]()|^-+*_%?{}";

  /**
   * Flat table of pairs: the entry at an even index is a SQL regular character
   * set identifier, the entry following it is the Java regex text it is
   * rewritten to.
   */
  private static final String[] REG_CHAR_CLASSES = {
      "[:ALPHA:]", "\\p{Alpha}",
      "[:alpha:]", "\\p{Alpha}",
      "[:UPPER:]", "\\p{Upper}",
      "[:upper:]", "\\p{Upper}",
      "[:LOWER:]", "\\p{Lower}",
      "[:lower:]", "\\p{Lower}",
      "[:DIGIT:]", "\\d",
      "[:digit:]", "\\d",
      "[:SPACE:]", " ",
      "[:space:]", " ",
      "[:WHITESPACE:]", "\\s",
      "[:whitespace:]", "\\s",
      "[:ALNUM:]", "\\p{Alnum}",
      "[:alnum:]", "\\p{Alnum}"
  };

  // It's important to have XDigit before Digit to match XDigit first
  // (i.e. see the posixRegexToPattern method)
  private static final String[] POSIX_CHARACTER_CLASSES = new String[] { "Lower", "Upper", "ASCII",
      "Alpha", "XDigit", "Digit", "Alnum", "Punct", "Graph", "Print", "Blank", "Cntrl", "Space" };

  private Like() {
  }

  /**
   * Converts an optional escape string into the escape character used by the
   * translators.
   *
   * @param escapeStr escape string, or null when no escape was specified
   * @return the single character of {@code escapeStr}, or {@code 0} when
   *     {@code escapeStr} is null
   * @throws RuntimeException if {@code escapeStr} is not exactly one character long
   */
  private static char toEscapeChar(@Nullable CharSequence escapeStr) {
    if (escapeStr == null) {
      return 0;
    }
    if (escapeStr.length() != 1) {
      throw invalidEscapeCharacter(escapeStr.toString());
    }
    return escapeStr.charAt(0);
  }

  /**
   * Translates a SQL LIKE pattern to Java regex pattern, with optional
   * escape string.
   *
   * @param sqlPattern SQL LIKE pattern
   * @param escapeStr escape string of length 1, or null for no escape
   * @return the equivalent Java regex text
   * @throws RuntimeException if the escape string is not one character long or
   *     the pattern contains an invalid escape sequence
   */
  static String sqlToRegexLike(
      String sqlPattern,
      @Nullable CharSequence escapeStr) {
    return sqlToRegexLike(sqlPattern, toEscapeChar(escapeStr));
  }

  /**
   * Translates a SQL LIKE pattern to Java regex pattern.
   *
   * @param sqlPattern SQL LIKE pattern
   * @param escapeChar escape character; the {@code CharSequence} overload passes
   *     {@code 0} when no escape was specified
   * @return the equivalent Java regex text
   * @throws RuntimeException if the escape character is the last character of
   *     the pattern or is followed by anything other than {@code _}, {@code %}
   *     or the escape character itself
   */
  static String sqlToRegexLike(
      String sqlPattern,
      char escapeChar) {
    final int len = sqlPattern.length();
    final StringBuilder javaPattern = new StringBuilder(len + len);
    for (int i = 0; i < len; i++) {
      char c = sqlPattern.charAt(i);
      // The backslash is emitted before the escape test on purpose: when the
      // escape character is itself a regex special, the backslash ends up in
      // front of the escaped character that is appended below.
      if (JAVA_REGEX_SPECIALS.indexOf(c) >= 0) {
        javaPattern.append('\\');
      }
      if (c == escapeChar) {
        if (i == (len - 1)) {
          throw invalidEscapeSequence(sqlPattern, i);
        }
        char nextChar = sqlPattern.charAt(i + 1);
        if ((nextChar == '_')
            || (nextChar == '%')
            || (nextChar == escapeChar)) {
          javaPattern.append(nextChar);
          i++;
        } else {
          throw invalidEscapeSequence(sqlPattern, i);
        }
      } else if (c == '_') {
        javaPattern.append('.');
      } else if (c == '%') {
        javaPattern.append("(?s:.*)");
      } else {
        javaPattern.append(c);
      }
    }
    return javaPattern.toString();
  }

  /** Builds the exception reported for an escape string that is not one character long. */
  private static RuntimeException invalidEscapeCharacter(String s) {
    return new RuntimeException(
        "Invalid escape character '" + s + "'");
  }

  /** Builds the exception reported for a bad escape sequence at index {@code i} of {@code s}. */
  private static RuntimeException invalidEscapeSequence(String s, int i) {
    return new RuntimeException(
        "Invalid escape sequence '" + s + "', " + i);
  }

  /**
   * Validates the use of the escape character in a SIMILAR pattern before
   * translation starts. Does nothing when {@code escapeChar} is {@code 0}.
   *
   * @param sqlPattern SQL SIMILAR pattern
   * @param escapeChar escape character, or {@code 0} for none
   * @throws RuntimeException if the escape character is a SIMILAR special
   *     character that is not followed by a special character or by itself, or
   *     if the escape character is a colon and the pattern contains a regular
   *     character set identifier such as {@code [:alpha:]}
   */
  private static void similarEscapeRuleChecking(
      String sqlPattern,
      char escapeChar) {
    if (escapeChar == 0) {
      return;
    }
    if (SQL_SIMILAR_SPECIALS.indexOf(escapeChar) >= 0) {
      // The escape character is a special character
      // SQL 2003 Part 2 Section 8.6 General Rule 3.b
      for (int i = 0; i < sqlPattern.length(); i++) {
        if (sqlPattern.charAt(i) == escapeChar) {
          if (i == (sqlPattern.length() - 1)) {
            throw invalidEscapeSequence(sqlPattern, i);
          }
          char c = sqlPattern.charAt(i + 1);
          if ((SQL_SIMILAR_SPECIALS.indexOf(c) < 0)
              && (c != escapeChar)) {
            throw invalidEscapeSequence(sqlPattern, i);
          }
        }
      }
    }

    // SQL 2003 Part 2 Section 8.6 General Rule 3.c: a colon escape character
    // conflicts with a regular character set identifier in the pattern.
    if (escapeChar == ':') {
      for (int j = 0; j < REG_CHAR_CLASSES.length; j += 2) {
        final int position = sqlPattern.indexOf(REG_CHAR_CLASSES[j]);
        if (position >= 0) {
          throw invalidEscapeSequence(sqlPattern, position);
        }
      }
    }
  }

  /** Builds the exception reported for a malformed SIMILAR pattern at index {@code i}. */
  private static RuntimeException invalidRegularExpression(
      String pattern, int i) {
    return new RuntimeException(
        "Invalid regular expression '" + pattern + "', index " + i);
  }

  /**
   * Rewrites the body of a SIMILAR character enumeration (the text after an
   * opening {@code [}) into {@code javaPattern}.
   *
   * @param sqlPattern SQL SIMILAR pattern
   * @param javaPattern output buffer; the opening bracket has already been
   *     appended by the caller
   * @param pos index of the opening {@code [} in {@code sqlPattern}
   * @param escapeChar escape character, or {@code 0} for none
   * @return the index just before the closing {@code ]}, so that the caller's
   *     loop increment lands on the bracket; or the last index of the pattern
   *     when no closing bracket exists
   * @throws RuntimeException if the enumeration contains an unescaped special
   *     character, an unknown character set identifier, or an escape character
   *     that is dangling or followed by a character that may not be escaped
   */
  private static int sqlSimilarRewriteCharEnumeration(
      String sqlPattern,
      StringBuilder javaPattern,
      int pos,
      char escapeChar) {
    int i;
    for (i = pos + 1; i < sqlPattern.length(); i++) {
      char c = sqlPattern.charAt(i);
      if (c == ']') {
        return i - 1;
      } else if (c == escapeChar) {
        // A trailing escape character has nothing to escape; report it instead
        // of reading past the end of the pattern.
        if (i == (sqlPattern.length() - 1)) {
          throw invalidEscapeSequence(sqlPattern, i);
        }
        i++;
        char nextChar = sqlPattern.charAt(i);
        if (SQL_SIMILAR_SPECIALS.indexOf(nextChar) >= 0) {
          if (JAVA_REGEX_SPECIALS.indexOf(nextChar) >= 0) {
            javaPattern.append('\\');
          }
          javaPattern.append(nextChar);
        } else if (escapeChar == nextChar) {
          javaPattern.append(nextChar);
        } else {
          throw invalidRegularExpression(sqlPattern, i);
        }
      } else if (c == '-') {
        javaPattern.append('-');
      } else if (c == '^') {
        javaPattern.append('^');
      } else if (sqlPattern.startsWith("[:", i)) {
        boolean found = false;
        for (int j = 0; j < REG_CHAR_CLASSES.length; j += 2) {
          if (sqlPattern.startsWith(REG_CHAR_CLASSES[j], i)) {
            javaPattern.append(REG_CHAR_CLASSES[j + 1]);

            // Skip the identifier; the loop increment consumes its last char.
            i += REG_CHAR_CLASSES[j].length() - 1;
            found = true;
            break;
          }
        }
        if (!found) {
          throw invalidRegularExpression(sqlPattern, i);
        }
      } else if (SQL_SIMILAR_SPECIALS.indexOf(c) >= 0) {
        throw invalidRegularExpression(sqlPattern, i);
      } else {
        javaPattern.append(c);
      }
    }
    return i - 1;
  }

  /**
   * Translates a SQL SIMILAR pattern to Java regex pattern, with optional
   * escape string.
   *
   * @param sqlPattern SQL SIMILAR pattern
   * @param escapeStr escape string of length 1, or null for no escape
   * @return the equivalent Java regex text
   * @throws RuntimeException if the escape string is not one character long or
   *     the pattern is invalid
   */
  static String sqlToRegexSimilar(
      String sqlPattern,
      @Nullable CharSequence escapeStr) {
    return sqlToRegexSimilar(sqlPattern, toEscapeChar(escapeStr));
  }

  /**
   * Translates SQL SIMILAR pattern to Java regex pattern.
   *
   * @param sqlPattern SQL SIMILAR pattern
   * @param escapeChar escape character; the {@code CharSequence} overload passes
   *     {@code 0} when no escape was specified
   * @return the equivalent Java regex text
   * @throws RuntimeException if the escape character is misused or the pattern
   *     has unbalanced or malformed character enumerations
   */
  static String sqlToRegexSimilar(
      String sqlPattern,
      char escapeChar) {
    similarEscapeRuleChecking(sqlPattern, escapeChar);

    boolean insideCharacterEnumeration = false;
    final StringBuilder javaPattern =
        new StringBuilder(sqlPattern.length() * 2);
    final int len = sqlPattern.length();
    for (int i = 0; i < len; i++) {
      char c = sqlPattern.charAt(i);
      if (c == escapeChar) {
        if (i == (len - 1)) {
          // Reached when the escape character is not a SIMILAR special
          // character, because the up-front check only covers specials.
          throw invalidEscapeSequence(sqlPattern, i);
        }
        char nextChar = sqlPattern.charAt(i + 1);
        if (SQL_SIMILAR_SPECIALS.indexOf(nextChar) >= 0) {
          // special character, use \ to replace the escape char.
          if (JAVA_REGEX_SPECIALS.indexOf(nextChar) >= 0) {
            javaPattern.append('\\');
          }
          javaPattern.append(nextChar);
        } else if (nextChar == escapeChar) {
          javaPattern.append(nextChar);
        } else {
          // Reached when the escape character is not a SIMILAR special
          // character, because the up-front check only covers specials.
          throw invalidEscapeSequence(sqlPattern, i);
        }
        i++; // we already process the next char.
      } else {
        switch (c) {
        case '_':
          javaPattern.append('.');
          break;
        case '%':
          javaPattern.append("(?s:.*)");
          break;
        case '[':
          javaPattern.append('[');
          insideCharacterEnumeration = true;
          // The helper returns the index just before the closing bracket (or
          // the last index), so the next iteration sees ']' or the loop ends.
          i = sqlSimilarRewriteCharEnumeration(
              sqlPattern,
              javaPattern,
              i,
              escapeChar);
          break;
        case ']':
          if (!insideCharacterEnumeration) {
            throw invalidRegularExpression(sqlPattern, i);
          }
          insideCharacterEnumeration = false;
          javaPattern.append(']');
          break;
        case '\\':
          javaPattern.append("\\\\");
          break;
        case '$':

          // $ is special character in java regex, but regular in
          // SQL regex.
          javaPattern.append("\\$");
          break;
        default:
          javaPattern.append(c);
        }
      }
    }
    if (insideCharacterEnumeration) {
      throw invalidRegularExpression(sqlPattern, len);
    }

    return javaPattern.toString();
  }

  /**
   * Compiles a POSIX-style regex after replacing the lower-case POSIX character
   * class names it contains (for example {@code digit}) with the corresponding
   * Java {@code \p{...}} classes.
   *
   * @param regex POSIX-style regular expression
   * @param caseSensitive whether matching is case sensitive; when false the
   *     pattern is compiled with {@link java.util.regex.Pattern#CASE_INSENSITIVE}
   * @return the compiled pattern
   */
  static java.util.regex.Pattern posixRegexToPattern(String regex, boolean caseSensitive) {
    // Replace existing character classes with java equivalent ones
    String originalRegex = regex;
    String[] existingExpressions = Arrays.stream(POSIX_CHARACTER_CLASSES)
        .filter(v -> originalRegex.contains(v.toLowerCase(Locale.ROOT))).toArray(String[]::new);
    for (String v : existingExpressions) {
      regex = regex.replace(v.toLowerCase(Locale.ROOT), "\\p{" + v + "}");
    }

    int flags = caseSensitive ? 0 : java.util.regex.Pattern.CASE_INSENSITIVE;
    return java.util.regex.Pattern.compile(regex, flags);
  }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy