All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.google.re2j.Utils Maven / Gradle / Ivy

There is a newer version: 1.7
Show newest version
// Copyright 2010 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package com.google.re2j;

/**
 * Various constants and helper utilities.
 */
abstract class Utils {

  static final int[] EMPTY_INTS = {};

  // Returns true iff |c| is an ASCII letter or decimal digit.
  static boolean isalnum(int c) {
    return '0' <= c && c <= '9' || 'A' <= c && c <= 'Z' || 'a' <= c && c <= 'z';
  }

  // If |c| is an ASCII hex digit, returns its value, otherwise -1.
  static int unhex(int c) {
    if ('0' <= c && c <= '9') {
      return c - '0';
    }
    if ('a' <= c && c <= 'f') {
      return c - 'a' + 10;
    }
    if ('A' <= c && c <= 'F') {
      return c - 'A' + 10;
    }
    return -1;
  }

  private static final String METACHARACTERS = "\\.+*?()|[]{}^$";

  // Appends a RE2 literal to |out| for rune |rune|,
  // with regexp metacharacters escaped.
  static void escapeRune(StringBuilder out, int rune) {
    if (Unicode.isPrint(rune)) {
      if (METACHARACTERS.indexOf((char) rune) >= 0) {
        out.append('\\');
      }
      out.appendCodePoint(rune);
      return;
    }

    switch (rune) {
      case '"':  out.append("\\\""); break;
      case '\\': out.append("\\\\"); break;
      case '\t': out.append("\\t");  break;
      case '\n': out.append("\\n");  break;
      case '\r': out.append("\\r");  break;
      case '\b': out.append("\\b");  break;
      case '\f': out.append("\\f");  break;
      default: {
        String s = Integer.toHexString(rune);
        if (rune < 0x100) {
          out.append("\\x");
          if (s.length() == 1) {
            out.append('0');
          }
          out.append(s);
        } else {
          out.append("\\x{").append(s).append('}');
        }
        break;
      }
    }
  }

  // Returns the array of runes in the specified Java UTF-16 string.
  static int[] stringToRunes(String str) {
    int charlen = str.length();
    int runelen = str.codePointCount(0, charlen);
    int[] runes = new int[runelen];
    int r = 0, c = 0;
    while (c < charlen) {
      int rune = str.codePointAt(c);
      runes[r++] = rune;
      c += Character.charCount(rune);
    }
    return runes;
  }

  // Returns the Java UTF-16 string containing the single rune |r|.
  static String runeToString(int r) {
    char c = (char) r;
    return r == c
        ? String.valueOf(c)
        : new String(Character.toChars(c));
  }

  // Returns a new copy of the specified subarray.
  static int[] subarray(int[] array, int start, int end) {
    int[] r = new int[end - start];
    for (int i = start; i < end; ++i) {
      r[i - start] = array[i];
    }
    return r;
  }

  // Returns a new copy of the specified subarray.
  static byte[] subarray(byte[] array, int start, int end) {
    byte[] r = new byte[end - start];
    for (int i = start; i < end; ++i) {
      r[i - start] = array[i];
    }
    return r;
  }

  // Returns the index of the first occurrence of array |target| within
  // array |source| after |fromIndex|, or -1 if not found.
  static int indexOf(byte[] source, byte[] target, int fromIndex) {
    if (fromIndex >= source.length) {
      return target.length == 0 ? source.length : -1;
    }
    if (fromIndex < 0) {
      fromIndex = 0;
    }
    if (target.length == 0) {
      return fromIndex;
    }

    byte first = target[0];
    for (int i = fromIndex, max = source.length - target.length; i <= max;
         i++) {
      // Look for first byte.
      if (source[i] != first) {
        while (++i <= max && source[i] != first) {}
      }

      // Found first byte, now look at the rest of v2.
      if (i <= max) {
        int j = i + 1;
        int end = j + target.length - 1;
        for (int k = 1; j < end && source[j] == target[k]; j++, k++) {}

        if (j == end) {
          return i;  // found whole array
        }
      }
    }
    return -1;
  }

  // isWordRune reports whether r is consider a ``word character''
  // during the evaluation of the \b and \B zero-width assertions.
  // These assertions are ASCII-only: the word characters are [A-Za-z0-9_].
  static boolean isWordRune(int r)  {
    return ('A' <= r && r <= 'Z' ||
            'a' <= r && r <= 'z' ||
            '0' <= r && r <= '9' ||
            r == '_');
  }

  //// EMPTY_* flags

  static final int EMPTY_BEGIN_LINE       = 0x01;
  static final int EMPTY_END_LINE         = 0x02;
  static final int EMPTY_BEGIN_TEXT       = 0x04;
  static final int EMPTY_END_TEXT         = 0x08;
  static final int EMPTY_WORD_BOUNDARY    = 0x10;
  static final int EMPTY_NO_WORD_BOUNDARY = 0x20;
  static final int EMPTY_ALL              = -1;  // (impossible)

  // emptyOpContext returns the zero-width assertions satisfied at the position
  // between the runes r1 and r2, a bitmask of EMPTY_* flags.
  // Passing r1 == -1 indicates that the position is at the beginning of the
  // text.
  // Passing r2 == -1 indicates that the position is at the end of the text.
  // TODO(adonovan): move to Machine.
  static int emptyOpContext(int r1, int r2) {
    int op = 0;
    if (r1 < 0) {
      op |= EMPTY_BEGIN_TEXT | EMPTY_BEGIN_LINE;
    }
    if (r1 == '\n') {
      op |= EMPTY_BEGIN_LINE;
    }
    if (r2 < 0) {
      op |= EMPTY_END_TEXT | EMPTY_END_LINE;
    }
    if (r2 == '\n') {
      op |= EMPTY_END_LINE;
    }
    if (isWordRune(r1) != isWordRune(r2)) {
      op |= EMPTY_WORD_BOUNDARY;
    } else {
      op |= EMPTY_NO_WORD_BOUNDARY;
    }
    return op;
  }

  private Utils() {}  // uninstantiable

}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy