All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.spf4j.base.CharSequences Maven / Gradle / Ivy

/*
 * Copyright (c) 2001-2017, Zoltan Farkas All Rights Reserved.
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
 *
 * Additionally licensed with:
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.spf4j.base;

import com.google.common.annotations.GwtCompatible;
import com.google.common.annotations.GwtIncompatible;
import com.google.common.io.CharSource;
import edu.umd.cs.findbugs.annotations.SuppressFBWarnings;
import java.io.IOException;
import java.io.Reader;
import java.io.UncheckedIOException;
import static java.lang.Math.min;
import javax.annotation.Nonnull;
import javax.annotation.Nullable;

/**
 * Special methods to use for character sequences...
 *
 * @author zoly
 */
@GwtCompatible
public final class CharSequences {

  private CharSequences() {
  }

  /**
   * function that calculates the number of operations that are needed to transform s1 into s2. operations are: char
   * add, char delete, char modify See https://en.wikipedia.org/wiki/Levenshtein_distance for more info.
   *
   * @param s1
   * @param s2
   * @return the number of operations required to transfor s1 into s2
   */
  public static int distance(@Nonnull final CharSequence s1, @Nonnull final CharSequence s2) {
    int l1 = s1.length();
    int l2 = s2.length();

    int[] prev = new int[l2];
    char c1 = s1.charAt(0);
    prev[0] = distance(c1, s2.charAt(0));
    for (int j = 1; j < l2; j++) {
      prev[j] = prev[j - 1] + distance(c1, s2.charAt(j));
    }

    for (int i = 1; i < l1; i++) {
      int[] dist = new int[l2];
      c1 = s1.charAt(i);
      dist[0] = prev[i - 1] + distance(c1, s2.charAt(0));
      for (int j = 1; j < l2; j++) {
        dist[j] = min(prev[j - 1] + distance(c1, s2.charAt(j)),
                min(prev[j] + 1, dist[j - 1] + 1));
      }
      prev = dist;
    }
    return prev[l2 - 1];
  }

  public static int distance(final char c1, final char c2) {
    return (c1 == c2) ? 0 : 1;
  }

  /**
   * compare s to t.
   *
   * @param s
   * @param t
   * @return
   * @deprecated use compare.
   */
  @Deprecated
  public static int compareTo(@Nonnull final CharSequence s, @Nonnull final CharSequence t) {
    return compare(s, t);
  }

  public static int compare(@Nonnull final CharSequence s, @Nonnull final CharSequence t) {
    return compare(s, 0, s.length(), t, 0, t.length());
  }

  public static int compare(@Nonnull final CharSequence s, final int sLength,
          @Nonnull final CharSequence t, final int tLength) {
    return compare(s, 0, sLength, t, 0, tLength);
  }

  /**
   * compare 2 CharSequence fragments.
   *
   * @param s the charsequence to compare
   * @param sFrom the index for the first chars to compare.
   * @param sLength the number of characters to compare.
   * @param t the charsequence to compare to
   * @param tFrom the index for the first character to compare to.
   * @param tLength the number of characters to compare to.
   * @return
   */
  public static int compare(@Nonnull final CharSequence s, final int sFrom, final int sLength,
          @Nonnull final CharSequence t, final int tFrom, final int tLength) {

    int lim = min(sLength, tLength);
    int i = sFrom;
    int j = tFrom;
    int sTo = sFrom + lim;
    while (i < sTo) {
      char c1 = s.charAt(i);
      char c2 = t.charAt(j);
      if (c1 != c2) {
        return c1 - c2;
      }
      i++;
      j++;
    }
    return sLength - tLength;
  }

  public static boolean equalsNullables(@Nullable final CharSequence s, @Nullable final CharSequence t) {
    if (s == null) {
      return null == t;
    } else if (t == null) {
      return true;
    } else {
      return equals(s, t);
    }
  }

  public static boolean equals(@Nonnull final CharSequence s, @Nonnull final CharSequence t) {
    final int sl = s.length();
    final int tl = t.length();
    if (sl != tl) {
      return false;
    } else {
      for (int i = 0; i < sl; i++) {
        if (s.charAt(i) != t.charAt(i)) {
          return false;
        }
      }
      return true;
    }
  }

  public static int hashcode(@Nonnull final CharSequence cs) {
    if (cs instanceof String) {
      return ((String) cs).hashCode();
    }
    int h = 0;
    int len = cs.length();
    if (len > 0) {
      int off = 0;
      for (int i = 0; i < len; i++) {
        h = 31 * h + cs.charAt(off++);
      }
    }
    return h;
  }

  public static CharSequence subSequence(@Nonnull final CharSequence seq, final int startIdx, final int endIdx) {
    if (startIdx == 0 && endIdx == seq.length()) {
      return seq;
    } else if (startIdx >= endIdx) {
      return "";
    } else {
      return new SubSequence(seq, endIdx - startIdx, startIdx);
    }
  }

  private static final class SubSequence implements CharSequence {

    private final CharSequence underlyingSequence;
    private final int length;
    private final int startIdx;

    SubSequence(final CharSequence underlyingSequence, final int length, final int startIdx) {
      this.underlyingSequence = underlyingSequence;
      this.length = length;
      this.startIdx = startIdx;
    }

    @Override
    public int length() {
      return length;
    }

    @Override
    public char charAt(final int index) {
      return underlyingSequence.charAt(startIdx + index);
    }

    @Override
    public CharSequence subSequence(final int start, final int end) {
      return CharSequences.subSequence(underlyingSequence, startIdx + start, startIdx + end);
    }

    @Override
    @SuppressFBWarnings("STT_STRING_PARSING_A_FIELD")
    public String toString() {
      if (underlyingSequence instanceof String) {
        return ((String) underlyingSequence).substring(startIdx, startIdx + length);
      } else if (underlyingSequence instanceof StringBuilder) {
        return ((StringBuilder) underlyingSequence).substring(startIdx, startIdx + length);
      } else {
        char[] chars = new char[length];
        int idx = startIdx;
        for (int i = 0; i < length; i++, idx++) {
          chars[i] = underlyingSequence.charAt(idx);
        }
        return new String(chars);
      }
    }

  }

  public static boolean endsWith(final CharSequence qc, final CharSequence with) {
    int l = qc.length();
    int start = l - with.length();
    if (start >= 0) {
      for (int i = start, j = 0; i < l; i++, j++) {
        if (qc.charAt(i) != with.charAt(j)) {
          return false;
        }
      }
      return true;
    } else {
      return false;
    }
  }

  public static Appendable lineNumbered(final int startLineNr, final Appendable appendable)
          throws IOException {
    return lineNumbered(startLineNr, appendable, IntAppender.CommentNumberAppender.INSTANCE);
  }

  public static Appendable lineNumbered(final int startLineNr, final Appendable appendable, final IntAppender ia)
          throws IOException {
    ia.append(startLineNr, appendable);
    return new Appendable() {
      private int lineNr = startLineNr + 1;

      @Override
      public Appendable append(final CharSequence csq) throws IOException {
        return append(csq, 0, csq.length());
      }

      @Override
      public Appendable append(final CharSequence csq, final int start, final int end) throws IOException {
        int lastIdx = start;
        for (int i = start; i < end; i++) {
          if (csq.charAt(i) == '\n') {
            int next = i + 1;
            appendable.append(csq, lastIdx, next);
            ia.append(lineNr++, appendable);
            lastIdx = next;
          }
        }
        if (lastIdx < end) {
          appendable.append(csq, lastIdx, end);
        }
        return this;
      }

      @Override
      public Appendable append(final char c) throws IOException {
        appendable.append(c);
        if (c == '\n') {
          ia.append(lineNr++, appendable);
        }
        return this;
      }
    };
  }

  public static CharSequence toLineNumbered(final int startLineNr, final CharSequence source) {
    return toLineNumbered(startLineNr, source, IntAppender.CommentNumberAppender.INSTANCE);
  }

  public static CharSequence toLineNumbered(final int startLineNr, final CharSequence source, final IntAppender ia) {
    int length = source.length();
    StringBuilder destination = new StringBuilder(length + 6 * length / 80);
    try {
      lineNumbered(startLineNr, destination, ia).append(source);
    } catch (IOException ex) {
      throw new UncheckedIOException(ex);
    }
    return destination;
  }

  /**
   * A more flexible version of Integer.parseInt.
   *
   * @see java.lang.Integer.parseInt
   */
  public static int parseInt(@Nonnull final CharSequence s) {
    return parseInt(s, 10);
  }

  /**
   * A more flexible version of Integer.parseInt.
   *
   * @see java.lang.Integer.parseInt
   */
  public static int parseInt(@Nonnull final CharSequence cs, final int radix) {

    if (radix < Character.MIN_RADIX) {
      throw new NumberFormatException("radix " + radix
              + " less than Character.MIN_RADIX");
    }

    if (radix > Character.MAX_RADIX) {
      throw new NumberFormatException("radix " + radix
              + " greater than Character.MAX_RADIX");
    }

    int result = 0;
    boolean negative = false;
    int len = cs.length();

    if (len > 0) {
      int i = 0;
      int limit = -Integer.MAX_VALUE;
      int multmin;
      int digit;
      char firstChar = cs.charAt(0);
      if (firstChar < '0') { // Possible leading "+" or "-"
        if (firstChar == '-') {
          negative = true;
          limit = Integer.MIN_VALUE;
        } else if (firstChar != '+') {
          throw new NumberFormatException("For input char sequence: \"" + cs + '\"');
        }

        if (len == 1) { // Cannot have lone "+" or "-"
          throw new NumberFormatException("For input char sequence: \"" + cs + '\"');
        }
        i++;
      }
      multmin = limit / radix;
      while (i < len) {
        // Accumulating negatively avoids surprises near MAX_VALUE
        digit = Character.digit(cs.charAt(i++), radix);
        if (digit < 0) {
          throw new NumberFormatException("For input char sequence: \"" + cs + '\"');
        }
        if (result < multmin) {
          throw new NumberFormatException("For input char sequence: \"" + cs + '\"');
        }
        result *= radix;
        if (result < limit + digit) {
          throw new NumberFormatException("For input char sequence: \"" + cs + '\"');
        }
        result -= digit;
      }
    } else {
      throw new NumberFormatException("For input char sequence: \"" + cs + '\"');
    }
    return negative ? result : -result;
  }


  /**
   * will parse a unsigned integer from a char sequence from idxFrom.
   * @param cs
   * @param radix
   * @param idxFrom
   * @return
   */
  @SuppressWarnings("checkstyle:InnerAssignment")
  public static int parseUnsignedInt(@Nonnull final CharSequence cs, final int radix, final int idxFrom) {
    return parseUnsignedInt(cs, radix, idxFrom, cs.length());
  }


  /**
   * will parse a unsigned integer from a char sequence from idxFrom.
   * @param cs
   * @param radix
   * @param idxFrom
   * @param idxTo
   * @return
   */
  @SuppressWarnings("checkstyle:InnerAssignment")
  public static int parseUnsignedInt(@Nonnull final CharSequence cs, final int radix,
          final int idxFrom, final int idxTo) {
    if (radix < Character.MIN_RADIX) {
      throw new NumberFormatException("radix " + radix
              + " less than Character.MIN_RADIX");
    }
    if (radix > Character.MAX_RADIX) {
      throw new NumberFormatException("radix " + radix
              + " greater than Character.MAX_RADIX");
    }

    int result = 0;
    int i = idxFrom;
    int limit = -Integer.MAX_VALUE;
    int multmin = limit / radix;
    int length = idxTo;
    int digit;
    while (i < length && (digit = Character.digit(cs.charAt(i), radix)) >= 0) {
      if (result < multmin) {
        throw new NumberFormatException("For input char sequence: \"" + cs + "\" at " + i);
      }
      result *= radix;
      if (result < limit + digit) {
        throw new NumberFormatException("For input char sequence: \"" + cs + "\" at " + i);
      }
      result -= digit;
      i++;
    }
    if (i == idxFrom) {
      throw new NumberFormatException("No numnber in \"" + cs + "\" at " + idxFrom);
    }
    return -result;
  }


 @SuppressWarnings("checkstyle:InnerAssignment")
  public static long parseUnsignedLong(@Nonnull final CharSequence cs, final int radix, final int idxFrom) {
    if (radix < Character.MIN_RADIX) {
      throw new NumberFormatException("radix " + radix
              + " less than Character.MIN_RADIX");
    }
    if (radix > Character.MAX_RADIX) {
      throw new NumberFormatException("radix " + radix
              + " greater than Character.MAX_RADIX");
    }

    long result = 0;
    int i = idxFrom;
    long limit = -Long.MAX_VALUE;
    long multmin = limit / radix;
    int digit;
    int length = cs.length();
    while (i < length && (digit = Character.digit(cs.charAt(i), radix)) >= 0) {
      if (result < multmin) {
        throw new NumberFormatException("For input char sequence: \"" + cs + "\" at " + i);
      }
      result *= radix;
      if (result < limit + digit) {
        throw new NumberFormatException("For input char sequence: \"" + cs + "\" at " + i);
      }
      result -= digit;
      i++;
    }
    if (i == idxFrom) {
      throw new NumberFormatException("No numnber in " + cs + " at " + idxFrom);
    }
    return -result;
  }

  /**
   * A more flexible version of Long.parseLong.
   *
   * @see java.lang.Long.parseLong
   */
  public static long parseLong(@Nonnull final CharSequence cs) {
    return parseLong(cs, 10);
  }

  /**
   * A more flexible version of Long.parseLong.
   *
   * @see java.lang.Long.parseLong
   */
  public static long parseLong(@Nonnull final CharSequence cs, final int radix) {

    if (radix < Character.MIN_RADIX) {
      throw new NumberFormatException("radix " + radix
              + " less than Character.MIN_RADIX");
    }
    if (radix > Character.MAX_RADIX) {
      throw new NumberFormatException("radix " + radix
              + " greater than Character.MAX_RADIX");
    }

    long result = 0;
    boolean negative = false;
    int len = cs.length();

    if (len > 0) {
      int i = 0;
      long limit = -Long.MAX_VALUE;
      long multmin;
      int digit;
      char firstChar = cs.charAt(0);
      if (firstChar < '0') { // Possible leading "+" or "-"
        if (firstChar == '-') {
          negative = true;
          limit = Long.MIN_VALUE;
        } else if (firstChar != '+') {
          throw new NumberFormatException("For input char sequence: \"" + cs + '\"');
        }

        if (len == 1) { // Cannot have lone "+" or "-"
          throw new NumberFormatException("For input char sequence: \"" + cs + '\"');
        }
        i++;
      }
      multmin = limit / radix;
      while (i < len) {
        // Accumulating negatively avoids surprises near MAX_VALUE
        digit = Character.digit(cs.charAt(i++), radix);
        if (digit < 0) {
          throw new NumberFormatException("For input char sequence: \"" + cs + '\"');
        }
        if (result < multmin) {
          throw new NumberFormatException("For input char sequence: \"" + cs + '\"');
        }
        result *= radix;
        if (result < limit + digit) {
          throw new NumberFormatException("For input char sequence: \"" + cs + '\"');
        }
        result -= digit;
      }
    } else {
      throw new NumberFormatException("For input char sequence: \"" + cs + '\"');
    }
    return negative ? result : -result;
  }

  public static boolean containsAnyChar(final CharSequence string, final char... chars) {
    for (int i = 0; i < string.length(); i++) {
      char c = string.charAt(i);
      if (Arrays.search(chars, c) >= 0) {
        return true;
      }
    }
    return false;
  }

  public static boolean isValidFileName(@Nonnull final CharSequence fileName) {
    return !containsAnyChar(fileName, '/', '\\');
  }

  public static  T validatedFileName(@Nonnull final T fileName) {
    if (!isValidFileName(fileName)) {
      throw new IllegalArgumentException("Invalid file name: " + fileName);
    }
    return fileName;
  }

  /**
   * Equivalent to String.regionMatches.
   */
  public static boolean regionMatches(final CharSequence t, final int toffset,
          final CharSequence other, final int ooffset, final int plen) {
    int to = toffset;
    int po = ooffset;
    // Note: toffset, ooffset, or len might be near -1>>>1.
    if ((ooffset < 0) || (toffset < 0) || (toffset > (long) t.length() - plen)
            || (ooffset > (long) other.length() - plen)) {
      return false;
    }
    int len = plen;
    while (len-- > 0) {
      if (t.charAt(to++) != other.charAt(po++)) {
        return false;
      }
    }
    return true;
  }

  /**
   * Equivalent/based on to String.regionMatches.
   */
  public static boolean regionMatchesIgnoreCase(final CharSequence ta, final int toffset,
          final CharSequence pa, final int ooffset, final int plen) {
    int to = toffset;
    int po = ooffset;
    // Note: toffset, ooffset, or len might be near -1>>>1.
    if ((ooffset < 0) || (toffset < 0)
            || (toffset > (long) ta.length() - plen)
            || (ooffset > (long) pa.length() - plen)) {
      return false;
    }
    int len = plen;
    while (len-- > 0) {
      char c1 = ta.charAt(to++);
      char c2 = pa.charAt(po++);
      if (c1 == c2) {
        continue;
      }
      // If characters don't match but case may be ignored,
      // try converting both characters to uppercase.
      // If the results match, then the comparison scan should
      // continue.
      char u1 = Character.toUpperCase(c1);
      char u2 = Character.toUpperCase(c2);
      if (u1 == u2) {
        continue;
      }
      // Unfortunately, conversion to uppercase does not work properly
      // for the Georgian alphabet, which has strange rules about case
      // conversion.  So we need to make one last check before
      // exiting.
      if (Character.toLowerCase(u1) != Character.toLowerCase(u2)) {
        return false;
      }
    }
    return true;
  }

  /**
   * regular wildcard matcher. * matches any number of consecutive characters. ? matches any single character.
   *
   * @param wildcard
   * @param cs2Match
   * @return
   */
  public static boolean match(final CharSequence wildcard, final CharSequence cs2Match) {
    int i = 0;
    int j = 0;
    final int length = wildcard.length();
    for (; i < length; i++, j++) {
      final char some2 = wildcard.charAt(i);
      if (some2 != cs2Match.charAt(j)) {
        if (some2 == '*') {
          i++;
          if (i == length) {
            return true;
          }
          final char some = wildcard.charAt(i);
          while (some != cs2Match.charAt(j)) {
            ++j;
          }
          j--;
        } else if (some2 != '?') {
          return false;
        }
      }
    }
    return j == cs2Match.length();
  }

  /**
   * Transform a wildcard expression 2 a java regular expression. * matches any number of consecutive characters. ?
   * matches any single character.
   *
   * @param wildcard
   * @return
   */
  public CharSequence getJavaRegexpStr(final CharSequence wildcard) {
    final int length = wildcard.length();
    final StringBuilder buff = new StringBuilder(length + 4);
    for (int i = 0; i < length; i++) {
      final char c = wildcard.charAt(i);
      switch (c) {
        case '*':
          buff.append(".*");
          break;
        case '?':
          buff.append('.');
          break;
        case '[':
        case ']':
        case '(':
        case ')':
        case '{':
        case '}':
        case '.':
          buff.append('\\').append(c);
          break;
        default:
          buff.append(c);
      }
    }
    return buff;
  }

  public static int indexOf(final CharSequence cs, final int from, final int to, final char c) {
    for (int i = from; i < to; i++) {
      if (c == cs.charAt(i)) {
        return i;
      }
    }
    return -1;
  }

  public static int indexOf(final CharSequence cs, final int from, final int to, final char... chars) {
    for (int i = from; i < to; i++) {
      char charAt = cs.charAt(i);
      for (char c : chars) {
        if (c == charAt) {
          return i;
        }
      }
    }
    return -1;
  }

  public static boolean containsIgnoreCase(final CharSequence str, final CharSequence searchStr) {
    return lastIndexOfIgnoreCase(str, searchStr) >= 0;
  }

  public static int lastIndexOfIgnoreCase(final CharSequence str, final CharSequence searchStr) {
    final int length = searchStr.length();
    if (length == 0) {
      return 0;
    }
    for (int i = str.length() - length; i >= 0; i--) {
      if (regionMatchesIgnoreCase(str, i, searchStr, 0, length)) {
        return i;
      }
    }
    return -1;
  }

  public static int indexOfIgnoreCase(final CharSequence str, final CharSequence searchStr, final int idxStart) {
    final int sLen = searchStr.length();
    if (sLen == 0) {
      return 0;
    }
    for (int i = idxStart, l = str.length() - sLen; i <= l; i++) {
      if (regionMatchesIgnoreCase(str, i, searchStr, 0, sLen)) {
        return i;
      }
    }
    return -1;
  }


  public static int countIgnoreCase(final CharSequence str, final CharSequence searchStr) {
    int result = 0;
    int sLen = searchStr.length();
    if (sLen == 0) {
      return 0;
    }
    int from = 0;
    int idx;
    while ((idx = indexOfIgnoreCase(str, searchStr, from)) >= 0) {
      result++;
      from = idx + sLen;
    }
    return result;
  }


  @GwtIncompatible
  public static Reader reader(final CharSequence cs) {
    try {
      return CharSource.wrap(cs).openStream();
    } catch (IOException ex) {
      throw new UncheckedIOException(ex);
    }
  }


}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy