All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.ibm.icu.lang.CharSequences Maven / Gradle / Ivy

Go to download

International Component for Unicode for Java (ICU4J) is a mature, widely used Java library providing Unicode and Globalization support

There is a newer version: 76.1
Show newest version
/*
 ********************************************************************************
 * Copyright (C) 2010-2011, Google, International Business Machines Corporation *
 * and others. All Rights Reserved.                                                 *
 ********************************************************************************
 */
package com.ibm.icu.lang;


/**
 * A number of utilities for dealing with CharSequences and related classes.
 * For accessing codepoints with a CharSequence, also see 
 * 
    *
  • {@link java.lang.Character#codePointAt(CharSequence, int)}
  • *
  • {@link java.lang.Character#codePointBefore(CharSequence, int)}
  • *
  • {@link java.lang.Character#codePointCount(CharSequence, int, int)}
  • *
  • {@link java.lang.Character#charCount(int)}
  • *
  • {@link java.lang.Character#offsetByCodePoints(CharSequence, int, int)}
  • *
  • {@link java.lang.Character#toChars(int, char[], int)}
  • *
  • {@link java.lang.Character#toCodePoint(char, char)}
  • *
* @author markdavis * @internal * @deprecated This API is ICU internal only. */ public class CharSequences { // TODO // compareTo(a, b); // compareToIgnoreCase(a, b) // contentEquals(a, b) // contentEqualsIgnoreCase(a, b) // contains(a, b) => indexOf >= 0 // endsWith(a, b) // startsWith(a, b) // lastIndexOf(a, b, fromIndex) // indexOf(a, ch, fromIndex) // lastIndexOf(a, ch, fromIndex); // s.trim() => UnicodeSet.trim(CharSequence s); return a subsequence starting with the first character not in the set to the last character not in the set. // add UnicodeSet.split(CharSequence s); /** * Find the longest n such that a[aIndex,n] = b[bIndex,n], and n is on a character boundary. * @internal * @deprecated This API is ICU internal only. */ public static int matchAfter(CharSequence a, CharSequence b, int aIndex, int bIndex) { int i = aIndex, j = bIndex; int alen = a.length(); int blen = b.length(); for (; i < alen && j < blen; ++i, ++j) { char ca = a.charAt(i); char cb = b.charAt(j); if (ca != cb) { break; } } // if we failed a match make sure that we didn't match half a character int result = i - aIndex; if (result != 0 && !onCharacterBoundary(a, i) && !onCharacterBoundary(b, j)) { --result; // backup } return result; } /** * Count the code point length. Unpaired surrogates count as 1. * @internal * @deprecated This API is ICU internal only. */ public int codePointLength(CharSequence s) { return Character.codePointCount(s, 0, s.length()); // int length = s.length(); // int result = length; // for (int i = 1; i < length; ++i) { // char ch = s.charAt(i); // if (0xDC00 <= ch && ch <= 0xDFFF) { // char ch0 = s.charAt(i-1); // if (0xD800 <= ch && ch <= 0xDbFF) { // --result; // } // } // } } /** * Utility function for comparing codepoint to string without generating new * string. * * @internal * @deprecated This API is ICU internal only. */ public static final boolean equals(int codepoint, CharSequence other) { if (other == null) { return false; } switch (other.length()) { case 1: return codepoint == other.charAt(0); case 2: return codepoint > 0xFFFF && codepoint == Character.codePointAt(other, 0); default: return false; } } /** * @internal * @deprecated This API is ICU internal only. */ public static final boolean equals(CharSequence other, int codepoint) { return equals(codepoint, other); } /** * Utility to compare a string to a code point. * Same results as turning the code point into a string (with the [ugly] new StringBuilder().appendCodePoint(codepoint).toString()) * and comparing, but much faster (no object creation). * Actually, there is one difference; a null compares as less. * Note that this (=String) order is UTF-16 order -- *not* code point order. * * @internal * @deprecated This API is ICU internal only. */ public static int compare(CharSequence string, int codePoint) { if (codePoint < Character.MIN_CODE_POINT || codePoint > Character.MAX_CODE_POINT) { throw new IllegalArgumentException(); } int stringLength = string.length(); if (stringLength == 0) { return -1; } char firstChar = string.charAt(0); int offset = codePoint - Character.MIN_SUPPLEMENTARY_CODE_POINT; if (offset < 0) { // BMP codePoint int result = firstChar - codePoint; if (result != 0) { return result; } return stringLength - 1; } // non BMP char lead = (char)((offset >>> 10) + Character.MIN_HIGH_SURROGATE); int result = firstChar - lead; if (result != 0) { return result; } if (stringLength > 1) { char trail = (char)((offset & 0x3ff) + Character.MIN_LOW_SURROGATE); result = string.charAt(1) - trail; if (result != 0) { return result; } } return stringLength - 2; } /** * Utility to compare a string to a code point. * Same results as turning the code point into a string and comparing, but much faster (no object creation). * Actually, there is one difference; a null compares as less. * Note that this (=String) order is UTF-16 order -- *not* code point order. * * @internal * @deprecated This API is ICU internal only. */ public static int compare(int codepoint, CharSequence a) { return -compare(a, codepoint); } /** * Return the value of the first code point, if the string is exactly one code point. Otherwise return Integer.MAX_VALUE. * * @internal * @deprecated This API is ICU internal only. */ public static int getSingleCodePoint(CharSequence s) { int length = s.length(); if (length < 1 || length > 2) { return Integer.MAX_VALUE; } int result = Character.codePointAt(s, 0); return (result < 0x10000) == (length == 1) ? result : Integer.MAX_VALUE; } /** * Utility function for comparing objects that may be null * string. * * @internal * @deprecated This API is ICU internal only. */ public static final boolean equals(T a, T b) { return a == null ? b == null : b == null ? false : a.equals(b); } /** * Utility for comparing the contents of CharSequences * * @internal * @deprecated This API is ICU internal only. */ public static int compare(CharSequence a, CharSequence b) { int alength = a.length(); int blength = b.length(); int min = alength <= blength ? alength : blength; for (int i = 0; i < min; ++i) { int diff = a.charAt(i) - b.charAt(i); if (diff != 0) { return diff; } } return alength - blength; } /** * Utility for comparing the contents of CharSequences * * @internal * @deprecated This API is ICU internal only. */ public static boolean equalsChars(CharSequence a, CharSequence b) { // do length test first for fast path return a.length() == b.length() && compare(a,b) == 0; } /** * Are we on a character boundary? * * @internal * @deprecated This API is ICU internal only. */ public static boolean onCharacterBoundary(CharSequence s, int i) { return i <= 0 || i >= s.length() || !Character.isHighSurrogate(s.charAt(i-1)) || !Character.isLowSurrogate(s.charAt(i)); } /** * Find code point in string. * * @internal * @deprecated This API is ICU internal only. */ public static int indexOf(CharSequence s, int codePoint) { int cp; for (int i = 0; i < s.length(); i += Character.charCount(cp)) { cp = Character.codePointAt(s, i); if (cp == codePoint) { return i; } } return -1; } /** * Utility function for simplified, more robust loops, such as: *
     *   for (int codePoint : CharSequences.codePoints(string)) {
     *     doSomethingWith(codePoint);
     *   }
     * 
* * @internal * @deprecated This API is ICU internal only. */ public static int[] codePoints(CharSequence s) { int[] result = new int[s.length()]; // in the vast majority of cases, the length is the same int j = 0; for (int i = 0; i < s.length(); ++i) { char cp = s.charAt(i); if (cp >= 0xDC00 && cp <= 0xDFFF && i != 0 ) { // hand-code for speed char last = (char) result[j-1]; if (last >= 0xD800 && last <= 0xDBFF) { // Note: j-1 is safe, because j can only be zero if i is zero. But i!=0 in this block. result[j-1] = Character.toCodePoint(last, cp); continue; } } result[j++] = cp; } if (j == result.length) { return result; } int[] shortResult = new int[j]; System.arraycopy(result, 0, shortResult, 0, j); return shortResult; } private CharSequences() { } }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy