libcore.icu.ICU Maven / Gradle / Ivy

Show more of this group Show more artifacts with this name
Show all versions of android-all Show documentation
A library jar that provides APIs for Applications written for the Google Android Platform.
There is a newer version: 14-robolectric-10818077
/*
 * Copyright (C) 2008 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package libcore.icu;

import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedHashSet;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import libcore.util.BasicLruCache;

/**
 * Makes ICU data accessible to Java.
 */
public final class ICU {
  private static final BasicLruCache CACHED_PATTERNS =
      new BasicLruCache(8);

  private static Locale[] availableLocalesCache;

  private static String[] isoCountries;

  private static String[] isoLanguages;

  /**
   * Returns an array of two-letter ISO 639-1 language codes, either from ICU or our cache.
   */
  public static String[] getISOLanguages() {
    if (isoLanguages == null) {
      isoLanguages = getISOLanguagesNative();
    }
    return isoLanguages.clone();
  }

  /**
   * Returns an array of two-letter ISO 3166 country codes, either from ICU or our cache.
   */
  public static String[] getISOCountries() {
    if (isoCountries == null) {
      isoCountries = getISOCountriesNative();
    }
    return isoCountries.clone();
  }

  private static final int IDX_LANGUAGE = 0;
  private static final int IDX_SCRIPT = 1;
  private static final int IDX_REGION = 2;
  private static final int IDX_VARIANT = 3;

  /*
   * Parse the {Language, Script, Region, Variant*} section of the ICU locale
   * ID. This is the bit that appears before the keyword separate "@". The general
   * structure is a series of ASCII alphanumeric strings (subtags)
   * separated by underscores.
   *
   * Each subtag is interpreted according to its position in the list of subtags
   * AND its length (groan...). The various cases are explained in comments
   * below.
   */
  private static void parseLangScriptRegionAndVariants(String string,
          String[] outputArray) {
    final int first = string.indexOf('_');
    final int second = string.indexOf('_', first + 1);
    final int third = string.indexOf('_', second + 1);

    if (first == -1) {
      outputArray[IDX_LANGUAGE] = string;
    } else if (second == -1) {
      // Language and country ("ja_JP") OR
      // Language and script ("en_Latn") OR
      // Language and variant ("en_POSIX").

      outputArray[IDX_LANGUAGE] = string.substring(0, first);
      final String secondString = string.substring(first + 1);

      if (secondString.length() == 4) {
          // 4 Letter ISO script code.
          outputArray[IDX_SCRIPT] = secondString;
      } else if (secondString.length() == 2 || secondString.length() == 3) {
          // 2 or 3 Letter region code.
          outputArray[IDX_REGION] = secondString;
      } else {
          // If we're here, the length of the second half is either 1 or greater
          // than 5. Assume that ICU won't hand us malformed tags, and therefore
          // assume the rest of the string is a series of variant tags.
          outputArray[IDX_VARIANT] = secondString;
      }
    } else if (third == -1) {
      // Language and country and variant ("ja_JP_TRADITIONAL") OR
      // Language and script and variant ("en_Latn_POSIX") OR
      // Language and script and region ("en_Latn_US"). OR
      // Language and variant with multiple subtags ("en_POSIX_XISOP")

      outputArray[IDX_LANGUAGE] = string.substring(0, first);
      final String secondString = string.substring(first + 1, second);
      final String thirdString = string.substring(second + 1);

      if (secondString.length() == 4) {
          // The second subtag is a script.
          outputArray[IDX_SCRIPT] = secondString;

          // The third subtag can be either a region or a variant, depending
          // on its length.
          if (thirdString.length() == 2 || thirdString.length() == 3 ||
                  thirdString.isEmpty()) {
              outputArray[IDX_REGION] = thirdString;
          } else {
              outputArray[IDX_VARIANT] = thirdString;
          }
      } else if (secondString.isEmpty() ||
              secondString.length() == 2 || secondString.length() == 3) {
          // The second string is a region, and the third a variant.
          outputArray[IDX_REGION] = secondString;
          outputArray[IDX_VARIANT] = thirdString;
      } else {
          // Variant with multiple subtags.
          outputArray[IDX_VARIANT] = string.substring(first + 1);
      }
    } else {
      // Language, script, region and variant with 1 or more subtags
      // ("en_Latn_US_POSIX") OR
      // Language, region and variant with 2 or more subtags
      // (en_US_POSIX_VARIANT).
      outputArray[IDX_LANGUAGE] = string.substring(0, first);
      final String secondString = string.substring(first + 1, second);
      if (secondString.length() == 4) {
          outputArray[IDX_SCRIPT] = secondString;
          outputArray[IDX_REGION] = string.substring(second + 1, third);
          outputArray[IDX_VARIANT] = string.substring(third + 1);
      } else {
          outputArray[IDX_REGION] = secondString;
          outputArray[IDX_VARIANT] = string.substring(second + 1);
      }
    }
  }

  /**
   * Returns the appropriate {@code Locale} given a {@code String} of the form returned
   * by {@code toString}. This is very lenient, and doesn't care what's between the underscores:
   * this method can parse strings that {@code Locale.toString} won't produce.
   * Used to remove duplication.
   */
  public static Locale localeFromIcuLocaleId(String localeId) {
    // @ == ULOC_KEYWORD_SEPARATOR_UNICODE (uloc.h).
    final int extensionsIndex = localeId.indexOf('@');

    Map extensionsMap = Collections.EMPTY_MAP;
    Map unicodeKeywordsMap = Collections.EMPTY_MAP;
    Set unicodeAttributeSet = Collections.EMPTY_SET;

    if (extensionsIndex != -1) {
      extensionsMap = new HashMap();
      unicodeKeywordsMap = new HashMap();
      unicodeAttributeSet = new HashSet();

      // ICU sends us a semi-colon (ULOC_KEYWORD_ITEM_SEPARATOR) delimited string
      // containing all "keywords" it could parse. An ICU keyword is a key-value pair
      // separated by an "=" (ULOC_KEYWORD_ASSIGN).
      //
      // Each keyword item can be one of three things :
      // - A unicode extension attribute list: In this case the item key is "attribute"
      //   and the value is a hyphen separated list of unicode attributes.
      // - A unicode extension keyword: In this case, the item key will be larger than
      //   1 char in length, and the value will be the unicode extension value.
      // - A BCP-47 extension subtag: In this case, the item key will be exactly one
      //   char in length, and the value will be a sequence of unparsed subtags that
      //   represent the extension.
      //
      // Note that this implies that unicode extension keywords are "promoted" to
      // to the same namespace as the top level extension subtags and their values.
      // There can't be any collisions in practice because the BCP-47 spec imposes
      // restrictions on their lengths.
      final String extensionsString = localeId.substring(extensionsIndex + 1);
      final String[] extensions = extensionsString.split(";");
      for (String extension : extensions) {
        // This is the special key for the unicode attributes
        if (extension.startsWith("attribute=")) {
          String unicodeAttributeValues = extension.substring("attribute=".length());
          for (String unicodeAttribute : unicodeAttributeValues.split("-")) {
            unicodeAttributeSet.add(unicodeAttribute);
          }
        } else {
          final int separatorIndex = extension.indexOf('=');

          if (separatorIndex == 1) {
            // This is a BCP-47 extension subtag.
            final String value = extension.substring(2);
            final char extensionId = extension.charAt(0);

            extensionsMap.put(extensionId, value);
          } else {
            // This is a unicode extension keyword.
            unicodeKeywordsMap.put(extension.substring(0, separatorIndex),
            extension.substring(separatorIndex + 1));
          }
        }
      }
    }

    final String[] outputArray = new String[] { "", "", "", "" };
    if (extensionsIndex == -1) {
      parseLangScriptRegionAndVariants(localeId, outputArray);
    } else {
      parseLangScriptRegionAndVariants(localeId.substring(0, extensionsIndex),
          outputArray);
    }

    return new Locale(outputArray[IDX_LANGUAGE], outputArray[IDX_REGION],
        outputArray[IDX_VARIANT], outputArray[IDX_SCRIPT],
        unicodeAttributeSet, unicodeKeywordsMap, extensionsMap,
        true /* has validated fields */);
  }

  public static Locale[] localesFromStrings(String[] localeNames) {
    // We need to remove duplicates caused by the conversion of "he" to "iw", et cetera.
    // Java needs the obsolete code, ICU needs the modern code, but we let ICU know about
    // both so that we never need to convert back when talking to it.
    LinkedHashSet set = new LinkedHashSet();
    for (String localeName : localeNames) {
      set.add(localeFromIcuLocaleId(localeName));
    }
    return set.toArray(new Locale[set.size()]);
  }

  public static Locale[] getAvailableLocales() {
    if (availableLocalesCache == null) {
      availableLocalesCache = localesFromStrings(getAvailableLocalesNative());
    }
    return availableLocalesCache.clone();
  }

  public static Locale[] getAvailableBreakIteratorLocales() {
    return localesFromStrings(getAvailableBreakIteratorLocalesNative());
  }

  public static Locale[] getAvailableCalendarLocales() {
    return localesFromStrings(getAvailableCalendarLocalesNative());
  }

  public static Locale[] getAvailableCollatorLocales() {
    return localesFromStrings(getAvailableCollatorLocalesNative());
  }

  public static Locale[] getAvailableDateFormatLocales() {
    return localesFromStrings(getAvailableDateFormatLocalesNative());
  }

  public static Locale[] getAvailableDateFormatSymbolsLocales() {
    return getAvailableDateFormatLocales();
  }

  public static Locale[] getAvailableDecimalFormatSymbolsLocales() {
    return getAvailableNumberFormatLocales();
  }

  public static Locale[] getAvailableNumberFormatLocales() {
    return localesFromStrings(getAvailableNumberFormatLocalesNative());
  }

  public static String getBestDateTimePattern(String skeleton, Locale locale) {
    String languageTag = locale.toLanguageTag();
    String key = skeleton + "\t" + languageTag;
    synchronized (CACHED_PATTERNS) {
      String pattern = CACHED_PATTERNS.get(key);
      if (pattern == null) {
        pattern = getBestDateTimePatternNative(skeleton, languageTag);
        CACHED_PATTERNS.put(key, pattern);
      }
      return pattern;
    }
  }

  private static native String getBestDateTimePatternNative(String skeleton, String languageTag);

  public static char[] getDateFormatOrder(String pattern) {
    char[] result = new char[3];
    int resultIndex = 0;
    boolean sawDay = false;
    boolean sawMonth = false;
    boolean sawYear = false;

    for (int i = 0; i < pattern.length(); ++i) {
      char ch = pattern.charAt(i);
      if (ch == 'd' || ch == 'L' || ch == 'M' || ch == 'y') {
        if (ch == 'd' && !sawDay) {
          result[resultIndex++] = 'd';
          sawDay = true;
        } else if ((ch == 'L' || ch == 'M') && !sawMonth) {
          result[resultIndex++] = 'M';
          sawMonth = true;
        } else if ((ch == 'y') && !sawYear) {
          result[resultIndex++] = 'y';
          sawYear = true;
        }
      } else if (ch == 'G') {
        // Ignore the era specifier, if present.
      } else if ((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z')) {
        throw new IllegalArgumentException("Bad pattern character '" + ch + "' in " + pattern);
      } else if (ch == '\'') {
        if (i < pattern.length() - 1 && pattern.charAt(i + 1) == '\'') {
          ++i;
        } else {
          i = pattern.indexOf('\'', i + 1);
          if (i == -1) {
            throw new IllegalArgumentException("Bad quoting in " + pattern);
          }
          ++i;
        }
      } else {
        // Ignore spaces and punctuation.
      }
    }
    return result;
  }

  /**
   * Returns the version of the CLDR data in use, such as "22.1.1".
   */
  public static native String getCldrVersion();

  /**
   * Returns the icu4c version in use, such as "50.1.1".
   */
  public static native String getIcuVersion();

  /**
   * Returns the Unicode version our ICU supports, such as "6.2".
   */
  public static native String getUnicodeVersion();

  // --- Case mapping.

  public static String toLowerCase(String s, Locale locale) {
    return toLowerCase(s, locale.toLanguageTag());
  }

  private static native String toLowerCase(String s, String languageTag);

  public static String toUpperCase(String s, Locale locale) {
    return toUpperCase(s, locale.toLanguageTag());
  }

  private static native String toUpperCase(String s, String languageTag);

  // --- Errors.

  // Just the subset of error codes needed by CharsetDecoderICU/CharsetEncoderICU.
  public static final int U_ZERO_ERROR = 0;
  public static final int U_INVALID_CHAR_FOUND = 10;
  public static final int U_TRUNCATED_CHAR_FOUND = 11;
  public static final int U_ILLEGAL_CHAR_FOUND = 12;
  public static final int U_BUFFER_OVERFLOW_ERROR = 15;

  public static boolean U_FAILURE(int error) {
    return error > U_ZERO_ERROR;
  }

  // --- Native methods accessing ICU's database.

  private static native String[] getAvailableBreakIteratorLocalesNative();
  private static native String[] getAvailableCalendarLocalesNative();
  private static native String[] getAvailableCollatorLocalesNative();
  private static native String[] getAvailableDateFormatLocalesNative();
  private static native String[] getAvailableLocalesNative();
  private static native String[] getAvailableNumberFormatLocalesNative();

  public static native String[] getAvailableCurrencyCodes();
  public static native String getCurrencyCode(String countryCode);

  public static String getCurrencyDisplayName(Locale locale, String currencyCode) {
    return getCurrencyDisplayName(locale.toLanguageTag(), currencyCode);
  }

  private static native String getCurrencyDisplayName(String languageTag, String currencyCode);

  public static native int getCurrencyFractionDigits(String currencyCode);
  public static native int getCurrencyNumericCode(String currencyCode);

  public static String getCurrencySymbol(Locale locale, String currencyCode) {
    return getCurrencySymbol(locale.toLanguageTag(), currencyCode);
  }

  private static native String getCurrencySymbol(String languageTag, String currencyCode);

  public static String getDisplayCountry(Locale targetLocale, Locale locale) {
    return getDisplayCountryNative(targetLocale.toLanguageTag(), locale.toLanguageTag());
  }

  private static native String getDisplayCountryNative(String targetLanguageTag, String languageTag);

  public static String getDisplayLanguage(Locale targetLocale, Locale locale) {
    return getDisplayLanguageNative(targetLocale.toLanguageTag(), locale.toLanguageTag());
  }

  private static native String getDisplayLanguageNative(String targetLanguageTag, String languageTag);

  public static String getDisplayVariant(Locale targetLocale, Locale locale) {
    return getDisplayVariantNative(targetLocale.toLanguageTag(), locale.toLanguageTag());
  }

  private static native String getDisplayVariantNative(String targetLanguageTag, String languageTag);

  public static String getDisplayScript(Locale targetLocale, Locale locale) {
    return getDisplayScriptNative(targetLocale.toLanguageTag(), locale.toLanguageTag());
  }

  private static native String getDisplayScriptNative(String targetLanguageTag, String languageTag);

  public static native String getISO3Country(String languageTag);

  public static native String getISO3Language(String languageTag);

  public static Locale addLikelySubtags(Locale locale) {
      return Locale.forLanguageTag(addLikelySubtags(locale.toLanguageTag()).replace('_', '-'));
  }

  /**
   * @deprecated use {@link #addLikelySubtags(java.util.Locale)} instead.
   */
  @Deprecated
  public static native String addLikelySubtags(String locale);

  /**
   * @deprecated use {@link java.util.Locale#getScript()} instead. This has been kept
   *     around only for the support library.
   */
  @Deprecated
  public static native String getScript(String locale);

  private static native String[] getISOLanguagesNative();
  private static native String[] getISOCountriesNative();

  static native boolean initLocaleDataNative(String languageTag, LocaleData result);

  /**
   * Takes a BCP-47 language tag (Locale.toLanguageTag()). e.g. en-US, not en_US
   */
  public static native void setDefaultLocale(String languageTag);

  /**
   * Returns a locale name, not a BCP-47 language tag. e.g. en_US not en-US.
   */
  public static native String getDefaultLocale();
}