// com.ibm.icu.impl.coll.CollationLoader Maven / Gradle / Ivy
// The newest version!
// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
*******************************************************************************
*
* Copyright (C) 1996-2016, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
*
* CollationLoader.java, ported from ucol_res.cpp
*
* created by: Markus W. Scherer
*/
package com.ibm.icu.impl.coll;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.MissingResourceException;
import com.ibm.icu.impl.ICUData;
import com.ibm.icu.impl.ICUResourceBundle;
import com.ibm.icu.util.ICUUncheckedIOException;
import com.ibm.icu.util.Output;
import com.ibm.icu.util.ULocale;
import com.ibm.icu.util.UResourceBundle;
/**
 * Loads collation data from the ICU collation resource bundles: the root
 * collation rule string, the rule string of a locale's tailoring, and the
 * binary tailoring data for a locale.
 *
 * <p>All methods are static; the class cannot be instantiated.
 */
public final class CollationLoader {

    // not implemented, all methods are static
    private CollationLoader() {
    }

    /**
     * Root collation rule string, read from the "UCARules" resource of the root
     * collation bundle on first use. {@code null} until it has been loaded.
     */
    private static volatile String rootRules = null;

    /**
     * Loads {@link #rootRules} if it has not been loaded yet.
     * The field is checked once without the lock and once more while holding
     * the class lock, so the bundle is read at most once.
     */
    private static void loadRootRules() {
        if (rootRules != null) {
            return;
        }
        synchronized (CollationLoader.class) {
            if (rootRules == null) {
                UResourceBundle rootBundle = UResourceBundle.getBundleInstance(
                        ICUData.ICU_COLLATION_BASE_NAME, ULocale.ROOT);
                rootRules = rootBundle.getString("UCARules");
            }
        }
    }

    /**
     * Returns the root collation rule string, loading it on first call.
     * C++: static void appendRootRules(UnicodeString &amp;s)
     *
     * @return the "UCARules" string of the root collation bundle
     */
    public static String getRootRules() {
        loadRootRules();
        return rootRules;
    }

    /**
     * Simpler/faster methods for ASCII than ones based on Unicode data.
     * TODO: There should be code like this somewhere already??
     */
    private static final class ASCII {
        /**
         * Lowercases the ASCII letters 'A'..'Z' in {@code s}; all other
         * characters are left untouched.
         *
         * @param s the string to convert
         * @return {@code s} itself when it contains no ASCII uppercase letter,
         *         otherwise a new string with those letters lowercased
         */
        static String toLowerCase(String s) {
            for (int i = 0; i < s.length(); ++i) {
                char c = s.charAt(i);
                if ('A' <= c && c <= 'Z') {
                    // First uppercase letter found: copy the unchanged prefix,
                    // then convert the remainder character by character.
                    StringBuilder sb = new StringBuilder(s.length());
                    sb.append(s, 0, i).append((char) (c + 0x20));
                    while (++i < s.length()) {
                        c = s.charAt(i);
                        if ('A' <= c && c <= 'Z') {
                            c = (char) (c + 0x20);
                        }
                        sb.append(c);
                    }
                    return sb.toString();
                }
            }
            return s;
        }
    }

    /**
     * Tells whether a locale name denotes the root locale, which may be
     * spelled either as the empty string or as "root".
     * See https://unicode-org.atlassian.net/browse/ICU-10715
     */
    private static boolean isRootLocaleName(String name) {
        return name.length() == 0 || name.equals("root");
    }

    /**
     * Returns the "Sequence" rule string of the given collation type for a locale.
     * Exceptions raised by the resource bundle lookups are not caught here.
     *
     * @param locale        the locale whose collation bundle is opened
     * @param collationType the collation type; ASCII-lowercased before the lookup
     * @return the "Sequence" string found under "collations/&lt;type&gt;"
     */
    static String loadRules(ULocale locale, String collationType) {
        UResourceBundle bundle = UResourceBundle.getBundleInstance(
                ICUData.ICU_COLLATION_BASE_NAME, locale);
        UResourceBundle data = ((ICUResourceBundle) bundle).getWithFallback(
                "collations/" + ASCII.toLowerCase(collationType));
        String rules = data.getString("Sequence");
        return rules;
    }

    /**
     * Shorthand for {@code ICUResourceBundle.findWithFallback} on a bundle that
     * is statically typed as {@link UResourceBundle}. Callers in this class
     * treat a {@code null} result as "entry not found".
     */
    private static UResourceBundle findWithFallback(UResourceBundle table, String entryName) {
        return ((ICUResourceBundle) table).findWithFallback(entryName);
    }

    /**
     * Loads the collation tailoring for a locale.
     *
     * <p>The root tailoring is returned when the locale is the root locale, when
     * no collation bundle or "collations" table can be found, when no usable
     * collation type exists, or when the data resolves to the root "standard" type.
     *
     * <p>The collation type is taken from the locale's "collation" keyword and
     * falls back, in order, from "search..." variants to "search", to the
     * default type named by the data, and finally to "standard".
     *
     * @param locale         the requested locale, optionally carrying a
     *                       "collation" keyword
     * @param outValidLocale output parameter; receives the valid locale, with a
     *                       "collation" keyword added when the resolved type
     *                       differs from the default type
     * @return the loaded tailoring, or the root tailoring
     * @throws ICUUncheckedIOException if the binary tailoring data cannot be read
     */
    public static CollationTailoring loadTailoring(ULocale locale, Output<ULocale> outValidLocale) {
        // Java porting note: ICU4J getWithFallback/getStringWithFallback currently does not
        // work well when alias table is involved in a resource path, unless full path is specified.
        // For now, collation resources do not contain such data, so the code below should work fine.
        CollationTailoring root = CollationRoot.getRoot();
        String localeName = locale.getName();
        if (isRootLocaleName(localeName)) {
            outValidLocale.value = ULocale.ROOT;
            return root;
        }

        UResourceBundle bundle = null;
        try {
            bundle = ICUResourceBundle.getBundleInstance(
                    ICUData.ICU_COLLATION_BASE_NAME, locale,
                    ICUResourceBundle.OpenType.LOCALE_ROOT);
        } catch (MissingResourceException e) {
            // No collation bundle for this locale: use the root collator.
            outValidLocale.value = ULocale.ROOT;
            return root;
        }

        ULocale validLocale = bundle.getULocale();
        // Normalize the root locale. See
        // https://unicode-org.atlassian.net/browse/ICU-10715
        String validLocaleName = validLocale.getName();
        if (isRootLocaleName(validLocaleName)) {
            validLocale = ULocale.ROOT;
        }
        outValidLocale.value = validLocale;

        // There are zero or more tailorings in the collations table.
        UResourceBundle collations;
        try {
            collations = bundle.get("collations");
            if (collations == null) {
                return root;
            }
        } catch (MissingResourceException ignored) {
            // No collations table at all: use the root collator.
            return root;
        }

        // Fetch the collation type from the locale ID and the default type from the data.
        String type = locale.getKeywordValue("collation");
        String defaultType = "standard";

        String defT = ((ICUResourceBundle) collations).findStringWithFallback("default");
        if (defT != null) {
            defaultType = defT;
        }

        if (type == null || type.equals("default")) {
            type = defaultType;
        } else {
            type = ASCII.toLowerCase(type);
        }

        // Load the collations/type tailoring, with type fallback.

        // Java porting note: typeFallback is used for setting U_USING_DEFAULT_WARNING in
        // ICU4C, but not used by ICU4J

        // boolean typeFallback = false;
        UResourceBundle data = findWithFallback(collations, type);
        if (data == null &&
                type.length() > 6 && type.startsWith("search")) {
            // fall back from something like "searchjl" to "search"
            // typeFallback = true;
            type = "search";
            data = findWithFallback(collations, type);
        }

        if (data == null && !type.equals(defaultType)) {
            // fall back to the default type
            // typeFallback = true;
            type = defaultType;
            data = findWithFallback(collations, type);
        }

        if (data == null && !type.equals("standard")) {
            // fall back to the "standard" type
            // typeFallback = true;
            type = "standard";
            data = findWithFallback(collations, type);
        }

        if (data == null) {
            return root;
        }

        // Is this the same as the root collator? If so, then use that instead.
        ULocale actualLocale = data.getULocale();
        // https://unicode-org.atlassian.net/browse/ICU-10715 ICUResourceBundle(root).getULocale() != ULocale.ROOT
        // Therefore not just if (actualLocale.equals(ULocale.ROOT) && type.equals("standard")) {
        String actualLocaleName = actualLocale.getName();
        if (isRootLocaleName(actualLocaleName)) {
            actualLocale = ULocale.ROOT;
            if (type.equals("standard")) {
                return root;
            }
        }

        CollationTailoring t = new CollationTailoring(root.settings);
        t.actualLocale = actualLocale;

        // deserialize
        UResourceBundle binary = data.get("%%CollationBin");
        ByteBuffer inBytes = binary.getBinary();
        try {
            CollationDataReader.read(root, inBytes, t);
        } catch (IOException e) {
            throw new ICUUncheckedIOException("Failed to load collation tailoring data for locale:"
                    + actualLocale + " type:" + type, e);
        }

        // Try to fetch the optional rules string.
        try {
            t.setRulesResource(data.get("Sequence"));
        } catch (MissingResourceException ignored) {
            // The rules string is optional; the tailoring is usable without it.
        }

        // Set the collation types on the informational locales,
        // except when they match the default types (for brevity and backwards compatibility).
        // For the valid locale, suppress the default type.
        if (!type.equals(defaultType)) {
            outValidLocale.value = validLocale.setKeywordValue("collation", type);
        }

        // For the actual locale, suppress the default type *according to the actual locale*.
        // For example, zh has default=pinyin and contains all of the Chinese tailorings.
        // zh_Hant has default=stroke but has no other data.
        // For the valid locale "zh_Hant" we need to suppress stroke.
        // For the actual locale "zh" we need to suppress pinyin instead.
        if (!actualLocale.equals(validLocale)) {
            // Opening a bundle for the actual locale should always succeed.
            UResourceBundle actualBundle = UResourceBundle.getBundleInstance(
                    ICUData.ICU_COLLATION_BASE_NAME, actualLocale);
            defT = ((ICUResourceBundle) actualBundle).findStringWithFallback("collations/default");
            if (defT != null) {
                defaultType = defT;
            }
        }

        if (!type.equals(defaultType)) {
            t.actualLocale = t.actualLocale.setKeywordValue("collation", type);
        }

        // if (typeFallback) {
        //     ICU4C implementation sets U_USING_DEFAULT_WARNING here
        // }

        return t;
    }
}