Please wait. This can take some minutes ...
Many resources are needed to download a project. Please understand that we have to compensate our server costs. Thank you in advance.
Project price only 1 $
You can buy this project and download/modify it how often you want.
com.ibm.icu.text.Collator Maven / Gradle / Ivy
Go to download
International Component for Unicode for Java (ICU4J) is a mature, widely used Java library
providing Unicode and Globalization support
/**
*******************************************************************************
* Copyright (C) 1996-2012, International Business Machines Corporation and *
* others. All Rights Reserved. *
*******************************************************************************
*/
package com.ibm.icu.text;
import java.util.Comparator;
import java.util.Enumeration;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.Locale;
import java.util.MissingResourceException;
import java.util.Set;
import com.ibm.icu.impl.ICUDebug;
import com.ibm.icu.impl.ICUResourceBundle;
import com.ibm.icu.impl.Norm2AllModes;
import com.ibm.icu.lang.UScript;
import com.ibm.icu.util.Freezable;
import com.ibm.icu.util.ULocale;
import com.ibm.icu.util.ULocale.Category;
import com.ibm.icu.util.UResourceBundle;
import com.ibm.icu.util.VersionInfo;
/**
* {@icuenhanced java.text.Collator}.{@icu _usage_}
*
* Collator performs locale-sensitive string comparison. A concrete
* subclass, RuleBasedCollator, allows customization of the collation
* ordering by the use of rule sets.
*
* Following the Unicode
* Consortium 's specifications for the
* Unicode Collation
* Algorithm (UCA) , there are 5 different levels of strength used
* in comparisons:
*
*
* PRIMARY strength: Typically, this is used to denote differences between
* base characters (for example, "a" < "b").
* It is the strongest difference. For example, dictionaries are divided
* into different sections by base character.
* SECONDARY strength: Accents in the characters are considered secondary
* differences (for example, "as" < "às" < "at"). Other
* differences
* between letters can also be considered secondary differences, depending
* on the language. A secondary difference is ignored when there is a
* primary difference anywhere in the strings.
* TERTIARY strength: Upper and lower case differences in characters are
* distinguished at tertiary strength (for example, "ao" < "Ao" <
* "aò"). In addition, a variant of a letter differs from the base
* form on the tertiary strength (such as "A" and "Ⓐ"). Another
* example is the
* difference between large and small Kana. A tertiary difference is ignored
* when there is a primary or secondary difference anywhere in the strings.
* QUATERNARY strength: When punctuation is ignored
*
* (see Ignoring Punctuations in the user guide) at PRIMARY to TERTIARY
* strength, an additional strength level can
* be used to distinguish words with and without punctuation (for example,
* "ab" < "a-b" < "aB").
* This difference is ignored when there is a PRIMARY, SECONDARY or TERTIARY
* difference. The QUATERNARY strength should only be used if ignoring
* punctuation is required.
* IDENTICAL strength:
* When all other strengths are equal, the IDENTICAL strength is used as a
* tiebreaker. The Unicode code point values of the NFD form of each string
* are compared, just in case there is no difference.
* For example, Hebrew cantillation marks are only distinguished at this
* strength. This strength should be used sparingly, as strings that differ
* only in their code point values are an extremely rare occurrence.
* Using this strength substantially decreases the performance for both
* comparison and collation key generation APIs. This strength also
* increases the size of the collation key.
*
*
* Unlike the JDK, ICU4J's Collator deals only with 2 decomposition modes,
* the canonical decomposition mode and one that does not use any decomposition.
* The compatibility decomposition mode, java.text.Collator.FULL_DECOMPOSITION
* is not supported here. If the canonical
* decomposition mode is set, the Collator handles un-normalized text properly,
* producing the same results as if the text were normalized in NFD. If
* canonical decomposition is turned off, it is the user's responsibility to
* ensure that all text is already in the appropriate form before performing
* a comparison or before getting a CollationKey.
*
* For more information about the collation service see the
* users
* guide .
*
* Examples of use
*
* // Get the Collator for US English and set its strength to PRIMARY
* Collator usCollator = Collator.getInstance(Locale.US);
* usCollator.setStrength(Collator.PRIMARY);
* if (usCollator.compare("abc", "ABC") == 0) {
* System.out.println("Strings are equivalent");
* }
*
* The following example shows how to compare two strings using the
* Collator for the default locale.
*
* // Compare two strings in the default locale
* Collator myCollator = Collator.getInstance();
* myCollator.setDecomposition(NO_DECOMPOSITION);
* if (myCollator.compare("à\u0325", "a\u0325̀") != 0) {
* System.out.println("à\u0325 is not equals to a\u0325̀ without decomposition");
* myCollator.setDecomposition(CANONICAL_DECOMPOSITION);
* if (myCollator.compare("à\u0325", "a\u0325̀") != 0) {
* System.out.println("Error: à\u0325 should be equals to a\u0325̀ with decomposition");
* }
* else {
* System.out.println("à\u0325 is equals to a\u0325̀ with decomposition");
* }
* }
* else {
* System.out.println("Error: à\u0325 should be not equals to a\u0325̀ without decomposition");
* }
*
*
* @see RuleBasedCollator
* @see CollationKey
* @author Syn Wee Quek
* @stable ICU 2.8
*/
public abstract class Collator implements Comparator, Freezable
{
// public data members ---------------------------------------------------
/**
 * Strongest collator strength value. Typically used to denote differences
 * between base characters. See the class documentation for more explanation.
 * @see #setStrength
 * @see #getStrength
 * @stable ICU 2.8
 */
public static final int PRIMARY = 0;

/**
 * Second level collator strength value.
 * Accents in the characters are considered secondary differences.
 * Other differences between letters can also be considered secondary
 * differences, depending on the language.
 * See the class documentation for more explanation.
 * @see #setStrength
 * @see #getStrength
 * @stable ICU 2.8
 */
public static final int SECONDARY = 1;

/**
 * Third level collator strength value.
 * Upper and lower case differences in characters are distinguished at this
 * strength level. In addition, a variant of a letter differs from the base
 * form on the tertiary level.
 * See the class documentation for more explanation.
 * @see #setStrength
 * @see #getStrength
 * @stable ICU 2.8
 */
public static final int TERTIARY = 2;

/**
 * {@icu} Fourth level collator strength value.
 * When punctuation is ignored
 * (see Ignoring Punctuations in the user guide) at PRIMARY to TERTIARY
 * strength, an additional strength level can
 * be used to distinguish words with and without punctuation.
 * See the class documentation for more explanation.
 * @see #setStrength
 * @see #getStrength
 * @stable ICU 2.8
 */
public static final int QUATERNARY = 3;

/**
 * Smallest Collator strength value. When all other strengths are equal,
 * the IDENTICAL strength is used as a tiebreaker. The Unicode code point
 * values of the NFD form of each string are compared, just in case there
 * is no difference.
 * See the class documentation for more explanation.
 *
 * Note this value is different from the JDK's.
 *
 * @stable ICU 2.8
 */
public static final int IDENTICAL = 15;

/**
 * {@icunote} This is for backwards compatibility with Java APIs only. It
 * should not be used, IDENTICAL should be used instead. ICU's
 * collation does not support Java's FULL_DECOMPOSITION mode.
 * The constant is defined as an alias of {@link #IDENTICAL}.
 * @stable ICU 3.4
 */
public static final int FULL_DECOMPOSITION = IDENTICAL;

/**
 * Decomposition mode value. With NO_DECOMPOSITION set, Strings
 * will not be decomposed for collation. This is the default
 * decomposition setting unless otherwise specified by the locale
 * used to create the Collator.
 *
 * Note this value is different from the JDK's.
 * @see #CANONICAL_DECOMPOSITION
 * @see #getDecomposition
 * @see #setDecomposition
 * @stable ICU 2.8
 */
public static final int NO_DECOMPOSITION = 16;

/**
 * Decomposition mode value. With CANONICAL_DECOMPOSITION set,
 * characters that are canonical variants according to the Unicode standard
 * will be decomposed for collation.
 *
 * CANONICAL_DECOMPOSITION corresponds to Normalization Form D as
 * described in Unicode Technical Report #15.
 *
 * @see #NO_DECOMPOSITION
 * @see #getDecomposition
 * @see #setDecomposition
 * @stable ICU 2.8
 */
public static final int CANONICAL_DECOMPOSITION = 17;
/**
 * Reordering codes for non-script groups that can be reordered under collation.
 * All members are constants; being interface fields they are implicitly
 * {@code public static final}.
 *
 * @see #getReorderCodes
 * @see #setReorderCodes
 * @see #getEquivalentReorderCodes
 * @stable ICU 4.8
 */
public interface ReorderCodes {
    /**
     * A special reordering code that is used to specify the default reordering codes for a locale.
     * @stable ICU 4.8
     */
    int DEFAULT = -1; // == UScript.INVALID_CODE

    /**
     * A special reordering code that is used to specify no reordering codes.
     * @stable ICU 4.8
     */
    int NONE = UScript.UNKNOWN;

    /**
     * A special reordering code that is used to specify all other codes used for reordering except
     * for the codes listed as ReorderCodes and those listed explicitly in a reordering.
     * Defined with the same value as {@link #NONE}.
     * @stable ICU 4.8
     */
    int OTHERS = UScript.UNKNOWN;

    /**
     * Characters with the space property.
     * This is equivalent to the rule value "space".
     * @stable ICU 4.8
     */
    int SPACE = 0x1000;

    /**
     * The first entry in the enumeration of reordering groups. This is intended for use in
     * range checking and enumeration of the reorder codes.
     * @stable ICU 4.8
     */
    int FIRST = SPACE;

    /**
     * Characters with the punctuation property.
     * This is equivalent to the rule value "punct".
     * @stable ICU 4.8
     */
    int PUNCTUATION = 0x1001;

    /**
     * Characters with the symbol property.
     * This is equivalent to the rule value "symbol".
     * @stable ICU 4.8
     */
    int SYMBOL = 0x1002;

    /**
     * Characters with the currency property.
     * This is equivalent to the rule value "currency".
     * @stable ICU 4.8
     */
    int CURRENCY = 0x1003;

    /**
     * Characters with the digit property.
     * This is equivalent to the rule value "digit".
     * @stable ICU 4.8
     */
    int DIGIT = 0x1004;

    /**
     * The limit of the reorder codes (one past the last group code). This is intended
     * for use in range checking and enumeration of the reorder codes.
     * @stable ICU 4.8
     */
    int LIMIT = 0x1005;
}
// public methods --------------------------------------------------------
// public setters --------------------------------------------------------
/**
 * Sets this Collator's strength property. The strength property
 * determines the minimum level of difference considered significant
 * during comparison.
 *
 * The default strength for the Collator is TERTIARY, unless specified
 * otherwise by the locale used to create the Collator.
 *
 * See the Collator class description for an example of use.
 * @param newStrength the new strength value.
 * @see #getStrength
 * @see #PRIMARY
 * @see #SECONDARY
 * @see #TERTIARY
 * @see #QUATERNARY
 * @see #IDENTICAL
 * @throws UnsupportedOperationException if this Collator is frozen.
 * @throws IllegalArgumentException if the new strength value is not one
 * of PRIMARY, SECONDARY, TERTIARY, QUATERNARY or IDENTICAL.
 * @stable ICU 2.8
 */
public void setStrength(int newStrength) {
    // The frozen check comes first, so a frozen collator rejects even invalid values
    // with UnsupportedOperationException.
    if (isFrozen()) {
        throw new UnsupportedOperationException("Attempt to modify frozen object");
    }
    switch (newStrength) {
        case PRIMARY:
        case SECONDARY:
        case TERTIARY:
        case QUATERNARY:
        case IDENTICAL:
            m_strength_ = newStrength;
            return;
        default:
            throw new IllegalArgumentException("Incorrect comparison level.");
    }
}
/**
 * Chainable variant of {@link #setStrength(int)}: applies the strength and
 * returns this Collator.
 * @param newStrength the new strength value, validated by {@link #setStrength(int)}.
 * @return this Collator
 * @internal
 * @deprecated This API is ICU internal only.
 */
public Collator setStrength2(int newStrength) {
    this.setStrength(newStrength);
    return this;
}
/**
 * Sets the decomposition mode of this Collator. Setting this
 * decomposition property with CANONICAL_DECOMPOSITION allows the
 * Collator to handle un-normalized text properly, producing the
 * same results as if the text were normalized. If
 * NO_DECOMPOSITION is set, it is the user's responsibility to
 * ensure that all text is already in the appropriate form before
 * a comparison or before getting a CollationKey. Adjusting
 * decomposition mode allows the user to select between faster and
 * more complete collation behavior.
 *
 * Since a great many of the world's languages do not require
 * text normalization, most locales set NO_DECOMPOSITION as the
 * default decomposition mode.
 *
 * The default decomposition mode for the Collator is
 * NO_DECOMPOSITION, unless specified otherwise by the locale used
 * to create the Collator.
 *
 * See getDecomposition for a description of decomposition
 * mode.
 *
 * @param decomposition the new decomposition mode
 * @see #getDecomposition
 * @see #NO_DECOMPOSITION
 * @see #CANONICAL_DECOMPOSITION
 * @throws UnsupportedOperationException if this Collator is frozen.
 * @throws IllegalArgumentException If the given value is not a valid
 * decomposition mode.
 * @stable ICU 2.8
 */
public void setDecomposition(int decomposition) {
    if (!isFrozen()) {
        // Validation and the actual state change live in the internal setter.
        internalSetDecomposition(decomposition);
        return;
    }
    throw new UnsupportedOperationException("Attempt to modify frozen object");
}
/**
 * Internal set decomposition call to work around the frozen state because of
 * self-modification in the RuleBasedCollator. This method performs no frozen
 * check itself; it must only be called by code that has passed the frozen
 * check already and has the lock if the Collator is frozen.
 * Better still this method should go away and RuleBasedCollator.getSortKeyBytes()
 * should be fixed to not self-modify.
 * @param decomposition the new decomposition mode, either NO_DECOMPOSITION
 * or CANONICAL_DECOMPOSITION
 * @throws IllegalArgumentException if the given value is neither of the two modes.
 * @internal
 */
protected void internalSetDecomposition(int decomposition) {
    switch (decomposition) {
        case NO_DECOMPOSITION:
            m_decomposition_ = decomposition;
            break;
        case CANONICAL_DECOMPOSITION:
            m_decomposition_ = decomposition;
            // ensure the FCD data is initialized
            Norm2AllModes.getFCDNormalizer2();
            break;
        default:
            throw new IllegalArgumentException("Wrong decomposition mode.");
    }
}
/**
 * Sets the reordering codes for this collator.
 * Collation reordering allows scripts and some other defined blocks of characters
 * to be moved relative to each other as a block. This reordering is done on top of
 * the DUCET/CLDR standard collation order. Reordering can specify groups to be placed
 * at the start and/or the end of the collation order. These groups are specified using
 * UScript codes and {@link Collator.ReorderCodes} entries.
 *
 * By default, reordering codes specified for the start of the order are placed in the
 * order given after a group of "special" non-script blocks. These special groups of characters
 * are space, punctuation, symbol, currency, and digit. These special groups are represented with
 * {@link Collator.ReorderCodes} entries. Script groups can be intermingled with
 * these special non-script blocks if those special blocks are explicitly specified in the reordering.
 *
 * The special code OTHERS stands for any script that is not explicitly
 * mentioned in the list of reordering codes given. Anything that is after OTHERS
 * will go at the very end of the reordering in the order given.
 *
 * The special reorder code DEFAULT will reset the reordering for this collator
 * to the default for this collator. The default reordering may be the DUCET/CLDR order or may be
 * a reordering that was specified when this collator was created from resource data or from rules.
 * The DEFAULT code must be the sole code supplied when it is used; supplying it together with
 * other codes is an error.
 *
 * The special reorder code NONE will remove any reordering for this collator.
 * The result of setting no reordering will be to have the DUCET/CLDR ordering used. The
 * NONE code must be the sole code supplied when it is used.
 *
 * This base implementation does not support reordering and always throws
 * UnsupportedOperationException.
 * @param order the reordering codes to apply to this collator; if this is null or an empty array
 * then this clears any existing reordering
 * @throws UnsupportedOperationException always, in this base implementation.
 * @see #getReorderCodes
 * @see #getEquivalentReorderCodes
 * @see Collator.ReorderCodes
 * @see UScript
 * @stable ICU 4.8
 */
public void setReorderCodes(int... order) {
    // No reordering support at this level.
    throw new UnsupportedOperationException();
}
// public getters --------------------------------------------------------
/**
 * Returns the Collator for the current default locale.
 * The locale used is the one reported by ULocale.getDefault().
 * @return the Collator for the default locale (for example, en_US) if it
 * is created successfully. Otherwise if there is no Collator
 * associated with the current locale, the default UCA collator
 * will be returned.
 * @see java.util.Locale#getDefault()
 * @see #getInstance(Locale)
 * @stable ICU 2.8
 */
public static final Collator getInstance() {
    final ULocale defaultLocale = ULocale.getDefault();
    return getInstance(defaultLocale);
}
/**
 * Clones the collator by delegating to the superclass clone.
 * @stable ICU 2.6
 * @return a clone of this collator.
 * @throws CloneNotSupportedException if the superclass clone fails.
 */
public Object clone() throws CloneNotSupportedException {
    final Object copy = super.clone();
    return copy;
}
// begin registry stuff
/**
 * A factory used with registerFactory to register multiple collators and provide
 * display names for them. If standard locale display names are sufficient,
 * Collator instances may be registered instead.
 *
 * Note: as of ICU4J 3.2, the default API for CollatorFactory uses
 * ULocale instead of Locale. Instead of overriding createCollator(Locale),
 * new implementations should override createCollator(ULocale). The two
 * createCollator overloads delegate to each other, so one of these two
 * methods MUST be overridden or else an infinite loop will occur.
 * @stable ICU 2.6
 */
public static abstract class CollatorFactory {
    /**
     * Return true if this factory will be visible. Default is true.
     * If not visible, the locales supported by this factory will not
     * be listed by getAvailableLocales.
     *
     * @return true if this factory is visible
     * @stable ICU 2.6
     */
    public boolean visible() {
        return true;
    }

    /**
     * Return an instance of the appropriate collator. If the locale
     * is not supported, return null.
     * Note: as of ICU4J 3.2, implementations should override
     * this method instead of createCollator(Locale).
     * @param loc the locale for which this collator is to be created.
     * @return the newly created collator.
     * @stable ICU 3.2
     */
    public Collator createCollator(ULocale loc) {
        // Default: fall back to the legacy Locale-based overload.
        return createCollator(loc.toLocale());
    }

    /**
     * Return an instance of the appropriate collator. If the locale
     * is not supported, return null.
     *
     * Note: as of ICU4J 3.2, implementations should override
     * createCollator(ULocale) instead of this method, and inherit this
     * method's implementation. This method is no longer abstract
     * and instead delegates to createCollator(ULocale).
     * @param loc the locale for which this collator is to be created.
     * @return the newly created collator.
     * @stable ICU 2.6
     */
    public Collator createCollator(Locale loc) {
        return createCollator(ULocale.forLocale(loc));
    }

    /**
     * Return the name of the collator for the objectLocale, localized for the displayLocale.
     * If objectLocale is not visible or not defined by the factory, return null.
     * @param objectLocale the locale identifying the collator
     * @param displayLocale the locale for which the display name of the collator should be localized
     * @return the display name
     * @stable ICU 2.6
     */
    public String getDisplayName(Locale objectLocale, Locale displayLocale) {
        return getDisplayName(ULocale.forLocale(objectLocale), ULocale.forLocale(displayLocale));
    }

    /**
     * Return the name of the collator for the objectLocale, localized for the displayLocale.
     * If objectLocale is not visible or not defined by the factory, return null.
     * @param objectLocale the locale identifying the collator
     * @param displayLocale the locale for which the display name of the collator should be localized
     * @return the display name, or null if this factory is not visible or the base name of
     * objectLocale is not among the supported locale IDs
     * @stable ICU 3.2
     */
    public String getDisplayName(ULocale objectLocale, ULocale displayLocale) {
        if (visible()) {
            Set<String> supported = getSupportedLocaleIDs();
            // Supported IDs are matched against the locale's base name.
            String name = objectLocale.getBaseName();
            if (supported.contains(name)) {
                return objectLocale.getDisplayName(displayLocale);
            }
        }
        return null;
    }

    /**
     * Return an unmodifiable collection of the locale names directly
     * supported by this factory.
     *
     * @return the set of supported locale IDs.
     * @stable ICU 2.6
     */
    public abstract Set<String> getSupportedLocaleIDs();

    /**
     * Empty default constructor.
     * @stable ICU 2.6
     */
    protected CollatorFactory() {
    }
}
/**
 * Package-private bridge to the collation service implementation. Collator's static
 * service methods (getInstance, registerInstance, registerFactory, unregister,
 * getAvailableLocales, getAvailableULocales, getDisplayName) forward to an
 * instance of this class.
 */
abstract static class ServiceShim {
    /** Backs {@code Collator.getInstance(ULocale)}. */
    abstract Collator getInstance(ULocale locale);

    /** Backs {@code Collator.registerInstance}; the result is the key later passed to unregister. */
    abstract Object registerInstance(Collator collator, ULocale locale);

    /** Backs {@code Collator.registerFactory}; the result is the key later passed to unregister. */
    abstract Object registerFactory(CollatorFactory factory);

    /** Backs {@code Collator.unregister}. */
    abstract boolean unregister(Object registryKey);

    /** Backs {@code Collator.getAvailableLocales}. */
    abstract Locale[] getAvailableLocales(); // TODO remove

    /** Backs {@code Collator.getAvailableULocales}. */
    abstract ULocale[] getAvailableULocales();

    /** Backs the {@code Collator.getDisplayName} overloads. */
    abstract String getDisplayName(ULocale objectLocale, ULocale displayLocale);
}
/** Lazily created service shim; stays null until {@link #getShim()} is first called. */
private static ServiceShim shim;

/**
 * Returns the collation service shim, instantiating
 * {@code com.ibm.icu.text.CollatorServiceShim} reflectively on first use.
 *
 * @return the shared shim instance
 * @throws MissingResourceException rethrown unchanged if raised while loading the shim
 * @throws RuntimeException if the shim class cannot be loaded or instantiated for any
 * other reason; the original exception is attached as the cause
 */
private static ServiceShim getShim() {
    // Note: this instantiation is safe on loose-memory-model configurations
    // despite lack of synchronization, since the shim instance has no state--
    // it's all in the class init. The worst problem is we might instantiate
    // two shim instances, but they'll share the same state so that's ok.
    if (shim == null) {
        try {
            Class<?> cls = Class.forName("com.ibm.icu.text.CollatorServiceShim");
            shim = (ServiceShim) cls.newInstance();
        } catch (MissingResourceException e) {
            ///CLOVER:OFF
            throw e;
            ///CLOVER:ON
        } catch (Exception e) {
            ///CLOVER:OFF
            if (DEBUG) {
                e.printStackTrace();
            }
            // Same message as before, but keep the underlying exception as the cause
            // so the real failure (class not found, access, ...) is not lost.
            throw new RuntimeException(e.getMessage(), e);
            ///CLOVER:ON
        }
    }
    return shim;
}
/**
 * {@icu} Returns the Collator for the desired locale.
 * @param locale the desired locale.
 * @return Collator for the desired locale if it is created successfully.
 * Otherwise if there is no Collator
 * associated with the current locale, a default UCA collator will
 * be returned.
 * @see java.util.Locale
 * @see java.util.ResourceBundle
 * @see #getInstance(Locale)
 * @see #getInstance()
 * @stable ICU 3.0
 */
public static final Collator getInstance(ULocale locale) {
    // fetching from service cache is faster than instantiation
    final ServiceShim service = getShim();
    return service.getInstance(locale);
}
/**
 * Returns the Collator for the desired locale. The Locale is converted to a
 * ULocale and the lookup is delegated to {@link #getInstance(ULocale)}.
 * @param locale the desired locale.
 * @return Collator for the desired locale if it is created successfully.
 * Otherwise if there is no Collator
 * associated with the current locale, a default UCA collator will
 * be returned.
 * @see java.util.Locale
 * @see java.util.ResourceBundle
 * @see #getInstance(ULocale)
 * @see #getInstance()
 * @stable ICU 2.8
 */
public static final Collator getInstance(Locale locale) {
    final ULocale converted = ULocale.forLocale(locale);
    return getInstance(converted);
}
/**
 * {@icu} Registers a collator as the default collator for the provided locale. The
 * collator should not be modified after it is registered.
 *
 * @param collator the collator to register
 * @param locale the locale for which this is the default collator
 * @return an object that can be used to unregister the registered collator.
 *
 * @stable ICU 3.2
 */
public static final Object registerInstance(Collator collator, ULocale locale) {
    final ServiceShim service = getShim();
    return service.registerInstance(collator, locale);
}
/**
 * {@icu} Registers a collator factory.
 *
 * @param factory the factory to register
 * @return an object that can be used to unregister the registered factory.
 *
 * @stable ICU 2.6
 */
public static final Object registerFactory(CollatorFactory factory) {
    final ServiceShim service = getShim();
    return service.registerFactory(factory);
}
/**
 * {@icu} Unregisters a collator previously registered using registerInstance.
 * If the service shim has not been created yet, this returns false
 * without creating it.
 * @param registryKey the object previously returned by registerInstance.
 * @return true if the collator was successfully unregistered.
 * @stable ICU 2.6
 */
public static final boolean unregister(Object registryKey) {
    return shim != null && shim.unregister(registryKey);
}
/**
 * Returns the set of locales, as Locale objects, for which collators
 * are installed. Note that Locale objects do not support RFC 3066.
 * While the service shim has not been created, the list is read directly
 * from the collation resource bundles.
 * @return the list of locales in which collators are installed.
 * This list includes any that have been registered, in addition to
 * those that are installed with ICU4J.
 * @stable ICU 2.4
 */
public static Locale[] getAvailableLocales() {
    // TODO make this wrap getAvailableULocales later
    if (shim != null) {
        return shim.getAvailableLocales();
    }
    return ICUResourceBundle.getAvailableLocales(
            ICUResourceBundle.ICU_COLLATION_BASE_NAME, ICUResourceBundle.ICU_DATA_CLASS_LOADER);
}
/**
 * {@icu} Returns the set of locales, as ULocale objects, for which collators
 * are installed. ULocale objects support RFC 3066.
 * While the service shim has not been created, the list is read directly
 * from the collation resource bundles.
 * @return the list of locales in which collators are installed.
 * This list includes any that have been registered, in addition to
 * those that are installed with ICU4J.
 * @stable ICU 3.0
 */
public static final ULocale[] getAvailableULocales() {
    if (shim != null) {
        return shim.getAvailableULocales();
    }
    return ICUResourceBundle.getAvailableULocales(
            ICUResourceBundle.ICU_COLLATION_BASE_NAME, ICUResourceBundle.ICU_DATA_CLASS_LOADER);
}
/**
 * The list of keywords for this service. This must be kept in sync with
 * the resource data.
 * @since ICU 3.0
 */
private static final String[] KEYWORDS = new String[] { "collation" };

/**
 * The resource name for this service. Note that this is not the same as
 * the keyword for this service.
 * @since ICU 3.0
 */
private static final String RESOURCE = "collations";

/**
 * The resource bundle base name for this service.
 * @since ICU 3.0
 */
private static final String BASE = ICUResourceBundle.ICU_COLLATION_BASE_NAME;
/**
 * {@icu} Returns an array of all possible keywords that are relevant to
 * collation. At this point, the only recognized keyword for this
 * service is "collation".
 * A fresh copy is returned on every call, so callers may modify the
 * result without affecting this service.
 * @return an array of valid collation keywords.
 * @see #getKeywordValues
 * @stable ICU 3.0
 */
public static final String[] getKeywords() {
    // Defensive copy: KEYWORDS is also used to validate the argument of
    // getKeywordValues, so the shared array must not leak to callers.
    return KEYWORDS.clone();
}
/**
 * {@icu} Given a keyword, returns an array of all values for
 * that keyword that are currently in use.
 * @param keyword one of the keywords returned by getKeywords.
 * @return the values read from the collation resource data for the keyword.
 * @throws IllegalArgumentException if keyword is not the collation keyword.
 * @see #getKeywords
 * @stable ICU 3.0
 */
public static final String[] getKeywordValues(String keyword) {
    if (keyword.equals(KEYWORDS[0])) {
        return ICUResourceBundle.getKeywordValues(BASE, RESOURCE);
    }
    throw new IllegalArgumentException("Invalid keyword: " + keyword);
}
/**
 * {@icu} Given a key and a locale, returns an array of string values in a preferred
 * order that would make a difference. These are all and only those values where
 * the open (creation) of the service with the locale formed from the input locale
 * plus input keyword and that value has different behavior than creation with the
 * input locale alone.
 *
 * The default collation type (the first "default" entry found while walking from
 * the locale's bundle up through its parents) is placed first, followed by the
 * remaining types in the order they were discovered.
 * @param key one of the keys supported by this service. For now, only
 * "collation" is supported. This implementation does not consult the key.
 * @param locale the locale
 * @param commonlyUsed if set to true it will return only commonly used values
 * with the given locale in preferred order. Otherwise,
 * it will return all the available values for the locale.
 * This implementation currently ignores the flag.
 * @return an array of string values for the given key and the locale; empty if
 * the bundles list neither a default nor any collation type.
 * @stable ICU 4.2
 */
public static final String[] getKeywordValuesForLocale(String key, ULocale locale,
        boolean commonlyUsed) {
    // Note: The parameter commonlyUsed is actually not used.
    // The switch is in the method signature for consistency
    // with other locale services.

    // Read available collation values from collation bundles
    String baseLoc = locale.getBaseName();
    LinkedList<String> values = new LinkedList<String>();
    UResourceBundle bundle = UResourceBundle.getBundleInstance(
            ICUResourceBundle.ICU_BASE_NAME + "/coll", baseLoc);
    String defcoll = null;
    while (bundle != null) {
        UResourceBundle collations = bundle.get("collations");
        Enumeration<String> collEnum = collations.getKeys();
        while (collEnum.hasMoreElements()) {
            String collkey = collEnum.nextElement();
            if (collkey.equals("default")) {
                if (defcoll == null) {
                    // Keep the default from the most specific bundle that declares one
                    defcoll = collations.getString("default");
                }
            } else if (!values.contains(collkey)) {
                values.add(collkey);
            }
        }
        bundle = ((ICUResourceBundle) bundle).getParent();
    }

    // Reordering: default first, then everything else in discovery order.
    // Built as a list so that a missing default, or a default that is not among the
    // listed types, cannot run past the end of a fixed-size array.
    LinkedList<String> ordered = new LinkedList<String>();
    if (defcoll != null) {
        ordered.add(defcoll);
    }
    for (String collKey : values) {
        if (!collKey.equals(defcoll)) {
            ordered.add(collKey);
        }
    }
    return ordered.toArray(new String[ordered.size()]);
}
/**
 * {@icu} Returns the functionally equivalent locale for the given
 * requested locale, with respect to given keyword, for the
 * collation service. If two locales return the same result, then
 * collators instantiated for these locales will behave
 * equivalently. The converse is not always true; two collators
 * may in fact be equivalent, but return different results, due to
 * internal details. The return result has no other meaning than
 * that stated above, and implies nothing as to the relationship
 * between the two locales. This is intended for use by
 * applications who wish to cache collators, or otherwise reuse
 * collators when possible. The functional equivalent may change
 * over time. For more information, please see the
 * Locales and Services section of the ICU User Guide.
 * @param keyword a particular keyword as enumerated by
 * getKeywords.
 * @param locID The requested locale
 * @param isAvailable If non-null, isAvailable[0] will receive an
 * output boolean that indicates whether the requested locale was
 * 'available' to the collation service. If non-null, isAvailable
 * must have length >= 1.
 * @return the locale
 * @stable ICU 3.0
 */
public static final ULocale getFunctionalEquivalent(String keyword,
        ULocale locID,
        boolean isAvailable[]) {
    // The lookup itself is implemented by ICUResourceBundle over the collation data.
    return ICUResourceBundle.getFunctionalEquivalent(
            BASE,
            ICUResourceBundle.ICU_DATA_CLASS_LOADER,
            RESOURCE,
            keyword,
            locID,
            isAvailable,
            true);
}
/**
 * {@icu} Returns the functionally equivalent locale for the given
 * requested locale, with respect to given keyword, for the
 * collation service. Same as the three-argument overload with a null
 * isAvailable array, i.e. availability is not reported.
 * @param keyword a particular keyword as enumerated by
 * getKeywords.
 * @param locID The requested locale
 * @return the locale
 * @see #getFunctionalEquivalent(String,ULocale,boolean[])
 * @stable ICU 3.0
 */
public static final ULocale getFunctionalEquivalent(String keyword, ULocale locID) {
    final boolean[] availabilityNotWanted = null;
    return getFunctionalEquivalent(keyword, locID, availabilityNotWanted);
}
/**
 * {@icu} Returns the name of the collator for the objectLocale, localized for the
 * displayLocale. Both locales are converted to ULocale before the lookup.
 * @param objectLocale the locale of the collator
 * @param displayLocale the locale for the collator's display name
 * @return the display name
 * @stable ICU 2.6
 */
public static String getDisplayName(Locale objectLocale, Locale displayLocale) {
    // Obtain the shim first, then convert the arguments left to right.
    final ServiceShim service = getShim();
    final ULocale object = ULocale.forLocale(objectLocale);
    final ULocale display = ULocale.forLocale(displayLocale);
    return service.getDisplayName(object, display);
}
/**
 * {@icu} Returns the name of the collator for the objectLocale, localized for the
 * displayLocale.
 * @param objectLocale the locale of the collator
 * @param displayLocale the locale for the collator's display name
 * @return the display name
 * @stable ICU 3.2
 */
public static String getDisplayName(ULocale objectLocale, ULocale displayLocale) {
    final ServiceShim service = getShim();
    return service.getDisplayName(objectLocale, displayLocale);
}
/**
 * {@icu} Returns the name of the collator for the objectLocale, localized for the
 * default DISPLAY locale.
 * @param objectLocale the locale of the collator
 * @return the display name
 * @see com.ibm.icu.util.ULocale.Category#DISPLAY
 * @stable ICU 2.6
 */
public static String getDisplayName(Locale objectLocale) {
    // Obtain the shim first, then resolve the arguments left to right.
    final ServiceShim service = getShim();
    final ULocale object = ULocale.forLocale(objectLocale);
    final ULocale display = ULocale.getDefault(Category.DISPLAY);
    return service.getDisplayName(object, display);
}
/**
 * {@icu} Returns the name of the collator for the objectLocale, localized for the
 * default DISPLAY locale.
 * @param objectLocale the locale of the collator
 * @return the display name
 * @see com.ibm.icu.util.ULocale.Category#DISPLAY
 * @stable ICU 3.2
 */
public static String getDisplayName(ULocale objectLocale) {
    final ServiceShim service = getShim();
    final ULocale display = ULocale.getDefault(Category.DISPLAY);
    return service.getDisplayName(objectLocale, display);
}
/**
 * Returns this Collator's strength property. The strength property
 * determines the minimum level of difference considered significant.
 *
 * {@icunote} This can return QUATERNARY strength, which is not supported by the
 * JDK version.
 *
 * See the Collator class description for more details.
 *
 * @return this Collator's current strength property.
 * @see #setStrength
 * @see #PRIMARY
 * @see #SECONDARY
 * @see #TERTIARY
 * @see #QUATERNARY
 * @see #IDENTICAL
 * @stable ICU 2.8
 */
public int getStrength() {
    return this.m_strength_;
}
/**
 * Returns the decomposition mode of this Collator. The decomposition mode
 * determines how Unicode composed characters are handled.
 *
 * See the Collator class description for more details.
 *
 * @return the decomposition mode
 * @see #setDecomposition
 * @see #NO_DECOMPOSITION
 * @see #CANONICAL_DECOMPOSITION
 * @stable ICU 2.8
 */
public int getDecomposition() {
    // Plain accessor for the stored decomposition mode.
    return this.m_decomposition_;
}
// public other methods -------------------------------------------------
/**
 * Tests whether two text Strings are equal according to this Collator's
 * rules, strength and decomposition mode. Convenience method: the strings
 * are equal exactly when {@link #compare(String, String)} returns zero.
 * @param source the source string to be compared.
 * @param target the target string to be compared.
 * @return true if the strings are equal according to the collation
 *         rules, otherwise false.
 * @see #compare
 * @throws NullPointerException thrown if either argument is null.
 * @stable ICU 2.8
 */
public boolean equals(String source, String target) {
    final int order = compare(source, target);
    return order == 0;
}
/**
 * {@icu} Returns a UnicodeSet that contains all the characters and sequences tailored
 * in this collator.
 *
 * This implementation returns a newly created set built from the full
 * code point range U+0000 through U+10FFFF.
 * @return a UnicodeSet object containing all the
 *         code points and sequences that may sort differently than
 *         in the UCA.
 * @stable ICU 2.4
 */
public UnicodeSet getTailoredSet() {
    // Character.MIN_CODE_POINT is 0 and Character.MAX_CODE_POINT is 0x10FFFF.
    final int firstCodePoint = Character.MIN_CODE_POINT;
    final int lastCodePoint = Character.MAX_CODE_POINT;
    return new UnicodeSet(firstCodePoint, lastCodePoint);
}
/**
 * Compares the source text String to the target text String according to
 * this Collator's rules, strength and decomposition mode.
 * Returns an integer less than, equal to or greater than zero depending on
 * whether the source String is less than, equal to or greater than the
 * target String. See the Collator class description for an example of use.
 *
 * @param source the source String.
 * @param target the target String.
 * @return an integer value: less than zero if source is less than target,
 *         zero if source and target are equal, and greater than zero if
 *         source is greater than target.
 * @see CollationKey
 * @see #getCollationKey
 * @throws NullPointerException thrown if either argument is null.
 * @stable ICU 2.8
 */
public abstract int compare(String source, String target);
/**
 * Compares the source Object to the target Object. Both arguments are cast
 * to String and handed to {@link #compare(String, String)}.
 *
 * @param source the source Object.
 * @param target the target Object.
 * @return an integer value: less than zero if source is less than target,
 *         zero if source and target are equal, and greater than zero if
 *         source is greater than target.
 * @throws ClassCastException thrown if either argument cannot be cast to String.
 * @stable ICU 4.2
 */
public int compare(Object source, Object target) {
    // Cast in argument order so a bad source is reported before a bad target.
    final String sourceText = (String) source;
    final String targetText = (String) target;
    return compare(sourceText, targetText);
}
/**
 * Transforms the String into a CollationKey suitable for efficient
 * repeated comparison. The resulting key depends on the collator's
 * rules, strength and decomposition mode.
 *
 * See the CollationKey class documentation for more information.
 * @param source the string to be transformed into a CollationKey.
 * @return the CollationKey for the given String based on this Collator's
 *         collation rules. If the source String is null, a null
 *         CollationKey is returned.
 * @see CollationKey
 * @see #compare(String, String)
 * @see #getRawCollationKey
 * @stable ICU 2.8
 */
public abstract CollationKey getCollationKey(String source);
/**
 * {@icu} Returns the simpler form of a CollationKey for the String source following
 * the rules of this Collator and stores the result into the user provided argument
 * key. If key has an internal byte array whose length is too small for the result,
 * the internal byte array will be grown to the exact required size.
 * @param source the text String to be transformed into a RawCollationKey
 * @param key the user provided RawCollationKey that receives the result;
 *        may be null, in which case a new instance is created
 * @return If key is null, a new instance of RawCollationKey will be
 *         created and returned, otherwise the user provided key will be
 *         returned.
 * @see #compare(String, String)
 * @see #getCollationKey
 * @see RawCollationKey
 * @stable ICU 2.8
 */
public abstract RawCollationKey getRawCollationKey(String source,
RawCollationKey key);
/**
 * {@icu} Sets the variable top to the collation element value of the
 * supplied string.
 *
 * Variable top is a two byte primary value which causes all the codepoints
 * with primary values that are less than or equal to the variable top to be
 * shifted when alternate handling is set to SHIFTED.
 *
 * @param varTop one or more (if contraction) characters to which the
 *        variable top should be set
 * @return an int value containing the value of the variable top in the
 *         upper 16 bits. The lower 16 bits are undefined.
 * @throws IllegalArgumentException is thrown if varTop argument is not
 *         a valid variable top element. A variable top element is
 *         invalid when it is a contraction that does not exist in the
 *         Collation order or when the PRIMARY strength collation
 *         element for the variable top has more than two bytes
 * @see #getVariableTop
 * @see RuleBasedCollator#setAlternateHandlingShifted
 * @stable ICU 2.6
 */
public abstract int setVariableTop(String varTop);
/**
 * {@icu} Returns the variable top value of a Collator.
 * The lower 16 bits are undefined and should be ignored.
 * @return the variable top value of a Collator, held in the upper 16 bits.
 * @see #setVariableTop
 * @stable ICU 2.6
 */
public abstract int getVariableTop();
/**
 * {@icu} Sets the variable top to the collation element value supplied.
 * The variable top is taken from the upper 16 bits of the argument;
 * the lower 16 bits are ignored.
 * @param varTop collation element value, as returned by setVariableTop or
 *        getVariableTop
 * @see #getVariableTop
 * @see #setVariableTop
 * @stable ICU 2.6
 */
public abstract void setVariableTop(int varTop);
/**
 * {@icu} Returns the version of this collator object.
 * @return the version object associated with this collator
 * @see #getUCAVersion
 * @stable ICU 2.8
 */
public abstract VersionInfo getVersion();
/**
 * {@icu} Returns the UCA version of this collator object.
 * @return the UCA version object associated with this collator
 * @see #getVersion
 * @stable ICU 2.8
 */
public abstract VersionInfo getUCAVersion();
/**
 * Retrieves the reordering codes for this collator.
 * These reordering codes are a combination of UScript codes and ReorderCodes.
 *
 * This implementation does not support the operation and always throws.
 * @return a copy of the reordering codes for this collator;
 *         if none are set then returns an empty array
 * @throws UnsupportedOperationException always, in this implementation
 * @see #setReorderCodes
 * @see #getEquivalentReorderCodes
 * @see Collator.ReorderCodes
 * @see UScript
 * @stable ICU 4.8
 */
public int[] getReorderCodes() {
    final UnsupportedOperationException notSupported = new UnsupportedOperationException();
    throw notSupported;
}
/**
 * Retrieves all the reorder codes that are grouped with the given reorder code. Some reorder
 * codes are grouped and must reorder together.
 *
 * This implementation does not support the operation and always throws.
 *
 * @param reorderCode code for which equivalents are to be retrieved
 * @return the set of all reorder codes in the same group as the given reorder code.
 * @throws UnsupportedOperationException always, in this implementation
 * @see #setReorderCodes
 * @see #getReorderCodes
 * @see Collator.ReorderCodes
 * @see UScript
 * @stable ICU 4.8
 */
public static int[] getEquivalentReorderCodes(int reorderCode) {
    final UnsupportedOperationException notSupported = new UnsupportedOperationException();
    throw notSupported;
}
// Freezable interface implementation -------------------------------------------------
/**
 * Determines whether the object has been frozen or not.
 * This implementation always reports the object as not frozen.
 * @return false in this implementation
 * @stable ICU 4.8
 */
public boolean isFrozen() {
    final boolean frozen = false;
    return frozen;
}
/**
 * Freezes the collator.
 * This implementation does not support freezing and always throws.
 * @return the collator itself.
 * @throws UnsupportedOperationException always, in this implementation
 * @stable ICU 4.8
 */
public Collator freeze() {
    final String reason = "Needs to be implemented by the subclass.";
    throw new UnsupportedOperationException(reason);
}
/**
 * Provides for the clone operation. Any clone is initially unfrozen.
 * This implementation does not support cloning and always throws.
 * @return the unfrozen clone
 * @throws UnsupportedOperationException always, in this implementation
 * @stable ICU 4.8
 */
public Collator cloneAsThawed() {
    final String reason = "Needs to be implemented by the subclass.";
    throw new UnsupportedOperationException(reason);
}
// protected constructor -------------------------------------------------
/**
 * Empty default constructor, declared explicitly so that it appears in the
 * generated javadocs.
 * @stable ICU 2.4
 */
protected Collator() {
    // Intentionally empty.
}
// package private methods -----------------------------------------------
// private data members --------------------------------------------------
/**
 * Collation strength of this collator; initialized to TERTIARY and
 * returned by getStrength().
 */
private int m_strength_ = TERTIARY;
/**
 * Decomposition mode of this collator; initialized to
 * CANONICAL_DECOMPOSITION and returned by getDecomposition().
 */
private int m_decomposition_ = CANONICAL_DECOMPOSITION;
/**
 * Debug flag, initialized from ICUDebug using the "collator" key.
 */
private static final boolean DEBUG = ICUDebug.enabled("collator");
// private methods -------------------------------------------------------
// end registry stuff
// -------- BEGIN ULocale boilerplate --------
/**
 * {@icu} Returns the locale that was used to create this object, or null.
 * This may differ from the locale requested at the time of
 * this object's creation. For example, if an object is created
 * for locale <tt>en_US_CALIFORNIA</tt>, the actual data may be
 * drawn from <tt>en</tt> (the <i>actual</i> locale), and
 * <tt>en_US</tt> may be the most specific locale that exists (the
 * <i>valid</i> locale).
 *
 * <p>Note: This method will be implemented in ICU 3.0; ICU 2.8
 * contains a partial preview implementation. The <i>actual</i>
 * locale is returned correctly, but the <i>valid</i> locale is
 * not, in most cases.
 *
 * <p>Any <code>type</code> other than
 * {@link com.ibm.icu.util.ULocale#ACTUAL_LOCALE} yields the valid locale.
 * @param type type of information requested, either {@link
 * com.ibm.icu.util.ULocale#VALID_LOCALE} or {@link
 * com.ibm.icu.util.ULocale#ACTUAL_LOCALE}.
 * @return the information specified by <i>type</i>, or null if
 * this object was not constructed from locale data.
 * @see com.ibm.icu.util.ULocale
 * @see com.ibm.icu.util.ULocale#VALID_LOCALE
 * @see com.ibm.icu.util.ULocale#ACTUAL_LOCALE
 * @draft ICU 2.8 (retain)
 * @provisional This API might change or be removed in a future release.
 */
public final ULocale getLocale(ULocale.Type type) {
    if (type == ULocale.ACTUAL_LOCALE) {
        return this.actualLocale;
    }
    return this.validLocale;
}
/*
 * Records the locales that were used to create this object. If the
 * object was not constructed from locale data, both arguments should
 * be null; otherwise neither should be null. The actual locale must be
 * at the same level as, or less specific than, the valid locale (this
 * is not checked here). This method is intended for use by factories
 * or other entities that create objects of this class.
 * @param valid the most specific locale containing any resource
 * data, or null
 * @param actual the locale containing data used to construct this
 * object, or null
 * @throws IllegalArgumentException if exactly one of the arguments is null
 * @see com.ibm.icu.util.ULocale
 * @see com.ibm.icu.util.ULocale#VALID_LOCALE
 * @see com.ibm.icu.util.ULocale#ACTUAL_LOCALE
 */
final void setLocale(ULocale valid, ULocale actual) {
    final boolean validMissing = (valid == null);
    final boolean actualMissing = (actual == null);
    // Change the following to an assertion later
    ///CLOVER:OFF
    // Not expected to trigger: callers are protected functions that
    // verify valid and actual are non-null before passing them in.
    if (validMissing ^ actualMissing) {
        throw new IllegalArgumentException();
    }
    ///CLOVER:ON
    // A further possible check: that the actual locale is at the same
    // level as, or less specific than, the valid locale.
    this.validLocale = valid;
    this.actualLocale = actual;
}
/*
 * The most specific locale containing any resource data, or null.
 * Assigned by setLocale and returned by getLocale for any type other
 * than ULocale.ACTUAL_LOCALE.
 * @see com.ibm.icu.util.ULocale
 */
private ULocale validLocale;
/*
 * The locale containing data used to construct this object, or
 * null. Assigned by setLocale and returned by getLocale for
 * ULocale.ACTUAL_LOCALE.
 * @see com.ibm.icu.util.ULocale
 */
private ULocale actualLocale;
// -------- END ULocale boilerplate --------
}