// java.text.RuleBasedCollator Maven / Gradle / Ivy
// Show all versions of android-all Show documentation
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package java.text;
import libcore.icu.RuleBasedCollatorICU;
/**
* A concrete subclass of {@link Collator}.
* It is based on the ICU RuleBasedCollator which implements the
* CLDR and Unicode collation algorithms.
*
* Most of the time, you create a {@link Collator} instance for a {@link java.util.Locale}
* by calling the {@link Collator#getInstance} factory method.
* You can construct a {@code RuleBasedCollator} if you need a custom sort order.
*
*
* <p>The root collator's sort order is the CLDR root collation order
* which in turn is the Unicode default sort order with a few modifications.
* A {@code RuleBasedCollator} is built from a rule {@code String} which changes the
* sort order of some characters and strings relative to the default order.
*
*
* <p>A rule string usually contains one or more rule chains.
* A rule chain consists of a reset followed by one or more rules.
* The reset anchors the following rules in the default sort order.
* The rules change the order of their characters and strings
* relative to the reset point.
*
*
* <p>A reset is an ampersand {@code &} followed by one or more characters for the reset position.
* A rule is a relation operator, which specifies the level of difference,
* also followed by one or more characters.
* A multi-character rule creates a "contraction".
* A multi-character reset position usually creates "expansions".
*
*
* <p>For example, the following rules
* make "ä" sort with a diacritic-like (secondary) difference from "ae"
* (like in German phonebook sorting),
* and make "å" and "aa" sort as a base letter (primary) after "z" (like in Danish).
* Uppercase forms sort with a case-like (tertiary) difference after their lowercase forms.
*
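* <blockquote>
* <pre>
* &amp;AE&lt;&lt;ä &lt;&lt;&lt;Ä
* &amp;z&lt;å&lt;&lt;&lt;Å&lt;&lt;&lt;aa&lt;&lt;&lt;Aa&lt;&lt;&lt;AA
* </pre>
* </blockquote>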
*
* For details see
* <ul>
* <li>CLDR Collation Rule Syntax</li>
* <li>ICU User Guide Collation Customization</li>
* </ul>
*
*
* Note: earlier versions of {@code RuleBasedCollator} up to and including Android 4.4 (KitKat)
* allowed the omission of the reset from the first rule chain.
* This was interpreted as an implied reset after the last non-Han script in the default order.
* However, this is not a useful reset position, except for large tailorings of
* Han characters themselves.
* Starting with the CLDR 24 collation specification and the ICU 53 implementation,
* the initial reset is required.
*
*
* <p>If the rule string does not follow the syntax, then {@code RuleBasedCollator} throws a
* {@code ParseException}.
*/
public class RuleBasedCollator extends Collator {

    /**
     * Wraps an existing ICU collator. Package-private; the wrapper is passed
     * straight to the {@link Collator} superclass constructor.
     *
     * @param wrapper
     *            the ICU collator to delegate to.
     */
    RuleBasedCollator(RuleBasedCollatorICU wrapper) {
        super(wrapper);
    }

    /**
     * Constructs a new instance of {@code RuleBasedCollator} using the
     * specified {@code rules}. (See the {@link RuleBasedCollator class description}.)
     *
     * <p>Note that the {@code rules} are interpreted as a delta to the
     * default sort order. This differs
     * from other implementations which work with full {@code rules}
     * specifications and may result in different behavior.
     *
     * @param rules
     *            the collation rules.
     * @throws NullPointerException
     *             if {@code rules == null}.
     * @throws ParseException
     *             if {@code rules} contains rules with invalid collation rule
     *             syntax, or if any other failure occurs while building the
     *             collator (in which case the error offset is {@code -1} and
     *             the original exception is available via {@code getCause()}).
     */
    public RuleBasedCollator(String rules) throws ParseException {
        if (rules == null) {
            throw new NullPointerException("rules == null");
        }
        try {
            icuColl = new RuleBasedCollatorICU(rules);
        } catch (Exception e) {
            if (e instanceof ParseException) {
                throw (ParseException) e;
            }
            /*
             * -1 means it's not a ParseException. Maybe IOException thrown when
             * an error occurred while reading internal data.
             *
             * ParseException has no (message, cause) constructor, so attach the
             * original failure with initCause to keep its stack trace.
             */
            ParseException wrapped = new ParseException(e.getMessage(), -1);
            wrapped.initCause(e);
            throw wrapped;
        }
    }

    /**
     * Obtains a {@code CollationElementIterator} for the given
     * {@code CharacterIterator}. The source iterator's integrity will be
     * preserved since a new copy will be created for use.
     *
     * @param source
     *            the source character iterator.
     * @return a {@code CollationElementIterator} for {@code source}.
     * @throws NullPointerException
     *             if {@code source == null}.
     */
    public CollationElementIterator getCollationElementIterator(CharacterIterator source) {
        if (source == null) {
            throw new NullPointerException("source == null");
        }
        return new CollationElementIterator(icuColl.getCollationElementIterator(source));
    }

    /**
     * Obtains a {@code CollationElementIterator} for the given string.
     *
     * @param source
     *            the source string.
     * @return the {@code CollationElementIterator} for {@code source}.
     * @throws NullPointerException
     *             if {@code source == null}.
     */
    public CollationElementIterator getCollationElementIterator(String source) {
        if (source == null) {
            throw new NullPointerException("source == null");
        }
        return new CollationElementIterator(icuColl.getCollationElementIterator(source));
    }

    /**
     * Returns the collation rules of this collator. These {@code rules} can be
     * fed into the {@code RuleBasedCollator(String)} constructor.
     *
     * <p>The returned string will be empty unless you constructed the instance yourself.
     * The string forms of the collation rules are omitted to save space on the device.
     *
     * @return the rule string reported by the underlying ICU collator.
     */
    public String getRules() {
        return icuColl.getRules();
    }

    /**
     * Returns a new collator with the same collation rules, decomposition mode and
     * strength value as this collator.
     *
     * @return a shallow copy of this collator.
     * @see java.lang.Cloneable
     */
    @Override
    public Object clone() {
        // The cast documents (and checks) that the superclass copy keeps the runtime type.
        return (RuleBasedCollator) super.clone();
    }

    /**
     * Compares the {@code source} text to the {@code target} text according to
     * the collation rules, strength and decomposition mode for this
     * {@code RuleBasedCollator}. See the {@code Collator} class description
     * for an example of use.
     *
     * @param source
     *            the source text.
     * @param target
     *            the target text.
     * @return an integer which may be a negative value, zero, or else a
     *         positive value depending on whether {@code source} is less than,
     *         equivalent to, or greater than {@code target}.
     * @throws NullPointerException
     *             if {@code source == null} or {@code target == null}.
     */
    @Override
    public int compare(String source, String target) {
        if (source == null) {
            throw new NullPointerException("source == null");
        } else if (target == null) {
            throw new NullPointerException("target == null");
        }
        return icuColl.compare(source, target);
    }

    /**
     * Returns the {@code CollationKey} for the given source text.
     *
     * @param source
     *            the specified source text.
     * @return the {@code CollationKey} for the given source text.
     */
    @Override
    public CollationKey getCollationKey(String source) {
        // No null check here: the argument is handed to the ICU wrapper unchanged.
        return icuColl.getCollationKey(source);
    }

    /**
     * Returns a hash code for this collator, computed from the rule string
     * reported by the underlying ICU collator.
     *
     * @return the hash code of the rule string.
     * @see #equals
     */
    @Override
    public int hashCode() {
        return icuColl.getRules().hashCode();
    }

    /**
     * Compares the specified object with this {@code RuleBasedCollator} and
     * indicates if they are equal. In order to be equal, {@code object} must be
     * an instance of {@code Collator} with the same collation rules and the
     * same attributes.
     *
     * @param obj
     *            the object to compare with this object.
     * @return {@code true} if the specified object is equal to this
     *         {@code RuleBasedCollator}; {@code false} otherwise.
     * @see #hashCode
     */
    @Override
    public boolean equals(Object obj) {
        if (!(obj instanceof Collator)) {
            return false;
        }
        // The actual comparison is delegated to Collator.equals.
        return super.equals(obj);
    }
}