All downloads are free. Search and download functionality uses the official Maven repository.

com.ibm.icu.text.CaseMap Maven / Gradle / Ivy

The newest version!
// © 2017 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
package com.ibm.icu.text;

import java.util.Locale;

import com.ibm.icu.impl.CaseMapImpl;
import com.ibm.icu.impl.UCaseProps;
import com.ibm.icu.lang.UCharacter;
import com.ibm.icu.util.ULocale;

/**
 * Low-level case mapping options and methods. Immutable.
 * "Setters" return instances with the union of the current and new options set.
 *
 * This class is not intended for public subclassing.
 *
 * @stable ICU 59
 */
public abstract class CaseMap {
    /**
     * Bit set of case-mapping options carried by this immutable instance.
     *
     * @internal
     * @deprecated This API is ICU internal only.
     */
    @Deprecated
    protected int internalOptions;

    // Private so that only the nested classes in this file can extend CaseMap.
    private CaseMap(int opt) { internalOptions = opt; }

    /**
     * Resolves the case-mapping locale code for a Java locale.
     * A null locale is replaced by {@link Locale#getDefault()} before the lookup.
     */
    private static int getCaseLocale(Locale locale) {
        if (locale == null) {
            locale = Locale.getDefault();
        }
        return UCaseProps.getCaseLocale(locale);
    }

    /**
     * @return Lowercasing object with default options.
     * @stable ICU 59
     */
    public static Lower toLower() { return Lower.DEFAULT; }
    /**
     * @return Uppercasing object with default options.
     * @stable ICU 59
     */
    public static Upper toUpper() { return Upper.DEFAULT; }
    /**
     * @return Titlecasing object with default options.
     * @stable ICU 59
     */
    public static Title toTitle() { return Title.DEFAULT; }
    /**
     * @return Case folding object with default options.
     * @stable ICU 59
     */
    public static Fold fold() { return Fold.DEFAULT; }

    /**
     * Returns an instance that behaves like this one but
     * omits unchanged text when case-mapping with {@link Edits}.
     *
     * @return an options object with this option.
     * @stable ICU 59
     */
    public abstract CaseMap omitUnchangedText();

    /**
     * Lowercasing options and methods. Immutable.
     *
     * @see #toLower()
     * @stable ICU 59
     */
    public static final class Lower extends CaseMap {
        // Lower has only two possible option states, so both instances are shared.
        private static final Lower DEFAULT = new Lower(0);
        private static final Lower OMIT_UNCHANGED = new Lower(CaseMapImpl.OMIT_UNCHANGED_TEXT);
        private Lower(int opt) { super(opt); }

        /**
         * {@inheritDoc}
         * @stable ICU 59
         */
        @Override
        public Lower omitUnchangedText() {
            return OMIT_UNCHANGED;
        }

        /**
         * Lowercases a string.
         * Casing is locale-dependent and context-sensitive.
         * The result may be longer or shorter than the original.
         *
         * @param locale    The locale ID. Can be null for {@link Locale#getDefault}.
         *                  (See {@link ULocale#toLocale}.)
         * @param src       The original string.
         * @return the result string.
         *
         * @see UCharacter#toLowerCase(Locale, String)
         * @stable ICU 60
         */
        public String apply(Locale locale, CharSequence src) {
            return CaseMapImpl.toLower(getCaseLocale(locale), internalOptions, src);
        }

        /**
         * Lowercases a string and optionally records edits (see {@link #omitUnchangedText}).
         * Casing is locale-dependent and context-sensitive.
         * The result may be longer or shorter than the original.
         *
         * @param locale    The locale ID. Can be null for {@link Locale#getDefault}.
         *                  (See {@link ULocale#toLocale}.)
         * @param src       The original string.
         * @param dest      A buffer for the result string. Must not be null.
         * @param edits     Records edits for index mapping, working with styled text,
         *                  and getting only changes (if any).
         *                  This function calls edits.reset() first. edits can be null.
         * @param <A>       The type of the destination buffer, returned as-is.
         * @return dest with the result string (or only changes) appended.
         *
         * @see UCharacter#toLowerCase(Locale, String)
         * @stable ICU 59
         */
        public <A extends Appendable> A apply(
                Locale locale, CharSequence src, A dest, Edits edits) {
            return CaseMapImpl.toLower(getCaseLocale(locale), internalOptions, src, dest, edits);
        }
    }

    /**
     * Uppercasing options and methods. Immutable.
     *
     * @see #toUpper()
     * @stable ICU 59
     */
    public static final class Upper extends CaseMap {
        // Upper has only two possible option states, so both instances are shared.
        private static final Upper DEFAULT = new Upper(0);
        private static final Upper OMIT_UNCHANGED = new Upper(CaseMapImpl.OMIT_UNCHANGED_TEXT);
        private Upper(int opt) { super(opt); }

        /**
         * {@inheritDoc}
         * @stable ICU 59
         */
        @Override
        public Upper omitUnchangedText() {
            return OMIT_UNCHANGED;
        }

        /**
         * Uppercases a string.
         * Casing is locale-dependent and context-sensitive.
         * The result may be longer or shorter than the original.
         *
         * @param locale    The locale ID. Can be null for {@link Locale#getDefault}.
         *                  (See {@link ULocale#toLocale}.)
         * @param src       The original string.
         * @return the result string.
         *
         * @see UCharacter#toUpperCase(Locale, String)
         * @stable ICU 60
         */
        public String apply(Locale locale, CharSequence src) {
            return CaseMapImpl.toUpper(getCaseLocale(locale), internalOptions, src);
        }

        /**
         * Uppercases a string and optionally records edits (see {@link #omitUnchangedText}).
         * Casing is locale-dependent and context-sensitive.
         * The result may be longer or shorter than the original.
         *
         * @param locale    The locale ID. Can be null for {@link Locale#getDefault}.
         *                  (See {@link ULocale#toLocale}.)
         * @param src       The original string.
         * @param dest      A buffer for the result string. Must not be null.
         * @param edits     Records edits for index mapping, working with styled text,
         *                  and getting only changes (if any).
         *                  This function calls edits.reset() first. edits can be null.
         * @param <A>       The type of the destination buffer, returned as-is.
         * @return dest with the result string (or only changes) appended.
         *
         * @see UCharacter#toUpperCase(Locale, String)
         * @stable ICU 59
         */
        public <A extends Appendable> A apply(
                Locale locale, CharSequence src, A dest, Edits edits) {
            return CaseMapImpl.toUpper(getCaseLocale(locale), internalOptions, src, dest, edits);
        }
    }

    /**
     * Titlecasing options and methods. Immutable.
     *
     * @see #toTitle()
     * @stable ICU 59
     */
    public static final class Title extends CaseMap {
        private static final Title DEFAULT = new Title(0);
        private static final Title OMIT_UNCHANGED = new Title(CaseMapImpl.OMIT_UNCHANGED_TEXT);
        private Title(int opt) { super(opt); }

        /**
         * Returns an instance that behaves like this one but
         * titlecases the string as a whole rather than each word.
         * (Titlecases only the character at index 0, possibly adjusted.)
         *
         * <p>It is an error to specify multiple titlecasing iterator options together,
         * including both an option and an explicit BreakIterator.
         *
         * @return an options object with this option.
         * @see #adjustToCased()
         * @stable ICU 60
         */
        public Title wholeString() {
            return new Title(CaseMapImpl.addTitleIteratorOption(
                    internalOptions, CaseMapImpl.TITLECASE_WHOLE_STRING));
        }

        /**
         * Returns an instance that behaves like this one but
         * titlecases sentences rather than words.
         * (Titlecases only the first character of each sentence, possibly adjusted.)
         *
         * <p>It is an error to specify multiple titlecasing iterator options together,
         * including both an option and an explicit BreakIterator.
         *
         * @return an options object with this option.
         * @see #adjustToCased()
         * @stable ICU 60
         */
        public Title sentences() {
            return new Title(CaseMapImpl.addTitleIteratorOption(
                    internalOptions, CaseMapImpl.TITLECASE_SENTENCES));
        }

        /**
         * {@inheritDoc}
         * @stable ICU 59
         */
        @Override
        public Title omitUnchangedText() {
            // Reuse the shared instance when no other option bits are set;
            // otherwise a new instance must carry the combined options.
            if (internalOptions == 0 || internalOptions == CaseMapImpl.OMIT_UNCHANGED_TEXT) {
                return OMIT_UNCHANGED;
            }
            return new Title(internalOptions | CaseMapImpl.OMIT_UNCHANGED_TEXT);
        }

        /**
         * Returns an instance that behaves like this one but
         * does not lowercase non-initial parts of words when titlecasing.
         *
         * <p>By default, titlecasing will titlecase the character at each
         * (possibly adjusted) BreakIterator index and
         * lowercase all other characters up to the next iterator index.
         * With this option, the other characters will not be modified.
         *
         * @return an options object with this option.
         * @see UCharacter#TITLECASE_NO_LOWERCASE
         * @see #adjustToCased()
         * @stable ICU 59
         */
        public Title noLowercase() {
            return new Title(internalOptions | UCharacter.TITLECASE_NO_LOWERCASE);
        }

        /**
         * Returns an instance that behaves like this one but
         * does not adjust the titlecasing BreakIterator indexes;
         * titlecases exactly the characters at breaks from the iterator.
         *
         * <p>By default, titlecasing will take each break iterator index,
         * adjust it to the next relevant character (see {@link #adjustToCased()}),
         * and titlecase that one.
         *
         * <p>Other characters are lowercased.
         *
         * @return an options object with this option.
         * @see UCharacter#TITLECASE_NO_BREAK_ADJUSTMENT
         * @stable ICU 59
         */
        public Title noBreakAdjustment() {
            return new Title(CaseMapImpl.addTitleAdjustmentOption(
                    internalOptions, UCharacter.TITLECASE_NO_BREAK_ADJUSTMENT));
        }

        /**
         * Returns an instance that behaves like this one but
         * adjusts each titlecasing BreakIterator index to the next cased character.
         * (See the Unicode Standard, chapter 3, Default Case Conversion, R3 toTitlecase(X).)
         *
         * <p>This used to be the default index adjustment in ICU.
         * Since ICU 60, the default index adjustment is to the next character that is
         * a letter, number, symbol, or private use code point.
         * (Uncased modifier letters are skipped.)
         * The difference in behavior is small for word titlecasing,
         * but the new adjustment is much better for whole-string and sentence titlecasing:
         * It yields "49ers" and "«丰(abc)»" instead of "49Ers" and "«丰(Abc)»".
         *
         * <p>It is an error to specify multiple titlecasing adjustment options together.
         *
         * @return an options object with this option.
         * @see #noBreakAdjustment()
         * @stable ICU 60
         */
        public Title adjustToCased() {
            return new Title(CaseMapImpl.addTitleAdjustmentOption(
                    internalOptions, CaseMapImpl.TITLECASE_ADJUST_TO_CASED));
        }

        /**
         * Titlecases a string.
         * Casing is locale-dependent and context-sensitive.
         * The result may be longer or shorter than the original.
         *
         * <p>Titlecasing uses a break iterator to find the first characters of words
         * that are to be titlecased. It titlecases those characters and lowercases
         * all others. (This can be modified with options bits.)
         *
         * @param locale    The locale ID. Can be null for {@link Locale#getDefault}.
         *                  (See {@link ULocale#toLocale}.)
         * @param iter      A break iterator to find the first characters of words that are to be titlecased.
         *                  It is set to the source string (setText())
         *                  and used one or more times for iteration (first() and next()).
         *                  If null, then a word break iterator for the locale is used
         *                  (or something equivalent).
         * @param src       The original string.
         * @return the result string.
         *
         * @see UCharacter#toTitleCase(Locale, String, BreakIterator, int)
         * @stable ICU 60
         */
        public String apply(Locale locale, BreakIterator iter, CharSequence src) {
            // Resolve the default locale up front when no iterator was supplied.
            // NOTE(review): presumably getTitleBreakIterator needs a non-null locale
            // to create an iterator itself; confirm against CaseMapImpl.
            if (iter == null && locale == null) {
                locale = Locale.getDefault();
            }
            iter = CaseMapImpl.getTitleBreakIterator(locale, internalOptions, iter);
            iter.setText(src);
            return CaseMapImpl.toTitle(getCaseLocale(locale), internalOptions, iter, src);
        }

        /**
         * Titlecases a string and optionally records edits (see {@link #omitUnchangedText}).
         * Casing is locale-dependent and context-sensitive.
         * The result may be longer or shorter than the original.
         *
         * <p>Titlecasing uses a break iterator to find the first characters of words
         * that are to be titlecased. It titlecases those characters and lowercases
         * all others. (This can be modified with options bits.)
         *
         * @param locale    The locale ID. Can be null for {@link Locale#getDefault}.
         *                  (See {@link ULocale#toLocale}.)
         * @param iter      A break iterator to find the first characters of words that are to be titlecased.
         *                  It is set to the source string (setText())
         *                  and used one or more times for iteration (first() and next()).
         *                  If null, then a word break iterator for the locale is used
         *                  (or something equivalent).
         * @param src       The original string.
         * @param dest      A buffer for the result string. Must not be null.
         * @param edits     Records edits for index mapping, working with styled text,
         *                  and getting only changes (if any).
         *                  This function calls edits.reset() first. edits can be null.
         * @param <A>       The type of the destination buffer, returned as-is.
         * @return dest with the result string (or only changes) appended.
         *
         * @see UCharacter#toTitleCase(Locale, String, BreakIterator, int)
         * @stable ICU 59
         */
        public <A extends Appendable> A apply(
                Locale locale, BreakIterator iter, CharSequence src, A dest, Edits edits) {
            // Same default-locale handling as the String-returning overload.
            if (iter == null && locale == null) {
                locale = Locale.getDefault();
            }
            iter = CaseMapImpl.getTitleBreakIterator(locale, internalOptions, iter);
            iter.setText(src);
            return CaseMapImpl.toTitle(
                    getCaseLocale(locale), internalOptions, iter, src, dest, edits);
        }
    }

    /**
     * Case folding options and methods. Immutable.
     *
     * @see #fold()
     * @stable ICU 59
     */
    public static final class Fold extends CaseMap {
        // One shared instance per combination of the two option bits used by Fold.
        private static final Fold DEFAULT = new Fold(0);
        private static final Fold TURKIC = new Fold(UCharacter.FOLD_CASE_EXCLUDE_SPECIAL_I);
        private static final Fold OMIT_UNCHANGED = new Fold(CaseMapImpl.OMIT_UNCHANGED_TEXT);
        private static final Fold TURKIC_OMIT_UNCHANGED = new Fold(
                UCharacter.FOLD_CASE_EXCLUDE_SPECIAL_I | CaseMapImpl.OMIT_UNCHANGED_TEXT);
        private Fold(int opt) { super(opt); }

        /**
         * {@inheritDoc}
         * @stable ICU 59
         */
        @Override
        public Fold omitUnchangedText() {
            // Keep the Turkic bit if it is already set on this instance.
            return (internalOptions & UCharacter.FOLD_CASE_EXCLUDE_SPECIAL_I) == 0 ?
                    OMIT_UNCHANGED : TURKIC_OMIT_UNCHANGED;
        }

        /**
         * Returns an instance that behaves like this one but
         * handles dotted I and dotless i appropriately for Turkic languages (tr, az).
         *
         * <p>Uses the Unicode CaseFolding.txt mappings marked with 'T' that
         * are to be excluded for default mappings and
         * included for the Turkic-specific mappings.
         *
         * @return an options object with this option.
         * @see UCharacter#FOLD_CASE_EXCLUDE_SPECIAL_I
         * @stable ICU 59
         */
        public Fold turkic() {
            // Keep the omit-unchanged bit if it is already set on this instance.
            return (internalOptions & CaseMapImpl.OMIT_UNCHANGED_TEXT) == 0 ?
                    TURKIC : TURKIC_OMIT_UNCHANGED;
        }

        /**
         * Case-folds a string.
         * The result may be longer or shorter than the original.
         *
         * <p>Case-folding is locale-independent and not context-sensitive,
         * but there is an option for whether to include or exclude mappings for dotted I
         * and dotless i that are marked with 'T' in CaseFolding.txt.
         *
         * @param src       The original string.
         * @return the result string.
         *
         * @see UCharacter#foldCase(String, int)
         * @stable ICU 60
         */
        public String apply(CharSequence src) {
            return CaseMapImpl.fold(internalOptions, src);
        }

        /**
         * Case-folds a string and optionally records edits (see {@link #omitUnchangedText}).
         * The result may be longer or shorter than the original.
         *
         * <p>Case-folding is locale-independent and not context-sensitive,
         * but there is an option for whether to include or exclude mappings for dotted I
         * and dotless i that are marked with 'T' in CaseFolding.txt.
         *
         * @param src       The original string.
         * @param dest      A buffer for the result string. Must not be null.
         * @param edits     Records edits for index mapping, working with styled text,
         *                  and getting only changes (if any).
         *                  This function calls edits.reset() first. edits can be null.
         * @param <A>       The type of the destination buffer, returned as-is.
         * @return dest with the result string (or only changes) appended.
         *
         * @see UCharacter#foldCase(String, int)
         * @stable ICU 59
         */
        public <A extends Appendable> A apply(CharSequence src, A dest, Edits edits) {
            return CaseMapImpl.fold(internalOptions, src, dest, edits);
        }
    }
}





© 2015 - 2024 Weber Informatics LLC | Privacy Policy