All downloads are free. The search and download functionality uses the official Maven repository.

com.ibm.icu.impl.personname.PersonNameFormatterImpl Maven / Gradle / Ivy

Go to download

International Components for Unicode for Java (ICU4J) is a mature, widely used Java library providing Unicode and globalization support.

There is a newer version: 76.1
Show newest version
// © 2022 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
package com.ibm.icu.impl.personname;

import static com.ibm.icu.util.UResourceBundle.ARRAY;
import static com.ibm.icu.util.UResourceBundle.STRING;

import java.util.Arrays;
import java.util.Collections;
import java.util.HashSet;
import java.util.Locale;
import java.util.Set;

import com.ibm.icu.impl.ICUData;
import com.ibm.icu.impl.ICUResourceBundle;
import com.ibm.icu.lang.UScript;
import com.ibm.icu.text.PersonName;
import com.ibm.icu.text.PersonNameFormatter;
import com.ibm.icu.util.ULocale;
import com.ibm.icu.util.UResourceBundle;

/**
 * Actual implementation class for PersonNameFormatter.
 *
 * <p>An instance holds the formatting patterns and name-order locale lists loaded from the ICU
 * resource bundle for one formatter locale and one combination of length / usage / formality /
 * display order.  {@link #formatToString(PersonName)} picks the best pattern for a given name and,
 * when the name's script does not fit the formatter's locale, delegates to a second formatter
 * created for a locale derived from the name itself.
 */
public class PersonNameFormatterImpl {
    /** Script code that the logic below treats as "script could not be determined". */
    private static final String UNKNOWN_SCRIPT = "Zzzz";

    /**
     * Script codes that are valid, but not the default, for their locales.  When a name's locale
     * carries one of these, {@link #nameIsGnFirst(PersonName)} swaps in the likely default script
     * before walking the parent-locale chain.
     */
    static final Set<String> NON_DEFAULT_SCRIPTS = new HashSet<>(Arrays.asList("Hani", "Hira", "Kana"));

    private final Locale locale;
    private final PersonNamePattern[] gnFirstPatterns;
    private final PersonNamePattern[] snFirstPatterns;
    private final Set<String> gnFirstLocales;
    private final Set<String> snFirstLocales;
    private final String initialPattern;
    private final String initialSequencePattern;
    private final boolean capitalizeSurname;
    private final String foreignSpaceReplacement;
    private final String nativeSpaceReplacement;
    private final PersonNameFormatter.Length length;
    private final PersonNameFormatter.Usage usage;
    private final PersonNameFormatter.Formality formality;
    private final PersonNameFormatter.DisplayOrder displayOrder;

    /**
     * Creates a formatter by loading its patterns and settings from the ICU resource bundle.
     *
     * @param locale         The formatter's locale; used to open the resource bundle.
     * @param length         Requested length; DEFAULT means "use the locale's parameterDefault".
     * @param usage          Requested usage.
     * @param formality      Requested formality; DEFAULT means "use the locale's parameterDefault".
     * @param displayOrder   Requested display order.
     * @param surnameAllCaps Whether surnames should be capitalized.
     */
    public PersonNameFormatterImpl(Locale locale,
                                   PersonNameFormatter.Length length,
                                   PersonNameFormatter.Usage usage,
                                   PersonNameFormatter.Formality formality,
                                   PersonNameFormatter.DisplayOrder displayOrder,
                                   boolean surnameAllCaps) {
        // save off our creation parameters exactly as passed (these are only used if we have to
        // create a second formatter, and by the getters / the FORCE_* checks)
        this.length = length;
        this.usage = usage;
        this.formality = formality;
        this.displayOrder = displayOrder;
        this.capitalizeSurname = surnameAllCaps;

        // load simple property values from the resource bundle (or the options set)
        ICUResourceBundle rb = (ICUResourceBundle) UResourceBundle.getBundleInstance(ICUData.ICU_BASE_NAME, locale);
        this.locale = locale;
        this.initialPattern = rb.getStringWithFallback("personNames/initialPattern/initial");
        this.initialSequencePattern = rb.getStringWithFallback("personNames/initialPattern/initialSequence");
        this.foreignSpaceReplacement = rb.getStringWithFallback("personNames/foreignSpaceReplacement");
        this.nativeSpaceReplacement = rb.getStringWithFallback("personNames/nativeSpaceReplacement");

        // adjust for combinations of parameters that don't make sense in practice; the adjusted
        // values only steer which patterns get loaded, the stored fields keep the caller's values
        PersonNameFormatter.Usage effectiveUsage = usage;
        PersonNameFormatter.DisplayOrder effectiveOrder = displayOrder;
        if (usage == PersonNameFormatter.Usage.MONOGRAM) {
            // we don't support SORTING in conjunction with MONOGRAM; if the caller passes in SORTING,
            // remove it from the options list
            effectiveOrder = PersonNameFormatter.DisplayOrder.DEFAULT;
        } else if (displayOrder == PersonNameFormatter.DisplayOrder.SORTING) {
            // we only support SORTING in conjunction with REFERRING; if the caller passes in ADDRESSING,
            // treat it the same as REFERRING
            effectiveUsage = PersonNameFormatter.Usage.REFERRING;
        }

        // load the actual formatting patterns-- since we don't know the name order until formatting time
        // (it can be different for different names), load patterns for both given-first and surname-first
        // names.  (If the user has specified SORTING, we don't need to do this-- we just load the "sorting"
        // patterns and ignore the name's order.)
        //
        // The enum names are turned into resource keys, so they must be lower-cased independently of
        // the JVM's default locale (a Turkish default would otherwise map 'I' to a dotless 'i').
        final String resourcePathPrefix = "personNames/namePattern/";
        String lengthStr = (length != PersonNameFormatter.Length.DEFAULT)
                ? length.toString().toLowerCase(Locale.ROOT)
                : rb.getStringWithFallback("personNames/parameterDefault/length");
        String formalityStr = (formality != PersonNameFormatter.Formality.DEFAULT)
                ? formality.toString().toLowerCase(Locale.ROOT)
                : rb.getStringWithFallback("personNames/parameterDefault/formality");
        String resourceNameBody = lengthStr + "-" + effectiveUsage.toString().toLowerCase(Locale.ROOT) + "-" + formalityStr;

        if (effectiveOrder != PersonNameFormatter.DisplayOrder.SORTING) {
            ICUResourceBundle gnFirstResource = rb.getWithFallback(resourcePathPrefix + "givenFirst-" + resourceNameBody);
            ICUResourceBundle snFirstResource = rb.getWithFallback(resourcePathPrefix + "surnameFirst-" + resourceNameBody);

            gnFirstPatterns = PersonNamePattern.makePatterns(asStringArray(gnFirstResource), this);
            snFirstPatterns = PersonNamePattern.makePatterns(asStringArray(snFirstResource), this);

            gnFirstLocales = new HashSet<>();
            Collections.addAll(gnFirstLocales, asStringArray(rb.getWithFallback("personNames/nameOrderLocales/givenFirst")));
            snFirstLocales = new HashSet<>();
            Collections.addAll(snFirstLocales, asStringArray(rb.getWithFallback("personNames/nameOrderLocales/surnameFirst")));
        } else {
            ICUResourceBundle patternResource = rb.getWithFallback(resourcePathPrefix + "sorting-" + resourceNameBody);

            // in SORTING mode the single pattern list lives in gnFirstPatterns; a null snFirstPatterns
            // is what tells formatToString() to skip the name-order decision entirely
            gnFirstPatterns = PersonNamePattern.makePatterns(asStringArray(patternResource), this);
            snFirstPatterns = null;
            gnFirstLocales = null;
            snFirstLocales = null;
        }
    }

    /**
     * THIS IS A DUMMY CONSTRUCTOR JUST FOR THE USE OF THE UNIT TESTS TO CHECK SOME OF THE INTERNAL IMPLEMENTATION!
     *
     * @param locale          The formatter's locale.
     * @param gnFirstPatterns Given-first pattern strings (required).
     * @param snFirstPatterns Surname-first pattern strings, or null.
     * @param gnFirstLocales  Locale IDs to be treated as given-first, or null.
     * @param snFirstLocales  Locale IDs to be treated as surname-first, or null.
     */
    public PersonNameFormatterImpl(Locale locale, String[] gnFirstPatterns, String[] snFirstPatterns, String[] gnFirstLocales, String[] snFirstLocales) {
        // first, set dummy values for the other fields
        length = PersonNameFormatter.Length.MEDIUM;
        usage = PersonNameFormatter.Usage.REFERRING;
        formality = PersonNameFormatter.Formality.FORMAL;
        displayOrder = PersonNameFormatter.DisplayOrder.DEFAULT;
        initialPattern = "{0}.";
        initialSequencePattern = "{0} {1}";
        capitalizeSurname = false;
        foreignSpaceReplacement = " ";
        nativeSpaceReplacement = " ";

        // then, set values for the fields we actually care about (all but gnFirstPatterns are optional)
        this.locale = locale;
        this.gnFirstPatterns = PersonNamePattern.makePatterns(gnFirstPatterns, this);
        this.snFirstPatterns = (snFirstPatterns != null) ? PersonNamePattern.makePatterns(snFirstPatterns, this) : null;
        if (gnFirstLocales != null) {
            this.gnFirstLocales = new HashSet<>();
            Collections.addAll(this.gnFirstLocales, gnFirstLocales);
        } else {
            this.gnFirstLocales = null;
        }
        if (snFirstLocales != null) {
            this.snFirstLocales = new HashSet<>();
            Collections.addAll(this.snFirstLocales, snFirstLocales);
        } else {
            this.snFirstLocales = null;
        }
    }

    @Override
    public String toString() {
        return "PersonNameFormatter: " + displayOrder + "-" + length + "-" + usage + "-" + formality + ", " + locale;
    }

    /**
     * Formats the given name according to this formatter's settings.
     *
     * <p>If the name's script is not one of the scripts of this formatter's locale, the work is
     * handed to a new formatter created for the name's own locale (or, when no formatting data
     * exists for that, for a locale derived from the name's script).
     *
     * @param name The name to format.
     * @return The formatted name.
     */
    public String formatToString(PersonName name) {
        // TODO: Should probably return a FormattedPersonName object

        // detect the script once and reuse it for the locale derivation
        String nameScript = getNameScript(name);
        Locale nameLocale = getNameLocale(name, nameScript);

        if (!nameScriptMatchesLocale(nameScript, this.locale)) {
            Locale newFormattingLocale;
            if (formattingLocaleExists(nameLocale)) {
                newFormattingLocale = nameLocale;
            } else {
                newFormattingLocale = newLocaleWithScript(null, nameScript, nameLocale.getCountry());
            }
            PersonNameFormatterImpl nameLocaleFormatter = new PersonNameFormatterImpl(newFormattingLocale, this.length,
                    this.usage, this.formality, this.displayOrder, this.capitalizeSurname);
            return nameLocaleFormatter.formatToString(name);
        }

        // choose the GN-first or SN-first pattern based on the name itself and use that to format it
        // (a null snFirstPatterns means only one pattern list was loaded, so there is nothing to choose)
        PersonNamePattern[] candidates = (snFirstPatterns == null || nameIsGnFirst(name)) ? gnFirstPatterns : snFirstPatterns;
        String result = getBestPattern(candidates, name).format(name);

        // if either of the space-replacement characters is something other than a space,
        // check to see if the name locale's language matches the formatter locale's language.
        // If they match, replace all spaces with the native space-replacement character,
        // and if they don't, replace all spaces with the foreign space-replacement character
        if (!nativeSpaceReplacement.equals(" ") || !foreignSpaceReplacement.equals(" ")) {
            if (localesMatch(nameLocale, this.locale)) {
                result = result.replace(" ", nativeSpaceReplacement);
            } else {
                result = result.replace(" ", foreignSpaceReplacement);
            }
        }
        return result;
    }

    public Locale getLocale() {
        return locale;
    }

    public PersonNameFormatter.Length getLength() { return length; }

    public PersonNameFormatter.Usage getUsage() { return usage; }

    public PersonNameFormatter.Formality getFormality() { return formality; }

    public PersonNameFormatter.DisplayOrder getDisplayOrder() { return displayOrder; }

    public boolean getSurnameAllCaps() { return capitalizeSurname; }

    public String getInitialPattern() {
        return initialPattern;
    }

    public String getInitialSequencePattern() {
        return initialSequencePattern;
    }

    public boolean shouldCapitalizeSurname() {
        return capitalizeSurname;
    }

    /**
     * Returns the value of the resource, as a string array.
     * @param resource An ICUResourceBundle of type STRING or ARRAY.  If ARRAY, this function just returns it
     *                 as a string array.  If STRING, it returns a one-element array containing that string.
     * @return The resource's value, as an array of Strings.
     * @throws IllegalStateException if the resource is of any other type.
     */
    private static String[] asStringArray(ICUResourceBundle resource) {
        int type = resource.getType();
        if (type == STRING) {
            return new String[] { resource.getString() };
        }
        if (type == ARRAY) {
            return resource.getStringArray();
        }
        throw new IllegalStateException("Unsupported resource type " + type);
    }

    /** Null-tolerant membership test: a missing (null) locale list is treated as empty. */
    private static boolean listsLocale(Set<String> localeIds, String localeId) {
        return localeIds != null && localeIds.contains(localeId);
    }

    /**
     * Returns the field order to use when formatting this name, taking into account the name's preferredOrder
     * field, as well as the name and formatter's respective locales.
     * @param name The name to be formatted.
     * @return If true, use given-first order to format the name; if false, use surname-first order.
     */
    private boolean nameIsGnFirst(PersonName name) {
        // if the formatter has its display order set to one of the "force" values, that overrides
        // all this logic and the name's preferred-order property
        if (this.displayOrder == PersonNameFormatter.DisplayOrder.FORCE_GIVEN_FIRST) {
            return true;
        } else if (this.displayOrder == PersonNameFormatter.DisplayOrder.FORCE_SURNAME_FIRST) {
            return false;
        }

        // the name can declare its order-- check that first (it overrides any locale-based calculation)
        if (name.getPreferredOrder() == PersonName.PreferredOrder.GIVEN_FIRST) {
            return true;
        } else if (name.getPreferredOrder() == PersonName.PreferredOrder.SURNAME_FIRST) {
            return false;
        }

        // Otherwise, search the gnFirstLocales and snFirstLocales for the locale's name.
        // For our purposes, the "locale's name" is the locale the name itself gives us (if it
        // has one), or the locale we guess for the name (if it doesn't).
        Locale nameLocale = name.getNameLocale();
        if (nameLocale == null) {
            nameLocale = getNameLocale(name);
        }

        // this is a hack to deal with certain script codes that are valid, but not the default, for their locales--
        // to make the parent-chain lookup work right, we need to replace any of those script codes (in the name's locale)
        // with the appropriate default script for whatever language and region we have
        ULocale nameULocale = ULocale.forLocale(nameLocale);
        if (NON_DEFAULT_SCRIPTS.contains(nameULocale.getScript())) {
            ULocale.Builder builder = new ULocale.Builder();
            builder.setLocale(nameULocale);
            builder.setScript(null);
            nameULocale = ULocale.addLikelySubtags(builder.build());
        }

        // now search for the locale in the gnFirstLocales and snFirstLocales lists...
        String localeStr = nameULocale.getName();
        String origLocaleStr = localeStr;
        String languageCode = nameULocale.getLanguage();

        // localeStr is never null (it falls back to languageCode below), so the walk is bounded by
        // remembering every candidate already examined: once a candidate repeats there is nothing
        // new left to try.  With well-formed data "und" is always found before that happens.
        Set<String> examined = new HashSet<>();
        while (examined.add(localeStr)) {
            // first check if the locale is in one of those lists
            if (listsLocale(gnFirstLocales, localeStr)) {
                return true;
            } else if (listsLocale(snFirstLocales, localeStr)) {
                return false;
            }

            // if not, try again with "und" in place of the language code (this lets us use "und_CN" to match
            // all locales with a region code of "CN" and makes sure the last thing we try is always "und", which
            // is required to be in gnFirstLocales or snFirstLocales)
            String undStr = localeStr.startsWith(languageCode)
                    ? "und" + localeStr.substring(languageCode.length())
                    : localeStr;
            if (listsLocale(gnFirstLocales, undStr)) {
                return true;
            } else if (listsLocale(snFirstLocales, undStr)) {
                return false;
            }

            // if we haven't found the locale ID yet, look up its parent locale ID and try again-- if getParentLocaleID()
            // returns null (i.e., we have a locale ID, such as "zh_Hant", that inherits directly from "root"), try again
            // with just the locale ID's language code (this fixes it so that "zh_Hant" matches "zh", even though "zh" isn't,
            // strictly speaking, its parent locale)
            String parentLocaleStr = ICUResourceBundle.getParentLocaleID(localeStr, origLocaleStr, ICUResourceBundle.OpenType.LOCALE_DEFAULT_ROOT);
            localeStr = (parentLocaleStr != null) ? parentLocaleStr : languageCode;
        }

        // should never get here ("und" should always be in gnFirstLocales or snFirstLocales), but if we do...
        return true;
    }

    /**
     * Chooses the pattern that fits the name best.
     *
     * @param patterns The candidate patterns.
     * @param name     The name to be formatted.
     * @return The only pattern if there is exactly one; otherwise the pattern with the most fields
     *         populated in {@code name}, ties broken by the fewest empty fields (first one wins on
     *         a full tie).  Null if {@code patterns} is empty.
     */
    private PersonNamePattern getBestPattern(PersonNamePattern[] patterns, PersonName name) {
        // early out if there's only one pattern
        if (patterns.length == 1) {
            return patterns[0];
        }

        int maxPopulatedFields = 0;
        int minEmptyFields = Integer.MAX_VALUE;
        PersonNamePattern bestPattern = null;

        for (PersonNamePattern pattern : patterns) {
            int populatedFields = pattern.numPopulatedFields(name);
            int emptyFields = pattern.numEmptyFields(name);
            boolean morePopulated = populatedFields > maxPopulatedFields;
            boolean sameButFewerEmpty = populatedFields == maxPopulatedFields && emptyFields < minEmptyFields;
            if (morePopulated || sameButFewerEmpty) {
                maxPopulatedFields = populatedFields;
                minEmptyFields = emptyFields;
                bestPattern = pattern;
            }
        }
        return bestPattern;
    }

    /**
     * Internal function to figure out the name's script by examining its characters.
     * @param name The name for which we need the script
     * @return The four-letter script code for the name.
     */
    private String getNameScript(PersonName name) {
        // Rather than exhaustively checking all the fields in the name, we just check the surname
        // and given-name fields, giving preference to the script of the surname if they're different
        // (we concatenate them, surname first, into one string for simplicity).  The "name script" is
        // the script of the first character we find whose script isn't "common", "inherited" or "unknown".
        String surname = name.getFieldValue(PersonName.NameField.SURNAME, Collections.emptySet());
        String givenName = name.getFieldValue(PersonName.NameField.GIVEN, Collections.emptySet());
        String nameText = ((surname != null) ? surname : "") + ((givenName != null) ? givenName : "");

        // walk by code point (not by char) so supplementary characters are examined exactly once
        for (int i = 0; i < nameText.length(); ) {
            int c = nameText.codePointAt(i);
            int charScript = UScript.getScript(c);
            if (charScript != UScript.COMMON && charScript != UScript.INHERITED && charScript != UScript.UNKNOWN) {
                return UScript.getShortName(charScript);
            }
            i += Character.charCount(c);
        }
        return UScript.getShortName(UScript.UNKNOWN);
    }

    /**
     * Builds a locale that combines an existing locale (or, if there is none, a language guessed
     * from the script) with the given script code.
     *
     * @param oldLocale  The locale to start from, or null to derive the language from the script.
     * @param scriptCode The four-letter script code detected for the name.
     * @param regionCode Region to apply when {@code oldLocale} is null; may be null.
     * @return The new locale, or {@code oldLocale} unchanged (possibly null) when the script is unknown.
     */
    private Locale newLocaleWithScript(Locale oldLocale, String scriptCode, String regionCode) {
        Locale workingLocale;
        String localeScript;

        // if we got the "unknown" script code, don't do anything with it-- just return the original locale
        if (scriptCode.equals(UNKNOWN_SCRIPT)) {
            return oldLocale;
        }

        Locale.Builder builder = new Locale.Builder();
        if (oldLocale != null) {
            workingLocale = oldLocale;
            builder.setLocale(oldLocale);
            localeScript = ULocale.addLikelySubtags(ULocale.forLocale(oldLocale)).getScript();
        } else {
            // no locale to start from: ask likely-subtags which language goes with this script
            ULocale tmpLocale = ULocale.addLikelySubtags(new ULocale("und_" + scriptCode));
            builder.setLanguage(tmpLocale.getLanguage());
            workingLocale = ULocale.addLikelySubtags(new ULocale(tmpLocale.getLanguage())).toLocale();
            localeScript = workingLocale.getScript();

            if (regionCode != null) {
                builder.setRegion(regionCode);
            }
        }

        // if the detected character script matches one of the default scripts for the name's locale,
        // use the name locale's default script code in the locale ID we return (this converts a detected
        // script of "Hani" to "Hans" for "zh", "Hant" for "zh_Hant", and "Jpan" for "ja")
        if (!scriptCode.equals(localeScript) && nameScriptMatchesLocale(scriptCode, workingLocale)) {
            scriptCode = localeScript;
        }

        builder.setScript(scriptCode);
        return builder.build();
    }

    /**
     * Internal function to figure out the name's locale when the name doesn't specify it.
     * (Note that this code assumes that if the locale is specified, it includes a language
     * code.)
     * @param name The name for which we need the locale
     * @return The name's (real or guessed) locale.
     */
    private Locale getNameLocale(PersonName name) {
        return getNameLocale(name, getNameScript(name));
    }

    /**
     * Same as {@link #getNameLocale(PersonName)}, for callers that have already detected the script.
     *
     * <p>When the name declares no locale and its script is unknown there is nothing to guess from;
     * in that case the formatter's own locale is returned, in line with
     * {@link #nameScriptMatchesLocale(String, Locale)} treating an unknown script as matching the
     * formatter.  This keeps callers from dereferencing a null locale.
     *
     * @param name       The name for which we need the locale.
     * @param nameScript The script code already detected for {@code name}.
     * @return The name's (real or guessed) locale, or this formatter's locale if neither is available.
     */
    private Locale getNameLocale(PersonName name, String nameScript) {
        Locale derived = newLocaleWithScript(name.getNameLocale(), nameScript, null);
        return (derived != null) ? derived : this.locale;
    }

    /**
     * Returns true if the characters in the name match one of the scripts for the specified locale.
     */
    private boolean nameScriptMatchesLocale(String nameScriptID, Locale formatterLocale) {
        // if the script code is the "unknown" script, pretend it matches everything
        if (nameScriptID.equals(UNKNOWN_SCRIPT)) {
            return true;
        }

        int[] localeScripts = UScript.getCode(formatterLocale);
        int nameScript = UScript.getCodeFromName(nameScriptID);

        for (int localeScript : localeScripts) {
            // plain HAN in the name counts as a match for both the simplified and traditional variants
            boolean hanVariant = nameScript == UScript.HAN
                    && (localeScript == UScript.SIMPLIFIED_HAN || localeScript == UScript.TRADITIONAL_HAN);
            if (localeScript == nameScript || hanVariant) {
                return true;
            }
        }
        return false;
    }

    /**
     * Returns true if there's actual name formatting data for the specified locale (i.e., when
     * we fetch the resource data, we don't fall back to root).
     */
    private boolean formattingLocaleExists(Locale formattingLocale) {
        // NOTE: What we really want to test for here is whether we're falling back to root for either the resource bundle itself
        // or for the personNames/nameOrderLocales/givenFirst and personNames/nameOrderLocales/surnameFirst resources.
        // The problem is that getBundleInstance() doesn't return root when it can't find what it's looking for; it returns
        // ULocale.getDefault().  We could theoretically get around this by passing OpenType.LOCALE_ROOT, but this
        // bypasses the parent-locale table, so fallback across script can happen (ja_Latn falls back to ja instead of root).
        // So I'm checking to see if the language code got changed and using that as a surrogate for falling back to root.
        String formattingLanguage = formattingLocale.getLanguage();
        ICUResourceBundle mainRB = ICUResourceBundle.getBundleInstance(ICUData.ICU_BASE_NAME, ULocale.forLocale(formattingLocale), ICUResourceBundle.OpenType.LOCALE_DEFAULT_ROOT);
        if (!mainRB.getULocale().getLanguage().equals(formattingLanguage)) {
            return false;
        }

        ICUResourceBundle gnFirstResource = mainRB.getWithFallback("personNames/nameOrderLocales/givenFirst");
        ICUResourceBundle snFirstResource = mainRB.getWithFallback("personNames/nameOrderLocales/surnameFirst");

        return gnFirstResource.getULocale().getLanguage().equals(formattingLanguage) || snFirstResource.getULocale().getLanguage().equals(formattingLanguage);
    }

    /**
     * Returns true if the two locales should be considered equivalent for space-replacement purposes.
     */
    private boolean localesMatch(Locale nameLocale, Locale formatterLocale) {
        String nameLanguage = nameLocale.getLanguage();
        String formatterLanguage = formatterLocale.getLanguage();

        if (nameLanguage.equals(formatterLanguage)) {
            return true;
        }

        // HACK to make Japanese and Chinese names use the native format and native space replacement
        // (do we want to do something more general here?)
        boolean nameIsCjk = nameLanguage.equals("ja") || nameLanguage.equals("zh");
        boolean formatterIsCjk = formatterLanguage.equals("ja") || formatterLanguage.equals("zh");
        return nameIsCjk && formatterIsCjk;
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy