All downloads are free. The search and download features use the official Maven repository.

com.ibm.icu.impl.locale.XLocaleMatcher Maven / Gradle / Ivy

There is a newer version: 2.12.15
Show newest version
// © 2017 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html#License
package com.ibm.icu.impl.locale;

import java.util.Arrays;
import java.util.Collection;
import java.util.LinkedHashSet;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;

import com.ibm.icu.impl.locale.XCldrStub.ImmutableMultimap;
import com.ibm.icu.impl.locale.XCldrStub.ImmutableSet;
import com.ibm.icu.impl.locale.XCldrStub.LinkedHashMultimap;
import com.ibm.icu.impl.locale.XCldrStub.Multimap;
import com.ibm.icu.impl.locale.XLikelySubtags.LSR;
import com.ibm.icu.impl.locale.XLocaleDistance.DistanceOption;
import com.ibm.icu.util.LocalePriorityList;
import com.ibm.icu.util.Output;
import com.ibm.icu.util.ULocale;

/**
 * Immutable class that picks best match between user's desired locales and application's supported locales.
 * @author markdavis
 */
public class XLocaleMatcher {
    private static final LSR UND = new LSR("und","","");
    private static final ULocale UND_LOCALE = new ULocale("und");

    // normally the default values, but can be set via constructor

    private final XLocaleDistance localeDistance;
    private final int thresholdDistance;
    private final int demotionPerAdditionalDesiredLocale;
    private final DistanceOption distanceOption;

    // built based on application's supported languages in constructor

    private final Map> supportedLanguages; // the locales in the collection are ordered!
    private final Set exactSupportedLocales; // the locales in the collection are ordered!
    private final ULocale defaultLanguage;


    public static class Builder {
        private Set supportedLanguagesList;
        private int thresholdDistance = -1;
        private int demotionPerAdditionalDesiredLocale = -1;;
        private ULocale defaultLanguage;
        private XLocaleDistance localeDistance;
        private DistanceOption distanceOption;
        /**
         * @param languagePriorityList the languagePriorityList to set
         * @return this Builder object
         */
        public Builder setSupportedLocales(String languagePriorityList) {
            this.supportedLanguagesList = asSet(LocalePriorityList.add(languagePriorityList).build());
            return this;
        }
        public Builder setSupportedLocales(LocalePriorityList languagePriorityList) {
            this.supportedLanguagesList = asSet(languagePriorityList);
            return this;
        }
        public Builder setSupportedLocales(Set languagePriorityList) {
            this.supportedLanguagesList = languagePriorityList;
            return this;
        }

        /**
         * @param thresholdDistance the thresholdDistance to set, with -1 = default
         * @return this Builder object
         */
        public Builder setThresholdDistance(int thresholdDistance) {
            this.thresholdDistance = thresholdDistance;
            return this;
        }
        /**
         * @param demotionPerAdditionalDesiredLocale the demotionPerAdditionalDesiredLocale to set, with -1 = default
         * @return this Builder object
         */
        public Builder setDemotionPerAdditionalDesiredLocale(int demotionPerAdditionalDesiredLocale) {
            this.demotionPerAdditionalDesiredLocale = demotionPerAdditionalDesiredLocale;
            return this;
        }

        /**
         * @param localeDistance the localeDistance to set, with default = XLocaleDistance.getDefault().
         * @return this Builder object
         */
        public Builder setLocaleDistance(XLocaleDistance localeDistance) {
            this.localeDistance = localeDistance;
            return this;
        }

        /**
         * Set the default language, with null = default = first supported language
         * @param defaultLanguage the default language
         * @return this Builder object
         */
        public Builder setDefaultLanguage(ULocale defaultLanguage) {
            this.defaultLanguage = defaultLanguage;
            return this;
        }

        /**
         * If true, then the language differences are smaller than than script differences.
         * This is used in situations (such as maps) where it is better to fall back to the same script than a similar language.
         * @param distanceOption the distance option
         * @return this Builder object
         */
        public Builder setDistanceOption(DistanceOption distanceOption) {
            this.distanceOption = distanceOption;
            return this;
        }

        public XLocaleMatcher build() {
            return new XLocaleMatcher(this);
        }
    }

    /**
     * Returns a builder used in chaining parameters for building a Locale Matcher.
     * @return this Builder object
     */
    public static Builder builder() {
        return new Builder();
    }

    /** Convenience method */
    public XLocaleMatcher(String supportedLocales) {
        this(builder().setSupportedLocales(supportedLocales));
    }
    /** Convenience method */
    public XLocaleMatcher(LocalePriorityList supportedLocales) {
        this(builder().setSupportedLocales(supportedLocales));
    }
    /** Convenience method */
    public XLocaleMatcher(Set supportedLocales) {
        this(builder().setSupportedLocales(supportedLocales));
    }

    /**
     * Create a locale matcher with the given parameters.
     * @param supportedLocales
     * @param thresholdDistance
     * @param demotionPerAdditionalDesiredLocale
     * @param localeDistance
     * @param likelySubtags
     */
    private XLocaleMatcher(Builder builder) {
        localeDistance = builder.localeDistance == null ? XLocaleDistance.getDefault()
            : builder.localeDistance;
        thresholdDistance = builder.thresholdDistance < 0 ? localeDistance.getDefaultScriptDistance()
            : builder.thresholdDistance;
        // only do AFTER above are set
        Set paradigms = extractLsrSet(localeDistance.getParadigms());
        final Multimap temp2 = extractLsrMap(builder.supportedLanguagesList, paradigms);
        supportedLanguages = temp2.asMap();
        exactSupportedLocales = ImmutableSet.copyOf(temp2.values());
        defaultLanguage = builder.defaultLanguage != null ? builder.defaultLanguage
            : supportedLanguages.isEmpty() ? null
                : supportedLanguages.entrySet().iterator().next().getValue().iterator().next(); // first language
        demotionPerAdditionalDesiredLocale = builder.demotionPerAdditionalDesiredLocale < 0 ? localeDistance.getDefaultRegionDistance()+1
            : builder.demotionPerAdditionalDesiredLocale;
        distanceOption = builder.distanceOption;
    }

    // Result is not immutable!
    private Set extractLsrSet(Set languagePriorityList) {
        Set result = new LinkedHashSet();
        for (ULocale item : languagePriorityList) {
            final LSR max = item.equals(UND_LOCALE) ? UND : LSR.fromMaximalized(item);
            result.add(max);
        }
        return result;
    }

    private Multimap extractLsrMap(Set languagePriorityList, Set priorities) {
        Multimap builder = LinkedHashMultimap.create();
        for (ULocale item : languagePriorityList) {
            final LSR max = item.equals(UND_LOCALE) ? UND : LSR.fromMaximalized(item);
            builder.put(max, item);
        }
        if (builder.size() > 1 && priorities != null) {
            // for the supported list, we put any priorities before all others, except for the first.
            Multimap builder2 = LinkedHashMultimap.create();

            // copy the long way so the priorities are in the same order as in the original
            boolean first = true;
            for (Entry> entry : builder.asMap().entrySet()) {
                final LSR key = entry.getKey();
                if (first || priorities.contains(key)) {
                    builder2.putAll(key, entry.getValue());
                    first = false;
                }
            }
            // now copy the rest
            builder2.putAll(builder);
            if (!builder2.equals(builder)) {
                throw new IllegalArgumentException();
            }
            builder = builder2;
        }
        return ImmutableMultimap.copyOf(builder);
    }


    /** Convenience method */
    public ULocale getBestMatch(ULocale ulocale) {
        return getBestMatch(ulocale, null);
    }
    /** Convenience method */
    public ULocale getBestMatch(String languageList) {
        return getBestMatch(LocalePriorityList.add(languageList).build(), null);
    }
    /** Convenience method */
    public ULocale getBestMatch(ULocale... locales) {
        return getBestMatch(new LinkedHashSet(Arrays.asList(locales)), null);
    }
    /** Convenience method */
    public ULocale getBestMatch(Set desiredLanguages) {
        return getBestMatch(desiredLanguages, null);
    }
    /** Convenience method */
    public ULocale getBestMatch(LocalePriorityList desiredLanguages) {
        return getBestMatch(desiredLanguages, null);
    }
    /** Convenience method */
    public ULocale getBestMatch(LocalePriorityList desiredLanguages, Output outputBestDesired) {
        return getBestMatch(asSet(desiredLanguages), outputBestDesired);
    }

    // TODO add LocalePriorityList method asSet() for ordered Set view backed by LocalePriorityList
    private static Set asSet(LocalePriorityList languageList) {
        Set temp = new LinkedHashSet(); // maintain order
        for (ULocale locale : languageList) {
            temp.add(locale);
        };
        return temp;
    }

    /**
     * Get the best match between the desired languages and supported languages
     * @param desiredLanguages Typically the supplied user's languages, in order of preference, with best first.
     * @param outputBestDesired The one of the desired languages that matched best.
     * Set to null if the best match was not below the threshold distance.
     * @return the best match.
     */
    public ULocale getBestMatch(Set desiredLanguages, Output outputBestDesired) {
        // fast path for singleton
        if (desiredLanguages.size() == 1) {
            return getBestMatch(desiredLanguages.iterator().next(), outputBestDesired);
        }
        // TODO produce optimized version for single desired ULocale
        Multimap desiredLSRs = extractLsrMap(desiredLanguages,null);
        int bestDistance = Integer.MAX_VALUE;
        ULocale bestDesiredLocale = null;
        Collection bestSupportedLocales = null;
        int delta = 0;
        mainLoop:
            for (final Entry desiredLsrAndLocale : desiredLSRs.entries()) {
                // quick check for exact match
                ULocale desiredLocale = desiredLsrAndLocale.getValue();
                LSR desiredLSR = desiredLsrAndLocale.getKey();
                if (delta < bestDistance) {
                    if (exactSupportedLocales.contains(desiredLocale)) {
                        if (outputBestDesired != null) {
                            outputBestDesired.value = desiredLocale;
                        }
                        return desiredLocale;
                    }
                    // quick check for maximized locale
                    Collection found = supportedLanguages.get(desiredLSR);
                    if (found != null) {
                        // if we find one in the set, return first (lowest). We already know the exact one isn't there.
                        if (outputBestDesired != null) {
                            outputBestDesired.value = desiredLocale;
                        }
                        return found.iterator().next();
                    }
                }
                for (final Entry> supportedLsrAndLocale : supportedLanguages.entrySet()) {
                    int distance = delta + localeDistance.distanceRaw(desiredLSR, supportedLsrAndLocale.getKey(),
                        thresholdDistance, distanceOption);
                    if (distance < bestDistance) {
                        bestDistance = distance;
                        bestDesiredLocale = desiredLocale;
                        bestSupportedLocales = supportedLsrAndLocale.getValue();
                        if (distance == 0) {
                            break mainLoop;
                        }
                    }
                }
                delta += demotionPerAdditionalDesiredLocale;
            }
        if (bestDistance >= thresholdDistance) {
            if (outputBestDesired != null) {
                outputBestDesired.value = null;
            }
            return defaultLanguage;
        }
        if (outputBestDesired != null) {
            outputBestDesired.value = bestDesiredLocale;
        }
        // pick exact match if there is one
        if (bestSupportedLocales.contains(bestDesiredLocale)) {
            return bestDesiredLocale;
        }
        // otherwise return first supported, combining variants and extensions from bestDesired
        return bestSupportedLocales.iterator().next();
    }

    /**
     * Get the best match between the desired languages and supported languages
     * @param desiredLocale the supplied user's language.
     * @param outputBestDesired The one of the desired languages that matched best.
     * Set to null if the best match was not below the threshold distance.
     * @return the best match.
     */
    public ULocale getBestMatch(ULocale desiredLocale, Output outputBestDesired) {
        int bestDistance = Integer.MAX_VALUE;
        ULocale bestDesiredLocale = null;
        Collection bestSupportedLocales = null;

        // quick check for exact match, with hack for und
        final LSR desiredLSR = desiredLocale.equals(UND_LOCALE) ? UND : LSR.fromMaximalized(desiredLocale);

        if (exactSupportedLocales.contains(desiredLocale)) {
            if (outputBestDesired != null) {
                outputBestDesired.value = desiredLocale;
            }
            return desiredLocale;
        }
        // quick check for maximized locale
        if (distanceOption == DistanceOption.NORMAL) {
            Collection found = supportedLanguages.get(desiredLSR);
            if (found != null) {
                // if we find one in the set, return first (lowest). We already know the exact one isn't there.
                if (outputBestDesired != null) {
                    outputBestDesired.value = desiredLocale;
                }
                return found.iterator().next();
            }
        }
        for (final Entry> supportedLsrAndLocale : supportedLanguages.entrySet()) {
            int distance = localeDistance.distanceRaw(desiredLSR, supportedLsrAndLocale.getKey(),
                thresholdDistance, distanceOption);
            if (distance < bestDistance) {
                bestDistance = distance;
                bestDesiredLocale = desiredLocale;
                bestSupportedLocales = supportedLsrAndLocale.getValue();
                if (distance == 0) {
                    break;
                }
            }
        }
        if (bestDistance >= thresholdDistance) {
            if (outputBestDesired != null) {
                outputBestDesired.value = null;
            }
            return defaultLanguage;
        }
        if (outputBestDesired != null) {
            outputBestDesired.value = bestDesiredLocale;
        }
        // pick exact match if there is one
        if (bestSupportedLocales.contains(bestDesiredLocale)) {
            return bestDesiredLocale;
        }
        // otherwise return first supported, combining variants and extensions from bestDesired
        return bestSupportedLocales.iterator().next();
    }

    /** Combine features of the desired locale into those of the supported, and return result. */
    public static ULocale combine(ULocale bestSupported, ULocale bestDesired) {
        // for examples of extensions, variants, see
        //  http://unicode.org/repos/cldr/tags/latest/common/bcp47/
        //  http://unicode.org/repos/cldr/tags/latest/common/validity/variant.xml

        if (!bestSupported.equals(bestDesired) && bestDesired != null) {
            // add region, variants, extensions
            ULocale.Builder b = new ULocale.Builder().setLocale(bestSupported);

            // copy the region from the desired, if there is one
            String region = bestDesired.getCountry();
            if (!region.isEmpty()) {
                b.setRegion(region);
            }

            // copy the variants from desired, if there is one
            // note that this will override any subvariants. Eg "sco-ulster-fonipa" + "…-fonupa" => "sco-fonupa" (nuking ulster)
            String variants = bestDesired.getVariant();
            if (!variants.isEmpty()) {
                b.setVariant(variants);
            }

            // copy the extensions from desired, if there are any
            // note that this will override any subkeys. Eg "th-u-nu-latn-ca-buddhist" + "…-u-nu-native" => "th-u-nu-native" (nuking calendar)
            for (char extensionKey : bestDesired.getExtensionKeys()) {
                b.setExtension(extensionKey, bestDesired.getExtension(extensionKey));
            }
            bestSupported = b.build();
        }
        return bestSupported;
    }

    /** Returns the distance between the two languages. The values are not necessarily symmetric.
     * @param desired A locale desired by the user
     * @param supported A locale supported by a program.
     * @return A return of 0 is a complete match, and 100 is a failure case (above the thresholdDistance).
     * A language is first maximized with add likely subtags, then compared.
     */
    public int distance(ULocale desired, ULocale supported) {
        return localeDistance.distanceRaw(
            LSR.fromMaximalized(desired),
            LSR.fromMaximalized(supported), thresholdDistance, distanceOption);
    }

    /** Convenience method */
    public int distance(String desiredLanguage, String supportedLanguage) {
        return localeDistance.distanceRaw(
            LSR.fromMaximalized(new ULocale(desiredLanguage)),
            LSR.fromMaximalized(new ULocale(supportedLanguage)),
            thresholdDistance, distanceOption);
    }

    @Override
    public String toString() {
        return exactSupportedLocales.toString();
    }

    /** Return the inverse of the distance: that is, 1-distance(desired, supported) */
    public double match(ULocale desired, ULocale supported) {
        return (100-distance(desired, supported))/100.0;
    }

    /**
     * Returns a fraction between 0 and 1, where 1 means that the languages are a
     * perfect match, and 0 means that they are completely different. This is (100-distance(desired, supported))/100.0.
     * 
Note that * the precise values may change over time; no code should be made dependent * on the values remaining constant. * @param desired Desired locale * @param desiredMax Maximized locale (using likely subtags) * @param supported Supported locale * @param supportedMax Maximized locale (using likely subtags) * @return value between 0 and 1, inclusive. * @deprecated Use the form with 2 parameters instead. */ @Deprecated public double match(ULocale desired, ULocale desiredMax, ULocale supported, ULocale supportedMax) { return match(desired, supported); } /** * Canonicalize a locale (language). Note that for now, it is canonicalizing * according to CLDR conventions (he vs iw, etc), since that is what is needed * for likelySubtags. * @param ulocale language/locale code * @return ULocale with remapped subtags. * @stable ICU 4.4 */ public ULocale canonicalize(ULocale ulocale) { // TODO return null; } /** * @return the thresholdDistance. Any distance above this value is treated as a match failure. */ public int getThresholdDistance() { return thresholdDistance; } }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy