
// com.ibm.icu.impl.locale.XLocaleMatcher (Maven / Gradle / Ivy)
// © 2017 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html#License
package com.ibm.icu.impl.locale;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Objects;
import java.util.Set;
import com.ibm.icu.impl.locale.LocaleDistance.DistanceOption;
import com.ibm.icu.util.LocalePriorityList;
import com.ibm.icu.util.Output;
import com.ibm.icu.util.ULocale;
/**
* Immutable class that picks best match between user's desired locales and application's supported locales.
* @author markdavis
*/
public final class XLocaleMatcher {
// LSR returned by getMaximalLsrOrUnd() for the "und" locale.
private static final LSR UND_LSR = new LSR("und", "", "");
private static final ULocale UND_LOCALE = new ULocale("und");
// Typed null: a bare null second argument would be ambiguous between the
// getBestMatch(ULocale, Iterator) and getBestMatch(ULocale, Output) overloads.
private static final Iterator<ULocale> NULL_ITERATOR = null;
// Activates debugging output to stderr with details of getBestMatch.
private static final boolean TRACE_MATCHER = false;
// List of indexes, optimized for one or two.
private static final class Indexes {
// Some indexes without further object creation and auto-boxing.
int first, second = -1;
// We could turn the List into an int array + length and manage its growth.
List remaining;
Indexes(int firstIndex) {
first = firstIndex;
}
void add(int i) {
if (second < 0) {
second = i;
} else {
if (remaining == null) {
remaining = new ArrayList<>();
}
remaining.add(i);
}
}
int getFirst() { return first; }
int get(int i) { // returns -1 when i >= length
if (i == 0) {
return first;
} else if (i == 1) {
return second;
} else if (remaining != null && (i -= 2) < remaining.size()) {
return remaining.get(i);
} else {
return -1;
}
}
}
// TODO: Make public, and add public methods that return it.
// Outcome of one matching pass: the chosen supported locale (as ULocale and as
// java.util.Locale), the desired locale that led to it, and both list positions.
private static final class Result {
    ULocale desiredLocale;
    ULocale supportedLocale;
    // Locale desiredJavaLocale;
    Locale supportedJavaLocale;
    int desiredIndex;
    @SuppressWarnings("unused") // unused until public, for other wrappers
    int supportedIndex;

    private Result(ULocale desired, ULocale supported,
            /* Locale jdesired, */ Locale jsupported,
            int desIndex, int suppIndex) {
        this.desiredLocale = desired;
        this.desiredIndex = desIndex;
        // this.desiredJavaLocale = jdesired;
        this.supportedLocale = supported;
        this.supportedJavaLocale = jsupported;
        this.supportedIndex = suppIndex;
    }
}
// normally the default values, but can be set via constructor
private final int thresholdDistance;
private final int demotionPerAdditionalDesiredLocale;
private final DistanceOption distanceOption;
// built based on application's supported languages in constructor
// supportedLocales and supportedJavaLocales are parallel arrays in builder order.
private final ULocale[] supportedLocales;
private final Locale[] supportedJavaLocales;
// Maps each supported locale to its position in supportedLocales.
private final Map<ULocale, Integer> supportedToIndex;
// Maps each maximized LSR to the positions of the supported locales that have it.
private final Map<LSR, Indexes> supportedLsrToIndexes;
// Array versions of the supportedLsrToIndexes keys and values.
// The distance lookup loops over the supportedLsrs and returns the index of the best match.
private final LSR[] supportedLsrs;
private final Indexes[] supportedIndexes;
// Returned when nothing matches; defaultLocaleIndex is -1 unless the default
// is the first supported locale.
private final ULocale defaultLocale;
private final Locale defaultJavaLocale;
private final int defaultLocaleIndex;
/**
 * Collects the settings for an XLocaleMatcher.
 * Every setter returns this Builder so that calls can be chained.
 */
public static class Builder {
    /**
     * Supported locales. A Set, to avoid duplicates.
     * Maintains iteration order for consistent matching behavior (first best match wins).
     * Remains null until one of the set/add methods for supported locales is called.
     */
    private Set<ULocale> supportedLocales;
    // -1 = use the default
    private int thresholdDistance = -1;
    // -1 = use the default
    private int demotionPerAdditionalDesiredLocale = -1;
    // null = use the first supported locale
    private ULocale defaultLocale;
    private DistanceOption distanceOption;

    /**
     * Replaces the supported locales with those listed in the string.
     * The string is parsed via LocalePriorityList.add(String).
     * @param locales the languagePriorityList to set
     * @return this Builder object
     */
    public Builder setSupportedLocales(String locales) {
        return setSupportedLocales(LocalePriorityList.add(locales).build());
    }

    /**
     * Replaces the supported locales with the given ones, in iteration order.
     * @param locales the supported locales
     * @return this Builder object
     */
    public Builder setSupportedLocales(Iterable<ULocale> locales) {
        supportedLocales = new LinkedHashSet<>(); // maintain order
        for (ULocale locale : locales) {
            supportedLocales.add(locale);
        }
        return this;
    }

    /**
     * Replaces the supported locales with the given ones, in iteration order.
     * @param locales the supported locales
     * @return this Builder object
     */
    public Builder setSupportedLocales(Collection<ULocale> locales) {
        supportedLocales = new LinkedHashSet<>(locales); // maintain order
        return this;
    }

    /**
     * Replaces the supported locales with the given java.util.Locale objects,
     * each converted via ULocale.forLocale(), in iteration order.
     * @param locales the supported locales
     * @return this Builder object
     */
    public Builder setSupportedJavaLocales(Collection<Locale> locales) {
        supportedLocales = new LinkedHashSet<>(locales.size()); // maintain order
        for (Locale locale : locales) {
            supportedLocales.add(ULocale.forLocale(locale));
        }
        return this;
    }

    /**
     * Appends one supported locale, keeping the ones already present.
     * @param locale the supported locale to add
     * @return this Builder object
     */
    public Builder addSupportedLocale(ULocale locale) {
        if (supportedLocales == null) {
            supportedLocales = new LinkedHashSet<>();
        }
        supportedLocales.add(locale);
        return this;
    }

    /**
     * Appends one supported locale, converted via ULocale.forLocale().
     * @param locale the supported locale to add
     * @return this Builder object
     */
    public Builder addSupportedLocale(Locale locale) {
        return addSupportedLocale(ULocale.forLocale(locale));
    }

    /**
     * Values above 100 are clamped to 100.
     * @param thresholdDistance the thresholdDistance to set, with -1 = default
     * @return this Builder object
     */
    public Builder setThresholdDistance(int thresholdDistance) {
        this.thresholdDistance = Math.min(thresholdDistance, 100);
        return this;
    }

    /**
     * @param demotionPerAdditionalDesiredLocale the demotionPerAdditionalDesiredLocale to set, with -1 = default
     * @return this Builder object
     */
    public Builder setDemotionPerAdditionalDesiredLocale(int demotionPerAdditionalDesiredLocale) {
        this.demotionPerAdditionalDesiredLocale = demotionPerAdditionalDesiredLocale;
        return this;
    }

    /**
     * Set the default language, with null = default = first supported language
     * @param defaultLocale the default language
     * @return this Builder object
     */
    public Builder setDefaultLanguage(ULocale defaultLocale) {
        this.defaultLocale = defaultLocale;
        return this;
    }

    /**
     * Sets the option that is handed to the distance computation.
     * An option can make language differences smaller than script differences.
     * This is used in situations (such as maps) where it is better to fall back
     * to the same script than a similar language.
     * @param distanceOption the distance option
     * @return this Builder object
     */
    public Builder setDistanceOption(DistanceOption distanceOption) {
        this.distanceOption = distanceOption;
        return this;
    }

    /**
     * Creates the matcher from the current settings.
     * @return a new XLocaleMatcher
     */
    public XLocaleMatcher build() {
        return new XLocaleMatcher(this);
    }

    /** Lists only the settings that differ from their unset state. */
    @Override
    public String toString() {
        StringBuilder s = new StringBuilder().append("{XLocaleMatcher.Builder");
        // The set is still null when no supported locale has been set or added.
        if (supportedLocales != null && !supportedLocales.isEmpty()) {
            s.append(" supported={").append(supportedLocales.toString()).append('}');
        }
        if (defaultLocale != null) {
            s.append(" default=").append(defaultLocale.toString());
        }
        if (distanceOption != null) {
            s.append(" distance=").append(distanceOption.toString());
        }
        if (thresholdDistance >= 0) {
            s.append(String.format(" threshold=%d", thresholdDistance));
        }
        if (demotionPerAdditionalDesiredLocale >= 0) {
            s.append(String.format(" demotion=%d", demotionPerAdditionalDesiredLocale));
        }
        return s.append('}').toString();
    }
}
/**
* Returns a builder used in chaining parameters for building a Locale Matcher.
* @return a new Builder object with no settings applied yet
*/
public static Builder builder() {
return new Builder();
}
/**
* Convenience constructor: a matcher with default settings
* for the supported locales listed in a string.
* @param supportedLocales the supported locales, parsed via LocalePriorityList.add(String)
*/
public XLocaleMatcher(String supportedLocales) {
this(builder().setSupportedLocales(supportedLocales));
}
/**
* Convenience constructor: a matcher with default settings
* for the supported locales of a LocalePriorityList, in its iteration order.
* @param supportedLocales the supported locales
*/
public XLocaleMatcher(LocalePriorityList supportedLocales) {
this(builder().setSupportedLocales(supportedLocales));
}
/**
 * Convenience constructor: a matcher with default settings
 * for the given supported locales, in the set's iteration order.
 * @param supportedLocales the supported locales
 */
public XLocaleMatcher(Set<ULocale> supportedLocales) {
    this(builder().setSupportedLocales(supportedLocales));
}
/**
 * Creates a locale matcher with the given Builder parameters.
 * A builder on which no supported locales were ever set is treated like one
 * with an empty set of supported locales.
 */
private XLocaleMatcher(Builder builder) {
    thresholdDistance = builder.thresholdDistance < 0 ?
            LocaleDistance.INSTANCE.getDefaultScriptDistance() : builder.thresholdDistance;
    // The builder's set is still null when no supported locale was set or added.
    Set<ULocale> builderLocales = builder.supportedLocales;
    // Store the supported locales in input order,
    // so that when different types are used (e.g., java.util.Locale)
    // we can return those by parallel index.
    int supportedLocalesLength = builderLocales == null ? 0 : builderLocales.size();
    supportedLocales = new ULocale[supportedLocalesLength];
    supportedJavaLocales = new Locale[supportedLocalesLength];
    supportedToIndex = new HashMap<>(supportedLocalesLength);
    // We need an unordered map from LSR to first supported locale with that LSR,
    // and an ordered list of (LSR, Indexes).
    // We use a LinkedHashMap for both,
    // and insert the supported locales in the following order:
    // 1. First supported locale.
    // 2. Priority locales in builder order.
    // 3. Remaining locales in builder order.
    supportedLsrToIndexes = new LinkedHashMap<>(supportedLocalesLength);
    Map<LSR, Indexes> otherLsrToIndexes = null;
    LSR firstLSR = null;
    int i = 0;
    if (builderLocales != null) {
        for (ULocale locale : builderLocales) {
            supportedLocales[i] = locale;
            supportedJavaLocales[i] = locale.toLocale();
            // supportedToIndex.putIfAbsent(locale, i)
            Integer oldIndex = supportedToIndex.get(locale);
            if (oldIndex == null) {
                supportedToIndex.put(locale, i);
            }
            LSR lsr = getMaximalLsrOrUnd(locale);
            if (i == 0) {
                firstLSR = lsr;
                supportedLsrToIndexes.put(lsr, new Indexes(0));
            } else if (lsr.equals(firstLSR) || LocaleDistance.INSTANCE.isParadigmLSR(lsr)) {
                addIndex(supportedLsrToIndexes, lsr, i);
            } else {
                // Group 3: collected separately and appended after the loop.
                if (otherLsrToIndexes == null) {
                    otherLsrToIndexes = new LinkedHashMap<>(supportedLocalesLength);
                }
                addIndex(otherLsrToIndexes, lsr, i);
            }
            ++i;
        }
    }
    if (otherLsrToIndexes != null) {
        supportedLsrToIndexes.putAll(otherLsrToIndexes);
    }
    int numSuppLsrs = supportedLsrToIndexes.size();
    supportedLsrs = supportedLsrToIndexes.keySet().toArray(new LSR[numSuppLsrs]);
    supportedIndexes = supportedLsrToIndexes.values().toArray(new Indexes[numSuppLsrs]);
    // Default result: the builder's explicit default, else the first supported locale,
    // else null when there are no supported locales at all.
    ULocale def;
    Locale jdef = null;
    int idef = -1;
    if (builder.defaultLocale != null) {
        def = builder.defaultLocale;
    } else if (supportedLocalesLength > 0) {
        def = supportedLocales[0]; // first language
        jdef = supportedJavaLocales[0];
        idef = 0;
    } else {
        def = null;
    }
    if (jdef == null && def != null) {
        jdef = def.toLocale();
    }
    defaultLocale = def;
    defaultJavaLocale = jdef;
    defaultLocaleIndex = idef;
    demotionPerAdditionalDesiredLocale = builder.demotionPerAdditionalDesiredLocale < 0 ?
            LocaleDistance.INSTANCE.getDefaultRegionDistance() + 1 :
            builder.demotionPerAdditionalDesiredLocale;
    distanceOption = builder.distanceOption;
}
// Records that supported locale number i has the given LSR:
// starts a new Indexes entry for an LSR not seen before, else appends to the existing one.
private static final void addIndex(Map<LSR, Indexes> lsrToIndexes, LSR lsr, int i) {
    Indexes indexes = lsrToIndexes.get(lsr);
    if (indexes == null) {
        lsrToIndexes.put(lsr, new Indexes(i));
    } else {
        indexes.add(i);
    }
}
// Maximizes the locale into an LSR, except that "und" maps to the fixed UND_LSR
// rather than being run through likely-subtags maximization.
private static final LSR getMaximalLsrOrUnd(ULocale locale) {
    return locale.equals(UND_LOCALE)
            ? UND_LSR
            : XLikelySubtags.INSTANCE.makeMaximizedLsrFrom(locale);
}
/** Convenience method: best supported locale for a single desired locale. */
public ULocale getBestMatch(ULocale ulocale) {
    // NULL_ITERATOR: there are no further desired locales.
    Result best = getBestMatch(ulocale, NULL_ITERATOR);
    return best.supportedLocale;
}
/** Convenience method: the desired locales are parsed from a string via LocalePriorityList. */
public ULocale getBestMatch(String languageList) {
    LocalePriorityList desired = LocalePriorityList.add(languageList).build();
    return getBestMatch(desired, null);
}
/** Convenience method: the desired locales are given as arguments, best first. */
public ULocale getBestMatch(ULocale... locales) {
    List<ULocale> desired = Arrays.asList(locales);
    return getBestMatch(desired, null);
}
/** Convenience method: like getBestMatch(Iterable, Output) without reporting the best desired locale. */
public ULocale getBestMatch(Iterable<ULocale> desiredLocales) {
    return getBestMatch(desiredLocales, null);
}
/**
 * Get the best match between the desired languages and supported languages
 * @param desiredLocales Typically the supplied user's languages, in order of preference, with best first.
 * @param outputBestDesired The one of the desired languages that matched best (can be null).
 * Set to null if the best match was not below the threshold distance.
 * @return the best match; the default locale if desiredLocales is empty.
 */
public ULocale getBestMatch(Iterable<ULocale> desiredLocales, Output<ULocale> outputBestDesired) {
    Iterator<ULocale> desiredIter = desiredLocales.iterator();
    if (!desiredIter.hasNext()) {
        // Nothing desired: there is no best desired locale to report.
        if (outputBestDesired != null) {
            outputBestDesired.value = null;
        }
        if (TRACE_MATCHER) {
            System.err.printf("Returning default %s: no desired languages\n", defaultLocale);
        }
        return defaultLocale;
    }
    // Split off the first desired locale; the iterator supplies the rest.
    ULocale desiredLocale = desiredIter.next();
    return getBestMatch(desiredLocale, desiredIter, outputBestDesired);
}
/**
 * Runs the matching and unpacks the Result for the ULocale-based public methods.
 * @param desiredLocale First desired locale.
 * @param remainingIter Remaining desired locales, null or empty if none.
 * @param outputBestDesired If not null,
 * will be set to the desired locale that matches the best supported one
 * (null when the default locale is returned for lack of a match).
 * @return the best supported locale.
 */
private ULocale getBestMatch(ULocale desiredLocale, Iterator<ULocale> remainingIter,
        Output<ULocale> outputBestDesired) {
    Result result = getBestMatch(desiredLocale, remainingIter);
    if (outputBestDesired != null) {
        outputBestDesired.value = result.desiredLocale;
    }
    return result.supportedLocale;
}
/**
 * Core matching loop behind the public getBestMatch/getBestJavaMatch methods.
 * Walks the desired locales in order. An exact locale match or an exact
 * maximized-LSR match returns immediately; otherwise the closest supported LSR
 * found so far is remembered and returned at the end.
 * @param desiredLocale First desired locale.
 * @param remainingIter Remaining desired locales, null or empty if none.
 * @return the match; when nothing is close enough, a Result holding the default locale
 * with a null desired locale and a desired index of -1.
 */
private Result getBestMatch(ULocale desiredLocale, Iterator<ULocale> remainingIter) {
    int desiredIndex = 0;
    int bestDesiredIndex = -1;
    ULocale bestDesiredLocale = null;
    int bestSupportedLsrIndex = 0;
    // bestDistance is the limit handed to the distance lookup. It starts at the threshold,
    // is lowered to each distance found, and is further reduced by the demotion
    // before each additional desired locale is tried.
    for (int bestDistance = thresholdDistance; bestDistance > 0;
            bestDistance -= demotionPerAdditionalDesiredLocale) {
        // Quick check for exact locale match.
        Integer supportedIndex = supportedToIndex.get(desiredLocale);
        if (supportedIndex != null) {
            if (TRACE_MATCHER) {
                System.err.printf("Returning %s: desired=supported\n", desiredLocale);
            }
            int suppIndex = supportedIndex;
            return new Result(desiredLocale, supportedLocales[suppIndex],
                    supportedJavaLocales[suppIndex], desiredIndex, suppIndex);
        }
        // Quick check for exact maximized LSR.
        LSR desiredLSR = getMaximalLsrOrUnd(desiredLocale);
        Indexes indexes = supportedLsrToIndexes.get(desiredLSR);
        if (indexes != null) {
            // If this is a supported LSR, return the first locale.
            // We already know the exact locale isn't there.
            int suppIndex = indexes.getFirst();
            ULocale result = supportedLocales[suppIndex];
            if (TRACE_MATCHER) {
                System.err.printf("Returning %s: desiredLSR=supportedLSR\n", result);
            }
            return new Result(desiredLocale, result,
                    supportedJavaLocales[suppIndex], desiredIndex, suppIndex);
        }
        // The lookup result packs the supported-LSR index above the low 8 bits,
        // which hold the distance; a negative result is treated as "no match".
        int bestIndexAndDistance = LocaleDistance.INSTANCE.getBestIndexAndDistance(
                desiredLSR, supportedLsrs, bestDistance, distanceOption);
        if (bestIndexAndDistance >= 0) {
            bestDistance = bestIndexAndDistance & 0xff;
            bestDesiredIndex = desiredIndex;
            bestDesiredLocale = desiredLocale;
            bestSupportedLsrIndex = bestIndexAndDistance >> 8;
            if (bestDistance == 0) {
                break;
            }
        }
        if (remainingIter == null || !remainingIter.hasNext()) {
            break;
        }
        desiredLocale = remainingIter.next();
        ++desiredIndex;
    }
    if (bestDesiredIndex < 0) {
        if (TRACE_MATCHER) {
            System.err.printf("Returning default %s: no good match\n", defaultLocale);
        }
        return new Result(null, defaultLocale, defaultJavaLocale, -1, defaultLocaleIndex);
    }
    // Pick exact match if there is one.
    // The length of the list is normally 1.
    Indexes bestSupportedIndexes = supportedIndexes[bestSupportedLsrIndex];
    int suppIndex;
    // Indexes.get() returns -1 past the end, which terminates the loop.
    for (int i = 0; (suppIndex = bestSupportedIndexes.get(i)) >= 0; ++i) {
        ULocale locale = supportedLocales[suppIndex];
        if (bestDesiredLocale.equals(locale)) {
            if (TRACE_MATCHER) {
                System.err.printf("Returning %s: desired=best matching supported language\n",
                        bestDesiredLocale);
            }
            return new Result(bestDesiredLocale, locale,
                    supportedJavaLocales[suppIndex], bestDesiredIndex, suppIndex);
        }
    }
    // Otherwise return the first of the supported languages that share the best-matching LSR.
    suppIndex = bestSupportedIndexes.getFirst();
    ULocale result = supportedLocales[suppIndex];
    if (TRACE_MATCHER) {
        System.err.printf("Returning %s: first best matching supported language\n", result);
    }
    return new Result(bestDesiredLocale, result,
            supportedJavaLocales[suppIndex], bestDesiredIndex, suppIndex);
}
/**
 * Get the best match between the desired language and supported languages
 * @param desiredLocale the supplied user's language.
 * @param outputBestDesired If not null, receives the desired locale when it matched.
 * Set to null if the best match was not below the threshold distance.
 * @return the best match.
 */
public ULocale getBestMatch(ULocale desiredLocale, Output<ULocale> outputBestDesired) {
    // null: there are no further desired locales.
    return getBestMatch(desiredLocale, null, outputBestDesired);
}
/**
 * Converts Locales to ULocales on the fly.
 * Remembers every java.util.Locale it has handed out, by position,
 * so that getJavaLocale() can return the original object.
 */
private static final class LocalesWrapper implements Iterator<ULocale> {
    private final Iterator<Locale> locales;
    // Cache locales to avoid conversion of the result.
    // The first two are kept in fields; the list is allocated only for a third one.
    private Locale first, second;
    private List<Locale> remaining;

    LocalesWrapper(Iterator<Locale> locales) {
        this.locales = locales;
    }

    @Override
    public boolean hasNext() {
        return locales.hasNext();
    }

    @Override
    public ULocale next() {
        Locale locale = locales.next();
        // A null field means "slot still free", so this assumes non-null input locales.
        if (first == null) {
            first = locale;
        } else if (second == null) {
            second = locale;
        } else {
            if (remaining == null) {
                remaining = new ArrayList<>();
            }
            remaining.add(locale);
        }
        return ULocale.forLocale(locale);
    }

    // Returns the i-th Locale that next() has consumed so far.
    Locale getJavaLocale(int i) {
        if (i == 0) {
            return first;
        } else if (i == 1) {
            return second;
        } else {
            // TODO: test code coverage
            return remaining.get(i - 2);
        }
    }

    @Override
    public void remove() {
        throw new UnsupportedOperationException();
    }
}
/**
 * Get the best match between the desired and supported languages, using java.util.Locale.
 * @param desiredLocales the user's languages, in order of preference, with best first.
 * @param outputBestDesired If not null, receives the desired Locale that matched best,
 * or null if nothing matched or desiredLocales is empty.
 * @return the best supported Locale; the default locale if nothing matched.
 */
public Locale getBestJavaMatch(Iterable<Locale> desiredLocales, Output<Locale> outputBestDesired) {
    Iterator<Locale> desiredIter = desiredLocales.iterator();
    if (!desiredIter.hasNext()) {
        if (outputBestDesired != null) {
            outputBestDesired.value = null;
        }
        if (TRACE_MATCHER) {
            System.err.printf("Returning default %s: no desired languages\n", defaultLocale);
        }
        return defaultJavaLocale;
    }
    // The wrapper converts each Locale to a ULocale as it is consumed and keeps the
    // original, so the best desired Locale can be reported by index afterwards.
    LocalesWrapper wrapper = new LocalesWrapper(desiredIter);
    ULocale desiredLocale = wrapper.next();
    // The wrapper doubles as the iterator over the remaining desired locales;
    // without it only the first desired locale would take part in the matching.
    Result result = getBestMatch(desiredLocale, wrapper);
    if (outputBestDesired != null) {
        outputBestDesired.value = result.desiredIndex >= 0 ?
                wrapper.getJavaLocale(result.desiredIndex) : null;
    }
    return result.supportedJavaLocale;
}
/**
 * Get the best match between one desired language and the supported languages,
 * using java.util.Locale.
 * @param desiredLocale the user's language.
 * @param outputBestDesired If not null, receives desiredLocale when it matched, else null.
 * @return the best supported Locale; the default locale if nothing matched.
 */
public Locale getBestJavaMatch(Locale desiredLocale, Output<Locale> outputBestDesired) {
    ULocale desiredULocale = ULocale.forLocale(desiredLocale);
    Result result = getBestMatch(desiredULocale, NULL_ITERATOR);
    if (outputBestDesired != null) {
        // A negative desired index means that the default locale was returned.
        outputBestDesired.value = result.desiredIndex >= 0 ? desiredLocale : null;
    }
    return result.supportedJavaLocale;
}
/**
 * Combine features of the desired locale into those of the supported, and return result.
 * The region, the variant and the extensions of the desired locale, where present,
 * replace those of the supported locale.
 * @param bestSupported the supported locale to start from
 * @param bestDesired the desired locale whose features are copied; may be null
 * @return bestSupported itself if bestDesired is null or equal to it, else the combined locale
 */
public static ULocale combine(ULocale bestSupported, ULocale bestDesired) {
    // for examples of extensions, variants, see
    // http://unicode.org/repos/cldr/tags/latest/common/bcp47/
    // http://unicode.org/repos/cldr/tags/latest/common/validity/variant.xml
    // Test for null before anything else; there is nothing to copy in either case.
    if (bestDesired == null || bestSupported.equals(bestDesired)) {
        return bestSupported;
    }
    // add region, variants, extensions
    ULocale.Builder b = new ULocale.Builder().setLocale(bestSupported);
    // copy the region from the desired, if there is one
    String region = bestDesired.getCountry();
    if (!region.isEmpty()) {
        b.setRegion(region);
    }
    // copy the variants from desired, if there is one
    // note that this will override any subvariants. Eg "sco-ulster-fonipa" + "…-fonupa" => "sco-fonupa" (nuking ulster)
    String variants = bestDesired.getVariant();
    if (!variants.isEmpty()) {
        b.setVariant(variants);
    }
    // copy the extensions from desired, if there are any
    // note that this will override any subkeys. Eg "th-u-nu-latn-ca-buddhist" + "…-u-nu-native" => "th-u-nu-native" (nuking calendar)
    for (char extensionKey : bestDesired.getExtensionKeys()) {
        b.setExtension(extensionKey, bestDesired.getExtension(extensionKey));
    }
    return b.build();
}
/**
 * Returns the distance between the two languages. The values are not necessarily symmetric.
 * A language is first maximized with add likely subtags, then compared.
 * @param desired A locale desired by the user
 * @param supported A locale supported by a program.
 * @return A return of 0 is a complete match, and 100 is a failure case (above the thresholdDistance).
 */
public int distance(ULocale desired, ULocale supported) {
    LSR desiredLsr = XLikelySubtags.INSTANCE.makeMaximizedLsrFrom(desired);
    LSR[] candidates = { XLikelySubtags.INSTANCE.makeMaximizedLsrFrom(supported) };
    int indexAndDistance = LocaleDistance.INSTANCE.getBestIndexAndDistance(
            desiredLsr, candidates, thresholdDistance, distanceOption);
    // Only the low 8 bits are returned; the bits above them are dropped.
    return indexAndDistance & 0xff;
}
/** Convenience method: distance between two locales given as locale ID strings. */
public int distance(String desiredLanguage, String supportedLanguage) {
    ULocale desired = new ULocale(desiredLanguage);
    ULocale supported = new ULocale(supportedLanguage);
    return distance(desired, supported);
}
/** Lists the supported locales in order, followed by the matcher's settings. */
@Override
public String toString() {
    StringBuilder s = new StringBuilder().append("{XLocaleMatcher");
    if (supportedLocales.length > 0) {
        s.append(" supported={").append(supportedLocales[0].toString());
        for (int i = 1; i < supportedLocales.length; ++i) {
            // Index with i: each supported locale is listed once, in order.
            s.append(", ").append(supportedLocales[i].toString());
        }
        s.append('}');
    }
    // Objects.toString() because the default locale is null when there are
    // no supported locales and no explicit default.
    s.append(" default=").append(Objects.toString(defaultLocale));
    if (distanceOption != null) {
        s.append(" distance=").append(distanceOption.toString());
    }
    if (thresholdDistance >= 0) {
        s.append(String.format(" threshold=%d", thresholdDistance));
    }
    s.append(String.format(" demotion=%d", demotionPerAdditionalDesiredLocale));
    return s.append('}').toString();
}
/**
 * Return the inverse of the distance, scaled to a fraction:
 * that is, (100-distance(desired, supported))/100.0
 */
public double match(ULocale desired, ULocale supported) {
    int d = distance(desired, supported);
    return (100 - d) / 100.0;
}
/**
* Returns a fraction between 0 and 1, where 1 means that the languages are a
* perfect match, and 0 means that they are completely different. This is (100-distance(desired, supported))/100.0.
* <p>
* Note that
* the precise values may change over time; no code should be made dependent
* on the values remaining constant.
* <p>
* This form simply delegates to match(desired, supported);
* the two maximized locales are ignored.
* @param desired Desired locale
* @param desiredMax Maximized locale (using likely subtags); ignored
* @param supported Supported locale
* @param supportedMax Maximized locale (using likely subtags); ignored
* @return value between 0 and 1, inclusive.
* @deprecated Use the form with 2 parameters instead.
*/
@Deprecated
public double match(ULocale desired, ULocale desiredMax, ULocale supported, ULocale supportedMax) {
return match(desired, supported);
}
/**
* Canonicalize a locale (language). Note that for now, it is canonicalizing
* according to CLDR conventions (he vs iw, etc), since that is what is needed
* for likelySubtags.
* <p>
* NOTE(review): not implemented yet. The body is a stub that returns null
* for every input, so callers must not rely on the result.
* @param ulocale language/locale code
* @return ULocale with remapped subtags (intended); currently always null.
* @stable ICU 4.4
*/
public ULocale canonicalize(ULocale ulocale) {
// TODO
return null;
}
/**
* Returns the threshold that this matcher was built with: the value from the Builder,
* or the default script distance of LocaleDistance if the Builder's value was negative.
* @return the thresholdDistance. Any distance above this value is treated as a match failure.
*/
public int getThresholdDistance() {
return thresholdDistance;
}
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy