All downloads are free. The search and download functionality uses the official Maven repository.

org.openstreetmap.atlas.locale.IsoCountryFuzzyMatcher Maven / Gradle / Ivy

There is a newer version: 7.0.8
Show newest version
package org.openstreetmap.atlas.locale;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Map.Entry;
import java.util.stream.Collectors;

import org.apache.commons.text.similarity.LevenshteinDistance;
import org.openstreetmap.atlas.exception.CoreException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Extends {@link IsoCountry} functionality with fuzzy matching for display names. Note that this
 * functionality is very simple, and assumes dependence on English country name representations.
 *
 * @author lcram
 */
public final class IsoCountryFuzzyMatcher
{
    private static final Logger logger = LoggerFactory.getLogger(IsoCountryFuzzyMatcher.class);

    /**
     * Provides closest IsoCountries for a country display name. If the given display name does not
     * perfectly match a valid IsoCountry, this will return the closest string match up to number of
     * matches.
     *
     * @param number
     *            the number of matches to show
     * @param displayCountry
     *            the display country name, e.g. "united stats" (which does not match any display
     *            country)
     * @return an Optional containing the IsoCountry if present
     */
    public static List forDisplayCountryTopMatches(final int number,
            final String displayCountry)
    {
        if (displayCountry != null)
        {
            final List results = new ArrayList<>();
            if (IsoCountry.DISPLAY_COUNTRY_TO_ISO2.containsKey(displayCountry))
            {

                results.add(IsoCountry.ISO_COUNTRIES
                        .get(IsoCountry.DISPLAY_COUNTRY_TO_ISO2.get(displayCountry)));
            }
            else
            {
                final List closestCountries = closestIsoCountries(number, displayCountry);
                if (!closestCountries.isEmpty())
                {
                    logger.debug(
                            "Exact match for {} was not found, returning closest {} matches {}",
                            displayCountry, number, closestCountries);
                    results.addAll(closestCountries.stream()
                            .map(countryString -> IsoCountry.ISO_COUNTRIES
                                    .get(IsoCountry.DISPLAY_COUNTRY_TO_ISO2.get(countryString)))
                            .collect(Collectors.toList()));
                }
            }
            return results;
        }
        return new ArrayList<>();
    }

    /*
     * This algorithm could definitely be improved for cases with multi-word country matching.
     */
    private static List closestIsoCountries(final int number, final String displayCountry)
    {
        if (number <= 0 || number > IsoCountry.ALL_DISPLAY_COUNTRIES.size())
        {
            throw new CoreException("number " + number + " out of range (0, "
                    + IsoCountry.ALL_DISPLAY_COUNTRIES.size() + "]");
        }
        final Map countryRankings = new HashMap<>();
        for (final String countryName : IsoCountry.ALL_DISPLAY_COUNTRIES)
        {
            final String countryNameLowerCase = countryName.toLowerCase();
            final String[] nameComponents = countryNameLowerCase.split("\\s+");
            if (nameComponents.length > 1)
            {
                int bestInterCountryDistance = Integer.MAX_VALUE;
                for (final String nameComponent : nameComponents)
                {
                    final int distance = LevenshteinDistance.getDefaultInstance()
                            .apply(displayCountry, nameComponent);
                    if (distance < bestInterCountryDistance)
                    {
                        bestInterCountryDistance = distance;
                    }
                    countryRankings.put(countryName, bestInterCountryDistance);
                }
            }
            else
            {
                final int distance = LevenshteinDistance.getDefaultInstance().apply(displayCountry,
                        countryNameLowerCase);
                countryRankings.put(countryName, distance);
            }
        }
        final List> entries = new ArrayList<>(countryRankings.entrySet());
        entries.sort(Entry.comparingByValue());

        return entries.subList(0, number).stream().map(Entry::getKey).collect(Collectors.toList());
    }

    private IsoCountryFuzzyMatcher()
    {

    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy