All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.squarespace.cldrengine.locale.LocaleResolver Maven / Gradle / Ivy

The newest version!
package com.squarespace.cldrengine.locale;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import com.google.gson.JsonArray;
import com.google.gson.JsonObject;
import com.google.gson.JsonParser;
import com.squarespace.cldrengine.api.LanguageTag;
import com.squarespace.cldrengine.api.Pair;
import com.squarespace.cldrengine.internal.LocaleExternalData;
import com.squarespace.cldrengine.utils.StringUtils;

public class LocaleResolver {

  private static final LanguageTag UND = new LanguageTag();

  // Markers that let us quickly determine that a given FastTag field is undefined,
  // while still having the ability to call equals() and hashCode() on it.
  private static final Integer LANGUAGE = 0;
  private static final Integer SCRIPT = 1;
  private static final Integer REGION = 2;

  private static Map LIKELY_SUBTAGS_MAP = loadLikelySubtags();
  private static Map>> LANGUAGE_ALIAS_MAP = loadLanguageAliases();

  // Field flags for match order
  private static int F_LANGUAGE = 1;
  private static int F_SCRIPT = 2;
  private static int F_REGION = 4;

  // Order to match subtags for "add likely subtags" process
  private static int[] MATCH_ORDER = new int[] {
      F_LANGUAGE | F_SCRIPT | F_REGION,
      F_LANGUAGE | F_REGION,
      F_LANGUAGE | F_SCRIPT,
      F_LANGUAGE,
      F_SCRIPT
  };

  /**
   * Substitute all relevant language and region aliases, and then add likely subtags.
   */
  public static LanguageTag resolve(String str) {
    LanguageTag tag = LanguageTagParser.parse(str);
    return resolve(tag);
  }

  /**
   * Substitute all relevant language and region aliases, and then add likely subtags.
   */
  public static LanguageTag resolve(LanguageTag tag) {
    FastTag fast = new FastTag(tag);
    substituteLanguageAliases(fast);
    substituteRegionAliases(fast);
    addLikelySubtags(fast);
    return returnTag(tag, fast);
  }

  /**
   * Add any missing subtags using the likely subtags mapping. For example, this would convert "en" to "en-Latn-US".
   */
  public static LanguageTag addLikelySubtags(String str) {
    LanguageTag tag = LanguageTagParser.parse(str);
    return addLikelySubtags(tag);
  }

  /**
   * Add any missing subtags using the likely subtags mapping. For example, this would convert "en" to "en-Latn-US".
   */
  public static LanguageTag addLikelySubtags(LanguageTag tag) {
    FastTag fast = new FastTag(tag);
    addLikelySubtags(fast);
    return returnTag(tag, fast);
  }

  /**
   * Remove any subtags that would be added by addLikelySubtags(). For example, this would convert "en-Latn-US" to "en".
   */
  public static LanguageTag removeLikelySubtags(String str) {
    LanguageTag tag = LanguageTagParser.parse(str);
    return removeLikelySubtags(tag);
  }

  /**
   * Remove any subtags that would be added by addLikelySubtags(). For example, this would convert "en-Latn-US" to "en".
   */
  public static LanguageTag removeLikelySubtags(LanguageTag tag) {
    FastTag max = new FastTag(tag);
    if (max.language == LANGUAGE || max.script == SCRIPT || max.region == REGION) {
      addLikelySubtags(max);
    }

    FastTag tmp = new FastTag(UND);

    // Using "en-Latn-US" as an example...
    // 1. Match "en-Zzzz-ZZ"
    tmp.language = max.language;
    FastTag match = new FastTag(tmp);
    addLikelySubtags(match);
    if (max.equals(match)) {
      return returnTag(tag, tmp);
    }

    // 2. Match "en-Zzzz-US"
    tmp.region = max.region;
    match = new FastTag(tmp);
    addLikelySubtags(match);
    if (max.equals(match)) {
      tmp.language = max.language;
      return returnTag(tag, tmp);
    }

    // 3. Match "en-Latn-ZZ"
    tmp.region = REGION;
    tmp.script = max.script;
    match = new FastTag(tmp);
    addLikelySubtags(match);
    if (max.equals(match)) {
      return returnTag(tag, tmp);
    }

    // 4. Nothing matched, so return a copy of the original tag.
    return returnTag(tag, max);
  }

  /**
   * Add any missing subtags using the likely subtags mapping. For example, this would convert "en" to "en-Latn-US".
   */
  private static void addLikelySubtags(FastTag fast) {
    FastTag tmp = new FastTag(fast);
    for (int i = 0; i < MATCH_ORDER.length; i++) {
      int flags = MATCH_ORDER[i];
      tmp.setFields(fast, flags);
      FastTag match = LIKELY_SUBTAGS_MAP.get(tmp);
      if (match != null) {
        if (fast.language == LANGUAGE) {
          fast.language = match.language;
        }
        if (fast.script == SCRIPT) {
          fast.script = match.script;
        }
        if (fast.region == REGION) {
          fast.region = match.region;
        }
        break;
      }
    }
  }

  /**
   * Return a new language tag that combines the core fields of the fast tag, with the variant, extensions, and private
   * use field of the original.
   */
  private static LanguageTag returnTag(LanguageTag orig, FastTag fast) {
    return new LanguageTag(
        fast.language == LANGUAGE ? null : (String)fast.language,
        fast.script == SCRIPT ? null : (String)fast.script,
        fast.region == REGION ? null : (String)fast.region,
        orig.variant(),
        orig.extensions(),
        orig.privateUse(),
        orig.extlangs());
  }

  /**
   * Lookup any aliases that match this tag, and replace any undefined subtags.
   */
  private static void substituteLanguageAliases(FastTag dst) {
    List> aliases = LANGUAGE_ALIAS_MAP.get(dst.language);
    if (aliases == null) {
      return;
    }

    for (int i = 0; i < aliases.size(); i++) {
      Pair alias = aliases.get(i);
      FastTag type = alias._1;
      FastTag repl = alias._2;
      boolean exact = type.language.equals(repl.language)
          && type.script.equals(repl.script)
          && type.region.equals(repl.region);
      if ((type.script == SCRIPT && type.region == REGION) || exact) {
        dst.language = repl.language;
        if (dst.script == SCRIPT) {
          dst.script = repl.script;
        }
        if (dst.region == REGION) {
          dst.region = repl.region;
        }
        break;
      }
    }
  }

  /**
   * Replace the tag's region if it has a preferred value.
   */
  private static void substituteRegionAliases(FastTag dst) {
    if (dst.region != REGION) {
      List regions = Utils.REGION_ALIAS_MAP.get(dst.region);
      if (regions != null) {
        // TODO: we currently use only the first region. See note in Typescript
        // cldr-engine project.
        dst.region = regions.get(0);
      }
    }
  }

  private static FastTag parseFastTag(String str) {
    List p = StringUtils.split(str, '-');
    String language = p.get(0);
    String script = "1";
    String region = "2";
    if (p.size() > 1) {
      String e = p.get(1);
      if (!StringUtils.isEmpty(e)) {
        script = e;
      }
    }
    if (p.size() > 2) {
      String e = p.get(2);
      if (!StringUtils.isEmpty(e)) {
        region = e;
      }
    }
    return new FastTag(language, script, region);
  }

  /**
   * Holds core fields of a language tag for faster manipulation.
   */
  static class FastTag {
    Object language;
    Object script;
    Object region;

    FastTag(LanguageTag tag) {
      this.language = tag.hasLanguage() ? tag.language() : LANGUAGE;
      this.script = tag.hasScript() ? tag.script() : SCRIPT;
      this.region = tag.hasRegion() ? tag.region() : REGION;

      // If an extlang subtag exists, replace the language subtag with the first
      // extlang value.
      List extlangs = tag.extlangs();
      if (!extlangs.isEmpty()) {
        this.language = extlangs.get(0);
      }
    }

    FastTag(String language, String script, String region) {
      this.language = language.equals("0") ? LANGUAGE : language;
      this.script = script.equals("1") ? SCRIPT : script;
      this.region = region.equals("2") ? REGION : region;
    }

    FastTag(FastTag tag) {
      this.language = tag.language;
      this.script = tag.script;
      this.region = tag.region;
    }

    void setFields(FastTag src, int flags) {
      this.language = (flags & F_LANGUAGE) == 0 ? LANGUAGE : src.language;
      this.script = (flags & F_SCRIPT) == 0 ? SCRIPT : src.script;
      this.region = (flags & F_REGION) == 0 ? REGION : src.region;
    }

    @Override
    public boolean equals(Object other) {
      if (other instanceof FastTag) {
        FastTag o = (FastTag)other;
        return this.language.equals(o.language)
            && this.script.equals(o.script)
            && this.region.equals(o.region);
      }
      return false;
    }

    @Override
    public int hashCode() {
      int result = 1;
      result = 31 * result + this.language.hashCode();
      result = 31 * result + this.script.hashCode();
      result = 31 * result + this.region.hashCode();
      return result;
    }

    @Override
    public String toString() {
      return "FastTag(" + this.language + ", " + this.script + ", " + this.region + ")";
    }
  }

  private static Map loadLikelySubtags() {
    Map map = new HashMap<>();

    JsonObject root = JsonParser.parseString(LocaleExternalData.LIKELYSUBTAGS).getAsJsonObject();
    JsonArray scriptnames = root.get("_").getAsJsonArray();
    for (String language : root.keySet()) {
      if (language.equals("_")) {
        continue;
      }
      JsonObject scripts = root.get(language).getAsJsonObject();
      for (String script : scripts.keySet()) {
        JsonObject regions = scripts.get(script).getAsJsonObject();
        for (String region : regions.keySet()) {
          String raw = regions.get(region).getAsString();
          List parts = StringUtils.split(raw, '-');
          String l = parts.get(0);
          String s = parts.get(1);
          String r = parts.get(2);
          if (l.isEmpty()) {
            l = language;
          }
          if (r.isEmpty()) {
            r = region;
          }
          if (!s.isEmpty()) {
            s = scriptnames.get(Integer.parseInt(s, 10)).getAsString();
          }
          FastTag key = new FastTag(language, script, region);
          FastTag val = new FastTag(l, s, r);
          map.put(key, val);
        }
      }
    }
    return map;
  }

  private static Map>> loadLanguageAliases() {
    Map>> map = new HashMap<>();
    for (String row : LocaleExternalData.LANGUAGEALIASRAW.split("\\|")) {
      String[] parts = row.split(":");
      FastTag type = parseFastTag(parts[0]);
      FastTag repl = parseFastTag(parts[1]);
      String language = type.language.toString();
      List> pairs = map.get(language);
      if (pairs == null) {
        pairs = new ArrayList<>();
        map.put(language, pairs);
      }
      pairs.add(Pair.of(type, repl));
    }
    return map;
  }



}





© 2015 - 2025 Weber Informatics LLC | Privacy Policy