All downloads are free. The search and download functionality uses the official Maven repository.

locales.BCP47.scala Maven / Gradle / Ivy

The newest version!
package locales

/**
 * Regex-based parser for BCP 47 language tags.
 *
 * The building-block patterns are exposed as strings so they can be composed;
 * [[parseTag]] is the entry point that turns a tag into a [[BCP47Tag]].
 */
object BCP47 {
  // The regular expressions are carefully curated to work both in
  // JVM and JS(rhino/node), be careful when editing them
  //
  // NOTE: the number and order of capturing groups in these fragments is
  // relied upon by `parseTag`; adding or removing a group shifts its indices.

  // Up to three "-xxx" extended language subtags (no capturing group)
  lazy val extlang: String = "(?:-[A-Za-z]{3}){0,3}"
  // Captures the 2-3 letter language and its extlang suffix. The 4 letter and
  // 5-8 letter alternatives are matched but NOT captured.
  lazy val language: String = s"([A-Za-z]{2,3})($extlang)?|[A-Za-z]{4}|[A-Za-z]{5,8}"
  lazy val script: String = "[A-Za-z]{4}"
  lazy val region: String = "([A-Za-z]{2}|[0-9]{3})"
  lazy val variant: String = "([A-Za-z0-9]{5,8}|[0-9][A-Za-z0-9]{3})"
  lazy val variantR: scala.util.matching.Regex = s"($variant)".r
  // Any single alphanumeric except 'x'/'X'
  lazy val singleton: String = "[0-9A-WY-Za-wy-z]"
  // A singleton followed by one or more 2-8 character subtags
  lazy val extension: String = s"$singleton(?:-[A-Za-z0-9]{2,8})+"
  lazy val privateUse: String = "x(?:-[A-Za-z0-9]{1,8})+"
  lazy val langtag: String =
    s"^(?:$language)(-$script)?(?:-$region)?((?:-$variant)*)((?:-$extension)*)(-$privateUse)?$$"
  lazy val regular: String = List("art-lojban", "cel-gaulish", "no-bok", "no-nyn",
      "zh-guoyu", "zh-hakka", "zh-min-nan", "zh-min", "zh-xiang").mkString("|")
  lazy val irregular: String =
    List("en-GB-oed", "i-ami", "i-bnn", "i-default", "i-enochian", "i-hak",
        "i-klingon", "i-lux", "i-mingo", "i-navajo", "i-pwn", "i-tao", "i-tay",
        "i-tsu", "sgn-BE-FR", "sgn-BE-NL", "sgn-CH-DE").mkString("|")
  lazy val grandfathered: String = s"($irregular|$regular)"
  // Full pattern with ten capturing groups, in order:
  //  1 grandfathered tag
  //  2 language (2-3 letters)      3 extlang, with leading dash
  //  4 script, with leading dash   5 region
  //  6 all variants, each with a leading dash
  //  7 last single variant (unused by parseTag)
  //  8 all extensions, each with a leading dash
  //  9 private use inside a langtag, with leading "-x-"
  // 10 standalone private use tag, with leading "x-"
  lazy val langtagRegex: scala.util.matching.Regex =
    s"$grandfathered|$langtag|($privateUse)".r

  // ADT for BCP47 results
  sealed trait BCP47Tag
  // `extendedLanguag` is misspelled but is part of the public interface
  // (named arguments / copy), so it is kept as is.
  case class LanguageTag(language: String, extendedLanguag: Option[String],
      script: Option[String], region: Option[String], variant: List[String],
      extension: List[String], privateUse: Option[String])
      extends BCP47Tag
  case class GrandfatheredTag(language: String) extends BCP47Tag
  case class PrivateUseTag(privateUse: String) extends BCP47Tag

  // Remove the initial dash; null or empty input yields None
  @inline private def rd(l: String): Option[String] =
    Option(l).filter(_.nonEmpty).map(_.substring(1))

  // Convert to list removing dashes; null or empty input yields Nil
  @inline private def rdl(l: String): List[String] =
    Option(l)
      .filter(_.nonEmpty)
      .map(_.substring(1).split("-").toList)
      .getOrElse(Nil)

  // Convert to list of extensions, one entry per singleton.
  // "-u-co-phonebk-a-abc" becomes List("u-co-phonebk", "a-abc"): every
  // one-character subtag starts a new extension and the subtags that follow
  // it are appended to that extension.
  @inline private def rde(l: String): List[String] =
    Option(l)
      .filter(_.nonEmpty)
      .map { raw =>
        raw
          .split("-")
          .toList
          .filter(_.nonEmpty)
          // Accumulate in reverse (newest extension first, newest subtag first)
          .foldLeft(List.empty[List[String]]) {
            case (acc, subtag) if subtag.length == 1 =>
              List(subtag) :: acc
            case (current :: done, subtag) =>
              (subtag :: current) :: done
            // A subtag with no preceding singleton; the regex never
            // produces this, so it is simply ignored
            case (Nil, _) =>
              Nil
          }
          .reverse
          .map(_.reverse.mkString("-"))
      }
      .getOrElse(Nil)

  // Cleans up the private use tag by dropping its "-x-" prefix
  @inline private def puc(l: String): Option[String] =
    Option(l).filter(_.nonEmpty).map(_.replaceFirst("-x-", ""))

  /**
   * Parses a BCP 47 language tag.
   *
   * See https://tools.ietf.org/html/bcp47#section-2.1
   *
   * Surrounding whitespace is trimmed before matching. Matching is
   * case-sensitive for the grandfathered list and for the private use `x`.
   * Tags whose primary language is 4-8 letters match the pattern but are not
   * captured, so they currently yield `None`.
   *
   * @param tag the tag to parse; `null` is treated as unparseable
   * @return `Some(LanguageTag)`, `Some(GrandfatheredTag)` or
   *         `Some(PrivateUseTag)` depending on which alternative matched,
   *         `None` when the tag does not match
   */
  def parseTag(tag: String): Option[BCP47Tag] =
    Option(tag).flatMap[BCP47Tag] { raw =>
      raw.trim match {
        // Groups:
        // g: grandfathered
        // l: language
        // el: extended language (with leading dash)
        // s: script
        // r: region
        // v: variants (separated by dash)
        // x: Extensions
        // p: private use subtag of a language tag
        // pu: standalone private use tag
        case langtagRegex(g, l, el, s, r, v, _, x, p, pu) =>
          // Unmatched groups come back as null; check them in priority order
          if (l != null)
            Some(LanguageTag(l, rd(el), rd(s), Option(r), rdl(v), rde(x), puc(p)))
          else if (g != null)
            Some(GrandfatheredTag(g))
          else if (pu != null)
            Some(PrivateUseTag(pu.replaceFirst("x-", "")))
          else
            None

        case _ =>
          None
      }
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy