All Downloads are FREE. Search and download functionalities are using the official Maven repository.

translit.Russian.scala Maven / Gradle / Ivy

The newest version!
package translit

import translit.Helpers._

object Russian extends Language {
  val uniGrams = Map(
    'a' -> 'а',
    'b' -> 'б',
    'c' -> 'ц',
    'd' -> 'д',
    'e' -> 'е',
    'f' -> 'ф',
    'g' -> 'г',
    'h' -> 'х',
    'i' -> 'и',
    'j' -> 'й',
    'k' -> 'к',
    'l' -> 'л',
    'm' -> 'м',
    'n' -> 'н',
    'o' -> 'о',
    'p' -> 'п',
    'q' -> 'щ',
    'r' -> 'р',
    's' -> 'с',
    't' -> 'т',
    'u' -> 'у',
    'v' -> 'в',
    'w' -> 'ш',
    'x' -> 'ж',
    'y' -> 'ы',
    'z' -> 'з'
  )

  // Infer case from previous character
  val uniGramsSpecial = Map(
    '`' -> 'ь',
    '~' -> 'ъ'
  )

  val biGrams = Map(
    "ch" -> 'ч',
    "sh" -> 'ш',
    "zh" -> 'ж',
    "ya" -> 'я',
    "ye" -> 'э',
    "yo" -> 'ё',
    "yu" -> 'ю',
    "kh" -> 'х'
  )

  val triGrams = Map[String, Char]()

  val fourGrams = Map(
    "shch" -> 'щ'
  )

  val escape = Map(
    "ya" -> "ыа",
    "ye" -> "ые",
    "yo" -> "ыо",
    "yu" -> "ыу",
    "shch" -> "шч"
  )
  val escapeCharacter = '\\'

  val uniGramsInv = uniGrams.toList.map(_.swap).toMap
  val uniGramsSpecialInv = uniGramsSpecial.toList.map(_.swap).toMap
  val biGramsInv = biGrams.toList.map(_.swap).toMap
  val triGramsInv = triGrams.toList.map(_.swap).toMap
  val fourGramsInv = fourGrams.toList.map(_.swap).toMap

  // y after m/n/r/t/v will be rendered as ы unless it is iotated
  val yLetters = Set("my", "ny", "ry", "ty", "vy")

  // If the y is iotated, render it as я, э, ё or ю
  val iotatedLetters = Set("ya", "ye", "yo", "yu")

  override def latinToCyrillicIncremental(
    latin: String, cyrillic: String, append: Char
  ): (Int, String) = {
    val text = latin + append
    val ofs = text.length
    val result =
      if (ofs >= 5 && text.takeRight(5).toLowerCase == "sh\\ch") {
        val cyrillic = 'ч'
        val result   = if (text(ofs - 1).isUpper) cyrillic.toUpper else cyrillic
        (-2, result.toString)
      } else if (ofs >= 4 &&
          fourGrams.contains(text.substring(ofs - 4, ofs).toLowerCase)) {
        val chars = text.substring(ofs - 4, ofs)
        val cyrillic = fourGrams(chars.toLowerCase)
        (-2, restoreCaseFirst(chars, cyrillic).toString)
      } else if (ofs >= 3 &&
        escape.keySet.contains(
          (text(ofs - 3).toString + text(ofs - 1)).toLowerCase
        ) && text(ofs - 2) == escapeCharacter
      ) {
        val cyrillic = uniGrams.getOrElse(text(ofs - 1).toLower, text(ofs - 1))
        val result = if (text(ofs - 1).isUpper) cyrillic.toUpper else cyrillic
        (-1, result.toString)
      } else if (ofs >= 3
        && yLetters.contains(text.substring(ofs - 3, ofs - 1).toLowerCase)
        && !iotatedLetters.contains(text.substring(ofs - 2, ofs).toLowerCase)
      ) {
        val cyrillic = uniGrams.getOrElse(text(ofs - 1).toLower, text(ofs - 1))
        (0, (if (text(ofs - 1).isUpper) cyrillic.toUpper else cyrillic).toString)
      } else if (ofs >= 3 &&
                 triGrams.contains(text.substring(ofs - 3, ofs).toLowerCase)) {
        val chars = text.substring(ofs - 3, ofs)
        val cyrillic = triGrams(chars.toLowerCase)
        (-2, restoreCaseFirst(chars, cyrillic).toString)
      } else if (ofs >= 2 &&
                 biGrams.contains(text.substring(ofs - 2, ofs).toLowerCase)) {
        val chars = text.substring(ofs - 2, ofs)
        val cyrillic = biGrams(chars.toLowerCase)
        (-1, restoreCaseFirst(chars, cyrillic).toString)
      } else if (uniGrams.contains(text(ofs - 1).toLower)) {
        val cyrillic = uniGrams(text(ofs - 1).toLower)
        (0, (if (text(ofs - 1).isUpper) cyrillic.toUpper else cyrillic).toString)
      } else if (ofs >= 2 && uniGramsSpecial.contains(text(ofs - 1))) {
        val result =
          if (ofs >= 3 && text(ofs - 2).isUpper && text(ofs - 3).isUpper)
            uniGramsSpecial(text(ofs - 1)).toUpper
          else uniGramsSpecial(text(ofs - 1))
        (0, result.toString)
      } else {
        (0, text(ofs - 1).toString)
      }

    if (ofs >= 3 && uniGramsSpecial.contains(text(ofs - 2))) {
      val (l, r) = (text(ofs - 3), text(ofs - 1))

      val letter      = uniGramsSpecial(text(ofs - 2))
      val replace     =
        if (l.isUpper && r.isUpper) letter.toUpper else letter
      val cyrillicOfs = cyrillic.length - 1

      if (replace == cyrillic(cyrillicOfs)) result
      else {
        val updated = replace + cyrillic.substring(
          cyrillicOfs + 1, cyrillic.length + result._1)
        (-updated.length + result._1, updated + result._2)
      }
    } else result
  }

  private def toLatin(letter: Char): String = {
    val isUpper = letter.isUpper
    val letterLc = letter.toLower
    fourGramsInv.get(letterLc).map(applyCase(_, isUpper))
      .orElse(triGramsInv.get(letterLc).map(applyCase(_, isUpper)))
      .orElse(biGramsInv.get(letterLc).map(applyCase(_, isUpper)))
      .orElse(uniGramsInv.get(letterLc).map(x => applyCase(x.toString, isUpper)))
      .orElse(uniGramsSpecialInv.get(letterLc).map(x => applyCase(x.toString, isUpper)))
      .getOrElse(letter.toString)
  }

  override def cyrillicToLatinIncremental(
    cyrillic: String, letter: Char
  ): (Int, String) = {
    val current  = toLatin(letter)
    val toEscape = cyrillic.lastOption
      .map(_.toLower.toString + letter.toLower)
      .exists(escape.values.toList.contains)

    if (toEscape) (0, escapeCharacter + current)
    else {
      val changeCase =
        letter.isUpper &&
        (cyrillic.length == 1 || cyrillic.lastOption.exists(_.isUpper))

      if (!changeCase) (0, current)
      else {
        val mapped = toLatin(cyrillic.last)
        val rest   = mapped.tail
        (-rest.length, rest.toUpperCase + current.toUpperCase)
      }
    }
  }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy