All downloads are free. Search and download functionality uses the official Maven repository.

kr.bydelta.koala.twt.package.scala Maven / Gradle / Ivy

The newest version!
package kr.bydelta.koala

/**
  * Package for OpenKoreanText tagger
  *
  * @see [[kr.bydelta.koala.twt.SentenceSplitter]] for segmenting sentences
  * @see [[kr.bydelta.koala.twt.Tagger]] for POS tagging
  * @see [[kr.bydelta.koala.twt.Dictionary]] for using user-defined dictionary.
  * @note Dependencies: OpenKoreanText v2.1.2 (For Scala 2.12+) or TwitterKoreanProcessor v4.4.4 (For Scala 2.11)
  */
package object twt {
  /**
    * Translates a Sejong (Standard) POS tag into the tag name used by OpenKoreanText.
    *
    * The mapping is many-to-one: several Sejong tags share one OpenKoreanText name.
    * A tag that matches none of the cases below results in a `scala.MatchError`.
    *
    * @see [[https://docs.google.com/spreadsheets/d/1OGM4JDdLk6URuegFKXg1huuKWynhg_EQnZYgTmG4h0s Conversion Table (Korean)]]
    * @param tag Sejong POS tag value to translate (POSTag type)
    * @return Name of the matching OpenKoreanText POS tag (String)
    */
  def fromSejongPOS(tag: POS.Value): String = {
    // The determiner tag is named "Determiner" when the running Scala version starts with "2.11"
    // and "Modifier" otherwise. Kept as a `def` so it is only evaluated for POS.MM.
    def runsOnScala211: Boolean =
      scala.util.Properties.versionNumberString.startsWith("2.11")

    tag match {
      case POS.NNG | POS.NNB | POS.NNM | POS.NP => "Noun"
      case POS.NNP => "ProperNoun"
      case POS.NR | POS.SN => "Number"
      case POS.VV | POS.VX | POS.VCP | POS.VCN => "Verb"
      case POS.VA => "Adjective"
      case POS.MM if runsOnScala211 => "Determiner"
      case POS.MM => "Modifier"
      case POS.MAG | POS.MAJ => "Adverb"
      case POS.IC => "Exclamation"
      case POS.JKB | POS.JKC | POS.JKG | POS.JKO | POS.JKQ | POS.JKS | POS.JKV | POS.JX => "Josa"
      case POS.JC => "Conjunction"
      case POS.EP => "PreEomi"
      case POS.EF | POS.EC | POS.ETM | POS.ETN => "Eomi"
      case POS.XPV => "VerbPrefix"
      case POS.XSA | POS.XSM | POS.XSN | POS.XSO | POS.XSV => "Suffix"
      case POS.SF => "Punctuation"
      case POS.SS | POS.SP | POS.SE | POS.SO | POS.SW | POS.XR => "Others"
      // XPN shares the "Unknown" name with NF, NV and NA.
      case POS.XPN | POS.NF | POS.NV | POS.NA => "Unknown"
      case POS.SL | POS.SH => "Foreign"
    }
  }

  /**
    * Translates an OpenKoreanText POS tag name into a Sejong (Standard) POS tag.
    *
    * Each recognised name maps to a single Sejong tag; any name not listed below
    * falls back to `POS.SW`.
    *
    * @see [[https://docs.google.com/spreadsheets/d/1OGM4JDdLk6URuegFKXg1huuKWynhg_EQnZYgTmG4h0s Conversion Table (Korean)]]
    * @param tag Name of the OpenKoreanText POS tag to translate (String)
    * @return The Sejong POS tag value chosen for that name (POSTag type)
    */
  def toSejongPOS(tag: String): POS.Value = tag match {
    case "Noun"                    => POS.NNG
    case "ProperNoun"              => POS.NNP
    case "Number"                  => POS.NR
    case "Verb"                    => POS.VV
    case "Adjective"               => POS.VA
    // Both names for the determiner tag are accepted.
    case "Determiner" | "Modifier" => POS.MM
    case "Adverb"                  => POS.MAG
    case "Exclamation"             => POS.IC
    case "Josa"                    => POS.JX
    case "Conjunction"             => POS.JC
    case "PreEomi"                 => POS.EP
    case "Eomi"                    => POS.EF
    case "VerbPrefix"              => POS.XPV
    case "Suffix"                  => POS.XSO
    case "Punctuation"             => POS.SF
    case "Unknown"                 => POS.NA
    case "Foreign" | "Alpha"       => POS.SL
    // Every other name is mapped to POS.SW.
    case _                         => POS.SW
  }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy