All Downloads are FREE. Search and download functionalities are using the official Maven repository.

kr.bydelta.koala.kkma.Parser.scala Maven / Gradle / Ivy

The newest version!
package kr.bydelta.koala.kkma

import kr.bydelta.koala.data.{Sentence => KSent}
import kr.bydelta.koala.traits.CanDepParse
import org.snu.ids.kkma.ma.{Eojeol, MCandidate, MExpression, Sentence}
import org.snu.ids.kkma.sp.{Parser => KParser}

import scala.collection.JavaConverters._

/**
  * 꼬꼬마 의존구문분석기.
  */
class Parser extends CanDepParse {
  /** 의존관계분석기 **/
  lazy val parser = new KParser()
  /** 형태소분석기 **/
  lazy val tagger = new Tagger

  override def parse(sentence: String): Seq[KSent] =
    tagger.tagParagraphOriginal(sentence).map {
      rawSentence =>
        val tagged = tagger.convertSentence(rawSentence)
        parseRaw(rawSentence, tagged)
    }

  override def parse(sentence: KSent): KSent = {
    parseRaw(deParse(sentence), sentence)
  }

  /**
    * 꼬꼬마 형태소분석 결과를 토대로 의존구문분석 진행
    *
    * @param rawSentence 분석할 문장 (꼬꼬마 문장)
    * @param tagged      분석 결과를 입력할 문장 (통합 문장)
    * @return 분석된 결과 (통합 문장).
    */
  private def parseRaw(rawSentence: Sentence, tagged: KSent): KSent = {
    rawSentence.asScala.zipWithIndex.foreach {
      case (e, idx) => e.getFirstMorp.setIndex(idx)
    }
    val morphIdxs = rawSentence.asScala.indices
    val parse = parser.parse(rawSentence)
    val edgeList = parse.getEdgeList
    val nodeList = parse.getNodeList

    edgeList.asScala.foreach {
      e =>
        val from: Int =
          nodeList.get(e.getFromId).getEojeol match {
            case null => -1
            case x => morphIdxs.indexOf(x.getStartIndex)
          }
        val to: Int =
          nodeList.get(e.getToId).getEojeol match {
            case null => -1
            case x => morphIdxs.indexOf(x.getStartIndex)
          }

        val thisWord = to
        val headWord = tagged.applyOrElse(from, (_: Int) => tagged.root)
        val rawTag = e.getRelation
        val tag = KKMAdepTag(rawTag)
        headWord.addDependant(thisWord, tag, rawTag)
    }
    tagged
  }

  /**
    * 통합 분석결과를 꼬꼬마 분석결과로 복원.
    *
    * @param result 복원할 통합 분석결과.
    * @return 복원된 문장.
    */
  private[koala] def deParse(result: KSent): Sentence = {
    val sent = new Sentence
    result.foreach {
      word =>
        val mexp = new MExpression(word.surface,
          new MCandidate(word.head.surface, fromSejongPOS(word.head.tag)))
        word.tail.foreach {
          morph =>
            mexp.add(new MCandidate(morph.surface, fromSejongPOS(morph.tag)))
        }
        sent.add(new Eojeol(mexp))
    }
    sent
  }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy