All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.bitbucket.eunjeon.seunjeon.LNode.scala Maven / Gradle / Ivy

The newest version!
package org.bitbucket.eunjeon.seunjeon

import scala.collection.JavaConverters._


object LNode {

  /**
    * Splits `node` into its component nodes when its morpheme type is
    * `MorphemeType.PREANALYSIS`; otherwise returns the node unchanged.
    *
    * @param node node to inspect
    * @return the component nodes, or `Seq(node)` for any other morpheme type
    */
  def dePreAnalysis(node: LNode): Seq[LNode] =
    splitWhen(node.morpheme.getMType == MorphemeType.PREANALYSIS, node)

  /**
    * Splits `node` into its component nodes when its morpheme type is
    * `MorphemeType.COMPOUND`; otherwise returns the node unchanged.
    *
    * @param node node to inspect
    * @return the component nodes, or `Seq(node)` for any other morpheme type
    */
  def deCompound(node: LNode): Seq[LNode] =
    splitWhen(node.morpheme.getMType == MorphemeType.COMPOUND, node)

  /**
    * Splits `node` into its component nodes when its morpheme type is
    * `MorphemeType.INFLECT`; otherwise returns the node unchanged.
    *
    * @param node node to inspect
    * @return the component nodes, or `Seq(node)` for any other morpheme type
    */
  def deInflect(node: LNode): Seq[LNode] =
    splitWhen(node.morpheme.getMType == MorphemeType.INFLECT, node)

  /**
    * Builds one node per component morpheme returned by
    * `node.morpheme.deComposite()`, skipping hidden morphemes.
    *
    * Offsets are assigned consecutively starting at `node.beginOffset`. A
    * component whose surface begins with a Hangul jamo character starts one
    * position before the end of the previous component (presumably because
    * the jamo is part of the preceding syllable -- TODO confirm). Every
    * component node receives `node.accumulatedCost`.
    *
    * @param node node to split
    * @return the component nodes; `Seq(node)` if splitting fails with a
    *         non-fatal exception
    */
  def deComposite(node: LNode): Seq[LNode] =
    try {
      val visible = node.morpheme.deComposite().filterNot(m => isHideMorpheme(m))
      // Fold strictly so the running end position is final before it is checked below.
      val (parts, endPos) =
        visible.foldLeft((Vector.empty[LNode], node.beginOffset)) {
          case ((acc, pos), morpheme) =>
            val surface = morpheme.getSurface
            // `.head` throws on an empty surface; that is handled by the catch below.
            val start = if (isJamo(surface.head)) pos - 1 else pos
            val end = start + surface.length
            (acc :+ LNode(morpheme, start, end, node.accumulatedCost), end)
        }
      // Defensive code: if the components span more characters than the
      // original surface, drop the last component.
      if ((endPos - node.beginOffset) > node.morpheme.getSurface.length) parts.dropRight(1)
      else parts
    } catch {
      // TODO: should a warning be emitted here?
      // Only non-fatal failures fall back to the unsplit node; fatal errors
      // (OutOfMemoryError, InterruptedException, ...) are allowed to propagate.
      case scala.util.control.NonFatal(_) => Seq(node)
    }

  /** Returns the decomposed nodes when `matches` is true, otherwise just `node`. */
  private def splitWhen(matches: Boolean, node: LNode): Seq[LNode] =
    if (matches) deComposite(node) else Seq(node)

  // TODO: remove it from dict. and no see feature(0) in run-time.
  //  its cause of resource(processing, usage memory)
  /** True for the morpheme with surface "아" and feature head "EC", which is left out of decomposition results. */
  private def isHideMorpheme(morpheme: Morpheme): Boolean =
    morpheme.getSurface == "아" && morpheme.getFeatureHead == "EC"

  /** True when `char` lies in U+1100..U+11FF (Hangul Jamo) or U+3130..U+318F (Hangul Compatibility Jamo). */
  private def isJamo(char: Char): Boolean = {
    val hangulJamo = '\u1100' <= char && char <= '\u11FF'
    val compatibilityJamo = '\u3130' <= char && char <= '\u318F'
    hangulJamo || compatibilityJamo
  }
}

/**
  * A node of the lattice.
  *
  * @param morpheme        the morpheme held by this node
  * @param beginOffset     begin offset
  * @param endOffset       end offset
  * @param accumulatedCost accumulated cost; defaults to `Int.MaxValue`
  */
case class LNode(
    morpheme: Morpheme,
    beginOffset: Int, // TODO: rename to startOffset.
    endOffset: Int,
    var accumulatedCost: Int = Int.MaxValue
) extends OffsetNode {

  // Starts out as null; nothing in this class assigns it.
  var leftNode: LNode = null

  /** Delegates to `LNode.deCompound` with this node. */
  def deCompound(): Seq[LNode] = LNode.deCompound(this)

  /** Result of `deCompound()` exposed as a `java.util.List`. */
  def deCompoundJava(): java.util.List[LNode] = {
    val parts = deCompound()
    parts.asJava
  }

  /** Delegates to `LNode.deInflect` with this node. */
  def deInflect(): Seq[LNode] = LNode.deInflect(this)

  /** Result of `deInflect()` exposed as a `java.util.List`. */
  def deInflectJava(): java.util.List[LNode] = {
    val parts = deInflect()
    parts.asJava
  }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy