
// kr.bydelta.koala.util.package.scala (Maven / Gradle / Ivy)
package kr.bydelta.koala
/**
* Created by bydelta on 17. 4. 8.
*/
package object util {
/** Vowels ㅏ and ㅑ; position i pairs with position i of [[SecondNeg]]. */
private final val SecondPos: Seq[Char] = List('ㅏ', 'ㅑ')
/** Vowels ㅓ and ㅕ; position i pairs with position i of [[SecondPos]]. */
private final val SecondNeg: Seq[Char] = List('ㅓ', 'ㅕ')
// Ready-made single-character predicates built from the curried helpers of this object.
/** Tests whether a character's final-consonant code is that of ㄹ. */
private lazy val endsWithL: Char => Boolean = (ch: Char) => charEndsWith('ㄹ')(ch)
/** Tests whether a character has no final consonant and its vowel code is that of ㅡ. */
private lazy val endsWithEu: Char => Boolean = (ch: Char) => charEndsWithMo('ㅡ')(ch)
/** Tests whether a character's initial-consonant code is that of ㄴ. */
private lazy val startsWithN: Char => Boolean = (ch: Char) => charStartsWith('ㄴ')(ch)
/** Tests whether a character's initial-consonant code is that of ㅂ. */
private lazy val startsWithB: Char => Boolean = (ch: Char) => charStartsWith('ㅂ')(ch)
/** Tests whether a character's initial-consonant code is that of ㅅ. */
private lazy val startsWithS: Char => Boolean = (ch: Char) => charStartsWith('ㅅ')(ch)
/** Tests whether a character has initial ㅇ and vowel ㅗ. */
private lazy val startsWithOh: Char => Boolean = (ch: Char) => charStartsWithMo('ㅗ')(ch)
/** Tests whether a character has initial ㅇ and vowel ㅏ. */
private lazy val startsWithAh: Char => Boolean = (ch: Char) => charStartsWithMo('ㅏ')(ch)
/** Tests whether a character has initial ㅇ and vowel ㅓ. */
private lazy val startsWithUh: Char => Boolean = (ch: Char) => charStartsWithMo('ㅓ')(ch)
/** Initial-consonant jamo; the array index is the code used for the initial position. */
val HanFirstList: Array[Char] = "ㄱㄲㄴㄷㄸㄹㅁㅂㅃㅅㅆㅇㅈㅉㅊㅋㅌㅍㅎ".toCharArray
/** Vowel jamo; the array index is the code used for the medial position. */
val HanSecondList: Array[Char] = "ㅏㅐㅑㅒㅓㅔㅕㅖㅗㅘㅙㅚㅛㅜㅝㅞㅟㅠㅡㅢㅣ".toCharArray
/** Final-consonant jamo; index 0 holds the NUL placeholder, i.e. "no final consonant". */
val HanLastList: Array[Char] = '\u0000' +: "ㄱㄲㄳㄴㄵㄶㄷㄹㄺㄻㄼㄽㄾㄿㅀㅁㅂㅄㅅㅆㅇㅈㅊㅋㅌㅍㅎ".toCharArray
/**
 * Recombines a sequence that may contain loose Hangul jamo into composed syllables.
 *
 * Characters are consumed left to right. `acc` holds the output built so far in
 * reverse order, so `acc.head` is the most recently emitted character. The
 * recursive call is in tail position.
 *
 * @param seq characters still to be processed
 * @param acc output accumulated so far, most recent character first
 * @return the recombined string
 */
def reunionKorean(seq: Seq[Char], acc: Seq[Char] = Seq.empty): String =
  if (seq.isEmpty) new String(acc.reverse.toArray)
  else {
    val char = seq.head
    val newAcc =
      if (!char.isHangul || acc.isEmpty || !acc.head.isHangul) {
        // Either the previous or the current character is not Hangul: nothing to merge.
        char +: acc
      } else if (HanSecondList.contains(char)) {
        // Current character is a vowel. The previous one is a complete syllable,
        // a bare initial consonant, or some other incomplete character.
        val prev = acc.head
        val jung = HanSecondList.indexOf(char)
        if (HanFirstList.contains(prev)) {
          // Previous character is a bare initial consonant: compose initial + vowel.
          val newChar: Char = reconstructKorean(HanFirstList.indexOf(prev), jung, 0)
          newChar +: acc.tail
        } else if (!prev.isCompleteHangul || !prev.endsWithJongsung) {
          // Previous character is not a complete syllable, or has no final consonant to borrow.
          char +: acc
        } else {
          // Previous character is a complete syllable with a final consonant:
          // move that consonant over to become the initial of a new syllable.
          val jong = prev.getJongsungCode
          val chosung = HanFirstList.indexOf(HanLastList(jong))
          if (chosung < 0) {
            // Compound finals (e.g. ㄳ, ㄵ) cannot serve as an initial consonant.
            // Keep the vowel as-is instead of composing with an invalid index.
            char +: acc
          } else {
            val newFrontChar = (prev - jong).toChar
            val newChar: Char = reconstructKorean(chosung, jung, 0)
            newChar +: newFrontChar +: acc.tail
          }
        }
      } else {
        // Current character is a Hangul character that is not a vowel jamo.
        val prev = acc.head
        val jong = HanLastList.indexOf(char)
        // Attach only to a complete syllable that has no final consonant yet, and
        // only when the consonant is both a valid initial and a valid final.
        // ㄸ, ㅃ and ㅉ are initials but not finals (index -1); adding -1 to the
        // previous syllable would corrupt it.
        if (prev.isCompleteHangul && !prev.endsWithJongsung && HanFirstList.contains(char) && jong > 0) {
          val newChar: Char = (prev + jong).toChar
          newChar +: acc.tail
        } else {
          char +: acc
        }
      }
    reunionKorean(seq.tail, newAcc)
  }
/**
 * Joins a predicate stem with the ending that follows it, applying Korean
 * conjugation rules, and returns the resulting character sequence.
 *
 * Rules are tested in this order; the first that applies wins:
 *  - the ending does not start with Hangul: plain concatenation;
 *  - ㅅ-final stems: 벗/솟/씻/뺏 as verbs, and non-verb stems other than 낫,
 *    keep the ㅅ; any other ㅅ-final stem drops it;
 *  - 듣/깨닫/붇/묻/눋: final ㄷ becomes ㄹ;
 *  - ㅂ-final stems: 돕/겁/곱 merge into an 오-type vowel, the listed regular
 *    stems are kept, every other ㅂ-final stem merges into an 우-type vowel;
 *  - stems in 르 (including 푸르), 푸, 하, 가/오 followed by 아라, and 다 + 아;
 *  - ㅎ-final non-verb stems other than 좋;
 *  - regular elision: stem-final ㅡ is dropped before 아/어;
 *  - regular elision: stem-final ㄹ is dropped before ㄴ, ㅂ, ㅅ and 오;
 *  - regular insertion: 으 is inserted between a stem ending in a final
 *    consonant other than ㄹ and the endings -ㄴ, -ㄹ, -오, -시, -며.
 *
 * When either argument is empty the two are simply concatenated.
 *
 * @param verb   the stem
 * @param isVerb selects the verb variant of the ㅅ, 가/오 and ㅎ rules
 *               (presumably `false` means an adjective stem; confirm with callers)
 * @param rest   the ending to attach
 * @return stem and ending joined with the applicable rule applied
 */
def reduceVerbApply(verb: Seq[Char], isVerb: Boolean, rest: Seq[Char]): Seq[Char] =
  if (verb.isEmpty || rest.isEmpty) {
    // Nothing to conjugate against; avoids failing on `.head` below.
    verb ++ rest
  } else {
    val verbStr = verb.mkString
    val restStr = rest.mkString
    val verbRev = verb.reverse
    val char = verbRev.head
    val next = rest.head

    // The stem without its last syllable.
    def stemInit: Seq[Char] = verbRev.tail.reverse
    // Ending starts with 아- or 어- (initial ㅇ plus ㅏ/ㅓ).
    def nextIsAhOrUh: Boolean = startsWithAh(next) || startsWithUh(next)
    // The second syllable of the ending is 라 once its final consonant is removed.
    // Safe when the ending has only one syllable.
    def followedByRa: Boolean =
      rest.tail.headOption.exists(c => (c - c.getJongsungCode) == '라')

    if (!next.isHangul) {
      verb ++ rest
    } else if ((verbStr.matches("^벗|솟|씻|뺏$") && isVerb) ||
      (char != '낫' && char.getJongsungCode == 19 && !isVerb)) {
      // ㅅ-final stems that keep their ㅅ.
      verb ++ harmony(verbRev, rest)
    } else if (char.getJongsungCode == 19) {
      // Final ㅅ is dropped. The leading syllables of the stem are kept,
      // matching the ㄷ and ㅂ branches below.
      stemInit ++: ((char - char.getJongsungCode).toChar +: harmony(verbRev, rest))
    } else if (verbStr.matches("^듣|깨닫|붇|묻|눋$")) {
      // Final ㄷ (code 7) becomes ㄹ (code 8).
      stemInit ++: ((char + 1).toChar +: harmony(verbRev, rest))
    } else if (verbStr.matches("^돕|겁|곱$")) {
      (char - char.getJongsungCode).toChar +: (addOh(rest.head) +: rest.tail)
    } else if (verbStr.matches("^굽|뽑|씹|업|입|잡|접|좁|집$")) {
      verb ++ harmony(verbRev, rest)
    } else if (char.getJongsungCode == 17) {
      // Final ㅂ is dropped and merged into the ending as an 우-type vowel.
      stemInit ++: ((char - 17).toChar +: (addWoo(rest.head) +: rest.tail))
    } else if (verbStr.matches("^치르|따르|다다르|우러르|들르$") && nextIsAhOrUh) {
      stemInit ++:
        harmony(verbRev.tail,
          reconstructKorean(char.getChosungCode, next.getJungsungCode, next.getJongsungCode) +: rest.tail)
    } else if (verbStr == "푸르" && startsWithUh(next)) {
      // Shift the initial of the ending from ㅇ to ㄹ (six initials earlier).
      verb ++: harmony(verbRev, (next - 6 * JUNGSUNG_RANGE).toChar +: rest.tail)
    } else if (verbStr == "푸" && startsWithUh(next)) {
      // Shift the initial of the ending from ㅇ to ㅍ (six initials later).
      (next + 6 * JUNGSUNG_RANGE).toChar +: rest.tail
    } else if (char == '르' && verbRev.tail.nonEmpty && nextIsAhOrUh) {
      // ㄹ (final code 8) is added to the syllable before 르, and the ending
      // takes initial ㄹ. Needs a syllable before 르; a bare "르" falls through
      // to the ㅡ-elision rule instead of failing.
      verbRev.tail.tail.reverse ++: ((verbRev.tail.head + 8).toChar +:
        harmony(verbRev.tail, (next - 6 * JUNGSUNG_RANGE).toChar +: rest.tail))
    } else if (char == '하' && nextIsAhOrUh) {
      verb ++: harmony(Seq('어'), (next + 2 * JONGSUNG_RANGE).toChar +: rest.tail) //force "ㅕ"
    } else if (isVerb && char == '가' && next == '아' && followedByRa) {
      verb ++: ('거' +: rest.tail)
    } else if (isVerb && char == '오' && next == '아' && followedByRa) {
      verb ++: ('너' +: rest.tail)
    } else if (verbStr == "다" && restStr == "아") {
      "다오".toSeq
    } else if (!isVerb && char.getJongsungCode == 27 && char != '좋') {
      // Final ㅎ (code 27).
      if (next.isIncompleteHangul) {
        stemInit ++ ((char - 27 + HanLastList.indexOf(next)).toChar +: rest.tail)
      } else if (next.getJungsungCode == 18) {
        // Ending vowel is ㅡ: its final consonant replaces the ㅎ.
        stemInit ++ ((char - 27 + next.getJongsungCode).toChar +: rest.tail)
      } else if (nextIsAhOrUh) {
        stemInit ++
          (reconstructKorean(char.getChosungCode, next.getJungsungCode + 1, next.getJongsungCode) +: rest.tail)
      } else
        verb ++ harmony(verbRev, rest)
    } else if (endsWithEu(char) && nextIsAhOrUh) {
      // Stem-final ㅡ is dropped; the stem's initial takes the ending's vowel and final.
      stemInit ++:
        harmony(verbRev.tail,
          reconstructKorean(char.getChosungCode, next.getJungsungCode, next.getJongsungCode) +: rest.tail)
    } else if (endsWithL(char) &&
      (startsWithB(next) || startsWithN(next) || startsWithS(next) || startsWithOh(next))) {
      // Stem-final ㄹ is dropped. Only the ending is harmonised: the stem's own
      // syllable must keep its vowel (살 + 니 gives 사니).
      stemInit ++: ((char - char.getJongsungCode).toChar +: harmony(verbRev, rest))
    } else if (char.endsWithJongsung && !endsWithL(char)) {
      if (next == 'ㄴ' || next == 'ㄹ') {
        // The jamo becomes the final of the inserted 으 (은/을), so it is
        // consumed from the ending rather than kept after it.
        verb ++ harmony(verbRev, reconstructKorean(jung = 18, jong = HanLastList.indexOf(next)) +: rest.tail)
      } else if (next == '오' || next == '시' || next == '며') {
        verb ++ harmony(verbRev, '으' +: rest)
      } else
        verb ++ harmony(verbRev, rest)
    } else {
      verb ++ harmony(verbRev, rest)
    }
  }
/**
 * Rewrites the first character of an ending so that it absorbs the vowel left
 * behind when a stem's final ㅂ is dropped (오-type stems).
 *
 *  - a consonant jamo that can be a final consonant becomes 우 with that final (ㄴ gives 운);
 *  - vowel ㅡ becomes ㅜ;
 *  - vowels ㅏ and ㅓ become ㅘ;
 *  - anything else is returned unchanged.
 *
 * @param ch first character of the ending
 * @return the rewritten character
 */
private def addOh(ch: Char): Char = {
  val jcode = ch.getJungsungCode
  if (ch.isIncompleteHangul) {
    // A jamo ending attaches as the final consonant of 우, in the same way the
    // regular insertion rule builds 은/을. It is not the initial of a new syllable.
    val jong = HanLastList.indexOf(ch)
    if (jong > 0)
      reconstructKorean(cho = HanFirstList.indexOf('ㅇ'), jung = 13, jong = jong)
    else
      ch
  } else if (jcode == 18) // ㅡ->ㅜ
    (ch - JONGSUNG_RANGE * 5).toChar
  else if (jcode == 0) //ㅏ->ㅘ
    (ch + JONGSUNG_RANGE * 9).toChar
  else if (jcode == 4) //ㅓ->ㅘ
    (ch + JONGSUNG_RANGE * 5).toChar
  else
    ch
}
/**
 * Rewrites the first character of an ending so that it absorbs the vowel left
 * behind when a stem's final ㅂ is dropped (우-type stems).
 *
 *  - a consonant jamo that can be a final consonant becomes 우 with that final (ㄴ gives 운);
 *  - vowel ㅡ becomes ㅜ;
 *  - vowels ㅏ and ㅓ become ㅝ;
 *  - anything else is returned unchanged.
 *
 * @param ch first character of the ending
 * @return the rewritten character
 */
private def addWoo(ch: Char): Char = {
  val jcode = ch.getJungsungCode
  if (ch.isIncompleteHangul) {
    // A jamo ending attaches as the final consonant of 우, in the same way the
    // regular insertion rule builds 은/을. It is not the initial of a new syllable.
    val jong = HanLastList.indexOf(ch)
    if (jong > 0)
      reconstructKorean(cho = HanFirstList.indexOf('ㅇ'), jung = 13, jong = jong)
    else
      ch
  } else if (jcode == 18) // ㅡ->ㅜ
    (ch - JONGSUNG_RANGE * 5).toChar
  else if (jcode == 0) //ㅏ->ㅝ
    (ch + JONGSUNG_RANGE * 14).toChar
  else if (jcode == 4) //ㅓ->ㅝ
    (ch + JONGSUNG_RANGE * 10).toChar
  else
    ch
}
/**
 * Adjusts the vowel of the first character of `rest` to agree with the last
 * stem character, given as `front.head`.
 *
 *  - `rest.head` is not a complete syllable: `rest` is returned untouched.
 *  - `front` is empty: ㅏ/ㅑ in `rest.head` become ㅓ/ㅕ.
 *  - `front.head` has vowel ㅏ or ㅗ: ㅓ/ㅕ in `rest.head` become ㅏ/ㅑ.
 *  - otherwise: ㅏ/ㅑ in `rest.head` become ㅓ/ㅕ.
 *
 * @param front stem characters in reverse order (last stem character first)
 * @param rest  the ending; must be non-empty
 * @return `rest`, possibly with its first character's vowel replaced
 */
private def harmony(front: Seq[Char], rest: Seq[Char]) = {
  val target = rest.head

  // Rebuilds `target` with `vowel` replaced by its partner in `to`; initial and final are kept.
  def swapped(from: Seq[Char], to: Seq[Char], vowel: Char) = {
    val partner = HanSecondList.indexOf(to(from.indexOf(vowel)))
    reconstructKorean(target.getChosungCode, partner, target.getJongsungCode) +: rest.tail
  }

  if (!target.isCompleteHangul) rest
  else front.headOption match {
    case None =>
      val restVowel = HanSecondList(target.getJungsungCode)
      if (SecondPos.contains(restVowel)) swapped(SecondPos, SecondNeg, restVowel)
      else rest
    case Some(prev) =>
      val prevVowel = HanSecondList(prev.getJungsungCode)
      val prevIsAhOrOh = prevVowel == 'ㅏ' || prevVowel == 'ㅗ'
      val restVowel = HanSecondList(target.getJungsungCode)
      if (prevIsAhOrOh && SecondNeg.contains(restVowel)) swapped(SecondNeg, SecondPos, restVowel)
      else if (!prevIsAhOrOh && SecondPos.contains(restVowel)) swapped(SecondPos, SecondNeg, restVowel)
      else rest
  }
}
/** True when the initial-consonant code of `ch` equals the index of `jamo` in `HanFirstList`. */
private def charStartsWith(jamo: Char)(ch: Char) = {
  val wanted = HanFirstList.indexOf(jamo)
  ch.getChosungCode == wanted
}
/** True when the final-consonant code of `ch` equals the index of `jamo` in `HanLastList`. */
private def charEndsWith(jamo: Char)(ch: Char) = {
  val wanted = HanLastList.indexOf(jamo)
  ch.getJongsungCode == wanted
}
/** True when `ch` has initial consonant ㅇ and its vowel code equals the index of `mo` in `HanSecondList`. */
private def charStartsWithMo(mo: Char)(ch: Char) =
  if (ch.getChosungCode != HanFirstList.indexOf('ㅇ')) false
  else ch.getJungsungCode == HanSecondList.indexOf(mo)
/** True when `ch` has no final consonant and its vowel code equals the index of `mo` in `HanSecondList`. */
private def charEndsWithMo(mo: Char)(ch: Char) =
  if (ch.endsWithJongsung) false
  else ch.getJungsungCode == HanSecondList.indexOf(mo)
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy