All Downloads are FREE. Search and download functionalities are using the official Maven repository.

skinny.nlp.SkinnyJapaneseAnalyzerFactory.scala Maven / Gradle / Ivy

There is a newer version: 2.5.2
Show newest version
package skinny.nlp

import java.io.{ InputStreamReader, ByteArrayInputStream }
import org.apache.lucene.analysis.ja.{ JapaneseTokenizer, JapaneseAnalyzer }
import org.apache.lucene.analysis.ja.dict.UserDictionary
import org.apache.lucene.analysis.util.CharArraySet
import scala.collection.JavaConverters._
import skinny.util.LoanPattern._

/**
 * Factory for [[SkinnyJapaneseAnalyzer]] instances backed by Lucene's Kuromoji `JapaneseAnalyzer`.
 *
 * Every `create` overload first checks that the Kuromoji analyzer class can be loaded and
 * raises a descriptive `IllegalStateException` when it cannot.
 */
object SkinnyJapaneseAnalyzerFactory {

  /**
   * Fails with a descriptive message when the Kuromoji analyzer class cannot be loaded.
   *
   * @throws java.lang.IllegalStateException if [[kuromojiNotFound]] is `true`
   */
  def ensureKuromojiExistence: Unit = {
    if (kuromojiNotFound) {
      // NOTE(review): the version in the suggested sbt dependency line below is an empty
      // string; confirm which Lucene version should be advertised here.
      throw new IllegalStateException(
        """Kuromoji Analyzer Required
          |
          |-------------------------------------------
          |
          | ***** Kuromoji Analyzer Required *****
          |
          | Japanese converters (katakana, hiragana, romaji) requires Kuromoji analyzer from Apache Lucene. http://lucene.apache.org/
          |
          | Add the following dependency to your build.sbt.
          |
          |   libraryDependencies += "org.apache.lucene" % "lucene-analyzers-kuromoji" % ""
          |
          |-------------------------------------------
          |""".stripMargin
      )
    }
  }

  /**
   * `true` when `org.apache.lucene.analysis.ja.JapaneseAnalyzer` cannot be loaded via
   * `Class.forName`. Being a `lazy val`, the lookup result is cached after the first
   * successful evaluation.
   */
  lazy val kuromojiNotFound: Boolean = {
    try {
      Class.forName("org.apache.lucene.analysis.ja.JapaneseAnalyzer")
      false
    } catch { case scala.util.control.NonFatal(_) => true }
  }

  /**
   * Creates an analyzer whose user dictionary is parsed from the given text.
   *
   * The text is handed to `UserDictionary.open` through a UTF-8 encoded in-memory stream,
   * so the result does not depend on the JVM's default charset.
   *
   * @param dictionaryText user dictionary definition in the format accepted by
   *                       `UserDictionary.open`
   * @return analyzer configured with the parsed user dictionary
   * @throws java.lang.IllegalStateException if the Kuromoji analyzer class cannot be loaded
   */
  def create(dictionaryText: String): SkinnyJapaneseAnalyzer = {
    import java.nio.charset.StandardCharsets.UTF_8

    // Check up front so that a missing Kuromoji jar is reported with the descriptive
    // message before any Kuromoji class (UserDictionary) is touched.
    ensureKuromojiExistence

    // Encode and decode with the same explicit charset; relying on the platform default
    // would replace Japanese characters with '?' on JVMs whose default charset cannot
    // represent them.
    val dictionary: UserDictionary = using(new ByteArrayInputStream(dictionaryText.getBytes(UTF_8))) { stream =>
      using(new InputStreamReader(stream, UTF_8)) { reader =>
        UserDictionary.open(reader)
      }
    }
    create(dictionary)
  }

  /**
   * Creates an analyzer using the given (possibly absent) user dictionary.
   *
   * The underlying `JapaneseAnalyzer` is built with `JapaneseTokenizer.Mode.NORMAL`, an empty
   * `CharArraySet` (created with `ignoreCase = true`) and an empty set of strings, then wrapped
   * in a `KuromojiJapaneseAnalyzer`.
   *
   * @param userDictionary user dictionary to pass to `JapaneseAnalyzer`; defaults to `null`,
   *                       which is forwarded as-is
   * @return analyzer wrapping the configured `JapaneseAnalyzer`
   * @throws java.lang.IllegalStateException if the Kuromoji analyzer class cannot be loaded
   */
  def create(userDictionary: UserDictionary = null): SkinnyJapaneseAnalyzer = {
    ensureKuromojiExistence
    new KuromojiJapaneseAnalyzer(new JapaneseAnalyzer(
      userDictionary,
      JapaneseTokenizer.Mode.NORMAL,
      new CharArraySet(0, true),
      Set.empty[String].asJava
    ))
  }

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy