All downloads are free. Search and download functionality uses the official Maven repository.

epic.preprocess.package.scala Maven / Gradle / Ivy

There is a newer version: 0.4.4
Show newest version
package epic

import java.io.File
import java.net.URL

/**
 * Convenience entry points for turning text into tokenized sentences.
 *
 * The input may be given as a plain `String`, a `URL`, or a `File`; every
 * `preprocess` overload ends up in the `String` variant, which segments the
 * text and tokenizes each resulting piece. The work itself is delegated to
 * `TreebankTokenizer`, `TextExtractor` and `MLSentenceSegmenter`.
 *
 * @author dlwh
 **/
package object preprocess {

  // Segmenter shared by all `preprocess` calls; built lazily on first use.
  // NOTE(review): `.get` throws if `bundled()` has nothing to return
  // (presumably an Option or Try -- confirm); there is no fallback here.
  private lazy val bundledSegmenter = MLSentenceSegmenter.bundled().get

  /**
   * Splits a single sentence into tokens by applying `TreebankTokenizer`.
   *
   * @param sentence the text to tokenize
   * @return the tokens produced by `TreebankTokenizer`
   */
  def tokenize(sentence: String): IndexedSeq[String] = TreebankTokenizer(sentence)

  /**
   * Extracts the text behind `url` using `TextExtractor.extractText`.
   *
   * @param url location of the content to read
   * @return the extracted text
   */
  def loadContent(url: URL): String = TextExtractor.extractText(url)

  /**
   * Segments `text` with the bundled `MLSentenceSegmenter` and tokenizes
   * every segment via [[tokenize]].
   *
   * @param text raw text to process
   * @return one token sequence per segment, in the order the segmenter yields them
   */
  def preprocess(text: String): IndexedSeq[IndexedSeq[String]] =
    for (sentence <- bundledSegmenter(text)) yield tokenize(sentence)

  /**
   * Loads the content at `url` with [[loadContent]] and preprocesses it as text.
   *
   * @param url location of the content to process
   * @return one token sequence per segment of the extracted text
   */
  def preprocess(url: URL): IndexedSeq[IndexedSeq[String]] = {
    val text = loadContent(url)
    preprocess(text)
  }

  /**
   * Preprocesses the contents of `file` by converting it to a URL and
   * delegating to the `URL` overload.
   *
   * @param file the file to process
   * @return one token sequence per segment of the file's extracted text
   */
  def preprocess(file: File): IndexedSeq[IndexedSeq[String]] = {
    val location: URL = file.toURI.toURL
    preprocess(location)
  }

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy