All downloads are free. The search and download functionality uses the official Maven repository.

edu.knowitall.tool.tokenize.ClearTokenizer.scala Maven / Gradle / Ivy

package edu.knowitall
package tool
package tokenize

import scala.collection.JavaConversions._
import edu.knowitall.common.Resource.using
import com.clearnlp.tokenization.AbstractTokenizer
import com.clearnlp.nlp.NLPGetter
import java.util.zip.ZipInputStream
import java.net.URL

/** [[Tokenizer]] implementation that delegates the splitting of text to a
  * ClearNLP tokenizer.
  *
  * The underlying tokenizer is obtained once per instance from
  * `NLPGetter.getTokenizer("en")`.
  */
class ClearTokenizer extends Tokenizer {
  /** ClearNLP tokenizer requested with the "en" language code
    * (presumably English -- confirm against the ClearNLP documentation).
    */
  val tokenizer: AbstractTokenizer = NLPGetter.getTokenizer("en")

  /** Splits `sentence` into tokens.
    *
    * @param sentence the text to tokenize
    * @return the tokens produced by `Tokenizer.computeOffsets` from the
    *         ClearNLP token strings and the original sentence
    */
  def tokenize(sentence: String): Seq[Token] = {
    // Convert explicitly rather than relying on the implicit conversions of
    // the deprecated scala.collection.JavaConversions.
    import scala.collection.JavaConverters._

    // getTokens returns a Java collection of token strings; wrap it as a
    // Scala collection before handing it to computeOffsets.
    val strings = tokenizer.getTokens(sentence).asScala
    Tokenizer.computeOffsets(strings, sentence)
  }
}

/** [[TokenizerMain]] specialization whose `tokenizer` member is a
  * [[ClearTokenizer]].
  */
object ClearTokenizerMain extends TokenizerMain {
  /** The tokenizer instance exposed by this object. */
  val tokenizer: ClearTokenizer = new ClearTokenizer
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy