All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.spark.ml.odkl.texts.NGramUtils.scala Maven / Gradle / Ivy

package org.apache.spark.ml.odkl.texts

import java.util

import scala.collection.JavaConverters._

/**
  * Created by eugeny.malyutin on 28.07.16.
  */
object NGramUtils {
  def nGram(input: java.util.List[String], lowerNGramBound: Int, upperNGramBound: Int): util.List[String] = {
    nGramFun(input.asScala, lowerNGramBound, upperNGramBound).asJava
  }

  def nGramFun(input: Seq[String], lowerNGramBound: Int, upperNGramBound: Int): Seq[String] = {
    {
      for (i <- Range(lowerNGramBound, upperNGramBound + 1)) yield {
        //if upTo - Range(n,n+1) = List(n) e.g. standard  NGrams
        input.iterator.sliding(i).withPartial(false).map(_.mkString(" ")).toArray
      }
    }.flatMap(f => {
      f
    }).toSeq
  }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy