
com.rockymadden.stringmetric.tokenize.NGramTokenizer.scala Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of stringmetric-core Show documentation
Show all versions of stringmetric-core Show documentation
String metrics and phonetic algorithms for Scala.
The newest version!
package com.rockymadden.stringmetric.tokenize
import com.rockymadden.stringmetric.Tokenizer.StringTokenizer
/**
 * Tokenizer that splits its input into overlapping, contiguous n-grams
 * (a window of `n` characters advanced one character at a time).
 *
 * @param n the size of each n-gram. When `n <= 0` every call to `tokenize` returns `None`.
 */
final case class NGramTokenizer(n: Int) extends StringTokenizer {
  /**
   * Produces every contiguous window of `n` characters from `a`, in input order.
   *
   * @param a the characters to tokenize.
   * @return `None` when `n <= 0` or when `a` holds fewer than `n` characters;
   *         otherwise `Some` array of `a.length - n + 1` tokens, each a fresh
   *         array of exactly `n` characters.
   */
  override def tokenize(a: Array[Char]): Option[Array[Array[Char]]] =
    if (n <= 0 || a.length < n) None
    else {
      // Build the windows by index instead of recursing: the previous recursive
      // function value could not be tail-call optimised (so long inputs risked a
      // StackOverflowError) and re-copied both the remaining input and the
      // accumulated output on every step.
      val windowCount = a.length - n + 1
      Some(Array.tabulate(windowCount)(start => a.slice(start, start + n)))
    }

  /**
   * String overload: tokenizes the characters of `a` and converts every
   * n-gram back into a `String`.
   *
   * @param a the string to tokenize.
   * @return `None` under the same conditions as the `Array[Char]` overload;
   *         otherwise `Some` array of n-gram strings in input order.
   */
  override def tokenize(a: String): Option[Array[String]] =
    tokenize(a.toCharArray).map(_.map(_.mkString))
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy