All downloads are free. Search and download functionality uses the official Maven repository.

com.github.halfmatthalfcat.stringmetric.similarity.JaccardMetric.scala Maven / Gradle / Ivy

package com.github.halfmatthalfcat.stringmetric.similarity

import com.github.halfmatthalfcat.stringmetric.{NGramTokenizer, StringMetric}

/**
 * Jaccard similarity computed over the n-grams of two character sequences.
 *
 * Each input is tokenized into n-grams of width `n`. The score is the number of
 * n-grams the two inputs have in common divided by the total number of n-grams
 * across both inputs, with the common ones counted only once.
 *
 * @param n width of the n-grams used for tokenization
 */
final case class JaccardMetric(n: Int) extends StringMetric[Double] {
	/**
	 * Scores the similarity of two character arrays.
	 *
	 * @param a first sequence
	 * @param b second sequence
	 * @return `None` when `n` is not positive, when either input is shorter than `n`,
	 *         or when the tokenizer yields nothing for either input; `Some(1d)` when
	 *         both inputs hold the same elements; otherwise the n-gram overlap ratio
	 */
	override def compare(a: Array[Char], b: Array[Char]): Option[Double] = {
		// A comparison needs a positive n and at least one full n-gram on each side.
		val comparable = n > 0 && a.length >= n && b.length >= n

		if (!comparable) None
		else if (a.sameElements(b)) Some(1d) // Identical inputs need no tokenization.
		else
			for {
				gramsA <- NGramTokenizer(n).tokenize(a)
				gramsB <- NGramTokenizer(n).tokenize(b)
			} yield {
				// Arrays compare by reference, so n-grams are turned into Strings
				// to be matched by content.
				val left = gramsA.map(_.mkString)
				val right = gramsB.map(_.mkString)
				val shared = left.intersect(right).length
				// Size of the combined collection: both sides minus the overlap.
				val combined = gramsA.length + gramsB.length - shared

				shared.toDouble / combined
			}
	}

	/**
	 * String overload; converts both arguments to character arrays and delegates
	 * to the array-based comparison.
	 *
	 * @param a first string
	 * @param b second string
	 * @return same result as the array-based `compare` for the strings' characters
	 */
	override def compare(a: String, b: String): Option[Double] = {
		val left = a.toCharArray
		val right = b.toCharArray

		compare(left, right)
	}
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy