net.ricecode.similarity.LevenshteinDistanceStrategy Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of string-similarity Show documentation
Show all versions of string-similarity Show documentation
A Java library that implements several algorithms that calculate similarity between strings.
The newest version!
package net.ricecode.similarity;
/**
 * A strategy that uses the Levenshtein distance to calculate the edit distance of two strings,
 * then converts that distance to a "score" to fit the framework.
 *
 * <p>Comparison is case-insensitive: both inputs are lower-cased with {@link java.util.Locale#ROOT}
 * before being compared, so results do not depend on the JVM's default locale.
 *
 * @see <a href="https://en.wikipedia.org/wiki/Levenshtein_distance">About Levenshtein Distance</a>
 */
public class LevenshteinDistanceStrategy implements SimilarityStrategy {

    /**
     * Calculates the similarity score of two strings, where 0.0 implies absolutely no similarity
     * and 1.0 implies absolute similarity.
     *
     * <p>The score is {@code (maxLength - editDistance) / maxLength}, where {@code maxLength} is the
     * length of the longer string <em>after</em> case normalisation. Two empty strings score 1.0.
     *
     * @param first The first string to compare.
     * @param second The second string to compare.
     * @return A number between 0.0 and 1.0.
     * @throws NullPointerException if one or both of the strings are null
     */
    public double score(String first, String second) {
        // Normalise up front: lower-casing can change a string's length (e.g. U+0130 becomes two
        // chars), so the denominator must be taken from the same text the distance is computed on.
        // Otherwise the distance can exceed maxLength and the score drops below 0.0.
        String normalizedFirst = normalize(first);
        String normalizedSecond = normalize(second);

        int maxLength = Math.max(normalizedFirst.length(), normalizedSecond.length());
        // Can't divide by 0; two empty strings are identical.
        if (maxLength == 0) {
            return 1.0d;
        }
        int distance = computeEditDistance(normalizedFirst, normalizedSecond);
        return ((double) (maxLength - distance)) / (double) maxLength;
    }

    /**
     * Computes the case-insensitive Levenshtein edit distance (insertions, deletions and
     * substitutions, each costing 1) between two strings, measured in {@code char} units.
     *
     * <p>Uses a single rolling row of {@code second.length() + 1} ints rather than a full matrix.
     *
     * @param first The first string to compare.
     * @param second The second string to compare.
     * @return The number of single-char edits needed to turn one lower-cased string into the other.
     * @throws NullPointerException if one or both of the strings are null
     */
    protected int computeEditDistance(String first, String second) {
        // Normalise here as well so direct callers and subclasses keep case-insensitive behaviour.
        first = normalize(first);
        second = normalize(second);

        int secondLength = second.length();
        int[] costs = new int[secondLength + 1];

        // Row 0: turning the empty prefix of 'first' into second[0..j) takes j insertions.
        for (int j = 0; j <= secondLength; j++) {
            costs[j] = j;
        }

        for (int i = 1; i <= first.length(); i++) {
            // Column 0 of the current row: deleting i chars from 'first'.
            int previousValue = i;
            for (int j = 1; j <= secondLength; j++) {
                // costs[j - 1] still holds the previous row's value, i.e. the diagonal cell.
                int useValue = costs[j - 1];
                if (first.charAt(i - 1) != second.charAt(j - 1)) {
                    // min(substitute = diagonal, insert = left, delete = up) + 1
                    useValue = Math.min(Math.min(useValue, previousValue), costs[j]) + 1;
                }
                // Commit the current row's left cell only now that the diagonal has been read.
                costs[j - 1] = previousValue;
                previousValue = useValue;
            }
            // The last cell of the current row is still pending; store it.
            costs[secondLength] = previousValue;
        }
        return costs[secondLength];
    }

    /** Lower-cases {@code value} independently of the JVM's default locale. */
    private static String normalize(String value) {
        return value.toLowerCase(java.util.Locale.ROOT);
    }
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy