![JAR search and dependency download from the Maven repository](/logo.png)
me.xdrop.diffutils.SequenceMatcher Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of fuzzywuzzy Show documentation
Show all versions of fuzzywuzzy Show documentation
Fuzzy string searching implementation of the well-known fuzzywuzzy algorithm in Java
package me.xdrop.diffutils;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
/**
 * Finds the longest contiguous block of characters shared by two strings.
 *
 * <p>An instance is built with {@link #from(String, String)}, which indexes
 * every character of the second string ({@code b}) by the positions at which
 * it occurs. {@link #findLongestMatch(int, int, int, int)} then uses that
 * index to locate the longest common run inside given sub-ranges of
 * {@code a} and {@code b}.
 *
 * <p>NOTE(review): naming and structure ({@code j2len}, {@code alo/ahi},
 * popular/junk elements) look like a port of Python's
 * {@code difflib.SequenceMatcher}; confirm against the upstream project.
 *
 * <p>Instances are not thread-safe if the index maps are replaced through
 * the public setters while a search is running.
 */
public class SequenceMatcher {

    /** First sequence, kept as given to the factory. */
    private final String a;
    /** Second sequence, kept as given to the factory. */
    private final String b;
    /** Characters of {@link #a}, for indexed access without bounds-checked calls. */
    private final char[] _a;
    /** Characters of {@link #b}. */
    private final char[] _b;
    private final int lenA;
    private final int lenB;

    /** For each character of {@code b}: the ascending positions at which it occurs. */
    private Map<Character, List<Integer>> bIndices;
    /** Characters of {@code b} considered too frequent to index, with a hit counter. */
    private Map<Character, Integer> popularElements;
    /** Characters of {@code b} for which {@link #isJunk(char)} returned {@code true}. */
    private Map<Character, Boolean> junkElements;

    /** Ad-hoc smoke run: builds a matcher for two sample strings. */
    public static void main(String[] args) {
        from("this is a match", "this is a longer match");
    }

    private SequenceMatcher(String a, String b) {
        // Keep the original strings as well as their char arrays; the
        // search reads the arrays, the strings remain available for callers
        // added later.
        this.a = a;
        this.b = b;
        this.lenA = a.length();
        this.lenB = b.length();
        this._a = a.toCharArray();
        this._b = b.toCharArray();
    }

    /**
     * Tells whether a character should be ignored as a match seed.
     *
     * @param character the character to classify
     * @return always {@code false} in the current implementation
     */
    public static boolean isJunk(char character) {
        return false;
    }

    /**
     * Builds a matcher for the two given strings and indexes {@code b}.
     *
     * <p>Every character of {@code b} is mapped to the list of positions at
     * which it occurs. When {@code b} has at least 200 characters, a character
     * whose occurrence count exceeds 1% of {@code b}'s length is recorded as
     * "popular" and dropped from the index; junk characters (see
     * {@link #isJunk(char)}) are dropped as well.
     *
     * @param a the first sequence
     * @param b the second sequence (the one that gets indexed)
     * @return a matcher ready for {@link #findLongestMatch(int, int, int, int)}
     * @throws NullPointerException if {@code a} or {@code b} is {@code null}
     */
    public static SequenceMatcher from(String a, String b) {
        if (a == null || b == null) {
            throw new NullPointerException("a and b must not be null");
        }
        int lenB = b.length();

        // Mapping between a character in B and the indices at which it appears
        Map<Character, List<Integer>> bIndices = new HashMap<>();
        Map<Character, Integer> popularElements = new HashMap<>(32);
        Map<Character, Boolean> junkElements = new HashMap<>(32);

        for (int index = 0; index < lenB; index++) {
            char c = b.charAt(index);
            List<Integer> indices = bIndices.get(c);
            if (indices == null) {
                indices = new ArrayList<>(16);
                // The new list must be registered, otherwise the index stays empty.
                bIndices.put(c, indices);
            }
            indices.add(index);

            // long arithmetic so the 1% test cannot overflow on very long inputs
            if (lenB >= 200 && (long) indices.size() * 100 > lenB) {
                Integer popularityCount = popularElements.get(c);
                popularElements.put(c, popularityCount == null ? 1 : popularityCount + 1);
            }
        }

        // Strip out the junk elements. Iterate over a snapshot of the keys:
        // removing from the map while walking its live key set would throw
        // ConcurrentModificationException.
        for (Character el : new ArrayList<Character>(bIndices.keySet())) {
            if (isJunk(el)) {
                junkElements.put(el, true);
                bIndices.remove(el);
            }
        }

        // Remove the popular keys from the index
        for (Character element : popularElements.keySet()) {
            bIndices.remove(element);
        }

        SequenceMatcher sequenceMatcher = new SequenceMatcher(a, b);
        sequenceMatcher.setbIndices(bIndices);
        sequenceMatcher.setJunkElements(junkElements);
        sequenceMatcher.setPopularElements(popularElements);
        return sequenceMatcher;
    }

    /**
     * Finds the longest block common to {@code a[alo, ahi)} and
     * {@code b[blo, bhi)}.
     *
     * <p>Among blocks of maximal length seeded from indexed characters, the
     * one starting earliest in {@code a} is chosen, and for that start the
     * one starting earliest in {@code b}. The block is then extended on both
     * sides over equal characters (first non-junk, then junk ones), which
     * also covers characters that were dropped from the index as popular.
     *
     * @param alo inclusive start of the range in {@code a}
     * @param ahi exclusive end of the range in {@code a}
     * @param blo inclusive start of the range in {@code b}
     * @param bhi exclusive end of the range in {@code b}
     * @return a three-element array {@code {i, j, size}} such that
     *         {@code a[i, i+size)} equals {@code b[j, j+size)}; when nothing
     *         matches the result is {@code {alo, blo, 0}}
     * @throws IndexOutOfBoundsException if either range is not within its
     *         sequence or has {@code lo > hi}
     */
    public int[] findLongestMatch(int alo, int ahi, int blo, int bhi) {
        checkRange("a", alo, ahi, lenA);
        checkRange("b", blo, bhi, lenB);

        int besti = alo;
        int bestj = blo;
        int bestSize = 0;

        // j2len[j] = length of the longest match ending at a[i-1] and b[j]
        int[] j2len = new int[lenB];

        // Go through A
        for (int i = alo; i < ahi; i++) {
            int[] newj2len = new int[lenB];
            List<Integer> indices = (bIndices == null) ? null : bIndices.get(_a[i]);
            // Characters that never occur in B (or were stripped as
            // popular/junk) have no entry; they simply reset the run lengths.
            if (indices != null) {
                // Go through matching indices in B where a[i] == b[j]
                for (int j : indices) {
                    // Ensure we are within the search bound
                    if (j < blo) {
                        continue;
                    }
                    // Positions are stored in ascending order, so nothing
                    // further along can be inside the range either.
                    if (j >= bhi) {
                        break;
                    }
                    // j == 0 has no predecessor; a match there has length 1.
                    int k = (j > 0 ? j2len[j - 1] : 0) + 1;
                    newj2len[j] = k;
                    if (k > bestSize) {
                        besti = i - k + 1;
                        bestj = j - k + 1;
                        bestSize = k;
                    }
                }
            }
            j2len = newj2len;
        }

        // Extend over equal non-junk characters on both sides; this picks up
        // popular characters that were removed from the index.
        while (besti > alo && bestj > blo
                && !isBJunk(_b[bestj - 1])
                && _a[besti - 1] == _b[bestj - 1]) {
            besti--;
            bestj--;
            bestSize++;
        }
        while (besti + bestSize < ahi && bestj + bestSize < bhi
                && !isBJunk(_b[bestj + bestSize])
                && _a[besti + bestSize] == _b[bestj + bestSize]) {
            bestSize++;
        }

        // Then absorb equal junk characters adjacent to the block.
        while (besti > alo && bestj > blo
                && isBJunk(_b[bestj - 1])
                && _a[besti - 1] == _b[bestj - 1]) {
            besti--;
            bestj--;
            bestSize++;
        }
        while (besti + bestSize < ahi && bestj + bestSize < bhi
                && isBJunk(_b[bestj + bestSize])
                && _a[besti + bestSize] == _b[bestj + bestSize]) {
            bestSize++;
        }

        return new int[] {besti, bestj, bestSize};
    }

    /** Rejects a half-open range {@code [lo, hi)} that does not fit a sequence of length {@code len}. */
    private static void checkRange(String name, int lo, int hi, int len) {
        if (lo < 0 || hi > len || lo > hi) {
            throw new IndexOutOfBoundsException(
                    "invalid range for " + name + ": [" + lo + ", " + hi + ") with length " + len);
        }
    }

    /** Tells whether {@code c} was recorded as a junk character of {@code b}. */
    private boolean isBJunk(char c) {
        return junkElements != null && junkElements.containsKey(c);
    }

    /**
     * Replaces the character-to-positions index of {@code b}.
     *
     * @param bIndices map from character to its positions in {@code b};
     *                 each list is expected in ascending order, since the
     *                 search stops at the first position past its range
     */
    public void setbIndices(Map<Character, List<Integer>> bIndices) {
        this.bIndices = bIndices;
    }

    /**
     * Replaces the record of popular characters.
     *
     * @param popularElements map from popular character to its counter
     */
    public void setPopularElements(Map<Character, Integer> popularElements) {
        this.popularElements = popularElements;
    }

    /**
     * Replaces the record of junk characters.
     *
     * @param junkElements map whose keys are the junk characters of {@code b}
     */
    public void setJunkElements(Map<Character, Boolean> junkElements) {
        this.junkElements = junkElements;
    }
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy