All downloads are free. Search and download functionality uses the official Maven repository.

me.xdrop.diffutils.SequenceMatcher Maven / Gradle / Ivy

Go to download

Fuzzy string searching implementation of the well-known fuzzywuzzy algorithm in Java

There is a newer version: 1.4.0
Show newest version
package me.xdrop.diffutils;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

/**
 * Work-in-progress sequence matcher over two strings, {@code a} and {@code b}.
 *
 * <p>Instances are created through {@link #from(String, String)}, which indexes
 * every character of {@code b} by the positions at which it occurs and then drops
 * junk and "popular" (very frequent) characters from that index.
 *
 * <p>NOTE(review): the structure (b-index, popular elements, {@code j2len}) appears
 * to follow Python's {@code difflib.SequenceMatcher}; confirm before relying on
 * behavioural parity.
 */
public class SequenceMatcher {
    private String a;
    private String b;
    private char[] _a;
    private char[] _b;
    private int lenA;
    private int lenB;
    /** Character of {@code b} -> ascending positions at which it occurs (junk/popular removed). */
    private Map<Character, List<Integer>> bIndices;
    /** Characters of {@code b} considered too frequent to index, with an over-threshold hit count. */
    private Map<Character, Integer> popularElements;
    /** Characters of {@code b} for which {@link #isJunk(char)} returned {@code true}. */
    private Map<Character, Boolean> junkElements;

    public static void main(String[] args) {
        from("this is a match", "this is a longer match");
    }

    /**
     * Stores both sequences as strings and as char arrays, together with their lengths.
     * The index maps are supplied afterwards by {@link #from(String, String)}.
     */
    private SequenceMatcher(String a, String b) {
        // Keep the strings themselves; previously these fields were never assigned.
        this.a = a;
        this.b = b;
        this.lenA = a.length();
        this.lenB = b.length();
        this._a = a.toCharArray();
        this._b = b.toCharArray();
    }

    /**
     * Junk predicate applied to the characters of {@code b}.
     *
     * @param character the character to test
     * @return always {@code false} in the current implementation
     */
    public static boolean isJunk(char character) {
        return false;
    }

    /**
     * Builds a matcher for the two given strings.
     *
     * @param a the first sequence
     * @param b the second sequence; its characters are indexed by position
     * @return a matcher whose index, junk and popular maps have been populated
     * @throws NullPointerException if {@code a} or {@code b} is {@code null}
     */
    public static SequenceMatcher from(String a, String b) {
        int lenB = b.length();

        // Mapping between a character in B and the indices at which it appears
        Map<Character, List<Integer>> bIndices = new HashMap<>(lenB);
        Map<Character, Integer> popularElements = new HashMap<>(32);
        Map<Character, Boolean> junkElements = new HashMap<>(32);

        char[] _b = b.toCharArray();

        for (int index = 0; index < _b.length; index++) {
            char c = _b[index];
            List<Integer> indices = bIndices.get(c);

            if (indices == null) {
                indices = new ArrayList<>(16);
                // Register the new list; without this the index would stay empty.
                bIndices.put(c, indices);
            }

            indices.add(index);

            // Only for long inputs: a character occurring in more than 1% of B is "popular".
            if (lenB >= 200 && indices.size() * 100 > lenB) {
                Integer popularityCount = popularElements.get(c);
                popularElements.put(c, popularityCount == null ? 1 : popularityCount + 1);
            }
        }

        // Strip out the junk elements. Iterate over a snapshot of the keys so that
        // removing from the map cannot trigger a ConcurrentModificationException.
        List<Character> indexedChars = new ArrayList<>(bIndices.keySet());
        for (Character el : indexedChars) {
            if (isJunk(el)) {
                junkElements.put(el, true);
                bIndices.remove(el);
            }
        }

        // Remove the popular keys from the index
        for (Character element : popularElements.keySet()) {
            bIndices.remove(element);
        }

        SequenceMatcher sequenceMatcher = new SequenceMatcher(a, b);
        sequenceMatcher.setbIndices(bIndices);
        sequenceMatcher.setJunkElements(junkElements);
        sequenceMatcher.setPopularElements(popularElements);
        return sequenceMatcher;
    }

    /**
     * Searches for the longest block of characters common to {@code a[alo, ahi)} and
     * {@code b[blo, bhi)}, using only characters still present in the b-index.
     *
     * <p>NOTE(review): the method is declared {@code void} and the computed
     * {@code besti}/{@code bestj}/{@code bestSize} are not yet exposed to callers;
     * the signature is left untouched here to stay compatible with existing callers.
     *
     * @param alo inclusive lower bound in {@code a}
     * @param ahi exclusive upper bound in {@code a}
     * @param blo inclusive lower bound in {@code b}
     * @param bhi exclusive upper bound in {@code b}
     */
    public void findLongestMatch(int alo, int ahi, int blo, int bhi) {
        int besti = alo;
        // An empty match starts at the lower bounds of both ranges.
        int bestj = blo;
        int bestSize = 0;
        // j2len[j] = length of the longest match ending at a[i - 1] and b[j]
        int[] j2len = new int[lenB];

        // Go through A
        for (int i = alo; i < ahi; i++) {
            int[] newj2len = new int[lenB];
            // Go through matching indices in B where a[i] == b[j];
            // characters absent from the index simply have no candidates.
            List<Integer> indices = bIndices.get(_a[i]);
            if (indices != null) {
                for (int j : indices) {
                    // Ensure we are within the search bound
                    if (j < blo) {
                        continue;
                    }
                    if (j >= bhi) {
                        break;
                    }
                    // A match ending at b[0] has no predecessor to extend.
                    int previous = (j > 0) ? j2len[j - 1] : 0;
                    int k = previous + 1;
                    newj2len[j] = k;
                    if (k > bestSize) {
                        besti = i - k + 1;
                        bestj = j - k + 1;
                        bestSize = k;
                    }
                }
            }
            j2len = newj2len;
        }

        // Extend the match backwards over equal, non-junk characters. The equality
        // test and the decrements guarantee termination (the loop body used to be empty).
        while ((besti > alo) && (bestj > blo) && !isBJunk()
                && _a[besti - 1] == _b[bestj - 1]) {
            besti--;
            bestj--;
            bestSize++;
        }
    }

    /** Placeholder junk test used while extending a match; currently always {@code false}. */
    private boolean isBJunk() {
        return false;
    }

    public void setbIndices(Map<Character, List<Integer>> bIndices) {
        this.bIndices = bIndices;
    }

    public void setPopularElements(Map<Character, Integer> popularElements) {
        this.popularElements = popularElements;
    }

    public void setJunkElements(Map<Character, Boolean> junkElements) {
        this.junkElements = junkElements;
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy