All Downloads are FREE. Search and download functionalities are using the official Maven repository.

pingbu.search.FuzzyIndex Maven / Gradle / Ivy

The newest version!
package pingbu.search;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import pingbu.pinyin.Pinyin;

/**
 * 模糊搜索索引,该索引以文本匹配度作为匹配依据
 *
 * @author pingbu
 */
public class FuzzyIndex implements SearchIndex {

    private static String normalize(final char a, final char b) {
        return Pinyin.normailizeChar(a) + "-" + Pinyin.normailizeChar(b);
    }

    private static class MatchedItem {
        public final int id;
        final char c1, c2;

        MatchedItem(final int id, final char c1, final char c2) {
            this.id = id;
            this.c1 = c1;
            this.c2 = c2;
        }
    }

    private static class SearchingIndex {
        public final int pos;
        public final List index;

        SearchingIndex(final int pos, final List index) {
            this.pos = pos;
            this.index = index;
        }
    }

    private final Map> mCharIndex = new HashMap<>();
    private final Map> mIndex = new HashMap<>();

    @Override
    public void addItem(final int id, final String value) {
        for (int i = 0; i < value.length(); ++i) {
            final char c = value.charAt(i);
            final short nc = Pinyin.normailizeChar(c);
            List index = mCharIndex.get(nc);
            if (index == null) {
                index = new ArrayList<>();
                mCharIndex.put(nc, index);
            }
            index.add(new MatchedItem(id, c, '\0'));
        }
        for (int i = 1; i < value.length(); ++i) {
            final String word = normalize(value.charAt(i - 1), value.charAt(i));
            List index = mIndex.get(word);
            if (index == null) {
                index = new ArrayList<>();
                mIndex.put(word, index);
            }
            index.add(new MatchedItem(id, value.charAt(i - 1), value.charAt(i)));
        }
    }

    @Override
    public Iterator iterate(final String value) {
        return new Iterator(value);
    }

    private final class Iterator implements SearchIndex.Iterator {
        private final String mText;
        private final List mWordIndexes = new ArrayList<>();
        private final int[] mWordIndexPos;

        public Iterator(final String text) {
            mText = text;
            for (int i = 0; i < text.length(); ++i) {
                final char c = text.charAt(i);
                final short nc = Pinyin.normailizeChar(c);
                final SearchingIndex index = new SearchingIndex(i, mCharIndex.get(nc));
                mWordIndexes.add(index);
            }
            for (int i = 1; i < text.length(); ++i) {
                final String word = normalize(text.charAt(i - 1), text.charAt(i));
                final SearchingIndex index = new SearchingIndex(i, mIndex.get(word));
                mWordIndexes.add(index);
            }
            mWordIndexPos = new int[mWordIndexes.size()];
        }

        @Override
        public int getNextItem() {
            int id = Integer.MAX_VALUE;
            for (int i = 0; i < mWordIndexes.size(); ++i) {
                final List index = mWordIndexes.get(i).index;
                if (index != null && mWordIndexPos[i] < index.size()) {
                    final int t = index.get(mWordIndexPos[i]).id;
                    if (t < id)
                        id = t;
                }
            }
            return id;
        }

        @Override
        public double sumUpToItem(final int id) {
            double score = 0;
            for (int i = 0; i < mWordIndexes.size(); ++i) {
                final SearchingIndex index = mWordIndexes.get(i);
                while (index.index != null && mWordIndexPos[i] < index.index.size()) {
                    final MatchedItem mi = index.index.get(mWordIndexPos[i]);
                    if (mi.id > id)
                        break;
                    if (mi.c2 == 0)
                        score += Pinyin.compareChar(mi.c1, mText.charAt(index.pos));
                    else
                        score += Pinyin.compareChar(mi.c1, mText.charAt(index.pos - 1))
                                * Pinyin.compareChar(mi.c2, mText.charAt(index.pos)) * 4;
                    ++mWordIndexPos[i];
                }
            }
            score /= mText.length() * 5 - 4;
            return Math.min(score, 1);
        }

        @Override
        public void close()  {
        }
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy