All downloads are free. The search and download functionality uses the official Maven repository.

querqy.trie.SequenceLookup Maven / Gradle / Ivy

There is a newer version: 3.18.1
Show newest version
package querqy.trie;

import querqy.CompoundCharSequence;
import querqy.LowerCaseCharSequence;
import querqy.trie.model.ExactMatch;
import querqy.trie.model.LookupState;
import querqy.trie.model.PrefixMatch;
import querqy.trie.model.SuffixMatch;

import java.util.ArrayList;
import java.util.LinkedList;
import java.util.List;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.stream.Collectors;

public class SequenceLookup {

    private static String DELIMITER = " ";

    private final TrieMap trieMap;
    private final PrefixTrieMap prefixTrieMap;
    private final SuffixTrieMap suffixTrieMap;

    private final boolean ignoreCase;

    public SequenceLookup() {
        this(true);
    }

    public SequenceLookup(final boolean ignoreCase) {
        this.trieMap = new TrieMap<>();
        this.prefixTrieMap = new PrefixTrieMap<>();
        this.suffixTrieMap = new SuffixTrieMap<>();
        this.ignoreCase = ignoreCase;
    }

    public void put(final List terms, final T ruleObject) {
        trieMap.put(new CompoundCharSequence(DELIMITER, lc(terms)), ruleObject);
    }

    public void putPrefix(final CharSequence term, final T ruleObject) {
        prefixTrieMap.putPrefix(lc(term), ruleObject, true);
    }

    public void putSuffix(final CharSequence term, final T ruleObject) {
        suffixTrieMap.putSuffix(lc(term), ruleObject, true);
    }

    public List> findSingleTermPrefixMatches(final List terms) {
        final List> prefixMatches = new ArrayList<>();
        final AtomicInteger lookupOffset = new AtomicInteger(0);

        terms.forEach(term -> {
            prefixTrieMap.getPrefix(lc(term)).ifPresent(
                    prefixMatch -> prefixMatches.add(prefixMatch.setLookupOffset(lookupOffset.get())));
            lookupOffset.getAndIncrement(); });

        return prefixMatches;
    }

    public List> findSingleTermSuffixMatches(final List terms) {
        final List> suffixMatches = new ArrayList<>();
        final AtomicInteger lookupOffset = new AtomicInteger(0);

        terms.forEach(term -> {
            suffixTrieMap.getBySuffix(lc(term)).ifPresent(
                    suffixMatch -> suffixMatches.add(suffixMatch.setLookupOffset(lookupOffset.get())));
            lookupOffset.getAndIncrement(); });

        return suffixMatches;
    }

    public List> findExactMatches(final List terms) {

        final List> exactMatches = new ArrayList<>();
        int lookupIndex = 0;

        final LinkedList> lookupStates = new LinkedList<>();

        for (final CharSequence term : lc(terms)) {
            lookupStates.add(new LookupState<>(lookupIndex, new LinkedList<>(), null));

            final int lookupStatesTempSize = lookupStates.size();
            for (int i = 0; i < lookupStatesTempSize; i++) {

                final LookupState lookupState = lookupStates.removeLast();

                final State subsequentLookupState = lookupState.getState() != null
                        ? trieMap.get(term, lookupState.getState()).getStateForCompleteSequence()
                        : trieMap.get(term).getStateForCompleteSequence();

                if (!subsequentLookupState.isKnown) {
                    continue;
                }

                if (subsequentLookupState.isFinal() && subsequentLookupState.value != null) {
                    exactMatches.add(new ExactMatch<>(
                            lookupState.lookupOffsetStart, lookupIndex + 1, subsequentLookupState.value));
                }

                final State subsequentLookupStateNext = trieMap.get(DELIMITER, subsequentLookupState).getStateForCompleteSequence();
                if (subsequentLookupStateNext.isKnown) {
                    lookupStates.addFirst(lookupState.addTerm(term).setState(subsequentLookupStateNext));
                }
            }

            lookupIndex++;
        }

        return exactMatches;
    }

    private List lc(final List seqList) {
        return seqList.stream().map(this::lc).collect(Collectors.toCollection(LinkedList::new));
    }

    private CharSequence lc(final CharSequence seq) {
        return ignoreCase ? new LowerCaseCharSequence(seq) : seq;
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy