All downloads are free. The search and download features use the official Maven repository.

com.helipy.text.ahocorasick.Trie Maven / Gradle / Ivy

package com.helipy.text.ahocorasick;

import java.util.Collection;
import java.util.LinkedList;
import java.util.Map;
import java.util.Queue;

/**
 * Keyword trie for the {@code ahocorasick} package. States are linked by
 * per-character transitions ({@link State#getSuccess()}); after all keywords
 * have been added, {@link #constructFailureAndPrevWordPointer()} wires each
 * state's failure link and previous-keyword link and numbers the states.
 *
 * <p>Typical use: create the trie, call {@link #addKeyword(String)} for every
 * keyword, then call {@link #constructFailureAndPrevWordPointer()} once.
 *
 * @author nuclear-sun
 */
public class Trie {

    /** Root state; never replaced after construction. */
    private final State root;

    /** Number of states numbered by the last link-construction pass (0 before that). */
    private int stateCount;

    Trie() {
        this.root = new State();
    }

    /**
     * Returns the number of states that were assigned an ordinal by
     * {@link #constructFailureAndPrevWordPointer()}.
     *
     * @return the state count, or 0 if the links have not been constructed yet
     */
    int getStateCount() {
        return stateCount;
    }

    /**
     * Inserts a keyword, creating any missing states along its character path,
     * and records the keyword on the final state of that path.
     *
     * @param keyword the keyword to insert; must not be {@code null}
     * @return the state reached after consuming every character of {@code keyword}
     *         (the root itself for an empty string)
     */
    State addKeyword(final String keyword) {

        State currState = root;
        for (int i = 0, len = keyword.length(); i < len; i++) {

            char ch = keyword.charAt(i);

            State childState = currState.getSuccess().get(ch);
            if (childState == null) {
                childState = new State();
                currState.getSuccess().put(ch, childState);
            }
            currState = childState;
        }
        currState.setKeyword(keyword);
        return currState;
    }

    /**
     * Returns the root state of this trie.
     *
     * @return the root state
     */
    State getRootState() {
        return this.root;
    }

    /**
     * Walks the trie breadth-first from the root and, for every state:
     * <ul>
     *   <li>assigns an ordinal (root is 1, then increasing in visiting order);</li>
     *   <li>sets its previous-word pointer to the nearest state on its failure
     *       chain that carries a keyword, if any;</li>
     *   <li>sets the failure link of each of its children to the matching child
     *       of the nearest failure-chain state that has a transition on the same
     *       character, falling back to the root.</li>
     * </ul>
     * Finally stores the number of visited states in {@code stateCount}.
     *
     * <p>NOTE(review): a child whose failure link is already non-null is not
     * re-resolved, so invoking this method again after adding more keywords may
     * keep stale links for states below depth 1 - confirm callers build once.
     */
    void constructFailureAndPrevWordPointer() {

        int ordinal = 1;
        root.setOrdinal(ordinal++);

        Queue<State> queue = new LinkedList<>();
        Collection<State> directChildren = root.getSuccess().values();

        // Depth-1 states always fail back to the root.
        for (State child : directChildren) {
            child.setFailure(root);
            queue.offer(child);
        }

        while (!queue.isEmpty()) {

            State parentState = queue.poll();
            parentState.setOrdinal(ordinal++);

            Map<Character, State> children = parentState.getSuccess();

            int resolvedChildCnt = 0;
            boolean foundPrevWord = false;
            State parentFailure = parentState;

            // Follow the parent's failure chain until it ends (getFailure() returns
            // null, expected at the root) or both tasks below are complete.
            while ((parentFailure = parentFailure.getFailure()) != null) {

                // set previous word pointer: first keyword-bearing state on the chain
                if (!foundPrevWord && parentFailure.getKeyword() != null) {
                    parentState.setPrevWordState(parentFailure);
                    foundPrevWord = true;
                }

                // set failure pointer: first chain state with a transition on the same character
                Map<Character, State> failureChildren = parentFailure.getSuccess();
                if (failureChildren != null && !failureChildren.isEmpty()) {
                    for (Map.Entry<Character, State> entry : children.entrySet()) {
                        State childState = entry.getValue();
                        State childFailure = failureChildren.get(entry.getKey());

                        if (childState.getFailure() == null && childFailure != null) {
                            childState.setFailure(childFailure);
                            resolvedChildCnt += 1;
                        }
                    }
                }

                // Nothing further up the chain can change the outcome once the
                // previous word is known and every child has a failure link.
                if (foundPrevWord && resolvedChildCnt == children.size()) {
                    break;
                }
            }

            // Children that found no match on the chain fail back to the root.
            for (State childState : children.values()) {
                if (childState.getFailure() == null) {
                    childState.setFailure(root);
                }
                queue.offer(childState);
            }
        }

        this.stateCount = ordinal - 1;
    }

}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy