All Downloads are FREE. Search and download functionalities are using the official Maven repository.

sirius.kernel.commons.Trie Maven / Gradle / Ivy

/*
 * Made with all the love in the world
 * by scireum in Remshalden, Germany
 *
 * Copyright by scireum GmbH
 * http://www.scireum.de - info@scireum.de
 */

package sirius.kernel.commons;

import com.google.common.collect.Lists;
import com.google.common.collect.Sets;

import javax.annotation.Nonnull;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

/**
 * A map like data structure which associates strings (char sequences) to values.
 * <p>
 * A trie is a highly efficient data structure for iterating through a string and retrieving a previously stored
 * value. Checking containment or retrieving a value takes one step per character of the processed string, where
 * each step is a binary search within the children of the current node. The total number of keys stored in the
 * trie does not otherwise influence the lookup.
 * <p>
 * An Example: If we have a list of stop words: "one", "two", "three" and want to detect if these occur in a
 * given text, we can do the following:
 * <pre>
 * {@code
 * Trie<Boolean> trie = Trie.create();
 *
 * trie.put("one", true);
 * trie.put("two", true);
 * trie.put("three", true);
 *
 * String check = "I'd like to have three beer please";
 * Trie.ContainmentIterator<Boolean> iter = trie.iterator();
 *
 * for(int i = 0; i < check.length(); i++) {
 *     if (!iter.doContinue(check.charAt(i))) {
 *         if (iter.isCompleted()) {
 *             System.out.println("Found!");
 *         } else {
 *             iter.resetWith(check.charAt(i));
 *         }
 *     }
 * }
 * }
 * </pre>
 *
 * @param <V> the type of values managed by the trie
 */
public class Trie<V> {

    /**
     * Contains the root of the Trie
     */
    private final Node root = new Node();

    /**
     * Creates a new {@link Trie} without forcing you to re-type the generics.
     *
     * @param <V> the type of values managed by the trie
     * @return a new instance of {@link Trie}
     */
    public static <V> Trie<V> create() {
        return new Trie<>();
    }

    /**
     * Represents an iterator which navigates through the trie character by character.
     *
     * @param <V> the type of values managed by the trie
     */
    public interface ContainmentIterator<V> {

        /**
         * Determines if the current path can be continued with the given character.
         * <p>
         * This will not change the internal state.
         *
         * @param c the character to continue with
         * @return <tt>true</tt> if the current path can be continued using the given character,
         * <tt>false</tt> otherwise.
         */
        boolean canContinue(char c);

        /**
         * Tries to continue the current path with the given character.
         * <p>
         * If the current path can be continued, the internal state will be updated. Otherwise the internal
         * state will remain unchanged - the iterator is not reset automatically.
         *
         * @param c the character to continue with
         * @return <tt>true</tt> if it was possible to continue using the given character, <tt>false</tt> otherwise
         */
        boolean doContinue(char c);

        /**
         * Returns the value associated with the key represented by the path traversed so far.
         *
         * @return the value represented by the path traversed so far or <tt>null</tt> if no value is available
         */
        V getValue();

        /**
         * Sets the value to be associated with the key represented by the path traversed so far
         *
         * @param value the value to set
         */
        void setValue(V value);

        /**
         * Determines if the iterator is currently pointing at a valid match.
         *
         * @return <tt>true</tt> if a value was previously associated with the path traversed so far,
         * <tt>false</tt> otherwise
         */
        boolean isCompleted();

        /**
         * Determines if the iterator can backtrack.
         *
         * @return <tt>true</tt> if at least one transition took place, <tt>false</tt> if the iterator is at the
         * root node.
         */
        boolean canGoBack();

        /**
         * Undoes the latest transition to support backtracking
         */
        void goBack();

        /**
         * Returns a set of all possible continuations for the current state of the iterator
         *
         * @return a set of all possible characters to continue the current path
         */
        Set<Character> getPossibilities();

        /**
         * Restarts the iterator at the beginning of the trie.
         */
        void reset();

        /**
         * Restarts the iterator at the beginning and tries to perform the next transition using the given character.
         *
         * @param c the character to try to use after resetting the iterator
         * @return <tt>true</tt> if the transition using <tt>c</tt> was possible, <tt>false</tt> otherwise. In this
         * case the iterator remains in the "reset" state and can be used as if <tt>reset()</tt> was called.
         */
        boolean resetWith(char c);
    }

    /**
     * Internal class representing a single node in the trie
     */
    class Node {

        /**
         * Points to the parent node of this node (<tt>null</tt> for the root node)
         */
        private Node parent;

        /**
         * Contains a sorted list of keys
         */
        private final List<Character> keys = Lists.newArrayList();

        /**
         * Contains the list of continuations matching the keys list (same index, same child)
         */
        private final List<Node> continuations = Lists.newArrayList();

        /**
         * Contains the value associated with the path to this node
         */
        private V value;
    }

    /**
     * Internal implementation of the ContainmentIterator
     */
    private class ContainmentIteratorImpl implements ContainmentIterator<V> {

        private Node current = root;

        @Override
        public boolean canContinue(char c) {
            // A non-negative result of the binary search means the key list contains exactly this character.
            return Collections.binarySearch(current.keys, c) >= 0;
        }

        @Override
        public boolean doContinue(char c) {
            int index = Collections.binarySearch(current.keys, c);
            if (index < 0) {
                return false;
            }

            current = current.continuations.get(index);
            return true;
        }

        /**
         * Adds a new step for the given character. Internally, a binary search is performed as the keylist
         * is sorted ascending.
         * <p>
         * If a continuation for the character already exists, the iterator simply moves to it.
         *
         * @param c the character to move to, creating the matching child node if necessary
         */
        void addStep(char c) {
            int index = Collections.binarySearch(current.keys, c);
            if (index >= 0) {
                current = current.continuations.get(index);
                return;
            }

            // A negative result encodes the insertion point as -(insertionPoint) - 1.
            int insertionPoint = -(index + 1);
            Node newNode = new Node();
            newNode.parent = current;
            current.keys.add(insertionPoint, c);
            current.continuations.add(insertionPoint, newNode);
            current = newNode;
        }

        @Override
        public V getValue() {
            return current.value;
        }

        @Override
        public void setValue(V value) {
            current.value = value;
        }

        @Override
        public boolean isCompleted() {
            return current.value != null;
        }

        @Override
        public boolean canGoBack() {
            return current != root;
        }

        @Override
        public void goBack() {
            // The root has no parent. Staying at the root prevents the iterator from ending up
            // on a null node, which would make every subsequent call fail.
            if (current.parent != null) {
                current = current.parent;
            }
        }

        @Override
        public Set<Character> getPossibilities() {
            return Sets.newTreeSet(current.keys);
        }

        @Override
        public void reset() {
            current = root;
        }

        @Override
        public boolean resetWith(char c) {
            current = root;
            return doContinue(c);
        }
    }

    /**
     * Determines if the given key is contained in the trie.
     *
     * @param key the key to check for.
     * @return <tt>true</tt> if a value is associated with the path represented by the given key,
     * <tt>false</tt> otherwise
     * @throws IllegalArgumentException if the given key is rejected as empty by <tt>Strings.isEmpty</tt>
     */
    public boolean containsKey(@Nonnull CharSequence key) {
        if (Strings.isEmpty(key)) {
            throw new IllegalArgumentException("key");
        }

        ContainmentIterator<V> iter = iterator();
        for (int i = 0; i < key.length(); i++) {
            if (!iter.doContinue(key.charAt(i))) {
                return false;
            }
        }

        return iter.isCompleted();
    }

    /**
     * Generates a new iterator for the underlying trie.
     *
     * @return a new iterator to navigate through the underlying trie
     */
    public ContainmentIterator<V> iterator() {
        return new ContainmentIteratorImpl();
    }

    /**
     * Returns the value associated with the given key.
     *
     * @param key the path to navigate through
     * @return the value associated with the path defined by the given key or <tt>null</tt> if no value is present
     * @throws IllegalArgumentException if the given key is rejected as empty by <tt>Strings.isEmpty</tt>
     */
    public V get(@Nonnull CharSequence key) {
        if (Strings.isEmpty(key)) {
            throw new IllegalArgumentException("key");
        }

        ContainmentIterator<V> iter = iterator();
        for (int i = 0; i < key.length(); i++) {
            if (!iter.doContinue(key.charAt(i))) {
                return null;
            }
        }

        return iter.getValue();
    }

    /**
     * Associates the given key with the given value.
     *
     * @param key   the path to store the given value
     * @param value the value to store in the trie.
     * @throws IllegalArgumentException if the given key is rejected as empty by <tt>Strings.isEmpty</tt>
     *                                  or if the given value is <tt>null</tt>
     */
    @SuppressWarnings("squid:S2583")
    @Explain("Duplicate @Nonnull value check as it isn't enforced by the compiler.")
    public void put(@Nonnull CharSequence key, @Nonnull V value) {
        if (Strings.isEmpty(key)) {
            throw new IllegalArgumentException("key");
        }
        if (value == null) {
            throw new IllegalArgumentException("value");
        }

        // Use the implementation type directly: addStep is not part of the public iterator interface.
        ContainmentIteratorImpl iter = new ContainmentIteratorImpl();
        for (int i = 0; i < key.length(); i++) {
            // addStep follows an existing continuation or creates the missing node.
            iter.addStep(key.charAt(i));
        }

        iter.setValue(value);
    }

    /**
     * Retrieves all keys that are stored in this {@link Trie}.
     *
     * @return a {@link Set} of all keys that are stored in this {@link Trie}
     */
    public Set<String> keySet() {
        return getAllKeysBeginningWith("");
    }

    /**
     * Retrieves the number of keys that are stored in this {@link Trie}.
     * <p>
     * Note that this computes the complete {@link #keySet() key set} on each invocation.
     *
     * @return the size of this {@link Trie}'s {@link #keySet() key set}
     */
    public int size() {
        return keySet().size();
    }

    /**
     * Performs a prefix search within this {@link Trie}'s {@link #keySet() key set}
     *
     * @param prefix to search for
     * @return all keys that are beginning with the given prefix (may include prefix itself)
     */
    public Set<String> getAllKeysBeginningWith(String prefix) {
        ContainmentIterator<V> iter = iterator();
        for (int i = 0; i < prefix.length(); i++) {
            if (!iter.doContinue(prefix.charAt(i))) {
                return Collections.emptySet();
            }
        }

        Set<String> result = new HashSet<>();
        collectKeys(new StringBuilder(prefix), iter, result);
        return result;
    }

    /**
     * Collects all keys reachable from the current position of the given iterator via depth first traversal.
     * <p>
     * The iterator and the path are restored to their initial state before this method returns.
     *
     * @param path   the characters traversed so far, matching the current position of the iterator
     * @param iter   the iterator pointing to the node from which the traversal starts
     * @param result the set to which each key with an associated value is added
     */
    private void collectKeys(StringBuilder path, ContainmentIterator<V> iter, Set<String> result) {
        if (iter.getValue() != null) {
            result.add(path.toString());
        }

        for (char possibility : iter.getPossibilities()) {
            iter.doContinue(possibility);
            path.append(possibility);
            collectKeys(path, iter, result);
            // Undo the step so that the next sibling starts from the same node and path.
            path.setLength(path.length() - 1);
            iter.goBack();
        }
    }
}





© 2015 - 2024 Weber Informatics LLC | Privacy Policy