// sirius.kernel.commons.Trie (Maven / Gradle / Ivy)
/*
* Made with all the love in the world
* by scireum in Remshalden, Germany
*
* Copyright by scireum GmbH
* http://www.scireum.de - [email protected]
*/
package sirius.kernel.commons;
import com.google.common.collect.Lists;
import com.google.common.collect.Sets;
import javax.annotation.Nonnull;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
/**
 * A map like data structure which associates strings (char sequences) to values.
 * <p>
 * A trie is a highly efficient data structure for iterating through a string and retrieving a previously stored
 * value. Checking containment or retrieving a value has guaranteed O(n) runtime, where n is the length of the
 * processed string, independent of the size of the trie.
 * <p>
 * An Example: If we have a list of stop words: "one", "two", "three" and want to detect if these occur in a
 * given text, we can do the following:
 * <pre>
 * {@code
 * Trie<Boolean> trie = Trie.create();
 *
 * trie.put("one", true);
 * trie.put("two", true);
 * trie.put("three", true);
 *
 * String check = "I'd like to have three beer please";
 * Trie.ContainmentIterator<Boolean> iter = trie.iterator();
 *
 * for (int i = 0; i < check.length(); i++) {
 *     if (!iter.doContinue(check.charAt(i))) {
 *         if (iter.isCompleted()) {
 *             System.out.println("Found!");
 *         }
 *         // doContinue never resets on its own, so restart the search at the current character
 *         iter.resetWith(check.charAt(i));
 *     }
 * }
 * // a match may end exactly at the end of the text
 * if (iter.isCompleted()) {
 *     System.out.println("Found!");
 * }
 * }
 * </pre>
 *
 * @param <V> the type of values managed by the trie
 */
public class Trie<V> {

    /**
     * Contains the root of the Trie
     */
    private final Node root = new Node();

    /**
     * Creates a new {@link Trie} without forcing you to re-type the generics.
     *
     * @param <V> the type of values managed by the trie
     * @return a new instance of {@link Trie}
     */
    public static <V> Trie<V> create() {
        return new Trie<>();
    }

    /**
     * Represents an iterator which navigates through the trie character by character.
     *
     * @param <V> the type of values managed by the trie
     */
    public interface ContainmentIterator<V> {

        /**
         * Determines if the current path can be continued with the given character.
         * <p>
         * This will not change the internal state.
         *
         * @param c the character to continue with
         * @return <tt>true</tt> if the current path can be continued using the given character,
         * <tt>false</tt> otherwise.
         */
        boolean canContinue(char c);

        /**
         * Tries to continue the current path with the given character.
         * <p>
         * If the current path can be continued, the internal state will be updated. Otherwise the internal
         * state will remain unchanged - the iterator is not reset automatically.
         *
         * @param c the character to continue with
         * @return <tt>true</tt> if it was possible to continue using the given character, <tt>false</tt> otherwise
         */
        boolean doContinue(char c);

        /**
         * Returns the value associated with the key represented by the path traversed so far.
         *
         * @return the value represented by the path traversed so far or <tt>null</tt> if no value is available
         */
        V getValue();

        /**
         * Sets the value to be associated with the key represented by the path traversed so far
         *
         * @param value the value to set
         */
        void setValue(V value);

        /**
         * Determines if the iterator is currently pointing at a valid match.
         *
         * @return <tt>true</tt> if a value was previously associated with the path traversed so far,
         * <tt>false</tt> otherwise
         */
        boolean isCompleted();

        /**
         * Determines if the iterator can backtrack.
         *
         * @return <tt>true</tt> if at least one transition took place, <tt>false</tt> if the iterator is at the
         * root node.
         */
        boolean canGoBack();

        /**
         * Undoes the latest transition to support backtracking.
         * <p>
         * Use {@link #canGoBack()} to determine whether there is a transition to undo.
         */
        void goBack();

        /**
         * Returns a set of all possible continuations for the current state of the iterator
         *
         * @return a set of all possible characters to continue the current path
         */
        Set<Character> getPossibilities();

        /**
         * Restarts the iterator at the beginning of the trie.
         */
        void reset();

        /**
         * Restarts the iterator at the beginning and tries to perform the next transition using the given character.
         *
         * @param c the character to try to use after resetting the iterator
         * @return <tt>true</tt> if the transition using <tt>c</tt> was possible, <tt>false</tt> otherwise. In this
         * case the iterator remains in the "reset" state and can be used as if <tt>reset()</tt> was called.
         */
        boolean resetWith(char c);
    }

    /**
     * Internal class representing a single node in the trie
     */
    class Node {

        /**
         * Points to the parent node of this node (<tt>null</tt> for the root node)
         */
        private Node parent;

        /**
         * Contains a sorted list of keys
         */
        private final List<Character> keys = Lists.newArrayList();

        /**
         * Contains the list of continuations matching the keys list
         */
        private final List<Node> continuations = Lists.newArrayList();

        /**
         * Contains the value associated with the path to this node
         */
        private V value;
    }

    /**
     * Internal implementation of the ContainmentIterator
     */
    private class ContainmentIteratorImpl implements ContainmentIterator<V> {

        private Node current = root;

        @Override
        public boolean canContinue(char c) {
            // binarySearch only yields a non-negative index if the key is present
            return Collections.binarySearch(current.keys, c) >= 0;
        }

        @Override
        public boolean doContinue(char c) {
            int index = Collections.binarySearch(current.keys, c);
            if (index < 0) {
                return false;
            }

            current = current.continuations.get(index);
            return true;
        }

        /**
         * Moves along the transition for the given character and creates it first, if it does not exist yet.
         * <p>
         * Internally, a binary search is performed as the key list is sorted ascending.
         *
         * @param c the character to follow or to add as new transition
         */
        void addStep(char c) {
            int index = Collections.binarySearch(current.keys, c);
            if (index >= 0) {
                current = current.continuations.get(index);
                return;
            }

            // A miss is reported as (-(insertion point) - 1), inserting there keeps the keys sorted
            int insertionPoint = -(index + 1);
            Node newNode = new Node();
            newNode.parent = current;
            current.keys.add(insertionPoint, c);
            current.continuations.add(insertionPoint, newNode);
            current = newNode;
        }

        @Override
        public V getValue() {
            return current.value;
        }

        @Override
        public void setValue(V value) {
            current.value = value;
        }

        @Override
        public boolean isCompleted() {
            return current.value != null;
        }

        @Override
        public boolean canGoBack() {
            return current != root;
        }

        /**
         * Moves back to the parent of the current node.
         * <p>
         * At the root node this is a no-op, as moving to the non-existent parent would leave the iterator
         * without a current node and make every subsequent call fail.
         */
        @Override
        public void goBack() {
            if (current.parent != null) {
                current = current.parent;
            }
        }

        @Override
        public Set<Character> getPossibilities() {
            return Sets.newTreeSet(current.keys);
        }

        @Override
        public void reset() {
            current = root;
        }

        @Override
        public boolean resetWith(char c) {
            current = root;
            return doContinue(c);
        }
    }

    /**
     * Determines if the given key is contained in the trie.
     *
     * @param key the key to check for.
     * @return <tt>true</tt> if a value is associated with the path represented by the given key,
     * <tt>false</tt> otherwise
     * @throws IllegalArgumentException if the given key is empty
     */
    public boolean containsKey(@Nonnull CharSequence key) {
        if (Strings.isEmpty(key)) {
            throw new IllegalArgumentException("key");
        }

        ContainmentIterator<V> iter = iterator();
        for (int i = 0; i < key.length(); i++) {
            if (!iter.doContinue(key.charAt(i))) {
                return false;
            }
        }

        return iter.isCompleted();
    }

    /**
     * Generates a new iterator for the underlying trie.
     *
     * @return a new iterator to navigate through the underlying trie
     */
    public ContainmentIterator<V> iterator() {
        return new ContainmentIteratorImpl();
    }

    /**
     * Returns the value associated with the given key.
     *
     * @param key the path to navigate through
     * @return the value associated with the path defined by the given key or <tt>null</tt> if no value is present
     * @throws IllegalArgumentException if the given key is empty
     */
    public V get(@Nonnull CharSequence key) {
        if (Strings.isEmpty(key)) {
            throw new IllegalArgumentException("key");
        }

        ContainmentIterator<V> iter = iterator();
        for (int i = 0; i < key.length(); i++) {
            if (!iter.doContinue(key.charAt(i))) {
                return null;
            }
        }

        return iter.getValue();
    }

    /**
     * Associates the given key with the given value.
     *
     * @param key   the path to store the given value
     * @param value the value to store in the trie.
     * @throws IllegalArgumentException if the given key is empty or the given value is <tt>null</tt>
     */
    @SuppressWarnings("squid:S2583")
    @Explain("Duplicate @Nonnull value check as it isn't enforced by the compiler.")
    public void put(@Nonnull CharSequence key, @Nonnull V value) {
        if (Strings.isEmpty(key)) {
            throw new IllegalArgumentException("key");
        }
        if (value == null) {
            throw new IllegalArgumentException("value");
        }

        // addStep follows an existing transition or creates a missing one, so a single lookup per
        // character is sufficient.
        ContainmentIteratorImpl iter = new ContainmentIteratorImpl();
        for (int i = 0; i < key.length(); i++) {
            iter.addStep(key.charAt(i));
        }
        iter.setValue(value);
    }

    /**
     * Retrieves all keys that are stored in this {@link Trie}.
     *
     * @return a {@link Set} of all keys that are stored in this {@link Trie}
     */
    public Set<String> keySet() {
        return getAllKeysBeginningWith("");
    }

    /**
     * Retrieves the number of keys that are stored in this {@link Trie}.
     *
     * @return the size of this {@link Trie}'s {@link #keySet() key set}
     */
    public int size() {
        // Every node carrying a value represents exactly one key, so there is no need to build the keys.
        return countValues(root);
    }

    /**
     * Counts all nodes in the sub tree of the given node (including the node itself) which carry a value.
     *
     * @param node the root of the sub tree to inspect
     * @return the number of nodes with a non-null value
     */
    private int countValues(Node node) {
        int count = node.value != null ? 1 : 0;
        for (Node child : node.continuations) {
            count += countValues(child);
        }

        return count;
    }

    /**
     * Performs a prefix search within this {@link Trie}'s {@link #keySet() key set}
     *
     * @param prefix to search for
     * @return all keys that are beginning with the given prefix (may include prefix itself)
     */
    public Set<String> getAllKeysBeginningWith(String prefix) {
        ContainmentIteratorImpl walker = new ContainmentIteratorImpl();
        for (int i = 0; i < prefix.length(); i++) {
            if (!walker.doContinue(prefix.charAt(i))) {
                return Collections.emptySet();
            }
        }

        Set<String> result = new HashSet<>();
        collectKeys(walker.current, new StringBuilder(prefix), result);
        return result;
    }

    /**
     * Adds the keys of all valued nodes in the sub tree of the given node to the given result set.
     *
     * @param node   the node to start at
     * @param path   the characters leading to <tt>node</tt>. This is modified while descending but restored
     *               before the method returns
     * @param result the set to collect the keys in
     */
    private void collectKeys(Node node, StringBuilder path, Set<String> result) {
        if (node.value != null) {
            result.add(path.toString());
        }

        for (int i = 0; i < node.keys.size(); i++) {
            char step = node.keys.get(i);
            path.append(step);
            collectKeys(node.continuations.get(i), path, result);
            path.setLength(path.length() - 1);
        }
    }
}