All Downloads are FREE. Search and download functionalities are using the official Maven repository.

se.softhouse.common.collections.CharacterTrie Maven / Gradle / Ivy

/*
 * Copyright 2013 Jonatan Jönsson
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * http://www.apache.org/licenses/LICENSE-2.0
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package se.softhouse.common.collections;

import static java.util.Objects.requireNonNull;

import java.util.AbstractMap;
import java.util.AbstractSet;
import java.util.Collections;
import java.util.ConcurrentModificationException;
import java.util.Iterator;
import java.util.Map;
import java.util.NoSuchElementException;
import java.util.Set;
import java.util.TreeMap;

import javax.annotation.CheckReturnValue;
import javax.annotation.Nullable;
import javax.annotation.concurrent.NotThreadSafe;

/**
 * 
 * Stores {@link String}s in a trie.
 * The main purpose when using a structure like this is the methods
 * {@link #findLongestPrefix(CharSequence)} and {@link #getEntriesWithPrefix(CharSequence)}.
 *
 * Neither null keys or null values are allowed because just like the
 * devil, they are evil.
 *
 * If you're iterating over the whole trie more often than you do {@link #getEntriesWithPrefix(CharSequence) simple lookups}
 * you're probably better off using a {@link TreeMap}.
 *
 * TODO(jontejj): Implement SortedMap instead of Map
 * 
* * @param the type of values stored in the trie */ @NotThreadSafe public final class CharacterTrie extends AbstractMap { private int size = 0; private final Entry root; private int modCount = 0; /** * An entry represents a node in the tree. */ private static final class Entry implements Map.Entry { /** * The char the parent node will use to reference this child with */ private final Character index; /** * If true this node represents a value. * TODO(jontejj): This could have been optimized so that all entries have values. I.e * there's not a level for each character if there's no other string that shares the prefix. */ private boolean isValue; /** * the value of this node */ private V value; /** * The nodes that belongs to this Node. Having them in a {@link TreeMap} ensures that * iterating over them is done in a consistent(sorted) manner. */ private TreeMap> children; @Nullable private final Entry parent; private Entry(final Character index, @Nullable final Entry parent) { this.index = index; this.parent = parent; } @Override public String getKey() { StringBuilder sb = new StringBuilder(); Entry current = this; while(!current.isRoot()) { sb.append(current.index); current = current.parent; } return sb.reverse().toString(); } @Override public V getValue() { return value; } @Override public V setValue(final V value) { V oldValue = this.value; isValue = true; this.value = value; return oldValue; } @Override public boolean equals(Object obj) { if(!(obj instanceof Map.Entry)) return false; Map.Entry entry = (Map.Entry) obj; return getKey().equals(entry.getKey()) && getValue().equals(entry.getValue()); } @Override public int hashCode() { return getKey().hashCode() ^ getValue().hashCode(); } @Override public String toString() { return getKey() + "=" + getValue(); } private Map.Entry findLongestPrefix(final CharSequence prefix) { Entry child = findLastChild(prefix); if(child.isValue) return child; return null; } /** * Clear this entry from being a value * * @return true if this call had any effect */ private boolean unset() { boolean wasValue = isValue; isValue = false; value = null; return wasValue; } private boolean isRoot() { return parent == null; } private boolean hasChildren() { return children != null ? children.size() > 0 : false; } private Entry getChild(final Character c) { return children != null ? children.get(c) : null; } /** * @param keyToFetch the key to find the child/leaf for * @return the leaf in the tree that is reached for the given key, * or null if no such leaf could be found */ private Entry findChild(final CharSequence keyToFetch) { // Start at the root and search the tree for the entry matching the // given key Entry current = this; for(int i = 0, len = keyToFetch.length(); i < len && current != null; i++) { Character c = keyToFetch.charAt(i); current = current.getChild(c); } return current; } /** * @param prefix the key to find the child/leaf for * @return the leaf in the tree that is reached for the given key, * or null if no such leaf could be found */ private Entry findLastChild(final CharSequence prefix) { // Start at the root and search the tree for an entry starting with // key, return the last possible match so that matches with more matching chars will be // prioritized Entry current = this; for(int i = 0, len = prefix.length(); i < len; i++) { Character c = prefix.charAt(i); Entry next = current.getChild(c); if(next == null) return current; current = next; } return current; } /** * @param keyToFetch the key to find the child/leaf for * @return the value for the leaf in the tree that is reached for the * given key, * or null if no such value could be found */ private V get(final CharSequence keyToFetch) { Entry child = findChild(keyToFetch); if(child == null) return null; if(child.isValue) return child.value; return null; } /** * @param c the Character index to remove * @throws NullPointerException if this Entry doesn't have had any * children before */ private void deleteChild(final Character c) { children.remove(c); } /** * Makes sure that a child that represents the given {@code childChar} is found in this * entry. * * @param childChar the character to create/get a child for * @return either the already existing child or a newly created one */ private Entry ensureChild(final Character childChar) { if(children == null) { children = new TreeMap>(); Entry child = new Entry(childChar, this); children.put(childChar, child); return child; } Entry existing = children.get(childChar); if(existing != null) return existing; Entry child = new Entry(childChar, this); children.put(childChar, child); return child; } /** * Removes all key-value pairs in this trie */ private void clear() { children = new TreeMap>(); unset(); } /** * Finds the successor entry for predecessor, * It starts by looking if it's a value itself, then it checks * the children and if nothing there then it walks back up and checks siblings. * (essentially a pre-order tree traversal) * * @param level the current level we're in (in the current stack) */ private Entry successor(Entry predecessor, CharSequence predecessorKey, int level, boolean isGoingDown) { if(isValue && predecessor != this && isGoingDown) return this; if(hasChildren()) { Map.Entry> next = null; if(predecessor != null && predecessor.commonDescent(this) && level < predecessorKey.length()) { // Go through each sibling one after the other char charAtLevel = predecessorKey.charAt(level); next = children.higherEntry(charAtLevel); } else { next = children.firstEntry(); } // Visit the next child if(next != null) return next.getValue().successor(predecessor, predecessorKey, level + 1, true); } if(!isRoot()) // Go back up and enter the sibling return parent.successor(predecessor, predecessorKey, level - 1, false); return null; } /** * Returns true if this node is an ancestor for {@code entry}. */ private boolean ancestorFor(Entry entry) { if(isRoot()) return true; Entry entryAncestor = entry.parent; while(entryAncestor != null && this != entryAncestor) { entryAncestor = entryAncestor.parent; } return this == entryAncestor; } private boolean commonDescent(Entry entry) { if(this.ancestorFor(entry)) return true; else if(entry.ancestorFor(this)) return true; return false; } private int size() { int size = isValue ? 1 : 0; if(hasChildren()) { for(Entry child : children.values()) { size += child.size(); } } return size; } } /** * Creates a new, empty, {@link CharacterTrie} */ @CheckReturnValue public static CharacterTrie newTrie() { return new CharacterTrie(); } /** * Creates a new {@link CharacterTrie} with the entries from {@code map} */ @CheckReturnValue public static CharacterTrie newTrie(Map map) { CharacterTrie trie = newTrie(); trie.putAll(map); return trie; } private CharacterTrie() { root = createRoot(); } /** * @throws NullPointerException if {@code key} or {@code value} is null */ @Override public V put(final String key, final V value) { requireNonNull(key, "Null key given, CharacterTrie does not support null keys as they are error-prone"); requireNonNull(value, "Null value given, CharacterTrie does not support null values as they are error-prone. " + "Use the Null Object Pattern instead."); // Start at the root and search the tree for the entry to insert the // final character into Entry current = root; for(int i = 0, len = key.length(); i < len; i++) { Character c = key.charAt(i); // Traverses the tree down to the end where we put in our child current = current.ensureChild(c); } V oldValue = current.setValue(value); if(oldValue == null) { size++; modCount++; } return oldValue; } @Override @CheckReturnValue public int size() { return size; } @Override public V remove(final Object keyToRemove) { CharSequence key = (CharSequence) keyToRemove; // Start at the root and search the tree for the entry to delete Entry current = root; for(int i = 0, len = key.length(); i < len; i++) { Character c = key.charAt(i); current = current.getChild(c); if(current == null) return null; } return removeEntry(current); } private V removeEntry(Entry entryToRemove) { V oldValue = entryToRemove.getValue(); if(entryToRemove.unset()) { size--; modCount++; if(entryToRemove.hasChildren()) // We have children so we are important and can't be removed return oldValue; Entry parent = entryToRemove.parent; // Remove ourselves from the parent parent.deleteChild(entryToRemove.index); // Clean up unused entries while(!parent.hasChildren() && !parent.isValue) { Entry grandParent = parent.parent; if(grandParent == null) { break; // we reached root } // Ask the grandParent to remove our parent grandParent.deleteChild(parent.index); // Walk up the tree and remove entries without children parent = grandParent; } return oldValue; } return null; } @Override @CheckReturnValue public boolean containsKey(@Nullable final Object key) { if(key == null) return false; String keyToCheckContainMentFor = (String) key; return root.get(keyToCheckContainMentFor) != null; } @Override @CheckReturnValue public V get(final Object key) { CharSequence keyToFetch = (CharSequence) key; return root.get(keyToFetch); } /** * Returns the entry that shares the longest prefix with {@code key}, or null * if no such entry exists * * @see Longest_prefix_match */ @CheckReturnValue public Map.Entry findLongestPrefix(final CharSequence prefix) { return root.findLongestPrefix(prefix); } /** * Returns all entries whose key starts with {@code prefix}. The returned {@link Set} is a view * so removed elements from it are also removed in this structure. */ public Set> getEntriesWithPrefix(final CharSequence prefix) { Entry startingPoint = root.findChild(prefix); if(startingPoint == null) return Collections.emptySet(); return new EntrySet(startingPoint); } /** * Create a simple Entry which parent is null. */ private Entry createRoot() { return new Entry('r', null); } @Override public void clear() { root.clear(); size = 0; modCount++; } @Override public Set> entrySet() { return new EntrySet(root); } private final class EntrySet extends AbstractSet> { private final Entry startingPoint; private EntrySet(Entry startingPoint) { this.startingPoint = startingPoint; } @Override public Iterator> iterator() { return new EntryIterator(startingPoint); } @Override public int size() { return startingPoint.size(); } @Override public void clear() { modCount++; startingPoint.clear(); } } private final class EntryIterator implements Iterator> { private int expectedModCount = modCount; private Entry next; private Entry lastReturned = null; private EntryIterator(Entry startingPoint) { next = startingPoint; } @Override public boolean hasNext() { if(next == null) return false; // Don't recalculate next if someone calls hasNext twice without calling next if(next == lastReturned || lastReturned == null) { if(lastReturned == null) { next = next.successor(lastReturned, "", 0, true); } else { CharSequence lastKey = lastReturned.getKey(); int lastDepth = lastKey.length(); next = next.successor(lastReturned, lastKey, lastDepth, true); } } return next != null; } @Override public Map.Entry next() { verifyUnmodified(); if(!hasNext()) throw new NoSuchElementException(); lastReturned = next; return next; } @Override public void remove() { verifyUnmodified(); if(lastReturned == null) throw new IllegalStateException("You probably forgot to call next before calling remove"); if(removeEntry(lastReturned) == null) throw new IllegalStateException("You probably forgot to call next before calling remove"); expectedModCount++; } private void verifyUnmodified() { if(expectedModCount != modCount) throw new ConcurrentModificationException("Trie modified during iteration"); } } }




© 2015 - 2025 Weber Informatics LLC | Privacy Policy