// org.wicketstuff.datatable_autocomplete.trie.PatriciaTrie (Maven / Gradle / Ivy)
/*
*
* ==============================================================================
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
* the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/
package org.wicketstuff.datatable_autocomplete.trie;
import java.util.LinkedList;
import java.util.List;
import java.util.Set;
import java.util.concurrent.atomic.AtomicInteger;
import org.apache.wicket.IClusterable;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* @author mocleiri
*
* A Trie is a specialized search tree that is optimized for
* retrieval of data.
*
* This implementation is read-only and expects to load the data then
* minimize itself and be part of a singleton that returns the indexed
* data.
*
* A Patricia Trie is used to index words from left to right.
*
* A Suffix Tree, which is useful for any string matching, can be build on top of a Patricia Trie simply using a variant indexing method.
*
* An ITrieFilter can be used to filter additional fields within an
* indexed object when the list of matching words (objects) is being
* computed.
*
* @see http://en.wikipedia.org/wiki/Radix_tree
*
* It is suited for quick retrieval of prefix matches over large static
* datasets (100,000 elements)
*
* This implementation will index an object C based on the word (String)
* that is extracted using the ITrieNodeConfiguration.getWord (C c) method.
*
*/
public class PatriciaTrie implements IClusterable, Trie {
/**
*
*/
private static final long serialVersionUID = -6075870905379098868L;
private static final Logger log = LoggerFactory.getLogger(PatriciaTrie.class);
private TrieNode root = null;
private ITrieConfiguration configuration = null;
/**
*
*/
public PatriciaTrie() {
super();
}
/**
*
*/
public PatriciaTrie(ITrieConfiguration configuration) {
this.configuration = configuration;
this.configuration.setTrie(this);
this.root = configuration.createTrieNode(null, "", "");
}
/* (non-Javadoc)
* @see org.wicketstuff.datatable_autocomplete.trie.Trie#index(C)
*/
public void index(C value) {
// traverse to the point where no match is found and then insert at that
// point.
if (configuration.isSuffixTree()) {
// suffix tree
// for anystring match
String word = configuration.getWord(value);
int length = word.length();
for (int i = 0; i < length; i++) {
// index each substring of the word from the initial full word through to the last character.
String subWord = word.substring(i);
this.root.index(subWord, value);
}
}
else {
// prefix tree
// for prefix match
this.root.index(value);
}
}
/* (non-Javadoc)
* @see org.wicketstuff.datatable_autocomplete.trie.Trie#getWordList(java.lang.String)
*/
public List getWordList(String prefix) {
return getWordList(prefix, configuration.getDefaultFilter(), -1);
}
// private List getWordList(TrieNode prefixNode) {
//
// return getWordList(prefixNode, configuration.getDefaultFilter(), -1);
// }
public PrefixTrieMatch find(String prefix, ITrieFilterfilter) {
return this.root.find(prefix, filter);
}
public List getWordList(String prefix, ITrieFilter filter, int limit) {
PrefixTrieMatch prefixNodeMatch = this.root.find(prefix, filter);
if (prefixNodeMatch == null)
return new LinkedList();
else
return prefixNodeMatch.getWordList(limit);
}
/**
* Visit each TrieNode
*
* @param v
*/
public void visit(ITrieNodeVisitor v) {
this.root.visit(v);
}
/**
* Compresses the sparse nodes with only 1 branch; makes the Trie into a
* Patricia Trie which uses less space.
*/
public void simplifyIndex() {
// the first simplification is to remove nodes that have only 1 branch.
// we will basically have nodes that represent more than a single
// character
this.root.simplify();
/*
* We visit each leaf then iterate over upward to mark the max length of
* each nodes sub tree.
*/
final List> leafNodeList = new LinkedList>();
this.root.visit(new ITrieNodeVisitor() {
public void visit(TrieNode element) {
if (element.getOrderedNodeList().size() == 0)
leafNodeList.add(element);
for (TrieNode trieNode : element.getOrderedNodeList()) {
trieNode.visit(this);
}
}
});
for (TrieNode trieNode : leafNodeList) {
TrieNode parentNode = trieNode.getParentNode();
TrieNode currentNode = trieNode;
while (parentNode != null) {
// start at the bottom and work upwards
int currentLength = currentNode.getCharacter().length();
int currentMax = currentNode.getMaxChildStringLength()
+ currentLength;
int maxParentLength = parentNode.getMaxChildStringLength();
if (currentMax > maxParentLength) {
parentNode.setMaxChildStringLength(currentMax);
}
currentNode = parentNode;
parentNode = parentNode.getParentNode();
}
}
}
/**
* @return
*/
public int getChildren() {
return root.getOrderedNodeList().size();
}
/**
* Return the size of the subtree for the prefix given. This avoids the need
* to get the list especially when the count is large.
*
* @param prefix
* @return the number of elements in the subtree corresponding to the prefix
* given.
*
*/
public int getPrefixMatchedElementCount(String prefix, final ITrieFilternodeFilter) {
PrefixTrieMatch match = root.find(prefix, nodeFilter);
if (match == null)
return 0;
final AtomicInteger counter = new AtomicInteger(0);
match.getNode().visit(new ITrieNodeVisitor() {
public void visit(TrieNode node) {
for (C value : node.getOrderedMatchList()) {
if (nodeFilter.isVisible(value)) {
counter.addAndGet(node.getTotalMatches());
}
}
}
});
return counter.intValue();
}
/**
*
* @return the total number of elements indexed by this trie.
*
* Note this can be an expensive call as each node in the trie is
* visited.
*
*/
public int size() {
final AtomicInteger counter = new AtomicInteger(0);
// visit each node an aggregate the number of matches:
root.visit(new ITrieNodeVisitor() {
public void visit(TrieNode node) {
counter.addAndGet(node.getTotalMatches());
}
});
return counter.intValue();
}
/**
* @return
* @see org.wicketstuff.datatable_autocomplete.trie.TrieNode#getNextNodeCharacterSet()
*/
public Set getNextNodeCharacterSet() {
/*
* This is really just to support the datatable-autocomplete-examples
* where we give a count of the matches for each first character
* contained in this set.
*/
return root.getNextNodeCharacterSet();
}
public List getWordList(String prefix, ITrieFilter filter) {
return getWordList(prefix, filter, -1);
}
/* (non-Javadoc)
* @see org.wicketstuff.datatable_autocomplete.trie.Trie#getWordList(java.lang.String, int)
*/
public List getWordList(String prefix, int limit) {
return getWordList(prefix, null, limit);
}
/* (non-Javadoc)
* @see org.wicketstuff.datatable_autocomplete.trie.Trie#postIndexing()
*/
public void postIndexing() {
this.simplifyIndex();
}
/* (non-Javadoc)
* @see org.wicketstuff.datatable_autocomplete.trie.Trie#preIndexing()
*/
public void preIndexing() {
}
public TrieNode getRoot() {
return root;
}
}