// org.wicketstuff.datatable_autocomplete.trie.PatriciaTrie (Maven / Gradle / Ivy)
/*
*
* ==============================================================================
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
* the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/
package org.wicketstuff.datatable_autocomplete.trie;
import java.util.LinkedList;
import java.util.List;
import java.util.Set;
import java.util.concurrent.atomic.AtomicInteger;
import org.apache.wicket.IClusterable;
/**
* @author mocleiri
*
* A Trie is a specialized search tree that is optimized for retrieval of data.
*
* This implementation is read-only and expects to load the data then minimize itself and be
* part of a singleton that returns the indexed data.
*
* A Patricia Trie is used to index words from left to right.
*
* A Suffix Tree, which is useful for any string matching, can be built on top of a Patricia
* Trie simply by using a variant indexing method.
*
* An ITrieFilter can be used to filter additional fields within an indexed object when
* the list of matching words (objects) is being computed.
*
* @see <a href="https://en.wikipedia.org/wiki/Radix_tree">Radix tree</a>
*
* It is suited for quick retrieval of prefix matches over large static datasets (for example,
* 100,000 elements).
*
* This implementation will index an object C based on the word (String) that is extracted
* using the ITrieConfiguration.getWord(C c) method.
*
*/
public class PatriciaTrie implements IClusterable, Trie
{
/**
*
*/
private static final long serialVersionUID = -6075870905379098868L;
private TrieNode root = null;
private ITrieConfiguration configuration = null;
/**
*
*/
public PatriciaTrie()
{
super();
}
/**
*
*/
public PatriciaTrie(ITrieConfiguration configuration)
{
this.configuration = configuration;
this.configuration.setTrie(this);
this.root = configuration.createTrieNode(null, "", "");
}
/*
* (non-Javadoc)
*
* @see org.wicketstuff.datatable_autocomplete.trie.Trie#index(C)
*/
public void index(C value)
{
// traverse to the point where no match is found and then insert at that
// point.
if (configuration.isSuffixTree())
{
// suffix tree
// for anystring match
String word = configuration.getWord(value);
int length = word.length();
for (int i = 0; i < length; i++)
{
// index each substring of the word from the initial full word through to the last
// character.
String subWord = word.substring(i);
this.root.index(subWord, value);
}
}
else
{
// prefix tree
// for prefix match
this.root.index(value);
}
}
/*
* (non-Javadoc)
*
* @see org.wicketstuff.datatable_autocomplete.trie.Trie#getWordList(java.lang.String)
*/
public List getWordList(String prefix)
{
return getWordList(prefix, configuration.getDefaultFilter(), -1);
}
// private List getWordList(TrieNode prefixNode) {
//
// return getWordList(prefixNode, configuration.getDefaultFilter(), -1);
// }
public PrefixTrieMatch find(String prefix, ITrieFilter filter)
{
return this.root.find(prefix, filter);
}
public List getWordList(String prefix, ITrieFilter filter, int limit)
{
PrefixTrieMatch prefixNodeMatch = this.root.find(prefix, filter);
if (prefixNodeMatch == null)
return new LinkedList();
else
return prefixNodeMatch.getWordList(limit);
}
/**
* Visit each TrieNode
*
* @param v
*/
public void visit(ITrieNodeVisitor v)
{
this.root.visit(v);
}
/**
* Compresses the sparse nodes with only 1 branch; makes the Trie into a Patricia Trie which
* uses less space.
*/
public void simplifyIndex()
{
// the first simplification is to remove nodes that have only 1 branch.
// we will basically have nodes that represent more than a single
// character
this.root.simplify();
/*
* We visit each leaf then iterate over upward to mark the max length of each nodes sub
* tree.
*/
final List> leafNodeList = new LinkedList>();
this.root.visit(new ITrieNodeVisitor()
{
public void visit(TrieNode element)
{
if (element.getOrderedNodeList().size() == 0)
leafNodeList.add(element);
for (TrieNode trieNode : element.getOrderedNodeList())
{
trieNode.visit(this);
}
}
});
for (TrieNode trieNode : leafNodeList)
{
TrieNode parentNode = trieNode.getParentNode();
TrieNode currentNode = trieNode;
while (parentNode != null)
{
// start at the bottom and work upwards
int currentLength = currentNode.getCharacter().length();
int currentMax = currentNode.getMaxChildStringLength() + currentLength;
int maxParentLength = parentNode.getMaxChildStringLength();
if (currentMax > maxParentLength)
{
parentNode.setMaxChildStringLength(currentMax);
}
currentNode = parentNode;
parentNode = parentNode.getParentNode();
}
}
}
/**
* @return children of the root node.
*/
public int getChildren()
{
return root.getOrderedNodeList().size();
}
/**
* Return the size of the subtree for the prefix given. This avoids the need to get the list
* especially when the count is large.
*
* @param prefix
* @return the number of elements in the subtree corresponding to the prefix given.
*
*/
public int getPrefixMatchedElementCount(String prefix, final ITrieFilter nodeFilter)
{
PrefixTrieMatch match = root.find(prefix, nodeFilter);
if (match == null)
return 0;
final AtomicInteger counter = new AtomicInteger(0);
match.getNode().visit(new ITrieNodeVisitor()
{
public void visit(TrieNode node)
{
for (C value : node.getOrderedMatchList())
{
if (nodeFilter.isVisible(value))
{
counter.addAndGet(node.getTotalMatches());
}
}
}
});
return counter.intValue();
}
/**
*
* @return the total number of elements indexed by this trie.
*
* Note this can be an expensive call as each node in the trie is visited.
*
*/
public int size()
{
final AtomicInteger counter = new AtomicInteger(0);
// visit each node an aggregate the number of matches:
root.visit(new ITrieNodeVisitor()
{
public void visit(TrieNode node)
{
counter.addAndGet(node.getTotalMatches());
}
});
return counter.intValue();
}
/**
* @return the set of strings that map to the next nodes of the root node.
* @see org.wicketstuff.datatable_autocomplete.trie.TrieNode#getNextNodeCharacterSet()
*/
public Set getNextNodeCharacterSet()
{
/*
* This is really just to support the datatable-autocomplete-examples where we give a count
* of the matches for each first character contained in this set.
*/
return root.getNextNodeCharacterSet();
}
public List getWordList(String prefix, ITrieFilter filter)
{
return getWordList(prefix, filter, -1);
}
/*
* (non-Javadoc)
*
* @see org.wicketstuff.datatable_autocomplete.trie.Trie#getWordList(java.lang.String, int)
*/
public List getWordList(String prefix, int limit)
{
return getWordList(prefix, null, limit);
}
/*
* (non-Javadoc)
*
* @see org.wicketstuff.datatable_autocomplete.trie.Trie#postIndexing()
*/
public void postIndexing()
{
this.simplifyIndex();
}
/*
* (non-Javadoc)
*
* @see org.wicketstuff.datatable_autocomplete.trie.Trie#preIndexing()
*/
public void preIndexing()
{
}
public TrieNode getRoot()
{
return root;
}
}