// All downloads are free. Search and download functionality uses the official Maven repository.
//
// org.wicketstuff.datatable_autocomplete.trie.TrieNode (Maven / Gradle / Ivy)

/*
 * 
 * ==============================================================================
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
 * use this file except in compliance with the License. You may obtain a copy of
 * the License at
 * 
 * http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations under
 * the License.
 */

package org.wicketstuff.datatable_autocomplete.trie;

import java.io.Serializable;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Set;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * @author mocleiri
 * 
 *         A Node in the Trie
 * 
 *         Contains the character(s) that are contained in this node.
 * 
 *         A map of next character(s) to TrieNode's and the count of the number of words in the
 *         subtree beneath this node.
 * 
 *         The C context object that is indexed by this node.
 * 
 *         The configuration object that provides the string version of an C object.
 * 
 * 
 * 
 */
public class TrieNode implements Serializable
{

	/**
	 * 
	 */
	private static final long serialVersionUID = -3544907989469418291L;

	private static final Logger log = LoggerFactory.getLogger(TrieNode.class);

	private static final Comparator> trieNodeComparator = new Comparator>()
	{

		public int compare(TrieNode o1, TrieNode o2)
		{

			return o1.getCharacter().compareTo(o2.getCharacter());

		}
	};

	// the string that corresponds to what is matched between our parent node
	// and the root node of the enire trie.
	private final String rootMatchedString;

	// the character in the word (final character) that this node represents
	// after minimization it may be a couple of characters
	private String character;

	private Map> nodeMap = new HashMap>();

	// child nodes (ordered a-z) so that we can traverse properly
	private List> orderedNodeList = new ArrayList>();

	private TrieNode parentNode;

	// if >1 then this is the total number of strings that terminate on this
	// node
	private int totalMatches = 0;

	// contains the matches for the indexed element in the 'character'
	// i.e. index 0 matches the character.get(0) and index 1 matches the element
	// that matches to character.get(0) + character.get(1)
	private Map> matchMap = new LinkedHashMap>();

	// if the entire tree below this node was traversed this would be the length
	// of the longest string formed.
	// this is used when doing a 'superselect' match to know when a branch is
	// not viable.
	private int maxChildStringLength = 0;

	private final ITrieConfiguration configuration;

	private Long nodeID;

	/**
	 * @param parentNode
	 * @param rootMatchedString
	 * @param character
	 * @param configuration
	 */
	public TrieNode(TrieNode parentNode, String rootMatchedString, String character,
		ITrieConfiguration configuration)
	{

		super();

		this.parentNode = parentNode;
		this.rootMatchedString = rootMatchedString;
		this.character = character;

		this.configuration = configuration;

	}

	/**
	 * @return the maxChildStringLength
	 */
	public int getMaxChildStringLength()
	{

		return this.maxChildStringLength;
	}

	/**
	 * @param maxChildStringLength
	 *            the maxChildStringLength to set
	 */
	public void setMaxChildStringLength(int maxChildStringLength)
	{

		this.maxChildStringLength = maxChildStringLength;
	}

	@Override
	public String toString()
	{

		StringBuffer children = new StringBuffer();

		List nodeList = new ArrayList(this.nodeMap.keySet());

		for (int i = 0; i < nodeList.size() - 1; i++)
		{

			String node = nodeList.get(i);
			children.append(node);
			children.append(", ");

		}

		if (nodeList.size() > 0)
			children.append(nodeList.get(nodeList.size() - 1));

		return "NODE [ matchedPrefix = '" + rootMatchedString + "', character ='" + getCharacter() +
			"', word = '" + getWord() + "', children = (" + children.toString() + ") ]";
	}

	/**
	 * @return the character
	 */
	public String getCharacter()
	{

		return this.character;
	}

	/**
	 * Add the word into the Trie index.
	 * 
	 * Recurses down the tree until all of the characters in word have been placed.
	 * 
	 * @param object
	 * @return the node that the word was finally attached to
	 */
	public TrieNode index(C object)
	{

		String word = configuration.getWord(object);

		return index(word, object);
	}

	/**
	 * 
	 * @param word
	 * @param object
	 * 
	 * @return the node that the word was finally attached to
	 */
	public TrieNode index(String word, C object)
	{

		if (!configuration.isIndexCaseSensitive())
			word = word.toLowerCase();

		return index(word, object, 0);
	}

	public List> getOrderedNodeList()
	{

		return this.orderedNodeList;

	}

	/**
	 * @param word2
	 * @param i
	 * @return
	 */
	private TrieNode index(String word, C context, int startingIndex)
	{


		if (word.length() == startingIndex)
		{

			/*
			 * This is the node that matches the word.
			 * 
			 * First: make sure that this is the first match on this node.
			 * 
			 * Second: insert the object into the first slot in the matchMap. Note: the index is
			 * always zero because we build an uncompressed Trie first.
			 */

			List matchList = this.matchMap.get(0);

			if (matchList == null)
			{
				matchList = new ArrayList();
				this.matchMap.put(0, matchList);
			}
			// note the increase in matches terminating with this node.
			this.totalMatches++;
			matchList.add(context);

			return this;
		}
		else
		{

			// use the character at the starting index to get the next node

			String nextCharacter = word.substring(startingIndex, startingIndex + 1);

			TrieNode nextNode = nodeMap.get(nextCharacter);

			if (nextNode == null)
			{

				String matchedSubString = word.substring(0, startingIndex);

				nextNode = newNode(this, matchedSubString, nextCharacter);
				nodeMap.put(nextCharacter, nextNode);
				orderedNodeList.add(nextNode);

			}

			return nextNode.index(word, context, startingIndex + 1);

		}

	}

	/**
	 * @param nextCharacter
	 * @return
	 */
	protected TrieNode newNode(TrieNode parent, String rootMatchedString, String nextCharacter)
	{

		return this.configuration.createTrieNode(parent, rootMatchedString, nextCharacter);
	}


	/**
	 * 
	 * In the normal case there is a single path through the tree and we identify the node that
	 * matches the prefix.
	 * 
	 * In the any case there will be many nodes since there are multiple paths to a match.
	 * 
	 * @param matchingNodeList
	 * @param substring
	 */
	private void findMatchingNodes(Set> matchingNodeList, ITrieFilter nodeFilter,
		String substring)
	{

		PrefixTrieMatch match = find(substring, nodeFilter);

		if (match != null)
		{
			TrieNode node = match.getNode();

			matchingNodeList.add(node);
			// guaranteed to match the subtree so just exit at this point.
			// this will prevent matching the same words multiple times where the substring is
// small.
			return;
		}

		if (getMaxChildStringLength() < substring.length())
			return; // not enough length in the subtree to match the string so
		// no need to look.

		for (TrieNode trieNode : this.orderedNodeList)
		{

			trieNode.findMatchingNodes(matchingNodeList, nodeFilter, substring);
			// match = trieNode.find(substring);
			//
			// if (match != null)
			// matchingNodeList.add (match.getNode());

		}

	}


	/*
	 * Recursively finds the Node that corresponds to the prefix specificed.
	 */
	public PrefixTrieMatch find(String key, ITrieFilter nodeFilter)
	{


		if (!configuration.isIndexCaseSensitive())
			key = key.toLowerCase();

		/*
		 * Check the current character against the
		 */

		int keyLength = key.length();

		int characterLength = getCharacter().length();

		if (keyLength == characterLength)
		{

			if (getCharacter().equals(key))
			{
				// match
				return new PrefixTrieMatch(getWord(), nodeFilter, this);
			}
			else
			{
				// no match
				return null;
			}
		}
		else if (keyLength > characterLength)
		{

			// compare the 'characterLength' substring of key
			String subKey = key.substring(0, characterLength);

			if (subKey.equals(getCharacter()))
			{
				// matches this node but we still need to compare against the
				// child nodes.

				int difference = keyLength - characterLength;

				String newKey = null;

				newKey = key.substring(keyLength - difference);

				TrieNode nextNode = this.nodeMap.get(newKey.substring(0, 1));

				if (nextNode == null)
					return null;

				return nextNode.find(newKey, nodeFilter);

			}
			else
			{
				return null;
			}

		}
		else
		{
			// keyLength < characterLength
			if (keyLength > 0 && characterLength > 1 && getCharacter().contains(key))
			{
				return new PrefixTrieMatch(getWord(), nodeFilter, this);
			}
			return null;

		}


	}

	/**
	 * @return the parentNode
	 */
	public TrieNode getParentNode()
	{

		return this.parentNode;
	}

	/**
	 * @return the totalMatches
	 */
	public int getTotalMatches()
	{

		return this.totalMatches;
	}

	/**
	 * Traverses up the tree to the root to generate the word that this node represents.
	 * 
	 * @return generated word
	 */
	public String getWord()
	{

		return this.rootMatchedString + this.character;

// StringBuffer buf = new StringBuffer();
//
// TrieNode currentNode = this;
//
// while (currentNode != null) {
// buf.insert(0, currentNode.getCharacter());
//
// currentNode = currentNode.getParentNode();
// }
//
// if (buf.toString().equals(this.rootMatchedString + this.character)) {
// log.info("can remove the looping back to parent.");
// }
// return buf.toString();
	}

	/**
	 * @param wordList
	 * @param filter
	 * @param limit
	 *            -1 if no limit or the value at which point the list should stop being filled in.
	 */
	public void buildWordList(List wordList, ITrieFilter filter, int limit)
	{

// if (this.orderedNodeList.size() == 0
// && (filter == null || filter.isVisible(this))) {
// // can be null in certain cases where the match is to an empty Trie.
// addExistingContextToList(wordList, filter);
//
// } else {

		log.debug(orderedNodeList.toString());

		addExistingContextToList(wordList, filter, limit);

		if (wordList.size() == limit)
			return;

		for (TrieNode node : orderedNodeList)
		{
			node.buildWordList(wordList, filter, limit);

			if (wordList.size() == limit)
				return;

		}
// }

	}

	private void addExistingContextToList(List wordList, ITrieFilter filter, int limit)
	{

		List keyList = new LinkedList();

		keyList.addAll(matchMap.keySet());

		/*
		 * Sort numerically so that order is based on shortest match first.
		 */
		Collections.sort(keyList);

		for (Integer i : keyList)
		{

			List contextList = this.matchMap.get(i);

			// check with the filter to only include those with the proper

			for (C c : contextList)
			{

				if (filter.isVisible(c))
				{
					wordList.add(c);

					if (wordList.size() == limit)
						return;
				}


			}


		}
	}

	/**
	 * Visit each of the TrieNodes in the Trie according to the order?
	 * 
	 * @param v
	 */
	public void visit(ITrieNodeVisitor v)
	{

		v.visit(this);

		for (TrieNode child : this.orderedNodeList)
		{
			child.visit(v);
		}
	}


	/**
	 * Called once all the keys have been added into the Trie.
	 * 
	 * This will compact the nodes so that there will be no single child nodes which will help to
	 * reduce the memory usage.
	 * 
	 * 
	 */
	public void simplify()
	{

		Collections.sort(orderedNodeList, trieNodeComparator);

		/*
		 * simplify our children first.
		 */
		while (this.orderedNodeList.size() == 1)
		{
			// consolidate with the subnode
			// remove the sub node and set us as the parent to their
			// children.
			TrieNode onlyChild = this.orderedNodeList.remove(0);

			this.nodeMap.clear();

			String childCharacter = onlyChild.getCharacter();

			for (int i = 0; i < childCharacter.length(); i++)
			{

				String c = childCharacter.substring(i, i + 1);

				// we grow our character to represent both
				this.character = this.character + c;

				List childContext = onlyChild.matchMap.get(i);

				if (childContext != null && childContext.size() > 0)
				{
					int matchIndex = this.character.length() + i;

					List ourContext = this.matchMap.get(i);

					if (ourContext == null)
					{
						// insert the child context as our own.
						this.matchMap.put(Integer.valueOf(matchIndex), childContext);
					}
					else
					{
						// append the child context to our own
						ourContext.addAll(childContext);
					}
				}

			}

			this.nodeMap = onlyChild.nodeMap;
			this.orderedNodeList = onlyChild.orderedNodeList;

			this.totalMatches += onlyChild.totalMatches;

			for (TrieNode n : orderedNodeList)
			{

				// adjust the parent reference
				n.parentNode = this;
			}

		}

		/*
		 * then simplify our children
		 */

		for (TrieNode n : orderedNodeList)
		{

			n.simplify();

		}

	}

	/**
	 * 
	 * @return the set of strings that map to the next nodes.
	 * 
	 */
	public Set getNextNodeCharacterSet()
	{
		return this.nodeMap.keySet();
	}

	public void setNodeID(Long nodeID)
	{
		this.nodeID = nodeID;
		// TODO Auto-generated method stub

	}

	/**
	 * @return the nodeID
	 */
	public Long getNodeID()
	{
		return nodeID;
	}

	/**
	 * 
	 * @return the ordered list of matches for this node
	 */
	public List getOrderedMatchList()
	{

		List matchList = new LinkedList();

		List keyList = new ArrayList();

		keyList.addAll(matchMap.keySet());

		Collections.sort(keyList);

		for (Integer key : keyList)
		{

			matchList.addAll(matchMap.get(key));

		}

		return matchList;

	}


}




// © 2015 - 2024 Weber Informatics LLC | Privacy Policy