All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.wicketstuff.datatable_autocomplete.trie.TrieNode Maven / Gradle / Ivy

/*
 * 
 * ==============================================================================
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
 * use this file except in compliance with the License. You may obtain a copy of
 * the License at
 * 
 * http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations under
 * the License.
 */

package org.wicketstuff.datatable_autocomplete.trie;

import java.io.Serializable;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.LinkedHashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Set;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.w3c.dom.traversal.NodeFilter;

/**
 * @author mocleiri
 * 
 *         A node in the Trie.
 * 
 *         Contains the character(s) that are represented by this node.
 * 
 *         Holds a map of next character(s) to TrieNodes and the count of the
 *         number of words in the subtree beneath this node.
 * 
 *         Holds the C context objects that are indexed by this node.
 * 
 *         Holds the configuration object that provides the string version of
 *         a C object.
 * 
 */
public class TrieNode implements Serializable {

	/**
	 * 
	 */
	private static final long serialVersionUID = -3544907989469418291L;

	private static final Logger log = LoggerFactory.getLogger(TrieNode.class);

	private static final Comparator> trieNodeComparator = new Comparator>() {

		public int compare(TrieNode o1, TrieNode o2) {

			return o1.getCharacter().compareTo(o2.getCharacter());

		}
	};

	// the string that corresponds to what is matched between our parent node
	// and the root node of the enire trie.
	private final String rootMatchedString;

	// the character in the word (final character) that this node represents
	// after minimization it may be a couple of characters
	private String character;

	private Map> nodeMap = new HashMap>();

	// child nodes (ordered a-z) so that we can traverse properly
	private List> orderedNodeList = new ArrayList>();

	private TrieNode parentNode;

	// if >1 then this is the total number of strings that terminate on this
	// node
	private int totalMatches = 0;

	// contains the matches for the indexed element in the 'character'
	// i.e. index 0 matches the character.get(0) and index 1 matches the element
	// that matches to character.get(0) + character.get(1)
	private Map> matchMap = new LinkedHashMap>();
	
	// if the entire tree below this node was traversed this would be the length
	// of the longest string formed.
	// this is used when doing a 'superselect' match to know when a branch is
	// not viable.
	private int maxChildStringLength = 0;

	private final ITrieConfiguration configuration;

	private Long nodeID;

	/**
	 * @param context
	 * @param configuration
	 * @param word
	 */
	public TrieNode(TrieNode parentNode, String rootMatchedString,
			String character, ITrieConfiguration configuration) {

		super();

		this.parentNode = parentNode;
		this.rootMatchedString = rootMatchedString;
		this.character = character;

		this.configuration = configuration;

	}

	/**
	 * @return the maxChildStringLength
	 */
	public int getMaxChildStringLength() {

		return this.maxChildStringLength;
	}

	/**
	 * @param maxChildStringLength
	 *            the maxChildStringLength to set
	 */
	public void setMaxChildStringLength(int maxChildStringLength) {

		this.maxChildStringLength = maxChildStringLength;
	}

	public String toString() {

		StringBuffer children = new StringBuffer();

		List nodeList = new ArrayList(this.nodeMap.keySet());

		for (int i = 0; i < nodeList.size() - 1; i++) {

			String node = nodeList.get(i);
			children.append(node);
			children.append(", ");

		}

		if (nodeList.size() > 0)
			children.append(nodeList.get(nodeList.size() - 1));

		return "NODE [ matchedPrefix = '" + rootMatchedString
				+ "', character ='" + getCharacter() + "', word = '"
				+ getWord() + "', children = (" + children.toString() + ") ]";
	}

	/**
	 * @return the character
	 */
	public String getCharacter() {

		return this.character;
	}

	/**
	 * Add the word into the Trie index.
	 * 
	 * Recurses down the tree until all of the characters in word have been
	 * placed.
	 * 
	 * @param object
	 * @return
	 */
	public TrieNode index(C object) {

		String word = configuration.getWord(object);
		
		return index(word, object);
	}
	
	/**
	 * 
	 * @param word
	 * @param object
	 * 
	 * @return return the node that the word was finally attached to
	 */
	public TrieNode index (String word, C object) {
		
		if (!configuration.isIndexCaseSensitive())
			word = word.toLowerCase();
		
		return index(word, object, 0);
	}

	public List> getOrderedNodeList() {

		return this.orderedNodeList;

	}

	/**
	 * @param word2
	 * @param i
	 * @return
	 */
	private TrieNode index(String word, C context, int startingIndex) {

		
		
		
		if (word.length() == startingIndex) {

			/*
			 * This is the node that matches the word.
			 * 
			 * First: make sure that this is the first match on this node.
			 * 
			 * Second: insert the object into the first slot in the matchMap.
			 * Note: the index is always zero because we build an uncompressed
			 * Trie first.
			 */

			List matchList = this.matchMap.get(0);

			if (matchList == null) {
				matchList = new ArrayList();
				this.matchMap.put(0, matchList);
			}
			// note the increase in matches terminating with this node.
			this.totalMatches++;
			matchList.add(context);

			return this;
		} else {

			// use the character at the starting index to get the next node

			String nextCharacter = word.substring(startingIndex,
					startingIndex + 1);

			TrieNode nextNode = nodeMap.get(nextCharacter);

			if (nextNode == null) {

				String matchedSubString = word.substring(0, startingIndex);

				nextNode = newNode(this, matchedSubString, nextCharacter);
				nodeMap.put(nextCharacter, nextNode);
				orderedNodeList.add(nextNode);

			}

			return nextNode.index(word, context, startingIndex + 1);

		}

	}

	/**
	 * @param nextCharacter
	 * @return
	 */
	protected TrieNode newNode(TrieNode parent, String rootMatchedString,
			String nextCharacter) {

		return this.configuration.createTrieNode(parent, rootMatchedString, nextCharacter);
	}

	

	/**
	 * 
	 * In the normal case there is a single path through the tree and we
	 * identify the node that matches the prefix.
	 * 
	 * In the any case there will be many nodes since there are multiple paths
	 * to a match.
	 * 
	 * @param matchingNodeList
	 * @param substring
	 */
	private void findMatchingNodes(Set> matchingNodeList, ITrieFilternodeFilter,
			String substring) {

		PrefixTrieMatch match = find(substring, nodeFilter);

		if (match != null) {
			TrieNode node = match.getNode();
			
			matchingNodeList.add(node);
			// guaranteed to match the subtree so just exit at this point.
			// this will prevent matching the same words multiple times where the substring is small.
			return;
		}

		if (getMaxChildStringLength() < substring.length())
			return; // not enough length in the subtree to match the string so
		// no need to look.

		for (TrieNode trieNode : this.orderedNodeList) {

			trieNode.findMatchingNodes(matchingNodeList, nodeFilter, substring);
			// match = trieNode.find(substring);
			//			
			// if (match != null)
			// matchingNodeList.add (match.getNode());

		}

	}

	
	/*
	 * Recursively finds the Node that corresponds to the prefix specificed.
	 */
	public PrefixTrieMatch find(String key, ITrieFilternodeFilter) {


		if (!configuration.isIndexCaseSensitive())
			key = key.toLowerCase();
		
		/*
		 * Check the current character against the
		 */

		int keyLength = key.length();

		int characterLength = getCharacter().length();

		if (keyLength == characterLength) {

			if (getCharacter().equals(key)) {
				// match
				return new PrefixTrieMatch(getWord(), nodeFilter, this);
			} else {
				// no match
				return null;
			}
		} else if (keyLength > characterLength) {

			// compare the 'characterLength' substring of key
			String subKey = key.substring(0, characterLength);

			if (subKey.equals(getCharacter())) {
				// matches this node but we still need to compare against the
				// child nodes.

				int difference = keyLength - characterLength;

				String newKey = null;

				newKey = key.substring(keyLength - difference);

				TrieNode nextNode = this.nodeMap.get(newKey.substring(0, 1));

				if (nextNode == null)
					return null;

				return nextNode.find(newKey, nodeFilter);

			} else {
				return null;
			}

		} else {
			// keyLength < characterLength
			if (keyLength > 0 && characterLength > 1
					&& getCharacter().contains(key)) {
				return new PrefixTrieMatch(getWord(), nodeFilter, this);
			}
			return null;

		}

		

	}

	/**
	 * @return the parentNode
	 */
	public TrieNode getParentNode() {

		return this.parentNode;
	}

	/**
	 * @return the totalMatches
	 */
	public int getTotalMatches() {

		return this.totalMatches;
	}

	/**
	 * Traverses up the tree to the root to generate the word that this node
	 * represents.
	 * 
	 * @return
	 */
	public String getWord() {

		return this.rootMatchedString + this.character;
		
//		StringBuffer buf = new StringBuffer();
//
//		TrieNode currentNode = this;
//
//		while (currentNode != null) {
//			buf.insert(0, currentNode.getCharacter());
//
//			currentNode = currentNode.getParentNode();
//		}
//		
//		if (buf.toString().equals(this.rootMatchedString + this.character)) {
//			log.info("can remove the looping back to parent.");
//		}
//		return buf.toString();
	}

	/**
	 * @param limit -1 if no limit or the value at which point the list should stop being filled in.
	 * @param prefix
	 * @return
	 */
	public void buildWordList(List wordList, ITrieFilter filter, int limit) {

//		if (this.orderedNodeList.size() == 0
//				&& (filter == null || filter.isVisible(this))) {
//			// can be null in certain cases where the match is to an empty Trie.
//			addExistingContextToList(wordList, filter);
//
//		} else {

			log.debug(orderedNodeList.toString());

			addExistingContextToList(wordList, filter, limit);
			
			if (wordList.size() == limit)
				return;

			for (TrieNode node : orderedNodeList) {
				node.buildWordList(wordList, filter, limit);
				
				if (wordList.size() == limit)
					return;
				
			}
//		}

	}

	private void addExistingContextToList(List wordList, ITrieFilter filter, int limit) {

		List keyList = new LinkedList();

		keyList.addAll(matchMap.keySet());

		/*
		 * Sort numerically so that order is based on shortest match first.
		 */
		Collections.sort(keyList);

		for (Integer i : keyList) {

			List contextList = this.matchMap.get(i);
			
			// check with the filter to only include those with the proper
			
			for (C c : contextList) {
				
				if (filter.isVisible(c)) {
					wordList.add(c);
					
					if (wordList.size() == limit)
						return;
				}
				
				
			}

		
		}
	}

	/**
	 * Visit each of the TrieNodes in the Trie according to the order?
	 * 
	 * @param v
	 */
	public void visit(ITrieNodeVisitor v) {

		v.visit(this);

		for (TrieNode child : this.orderedNodeList) {
			child.visit(v);
		}
	}



	/**
	 * Called once all the keys have been added into the Trie.
	 * 
	 * This will compact the nodes so that there will be no single child nodes
	 * which will help to reduce the memory usage.
	 * 
	 * 
	 */
	public void simplify() {

		Collections.sort(orderedNodeList, trieNodeComparator);

		/*
		 * simplify our children first.
		 */
		while (this.orderedNodeList.size() == 1) {
			// consolidate with the subnode
			// remove the sub node and set us as the parent to their
			// children.
			TrieNode onlyChild = this.orderedNodeList.remove(0);

			this.nodeMap.clear();

			String childCharacter = onlyChild.getCharacter();

			for (int i = 0; i < childCharacter.length(); i++) {

				String c = childCharacter.substring(i, i + 1);

				// we grow our character to represent both
				this.character = this.character + c;

				List childContext = onlyChild.matchMap.get(i);

				if (childContext != null && childContext.size() > 0) {
					int matchIndex = this.character.length() + i;

					ListourContext = this.matchMap.get(i);
					
					if (ourContext == null) {
						// insert the child context as our own.
						this.matchMap
						.put(Integer.valueOf(matchIndex), childContext);
					}
					else {
						// append the child context to our own
						ourContext.addAll(childContext);
					}
				}

			}

			this.nodeMap = onlyChild.nodeMap;
			this.orderedNodeList = onlyChild.orderedNodeList;

			this.totalMatches += onlyChild.totalMatches;

			for (TrieNode n : orderedNodeList) {

				// adjust the parent reference
				n.parentNode = this;
			}

		}

		/*
		 * then simplify our children
		 */

		for (TrieNode n : orderedNodeList) {

			n.simplify();

		}

	}

	/**
	 * 
	 * @return the set of strings that map to the next nodes.
	 * 
	 */
	public Set getNextNodeCharacterSet() {
		return this.nodeMap.keySet();
	}

	public void setNodeID(Long nodeID) {
		this.nodeID = nodeID;
		// TODO Auto-generated method stub
		
	}

	/**
	 * @return the nodeID
	 */
	public Long getNodeID() {
		return nodeID;
	}
	
	/**
	 * 
	 * @return the ordered list of matches for this node
	 */
	public ListgetOrderedMatchList() {
		
		ListmatchList = new LinkedList();
		
		ListkeyList =  new ArrayList();
		
		keyList.addAll(matchMap.keySet());
		
		Collections.sort(keyList);
		
		for (Integer key : keyList) {
			
			matchList.addAll(matchMap.get(key));
			
		}
		
		return matchList;
		
	}
	

}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy