All downloads are free. The search and download functionality uses the official Maven repository.

weka.core.Trie Maven / Gradle / Ivy

Go to download

The Waikato Environment for Knowledge Analysis (WEKA), a machine learning workbench. This version represents the developer version, the "bleeding edge" of development, you could say. New functionality gets added to this version.

There is a newer version: 3.9.6
Show newest version
/*
 *   This program is free software: you can redistribute it and/or modify
 *   it under the terms of the GNU General Public License as published by
 *   the Free Software Foundation, either version 3 of the License, or
 *   (at your option) any later version.
 *
 *   This program is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *   GNU General Public License for more details.
 *
 *   You should have received a copy of the GNU General Public License
 *   along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

/*
 * Trie.java
 * Copyright (C) 2007-2012 University of Waikato, Hamilton, New Zealand
 */

package weka.core;

import java.io.Serializable;
import java.lang.reflect.Array;
import java.util.Collection;
import java.util.Enumeration;
import java.util.Hashtable;
import java.util.Iterator;
import java.util.NoSuchElementException;
import java.util.Vector;

import javax.swing.tree.DefaultMutableTreeNode;

/**
 * A class representing a Trie data structure for strings. See also Trie on
 * WikiPedia.
 * 
 * @author fracpete (fracpete at waikato dot ac dot nz)
 * @version $Revision: 10203 $
 */
public class Trie implements Serializable, Cloneable, Collection,
  RevisionHandler {

  /** for serialization */
  private static final long serialVersionUID = -5897980928817779048L;

  /**
   * Represents a node in the trie.
   * 
   * @author fracpete (fracpete at waikato dot ac dot nz)
   * @version $Revision: 10203 $
   */
  public static class TrieNode extends DefaultMutableTreeNode implements
    RevisionHandler {

    /** for serialization */
    private static final long serialVersionUID = -2252907099391881148L;

    /** the stop character */
    public final static Character STOP = '\0';

    /** for fast access to the children */
    protected Hashtable m_Children;

    /**
     * initializes the node
     * 
     * @param c the value of this node
     */
    public TrieNode(char c) {
      this(new Character(c));
    }

    /**
     * initializes the node
     * 
     * @param c the value of this node
     */
    public TrieNode(Character c) {
      super(c);

      m_Children = new Hashtable(100);
    }

    /**
     * returns the stored character
     * 
     * @return the stored character
     */
    public Character getChar() {
      return (Character) getUserObject();
    }

    /**
     * sets the character this node represents
     * 
     * @param value the character to store
     */
    public void setChar(Character value) {
      setUserObject(value);
    }

    /**
     * adds the given string to its children (creates children if necessary)
     * 
     * @param suffix the suffix to add to its children
     * @return true if the add operation changed the structure
     */
    public boolean add(String suffix) {
      boolean result;
      Character c;
      String newSuffix;
      TrieNode child;

      result = false;
      c = suffix.charAt(0);
      newSuffix = suffix.substring(1);

      // find child and add if necessary
      child = m_Children.get(c);
      if (child == null) {
        result = true;
        child = add(c);
      }

      // propagate remaining suffix
      if (newSuffix.length() > 0) {
        result = child.add(newSuffix) || result;
      }

      return result;
    }

    /**
     * adds the given charater to its children
     * 
     * @param c the character to add
     * @return the generated child node
     */
    protected TrieNode add(Character c) {
      TrieNode child;

      child = new TrieNode(c);
      add(child);
      m_Children.put(c, child);

      return child;
    }

    /**
     * removes the given characted from its children
     * 
     * @param c the character to remove
     */
    protected void remove(Character c) {
      TrieNode child;

      child = m_Children.get(c);
      remove(child);
      m_Children.remove(c);
    }

    /**
     * Removes a suffix from the trie.
     * 
     * @param suffix the suffix to remove
     * @return true if this trie changed as a result of the call
     */
    public boolean remove(String suffix) {
      boolean result;
      Character c;
      String newSuffix;
      TrieNode child;

      c = suffix.charAt(0);
      newSuffix = suffix.substring(1);
      child = m_Children.get(c);

      if (child == null) {
        result = false;
      } else if (newSuffix.length() == 0) {
        remove(c);
        result = true;
      } else {
        result = child.remove(newSuffix);
        if (child.getChildCount() == 0) {
          remove(child.getChar());
        }
      }

      return result;
    }

    /**
     * checks whether a suffix can be found in its children
     * 
     * @param suffix the suffix to look for
     * @return true if suffix was found
     */
    public boolean contains(String suffix) {
      boolean result;
      Character c;
      String newSuffix;
      TrieNode child;

      c = suffix.charAt(0);
      newSuffix = suffix.substring(1);
      child = m_Children.get(c);

      if (child == null) {
        result = false;
      } else if (newSuffix.length() == 0) {
        result = true;
      } else {
        result = child.contains(newSuffix);
      }

      return result;
    }

    /**
     * creates a deep copy of itself
     * 
     * @return a deep copy of itself
     */
    @Override
    public Object clone() {
      TrieNode result;
      Enumeration keys;
      Character key;
      TrieNode child;

      result = new TrieNode(getChar());
      keys = m_Children.keys();
      while (keys.hasMoreElements()) {
        key = keys.nextElement();
        child = (TrieNode) m_Children.get(key).clone();
        result.add(child);
        result.m_Children.put(key, child);
      }

      return result;
    }

    /**
     * Indicates whether some other object is "equal to" this one.
     * 
     * @param obj the object to check for equality
     * @return true if equal
     */
    @Override
    public boolean equals(Object obj) {
      boolean result;
      TrieNode node;
      Enumeration keys;
      Character key;

      node = (TrieNode) obj;

      // is payload the same?
      if (getChar() == null) {
        result = (node.getChar() == null);
      } else {
        result = getChar().equals(node.getChar());
      }

      // check children
      if (result) {
        keys = m_Children.keys();
        while (keys.hasMoreElements()) {
          key = keys.nextElement();
          result = m_Children.get(key).equals(node.m_Children.get(key));
          if (!result) {
            break;
          }
        }
      }

      return result;
    }

    /**
     * returns the node with the given suffix
     * 
     * @param suffix the suffix to look for
     * @return null if unsuccessful otherwise the corresponding node
     */
    public TrieNode find(String suffix) {
      TrieNode result;
      Character c;
      String newSuffix;
      TrieNode child;

      c = suffix.charAt(0);
      newSuffix = suffix.substring(1);
      child = m_Children.get(c);

      if (child == null) {
        result = null;
      } else if (newSuffix.length() == 0) {
        result = child;
      } else {
        result = child.find(newSuffix);
      }

      return result;
    }

    /**
     * returns the common prefix for all the nodes starting with this node. The
     * result includes this node, unless it's the root node or a STOP node.
     * 
     * @return the result of the search
     */
    public String getCommonPrefix() {
      return getCommonPrefix("");
    }

    /**
     * returns the common prefix for all the nodes starting with the node for
     * the specified prefix. Can be null if initial prefix is not found. The
     * result includes this node, unless it's the root node or a STOP node.
     * Using the empty string means starting with this node.
     * 
     * @param startPrefix the prefix of the node to start the search from
     * @return the result of the search, null if startPrefix cannot be found
     */
    public String getCommonPrefix(String startPrefix) {
      String result;
      TrieNode startNode;

      if (startPrefix.length() == 0) {
        startNode = this;
      } else {
        startNode = find(startPrefix);
      }

      if (startNode == null) {
        result = null;
      } else {
        result = startPrefix + startNode.determineCommonPrefix("");
      }

      return result;
    }

    /**
     * determines the common prefix of the nodes.
     * 
     * @param currentPrefix the common prefix found so far
     * @return the result of the search
     */
    protected String determineCommonPrefix(String currentPrefix) {
      String result;
      String newPrefix;

      if (!isRoot() && (getChar() != STOP)) {
        newPrefix = currentPrefix + getChar();
      } else {
        newPrefix = currentPrefix;
      }

      if (m_Children.size() == 1) {
        result = ((TrieNode) getChildAt(0)).determineCommonPrefix(newPrefix);
      } else {
        result = newPrefix;
      }

      return result;
    }

    /**
     * returns the number of stored strings, i.e., leaves
     * 
     * @return the number of stored strings
     */
    public int size() {
      int result;
      TrieNode leaf;

      result = 0;
      leaf = (TrieNode) getFirstLeaf();
      while (leaf != null) {
        if (leaf != getRoot()) {
          result++;
        }
        leaf = (TrieNode) leaf.getNextLeaf();
      }

      return result;
    }

    /**
     * returns the full string up to the root
     * 
     * @return the full string to the root
     */
    public String getString() {
      char[] result;
      TrieNode node;

      result = new char[this.getLevel()];
      node = this;
      while (node.getParent() != null) {
        if (node.isRoot()) {
          break;
        } else {
          result[node.getLevel() - 1] = node.getChar();
        }
        node = (TrieNode) node.getParent();
      }

      return new String(result);
    }

    /**
     * returns the node in a string representation
     * 
     * @return the node as string
     */
    @Override
    public String toString() {
      return "" + getChar();
    }

    /**
     * Returns the revision string.
     * 
     * @return the revision
     */
    @Override
    public String getRevision() {
      return RevisionUtils.extract("$Revision: 10203 $");
    }
  }

  /**
   * Represents an iterator over a trie
   * 
   * @author fracpete (fracpete at waikato dot ac dot nz)
   * @version $Revision: 10203 $
   */
  public static class TrieIterator implements Iterator, RevisionHandler {

    /** the node to use as root */
    protected TrieNode m_Root;

    /** the last leaf for this root node */
    protected TrieNode m_LastLeaf;

    /** the current leaf node */
    protected TrieNode m_CurrentLeaf;

    /**
     * initializes the iterator
     * 
     * @param node the node to use as root
     */
    public TrieIterator(TrieNode node) {
      super();

      m_Root = node;
      m_CurrentLeaf = (TrieNode) m_Root.getFirstLeaf();
      m_LastLeaf = (TrieNode) m_Root.getLastLeaf();
    }

    /**
     * Returns true if the iteration has more elements.
     * 
     * @return true if there is at least one more element
     */
    @Override
    public boolean hasNext() {
      return (m_CurrentLeaf != null);
    }

    /**
     * Returns the next element in the iteration.
     * 
     * @return the next element
     */
    @Override
    public String next() {
      String result;

      result = m_CurrentLeaf.getString();
      result = result.substring(0, result.length() - 1); // remove STOP
      if (m_CurrentLeaf != m_LastLeaf) {
        m_CurrentLeaf = (TrieNode) m_CurrentLeaf.getNextLeaf();
      } else {
        m_CurrentLeaf = null;
      }

      return result;
    }

    /**
     * ignored
     */
    @Override
    public void remove() {
    }

    /**
     * Returns the revision string.
     * 
     * @return the revision
     */
    @Override
    public String getRevision() {
      return RevisionUtils.extract("$Revision: 10203 $");
    }
  }

  /** the root node */
  protected TrieNode m_Root;

  /** the hash code */
  protected int m_HashCode;

  /**
   * whether the structure got modified and the hash code needs to be
   * re-calculated
   */
  protected boolean m_RecalcHashCode;

  /**
   * initializes the data structure
   */
  public Trie() {
    super();

    m_Root = new TrieNode(null);
    m_RecalcHashCode = true;
  }

  /**
   * Ensures that this collection contains the specified element.
   * 
   * @param o the string to add
   * @return true if the structure changed
   */
  @Override
  public boolean add(String o) {
    return m_Root.add(o + TrieNode.STOP);
  }

  /**
   * Adds all of the elements in the specified collection to this collection
   * 
   * @param c the collection to add
   */
  @Override
  public boolean addAll(Collection c) {
    boolean result;
    Iterator iter;

    result = false;

    iter = c.iterator();
    while (iter.hasNext()) {
      result = add(iter.next()) || result;
    }

    return result;
  }

  /**
   * Removes all of the elements from this collection
   */
  @Override
  public void clear() {
    m_Root.removeAllChildren();
    m_RecalcHashCode = true;
  }

  /**
   * returns a deep copy of itself
   * 
   * @return a copy of itself
   */
  @Override
  public Object clone() {
    Trie result;

    result = new Trie();
    result.m_Root = (TrieNode) m_Root.clone();

    return result;
  }

  /**
   * Returns true if this collection contains the specified element.
   * 
   * @param o the object to check for in trie
   * @return true if found
   */
  @Override
  public boolean contains(Object o) {
    return m_Root.contains(((String) o) + TrieNode.STOP);
  }

  /**
   * Returns true if this collection contains all of the elements in the
   * specified collection.
   * 
   * @param c the collection to look for in the trie
   * @return true if all elements were found
   */
  @Override
  public boolean containsAll(Collection c) {
    boolean result;
    Iterator iter;

    result = true;

    iter = c.iterator();
    while (iter.hasNext()) {
      if (!contains(iter.next())) {
        result = false;
        break;
      }
    }

    return result;
  }

  /**
   * checks whether the given prefix is stored in the trie
   * 
   * @param prefix the prefix to check
   * @return true if the prefix is part of the trie
   */
  public boolean containsPrefix(String prefix) {
    return m_Root.contains(prefix);
  }

  /**
   * Compares the specified object with this collection for equality.
   * 
   * @param o the object to check for equality
   */
  @Override
  public boolean equals(Object o) {
    return m_Root.equals(((Trie) o).getRoot());
  }

  /**
   * returns the common prefix for all the nodes
   * 
   * @return the result of the search
   */
  public String getCommonPrefix() {
    return m_Root.getCommonPrefix();
  }

  /**
   * returns the root node of the trie
   * 
   * @return the root node
   */
  public TrieNode getRoot() {
    return m_Root;
  }

  /**
   * returns all stored strings that match the given prefix
   * 
   * @param prefix the prefix that all strings must have
   * @return all strings that match the prefix
   */
  public Vector getWithPrefix(String prefix) {
    Vector result;
    TrieNode node;
    TrieIterator iter;

    result = new Vector();

    if (containsPrefix(prefix)) {
      node = m_Root.find(prefix);
      iter = new TrieIterator(node);
      while (iter.hasNext()) {
        result.add(iter.next());
      }
    }

    return result;
  }

  /**
   * Returns the hash code value for this collection.
   * 
   * @return the hash code
   */
  @Override
  public int hashCode() {
    if (m_RecalcHashCode) {
      m_HashCode = toString().hashCode();
      m_RecalcHashCode = false;
    }

    return m_HashCode;
  }

  /**
   * Returns true if this collection contains no elements.
   * 
   * @return true if empty
   */
  @Override
  public boolean isEmpty() {
    return (m_Root.getChildCount() == 0);
  }

  /**
   * Returns an iterator over the elements in this collection.
   * 
   * @return returns an iterator over all the stored strings
   */
  @Override
  public Iterator iterator() {
    return new TrieIterator(m_Root);
  }

  /**
   * Removes a single instance of the specified element from this collection, if
   * it is present.
   * 
   * @param o the object to remove
   * @return true if this collection changed as a result of the call
   */
  @Override
  public boolean remove(Object o) {
    boolean result;

    result = m_Root.remove(((String) o) + TrieNode.STOP);

    m_RecalcHashCode = result;

    return result;
  }

  /**
   * Removes all this collection's elements that are also contained in the
   * specified collection
   * 
   * @param c the collection to remove
   * @return true if the collection changed
   */
  @Override
  public boolean removeAll(Collection c) {
    boolean result;
    Iterator iter;

    result = false;

    iter = c.iterator();
    while (iter.hasNext()) {
      result = remove(iter.next()) || result;
    }

    m_RecalcHashCode = result;

    return result;
  }

  /**
   * Retains only the elements in this collection that are contained in the
   * specified collection
   * 
   * @param c the collection to use as reference
   * @return true if this collection changed as a result of the call
   */
  @Override
  public boolean retainAll(Collection c) {
    boolean result;
    Iterator iter;
    Object o;

    result = false;
    iter = iterator();
    while (iter.hasNext()) {
      o = iter.next();
      if (!c.contains(o)) {
        result = remove(o) || result;
      }
    }

    m_RecalcHashCode = result;

    return result;
  }

  /**
   * Returns the number of elements in this collection.
   * 
   * @return the number of nodes in the tree
   */
  @Override
  public int size() {
    return m_Root.size();
  }

  /**
   * Returns an array containing all of the elements in this collection.
   * 
   * @return the stored strings as array
   */
  @Override
  public Object[] toArray() {
    return toArray(new String[0]);
  }

  /**
   * Returns an array containing all of the elements in this collection; the
   * runtime type of the returned array is that of the specified array.
   * 
   * @param a the array into which the elements of this collection are to be
   *          stored
   * @return an array containing the elements of this collection
   */
  @Override
  public  T[] toArray(T[] a) {
    T[] result;
    Iterator iter;
    Vector list;
    int i;

    list = new Vector();
    iter = Utils.> cast(iterator());
    while (iter.hasNext()) {
      list.add(iter.next());
    }

    if (Array.getLength(a) != list.size()) {
      result = Utils. cast(Array.newInstance(a.getClass()
        .getComponentType(), list.size()));
    } else {
      result = a;
    }

    for (i = 0; i < list.size(); i++) {
      result[i] = list.get(i);
    }

    return result;
  }

  /**
   * returns the node as String
   * 
   * @param node the node to turn into a string
   * @return the node as string
   */
  protected String toString(TrieNode node) {
    StringBuffer result;
    int i;
    StringBuffer indentation;

    result = new StringBuffer();

    // indent the node
    indentation = new StringBuffer();
    for (i = 0; i < node.getLevel(); i++) {
      indentation.append(" | ");
    }
    result.append(indentation.toString());

    // add the node label
    if (node.getChar() == null) {
      result.append("");
    } else if (node.getChar() == TrieNode.STOP) {
      result.append("STOP");
    } else {
      result.append("'" + node.getChar() + "'");
    }
    result.append("\n");

    // add the children
    for (i = 0; i < node.getChildCount(); i++) {
      result.append(toString((TrieNode) node.getChildAt(i)));
    }

    return result.toString();
  }

  /**
   * returns the trie in string representation
   * 
   * @return the trie as string
   */
  @Override
  public String toString() {
    return toString(m_Root);
  }

  /**
   * Returns the revision string.
   * 
   * @return the revision
   */
  @Override
  public String getRevision() {
    return RevisionUtils.extract("$Revision: 10203 $");
  }

  /**
   * Only for testing (prints the built Trie). Arguments are added to the Trie.
   * If not arguments provided then a few default strings are uses for building.
   * 
   * @param args commandline arguments
   */
  public static void main(String[] args) {
    String[] data;

    if (args.length == 0) {
      data = new String[3];
      data[0] = "this is a test";
      data[1] = "this is another test";
      data[2] = "and something else";
    } else {
      data = args.clone();
    }

    // build trie
    Trie t = new Trie();
    for (String element : data) {
      t.add(element);
    }
    System.out.println(t);
  }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy