All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.owasp.html.Trie Maven / Gradle / Ivy

There is a newer version: 20240325.1
Show newest version
// Copyright (c) 2011, Mike Samuel
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
//
// Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// Neither the name of the OWASP nor the names of its contributors may
// be used to endorse or promote products derived from this software
// without specific prior written permission.
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
// COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
// POSSIBILITY OF SUCH DAMAGE.

package org.owasp.html;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;

/**
 * A trie used to separate punctuation tokens in a run of non-whitespace
 * characters by preferring the longest punctuation string possible in a
 * greedy left-to-right scan.
 *
 * @author Mike Samuel 
 */
final class Trie {
  private final char[] childMap;
  private final Trie[] children;
  private final boolean terminal;
  private final int value;

  /**
   * @param elements not empty, non null.
   */
  public Trie(Map elements) {
    this(sortedUniqEntries(elements), 0);
  }

  private Trie(List> elements, int depth) {
    this(elements, depth, 0, elements.size());
  }

  /**
   * @param elements not empty, non null.  Not modified.
   * @param depth the depth in the tree.
   * @param start an index into punctuationStrings of the first string in this
   *   subtree.
   * @param end an index into punctuationStrings past the last string in this
   *   subtree.
   */
  private Trie(
      List> elements, int depth,
      int start, int end) {
    this.terminal = depth == elements.get(start).getKey().length();
    if (this.terminal) {
      this.value = elements.get(start).getValue();
      if (start + 1 == end) {  // base case
        this.childMap = ZERO_CHARS;
        this.children = ZERO_TRIES;
        return;
      } else {
        ++start;
      }
    } else {
      this.value = Integer.MAX_VALUE;
    }
    int childCount = 0;
    {
      int last = -1;
      for (int i = start; i < end; ++i) {
        char ch = elements.get(i).getKey().charAt(depth);
        if (ch != last) {
          ++childCount;
          last = ch;
        }
      }
    }
    this.childMap = new char[childCount];
    this.children = new Trie[childCount];
    int childStart = start;
    int childIndex = 0;
    char lastCh = elements.get(start).getKey().charAt(depth);
    for (int i = start + 1; i < end; ++i) {
      char ch = elements.get(i).getKey().charAt(depth);
      if (ch != lastCh) {
        childMap[childIndex] = lastCh;
        children[childIndex++] = new Trie(
          elements, depth + 1, childStart, i);
        childStart = i;
        lastCh = ch;
      }
    }
    childMap[childIndex] = lastCh;
    children[childIndex++] = new Trie(elements, depth + 1, childStart, end);
  }

  /** Does this node correspond to a complete string in the input set. */
  public boolean isTerminal() { return terminal; }

  public int getValue() { return value; }

  /**
   * The child corresponding to the given character.
   * @return null if no such trie.
   */
  public Trie lookup(char ch) {
    int i = Arrays.binarySearch(childMap, ch);
    return i >= 0 ? children[i] : null;
  }

  /**
   * The descendant of this trie corresponding to the string for this trie
   * appended with s.
   * @param s non null.
   * @return null if no such trie.
   */
  public Trie lookup(CharSequence s) {
    Trie t = this;
    for (int i = 0, n = s.length(); i < n; ++i) {
      t = t.lookup(s.charAt(i));
      if (null == t) { break; }
    }
    return t;
  }

  public boolean contains(char ch) {
    return Arrays.binarySearch(childMap, ch) >= 0;
  }

  private static  List> sortedUniqEntries(
      Map m) {
    return new ArrayList>(
        new TreeMap(m).entrySet());
  }

  private static final char[] ZERO_CHARS = new char[0];
  private static final Trie[] ZERO_TRIES = new Trie[0];

  /**
   * Append all strings s such that {@code this.lookup(s).isTerminal()} to the
   * given list in lexical order.
   */
  public void toStringList(List strings) {
    toStringList("", strings);
  }

  private void toStringList(String prefix, List strings) {
    if (terminal) { strings.add(prefix); }
    for (int i = 0, n = childMap.length; i < n; ++i) {
      children[i].toStringList(prefix + childMap[i], strings);
    }
  }

  @Override
  public String toString() {
    StringBuilder sb = new StringBuilder();
    toStringBuilder(0, sb);
    return sb.toString();
  }

  private void toStringBuilder(int depth, StringBuilder sb) {
    sb.append(terminal ? "terminal" : "nonterminal");
    ++depth;
    for (int i = 0; i < childMap.length; ++i) {
      sb.append('\n');
      for (int d = 0; d < depth; ++d) {
        sb.append('\t');
      }
      sb.append('\'').append(childMap[i]).append("' ");
      children[i].toStringBuilder(depth, sb);
    }
  }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy