All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.squarespace.less.core.HashPrefixTree Maven / Gradle / Ivy

/**
 * Copyright, 2015, Squarespace, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.squarespace.less.core;

import java.util.ArrayList;
import java.util.List;
import java.util.Set;


/**
 * HashPrefixTree is a structure where each key is represented by a list of
 * parts each of type {@code K}.  Keys that share the same prefix will
 * reuse the same interior nodes.  The tree supports searching full matching,
 * prefix matching, and searching by all subsequences of a key.
 *
 * Additionally, each key can have one or more values associated with it.
 *
 * Each level in the tree uses a simple hash table that uses chaining
 * for hash collision overflow.
 *
 * Keys can only be added to the HashTrie, not removed. Removal is currently
 * not a requirement.
 */
/**
 * A prefix tree whose keys are lists of parts of type {@code K}, with each
 * level of the tree stored as a chained hash table.
 *
 * Keys that share the same prefix reuse the same interior nodes. Each key can
 * have one or more values of type {@code V} attached to the node that
 * terminates it. Keys can only be added to the tree, not removed.
 *
 * @param <K> type of each part of a key
 * @param <V> type of the values associated with a key
 */
public class HashPrefixTree<K, V> {

  /**
   * Initial capacity of the root node's hash table.
   */
  private static final int ROOT_CAPACITY = 32;

  /**
   * Initial capacity of interior nodes' hash tables.
   */
  private static final int INTERIOR_CAPACITY = 4;

  /**
   * Load factor determining the sparseness of each hash table.
   */
  private static final float LOAD_FACTOR = 0.75f;

  /**
   * Comparator for key equivalence.
   */
  private final HPTKeyComparator<K> comparator;

  /**
   * Seed value to mix into keys' hash codes.
   */
  private final int hashSeed;

  /**
   * Root of the tree. Remains null until the first insert.
   */
  private HPTNode<K, V> root;

  /**
   * Sequence of key ids for this tree.
   */
  private int keyIdSequence;

  /**
   * Construct a tree with the given key comparator, using the current
   * system timestamp (truncated to an int) as the hash seed.
   *
   * @param comparator decides whether a query key part matches a tree key part
   */
  public HashPrefixTree(HPTKeyComparator<K> comparator) {
    this(comparator, (int) System.currentTimeMillis());
  }

  /**
   * Construct a tree with the given key comparator, using the user-provided
   * hash seed.
   *
   * @param comparator decides whether a query key part matches a tree key part
   * @param hashSeed value mixed into every key part's hash code
   */
  public HashPrefixTree(HPTKeyComparator<K> comparator, int hashSeed) {
    this.comparator = comparator;
    this.hashSeed = hashSeed;
  }

  /**
   * Returns the root of the tree, or null if nothing has been inserted yet.
   */
  public HPTNode<K, V> root() {
    return root;
  }

  /**
   * Inserts the given key, creating any missing nodes along its path, and
   * returns the node that terminates the key. Values are attached by calling
   * {@link HPTNode#append(Object)} on the returned node.
   *
   * @param key the parts of the key, in order
   * @return the node corresponding to the last part of the key
   * @throws IllegalArgumentException if the key is null or empty
   */
  public HPTNode<K, V> insert(List<K> key) {
    check(key);
    init();
    int size = key.size();
    HPTNode<K, V> current = root;
    for (int i = 0; i < size; i++) {
      HPTNode<K, V> result = find(current, i, key.get(i), true);
      // Record the longest remaining key length seen below this node, so
      // search() can reject keys that are too long to match.
      current.maxDepth = Math.max(current.maxDepth, size - i);
      current = result;
    }
    return current;
  }

  /**
   * Search for the key and return its node, or null if the key is not found
   * or has no associated values. This method also avoids descending into
   * subtrees that are too shallow to produce a full match, based on the
   * recorded tree depth.
   *
   * @param key the parts of the key, in order
   * @return the node holding the key's values, or null
   * @throws IllegalArgumentException if the key is null or empty
   */
  public HPTNode<K, V> search(List<K> key) {
    check(key);
    int size = key.size();

    // Start at the root, and only search lower in the tree if it is
    // deep enough to produce a match.
    HPTNode<K, V> current = root;
    for (int i = 0; i < size; i++) {
      if (current == null || (size - i) > current.maxDepth()) {
        return null;
      }
      current = find(current, i, key.get(i), false);
    }
    if (current == null || current.values == null) {
      return null;
    }
    return current.values.isEmpty() ? null : current;
  }

  /**
   * Match all subsequences of the given key, with no duplicate filtering.
   *
   * Given the key ["a", "b", "c", "d"] this will scan subsequences in the order
   * below. Searches marked with "current=root" indicate the search starts back
   * at the root.
   *
   * <pre>
   * Step  Loop state                          Search key
   *
   *  1.   [current=root,   start=0, i=0]      ["a"]
   *  2.   [current=level1, start=0, i=1]      ["a", "b"]
   *  3.   [current=level2, start=0, i=2]      ["a", "b", "c"]
   *  4.   [current=level3, start=0, i=3]      ["a", "b", "c", "d"]
   *
   *  5.   [current=root,   start=1, i=1]      ["b"]
   *  6.   [current=level1, start=1, i=2]      ["b", "c"]
   *  7.   [current=level2, start=1, i=3]      ["b", "c", "d"]
   *
   *  8.   [current=root,   start=2, i=2]      ["c"]
   *  9.   [current=level1, start=2, i=3]      ["c", "d"]
   *
   * 10.   [current=root,   start=3, i=3]      ["d"]
   * </pre>
   *
   * Only subsequences that find at least one value will result in a match.
   * A scan from a given start index stops as soon as a key part is missing;
   * for example, if the root does not contain "a" searching would skip to
   * step 5 above.
   *
   * @param key the parts of the key, in order
   * @return the list of matches, or null if no node with values was reached
   * @throws IllegalArgumentException if the key is null or empty
   */
  public List<HPTMatch<V>> searchSubsequences(List<K> key) {
    return searchSubsequences(key, null);
  }

  /**
   * Match all subsequences of the given key, skipping any node whose key id
   * is contained in {@code dupeFilter}. The filter is only read, never
   * modified.
   *
   * @param key the parts of the key, in order
   * @param dupeFilter key ids to exclude from the result, or null for no filtering
   * @return the list of matches, or null if no node with values was reached;
   *         the list is empty if every such node was excluded by the filter
   * @throws IllegalArgumentException if the key is null or empty
   */
  public List<HPTMatch<V>> searchSubsequences(List<K> key, Set<Integer> dupeFilter) {
    check(key);

    // List of matches, allocated the first time a node with values is reached.
    List<HPTMatch<V>> matches = null;

    int size = key.size();
    int start = 0;

    // Outer loop moves the start index towards the end of the key.
    while (start < size) {

      // Start at the root for each starting index.
      HPTNode<K, V> current = root;
      for (int i = start; i < size; i++) {
        K keyPart = key.get(i);

        // The comparator receives the position relative to the start of the
        // subsequence (0, 1, 2, ...), matching the indices used by insert().
        current = find(current, i - start, keyPart, false);

        // Nothing found, move to next start index.
        if (current == null) {
          break;
        }

        // Check if the current node has values as we move lower into
        // the tree.
        if (current.values != null && !current.values.isEmpty()) {
          if (matches == null) {
            matches = new ArrayList<>();
          }

          // If a dupe filter is present, avoid duplicate matches.
          if (dupeFilter == null || !dupeFilter.contains(current.keyId)) {
            // Add the match start/end indices, where end is exclusive.
            matches.add(new HPTMatch<>(start, i + 1, current.keyId, current.values));
          }
        }
      }
      start++;
    }
    return matches;
  }

  /**
   * Makes sure the key is valid for insertion, searching.
   *
   * @throws IllegalArgumentException if the key is null or empty
   */
  private void check(List<K> key) {
    if (key == null || key.isEmpty()) {
      throw new IllegalArgumentException("key must be a list of size >= 1");
    }
  }

  /**
   * Initializes the tree's storage on first insert.
   */
  private void init() {
    if (root == null) {
      root = buildRoot();
    }
  }

  /**
   * Locate a child of {@code node} matching {@code keyPart}. If the
   * {@code create} parameter is true, a missing child is created.
   *
   * @param node parent whose hash table is searched; may be null when not creating
   * @param keyIndex position of the key part, passed through to the comparator
   * @param keyPart the key part to look up
   * @param create whether to create the child if it is missing
   * @return the matching (or newly-created) child, or null if absent
   */
  private HPTNode<K, V> find(HPTNode<K, V> node, int keyIndex, K keyPart, boolean create) {
    // Ensure the table is initialized and properly sized
    if (create) {
      expand(node, LOAD_FACTOR);
    }

    // No table allocated, bail out.
    if (node == null || node.children == null) {
      return null;
    }

    // Hash the key to obtain its index in the hash table. Table lengths are
    // always powers of two (4 or 32, then doubled), so masking is sufficient.
    int index = hashKey(keyPart) & (node.children.length - 1);

    // Search the table slot and its overflow chain for the first match.
    HPTNode<K, V> result = node.children[index];
    while (result != null) {
      if (comparator.keysEqual(keyIndex, keyPart, result.keyPart)) {
        return result;
      }
      result = result.next;
    }

    // Create the missing node if requested, placing it at the head of the
    // collision overflow chain.
    if (create) {
      return create(node, index, keyPart);
    }
    return result;
  }

  /**
   * Creates a new child of {@code parent} for {@code keyPart} and links it at
   * the head of the chain in slot {@code index}.
   */
  private HPTNode<K, V> create(HPTNode<K, V> parent, int index, K keyPart) {
    HPTNode<K, V> result = new HPTNode<>(keyIdSequence++, keyPart);
    result.next = parent.children[index];
    parent.children[index] = result;
    parent.size++;
    return result;
  }

  /**
   * Expands the {@code node}'s child array, using the load factor to determine
   * the degree of sparseness. Allocates the array if it does not exist yet;
   * otherwise doubles it once the child count reaches the threshold.
   */
  private void expand(HPTNode<K, V> node, float loadFactor) {
    if (node.children == null) {
      node.children = reallocate(INTERIOR_CAPACITY);
      return;
    } else {
      double threshold = Math.floor(node.children.length * loadFactor) - 1;
      if (node.size < threshold) {
        return;
      }
    }

    // Resize the array
    HPTNode<K, V>[] original = node.children;
    int capacity = node.children.length * 2;
    node.children = reallocate(capacity);

    // Recompute the hashes of all members and add them to the new array.
    int mask = capacity - 1;
    for (int i = original.length - 1; i >= 0; i--) {
      HPTNode<K, V> elem = original[i];
      while (elem != null) {
        int index = hashKey(elem.keyPart) & mask;
        // Save the old chain link before relinking elem into the new table.
        HPTNode<K, V> next = elem.next;
        elem.next = node.children[index];
        node.children[index] = elem;
        elem = next;
      }
    }
  }

  /**
   * Allocates an array of nodes of the given capacity.
   */
  @SuppressWarnings("unchecked")
  protected static <K, V> HPTNode<K, V>[] reallocate(int capacity) {
    // Generic arrays cannot be created directly; the cast is safe because the
    // array is only ever populated with HPTNode<K, V> instances by this class.
    return (HPTNode<K, V>[]) new HPTNode[capacity];
  }

  /**
   * Hash mixing function. Attempts to distribute bits of the hash value by XOR-ing
   * some of the higher-order bits. Also mixes in the seed value.
   *
   * @param key the key part to hash; must not be null
   * @return the mixed hash value
   */
  protected int hashKey(K key) {
    // Murmur3's mix + seed
    int h = hashSeed ^ key.hashCode();
    h ^= h >>> 16;
    h *= 0x85ebca6b;
    h ^= h >>> 13;
    h *= 0xc2b2ae35;
    h ^= h >>> 16;
    return h;
  }

  /**
   * Constructs the root of the tree with a larger initial capacity.
   */
  private HPTNode<K, V> buildRoot() {
    HPTNode<K, V> node = new HPTNode<>(keyIdSequence++, null);
    node.children = reallocate(ROOT_CAPACITY);
    return node;
  }

  /**
   * A node of the tree.
   *
   * @param <K> type of each part of a key
   * @param <V> type of the values associated with a key
   */
  public static class HPTNode<K, V> {

    /**
     * Unique id of this key within this tree structure. We use
     * this to filter out potential repeated matches. For example,
     * when we're doing 2 separate queries that are logically
     * part of the same lookup, we want to avoid returning duplicate
     * results on successive calls.
     */
    private final int keyId;

    /**
     * Part of the key.
     */
    private final K keyPart;

    /**
     * List of values associated with this node. This is populated only if
     * the key segment represents the end of the key.
     */
    private List<V> values;

    /**
     * Child node hash table.
     */
    private HPTNode<K, V>[] children;

    /**
     * Number of child nodes stored in the hash table.
     */
    private int size;

    /**
     * Link to the next node which hashes to the same slot. Used for
     * hash collision overflow.
     */
    private HPTNode<K, V> next;

    /**
     * Maximum depth of the tree below this point. We track this as an
     * optimization, to avoid scanning lower levels of the tree that are
     * too shallow to produce a full match.
     */
    private int maxDepth;

    /**
     * Builds a new node for the given key segment.
     */
    public HPTNode(int keyId, K keyPart) {
      this.keyId = keyId;
      this.keyPart = keyPart;
    }

    /**
     * Returns the unique id of this node within its tree.
     */
    public int keyId() {
      return keyId;
    }

    /**
     * Returns the maximum depth of the tree below this node.
     */
    public int maxDepth() {
      return maxDepth;
    }

    /**
     * Returns the number of child nodes in this node's hash table.
     */
    public int size() {
      return size;
    }

    /**
     * Returns the list of values associated with this node, or null if
     * no value has been appended.
     */
    public List<V> values() {
      return values;
    }

    /**
     * Appends a value to this node, allocating the value list on first use.
     *
     * @return this node, to allow chaining
     */
    public HPTNode<K, V> append(V value) {
      if (values == null) {
        values = new ArrayList<>(2);
      }
      values.add(value);
      return this;
    }

  }

  /**
   * Represents a partial key match. Collects the start and end indices
   * of the key, representing the segment of the key which matched. The
   * end index is exclusive. Also collects all values matched by this
   * key segment.
   *
   * @param <V> type of the matched values
   */
  // TODO: make and return nodes, not values.
  public static class HPTMatch<V> {

    private final int start;

    private final int end;

    private final int keyId;

    private final List<V> values;

    public HPTMatch(int start, int end, int keyId, List<V> values) {
      this.start = start;
      this.end = end;
      this.keyId = keyId;
      this.values = values;
    }

    /**
     * Start index of the segment of the key that matched.
     */
    public int start() {
      return start;
    }

    /**
     * End index of the segment of the key that matched, exclusive.
     */
    public int end() {
      return end;
    }

    /**
     * Unique id of the key corresponding to this match.
     */
    public int keyId() {
      return keyId;
    }

    /**
     * Values found by this match.
     */
    public List<V> values() {
      return values;
    }

    @Override
    public String toString() {
      return "Match[" + start + ", " + end + "] = " + values;
    }

  }

  /**
   * Compares key segments, with knowledge about position within the key.
   *
   * @param <K> type of each part of a key
   */
  public interface HPTKeyComparator<K> {

    /**
     * Returns true if the query key part at the given position matches the
     * key part stored in the tree.
     */
    boolean keysEqual(int queryKeyIndex, K queryKey, K treeKey);

  }

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy