All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.squarespace.less.match.DATBuilder Maven / Gradle / Ivy

The newest version!
package com.squarespace.less.match;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;

/**
 * Builds a double-array trie (DAT) for fast incremental lookup of key / value pairs. We
 * do not store the actual value in the trie, only its index in the entries list provided
 * at build time.
 *
 * WARNING: building large tries at runtime can be time-consuming. You can pre-build the
 * arrays, embed them into a class and construct the DAT at runtime from these components.
 */
class DATBuilder {

  /**
   * Double-array trie fields.
   */
  private int[] base;
  private int[] check;

  /**
   * Indices, preserving original order. A cell holds -1 unless it is a leaf,
   * in which case it holds the position of the key in the input list.
   */
  private int[] indices;

  // The following fields are temporary, used only for building the DAT.

  /** Maps each (possibly lower-cased) key to its position in the input list. */
  private Map<String, Integer> indexmap;

  /** Keys in caller-supplied order; cleared once the trie has been built. */
  private List<String> input;

  /** Marks 'begin' offsets already claimed by a sibling set. */
  private boolean[] used;

  /** Position of the first free 'check' cell found by the most recent scan. */
  private int pos;

  private final boolean ignoreCase;

  /**
   * Creates a case-sensitive builder over the given keys.
   *
   * @param input keys to index; must be unique
   */
  DATBuilder(List<String> input) {
    this(input, false);
  }

  /**
   * Creates a builder over the given keys.
   *
   * @param input keys to index; must be unique (after lower-casing when
   *        {@code ignoreCase} is set)
   * @param ignoreCase when true, keys are lower-cased before being indexed
   */
  DATBuilder(List<String> input, boolean ignoreCase) {
    this.input = input;
    this.ignoreCase = ignoreCase;
  }

  /**
   * Base array.
   */
  public int[] base() {
    return base;
  }

  /**
   * Check array.
   */
  public int[] check() {
    return check;
  }

  /**
   * Values indices.
   */
  public int[] indices() {
    return indices;
  }

  /**
   * Build a double-array trie, populating the 'base', 'check', and 'indices' arrays.
   *
   * The temporary build state (including the input list) is released when this
   * method completes, so it can only be called once per instance.
   *
   * NOTE(review): an empty input list is not handled; the first sibling lookup
   * throws IndexOutOfBoundsException. Left as-is until the expected shape of an
   * empty trie is confirmed against the lookup code.
   *
   * @throws IllegalArgumentException if two keys are equal (after lower-casing
   *         when case is ignored)
   */
  public void build() {
    // Copy and sort the input keys
    List<String> sorted = new ArrayList<>(input.size());

    // Preserve indices of input order.
    this.indexmap = new HashMap<>();
    for (int i = 0; i < input.size(); i++) {
      String key = input.get(i);
      if (ignoreCase) {
        // Locale.ROOT keeps the mapping independent of the JVM's default locale.
        key = key.toLowerCase(Locale.ROOT);
      }
      if (indexmap.containsKey(key)) {
        throw new IllegalArgumentException(
            "keys must be unique. duplicate found: '" + key + "'");
      }
      indexmap.put(key, i);
      sorted.add(key);
    }

    // Keys should be sorted due to the way the tree is built.
    Collections.sort(sorted);

    N root = new N();
    root.left = 0;
    root.right = sorted.size();
    root.depth = 0;

    // Size of the trie depends on the nature of the characters in the keys.
    int size = 65000;
    while (true) {
      try {
        alloc(size);
        // Get the siblings of the root node. This is the set of first
        // characters for all strings.
        List<N> siblings = tree(sorted, root);
        build(sorted, siblings);
        break;
      } catch (ArrayIndexOutOfBoundsException e) {
        // Size was too conservative, increase it by a bit. The next pass
        // reallocates and rebuilds from scratch.
        size += size / 5;
      }
    }

    trim();

    // Clear the temporary variables
    this.indexmap = null;
    this.input = null;
    this.used = null;
  }

  /**
   * Trim arrays to minimal size, dropping the trailing cells whose 'base'
   * value is zero.
   */
  private void trim() {
    int len = base.length;
    // Stop at zero so an all-zero array can never be read at index -1.
    while (len > 0 && base[len - 1] == 0) {
      len--;
    }
    this.base = Arrays.copyOf(base, len);
    this.check = Arrays.copyOf(check, len);
    this.indices = Arrays.copyOf(indices, len);
  }

  /**
   * Allocate fresh arrays of the given size and reset the scan position.
   */
  private void alloc(int sz) {
    base = new int[sz];
    check = new int[sz];
    indices = new int[sz];
    used = new boolean[sz];
    pos = 0;
    base[0] = 1;
    Arrays.fill(indices, -1);
  }

  /**
   * To construct the tree, we recursively locate all siblings having
   * the given 'parent' prefix.
   *
   * For the strings ["ANT", "AT", "OAKS", "OAR"] this produces the tree:
   *
   * <pre>
   * [root]
   *    │
   *    ├─ A
   *    │  ├─ N
   *    │  │  └─ T
   *    │  │     └─ 0
   *    │  │
   *    │  └─ T
   *    │     └─ 0
   *    └─ O
   *       └─ A
   *          ├─ K
   *          │  └─ S
   *          │     └─ 0
   *          └─ R
   *             └─ 0
   * </pre>
   *
   * The siblings are all characters at the same depth under the
   * same prefix, e.g. {'N', 'T'} are siblings immediately under 'A',
   * and the siblings {'K', 'R'} share the same prefix "OA".
   *
   * The zeros distinguish prefixes from full matches, e.g. the trie
   * does not contain the key "AN" even though it is a prefix of "ANT".
   *
   * To construct the tree we scan all keys under the parent in order
   * from left to right. For the strings in the above example this routine
   * scans from top to bottom, with tree depth 1-4 and computed siblings
   * on the right:
   *
   * <pre>
   *  depth         inputs       sibling sets
   *  ----------------------------------------------
   *    1:  'A' 'A' 'O' 'O'  → {'A',       'O'}
   *    2:  'N' 'T' 'A' 'A'  → {'N', 'T'} {'A'}
   *    3:  'T'     'K' 'R'  → {'T'}      {'K', 'R'}
   *    4:          'S'      →            {'S'}
   * </pre>
   *
   * @param keys sorted keys
   * @param parent node whose [left, right) key range and depth define the prefix
   * @return the child nodes of 'parent', in key order; empty when no key in
   *         the range reaches the parent's depth
   */
  private List<N> tree(List<String> keys, N parent) {
    List<N> sibs = new ArrayList<>();
    int prev = 0;
    for (int i = parent.left; i < parent.right; i++) {
      String key = keys.get(i);

      // Check if all chars for this key have been scanned
      if (key.length() < parent.depth) {
        continue;
      }

      // Locate the character in this string at the current depth,
      // e.g. at root depth 0 we're using the first character of each string.
      // A key that ends exactly at this depth contributes the terminator 0.
      int curr = 0;
      if (key.length() != parent.depth) {
        curr = (int) key.charAt(parent.depth);
      }

      // If this is the first time we've seen this character, create
      // a sibling node at this level.
      if (curr != prev || sibs.isEmpty()) {
        N n = new N();
        n.depth = parent.depth + 1;
        n.code = curr;
        n.left = i;

        // Only terminator nodes become leaves, so only they need the index
        // of the key's value. Testing the code alone also keeps the empty
        // key from being indexed at position -1.
        if (curr == 0) {
          n.index = indexmap.get(key);
        }

        if (!sibs.isEmpty()) {
          // Set right point of previous sibling to the current sibling
          sibs.get(sibs.size() - 1).right = i;
        }
        sibs.add(n);
      }
      prev = curr;
    }

    if (!sibs.isEmpty()) {
      sibs.get(sibs.size() - 1).right = parent.right;
    }
    return sibs;
  }

  /**
   * Build the DAT by indexing the given siblings.
   *
   * @param keys sorted keys
   * @param siblings non-empty set of nodes sharing one parent
   * @return the 'begin' offset chosen for this sibling set; a sibling with
   *         code c lives in cell begin + c
   */
  private int build(List<String> keys, List<N> siblings) {
    int sz = siblings.size();
    N nzero = siblings.get(0);

    // Find the starting offset for this level
    int index = Math.max(nzero.code, pos);
    boolean isFirst = true;

    // Scan for a block of empty slots to index the siblings.
    int begin = 0;
    loop:
    while (true) {
      index++;
      if (check[index] != 0) {
        continue;
      }
      if (isFirst) {
        // Remember the first free cell so later scans can start from it.
        pos = index;
        isFirst = false;
      }

      begin = index - nzero.code;
      if (used[begin]) {
        continue;
      }

      // Every remaining sibling must also land on a free cell.
      for (int i = 1; i < sz; i++) {
        N n = siblings.get(i);
        if (check[begin + n.code] != 0) {
          continue loop;
        }
      }
      break;
    }

    used[begin] = true;

    // Initialize the 'check' cell for each sibling
    for (int i = 0; i < sz; i++) {
      N n = siblings.get(i);
      check[begin + n.code] = begin;
    }

    // Iterate over the siblings and recursively index them
    for (int i = 0; i < sz; i++) {
      // Get the siblings that are a prefix of 'n'
      N n = siblings.get(i);
      List<N> newsibs = tree(keys, n);

      // Pointer to the cell for the current node
      int j = begin + n.code;
      if (newsibs.isEmpty()) {
        // No siblings for this prefix, so we've reached a leaf.
        // NOTE(review): check[j] is overwritten with 1 here, replacing the
        // 'begin' value stored above; presumably the lookup side depends on
        // this leaf encoding -- confirm before changing.
        base[j] = -1;
        check[j] = 1;
        indices[j] = n.index;
      } else {
        // Create the link to the next level
        int h = build(keys, newsibs);
        base[j] = h;
      }
    }
    return begin;
  }

  /**
   * Node in the temporary tree.
   */
  static class N {

    /** Character code at this node, or 0 for the end-of-key terminator. */
    int code;

    /** Start (inclusive) of this node's range in the sorted keys. */
    int left;

    /** End (exclusive) of this node's range in the sorted keys. */
    int right;

    /** Number of characters consumed to reach this node. */
    int depth;

    /** Input position of the key; only assigned on terminator nodes. */
    int index;

    @Override
    public String toString() {
      return "N('" + (char) code + "' (" + code + ") "
          + " L=" + left
          + " R=" + right
          + " D=" + depth
          + " I=" + index
          + ")";
    }
  }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy