All Downloads are FREE. Search and download functionalities are using the official Maven repository.

xapi.collect.impl.StringTrie Maven / Gradle / Ivy

There is a newer version: 0.5
Show newest version
package xapi.collect.impl;


import static xapi.collect.api.CharPool.EMPTY_STRING;

import java.io.Serializable;
import java.util.concurrent.locks.Lock;

import xapi.util.impl.CharPoolTrie;
import xapi.util.impl.Chars;

public class StringTrie {

  private static final char[] emptyString = new char[0];

  /**
   * One node in the StringTrie graph. It is mutable so we can keep our memory
   * impact light, and its fields are volatile so that lock-free readers see
   * the most recent writes. Writers synchronize on an edge while they
   * restructure its children (see {@code doPut}); they do not hold a parent
   * lock while taking a child lock, so multiple threads can still quickly
   * traverse any potential hotspots where there is a lot of prefix overlap in
   * strings, such as java packages: com.foo.client.Something
   * com.foo.client.SomethingElse com.foo.server.Something
   * com.bar.client.Something com.bar.server.Something ...etc. The fragment com.
   * would be locked and released before acquiring foo. | bar. This prevents
   * concurrent modifications in different areas of the trie from blocking
   * each other. Gwt doesn't pay for synchronization, so it performs optimally
   * in js.
   *
   * @author "James X. Nelson ([email protected])"
   */
  protected class TrieEdge implements Serializable {
    private static final long serialVersionUID = 5885970862972987462L;

    /**
     * The value stored for the key that ends on this edge; null for an edge
     * that only acts as a branch. Volatile because puts overwrite the value of
     * an already-published edge outside of any synchronized block, while gets
     * read it without taking a lock.
     */
    protected volatile E value;
    /** The greater child; a child with an empty key in this slot is a "deep" node. */
    protected volatile TrieEdge greater;
    /** The lesser child; this slot is filled before {@link #greater}. */
    protected volatile TrieEdge lesser;
    // use char[] instead of string for optimized .toString() on keys.
    // we have to build keysets if requested,
    // and the only way to avoid using buffers or power-of-two guessing,
    // is to iteratively assemble exact length char[]s when assembling keys.
    // this allows parent char[]s to be built once per all their children.
    protected volatile char[] key;

    /** Creates an edge with the shared empty key (used for the root and for deep nodes). */
    protected TrieEdge() {
      this(emptyString, 0, 0);
    }

    /**
     * Creates an edge whose key is {@code key[index, end)}.
     *
     * @param key - The source characters.
     * @param index - Inclusive start of this edge's key within {@code key}.
     * @param end - Exclusive end of this edge's key within {@code key}.
     */
    public TrieEdge(final char[] key, final int index, final int end) {
      if (index == 0 && end == key.length) {
        // the whole array is the key; share it instead of copying.
        this.key = key;
        assert key == emptyString || end > 0;
      } else {
        // only a slice is needed; copy it into an exact-length array.
        this.key = new char[end - index];
        assert this.key.length > 0;
        System.arraycopy(key, index, this.key, 0, this.key.length);
      }
    }

    /** @return this edge's own key fragment (not the full path from the root). */
    @Override
    public String toString() {
      return new String(key);
    }
  }

  // let subclasses do stuff. At least it's final...
  protected final TrieEdge root = new TrieEdge();

  /**
   * Stores {@code value} under the characters {@code key[start, end)}.
   * A null array, an empty array, or an empty range ({@code start == end})
   * is treated as the empty key, whose value lives on the root edge.
   *
   * @param key - The characters containing the key; may be null.
   * @param start - Inclusive start of the key within {@code key}.
   * @param end - Exclusive end of the key within {@code key}.
   * @param value - The value to store.
   * @throws ArrayIndexOutOfBoundsException if {@code key} is non-empty and the
   * range is not within it, or {@code start > end}.
   */
  public void put(final char[] key, final int start, final int end, final E value) {
    if (key == null || key.length == 0) {
      root.value = value;
      return;
    }
    if (start < 0 || end > key.length || start > end) {
      throw new ArrayIndexOutOfBoundsException(
        "Invalid range [" + start + ", " + end + ") for key of length " + key.length);
    }
    if (start == end) {
      // an empty slice is the empty key; doPut requires at least one char.
      root.value = value;
      return;
    }
    doPut(root, key, start, end, value);
  }

  /**
   * Stores {@code value} under {@code key}. A null or empty key addresses the
   * root edge directly; anything else is inserted character by character.
   *
   * @param key - The key to store under; may be null.
   * @param value - The value to store.
   */
  public void put(final String key, final E value) {
    final boolean isRootKey = key == null || key.length() == 0;
    if (isRootKey) {
      root.value = value;
      return;
    }
    doPut(root, key.toCharArray(), 0, key.length(), value);
  }

  /**
   * Inserts {@code key[index, end)} with {@code value} somewhere beneath
   * {@code into}, restructuring the children of {@code into} as needed.
   * <p>
   * Each edge has a lesser and a greater child. When more than two siblings
   * are needed, the greater slot holds a "deep" node: an edge with an empty
   * key whose own lesser / greater children continue the sibling list.
   * <p>
   * Structural changes are made while synchronized on {@code into}; every
   * recursive call is made outside of the synchronized blocks.
   *
   * @param into - The edge whose children are searched / modified.
   * @param key - The characters containing the key being inserted.
   * @param index - Inclusive start of the not-yet-consumed part of the key;
   * must be less than {@code end}.
   * @param end - Exclusive end of the key within {@code key}.
   * @param value - The value to store.
   */
  protected void doPut(final TrieEdge into, final char[] key,
    final int index, final int end, final E value) {
    assert index < end;
    // To stay threadsafe, we synchronize on Edges when we modify them.
    // To stay fast, we don't recurse until we are out of the synchro block.

    // We optimize for our worst-case scenario off the hop;
    // which is a deep node traversal (when one node points to many).
    final TrieEdge nextInto;
    int nextIndex;

    // first char of the remaining key; siblings are ordered by first char.
    final char k = key[index];
    // handle peeking into deeper nodes that will result in recursion.
    // (snapshot of the greater slot; re-validated once we hold the lock)
    final TrieEdge greater = into.greater;
    if (greater != null) {
      assert into.lesser != null;
      final char[] greaterKey = greater.key;
      // deep nodes are stored in greater slot
      if (greaterKey.length == 0) {// this is a deep node!
      // bounds check on its lesser
        if (k - greater.lesser.key[0] >= 0) {
          // if inserted key is not less than the lesser of the deep node,
          // then recurse into the greater, without locking.
          doPut(greater, key, index, end, value);
          return;
        }

        // we are in a deep node, and are less than the greater.
        // check if we need to insert a new deep node.
        synchronized (into) {
          if (greater == into.greater) {
            // The only comod we need to worry about here is the greater node;
            // if the lesser is changed while we were waiting, we're still okay.
            final TrieEdge lesser = into.lesser;
            final int delta = k - lesser.key[0];
            if (delta != 0) {
              // different first char than into.lesser: splice a new deep node
              // in front of the existing deep node.
              final TrieEdge newParent = new TrieEdge();
              newParent.greater = into.greater;

              final TrieEdge newNode = new TrieEdge(key, index, end);
              newNode.value = value;

              if (delta > 0) {
                // new node is greater than current lesser; replace into.greater
                newParent.lesser = newNode;
              } else {
                // new node is less than our lesser; take lesser spot
                // and make the old lesser a new deep node
                newParent.lesser = lesser;
                into.lesser = newNode;
              }
              into.greater = newParent;
              return;// done!
            }
            // we start with the same char as into.lesser;
            // find out how far we match, and possibly recurse.
            if (insertLesser(into, key, index, end, value)) return;
            // if we didn't return, we must recurse into this lesser
            // (insertLesser returned false, so the whole lesser key matched)
            nextInto = into.lesser;
            nextIndex = index + lesser.key.length;
          } else {
            // the trie was modified while we were waiting,
            // recurse, as we need to run the deep checks again.
            nextInto = into;
            nextIndex = index;
          }
        }// end synchro
         // if we didn't return, we need to recurse.
        if (nextIndex == end) {
          // the key is fully consumed: nextInto is the edge for this key.
          nextInto.value = value;
        } else {
          doPut(nextInto, key, nextIndex, end, value);
        }
        return;
      }// end deep node
    }// end into.greater != null

    // because we are only locking on the parent node,
    // but potentially modifying the structure of child nodes,
    // and we don't want to invite deadlock, we only ever iterate downward;
    // we acquire the locks on children before modifying them
    // or reading their lesser / greater nodes.
    synchro: synchronized (into) {
      // into.lesser will only ever be null on the very first put.
      if (into.lesser == null) {
        assert into.greater == null;
        // both null, just take lesser and exit
        into.lesser = new TrieEdge(key, index, end);
        into.lesser.value = value;
        return;
      }
      // start our compare on lesser...
      final char[] lesserKey = into.lesser.key;
      final int deltaLesser = k - lesserKey[0];
      if (deltaLesser == 0) {
        // we match the first char of the lesser.
        if (insertLesser(into, key, index, end, value)) {
          return;
        } else {
          // if we didn't return, we must recurse
          nextInto = into.lesser;
          nextIndex = index + lesserKey.length;
          break synchro;
        }
      }
      // if we are less than the lesser, we need to usurp its position
      if (into.greater == null) {
        // with no greater node, our job is easy. Just fill this node up.
        final TrieEdge newNode = new TrieEdge(key, index, end);
        newNode.value = value;
        if (deltaLesser < 0) {
          into.greater = into.lesser;
          into.lesser = newNode;
        } else {
          into.greater = newNode;
        }
        return;
      }

      // we have to check the greater,
      // which may have changed since we last deep-checked it...
      final char[] greaterKey = into.greater.key;
      if (greaterKey.length == 0) {
        // the greater is now deep and it wasn't before.
        // recurse back into the same node; we can't get back here once deep
        nextInto = into;
        nextIndex = index;
        break synchro;
      }

      if (deltaLesser < 0) {
        // A greater exists, but we still need to usurp lesser
        // (the old lesser / greater pair moves into a new deep node)
        final TrieEdge newParent = new TrieEdge();
        final TrieEdge newNode = new TrieEdge(key, index, end);
        newNode.value = value;
        newParent.lesser = into.lesser;
        newParent.greater = into.greater;
        into.greater = newParent;
        into.lesser = newNode;
        return;
      }

      // The only thing left to do is run a compare on greater
      final int deltaGreater = k - greaterKey[0];
      if (deltaGreater == 0) {
        // we must insert into the greater, or else recurse
        if (insertGreater(into, key, index, end, value)) return;
        nextInto = into.greater;
        nextIndex = index + into.greater.key.length;
        break synchro;
      }
      // we don't start with greater or lesser, and must create a deep node
      final TrieEdge newParent = new TrieEdge();
      final TrieEdge newNode = new TrieEdge(key, index, end);
      newNode.value = value;
      if (deltaGreater > 0) {
        // new node is the greatest
        newParent.greater = newNode;
        newParent.lesser = into.greater;
      } else {
        // new node sorts between the lesser and the old greater
        newParent.greater = into.greater;
        newParent.lesser = newNode;
      }
      into.greater = newParent;
      return;
    }// end synchro. If we haven't returned, we need to recurse.
    if (nextIndex == end) {
      // the key is fully consumed: nextInto is the edge for this key.
      nextInto.value = value;
    } else {
      doPut(nextInto, key, nextIndex, end, value);
    }
  }

  /**
   * Tries to place {@code key[index, end)} at the lesser child of
   * {@code into}, whose first char is already known to match.
   *
   * @return false if the whole lesser key is a prefix of (or equal to) the
   * inserted key, in which case nothing was changed and the caller must
   * descend into the lesser child; true if the trie was modified and the
   * insert is complete.
   */
  private boolean insertLesser(TrieEdge into, char[] key, int index, int end, E value) {
    final char[] existingKey = into.lesser.key;
    final int insertLen = end - index;
    // position 0 is known to match, so start comparing at 1
    int shared = 1;
    while (shared < insertLen) {
      if (shared == existingKey.length) {
        // ran out of existing key: it is a strict prefix of the new key
        return false;
      }
      final char incoming = key[index + shared];
      final char current = existingKey[shared];
      if (incoming < current) {
        // keys diverge and the new one sorts first
        into.lesser = newEdgeLesser(into.lesser, insertLen, existingKey, shared, key, index, end, value);
        return true;
      }
      if (incoming > current) {
        // keys diverge and the new one sorts last
        into.lesser = newEdgeGreater(into.lesser, insertLen, existingKey, shared, key, index, end, value);
        return true;
      }
      shared++;
    }
    if (shared == existingKey.length) {
      // identical keys; the caller updates the value on the existing edge
      return false;
    }
    // The new key is a strict prefix of the existing one, so the new edge
    // becomes the parent and the existing edge keeps only its remaining suffix.
    final TrieEdge prefixNode = new TrieEdge(key, index, end);
    prefixNode.value = value;
    final int tailLen = existingKey.length - insertLen;
    final char[] tail = new char[tailLen];
    System.arraycopy(existingKey, insertLen, tail, 0, tailLen);
    prefixNode.lesser = into.lesser;
    into.lesser = prefixNode;
    prefixNode.lesser.key = tail;
    return true;
  }

  /**
   * Tries to place {@code key[index, end)} at the greater child of
   * {@code into}, whose first char is already known to match.
   *
   * @param into - The edge whose greater child is being modified; the caller
   * is synchronized on it.
   * @param key - The characters containing the key being inserted.
   * @param index - Inclusive start of the remaining key.
   * @param end - Exclusive end of the key.
   * @param value - The value to store.
   * @return false if the whole greater key is a prefix of (or equal to) the
   * inserted key, in which case nothing was changed and the caller must
   * descend into the greater child; true if the trie was modified and the
   * insert is complete.
   */
  private boolean insertGreater(TrieEdge into, char[] key, int index, int end, E value) {
    int matchesTo = 1;// only called when we've already matched the first char
    final int keyLen = end - index;
    final char[] greaterKey = into.greater.key;
    for (; matchesTo < keyLen; matchesTo++) {
      if (matchesTo == greaterKey.length) {
        // existing key is a strict prefix of the new key
        return false;
      }
      final int delta = key[index + matchesTo] - greaterKey[matchesTo];
      if (delta < 0) {
        // new node is less than greater
        into.greater = newEdgeLesser(into.greater, keyLen, greaterKey, matchesTo, key, index, end, value);
        return true;
      }
      if (delta > 0) {
        // new node is greater than greater
        into.greater = newEdgeGreater(into.greater, keyLen, greaterKey, matchesTo, key, index, end, value);
        return true;
      }
    }
    if (matchesTo == greaterKey.length) {
      // identical keys; the caller updates the value on the existing edge
      return false;
    }
    // If we haven't returned, then the existing key is longer than the one
    // we are inserting. Thus, we must slip the new node behind the old one.
    final TrieEdge newNode = new TrieEdge(key, index, end);
    newNode.value = value;
    final char[] newGreater = new char[greaterKey.length - keyLen];
    System.arraycopy(greaterKey, keyLen, newGreater, 0, newGreater.length);
    // The demoted edge is the new node's ONLY child, so it must go in the
    // lesser slot: doPut relies on lesser being filled before greater, and
    // would otherwise insert an unordered lesser that hides the demoted key.
    final TrieEdge demoted = into.greater;
    newNode.lesser = demoted;
    into.greater = newNode;
    demoted.key = newGreater;
    return true;
  }

  /**
   * Splits {@code previous} at {@code matchesTo}: builds a new branch edge
   * holding the shared prefix, trims {@code previous} to its remaining suffix,
   * and hangs a new edge for the inserted key as the branch's lesser child
   * (with {@code previous} as the greater child).
   *
   * @param previous - The existing edge being split; its key is replaced.
   * @param keyMax - Length of the remaining inserted key.
   * @param existing - The current key of {@code previous}.
   * @param matchesTo - Number of leading chars shared by both keys.
   * @param key - The characters containing the inserted key.
   * @param keyIndex - Inclusive start of the remaining inserted key.
   * @param keyEnd - Exclusive end of the inserted key (not used here).
   * @param value - The value for the inserted key.
   * @return the new branch edge, to be stored where {@code previous} was.
   */
  protected TrieEdge newEdgeLesser(TrieEdge previous, int keyMax, char[] existing, int matchesTo, char[] key,
    int keyIndex, int keyEnd, E value) {
    // exact-length buffers for the three fragments
    final char[] sharedPrefix = new char[matchesTo];
    final char[] existingSuffix = new char[existing.length - sharedPrefix.length];
    final char[] insertedSuffix = new char[keyMax - sharedPrefix.length];

    // the branch edge carries what both keys have in common
    System.arraycopy(existing, 0, sharedPrefix, 0, sharedPrefix.length);
    final TrieEdge branch = new TrieEdge(sharedPrefix, 0, sharedPrefix.length);

    // the old edge keeps only what follows the shared prefix
    System.arraycopy(existing, sharedPrefix.length, existingSuffix, 0, existingSuffix.length);
    previous.key = existingSuffix;

    // the inserted edge carries the rest of the new key, plus the value
    System.arraycopy(key, keyIndex + sharedPrefix.length, insertedSuffix, 0, insertedSuffix.length);
    final TrieEdge inserted = new TrieEdge(insertedSuffix, 0, insertedSuffix.length);
    inserted.value = value;

    assert branch.key.length > 0;
    assert previous.key.length > 0;
    assert inserted.key.length > 0;

    // inserted sorts before the old edge
    branch.lesser = inserted;
    branch.greater = previous;
    assert inserted.toString().compareTo(previous.toString()) < 0 : "Invalid greaterthan: " + inserted +
      " is not < " + previous;
    return branch;
  }

  /**
   * Splits {@code previous} at {@code matchesTo}: builds a new branch edge
   * holding the shared prefix, trims {@code previous} to its remaining suffix,
   * and hangs a new edge for the inserted key as the branch's greater child
   * (with {@code previous} as the lesser child).
   *
   * @param previous - The existing edge being split; its key is replaced.
   * @param keyMax - Length of the remaining inserted key.
   * @param existing - The current key of {@code previous}.
   * @param matchesTo - Number of leading chars shared by both keys.
   * @param key - The characters containing the inserted key.
   * @param keyIndex - Inclusive start of the remaining inserted key.
   * @param keyEnd - Exclusive end of the inserted key (not used here).
   * @param value - The value for the inserted key.
   * @return the new branch edge, to be stored where {@code previous} was.
   */
  protected TrieEdge newEdgeGreater(TrieEdge previous, int keyMax, char[] existing, int matchesTo, char[] key,
    int keyIndex, int keyEnd, E value) {
    // exact-length buffers for the three fragments
    final char[] sharedPrefix = new char[matchesTo];
    final char[] existingSuffix = new char[existing.length - sharedPrefix.length];
    final char[] insertedSuffix = new char[keyMax - sharedPrefix.length];

    // the branch edge carries what both keys have in common
    System.arraycopy(existing, 0, sharedPrefix, 0, sharedPrefix.length);
    final TrieEdge branch = new TrieEdge(sharedPrefix, 0, sharedPrefix.length);

    // the old edge keeps only what follows the shared prefix
    System.arraycopy(existing, sharedPrefix.length, existingSuffix, 0, existingSuffix.length);
    previous.key = existingSuffix;

    // the inserted edge carries the rest of the new key, plus the value
    System.arraycopy(key, keyIndex + sharedPrefix.length, insertedSuffix, 0, insertedSuffix.length);
    final TrieEdge inserted = new TrieEdge(insertedSuffix, 0, insertedSuffix.length);
    inserted.value = value;

    assert branch.key.length > 0;
    assert previous.key.length > 0;
    assert inserted.key.length > 0;

    // inserted sorts after the old edge
    branch.lesser = previous;
    branch.greater = inserted;
    assert inserted.toString().compareTo(previous.toString()) > 0;

    return branch;
  }

  /**
   * Extension hook; the default implementation does nothing and returns
   * nothing. No call site for this method is visible in this class.
   * <p>
   * This method is provided as a stub for more sophisticated, concurrent
   * subclasses which may want to employ locking mechanisms (or event
   * dispatch). Guidance from the original author for implementors: you may
   * call {@link Object#wait(long, int)} if you already own the monitor (long
   * param is millis, should be zero; int param is nanos, keep it in the
   * hundreds). DON'T DO ANYTHING WHICH COULD BLOCK FOR A LONG TIME. Acquire
   * locks tentatively, either with {@link Lock#tryLock()} for failfast, or
   * {@link Lock#tryLock(long, java.util.concurrent.TimeUnit)}. Wait times, if
   * any, should be on a nano scale. If ownsParent is false, you should be
   * running in unsynchronized code; the only use for synchronized blocks
   * in that case is to acquire a {@link Lock}. If ownsParent is true, you are
   * safe from intrusion from above (nobody will be able to modify your
   * parent).
   * <p>
   * NOTE(review): the original comment ended mid-sentence ("but you still
   * have to contend") and described a return value although the method is
   * void; the intended contract should be confirmed with the author.
   *
   * @param into - The edge to lock
   * @param ownsParent - Whether we already own an explicit lock on the parent.
   */
  protected void lock(TrieEdge into, boolean ownsParent) {

  }

  /**
   * Extension hook, the counterpart of {@link StringTrie#lock(TrieEdge, boolean)};
   * the default implementation does nothing. No call site for this method is
   * visible in this class.
   * <p>
   * This method is a stub for more sophisticated subclasses of StringTrie,
   * which may need to perform proper concurrent locking, or event dispatch.
   * Per the original author it is meant to be called in the finally block of
   * whatever code ran lock(). If you use into.wait(0, nanos) in lock(), now
   * would be a great time to call into.notify() :)
   * <p>
   * NOTE(review): the original comment also documented a "cursor" parameter
   * (the object returned by lock()); neither that parameter nor a return
   * value exists in the current signatures.
   *
   * @param into - The edge to unlock
   * @param ownsParent - If true, you are already synchronized on into.
   */
  protected void unlock(TrieEdge into, boolean ownsParent) {
  }

  /**
   * Renders the trie for debugging: the root value (if any) followed by one
   * indented line per keyed edge, greater subtree first, then lesser.
   */
  @Override
  public String toString() {
    final StringBuilder out = new StringBuilder();
    out.append("StringTrie[\n");
    if (root.value != null) {
      final String rootLine = "\"\" : " + root.value + "\n";
      out.append(rootLine);
    }
    final TrieEdge greaterChild = root.greater;
    if (greaterChild != null) {
      visit(greaterChild, 1, new char[0], out);
    }
    final TrieEdge lesserChild = root.lesser;
    if (lesserChild != null) {
      visit(lesserChild, 1, new char[0], out);
    }
    return out.append("]").toString();
  }

  /**
   * Appends one line for {@code root} (unless it has an empty key) and then
   * recurses into its lesser and greater children, in that order.
   *
   * @param root - The edge to print.
   * @param depth - Number of spaces to indent this edge's line.
   * @param key - Accumulated key for this edge; empty at the top level.
   * @param b - The buffer to append to.
   */
  private void visit(TrieEdge root, int depth, char[] key, StringBuilder b) {
    final boolean hasPrefix = key.length > 0;
    if (root.key.length > 0) {
      // indent, then "<key>\t\t<value or [branch]>"
      int pad = 0;
      while (pad < depth) {
        b.append(' ');
        pad++;
      }
      b.append(root.key);
      b.append("\t\t");
      if (root.value != null) {
        b.append(root.value);
      } else {
        b.append("[branch]");
      }
      b.append('\n');
    }

    final TrieEdge lesserChild = root.lesser;
    if (lesserChild != null) {
      final char[] lesserPath = hasPrefix ? joinKeys(key, lesserChild.key) : lesserChild.key;
      final int lesserDepth = hasPrefix ? depth + 1 : depth;
      visit(lesserChild, lesserDepth, lesserPath, b);
    }

    final TrieEdge greaterChild = root.greater;
    if (greaterChild != null) {
      final char[] greaterPath = hasPrefix ? joinKeys(key, greaterChild.key) : greaterChild.key;
      // empty-keyed (deep) greater children stay at the current indent
      final boolean indent = hasPrefix && greaterChild.key.length > 0;
      visit(greaterChild, indent ? depth + 1 : depth, greaterPath, b);
    }
  }

  /** Returns a new array holding {@code head} followed by {@code tail}. */
  private static char[] joinKeys(char[] head, char[] tail) {
    final char[] joined = new char[head.length + tail.length];
    System.arraycopy(head, 0, joined, 0, head.length);
    System.arraycopy(tail, 0, joined, head.length, tail.length);
    return joined;
  }

  /**
   * Looks up the value stored under {@code key}; a null key is looked up as
   * {@code EMPTY_STRING}.
   *
   * @param key - The key to look up; may be null.
   * @return whatever the char-based lookup returns for this key.
   */
  public E get(String key) {
    if (key != null) {
      final char[] chars = key.toCharArray();
      return get(new Chars(chars), 0, key.length());
    }
    return get(EMPTY_STRING);
  }

  /**
   * Looks up the value stored under all of {@code key}; a null array is
   * replaced by {@code EMPTY_STRING}.
   *
   * @param key - The key characters; may be null.
   * @return whatever the {@link Chars}-based lookup returns for this key.
   */
  public E get(char[] key) {
    final char[] chars = key == null ? EMPTY_STRING : key;
    return get(new Chars(chars), 0, chars.length);
  }

  /**
   * Looks up the value stored under {@code key[pos, end)}; a null array is
   * replaced by {@code EMPTY_STRING}.
   * <p>
   * NOTE(review): {@code pos} / {@code end} are passed both to the
   * {@link Chars} constructor and to the lookup. If {@code Chars} already
   * offsets {@code charAt} by its start index, a non-zero {@code pos} would be
   * applied twice — confirm against the Chars implementation.
   *
   * @param key - The characters containing the key; may be null.
   * @param pos - Inclusive start of the key.
   * @param end - Exclusive end of the key.
   * @return whatever the {@link Chars}-based lookup returns for this range.
   */
  public E get(char[] key, int pos, int end) {
    final char[] chars = key == null ? EMPTY_STRING : key;
    final Chars view = new Chars(chars, pos, end);
    return get(view, pos, end);
  }

  /**
   * Walks the trie from the root, consuming {@code keys} between {@code pos}
   * and {@code end}, without taking any locks.
   * <p>
   * At each edge the lesser child is compared first; if the key sorts after
   * it, the greater child is tried. A greater child with an empty key (a deep
   * node) is entered without consuming any characters.
   *
   * @param keys - The characters to match; read via {@code charAt(pos + i)}.
   * @param pos - Index of the first character to match.
   * @param end - Index one past the last character to match.
   * @return the result of {@link #returnValue} for the edge reached when every
   * character has been consumed, otherwise the result of {@link #onEmpty} for
   * the edge where the search stopped.
   */
  public E get(final Chars keys, int pos, final int end) {
    TrieEdge e = root;
    while (e != null) {
      // our test for success is always when we make it through a for loop
      // which matches our key, and when the next search position = key length.
      // if there was a value at this key, we would have returned it.
      if (pos == end) return returnValue(e, keys, pos, end);

      if (e.lesser != null) {
        final char[] lesser = e.lesser.key;
        testlesser: {
          for (int i = 0; i < lesser.length; i++) {
            // the requested key ends inside the lesser key: no exact match
            if (end <= pos + i) return onEmpty(e, keys, pos, end);
            final int delta = keys.charAt(pos + i) - lesser[i];
            if (delta < 0) {
              // if a lesser is greater than us, there's nothing to return
              return onEmpty(e, keys, pos, end);
            }
            if (delta > 0) {
              break testlesser;
            }
          }// end for
           // if we didn't break, we equal the lesser. Descend into it.
          e = e.lesser;
          pos += lesser.length;
          continue;
        }// end test lesser
         // requested key is greater than lesser key. Carry on.
      }// end lesser

      if (e.greater == null) return onEmpty(e, keys, pos, end);
      final char[] greater = e.greater.key;
      if (greater.length == 0) {
        // deep node, just continue search
        e = e.greater;
        continue;
      }
      final int len = greater.length;
      // not enough characters left to match the whole greater key
      if (len + pos > end) return onEmpty(e, keys, pos, end);
      for (int i = 0; i < len; i++) {
        if (keys.charAt(pos + i) != greater[i]) return onEmpty(e, keys, pos, end);
      }
      pos += len;
      // still haven't returned, so we match this greater
      e = e.greater;
    }
    return onEmpty(e, keys, pos, end);
  }

  /**
   * Stub: always returns null and ignores {@code prefix}.
   * <p>
   * NOTE(review): the return type is a raw {@code Iterable}; presumably it is
   * meant to be an Iterable of this trie's value type — confirm before
   * implementing. Callers must null-check the result.
   *
   * @param prefix - The key prefix to search for (currently unused).
   * @return null.
   */
  public Iterable findPrefixed(String prefix) {
    return null;
  }

  /**
   * Stub: the default implementation does nothing and ignores its argument.
   *
   * @param charPoolTrie - Currently unused.
   */
  public void compress(CharPoolTrie charPoolTrie) {

  }

  /**
   * Hook called by the lookup when every character of the key was consumed.
   * The default implementation returns the value of the edge that was
   * reached, which is null if that edge only acts as a branch.
   *
   * @param e - The edge the lookup ended on.
   * @param keys - The characters that were searched (unused here).
   * @param pos - The search position when the lookup ended (unused here).
   * @param end - The end of the searched range (unused here).
   * @return {@code e.value}.
   */
  protected E returnValue(TrieEdge e, Chars keys, int pos, int end) {
    return e.value;
  }

  /**
   * Hook called by the lookup when no edge matches the requested key.
   * The default implementation ignores its arguments and returns null.
   *
   * @param e - The edge at which the search stopped.
   * @param keys - The characters that were searched.
   * @param pos - The search position when the lookup stopped.
   * @param end - The end of the searched range.
   * @return null.
   */
  protected E onEmpty(TrieEdge e, Chars keys, int pos, int end) {
    return null;
  }

}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy