All Downloads are FREE. Search and download functionalities are using the official Maven repository.

xapi.collect.impl.StringTrie_2 Maven / Gradle / Ivy

Go to download

Everything needed to run a comprehensive dev environment. Just type X_ and pick a service from autocomplete; new dev modules will be added as they are built. The only dev service not included in the uber jar is xapi-dev-maven, as it includes all runtime dependencies of maven, adding ~4 seconds to build time, and 6 megabytes to the final output jar size (without xapi-dev-maven, it's ~1MB).

The newest version!
package xapi.collect.impl;


import static xapi.collect.api.CharPool.EMPTY_STRING;

import java.io.Serializable;
import java.util.concurrent.locks.Lock;


public class StringTrie_2  extends StringTrie{

  // Shared zero-length key. The no-arg Edge constructor uses it, and a zero-length key
  // on a "greater" edge is what doPut/get treat as a deep (pass-through) node.
  private static final char[] emptyString = new char[0];
  /**
   * One node of the trie graph.
   * <p>
   * An edge holds the key fragment it contributes, an optional value, and two child
   * links ({@link #lesser} and {@link #greater}). Edges are mutable so the trie can be
   * restructured in place without reallocating whole branches; the child links and
   * the key are {@code volatile}, while {@link #value} is a plain field.
   * <p>
   * Design notes from the original author: the extra cost of this scheme is the
   * synchronization needed to acquire downward locks during puts (and deletes).
   * Gwt doesn't pay for synchronization, so it performs optimally in js.
   * <p>
   * A parent lock is not held while a child lock is taken, so several threads can
   * quickly traverse hotspots where many strings share a prefix, such as java packages:
   * <pre>
   * com.foo.client.Something
   * com.foo.client.SomethingElse
   * com.foo.server.Something
   * com.bar.client.Something
   * com.bar.server.Something
   * ...etc.
   * </pre>
   * The fragment {@code com.} would be locked and released before acquiring
   * {@code foo.} | {@code bar.}, so concurrent modifications in different areas of the
   * trie do not block each other.
   *
   * @author "James X. Nelson ([email protected])"
   *
   */
  protected class Edge implements Serializable {
    private static final long serialVersionUID = 5885970862972987462L;
    /** Value stored at this node, or null when the node only exists as a branch point. */
    protected E value;
    protected volatile Edge greater;
    protected volatile Edge lesser;
    // The key is a char[] rather than a String so that building keysets stays cheap:
    // exact-length arrays can be assembled iteratively, without buffers or
    // power-of-two guessing, and a parent's char[] is built once for all its children.
    protected volatile char[] key;


    /** Creates an edge with the shared zero-length key. */
    protected Edge() {
      this(emptyString, 0, 0);
    }

    /**
     * Creates an edge for {@code key[index..end)}.
     * When the range spans the entire array the array itself is retained (no copy);
     * otherwise the range is copied into a new exact-length array.
     *
     * @param key source characters
     * @param index first character of the fragment (inclusive)
     * @param end end of the fragment (exclusive)
     */
    public Edge(char[] key, int index, int end) {
      final boolean coversWholeArray = index == 0 && end == key.length;
      if (!coversWholeArray) {
        final char[] fragment = new char[end - index];
        assert fragment.length > 0;
        System.arraycopy(key, index, fragment, 0, fragment.length);
        this.key = fragment;
        return;
      }
      this.key = key;
      assert key == emptyString || end > 0;
    }

    /** @return this edge's key fragment as a String. */
    @Override
    public String toString() {
      return String.valueOf(key);
    }
  }

//  protected class DeepEdge extends Edge {
//    private static final long serialVersionUID = -5753734197657416201L;
//
//    public DeepEdge(StringTrie_2 stringTrie, char[] key, int index, int end) {
//      stringTrie.super(key, index, end);
////      Object o = new Edge[10];
//      // TODO Auto-generated constructor stub
//    }
//
//    Edge[] children;
//  }

  // Root of the trie: an Edge with the zero-length key. Its value is what the
  // null / empty key maps to. Protected so subclasses can reach it; final so the
  // reference itself is never replaced.
  protected final Edge root = new Edge();

  /**
   * Stores {@code value} under the key {@code key[start..end)}.
   * <p>
   * A null key, a zero-length array, or an empty range ({@code start == end}) all
   * denote the empty key, whose value lives on the root edge. An empty range must not
   * reach {@link #doPut}: it requires at least one character and would otherwise
   * create an edge with a zero-length key, which the trie interprets as a deep node.
   *
   * @param key source characters; may be null
   * @param start first character of the key (inclusive)
   * @param end end of the key (exclusive)
   * @param value the value to store
   * @throws ArrayIndexOutOfBoundsException if the array is non-empty and
   *         {@code start < 0}, {@code end > key.length} or {@code start > end}
   */
  @Override
  public void put(char[] key, int start, int end, E value) {
    if (key == null || key.length == 0) {
      root.value = value;
      return;
    }
    if (start < 0 || end > key.length || start > end) {
      throw new ArrayIndexOutOfBoundsException(
          "start=" + start + ", end=" + end + ", length=" + key.length);
    }
    if (start == end) {
      // empty range == empty key; get() answers it from the root as well
      root.value = value;
      return;
    }
    doPut(root, key, start, end, value);
  }
  /**
   * Stores {@code value} under {@code key}.
   * A null or empty string addresses the root edge directly.
   *
   * @param key the key; may be null
   * @param value the value to store
   */
  @Override
  public void put(String key, E value) {
    if (key != null && key.length() > 0) {
      final char[] chars = key.toCharArray();
      doPut(root, chars, 0, chars.length, value);
      return;
    }
    root.value = value;
  }

  /**
   * Inserts {@code value} under {@code key[index..end)} somewhere beneath {@code into}.
   * <p>
   * Each edge has a {@code lesser} and a {@code greater} child. A {@code greater} child
   * whose key is zero-length is a "deep node": it contributes no characters and only
   * chains further siblings, ordered by first character.
   * <p>
   * Structural changes to {@code into} are made while synchronized on {@code into}.
   * Recursion into a child always happens outside that synchronized block, so a parent
   * monitor is not held while a child's is acquired. When a child's key consumes the
   * rest of the inserted key ({@code nextIndex == end}) the value is assigned to that
   * child directly, outside any synchronized block.
   *
   * @param into the edge to insert beneath
   * @param key source characters
   * @param index first unconsumed character of the key; must be less than {@code end}
   * @param end end of the key (exclusive)
   * @param value the value to store
   */
  protected void doPut(final Edge into, char[] key, final int index, int end, E value) {
    assert index < end;
//  To stay threadsafe, we synchronize on Edges when we modify them.
//  To stay fast, we don't recurse until we are out of the synchro block.

//  We optimize for our worst-case scenario off the hop;
//  which is a deep node traversal (when one node points to many).
    final Edge nextInto;
    int nextIndex;

    final char k = key[index];
//  handle peeking into deeper nodes that will result in recursion.
    final Edge greater = into.greater;
    if (greater != null)
    {
      assert into.lesser != null;
      final char[] greaterKey = greater.key;
//    deep nodes are stored in greater slot
      if (greaterKey.length == 0) {//this is a deep node!
//      bounds check on its lesser
        if (k - greater.lesser.key[0] >= 0) {
//        if inserted key is not less than the lesser of the deep node,
//        then recurse into the greater, without locking.
          doPut(greater, key, index, end, value);
          return;
        }

//      we are in a deep node, and are less than the greater.
//      check if we need to insert a new deep node.
        synchronized(into) {//wait for any operations to finish
          if (greater == into.greater) {
//          The only comod we need to worry about here is the greater node;
//          if the lesser is changed while we were waiting, we're still okay.
            final Edge lesser = into.lesser;
            final int delta = k - lesser.key[0];
            if (delta != 0) {
//            first char differs from into.lesser: chain a new deep node in front
//            of the existing one, holding either the new node or the old lesser.
              Edge newParent = new Edge();
              newParent.greater = into.greater;

              Edge newNode = new Edge(key, index, end);
              newNode.value = value;

              if (delta > 0) {
//            new node is greater than current lesser; replace into.greater
                newParent.lesser = newNode;
              } else {
//            new node is less than our lesser; take lesser spot
//            and make the old lesser a new deep node
                newParent.lesser = lesser;
                into.lesser = newNode;
              }
              into.greater = newParent;
              return;//done!
            }
//          we start with the same char as into.lesser;
//          find out how far we match, and possibly recurse.
            if (insertLesser(into, key, index, end, value))
              return;
//          if we didn't return, we must recurse into this lesser
            nextInto = into.lesser;
            nextIndex = index + lesser.key.length;
          } else {
//          the trie was modified while we were waiting,
//          recurse, as we need to run the deep checks again.
            nextInto = into;
            nextIndex = index;
          }
        }//end synchro
        //if we didn't return, we need to recurse.
        if (nextIndex == end) {
//        the matched child's key used up the whole inserted key; store the value on it
          nextInto.value = value;
        } else {
          doPut(nextInto, key, nextIndex, end, value);
        }
        return;
      }//end deep node
    }//end into.greater != null


    //because we are only locking on the parent node,
    //but potentially modifying the structure of child nodes,
    //and we don't want to invite deadlock, we only ever iterate downward;
    //we acquire the locks on children before modifying them
    //or reading their lesser / greater nodes.
    synchro:
    synchronized(into)
    {
//    into.lesser will only ever be null on the very first put.
      if (into.lesser == null) {
        assert into.greater == null;
//      both null, just take lesser and exit
        into.lesser = new Edge(key, index, end);
        into.lesser.value = value;
        return;
      }
//    start our compare on lesser...
      final char[] lesserKey = into.lesser.key;
      final int deltaLesser = k - lesserKey[0];
      if (deltaLesser == 0) {
//      we match the first char of the lesser.
        if (insertLesser(into, key, index, end, value)) {
          return;
        }
        else {
//        if we didn't return, we must recurse
          nextInto = into.lesser;
          nextIndex = index + lesserKey.length;
          break synchro;
        }
      }
//    if we are less than the lesser, we need to usurp its position
      if (into.greater == null) {
//      with no greater node, our job is easy.  Just fill this node up.
        Edge newNode = new Edge(key, index, end);
        newNode.value = value;
        if (deltaLesser < 0) {
          into.greater = into.lesser;
          into.lesser = newNode;
        }else {
          into.greater = newNode;
        }
        return;
      }

//    we have to check the greater,
//    which may have changed since we last deep-checked it...
      final char[] greaterKey = into.greater.key;
      if (greaterKey.length == 0) {
//      the greater is now deep and it wasn't before.
//      recurse back into the same node; we can't get back here once deep
        nextInto = into;
        nextIndex = index;
        break synchro;
      }

      if (deltaLesser < 0) {
//      A greater exists, but we still need to usurp lesser
//      the old lesser/greater pair moves under a new deep node
        Edge newParent = new Edge();
        Edge newNode = new Edge(key, index, end);
        newNode.value = value;
        newParent.lesser = into.lesser;
        newParent.greater = into.greater;
        into.greater = newParent;
        into.lesser = newNode;
        return;
      }

//    The only thing left to do is run a compare on greater
      final int deltaGreater = k - greaterKey[0];
      if (deltaGreater == 0) {
//      we must insert into the greater, or else recurse
        if (insertGreater(into, key, index, end, value))
          return;
        nextInto = into.greater;
        nextIndex = index + into.greater.key.length;
        break synchro;
      }
//    we don't start with greater or lesser, and must create a deep node
      Edge newParent = new Edge();
      Edge newNode = new Edge(key, index, end);
      newNode.value = value;
      if (deltaGreater > 0) {
//      new node is the greatest
        newParent.greater = newNode;
        newParent.lesser = into.greater;
      } else {
        newParent.greater = into.greater;
        newParent.lesser = newNode;
      }
      into.greater = newParent;
      return;
    }//end synchro.  If we haven't returned, we need to recurse.
    if (nextIndex == end) {
//    the matched child's key used up the whole inserted key; store the value on it
      nextInto.value = value;
    } else {
      doPut(nextInto, key, nextIndex, end, value);
    }
  }

  /**
   * Tries to place {@code key[index..end)} at {@code into.lesser}, whose key is
   * already known to share its first character with the inserted key.
   * <p>
   * Three outcomes are possible:
   * <ul>
   * <li>the keys diverge part-way: the lesser edge is split around the common prefix
   *     and the new value becomes a sibling of the old edge (returns true);</li>
   * <li>the inserted key is a strict prefix of the lesser key: a new edge for the
   *     inserted key takes the lesser slot and the old edge, trimmed to its remaining
   *     suffix, becomes that new edge's lesser child (returns true);</li>
   * <li>the lesser key is fully matched: nothing is changed (returns false).</li>
   * </ul>
   *
   * @return true if the value was stored; false if the caller must continue from
   *         {@code into.lesser}
   */
  private boolean insertLesser(Edge into, char[] key, int index, int end, E value) {
    final Edge current = into.lesser;
    final char[] currentKey = current.key;
    final int insertLength = end - index;
    //the first char was matched by the caller, so comparison resumes at offset 1
    int matched = 1;
    while (matched < insertLength) {
      if (matched == currentKey.length) {
        //ran off the end of the existing key while still matching
        return false;
      }
      final int diff = key[index + matched] - currentKey[matched];
      if (diff < 0) {
        //inserted key sorts before the existing one
        into.lesser = newEdgeLesser(current, insertLength, currentKey, matched, key, index, end, value);
        return true;
      } else if (diff > 0) {
        //inserted key sorts after the existing one
        into.lesser = newEdgeGreater(current, insertLength, currentKey, matched, key, index, end, value);
        return true;
      }
      matched++;
    }
    if (matched == currentKey.length) {
      return false;
    }
    //The existing key is longer than the inserted one:
    //the new node goes in front and the old one hangs beneath it with a trimmed key.
    final Edge inserted = new Edge(key, index, end);
    inserted.value = value;
    final char[] remainder = new char[currentKey.length - insertLength];
    System.arraycopy(currentKey, insertLength, remainder, 0, remainder.length);
    inserted.lesser = current;
    into.lesser = inserted;
    current.key = remainder;
    return true;
  }
  /**
   * Tries to place {@code key[index..end)} at {@code into.greater}, whose key is
   * already known to share its first character with the inserted key.
   * <p>
   * Three outcomes are possible:
   * <ul>
   * <li>the keys diverge part-way: the greater edge is split around the common prefix
   *     and the new value becomes a sibling of the old edge (returns true);</li>
   * <li>the inserted key is a strict prefix of the greater key: a new edge for the
   *     inserted key takes the greater slot and the old edge, trimmed to its remaining
   *     suffix, becomes that new edge's <em>lesser</em> child (returns true);</li>
   * <li>the greater key is fully matched: nothing is changed (returns false).</li>
   * </ul>
   * The displaced edge is hung from the new node's lesser slot, not its greater slot,
   * because {@link #doPut} relies on every edge that has a greater child also having a
   * lesser child: it fills an empty lesser slot unconditionally. With the old edge in
   * the greater slot, a later put could store a larger key in the lesser slot and make
   * the displaced key unreachable from {@code get}.
   *
   * @return true if the value was stored; false if the caller must continue from
   *         {@code into.greater}
   */
  private boolean insertGreater(Edge into, char[] key, int index, int end, E value) {
    int matchesTo = 1;//only called when we've already matched the first char
    final int keyLen = end - index;
    final char[] greaterKey = into.greater.key;
    for (;matchesTo < keyLen; matchesTo++) {
      if (matchesTo == greaterKey.length) {
        return false;
      }
      int delta = key[index+matchesTo] - greaterKey[matchesTo];
      if (delta < 0) {
//      new node is less than greater
        into.greater = newEdgeLesser(into.greater, keyLen, greaterKey, matchesTo, key, index, end, value);
        return true;
      }
      if (delta > 0) {
//      new node is greater than greater
        into.greater = newEdgeGreater(into.greater, keyLen, greaterKey, matchesTo, key, index, end, value);
        return true;
      }
    }
    if (matchesTo == greaterKey.length) {
      return false;
    }
    //If we haven't returned, then the existing key is longer than the one
    //we are inserting.  Thus, we must slip the new node in front of the old one.
    final Edge newNode = new Edge(key, index, end);
    newNode.value = value;
    final char[] newGreater = new char[greaterKey.length - keyLen];
    System.arraycopy(greaterKey, keyLen, newGreater, 0, newGreater.length);
    final Edge displaced = into.greater;
    //the only child of the new node goes in its lesser slot (see method comment)
    newNode.lesser = displaced;
    into.greater = newNode;
    displaced.key = newGreater;
    return true;
  }
  /**
   * Splits {@code previous} at the point where the inserted key diverges from it, for
   * the case where the inserted key sorts before the existing one.
   * <p>
   * A new branch edge is created for the first {@code matchesTo} characters the two
   * keys share. {@code previous} has its key trimmed to the remaining suffix and
   * becomes the branch's greater child; a new edge carrying {@code value} and the
   * inserted key's remaining suffix becomes the branch's lesser child.
   *
   * @param previous the existing edge being split; its key is reassigned
   * @param keyMax length of the inserted key fragment
   * @param existing the current key of {@code previous}
   * @param matchesTo number of leading characters shared by both keys
   * @param key source characters of the inserted key
   * @param keyIndex offset in {@code key} where the inserted fragment starts
   * @param keyEnd not read by this implementation
   * @param value the value to store on the new edge
   * @return the new branch edge, which the caller puts in the place of {@code previous}
   */
  protected Edge newEdgeLesser(Edge previous, int keyMax, char[] existing, int matchesTo, char[] key, int keyIndex, int keyEnd, E value) {
    //allocate the three fragments up front: shared prefix, and each key's unique tail
    final char[] sharedPrefix = new char[matchesTo];
    final char[] existingTail = new char[existing.length - sharedPrefix.length];
    final char[] insertedTail = new char[keyMax - sharedPrefix.length];
    final int split = sharedPrefix.length;

    //the branch edge owns the common prefix
    System.arraycopy(existing, 0, sharedPrefix, 0, split);
    final Edge branch = new Edge(sharedPrefix, 0, split);

    //the existing edge keeps only what follows the prefix
    System.arraycopy(existing, split, existingTail, 0, existingTail.length);
    previous.key = existingTail;

    //the inserted value gets an edge of its own for its tail
    System.arraycopy(key, keyIndex + split, insertedTail, 0, insertedTail.length);
    final Edge inserted = new Edge(insertedTail, 0, insertedTail.length);
    inserted.value = value;

    assert branch.key.length > 0;
    assert previous.key.length > 0;
    assert inserted.key.length > 0;

    //inserted key sorts first, so it takes the lesser slot
    branch.lesser = inserted;
    branch.greater = previous;
    assert inserted.toString().compareTo(previous.toString()) < 0
      : "Invalid greaterthan: "+inserted+" is not < "+previous;
    return branch;
  }
  /**
   * Splits {@code previous} at the point where the inserted key diverges from it, for
   * the case where the inserted key sorts after the existing one.
   * <p>
   * A new branch edge is created for the first {@code matchesTo} characters the two
   * keys share. {@code previous} has its key trimmed to the remaining suffix and
   * becomes the branch's lesser child; a new edge carrying {@code value} and the
   * inserted key's remaining suffix becomes the branch's greater child.
   *
   * @param previous the existing edge being split; its key is reassigned
   * @param keyMax length of the inserted key fragment
   * @param existing the current key of {@code previous}
   * @param matchesTo number of leading characters shared by both keys
   * @param key source characters of the inserted key
   * @param keyIndex offset in {@code key} where the inserted fragment starts
   * @param keyEnd not read by this implementation
   * @param value the value to store on the new edge
   * @return the new branch edge, which the caller puts in the place of {@code previous}
   */
  protected Edge newEdgeGreater(Edge previous, int keyMax, char[] existing, int matchesTo, char[] key, int keyIndex, int keyEnd, E value) {
    //allocate the three fragments up front: shared prefix, and each key's unique tail
    final char[] sharedPrefix = new char[matchesTo];
    final char[] existingTail = new char[existing.length - sharedPrefix.length];
    final char[] insertedTail = new char[keyMax - sharedPrefix.length];
    final int split = sharedPrefix.length;

    //the branch edge owns the common prefix
    System.arraycopy(existing, 0, sharedPrefix, 0, split);
    final Edge branch = new Edge(sharedPrefix, 0, split);

    //the existing edge keeps only what follows the prefix
    System.arraycopy(existing, split, existingTail, 0, existingTail.length);
    previous.key = existingTail;

    //the inserted value gets an edge of its own for its tail
    System.arraycopy(key, keyIndex + split, insertedTail, 0, insertedTail.length);
    final Edge inserted = new Edge(insertedTail, 0, insertedTail.length);
    inserted.value = value;

    assert branch.key.length > 0;
    assert previous.key.length > 0;
    assert inserted.key.length > 0;

    //inserted key sorts last, so it takes the greater slot
    branch.lesser = previous;
    branch.greater = inserted;
    assert inserted.toString().compareTo(previous.toString()) > 0;

    return branch;
  }

  /**
   * Extension hook for subclasses that want a more sophisticated locking mechanism
   * (or event dispatch) around an edge. This base implementation does nothing and
   * returns null; no code visible in this file calls it.
   * <p>
   * Guidance from the original author for overriders:
   * <ul>
   * <li>You may call {@link Object#wait(long, int)}, as you already own the lock.
   *     The long param is millis and should be zero; the int param is nanos, keep it
   *     in the hundreds.</li>
   * <li>DON'T DO ANYTHING WHICH COULD BLOCK FOR A LONG TIME.
   *     Acquire locks tentatively, either with {@link Lock#tryLock()} for failfast,
   *     or {@link Lock#tryLock(long, java.util.concurrent.TimeUnit)}.
   *     Wait times, if any, should be on a nano scale.</li>
   * <li>If ownsParent is false, you should be running in unsynchronized code.
   *     The only use for synchronous method blocks in this case is to acquire a
   *     {@link Lock}.</li>
   * <li>If ownsParent is true, you are safe from intrusion from above
   *     (nobody will be able to modify your parent), but you still have to contend
   *     with other threads. NOTE(review): the original sentence was left unfinished
   *     after "contend"; confirm what was meant.</li>
   * </ul>
   *
   * @param into - The edge to lock
   * @param ownsParent - Whether we already own an explicit lock on the parent.
   * @return - Any object you want; null will do fine. The intent is for it to be handed
   *         back as the cursor argument of {@link StringTrie_2#unlock(Edge, boolean, Object)}.
   */
  protected Object lock(Edge into, boolean ownsParent) {
    return null;
  }

  /**
   * Extension hook paired with {@link StringTrie_2#lock(Edge, boolean)}, for subclasses
   * which need to perform proper concurrent locking, or event dispatch. This base
   * implementation is empty; no code visible in this file calls it.
   * <p>
   * The intended contract is that it runs in the finally block of whatever code ran
   * {@link StringTrie_2#lock(Edge, boolean)}.
   * <p>
   * If you use Edge into.wait(0, nanos) in lock(),
   * now would be a great time to call into.notify() :)
   *
   * @param into - The edge to unlock
   * @param ownsParent - If true, you are already synchronized on into.
   * @param cursor - Whatever object you returned when you locked.
   */
  protected void unlock(Edge into, boolean ownsParent, Object cursor) {

  }


  /**
   * Renders the trie for debugging: a {@code StringTrie[} header, the root value (if
   * any) shown against the empty key, then the subtree under the root's greater edge
   * followed by the subtree under its lesser edge, and a closing {@code ]}.
   */
  @Override
  public String toString() {
    final StringBuilder out = new StringBuilder("StringTrie[\n");
    if (root.value != null) {
      out.append("\"\" : ").append(root.value).append("\n");
    }
    final Edge greaterChild = root.greater;
    if (greaterChild != null) {
      visit(greaterChild, 1, new char[0], out);
    }
    final Edge lesserChild = root.lesser;
    if (lesserChild != null) {
      visit(lesserChild, 1, new char[0], out);
    }
    return out.append("]").toString();
  }

  private void visit(Edge root, int depth, char[] key, StringBuilder b) {
    final boolean anyKey = key.length > 0;
    if (root.key.length>0) {
      for (int i = 0;i 0 : b;
        char[] nextKey = new char[key.length+childKey.length];
        System.arraycopy(key, 0, nextKey, 0, key.length);
        System.arraycopy(childKey, 0, nextKey, key.length, childKey.length);
        childKey = nextKey;
        nextKey = null;
      }
      visit(root.lesser, depth+(anyKey?1:0), childKey, b);
      childKey = null;
    }
    if (root.greater != null) {
      //visit greater edge
      char[] childKey = root.greater.key;
      if (anyKey) {
        char[] nextKey = new char[key.length+childKey.length];
        System.arraycopy(key, 0, nextKey, 0, key.length);
        System.arraycopy(childKey, 0, nextKey, key.length, childKey.length);
        childKey = nextKey;
        nextKey = null;
      }
      boolean addSpace = anyKey&&root.greater.key.length>0;
      visit(root.greater, depth+(addSpace?1:0), childKey, b);
      childKey = null;
    }
  }


  /**
   * Looks up the value stored under {@code key}.
   * A null key is looked up as {@code EMPTY_STRING}.
   *
   * @param key the key; may be null
   * @return whatever the {@code Chars}-based lookup yields for this key
   */
  @Override
  public E get(String key) {
    if (key != null) {
      final char[] chars = key.toCharArray();
      return get(new Chars(chars), 0, chars.length);
    }
    return get(EMPTY_STRING);
  }

  /**
   * Looks up the value stored under the whole of {@code key}.
   * A null array is replaced by {@code EMPTY_STRING}.
   *
   * @param key the key characters; may be null
   * @return whatever the {@code Chars}-based lookup yields for this key
   */
  @Override
  public E get(char[] key) {
    final char[] chars = key == null ? EMPTY_STRING : key;
    return get(new Chars(chars), 0, chars.length);
  }
  /**
   * Looks up the value stored under {@code key[pos..end)}.
   * A null array is replaced by {@code EMPTY_STRING}; {@code pos} and {@code end} are
   * passed through unchanged in that case.
   *
   * @param key the key characters; may be null
   * @param pos first character of the key (inclusive)
   * @param end end of the key (exclusive)
   * @return whatever the {@code Chars}-based lookup yields for this range
   */
  @Override
  public E get(char[] key, int pos, int end) {
    final char[] chars = key == null ? EMPTY_STRING : key;
    final Chars window = new Chars(chars, pos, end);
    return get(window, pos, end);
  }

  /**
   * Walks the trie from the root, consuming {@code keys} from {@code pos} up to
   * {@code end}, and returns the result of {@link #returnValue} for the edge reached
   * once the whole key has been consumed, or of {@link #onEmpty} as soon as no edge
   * can match.
   * <p>
   * At each edge the lesser child is tried first, then the greater child. A greater
   * child with a zero-length key is a deep node and is entered without consuming any
   * characters. This method takes no locks.
   * <p>
   * NOTE(review): characters are read with {@code keys.charAt(pos+i)}, so this assumes
   * {@code Chars} is indexed in the same coordinate space as {@code pos}/{@code end};
   * confirm against the {@code Chars} constructors used by the other get overloads.
   *
   * @param keys the characters of the key
   * @param pos index of the first unconsumed character
   * @param end end of the key (exclusive)
   * @return the value produced by {@link #returnValue} or {@link #onEmpty}
   */
  @Override
  public E get(final Chars keys, int pos, final int end) {
    Edge e = root;
    while (e != null) {
      //our test for success is always when we make it through a for loop
      //which matches our key, and when the next search position = length of key.
      //if there was a value at this key, we would have returned it.
      if (pos == end)
        return returnValue(e, keys, pos, end);

      if (e.lesser != null) {
        final char[] lesser = e.lesser.key;
        testlesser: {
          for (int i = 0; i < lesser.length; i++) {
            //the requested key ends part-way through the lesser's key: no exact match
            if (end <= pos+i)
              return onEmpty(e, keys, pos, end);
            final int delta = keys.charAt(pos+i) - lesser[i];
            if (delta < 0) {
              //if a lesser is greater than us, there's nothing to return
              return onEmpty(e, keys, pos, end);
            }
            if (delta > 0) {
              break testlesser;
            }
          }//end for
          //if we didn't break, we equal the lesser.  Descend into it.
          e = e.lesser;
          pos += lesser.length;
          continue;
        }//end test lesser
        //requested key is greater than lesser key.  Carry on.
      }//end lesser

      if (e.greater == null)
        return onEmpty(e, keys, pos, end);
      final char[] greater = e.greater.key;
      if (greater.length == 0) {
        //deep node, just continue search
        e = e.greater;
        continue;
      }
      final int len = greater.length;
      //the greater's key must fit entirely within what is left of the requested key
      if (len + pos > end)
        return onEmpty(e, keys, pos, end);
      for (int i = 0; i < len; i++) {
        if (keys.charAt(pos+i) != greater[i])
          return onEmpty(e, keys, pos, end);
      }
      pos += len;
      //still haven't returned, so we match this greater
      e = e.greater;
    }
    return onEmpty(e, keys, pos, end);
  }

  /**
   * Called by the {@code Chars}-based get when the whole key has been consumed at
   * edge {@code e}. This implementation returns the edge's value, which may be null;
   * the remaining parameters are not read here.
   */
  protected E returnValue(Edge e, Chars keys, int pos, int end) {
    return e.value;
  }
  /**
   * Called by the {@code Chars}-based get when the search stops at edge {@code e}
   * without matching the key. This implementation always returns null and reads none
   * of its parameters.
   */
  protected E onEmpty(Edge e, Chars keys, int pos, int end) {
    return null;
  }

  /**
   * Intentionally empty in this implementation: the supplied {@code charPoolTrie} is
   * ignored and the trie is left unchanged.
   */
  @Override
  public void compress(CharPoolTrie charPoolTrie) {

  }

}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy