xapi.collect.impl.StringTrie Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of xapi-core-collect Show documentation
Show all versions of xapi-core-collect Show documentation
Core interfaces for our collections api.
package xapi.collect.impl;
import static xapi.collect.api.CharPool.EMPTY_STRING;
import java.io.Serializable;
import java.util.concurrent.locks.Lock;
import xapi.util.impl.CharPoolTrie;
import xapi.util.impl.Chars;
public class StringTrie {
// Shared zero-length key. TrieEdge's no-arg constructor passes this array as
// the key, so key-less edges (the root and "deep" nodes) all hold a length-0 key.
private static final char[] emptyString = new char[0];
/**
 * A single node (edge) of the trie.
 * <p>
 * Each edge carries a key fragment, an optional value, and two links,
 * {@link #lesser} and {@link #greater}. The edge is deliberately mutable to
 * keep the memory footprint small; the links and the key are volatile so that
 * readers which do not take a lock still observe updates.
 * <p>
 * Writers synchronize on the edge they are modifying and, per the original
 * author's design notes, do not hold a parent's monitor while taking a
 * child's. Threads inserting under a heavily shared prefix (for example the
 * java package names com.foo.client.Something, com.foo.server.Something,
 * com.bar.client.Something) therefore only contend briefly on the shared
 * fragment before moving on to the differing one.
 *
 * @author "James X. Nelson ([email protected])"
 */
protected class TrieEdge implements Serializable {
  private static final long serialVersionUID = 5885970862972987462L;

  /** Value stored at this edge, or null when the edge is only a branch. */
  protected E value;
  protected volatile TrieEdge greater;
  protected volatile TrieEdge lesser;

  // Keys are char[] rather than String so that full keys can be assembled
  // by copying into exact-length arrays (no buffers, no power-of-two
  // guessing), letting a parent's characters be built once for all of its
  // children when a key set is requested.
  protected volatile char[] key;

  /** Creates an edge with the shared empty key. */
  protected TrieEdge() {
    this(emptyString, 0, 0);
  }

  /**
   * Creates an edge whose key is {@code key[index..end)}.
   * When the range spans the whole array, the array itself is kept (not
   * copied); otherwise the range is copied into a new exact-length array.
   *
   * @param key - source characters
   * @param index - first character of the fragment (inclusive)
   * @param end - end of the fragment (exclusive)
   */
  public TrieEdge(final char[] key, final int index, final int end) {
    final boolean coversWholeArray = index == 0 && end == key.length;
    if (!coversWholeArray) {
      final char[] fragment = new char[end - index];
      assert fragment.length > 0;
      System.arraycopy(key, index, fragment, 0, fragment.length);
      this.key = fragment;
      return;
    }
    this.key = key;
    assert key == emptyString || end > 0;
  }

  /** @return this edge's own key fragment as a String. */
  @Override
  public String toString() {
    return String.valueOf(key);
  }
}
// The top of the trie: an edge with the empty key. Its value slot holds the
// value stored for the null / empty key (see the put methods).
// Protected so subclasses can reach it; final so it can never be swapped out.
protected final TrieEdge root = new TrieEdge();
/**
 * Stores {@code value} under the key {@code key[start..end)}.
 * <p>
 * A null array, a zero-length array, or an empty range ({@code start == end})
 * all address the empty key, whose value lives on the root edge. An empty
 * range must not reach {@code doPut}: it requires at least one character and
 * would otherwise read past the range or create a zero-length edge.
 *
 * @param key - the characters containing the key; may be null
 * @param start - index of the first key character (inclusive)
 * @param end - index after the last key character (exclusive)
 * @param value - the value to store
 * @throws ArrayIndexOutOfBoundsException if the range is not within a
 * non-empty {@code key}, or if {@code start > end}
 */
public void put(final char[] key, final int start, final int end, final E value) {
  if (key == null || key.length == 0) {
    root.value = value;
    return;
  }
  if (start < 0 || end > key.length || start > end) {
    throw new ArrayIndexOutOfBoundsException(
        "Invalid key range [" + start + ", " + end + ") for array of length " + key.length);
  }
  if (start == end) {
    // empty slice of a non-empty array is still the empty key
    root.value = value;
    return;
  }
  doPut(root, key, start, end, value);
}
/**
 * Stores {@code value} under the given string key.
 * A null or empty key stores the value directly on the root edge.
 *
 * @param key - the key; may be null or empty
 * @param value - the value to store
 */
public void put(final String key, final E value) {
  final boolean addressesRoot = key == null || key.isEmpty();
  if (addressesRoot) {
    root.value = value;
    return;
  }
  final char[] chars = key.toCharArray();
  doPut(root, chars, 0, chars.length, value);
}
/**
 * Inserts {@code key[index..end)} with {@code value} beneath the edge {@code into}.
 * <p>
 * Each edge has a {@code lesser} and a {@code greater} child. A child in the
 * greater slot whose key has length zero is a "deep node": it carries no
 * characters itself and only chains further siblings. The method compares the
 * first remaining key character against the children of {@code into}, then
 * either attaches a new edge, splits an existing one (via insertLesser /
 * insertGreater), or descends. Structural changes to {@code into} are made
 * while synchronized on {@code into}; every recursive call happens after the
 * synchronized block has been left.
 *
 * @param into - the edge to insert beneath
 * @param key - the characters containing the key
 * @param index - first unconsumed key character; must be less than {@code end}
 * @param end - end of the key (exclusive)
 * @param value - the value to store
 */
protected void doPut(final TrieEdge into, final char[] key,
final int index, final int end, final E value) {
assert index < end;
// To stay threadsafe, we synchronize on Edges when we modify them.
// To stay fast, we don't recurse until we are out of the synchro block.
// We optimize for our worst-case scenario off the hop;
// which is a deep node traversal (when one node points to many).
final TrieEdge nextInto;
int nextIndex;
// first character of the remaining key; all sibling ordering is decided on it
final char k = key[index];
// handle peeking into deeper nodes that will result in recursion.
// (unsynchronized read of the volatile field; re-validated under the lock below)
final TrieEdge greater = into.greater;
if (greater != null) {
// invariant: an edge never has a greater child without a lesser child
assert into.lesser != null;
final char[] greaterKey = greater.key;
// deep nodes are stored in greater slot
if (greaterKey.length == 0) {// this is a deep node!
// bounds check on its lesser
if (k - greater.lesser.key[0] >= 0) {
// if inserted key is not less than the lesser of the deep node,
// then recurse into the greater, without locking.
doPut(greater, key, index, end, value);
return;
}
// we are in a deep node, and are less than the greater.
// check if we need to insert a new deep node.
synchronized (into) {
if (greater == into.greater) {
// The only comod we need to worry about here is the greater node;
// if the lesser is changed while we were waiting, we're still okay.
final TrieEdge lesser = into.lesser;
final int delta = k - lesser.key[0];
if (delta != 0) {
// different first char than the lesser: chain a new deep node
// in front of the existing greater.
final TrieEdge newParent = new TrieEdge();
newParent.greater = into.greater;
final TrieEdge newNode = new TrieEdge(key, index, end);
newNode.value = value;
if (delta > 0) {
// new node is greater than current lesser; it becomes the lesser
// of the new deep node that replaces into.greater
newParent.lesser = newNode;
} else {
// new node is less than our lesser; take lesser spot
// and move the old lesser into the new deep node
newParent.lesser = lesser;
into.lesser = newNode;
}
into.greater = newParent;
return;// done!
}
// we start with the same char as into.lesser;
// find out how far we match, and possibly recurse.
if (insertLesser(into, key, index, end, value)) return;
// if we didn't return, we must recurse into this lesser
nextInto = into.lesser;
nextIndex = index + lesser.key.length;
} else {
// the trie was modified while we were waiting,
// recurse, as we need to run the deep checks again.
nextInto = into;
nextIndex = index;
}
}// end synchro
// if we didn't return, we need to recurse.
if (nextIndex == end) {
// the key is fully consumed by the matched edge; store the value there
nextInto.value = value;
} else {
doPut(nextInto, key, nextIndex, end, value);
}
return;
}// end deep node
}// end into.greater != null
// because we are only locking on the parent node,
// but potentially modifying the structure of child nodes,
// and we don't want to invite deadlock, we only ever iterate downward;
// we acquire the locks on children before modifying them
// or reading their lesser / greater nodes.
synchro: synchronized (into) {
// a null lesser means this edge has no children yet.
if (into.lesser == null) {
assert into.greater == null;
// both null, just take lesser and exit
into.lesser = new TrieEdge(key, index, end);
into.lesser.value = value;
return;
}
// start our compare on lesser...
final char[] lesserKey = into.lesser.key;
final int deltaLesser = k - lesserKey[0];
if (deltaLesser == 0) {
// we match the first char of the lesser.
if (insertLesser(into, key, index, end, value)) {
return;
} else {
// if we didn't return, we must recurse
nextInto = into.lesser;
nextIndex = index + lesserKey.length;
break synchro;
}
}
// first char differs from the lesser; see whether the greater slot is free
if (into.greater == null) {
// with no greater node, our job is easy. Just fill this node up.
final TrieEdge newNode = new TrieEdge(key, index, end);
newNode.value = value;
if (deltaLesser < 0) {
// new key sorts first: old lesser moves up to greater
into.greater = into.lesser;
into.lesser = newNode;
} else {
into.greater = newNode;
}
return;
}
// we have to check the greater,
// which may have changed since we last deep-checked it...
final char[] greaterKey = into.greater.key;
if (greaterKey.length == 0) {
// the greater is now deep and it wasn't before.
// recurse back into the same node; we can't get back here once deep
nextInto = into;
nextIndex = index;
break synchro;
}
if (deltaLesser < 0) {
// A greater exists, but we still need to usurp lesser:
// push the old lesser and greater down into a new deep node
final TrieEdge newParent = new TrieEdge();
final TrieEdge newNode = new TrieEdge(key, index, end);
newNode.value = value;
newParent.lesser = into.lesser;
newParent.greater = into.greater;
into.greater = newParent;
into.lesser = newNode;
return;
}
// The only thing left to do is run a compare on greater
final int deltaGreater = k - greaterKey[0];
if (deltaGreater == 0) {
// we must insert into the greater, or else recurse
if (insertGreater(into, key, index, end, value)) return;
nextInto = into.greater;
nextIndex = index + into.greater.key.length;
break synchro;
}
// we don't start with greater or lesser, and must create a deep node
final TrieEdge newParent = new TrieEdge();
final TrieEdge newNode = new TrieEdge(key, index, end);
newNode.value = value;
if (deltaGreater > 0) {
// new node is the greatest
newParent.greater = newNode;
newParent.lesser = into.greater;
} else {
// new node sorts between the lesser and the old greater
newParent.greater = into.greater;
newParent.lesser = newNode;
}
into.greater = newParent;
return;
}// end synchro. If we haven't returned, we need to recurse.
if (nextIndex == end) {
// the key is fully consumed by the matched edge; store the value there
nextInto.value = value;
} else {
doPut(nextInto, key, nextIndex, end, value);
}
}
/**
 * Tries to place {@code key[index..end)} at the lesser child of {@code into}.
 * Only called once the first character is known to equal the first character
 * of the lesser child's key.
 *
 * @return false when the key matches the whole of the lesser child's key
 * (the caller must then descend into that child); true when the trie was
 * modified here, either by splitting the child at the first differing
 * character or by slipping a new node in front of a longer existing key.
 */
private boolean insertLesser(TrieEdge into, char[] key, int index, int end, E value) {
  final char[] existing = into.lesser.key;
  final int keyLen = end - index;
  // first char already matched by the caller, so start comparing at 1
  int matched = 1;
  while (matched < keyLen && matched < existing.length) {
    final char incoming = key[index + matched];
    final char current = existing[matched];
    if (incoming != current) {
      // keys diverge here; split the existing edge at the common prefix
      into.lesser = incoming < current
          ? newEdgeLesser(into.lesser, keyLen, existing, matched, key, index, end, value)
          : newEdgeGreater(into.lesser, keyLen, existing, matched, key, index, end, value);
      return true;
    }
    matched++;
  }
  if (matched == existing.length) {
    // the whole existing key is a prefix of (or equal to) the new key
    return false;
  }
  // The existing key is longer than the inserted one: the new node takes
  // the existing node's place and the existing node, trimmed to its
  // remaining suffix, becomes the new node's lesser child.
  final TrieEdge inserted = new TrieEdge(key, index, end);
  inserted.value = value;
  final char[] remainder = new char[existing.length - keyLen];
  System.arraycopy(existing, keyLen, remainder, 0, remainder.length);
  final TrieEdge displaced = into.lesser;
  inserted.lesser = displaced;
  into.lesser = inserted;
  displaced.key = remainder;
  return true;
}
/**
 * Tries to place {@code key[index..end)} at the greater child of {@code into}.
 * Only called once the first character is known to equal the first character
 * of the greater child's key.
 *
 * @return false when the key matches the whole of the greater child's key
 * (the caller must then descend into that child); true when the trie was
 * modified here, either by splitting the child at the first differing
 * character or by slipping a new node in front of a longer existing key.
 */
private boolean insertGreater(TrieEdge into, char[] key, int index, int end, E value) {
  int matchesTo = 1;// only called when we've already matched the first char
  final int keyLen = end - index;
  final char[] greaterKey = into.greater.key;
  for (; matchesTo < keyLen; matchesTo++) {
    if (matchesTo == greaterKey.length) {
      return false;
    }
    final int delta = key[index + matchesTo] - greaterKey[matchesTo];
    if (delta < 0) {
      // new node is less than greater
      into.greater = newEdgeLesser(into.greater, keyLen, greaterKey, matchesTo, key, index, end, value);
      return true;
    }
    if (delta > 0) {
      // new node is greater than greater
      into.greater = newEdgeGreater(into.greater, keyLen, greaterKey, matchesTo, key, index, end, value);
      return true;
    }
  }
  if (matchesTo == greaterKey.length) {
    return false;
  }
  // If we haven't returned, then the existing key is longer than the one
  // we are inserting. Thus, we must slip the new node in front of the old one.
  final TrieEdge newNode = new TrieEdge(key, index, end);
  newNode.value = value;
  final char[] newGreater = new char[greaterKey.length - keyLen];
  System.arraycopy(greaterKey, keyLen, newGreater, 0, newGreater.length);
  // The displaced edge is the new node's only child, so it must go in the
  // lesser slot: doPut relies on "greater != null implies lesser != null",
  // and fills an empty lesser slot without looking at greater. Hanging the
  // displaced edge on greater let a later put create a lesser sibling that
  // sorts above it, which made the displaced key unreachable from get().
  final TrieEdge displaced = into.greater;
  newNode.lesser = displaced;
  into.greater = newNode;
  displaced.key = newGreater;
  return true;
}
/**
 * Splits {@code previous} at {@code matchesTo} for an inserted key that
 * sorts before the existing one.
 * <p>
 * Builds a new branch edge keyed by the first {@code matchesTo} characters
 * of {@code existing}, trims {@code previous.key} to the rest of
 * {@code existing}, and creates a value-carrying edge for the rest of the
 * inserted key. The new edge becomes the branch's lesser child and
 * {@code previous} its greater child.
 *
 * @param previous - the edge being split; its key is replaced
 * @param keyMax - length of the inserted key fragment
 * @param existing - the key of {@code previous} before the split
 * @param matchesTo - number of leading characters the two keys share
 * @param key - array holding the inserted key
 * @param keyIndex - offset of the inserted fragment within {@code key}
 * @param keyEnd - not read by this implementation
 * @param value - the value for the inserted key
 * @return the new branch edge, to be put where {@code previous} was
 */
protected TrieEdge newEdgeLesser(TrieEdge previous, int keyMax, char[] existing, int matchesTo, char[] key,
    int keyIndex, int keyEnd, E value) {
  final int shared = matchesTo;
  final char[] prefix = new char[shared];
  final char[] existingTail = new char[existing.length - shared];
  final char[] insertedTail = new char[keyMax - shared];

  // branch edge: the characters both keys have in common
  System.arraycopy(existing, 0, prefix, 0, shared);
  final TrieEdge branch = new TrieEdge(prefix, 0, shared);

  // the existing edge keeps only what follows the common prefix
  System.arraycopy(existing, shared, existingTail, 0, existingTail.length);
  previous.key = existingTail;

  // the inserted edge likewise holds only its unique suffix
  System.arraycopy(key, keyIndex + shared, insertedTail, 0, insertedTail.length);
  final TrieEdge inserted = new TrieEdge(insertedTail, 0, insertedTail.length);
  inserted.value = value;

  assert branch.key.length > 0;
  assert previous.key.length > 0;
  assert inserted.key.length > 0;

  branch.lesser = inserted;
  branch.greater = previous;
  assert inserted.toString().compareTo(previous.toString()) < 0 : "Invalid greaterthan: " + inserted +
      " is not < " + previous;
  return branch;
}
/**
 * Splits {@code previous} at {@code matchesTo} for an inserted key that
 * sorts after the existing one.
 * <p>
 * Builds a new branch edge keyed by the first {@code matchesTo} characters
 * of {@code existing}, trims {@code previous.key} to the rest of
 * {@code existing}, and creates a value-carrying edge for the rest of the
 * inserted key. {@code previous} becomes the branch's lesser child and the
 * new edge its greater child.
 *
 * @param previous - the edge being split; its key is replaced
 * @param keyMax - length of the inserted key fragment
 * @param existing - the key of {@code previous} before the split
 * @param matchesTo - number of leading characters the two keys share
 * @param key - array holding the inserted key
 * @param keyIndex - offset of the inserted fragment within {@code key}
 * @param keyEnd - not read by this implementation
 * @param value - the value for the inserted key
 * @return the new branch edge, to be put where {@code previous} was
 */
protected TrieEdge newEdgeGreater(TrieEdge previous, int keyMax, char[] existing, int matchesTo, char[] key,
    int keyIndex, int keyEnd, E value) {
  final int shared = matchesTo;
  final char[] prefix = new char[shared];
  final char[] existingTail = new char[existing.length - shared];
  final char[] insertedTail = new char[keyMax - shared];

  // branch edge: the characters both keys have in common
  System.arraycopy(existing, 0, prefix, 0, shared);
  final TrieEdge branch = new TrieEdge(prefix, 0, shared);

  // the existing edge keeps only what follows the common prefix
  System.arraycopy(existing, shared, existingTail, 0, existingTail.length);
  previous.key = existingTail;

  // the inserted edge likewise holds only its unique suffix
  System.arraycopy(key, keyIndex + shared, insertedTail, 0, insertedTail.length);
  final TrieEdge inserted = new TrieEdge(insertedTail, 0, insertedTail.length);
  inserted.value = value;

  assert branch.key.length > 0;
  assert previous.key.length > 0;
  assert inserted.key.length > 0;

  branch.lesser = previous;
  branch.greater = inserted;
  assert inserted.toString().compareTo(previous.toString()) > 0;
  return branch;
}
/**
 * Extension hook for subclasses that want their own locking mechanism (or
 * event dispatch) around edge modification. This implementation does nothing
 * and returns nothing, and no code visible in this class invokes it.
 * <p>
 * Guidance from the original author for implementors: DON'T DO ANYTHING WHICH
 * COULD BLOCK FOR A LONG TIME. Acquire locks tentatively, either with
 * {@link Lock#tryLock()} for failfast, or
 * {@link Lock#tryLock(long, java.util.concurrent.TimeUnit)}; wait times, if
 * any, should be on a nano scale. If you wait with
 * {@link Object#wait(long, int)}, the long param is millis and should be
 * zero; the int param is nanos, keep it in the hundreds. If ownsParent is
 * false, you should be running in unsynchronized code, and the only use for a
 * synchronized block is to acquire a {@link Lock}. If ownsParent is true,
 * nobody will be able to modify your parent while you run.
 *
 * @param into - The edge to lock
 * @param ownsParent - Whether we already own an explicit lock on the parent.
 */
protected void lock(TrieEdge into, boolean ownsParent) {
}
/**
 * Extension hook paired with {@link #lock(TrieEdge, boolean)}, for subclasses
 * that need proper concurrent locking or event dispatch. This implementation
 * does nothing, and no code visible in this class invokes it.
 * <p>
 * Per the original author, it is intended to run in the finally block of
 * whatever code called lock(). If your lock() waited with into.wait(0, nanos),
 * this is the place to call into.notify().
 *
 * @param into - The edge to unlock
 * @param ownsParent - If true, you are already synchronized on into.
 */
protected void unlock(TrieEdge into, boolean ownsParent) {
}
/**
 * Renders the trie as a multi-line debug string: a {@code StringTrie[}
 * header, the empty-key value (when set), one line per keyed edge, and a
 * closing {@code ]}.
 * <p>
 * NOTE(review): the root's greater subtree is printed before its lesser
 * subtree, whereas visit() prints lesser first at every other level -
 * confirm whether that ordering is intended.
 */
@Override
public String toString() {
  final StringBuilder out = new StringBuilder("StringTrie[\n");
  if (root.value != null) {
    out.append("\"\" : ").append(root.value).append("\n");
  }
  if (root.greater != null) {
    visit(root.greater, 1, new char[0], out);
  }
  if (root.lesser != null) {
    visit(root.lesser, 1, new char[0], out);
  }
  return out.append("]").toString();
}
/**
 * Appends a line for {@code root} (when it has a non-empty key) and then
 * recursively for its lesser and greater children.
 * <p>
 * Each printed line is {@code depth} spaces, the edge's own key fragment,
 * two tabs, and either the edge's value or {@code [branch]} when it has none.
 *
 * @param root - the edge to print
 * @param depth - number of leading spaces for this edge's line
 * @param key - accumulated key above this edge; only its emptiness affects
 * the output (it decides whether children are indented further)
 * @param b - the builder to append to
 */
private void visit(TrieEdge root, int depth, char[] key, StringBuilder b) {
  final boolean hasPrefix = key.length != 0;
  if (root.key.length != 0) {
    int pad = depth;
    while (pad-- > 0) {
      b.append(' ');
    }
    b.append(root.key).append("\t\t");
    if (root.value != null) {
      b.append(root.value);
    } else {
      b.append("[branch]");
    }
    b.append('\n');
  }
  if (root.lesser != null) {
    final char[] edgeKey = root.lesser.key;
    final char[] path = hasPrefix ? joinKeys(key, edgeKey) : edgeKey;
    visit(root.lesser, hasPrefix ? depth + 1 : depth, path, b);
  }
  if (root.greater != null) {
    final char[] edgeKey = root.greater.key;
    final char[] path = hasPrefix ? joinKeys(key, edgeKey) : edgeKey;
    // a key-less (deep) greater node prints nothing, so it must not indent
    final boolean indent = hasPrefix && root.greater.key.length > 0;
    visit(root.greater, indent ? depth + 1 : depth, path, b);
  }
}

/** Returns a new exact-length array holding {@code head} followed by {@code tail}. */
private static char[] joinKeys(char[] head, char[] tail) {
  final char[] joined = new char[head.length + tail.length];
  System.arraycopy(head, 0, joined, 0, head.length);
  System.arraycopy(tail, 0, joined, head.length, tail.length);
  return joined;
}
/**
 * Looks up the value stored under a string key.
 * A null key is looked up as {@code EMPTY_STRING}.
 *
 * @param key - the key; may be null
 * @return whatever the underlying lookup yields for this key
 */
public E get(String key) {
  return key == null
      ? get(EMPTY_STRING)
      : get(new Chars(key.toCharArray()), 0, key.length());
}
/**
 * Looks up the value stored under the whole of {@code key}.
 * A null key is replaced by {@code EMPTY_STRING}.
 *
 * @param key - the key characters; may be null
 * @return whatever the underlying lookup yields for this key
 */
public E get(char[] key) {
  final char[] chars = key != null ? key : EMPTY_STRING;
  final Chars wrapped = new Chars(chars);
  return get(wrapped, 0, chars.length);
}
/**
 * Looks up the value stored under {@code key[pos..end)}.
 * A null key is replaced by {@code EMPTY_STRING}; pos and end are passed
 * through unchanged.
 * <p>
 * NOTE(review): the same pos/end are given both to the Chars constructor and
 * to the lookup, which indexes with {@code charAt(pos + i)}. If
 * {@code Chars(char[], int, int)} already offsets charAt by pos, the offset
 * would be applied twice - verify against the Chars implementation.
 *
 * @param key - array holding the key; may be null
 * @param pos - first key character (inclusive)
 * @param end - end of the key (exclusive)
 * @return whatever the underlying lookup yields for this key
 */
public E get(char[] key, int pos, int end) {
  final char[] chars = key != null ? key : EMPTY_STRING;
  final Chars window = new Chars(chars, pos, end);
  return get(window, pos, end);
}
/**
 * Walks the trie from the root, consuming {@code keys} from {@code pos} up to
 * {@code end}, without taking any locks.
 * <p>
 * At each edge the lesser child is compared first, character by character.
 * A full match descends into it; a key character below the lesser's, or a
 * key that ends part-way through the lesser's key, ends the search. Otherwise
 * the greater child is tried: a zero-length (deep) greater is entered without
 * consuming characters, any other greater must match in full.
 *
 * @param keys - the characters to look up, read via {@code charAt}
 * @param pos - index of the first character to match
 * @param end - index at which the key ends (exclusive)
 * @return {@code returnValue(...)} for the edge reached when the key is fully
 * consumed, otherwise {@code onEmpty(...)} for the edge where matching stopped
 */
public E get(final Chars keys, int pos, final int end) {
TrieEdge e = root;
while (e != null) {
// our test for success is always when we make it through a for loop
// which matches our key, and when the next search position = key length.
// if there was a value at this key, we would have returned it.
if (pos == end) return returnValue(e, keys, pos, end);
if (e.lesser != null) {
final char[] lesser = e.lesser.key;
testlesser: {
for (int i = 0; i < lesser.length; i++) {
// requested key ends inside the lesser's key: no exact match exists
if (end <= pos + i) return onEmpty(e, keys, pos, end);
final int delta = keys.charAt(pos + i) - lesser[i];
if (delta < 0) {
// if a lesser is greater than us, there's nothing to return
return onEmpty(e, keys, pos, end);
}
if (delta > 0) {
// we sort after the lesser; leave the block and try the greater
break testlesser;
}
}// end for
// if we didn't break, we equal the lesser. Descend into it.
e = e.lesser;
pos += lesser.length;
continue;
}// end test lesser
// requested key is greater than lesser key. Carry on.
}// end lesser
if (e.greater == null) return onEmpty(e, keys, pos, end);
final char[] greater = e.greater.key;
if (greater.length == 0) {
// deep node, just continue search
e = e.greater;
continue;
}
final int len = greater.length;
// not enough characters left to match the whole greater key
if (len + pos > end) return onEmpty(e, keys, pos, end);
for (int i = 0; i < len; i++) {
if (keys.charAt(pos + i) != greater[i]) return onEmpty(e, keys, pos, end);
}
pos += len;
// still haven't returned, so we match this greater
e = e.greater;
}
// only reached if e became null
return onEmpty(e, keys, pos, end);
}
/**
 * Not implemented in this class: always returns null, regardless of
 * {@code prefix}. Callers must null-check the result.
 * NOTE(review): by its name this is meant to yield the entries whose key
 * starts with {@code prefix} - confirm whether a subclass provides that.
 *
 * @param prefix - ignored by this implementation
 * @return always null
 */
public Iterable findPrefixed(String prefix) {
return null;
}
/**
 * Empty stub: this implementation does nothing with {@code charPoolTrie}.
 *
 * @param charPoolTrie - ignored by this implementation
 */
public void compress(CharPoolTrie charPoolTrie) {
}
/**
 * Hook called by the lookup when the requested key has been fully matched at
 * edge {@code e}. This implementation returns the edge's value, which is null
 * when the edge holds no value; the other parameters are not used here.
 *
 * @param e - the edge reached by the lookup
 * @param keys - the key being looked up
 * @param pos - the lookup position when the match completed
 * @param end - the end of the key (exclusive)
 * @return {@code e.value}
 */
protected E returnValue(TrieEdge e, Chars keys, int pos, int end) {
return e.value;
}
/**
 * Hook called by the lookup when the requested key cannot be matched.
 * This implementation ignores all parameters and returns null.
 *
 * @param e - the edge at which matching stopped
 * @param keys - the key being looked up
 * @param pos - the lookup position at which matching stopped
 * @param end - the end of the key (exclusive)
 * @return always null
 */
protected E onEmpty(TrieEdge e, Chars keys, int pos, int end) {
return null;
}
}