com.firefly.utils.collection.ArrayTernaryTrie Maven / Gradle / Ivy
package com.firefly.utils.collection;
import java.nio.ByteBuffer;
import java.util.Arrays;
import java.util.HashSet;
import java.util.Set;
import com.firefly.utils.StringUtils;
/**
*
* A Ternary Trie String lookup data structure.
*
*
* This Trie is of a fixed size and cannot grow (which can be a good thing with
* regards to DOS when used as a cache).
*
*
* The Trie is stored in 3 arrays:
*
*
* - char[] _tree
* - This is semantically 2 dimensional array flattened into a 1 dimensional
* char array. The second dimension is that every 4 sequential elements
* represents a row of: character; hi index; eq index; low index, used to build
* a ternary trie of key strings.
* - String[] _key
* - An array of key values where each element matches a row in the _tree
* array. A non zero key element indicates that the _tree row is a complete key
* rather than an intermediate character of a longer key.
* - V[] _value
* - An array of values corresponding to the _key array
*
*
* The lookup of a value will iterate through the _tree array matching
* characters. If the equal tree branch is followed, then the _key array is
* looked up to see if this is a complete match. If a match is found then the
* _value array is looked up to return the matching value.
*
*
* This Trie may be instantiated either as case sensitive or insensitive.
*
*
* This Trie is not Threadsafe and contains no mutual exclusion or deliberate
* memory barriers. It is intended for an ArrayTrie to be built by a single
* thread and then used concurrently by multiple threads and not mutated during
* that access. If concurrent mutations of the Trie is required external locks
* need to be applied.
*
*
* @param
* the Entry type
*/
public class ArrayTernaryTrie extends AbstractTrie {
private static int LO = 1;
private static int EQ = 2;
private static int HI = 3;
/**
* The Size of a Trie row is the char, and the low, equal and high child
* pointers
*/
private static final int ROW_SIZE = 4;
/**
* The Trie rows in a single array which allows a lookup of row,character to
* the next row in the Trie. This is actually a 2 dimensional array that has
* been flattened to achieve locality of reference.
*/
private final char[] _tree;
/**
* The key (if any) for a Trie row. A row may be a leaf, a node or both in
* the Trie tree.
*/
private final String[] _key;
/**
* The value (if any) for a Trie row. A row may be a leaf, a node or both in
* the Trie tree.
*/
private final V[] _value;
/**
* The number of rows allocated
*/
private char _rows;
/**
* Create a case insensitive Trie of default capacity.
*/
public ArrayTernaryTrie() {
this(128);
}
/**
* Create a Trie of default capacity
*
* @param insensitive
* true if the Trie is insensitive to the case of the key.
*/
public ArrayTernaryTrie(boolean insensitive) {
this(insensitive, 128);
}
/**
* Create a case insensitive Trie
*
* @param capacity
* The capacity of the Trie, which is in the worst case is the
* total number of characters of all keys stored in the Trie. The
* capacity needed is dependent of the shared prefixes of the
* keys. For example, a capacity of 6 nodes is required to store
* keys "foo" and "bar", but a capacity of only 4 is required to
* store "bar" and "bat".
*/
public ArrayTernaryTrie(int capacity) {
this(true, capacity);
}
/**
* Create a Trie
*
* @param insensitive
* true if the Trie is insensitive to the case of the key.
* @param capacity
* The capacity of the Trie, which is in the worst case is the
* total number of characters of all keys stored in the Trie. The
* capacity needed is dependent of the shared prefixes of the
* keys. For example, a capacity of 6 nodes is required to store
* keys "foo" and "bar", but a capacity of only 4 is required to
* store "bar" and "bat".
*/
@SuppressWarnings("unchecked")
public ArrayTernaryTrie(boolean insensitive, int capacity) {
super(insensitive);
_value = (V[]) new Object[capacity];
_tree = new char[capacity * ROW_SIZE];
_key = new String[capacity];
}
/**
* Copy Trie and change capacity by a factor
*
* @param trie
* the trie to copy from
* @param factor
* the factor to grow the capacity by
*/
public ArrayTernaryTrie(ArrayTernaryTrie trie, double factor) {
super(trie.isCaseInsensitive());
int capacity = (int) (trie._value.length * factor);
_rows = trie._rows;
_value = Arrays.copyOf(trie._value, capacity);
_tree = Arrays.copyOf(trie._tree, capacity * ROW_SIZE);
_key = Arrays.copyOf(trie._key, capacity);
}
@Override
public boolean put(String s, V v) {
int t = 0;
int limit = s.length();
int last = 0;
for (int k = 0; k < limit; k++) {
char c = s.charAt(k);
if (isCaseInsensitive() && c < 128)
c = StringUtils.lowercases[c];
while (true) {
int row = ROW_SIZE * t;
// Do we need to create the new row?
if (t == _rows) {
_rows++;
if (_rows >= _key.length) {
_rows--;
return false;
}
_tree[row] = c;
}
char n = _tree[row];
int diff = n - c;
if (diff == 0)
t = _tree[last = (row + EQ)];
else if (diff < 0)
t = _tree[last = (row + LO)];
else
t = _tree[last = (row + HI)];
// do we need a new row?
if (t == 0) {
t = _rows;
_tree[last] = (char) t;
}
if (diff == 0)
break;
}
}
// Do we need to create the new row?
if (t == _rows) {
_rows++;
if (_rows >= _key.length) {
_rows--;
return false;
}
}
// Put the key and value
_key[t] = v == null ? null : s;
_value[t] = v;
return true;
}
@Override
public V get(String s, int offset, int len) {
int t = 0;
for (int i = 0; i < len;) {
char c = s.charAt(offset + i++);
if (isCaseInsensitive() && c < 128)
c = StringUtils.lowercases[c];
while (true) {
int row = ROW_SIZE * t;
char n = _tree[row];
int diff = n - c;
if (diff == 0) {
t = _tree[row + EQ];
if (t == 0)
return null;
break;
}
t = _tree[row + hilo(diff)];
if (t == 0)
return null;
}
}
return _value[t];
}
@Override
public V get(ByteBuffer b, int offset, int len) {
int t = 0;
offset += b.position();
for (int i = 0; i < len;) {
byte c = (byte) (b.get(offset + i++) & 0x7f);
if (isCaseInsensitive())
c = (byte) StringUtils.lowercases[c];
while (true) {
int row = ROW_SIZE * t;
char n = _tree[row];
int diff = n - c;
if (diff == 0) {
t = _tree[row + EQ];
if (t == 0)
return null;
break;
}
t = _tree[row + hilo(diff)];
if (t == 0)
return null;
}
}
return _value[t];
}
@Override
public V getBest(String s) {
return getBest(0, s, 0, s.length());
}
@Override
public V getBest(String s, int offset, int length) {
return getBest(0, s, offset, length);
}
private V getBest(int t, String s, int offset, int len) {
int node = t;
int end = offset + len;
loop: while (offset < end) {
char c = s.charAt(offset++);
len--;
if (isCaseInsensitive() && c < 128)
c = StringUtils.lowercases[c];
while (true) {
int row = ROW_SIZE * t;
char n = _tree[row];
int diff = n - c;
if (diff == 0) {
t = _tree[row + EQ];
if (t == 0)
break loop;
// if this node is a match, recurse to remember
if (_key[t] != null) {
node = t;
V better = getBest(t, s, offset, len);
if (better != null)
return better;
}
break;
}
t = _tree[row + hilo(diff)];
if (t == 0)
break loop;
}
}
return _value[node];
}
@Override
public V getBest(ByteBuffer b, int offset, int len) {
if (b.hasArray())
return getBest(0, b.array(), b.arrayOffset() + b.position()
+ offset, len);
return getBest(0, b, offset, len);
}
private V getBest(int t, byte[] b, int offset, int len) {
int node = t;
int end = offset + len;
loop: while (offset < end) {
byte c = (byte) (b[offset++] & 0x7f);
len--;
if (isCaseInsensitive())
c = (byte) StringUtils.lowercases[c];
while (true) {
int row = ROW_SIZE * t;
char n = _tree[row];
int diff = n - c;
if (diff == 0) {
t = _tree[row + EQ];
if (t == 0)
break loop;
// if this node is a match, recurse to remember
if (_key[t] != null) {
node = t;
V better = getBest(t, b, offset, len);
if (better != null)
return better;
}
break;
}
t = _tree[row + hilo(diff)];
if (t == 0)
break loop;
}
}
return _value[node];
}
private V getBest(int t, ByteBuffer b, int offset, int len) {
int node = t;
int o = offset + b.position();
loop: for (int i = 0; i < len; i++) {
byte c = (byte) (b.get(o + i) & 0x7f);
if (isCaseInsensitive())
c = (byte) StringUtils.lowercases[c];
while (true) {
int row = ROW_SIZE * t;
char n = _tree[row];
int diff = n - c;
if (diff == 0) {
t = _tree[row + EQ];
if (t == 0)
break loop;
// if this node is a match, recurse to remember
if (_key[t] != null) {
node = t;
V best = getBest(t, b, offset + i + 1, len - i - 1);
if (best != null)
return best;
}
break;
}
t = _tree[row + hilo(diff)];
if (t == 0)
break loop;
}
}
return _value[node];
}
@Override
public String toString() {
StringBuilder buf = new StringBuilder();
for (int r = 0; r <= _rows; r++) {
if (_key[r] != null && _value[r] != null) {
buf.append(',');
buf.append(_key[r]);
buf.append('=');
buf.append(_value[r].toString());
}
}
if (buf.length() == 0)
return "{}";
buf.setCharAt(0, '{');
buf.append('}');
return buf.toString();
}
@Override
public Set keySet() {
Set keys = new HashSet<>();
for (int r = 0; r <= _rows; r++) {
if (_key[r] != null && _value[r] != null)
keys.add(_key[r]);
}
return keys;
}
@Override
public boolean isFull() {
return _rows + 1 == _key.length;
}
public static int hilo(int diff) {
// branchless equivalent to return ((diff<0)?LO:HI);
// return 3+2*((diff&Integer.MIN_VALUE)>>Integer.SIZE-1);
return 1 + (diff | Integer.MAX_VALUE) / (Integer.MAX_VALUE / 2);
}
public void dump() {
for (int r = 0; r < _rows; r++) {
char c = _tree[r * ROW_SIZE + 0];
System.err.printf("%4d [%s,%d,%d,%d] '%s':%s%n",
r,
(c < ' ' || c > 127) ? ("" + (int) c) : "'" + c + "'",
(int) _tree[r * ROW_SIZE + LO], (int) _tree[r * ROW_SIZE + EQ], (int) _tree[r * ROW_SIZE + HI], _key[r],
_value[r]);
}
}
}