org.eclipse.jetty.util.ArrayTrie Maven / Gradle / Ivy
//
// ========================================================================
// Copyright (c) 1995-2015 Mort Bay Consulting Pty. Ltd.
// ------------------------------------------------------------------------
// All rights reserved. This program and the accompanying materials
// are made available under the terms of the Eclipse Public License v1.0
// and Apache License v2.0 which accompanies this distribution.
//
// The Eclipse Public License is available at
// http://www.eclipse.org/legal/epl-v10.html
//
// The Apache License v2.0 is available at
// http://www.opensource.org/licenses/apache2.0.php
//
// You may elect to redistribute this code under either of these licenses.
// ========================================================================
//
package org.eclipse.jetty.util;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.HashSet;
import java.util.Set;
/* ------------------------------------------------------------ */
/**
* A Trie String lookup data structure using a fixed size array.
* This implementation is always case insensitive and is optimal for
* a small number of fixed strings with few special characters. The
* Trie is stored in an array of lookup tables, each indexed by the
* next character of the key. Frequently used characters are directly
* indexed in each lookup table, whilst infrequently used characters
* must use a big character table.
*
* This Trie is very space efficient if the key characters are
* from ' ', '+', '-', ':', ';', '.', 'A' to 'Z' or 'a' to 'z'.
* Other ISO-8859-1 characters can be used by the key, but less space
* efficiently.
*
* This Trie is not Threadsafe and contains no mutual exclusion
* or deliberate memory barriers. It is intended for an ArrayTrie to be
* built by a single thread and then used concurrently by multiple threads
* and not mutated during that access. If concurrent mutations of the
* Trie is required external locks need to be applied.
*
* @param
*/
public class ArrayTrie extends AbstractTrie
{
/**
* The Size of a Trie row is how many characters can be looked
* up directly without going to a big index. This is set at
* 32 to cover case insensitive alphabet and a few other common
* characters.
*/
private static final int ROW_SIZE = 32;
/**
* The index lookup table, this maps a character as a byte
* (ISO-8859-1 or UTF8) to an index within a Trie row
*/
private static final int[] __lookup =
{ // 0 1 2 3 4 5 6 7 8 9 A B C D E F
/*0*/-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
/*1*/-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
/*2*/31, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 26, -1, 27, 30, -1,
/*3*/-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 28, 29, -1, -1, -1, -1,
/*4*/-1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
/*5*/15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1,
/*6*/-1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
/*7*/15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1,
};
/**
* The Trie rows in a single array which allows a lookup of row,character
* to the next row in the Trie. This is actually a 2 dimensional
* array that has been flattened to achieve locality of reference.
* The first ROW_SIZE entries are for row 0, then next ROW_SIZE
* entries are for row 1 etc. So in general instead of using
* _rows[row][index], we use _rows[row*ROW_SIZE+index] to look up
* the next row for a given character.
*
* The array is of characters rather than integers to save space.
*/
private final char[] _rowIndex;
/**
* The key (if any) for a Trie row.
* A row may be a leaf, a node or both in the Trie tree.
*/
private final String[] _key;
/**
* The value (if any) for a Trie row.
* A row may be a leaf, a node or both in the Trie tree.
*/
private final V[] _value;
/**
* A big index for each row.
* If a character outside of the lookup map is needed,
* then a big index will be created for the row, with
* 256 entries, one for each possible byte.
*/
private char[][] _bigIndex;
/**
* The number of rows allocated
*/
private char _rows;
public ArrayTrie()
{
this(128);
}
/* ------------------------------------------------------------ */
/**
* @param capacity The capacity of the trie, which at the worst case
* is the total number of characters of all keys stored in the Trie.
* The capacity needed is dependent of the shared prefixes of the keys.
* For example, a capacity of 6 nodes is required to store keys "foo"
* and "bar", but a capacity of only 4 is required to
* store "bar" and "bat".
*/
@SuppressWarnings("unchecked")
public ArrayTrie(int capacity)
{
super(true);
_value=(V[])new Object[capacity];
_rowIndex=new char[capacity*32];
_key=new String[capacity];
}
/* ------------------------------------------------------------ */
@Override
public boolean put(String s, V v)
{
int t=0;
int k;
int limit = s.length();
for(k=0; k < limit; k++)
{
char c=s.charAt(k);
int index=__lookup[c&0x7f];
if (index>=0)
{
int idx=t*ROW_SIZE+index;
t=_rowIndex[idx];
if (t==0)
{
if (++_rows>=_value.length)
return false;
t=_rowIndex[idx]=_rows;
}
}
else if (c>127)
throw new IllegalArgumentException("non ascii character");
else
{
if (_bigIndex==null)
_bigIndex=new char[_value.length][];
if (t>=_bigIndex.length)
return false;
char[] big=_bigIndex[t];
if (big==null)
big=_bigIndex[t]=new char[128];
t=big[c];
if (t==0)
{
if (_rows==_value.length)
return false;
t=big[c]=++_rows;
}
}
}
if (t>=_key.length)
{
_rows=(char)_key.length;
return false;
}
_key[t]=v==null?null:s;
_value[t] = v;
return true;
}
/* ------------------------------------------------------------ */
@Override
public V get(String s, int offset, int len)
{
int t = 0;
for(int i=0; i < len; i++)
{
char c=s.charAt(offset+i);
int index=__lookup[c&0x7f];
if (index>=0)
{
int idx=t*ROW_SIZE+index;
t=_rowIndex[idx];
if (t==0)
return null;
}
else
{
char[] big = _bigIndex==null?null:_bigIndex[t];
if (big==null)
return null;
t=big[c];
if (t==0)
return null;
}
}
return _value[t];
}
/* ------------------------------------------------------------ */
@Override
public V get(ByteBuffer b,int offset,int len)
{
int t = 0;
for(int i=0; i < len; i++)
{
byte c=b.get(offset+i);
int index=__lookup[c&0x7f];
if (index>=0)
{
int idx=t*ROW_SIZE+index;
t=_rowIndex[idx];
if (t==0)
return null;
}
else
{
char[] big = _bigIndex==null?null:_bigIndex[t];
if (big==null)
return null;
t=big[c];
if (t==0)
return null;
}
}
return (V)_value[t];
}
/* ------------------------------------------------------------ */
@Override
public V getBest(byte[] b,int offset,int len)
{
return getBest(0,b,offset,len);
}
/* ------------------------------------------------------------ */
@Override
public V getBest(ByteBuffer b,int offset,int len)
{
if (b.hasArray())
return getBest(0,b.array(),b.arrayOffset()+b.position()+offset,len);
return getBest(0,b,offset,len);
}
/* ------------------------------------------------------------ */
@Override
public V getBest(String s, int offset, int len)
{
return getBest(0,s,offset,len);
}
/* ------------------------------------------------------------ */
private V getBest(int t, String s, int offset, int len)
{
int pos=offset;
for(int i=0; i < len; i++)
{
char c=s.charAt(pos++);
int index=__lookup[c&0x7f];
if (index>=0)
{
int idx=t*ROW_SIZE+index;
int nt=_rowIndex[idx];
if (nt==0)
break;
t=nt;
}
else
{
char[] big = _bigIndex==null?null:_bigIndex[t];
if (big==null)
return null;
int nt=big[c];
if (nt==0)
break;
t=nt;
}
// Is the next Trie is a match
if (_key[t]!=null)
{
// Recurse so we can remember this possibility
V best=getBest(t,s,offset+i+1,len-i-1);
if (best!=null)
return best;
return (V)_value[t];
}
}
return (V)_value[t];
}
/* ------------------------------------------------------------ */
private V getBest(int t,byte[] b,int offset,int len)
{
for(int i=0; i < len; i++)
{
byte c=b[offset+i];
int index=__lookup[c&0x7f];
if (index>=0)
{
int idx=t*ROW_SIZE+index;
int nt=_rowIndex[idx];
if (nt==0)
break;
t=nt;
}
else
{
char[] big = _bigIndex==null?null:_bigIndex[t];
if (big==null)
return null;
int nt=big[c];
if (nt==0)
break;
t=nt;
}
// Is the next Trie is a match
if (_key[t]!=null)
{
// Recurse so we can remember this possibility
V best=getBest(t,b,offset+i+1,len-i-1);
if (best!=null)
return best;
break;
}
}
return (V)_value[t];
}
private V getBest(int t,ByteBuffer b,int offset,int len)
{
int pos=b.position()+offset;
for(int i=0; i < len; i++)
{
byte c=b.get(pos++);
int index=__lookup[c&0x7f];
if (index>=0)
{
int idx=t*ROW_SIZE+index;
int nt=_rowIndex[idx];
if (nt==0)
break;
t=nt;
}
else
{
char[] big = _bigIndex==null?null:_bigIndex[t];
if (big==null)
return null;
int nt=big[c];
if (nt==0)
break;
t=nt;
}
// Is the next Trie is a match
if (_key[t]!=null)
{
// Recurse so we can remember this possibility
V best=getBest(t,b,offset+i+1,len-i-1);
if (best!=null)
return best;
break;
}
}
return (V)_value[t];
}
@Override
public String toString()
{
StringBuilder buf = new StringBuilder();
toString(buf,0);
if (buf.length()==0)
return "{}";
buf.setCharAt(0,'{');
buf.append('}');
return buf.toString();
}
private void toString(Appendable out, int t)
{
if (_value[t]!=null)
{
try
{
out.append(',');
out.append(_key[t]);
out.append('=');
out.append(_value[t].toString());
}
catch (IOException e)
{
throw new RuntimeException(e);
}
}
for(int i=0; i < ROW_SIZE; i++)
{
int idx=t*ROW_SIZE+i;
if (_rowIndex[idx] != 0)
toString(out,_rowIndex[idx]);
}
char[] big = _bigIndex==null?null:_bigIndex[t];
if (big!=null)
{
for (int i:big)
if (i!=0)
toString(out,i);
}
}
@Override
public Set keySet()
{
Set keys = new HashSet<>();
keySet(keys,0);
return keys;
}
private void keySet(Set set, int t)
{
if (t<_value.length&&_value[t]!=null)
set.add(_key[t]);
for(int i=0; i < ROW_SIZE; i++)
{
int idx=t*ROW_SIZE+i;
if (idx<_rowIndex.length && _rowIndex[idx] != 0)
keySet(set,_rowIndex[idx]);
}
char[] big = _bigIndex==null||t>=_bigIndex.length?null:_bigIndex[t];
if (big!=null)
{
for (int i:big)
if (i!=0)
keySet(set,i);
}
}
@Override
public boolean isFull()
{
return _rows+1>=_key.length;
}
}