All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.fireflysource.common.collection.trie.ArrayTrie Maven / Gradle / Ivy

There is a newer version: 5.0.2
Show newest version
package com.fireflysource.common.collection.trie;

import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.Arrays;
import java.util.HashSet;
import java.util.Set;

/**
 * 

A Trie String lookup data structure using a fixed size array.

*

This implementation is always case-insensitive and is optimal for * a few fixed strings with few special characters. The * Trie is stored in an array of lookup tables, each indexed by the * next character of the key. Frequently used characters directly * index in each lookup table, whilst infrequently used characters * must use a big character table. *

*

This Trie is very space efficient if the key characters are * from ' ', '+', '-', ':', ';', '.', 'A' to 'Z' or 'a' to 'z'. * Other ISO-8859-1 characters can be used by the key, but less space * efficiently. *

*

This Trie is not Threadsafe and contains no mutual exclusion * or deliberate memory barriers. It is intended for an ArrayTrie to be * built by a single thread and then used concurrently by multiple threads * and not mutated during that access. If concurrent mutations of the * Trie is required external locks need to be applied. *

* * @param the element of entry */ public class ArrayTrie extends AbstractTrie { /** * The Size of a Trie row is how many characters can be looked * up directly without going to a big index. This is set at * 32 to cover case-insensitive alphabet and a few other common * characters. */ private static final int ROW_SIZE = 32; /** * The index lookup table, this maps a character as a byte * (ISO-8859-1 or UTF8) to an index within a Trie row */ private static final int[] LOOKUP = { // 0 1 2 3 4 5 6 7 8 9 A B C D E F /*0*/-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /*1*/-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /*2*/31, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 26, -1, 27, 30, -1, /*3*/-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 28, 29, -1, -1, -1, -1, /*4*/-1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, /*5*/15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1, /*6*/-1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, /*7*/15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1, }; /** * The Trie rows in a single array which allows a lookup of row,character * to the next row in the Trie. This is actually a 2 dimensional * array that has been flattened to achieve locality of reference. * The first ROW_SIZE entries are for row 0, then next ROW_SIZE * entries are for row 1 etc. So in general instead of using * rows[row][index], we use rows[row*ROW_SIZE+index] to look up * the next row for a given character. *

* The array is of characters rather than integers to save space. */ private final char[] rowIndex; /** * The key (if any) for a Trie row. * A row may be a leaf, a node or both in the Trie tree. */ private final String[] key; /** * The value (if any) for a Trie row. * A row may be a leaf, a node or both in the Trie tree. */ private final V[] value; /** * A big index for each row. * If a character outside of the lookup map needs, * then a big index will be created for the row, with * 256 entries, one for each possible byte. */ private char[][] bigIndex; /** * The number of rows allocated */ private char rows; public ArrayTrie() { this(128); } /** * @param capacity The capacity of the trie, which at the worst case * is the total number of characters of all keys stored in the Trie. * The capacity needed is dependent of the shared prefixes of the keys. * For example, a capacity of 6 nodes require to store the keys "foo" * and "bar", but a capacity of only 4 is required to * store "bar" and "bat". */ @SuppressWarnings("unchecked") public ArrayTrie(int capacity) { super(true); value = (V[]) new Object[capacity]; rowIndex = new char[capacity * 32]; key = new String[capacity]; } @Override public void clear() { rows = 0; Arrays.fill(value, null); Arrays.fill(rowIndex, (char) 0); Arrays.fill(key, null); } @Override public boolean put(String s, V v) { int t = 0; int k; int limit = s.length(); for (k = 0; k < limit; k++) { char c = s.charAt(k); int index = LOOKUP[c & 0x7f]; if (index >= 0) { int idx = t * ROW_SIZE + index; t = rowIndex[idx]; if (t == 0) { if (++rows >= value.length) return false; t = rowIndex[idx] = rows; } } else if (c > 127) throw new IllegalArgumentException("non ascii character"); else { if (bigIndex == null) bigIndex = new char[value.length][]; if (t >= bigIndex.length) return false; char[] big = bigIndex[t]; if (big == null) big = bigIndex[t] = new char[128]; t = big[c]; if (t == 0) { if (rows == value.length) return false; t = big[c] = ++rows; } } } if (t >= key.length) { rows = (char) key.length; return false; } key[t] = v == null ? null : s; value[t] = v; return true; } @Override public V get(String s, int offset, int len) { int t = 0; for (int i = 0; i < len; i++) { char c = s.charAt(offset + i); int index = LOOKUP[c & 0x7f]; if (index >= 0) { int idx = t * ROW_SIZE + index; t = rowIndex[idx]; if (t == 0) return null; } else { char[] big = bigIndex == null ? null : bigIndex[t]; if (big == null) return null; t = big[c]; if (t == 0) return null; } } return value[t]; } @Override public V get(ByteBuffer b, int offset, int len) { int t = 0; for (int i = 0; i < len; i++) { byte c = b.get(offset + i); int index = LOOKUP[c & 0x7f]; if (index >= 0) { int idx = t * ROW_SIZE + index; t = rowIndex[idx]; if (t == 0) return null; } else { char[] big = bigIndex == null ? null : bigIndex[t]; if (big == null) return null; t = big[c]; if (t == 0) return null; } } return value[t]; } @Override public V getBest(byte[] b, int offset, int len) { return getBest(0, b, offset, len); } @Override public V getBest(ByteBuffer b, int offset, int len) { if (b.hasArray()) return getBest(0, b.array(), b.arrayOffset() + b.position() + offset, len); return getBest(0, b, offset, len); } @Override public V getBest(String s, int offset, int len) { return getBest(0, s, offset, len); } private V getBest(int t, String s, int offset, int len) { int pos = offset; for (int i = 0; i < len; i++) { char c = s.charAt(pos++); int index = LOOKUP[c & 0x7f]; if (index >= 0) { int idx = t * ROW_SIZE + index; int nt = rowIndex[idx]; if (nt == 0) break; t = nt; } else { char[] big = bigIndex == null ? null : bigIndex[t]; if (big == null) return null; int nt = big[c]; if (nt == 0) break; t = nt; } // Is the next Trie is a match if (key[t] != null) { // Recurse so we can remember this possibility V best = getBest(t, s, offset + i + 1, len - i - 1); if (best != null) return best; return value[t]; } } return value[t]; } private V getBest(int t, byte[] b, int offset, int len) { for (int i = 0; i < len; i++) { byte c = b[offset + i]; int index = LOOKUP[c & 0x7f]; if (index >= 0) { int idx = t * ROW_SIZE + index; int nt = rowIndex[idx]; if (nt == 0) break; t = nt; } else { char[] big = bigIndex == null ? null : bigIndex[t]; if (big == null) return null; int nt = big[c]; if (nt == 0) break; t = nt; } // Is the next Trie is a match if (key[t] != null) { // Recurse so we can remember this possibility V best = getBest(t, b, offset + i + 1, len - i - 1); if (best != null) return best; break; } } return value[t]; } private V getBest(int t, ByteBuffer b, int offset, int len) { int pos = b.position() + offset; for (int i = 0; i < len; i++) { byte c = b.get(pos++); int index = LOOKUP[c & 0x7f]; if (index >= 0) { int idx = t * ROW_SIZE + index; int nt = rowIndex[idx]; if (nt == 0) break; t = nt; } else { char[] big = bigIndex == null ? null : bigIndex[t]; if (big == null) return null; int nt = big[c]; if (nt == 0) break; t = nt; } // Is the next Trie is a match if (key[t] != null) { // Recurse so we can remember this possibility V best = getBest(t, b, offset + i + 1, len - i - 1); if (best != null) return best; break; } } return value[t]; } @Override public String toString() { StringBuilder buf = new StringBuilder(); toString(buf, 0); if (buf.length() == 0) return "{}"; buf.setCharAt(0, '{'); buf.append('}'); return buf.toString(); } private void toString(Appendable out, int t) { if (value[t] != null) { try { out.append(','); out.append(key[t]); out.append('='); out.append(value[t].toString()); } catch (IOException e) { throw new RuntimeException(e); } } for (int i = 0; i < ROW_SIZE; i++) { int idx = t * ROW_SIZE + i; if (rowIndex[idx] != 0) toString(out, rowIndex[idx]); } char[] big = bigIndex == null ? null : bigIndex[t]; if (big != null) { for (int i : big) if (i != 0) toString(out, i); } } @Override public Set keySet() { Set keys = new HashSet<>(); keySet(keys, 0); return keys; } private void keySet(Set set, int t) { if (t < value.length && value[t] != null) set.add(key[t]); for (int i = 0; i < ROW_SIZE; i++) { int idx = t * ROW_SIZE + i; if (idx < rowIndex.length && rowIndex[idx] != 0) keySet(set, rowIndex[idx]); } char[] big = bigIndex == null || t >= bigIndex.length ? null : bigIndex[t]; if (big != null) { for (int i : big) if (i != 0) keySet(set, i); } } @Override public boolean isFull() { return rows + 1 >= key.length; } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy