All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.google.inject.internal.aop.ImmutableStringTrie Maven / Gradle / Ivy

The newest version!
/*
 * Copyright (C) 2020 Google Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.google.inject.internal.aop;

import static java.util.Arrays.binarySearch;

import java.util.Collection;
import java.util.function.ToIntFunction;

/**
 * Immutable space-efficient trie that provides a quick lookup index for a sorted set of non empty
 * strings. It assumes only those strings will be queried and therefore may produce false-positive
 * results for strings not in the array.
 *
 * <p>Each node of the tree is represented as a series of {@code char}s using this layout:
 *
 * <pre>
 * +---------------------------------+
 * | number of branches              |
 * +---------------------------------+---------------------------------+----
 * | char for branch 0               | char for branch 1               | ...
 * +---------------------------------+---------------------------------+----
 * | key-delta/leaf/bud for branch 0 | key-delta/leaf/bud for branch 1 | ...
 * +---------------------------------+---------------------------------+----
 * | offset to jump to branch 1      | offset to jump to branch 2      | ...
 * +---------------------------------+---------------------------------+----
 * </pre>
 *
 * Each node is immediately followed by its child nodes according to branch order.
 *
 * <p>The key-delta is used to skip over a section of the input key when we know it should always
 * match given the recently matched char (assumes only strings from the original list are queried).
 *
 * <p>Leaves mark a definite end of the match, while buds mark a potential end which could continue
 * down the trie if there are more characters to match. The key-delta for buds is implicitly 1.
 *
 * <p>The jump for branch 0 is assumed to be 0 and is always omitted, that is any continuation of
 * the trie for branch 0 immediately follows the current node. The entire jump section is omitted
 * when all the branches from a node are leaves.
 *
 * <p>Simple example trie with 2 strings "getValue" and "setValue":
 *
 * <pre>
 * +---+---+---+--------+--------+
 * | 2 | g | s | 0x8000 | 0x8001 |
 * +---+---+---+--------+--------+
 * </pre>
 *
 * In this case the first character is enough to determine the index result.
 *
 * <p>Example of a trie with a 'bud' that contains 2 strings "getName" and "getNameAndValue":
 *
 * <pre>
 * +---+---+---+---+---+--------+---+---+--------+
 * | 1 | g | 6 | 1 | e | 0x4000 | 1 | A | 0x8001 |
 * +---+---+---+---+---+--------+---+---+--------+
 * </pre>
 *
 * After matching 'g' we skip to the end of 'getName' before checking if there are any more
 * characters to match.
 *
 * <p>More complex example with 3 strings "getName", "getValue", "getVersion":
 *
 * <pre>
 * +---+---+---+---+---+---+--------+---+---+---+---+---+--------+--------+
 * | 1 | g | 3 | 2 | N | V | 0x8000 | 1 | 0 | 2 | a | e | 0x8001 | 0x8002 |
 * +---+---+---+---+---+---+--------+---+---+---+---+---+--------+--------+
 * </pre>
 *
 * After matching 'g' we skip past the 'get'. If the next character is 'N' we know this is 'getName'
 * otherwise we skip over the 'V' and jump to the last check between '...alue' and '...ersion'.
 *
 * @author Stuart McCulloch
 */
final class ImmutableStringTrie implements ToIntFunction<String> {

  /** Marks a leaf in the trie, where the rest of the bits are the index to be returned. */
  private static final char LEAF_MARKER = 0x8000;

  /** Marks a 'bud' in the tree; the same as a leaf except the trie continues beneath it. */
  private static final char BUD_MARKER = 0x4000;

  /** Maximum number of rows that can be indexed by a single trie. */
  private static final int MAX_ROWS_PER_TRIE = 0x4000;

  /** The compressed trie. */
  private final char[] trie;

  /** Wraps already-compressed trie data; the array is stored as-is, not copied. */
  ImmutableStringTrie(char[] data) {
    this.trie = data;
  }

  /** Lookup used for tables with fewer than two rows: every key is answered with index 0. */
  private static int singletonTrie(String key) {
    return 0;
  }

  /**
   * Returns the index assigned in the trie to the given string.
   *
   * <p>Note: a return value of {@code -1} means the string is definitely not in the trie, but a
   * non-negative index may be returned for strings that closely match those in the trie. This is
   * acceptable because we will only call this method with strings that we know exist in the trie.
   *
   * @param key the string to look up
   * @return the row index recorded for the matching leaf/bud, or {@code -1} on a definite miss
   */
  @Override
  public int applyAsInt(String key) {
    int keyLength = key.length();

    int keyIndex = 0;
    int dataIndex = 0;

    char[] data = trie;

    while (keyIndex < keyLength) {
      // trie is ordered, so we can use binary search to pick the right branch
      int branchCount = data[dataIndex++];
      int branchIndex =
          binarySearch(data, dataIndex, dataIndex + branchCount, key.charAt(keyIndex));

      if (branchIndex < 0) {
        break; // definitely no match
      }

      // the result section sits directly after the branch characters
      int resultIndex = branchIndex + branchCount;
      char result = data[resultIndex];
      if ((result & LEAF_MARKER) != 0) {
        return result & ~LEAF_MARKER;
      }

      // 'buds' are just like leaves unless the key still has characters left
      if ((result & BUD_MARKER) != 0) {
        if (keyIndex == keyLength - 1) {
          return result & ~BUD_MARKER;
        }
        result = 1; // more characters to match, continue search with next character
      }

      // move the key to the next potential decision point
      keyIndex += result;

      // move the data to the appropriate branch...
      // (dataIndex currently points at branch 0, whose jump is implicitly zero)
      if (branchIndex > dataIndex) {
        int jumpIndex = resultIndex + branchCount - 1;
        dataIndex += data[jumpIndex];
      }

      // ...always include moving past the current node
      // (a node with jumps occupies 3 * branchCount chars; the count was already consumed)
      dataIndex += (branchCount * 3) - 1;
    }

    return -1;
  }

  /**
   * Builds an immutable trie that indexes the given table of strings.
   *
   * <p>The table of strings must be sorted in lexical order.
   *
   * @param table sorted, non-empty strings to index; a string's position is its lookup result
   * @return lookup function mapping each string in the table to its position
   */
  public static ToIntFunction<String> buildTrie(Collection<String> table) {
    int numRows = table.size();
    if (numRows > 1) {
      return buildTrie(new StringBuilder(), table.toArray(new String[numRows]), 0, numRows);
    }
    // nothing to distinguish between, so no trie data is needed
    return ImmutableStringTrie::singletonTrie;
  }

  /** Builds a trie, overflowing to additional tries if there are too many rows. */
  private static ToIntFunction<String> buildTrie(
      StringBuilder buf, String[] table, int row, int rowLimit) {

    int trieLimit = row + MAX_ROWS_PER_TRIE;
    if (rowLimit <= trieLimit) {
      buildSubTrie(buf, table, 0, row, rowLimit);
      char[] data = new char[buf.length()];
      buf.getChars(0, data.length, data, 0);
      return new ImmutableStringTrie(data);
    }

    // overflow, build as big a trie as we can and put the rest in additional tries
    buildSubTrie(buf, table, 0, row, trieLimit);
    char[] data = new char[buf.length()];
    buf.getChars(0, data.length, data, 0);
    buf.setLength(0); // reset buffer for re-use

    return new Overflow(data, table[trieLimit], buildTrie(buf, table, trieLimit, rowLimit));
  }

  /**
   * Recursively builds a trie for a slice of rows at a particular column.
   *
   * <p>The node for this slice is written at the current end of {@code buf}; characters, results
   * and jumps are inserted into their sections as each branch is processed, while child nodes are
   * appended after them by the recursive calls.
   *
   * <p>NOTE(review): key-deltas and jump offsets are each stored in a single {@code char}, and a
   * key-delta shares its slot with the leaf/bud marker bits; this presumably relies on the indexed
   * strings being short enough that neither value reaches those limits - confirm with callers.
   *
   * @param buf buffer accumulating the compressed trie
   * @param table sorted table of strings being indexed
   * @param column column at which the rows in the slice are compared
   * @param row first row of the slice (inclusive)
   * @param rowLimit end of the slice (exclusive)
   */
  private static void buildSubTrie(
      StringBuilder buf, String[] table, int column, int row, int rowLimit) {

    int trieStart = buf.length();

    int prevRow = row;
    int branchCount = 0;
    int nextJump = 0;
    boolean allLeaves = true;

    while (prevRow < rowLimit) {
      String cells = table[prevRow];
      int columnLimit = cells.length();

      char pivot = cells.charAt(column);

      // find the row that marks the start of the next branch, and the end of this one
      int nextRow = nextPivotRow(table, pivot, column, prevRow, rowLimit);

      // find the column along this branch that marks the next decision point/pivot
      int nextColumn = nextPivotColumn(table, column, prevRow, nextRow);

      // adjust pivot point if it would involve adding a bud spanning more than one column
      if (nextColumn == columnLimit && nextColumn - column > 1 && nextRow - prevRow > 1) {
        // move it back so this becomes a jump branch followed immediately by sub-trie bud
        nextColumn--;
      }

      // record the character for this branch
      int branchIndex = trieStart + branchCount;
      buf.insert(branchIndex, pivot);

      int resultIndex = branchIndex + 1 + branchCount;

      // any sub tries will start after the result (to be inserted)
      int subTrieStart = buf.length() + 1;

      if (nextColumn < columnLimit) {
        // record key-delta and process rest of the row as sub trie
        buf.insert(resultIndex, (char) (nextColumn - column));
        buildSubTrie(buf, table, nextColumn, prevRow, nextRow);
        allLeaves = false;
      } else {
        // process rest of next row as sub trie to see if this row ends in a leaf/bud
        buildSubTrie(buf, table, nextColumn, prevRow + 1, nextRow);
        // must be leaf if sub trie doesn't exist, ie. it wasn't added to buffer
        boolean isLeaf = subTrieStart > buf.length();
        char marker = isLeaf ? LEAF_MARKER : BUD_MARKER;
        // only the row's offset within this trie is stored alongside the marker
        buf.insert(resultIndex, (char) ((prevRow & (MAX_ROWS_PER_TRIE - 1)) | marker));
        allLeaves = allLeaves && isLeaf;
      }

      if (nextRow < rowLimit) {
        // child sub-tries have been added, so can now calculate jump to next branch
        int jumpIndex = resultIndex + 1 + branchCount;
        nextJump += buf.length() - subTrieStart;
        buf.insert(jumpIndex, (char) nextJump);
      }

      prevRow = nextRow;
      branchCount++;
    }

    if (branchCount > 0) {
      buf.insert(trieStart, (char) branchCount);
      if (allLeaves) {
        // no need for jumps when every branch is a leaf
        // (only branchCount - 1 jumps were recorded; delete clamps the end to the buffer length)
        int jumpStart = trieStart + 1 + (branchCount * 2);
        buf.delete(jumpStart, jumpStart + branchCount);
      }
    }
  }

  /**
   * Finds the next row that has a different character in the selected column to the given one, or
   * is too short to include the column. This determines the span of rows that fall under the given
   * character in the trie.
   *
   * <p>Returns the row just after the end of the range if all rows have the same character.
   */
  private static int nextPivotRow(String[] table, char pivot, int column, int row, int rowLimit) {
    for (int r = row + 1; r < rowLimit; r++) {
      String cells = table[r];
      if (cells.length() <= column || cells.charAt(column) != pivot) {
        return r;
      }
    }
    return rowLimit;
  }

  /**
   * Finds the next column in the current row whose character differs in at least one other row.
   * This helps identify the longest common prefix from the current pivot point to the next one.
   *
   * <p>Returns the column just after the end of the current row if all rows are identical.
   */
  private static int nextPivotColumn(String[] table, int column, int row, int rowLimit) {
    String cells = table[row];
    int columnLimit = cells.length();
    for (int c = column + 1; c < columnLimit; c++) {
      if (nextPivotRow(table, cells.charAt(c), c, row, rowLimit) < rowLimit) {
        return c;
      }
    }
    return columnLimit;
  }

  /** Immutable trie that delegates searches that lie outside its range to an overflow trie. */
  private static final class Overflow implements ToIntFunction<String> {
    private final ImmutableStringTrie trie;
    private final String overflowKey;
    private final ToIntFunction<String> next;

    Overflow(char[] data, String overflowKey, ToIntFunction<String> next) {
      this.trie = new ImmutableStringTrie(data);
      this.overflowKey = overflowKey;
      this.next = next;
    }

    /**
     * Looks the key up in this trie when it sorts before the overflow key, otherwise in the
     * overflow trie, whose indexes are shifted past the rows held by this trie.
     *
     * @return the row index, or {@code -1} when the responsible trie reports a definite miss
     */
    @Override
    public int applyAsInt(String key) {
      if (key.compareTo(overflowKey) < 0) {
        return trie.applyAsInt(key);
      }
      int index = next.applyAsInt(key);
      // keep a definite miss as -1 rather than shifting it into a valid-looking row index
      return index < 0 ? index : MAX_ROWS_PER_TRIE + index;
    }
  }
}





© 2015 - 2024 Weber Informatics LLC | Privacy Policy