com.google.common.flogger.context.SegmentTrie Maven / Gradle / Ivy
/*
* Copyright (C) 2019 The Flogger Authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.google.common.flogger.context;
import static com.google.common.flogger.util.Checks.checkNotNull;
import static java.lang.Math.min;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;
/**
* A fast prefix-Trie implementation for segmented keys. For example given the mapping:
*
* {@code
* "foo" = FOO
* "foo.bar" = FOO_BAR
* }
*
* (where {@code '.'} is the segment separator) and a default value of {@code DEFAULT}, we get:
*
*
* - {@code find("foo") == FOO} (exact match)
*
- {@code find("foo.bar") == FOO_BAR} (exact match)
*
- {@code find("foo.foo") == FOO} (nearest parent)
*
- {@code find("bar") == DEFAULT} (no match)
*
*
* This implementation supports empty segments (e.g. keys like {@code ""} or {@code "..."})
* correctly and never allocates any memory during lookup. It also supports {@code null} values, but
* will not continue to look for a parent match if it finds one (if you want a mapping to be
* ignored, don't include it in the map at all).
*
*
This implementation was designed for high performance in situations in which the key/value map
* is small compared to the set of possible search keys and the likelihood of finding an exact match
* is relatively low (i.e. most lookups will match the nearest parent or not match anything).
*
*
This implementation is immutable and thread safe (the given map is copied during the
* construction of the Trie, and subsequent changes to the map are not reflected in the Trie).
*
*
Limitations: Separators are chars only (not Unicode code-points or strings) and cannot
* represent anything outside the basic-multilingual plane (e.g. no string or Emoji separators).
*/
// This class could easily be made a shareable utility class if need by anyone else.
abstract class SegmentTrie {
/**
* Returns a prefix Trie for the given mapping, where keys are segmented via the given separator.
*/
public static SegmentTrie create(
Map map, char separator, T defaultValue) {
switch (map.size()) {
case 0:
return new EmptyTrie(defaultValue);
case 1:
Map.Entry e = map.entrySet().iterator().next();
return new SingletonTrie(e.getKey(), e.getValue(), separator, defaultValue);
default:
return new SortedTrie(map, separator, defaultValue);
}
}
private final T defaultValue;
SegmentTrie(T defaultValue) {
this.defaultValue = defaultValue;
}
public final T getDefaultValue() {
return defaultValue;
}
/** Returns the value of the entry which most closely matches the given key. */
public abstract T find(String key);
/** Returns an immutable view of the entries in this Trie. */
public abstract Map getEntryMap();
// Trivial implementation for the empty map (always return the default value).
private static final class EmptyTrie extends SegmentTrie {
EmptyTrie(T defaultValue) {
super(defaultValue);
}
@Override
public T find(String k) {
return getDefaultValue();
}
@Override
public Map getEntryMap() {
return Collections.emptyMap();
}
}
// Trivial implementation for a map with one entry.
private static final class SingletonTrie extends SegmentTrie {
private final String key;
private final T value;
private final char separator;
SingletonTrie(String key, T value, char separator, T defaultValue) {
super(defaultValue);
this.key = checkNotNull(key, "key");
this.value = value;
this.separator = separator;
}
@Override
public T find(String k) {
// Remember that just being a prefix isn't enough, it must match up to the end of a segment.
return k.regionMatches(0, key, 0, key.length())
&& (k.length() == key.length() || k.charAt(key.length()) == separator)
? value
: getDefaultValue();
}
@Override
public Map getEntryMap() {
Map map = new HashMap();
map.put(key, value);
return Collections.unmodifiableMap(map);
}
}
// General purpose implementation using a custom binary search to reduce repeated re-comparing of
// keys. Nothing in or called by the "find" method is allowed to allocate any memory.
private static final class SortedTrie extends SegmentTrie {
private final String[] keys;
private final List values;
private final int[] parent;
private final char separator;
SortedTrie(Map entries, char separator, T defaultValue) {
super(defaultValue);
TreeMap sorted = new TreeMap(entries);
this.keys = sorted.keySet().toArray(new String[0]);
this.values = new ArrayList(sorted.values());
this.parent = buildParentMap(keys, separator);
this.separator = separator;
}
@Override
public T find(String key) {
int keyLen = key.length();
// Find the left-hand-side bound and get the size of the common prefix.
int lhsIdx = 0;
int lhsPrefix = prefixCompare(key, keys[lhsIdx], 0);
if (lhsPrefix == keyLen) {
// If equal, just return the element.
return values.get(lhsIdx);
}
if (lhsPrefix < 0) {
// If the key is before the first element it has no parent.
return getDefaultValue();
}
// Find the right-hand-side bound and get the size of the common prefix.
int rhsIdx = keys.length - 1;
int rhsPrefix = prefixCompare(key, keys[rhsIdx], 0);
if (rhsPrefix == keyLen) {
// If equal, just return the element.
return values.get(rhsIdx);
}
if (rhsPrefix >= 0) {
// If the key is after the last element it may have a parent.
return findParent(key, rhsIdx, rhsPrefix);
}
// If rhsPrefix is negative, it's the bitwise-NOT of what we want.
rhsPrefix = ~rhsPrefix;
// Binary search: At the top of the loop, lhsPrefix & rhsPrefix are positive.
while (true) {
// Determine the pivot index.
// NOTE: In theory we might be able to improve performance by biasing the pivot index
// towards the side with the larger common prefix length.
int midIdx = (lhsIdx + rhsIdx) >>> 1;
if (midIdx == lhsIdx) {
// No match found: The left-hand-side is the nearest lexicographical entry (but not
// equal), but we know that if the search key has a parent in the trie, then it must be
// a parent of this entry (even if this entry is not a direct sibling).
return findParent(key, lhsIdx, lhsPrefix);
}
// Find the prefix length of the pivot value (using the minimum prefix length of the
// current bounds to limit the work done).
int midPrefix = prefixCompare(key, keys[midIdx], min(lhsPrefix, rhsPrefix));
if (keyLen == midPrefix) {
// If equal, just return the element.
return values.get(midIdx);
}
if (midPrefix >= 0) {
// key > pivot, so reset left-hand bound
lhsIdx = midIdx;
lhsPrefix = midPrefix;
} else {
// key < pivot, so reset right-hand bound
rhsIdx = midIdx;
rhsPrefix = ~midPrefix;
}
}
}
/**
* Finds the value of the nearest parent of the given key, starting at the element
* lexicographically preceding the key (but which is not equal to the key).
*
* @param k the key whose parent value we wish to find.
* @param idx the index of the closest matching key in the trie ({@code k < keys[idx]}).
* @param len the common prefix length between {@code k} and {@code keys[idx]}.
* @return the value of the nearest parent of {@code k}.
*/
private T findParent(String k, int idx, int len) {
while (!isParent(keys[idx], k, len)) {
idx = parent[idx];
if (idx == -1) {
return getDefaultValue();
}
}
return values.get(idx);
}
/**
* Determines if a given candidate value {@code p} is the parent of a key {@code k}.
*
* We know that {@code p < k} (lexicographically) and (importantly) {@code p != k}. We also
* know that {@code len} is common prefix length.
*
*
Thus either:
*
*
* - The common prefix is a strict prefix of k (i.e. {@code k.length() > len}).
*
- The common prefix is equal to {@code k}, but {@code p} must be longer (or else {@code p
* == k}).
*
*
* Thus if {@code (p.length <= len)} then {@code (k.length() > p.length())}.
*
* @param p the candidate parent key to check.
* @param k the key whose parent we are looking for.
* @param len the maximum length of any possible parent of {@code k}.
*/
private boolean isParent(String p, String k, int len) {
return p.length() <= len && k.charAt(p.length()) == separator;
}
/**
* Returns the common prefix between two strings, encoding the returned value to indicate
* lexicographical order. That is:
*
*
* - If {@code lhs >= rhs}, the returned value is the common prefix length.
*
- If {@code lhs < rhs}, the returned value is the bitwise-NOT of the common prefix
* length.
*
*
* This permits the function to be used for both comparison, and for determining the common
* prefix length (if the returned prefix length is non-negative and equal to {@code
* lhs.length()} then {@code lhs == rhs}).
*
*
By allowing a known existing lower bound for the prefix length to be provided, this method
* can skip re-comparing the beginning of values repeatedly when used in a binary search. The
* given lower bound value is expected to be the result of previous calls the this function (or
* {@code 0}).
*
* @param lhs first value to compare.
* @param rhs second value to compare.
* @param start a lower bound for the common prefix length of the given keys, which must be
* {@code <= min(lhs.length(), rhs.length())}.
* @return the common prefix length, encoded to indicate lexicographical ordering.
*/
private static int prefixCompare(String lhs, String rhs, int start) {
if (start < 0) {
throw new IllegalStateException("lhs=" + lhs + ", rhs=" + rhs + ", start=" + start);
}
int len = min(lhs.length(), rhs.length());
for (int n = start; n < len; n++) {
int diff = lhs.charAt(n) - rhs.charAt(n);
if (diff != 0) {
return diff < 0 ? ~n : n;
}
}
return (len < rhs.length()) ? ~len : len;
}
/**
* Builds an index mapping array {@code pmap} such that {@code pmap[idx]} is the index of the
* parent element of {@code keys[idx]}, or {@code -1} if no parent exists.
*/
private static int[] buildParentMap(String[] keys, char separator) {
int[] pmap = new int[keys.length];
// The first key cannot have a parent.
pmap[0] = -1;
for (int n = 1; n < keys.length; n++) {
// Assume no parent will be found (just makes things a bit easier later).
pmap[n] = -1;
// Generate each parent key in turn until a match is found.
String key = keys[n];
for (int sidx = key.lastIndexOf(separator); sidx >= 0; sidx = key.lastIndexOf(separator)) {
key = key.substring(0, sidx);
int i = Arrays.binarySearch(keys, 0, n, key);
if (i >= 0) {
// Match found, so set index and exit.
pmap[n] = i;
break;
}
}
}
return pmap;
}
@Override
public Map getEntryMap() {
Map map = new LinkedHashMap();
for (int n = 0; n < keys.length; n++) {
map.put(keys[n], values.get(n));
}
return Collections.unmodifiableMap(map);
}
}
}