com.github.benmanes.caffeine.cache.FrequencySketch Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of caffeine Show documentation
Show all versions of caffeine Show documentation
A high performance caching library
/*
* Copyright 2015 Ben Manes. All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.github.benmanes.caffeine.cache;
import java.util.concurrent.ThreadLocalRandom;
import javax.annotation.Nonnegative;
import javax.annotation.Nonnull;
import javax.annotation.concurrent.NotThreadSafe;
/**
* A probabilistic multiset for estimating the popularity of an element within a time window. The
* maximum frequency of an element is limited to 15 (4-bits) and an aging process periodically
* halves the popularity of all elements.
*
* @author [email protected] (Ben Manes)
*/
@NotThreadSafe
final class FrequencySketch {
/*
* This class maintains a 4-bit CountMinSketch [1] with periodic aging to provide the popularity
* history for the TinyLfu admission policy [2]. The time and space efficiency of the sketch
* allows it to cheaply estimate the frequency of an entry in a stream of cache access events.
*
* The counter matrix is represented as a single dimensional array holding 16 counters per slot. A
* fixed depth of four balances the accuracy and cost, resulting in a width of four times the
* length of the array. To retain an accurate estimation the array's length equals the maximum
* number of entries in the cache, increased to the closest power-of-two to exploit more efficient
* bit masking. This configuration results in a confidence of 93.75% and error bound of e / width.
*
* The frequency of all entries is aged periodically using a sampling window based on the maximum
* number of entries in the cache. This is referred to as the reset operation by TinyLfu and keeps
* the sketch fresh by dividing all counters by two and subtracting based on the number of odd
* counters found. The O(n) cost of aging is amortized, ideal for hardware prefetching, and uses
* inexpensive bit manipulations per array location.
*
* A per instance smear is used to help protect against hash flooding [3], which would result
* in the admission policy always rejecting new candidates. The use of a pseudo random hashing
* function resolves the concern of a denial of service attack by exploiting the hash codes.
*
* [1] An Improved Data Stream Summary: The Count-Min Sketch and its Applications
* http://dimacs.rutgers.edu/~graham/pubs/papers/cm-full.pdf
* [2] TinyLFU: A Highly Efficient Cache Admission Policy
* http://arxiv.org/pdf/1512.00727.pdf
* [3] Denial of Service via Algorithmic Complexity Attack
* https://www.usenix.org/legacy/events/sec03/tech/full_papers/crosby/crosby.pdf
*/
static final long[] SEED = new long[] { // A mixture of seeds from FNV-1a, CityHash, and Murmur3
0xc3a5c85c97cb3127L, 0xb492b66fbe98f273L, 0x9ae16a3b2f90404fL, 0xcbf29ce484222325L};
static final long RESET_MASK = 0x7777777777777777L;
static final long ONE_MASK = 0x1111111111111111L;
final int randomSeed;
int sampleSize;
int tableMask;
long[] table;
int size;
/**
* Creates a lazily initialized frequency sketch, requiring {@link #ensureCapacity} be called
* when the maximum size of the cache has been determined.
*/
public FrequencySketch() {
int seed = ThreadLocalRandom.current().nextInt();
this.randomSeed = ((seed & 1) == 0) ? seed + 1 : seed;
}
/**
* Initializes and increases the capacity of this FrequencySketch instance, if necessary,
* to ensure that it can accurately estimate the popularity of elements given the maximum size of
* the cache. This operation forgets all previous counts when resizing.
*
* @param maximumSize the maximum size of the cache
*/
public void ensureCapacity(@Nonnegative long maximumSize) {
Caffeine.requireArgument(maximumSize >= 0);
int maximum = (int) Math.min(maximumSize, Integer.MAX_VALUE >>> 1);
if ((table != null) && (table.length >= maximum)) {
return;
}
table = new long[(maximum == 0) ? 1 : ceilingNextPowerOfTwo(maximum)];
tableMask = Math.max(0, table.length - 1);
sampleSize = (maximumSize == 0) ? 10 : (10 * maximum);
if (sampleSize <= 0) {
sampleSize = Integer.MAX_VALUE;
}
size = 0;
}
/**
* Returns if the sketch has not yet been initialized, requiring that {@link #ensureCapacity} is
* called before it begins to track frequencies.
*/
public boolean isNotInitialized() {
return (table == null);
}
/**
* Returns the estimated number of occurrences of an element, up to the maximum (15).
*
* @param e the element to count occurrences of
* @return the estimated number of occurrences of the element; possibly zero but never negative
*/
@Nonnegative
public int frequency(@Nonnull E e) {
if (isNotInitialized()) {
return 0;
}
int hash = spread(e.hashCode());
int start = (hash & 3) << 2;
int frequency = Integer.MAX_VALUE;
for (int i = 0; i < 4; i++) {
int index = indexOf(hash, i);
int count = (int) ((table[index] >>> ((start + i) << 2)) & 0xfL);
frequency = Math.min(frequency, count);
}
return frequency;
}
/**
* Increments the popularity of the element if it does not exceed the maximum (15). The popularity
* of all elements will be periodically down sampled when the observed events exceeds a threshold.
* This process provides a frequency aging to allow expired long term entries to fade away.
*
* @param e the element to add
*/
public void increment(@Nonnull E e) {
if (isNotInitialized()) {
return;
}
int hash = spread(e.hashCode());
int start = (hash & 3) << 2;
// Loop unrolling improves throughput by 5m ops/s
int index0 = indexOf(hash, 0);
int index1 = indexOf(hash, 1);
int index2 = indexOf(hash, 2);
int index3 = indexOf(hash, 3);
boolean added = incrementAt(index0, start);
added |= incrementAt(index1, start + 1);
added |= incrementAt(index2, start + 2);
added |= incrementAt(index3, start + 3);
if (added && (++size == sampleSize)) {
reset();
}
}
/**
* Increments the specified counter by 1 if it is not already at the maximum value (15).
*
* @param i the table index (16 counters)
* @param j the counter to increment
* @return if incremented
*/
boolean incrementAt(int i, int j) {
int offset = j << 2;
long mask = (0xfL << offset);
if ((table[i] & mask) != mask) {
table[i] += (1L << offset);
return true;
}
return false;
}
/** Reduces every counter by half of its original value. */
void reset() {
int count = 0;
for (int i = 0; i < table.length; i++) {
count += Long.bitCount(table[i] & ONE_MASK);
table[i] = (table[i] >>> 1) & RESET_MASK;
}
size = (size >>> 1) - (count >>> 2);
}
/**
* Returns the table index for the counter at the specified depth.
*
* @param item the element's hash
* @param i the counter depth
* @return the table index
*/
int indexOf(int item, int i) {
long hash = SEED[i] * item;
hash += hash >> 32;
return ((int) hash) & tableMask;
}
/**
* Applies a supplemental hash function to a given hashCode, which defends against poor quality
* hash functions.
*/
int spread(int x) {
x = ((x >>> 16) ^ x) * 0x45d9f3b;
x = ((x >>> 16) ^ x) * randomSeed;
return (x >>> 16) ^ x;
}
static int ceilingNextPowerOfTwo(int x) {
// From Hacker's Delight, Chapter 3, Harry S. Warren Jr.
return 1 << (Integer.SIZE - Integer.numberOfLeadingZeros(x - 1));
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy