/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.common.util;

import com.carrotsearch.hppc.BitMixer;
import com.google.common.base.Preconditions;

import org.elasticsearch.common.lease.Releasable;

/**
* Base implementation for a hash table that is paged, recycles arrays and grows in-place.
*/
abstract class AbstractPagedHashMap implements Releasable {

    // Open addressing typically requires smaller load factors than chained hash tables
    // because collisions degrade lookup performance rather than just lengthening a chain.
    static final float DEFAULT_MAX_LOAD_FACTOR = 0.6f;

    static long hash(long value) {
        // Don't use the value directly: for some inputs, e.g. dates, the low bits carry
        // little information, and we want every bit of the hash to be meaningful so that
        // keys spread evenly across slots.
        return BitMixer.mix64(value);
    }

    static long hash(double value) {
        return hash(Double.doubleToLongBits(value));
    }

    final BigArrays bigArrays;
    final float maxLoadFactor;
    long size, maxSize;
    long mask;

    AbstractPagedHashMap(long capacity, float maxLoadFactor, BigArrays bigArrays) {
        Preconditions.checkArgument(capacity >= 0, "capacity must be >= 0");
        Preconditions.checkArgument(maxLoadFactor > 0 && maxLoadFactor < 1, "maxLoadFactor must be > 0 and < 1");
        this.bigArrays = bigArrays;
        this.maxLoadFactor = maxLoadFactor;
        long buckets = 1L + (long) (capacity / maxLoadFactor);
        buckets = Math.max(1, Long.highestOneBit(buckets - 1) << 1); // next power of two
        assert buckets == Long.highestOneBit(buckets);
        maxSize = (long) (buckets * maxLoadFactor);
        assert maxSize >= capacity;
        size = 0;
        mask = buckets - 1;
    }
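
    // Worked example of the sizing above: capacity = 100 with maxLoadFactor = 0.6f gives
    // buckets = 1 + (long) (100 / 0.6f) = 167, rounded up to the next power of two = 256,
    // so mask = 255 and maxSize = (long) (256 * 0.6f) = 153 entries before the next grow().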

    /**
     * Return the number of slots allocated to store this hash table.
     */
    public long capacity() {
        return mask + 1;
    }

    /**
     * Return the number of entries in this hash table.
     */
    public long size() {
        return size;
    }

    static long slot(long hash, long mask) {
        return hash & mask;
    }

    static long nextSlot(long curSlot, long mask) {
        return (curSlot + 1) & mask; // linear probing
    }
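
    // Example: with mask = 7 (i.e. 8 buckets), slot(13, 7) == 5, and probing wraps around
    // the end of the table: nextSlot(7, 7) == 0.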

    /** Resize to the given capacity. */
    protected abstract void resize(long capacity);

    /** Return whether the bucket at the given index holds an entry. */
    protected abstract boolean used(long bucket);

    /** Remove the entry at the given index and add it back. */
    protected abstract void removeAndAdd(long index);

    protected final void grow() {
        // Unlike a standard hash table, this implementation grows in place rather than
        // allocating a new table, which makes remapping keys to slots a bit trickier.
        assert size == maxSize;
        final long prevSize = size;
        final long buckets = capacity();
        // Resize the underlying arrays to twice as many buckets
        final long newBuckets = buckets << 1;
        assert newBuckets == Long.highestOneBit(newBuckets) : newBuckets; // power of 2
        resize(newBuckets);
        mask = newBuckets - 1;
        // First remap in place: most entries land in their final position directly
        for (long i = 0; i < buckets; ++i) {
            if (used(i)) {
                removeAndAdd(i);
            }
        }
        // The only entries that have not been put in their final position by the previous
        // loop are those that were stored in a slot that is < slot(key, mask). This only
        // happens when slot(key, mask) returned a slot close to the end of the array and
        // collision resolution wrapped it back to the first slots. This time, collision
        // resolution will have put such entries at the beginning of the newly allocated
        // slots, so re-add them to make sure they end up in the right slot. This second
        // loop typically exits very early.
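        // Concrete example: with 8 old buckets, a key with slot(key, 7) == 7 may have
        // wrapped around and been stored at bucket 0. When the first loop re-adds it under
        // the new mask, probing from a home slot near the old end of the array can spill
        // into buckets 8, 9, ... while not-yet-remapped entries still block its way; this
        // second loop repairs exactly those entries.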
        for (long i = buckets; i < newBuckets; ++i) {
            if (used(i)) {
                removeAndAdd(i); // add it back
            } else {
                break;
            }
        }
        assert size == prevSize;
        maxSize = (long) (newBuckets * maxLoadFactor);
        assert size < maxSize;
    }
}
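
/*
 * A minimal sketch (not part of the original file) of what a concrete subclass has to
 * provide. It assumes a hypothetical open-addressed set of non-zero longs backed by a
 * BigArrays-allocated LongArray, where 0 marks an empty bucket:
 *
 *     class LongSetSketch extends AbstractPagedHashMap {
 *         private LongArray keys; // one key per bucket, 0 == empty
 *
 *         LongSetSketch(long capacity, BigArrays bigArrays) {
 *             super(capacity, DEFAULT_MAX_LOAD_FACTOR, bigArrays);
 *             keys = bigArrays.newLongArray(capacity(), true); // zero-filled == all empty
 *         }
 *
 *         @Override
 *         protected boolean used(long bucket) {
 *             return keys.get(bucket) != 0;
 *         }
 *
 *         @Override
 *         protected void resize(long capacity) {
 *             keys = bigArrays.resize(keys, capacity); // grown tail is zero-filled
 *         }
 *
 *         @Override
 *         protected void removeAndAdd(long index) {
 *             final long key = keys.get(index);
 *             keys.set(index, 0); // remove
 *             // add back: probe from the home slot until a free bucket is found
 *             for (long slot = slot(hash(key), mask); ; slot = nextSlot(slot, mask)) {
 *                 if (keys.get(slot) == 0) {
 *                     keys.set(slot, key);
 *                     break;
 *                 }
 *             }
 *         }
 *
 *         @Override
 *         public void close() {
 *             keys.close();
 *         }
 *     }
 */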