org.elasticsearch.common.util.AbstractPagedHashMap Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of elasticsearch Show documentation
Elasticsearch subproject :server
There is a newer version: 8.15.1
/*
 * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
 * or more contributor license agreements. Licensed under the Elastic License
 * 2.0 and the Server Side Public License, v 1; you may not use this file except
 * in compliance with, at your election, the Elastic License 2.0 or the Server
 * Side Public License, v 1.
 */

package org.elasticsearch.common.util;

import org.apache.lucene.util.hppc.BitMixer;
import org.elasticsearch.core.Releasable;

/**
 * Base implementation for a hash table that is paged, recycles arrays and grows in-place.
 */
abstract class AbstractPagedHashMap implements Releasable {

    // Open addressing typically requires having smaller load factors compared to linked lists because
    // collisions may result into worse lookup performance.
    static final float DEFAULT_MAX_LOAD_FACTOR = 0.6f;

    static long hash(long value) {
        // Don't use the value directly. Under some cases eg dates, it could be that the low bits don't carry much value and we would like
        // all bits of the hash to carry as much value
        return BitMixer.mix64(value);
    }

    final BigArrays bigArrays;
    final float maxLoadFactor;
    long size, maxSize;
    long mask;

    AbstractPagedHashMap(long capacity, float maxLoadFactor, BigArrays bigArrays) {
        if (capacity < 0) {
            throw new IllegalArgumentException("capacity must be >= 0");
        }
        if (maxLoadFactor <= 0 || maxLoadFactor >= 1) {
            throw new IllegalArgumentException("maxLoadFactor must be > 0 and < 1");
        }
        this.bigArrays = bigArrays;
        this.maxLoadFactor = maxLoadFactor;
        long buckets = 1L + (long) (capacity / maxLoadFactor);
        buckets = Math.max(1, Long.highestOneBit(buckets - 1) << 1); // next power of two
        assert buckets == Long.highestOneBit(buckets);
        maxSize = (long) (buckets * maxLoadFactor);
        assert maxSize >= capacity;
        size = 0;
        mask = buckets - 1;
    }

    /**
     * Return the number of allocated slots to store this hash table.
     */
    public long capacity() {
        return mask + 1;
    }

    /**
     * Return the number of longs in this hash table.
     */
    public long size() {
        return size;
    }

    static long slot(long hash, long mask) {
        return hash & mask;
    }

    static long nextSlot(long curSlot, long mask) {
        return (curSlot + 1) & mask; // linear probing
    }

    /** Resize to the given capacity. */
    protected abstract void resize(long capacity);

    protected abstract boolean used(long bucket);

    /** Remove the entry at the given index and add it back */
    protected abstract void removeAndAdd(long index);

    protected final void grow() {
        // The difference of this implementation of grow() compared to standard hash tables is that we are growing in-place, which makes
        // the re-mapping of keys to slots a bit more tricky.
        assert size == maxSize;
        final long prevSize = size;
        final long buckets = capacity();
        // Resize arrays
        final long newBuckets = buckets << 1;
        assert newBuckets == Long.highestOneBit(newBuckets) : newBuckets; // power of 2
        resize(newBuckets);
        mask = newBuckets - 1;
        // First let's remap in-place: most data will be put in its final position directly
        for (long i = 0; i < buckets; ++i) {
            if (used(i)) {
                removeAndAdd(i);
            }
        }
        // The only entries which have not been put in their final position in the previous loop are those that were stored in a slot that
        // is < slot(key, mask). This only happens when slot(key, mask) returned a slot that was close to the end of the array and collision
        // resolution has put it back in the first slots. This time, collision resolution will have put them at the beginning of the newly
        // allocated slots. Let's re-add them to make sure they are in the right slot. This 2nd loop will typically exit very early.
        for (long i = buckets; i < newBuckets; ++i) {
            if (used(i)) {
                removeAndAdd(i); // add it back
            } else {
                break;
            }
        }
        assert size == prevSize;
        maxSize = (long) (newBuckets * maxLoadFactor);
        assert size < maxSize;
    }

}