// com.carrotsearch.hppcrt.sets.IntOpenCustomHashSet - Maven / Gradle / Ivy
// Show all versions of hppcrt - Show documentation
package com.carrotsearch.hppcrt.sets;
import com.carrotsearch.hppcrt.*;
import com.carrotsearch.hppcrt.cursors.*;
import com.carrotsearch.hppcrt.predicates.*;
import com.carrotsearch.hppcrt.procedures.*;
import com.carrotsearch.hppcrt.strategies.*;
import com.carrotsearch.hppcrt.hash.*;
//If RH is defined, RobinHood Hashing is in effect :
/**
* A hash set of <code>int</code>s, implemented using open
* addressing with linear probing for collision resolution.
*
* The difference with {@link IntOpenHashSet} is that it uses a
* {@link IntHashingStrategy} to compare objects externally instead of using
* the built-in hashCode() / equals(). In particular, the management of null
* keys is up to the {@link IntHashingStrategy} implementation.
*
* The internal buffers of this implementation ({@link #keys}, etc...)
* are always allocated to the nearest size that is a power of two. When
* the capacity exceeds the given load factor, the buffer size is doubled.
*
* Important note. The implementation uses power-of-two tables and linear
* probing, which may cause poor performance (many collisions) if hash values are
* not properly distributed. Therefore, it is up to the {@link IntHashingStrategy} to
* assure good performance.
*
*
*
* @author This code is inspired by the collaboration and implementation in the fastutil project.
*
* Robin-Hood hashing algorithm is also used to minimize variance
* in insertion and search-related operations, for an all-around smoother operation at the cost
* of smaller peak performance:
* - Pedro Celis (1986) for the original Robin-Hood hashing paper,
* - MoonPolySoft/Cliff Moon for the initial Robin-hood on HPPC implementation,
* - Vincent Sonnier for the present implementation using cached hashes.
*
*/
@javax.annotation.Generated(date = "2015-02-27T19:21:17+0100", value = "HPPC-RT generated from: IntOpenCustomHashSet.java")
public class IntOpenCustomHashSet
extends AbstractIntCollection
implements IntLookupContainer, IntSet, Cloneable
{
/**
* Minimum capacity for the map.
*/
public final static int MIN_CAPACITY = HashContainerUtils.MIN_CAPACITY;
/**
* Default capacity.
*/
public final static int DEFAULT_CAPACITY = HashContainerUtils.DEFAULT_CAPACITY;
/**
* Default load factor.
*/
public final static float DEFAULT_LOAD_FACTOR = HashContainerUtils.DEFAULT_LOAD_FACTOR;
/**
* Hash-indexed array holding all set entries.
*
* Direct set iteration: iterate {keys[i]} for i in [0; keys.length[ where keys[i] != 0/null, then also
* {0/null} is in the set if {@link #allocatedDefaultKey} = true.
*
*
* Direct iteration warning:
* If the iteration goal is to fill another hash container, please iterate {@link #keys} in reverse to prevent performance losses.
* @see #keys
*/
public int[] keys;
/**
* Caches the hash value = HASH(keys[i]) & mask, if keys[i] != 0/null,
* for every index i.
* @see #assigned
*/
protected int[] hash_cache;
/**
* True if key = 0/null is in the set.
*/
public boolean allocatedDefaultKey = false;
/**
* Cached number of assigned slots in {@link #keys}.
*/
protected int assigned;
/**
* The load factor for this map (fraction of allocated slots
* before the buffers must be rehashed or reallocated).
*/
protected float loadFactor;
/**
* Resize buffers when {@link #assigned} hits this value.
*/
protected int resizeAt;
/**
* The most recent slot accessed in {@link #contains}.
*
* @see #contains
* @see #lkey
*/
protected int lastSlot;
/**
* Custom hashing strategy :
* comparisons and hash codes of keys will be computed
* with the strategy methods instead of the native Object equals() and hashCode() methods.
*/
protected final IntHashingStrategy hashStrategy;
/**
* Creates a hash set with the default capacity of {@value #DEFAULT_CAPACITY} and a
* load factor of {@value #DEFAULT_LOAD_FACTOR}, using hashStrategy as {@link IntHashingStrategy}.
* Simply delegates to the (capacity, load factor, strategy) constructor.
*
* @param hashStrategy strategy used to hash and compare the keys
*/
public IntOpenCustomHashSet(final IntHashingStrategy hashStrategy)
{
this(IntOpenCustomHashSet.DEFAULT_CAPACITY, IntOpenCustomHashSet.DEFAULT_LOAD_FACTOR, hashStrategy);
}
/**
* Creates a hash set with the given capacity and a
* load factor of {@value #DEFAULT_LOAD_FACTOR}, using hashStrategy as {@link IntHashingStrategy}.
* Simply delegates to the (capacity, load factor, strategy) constructor.
*
* @param initialCapacity number of elements the set is expected to hold
* @param hashStrategy strategy used to hash and compare the keys
*/
public IntOpenCustomHashSet(final int initialCapacity, final IntHashingStrategy hashStrategy)
{
this(initialCapacity, IntOpenCustomHashSet.DEFAULT_LOAD_FACTOR, hashStrategy);
}
/**
 * Creates a hash set with the given capacity and load factor, using the hashStrategy as {@link IntHashingStrategy}.
 *
 * The internal buffers are sized from {@code initialCapacity / loadFactor} plus {@link #MIN_CAPACITY},
 * then rounded by {@code HashContainerUtils.roundCapacity}.
 *
 * @param initialCapacity number of elements the set is expected to hold
 * @param loadFactor load factor, must be in (0, 1]
 * @param hashStrategy strategy used to hash and compare the keys, must not be null
 * @throws IllegalArgumentException if hashStrategy is null, or if loadFactor is not in (0, 1]
 */
public IntOpenCustomHashSet(final int initialCapacity, final float loadFactor, final IntHashingStrategy hashStrategy)
{
    //only accept not-null strategies.
    if (hashStrategy == null) {
        throw new IllegalArgumentException("IntOpenCustomHashSet() cannot have a null hashStrategy !");
    }

    // Arguments of a public constructor are checked with a real exception: an assert is skipped
    // unless the JVM runs with -ea. The negated form also rejects NaN.
    if (!(loadFactor > 0 && loadFactor <= 1)) {
        throw new IllegalArgumentException("Load factor must be between (0, 1].");
    }

    this.hashStrategy = hashStrategy;
    this.loadFactor = loadFactor;

    //take the load factor into account to guarantee no reallocations before reaching initialCapacity.
    int internalCapacity = (int) (initialCapacity / loadFactor) + IntOpenCustomHashSet.MIN_CAPACITY;

    //align on next power of two
    internalCapacity = HashContainerUtils.roundCapacity(internalCapacity);

    this.keys = (new int[internalCapacity]);
    this.hash_cache = new int[internalCapacity];

    //Take advantage of the rounding so that the resize occur a bit later than expected.
    //allocate so that there is at least one slot that remains allocated = false
    //this is compulsory to guarantee proper stop in searching loops
    this.resizeAt = Math.max(3, (int) (internalCapacity * loadFactor)) - 2;
}
/**
* Creates a hash set from elements of another container. Default load factor is used.
* The set is pre-sized with container.size(), then filled through {@link #addAll(IntContainer)}.
*
* @param container source of the elements to add
* @param hashStrategy strategy used to hash and compare the keys
*/
public IntOpenCustomHashSet(final IntContainer container, final IntHashingStrategy hashStrategy)
{
this(container.size(), hashStrategy);
addAll(container);
}
/**
* {@inheritDoc}
*
* The key 0 is never stored in {@link #keys}, where 0 marks a free slot; its presence is
* tracked by {@link #allocatedDefaultKey} instead. Any other key is inserted by linear probing
* with Robin-Hood displacement, using {@link #hash_cache} to know each resident key's home slot.
*
* @param e the key to add
* @return true if the key was absent and has been added, false if it was already in the set
*/
@Override
public boolean add(int e)
{
// 0 is the free-slot marker of keys[], so it is handled by a dedicated flag and never probes the table.
if (e == (0)) {
if (this.allocatedDefaultKey) {
return false;
}
this.allocatedDefaultKey = true;
return true;
}
final int mask = this.keys.length - 1;
final IntHashingStrategy strategy = this.hashStrategy;
final int[] keys = this.keys;
//copied straight from fastutil "fast-path"
int slot;
int curr;
//1.1 The rehashed key slot is occupied...
if ((curr = keys[slot = PhiMix.hash(strategy.computeHashCode(e)) & mask]) != (0)) {
//1.2 the occupied place is indeed key, return false
if (strategy.equals(curr, e)) {
return false;
}
//1.3 key is colliding, manage below :
}
else if (this.assigned < this.resizeAt) {
//1.4 key is not colliding, without resize, so insert, return true.
keys[slot] = e;
this.assigned++;
// the key sits exactly on its home slot, so the cached home slot is the slot itself
this.hash_cache[slot] = slot;
return true;
}
// Slow path: either the home slot is taken by another key, or it is free but the resize
// threshold is reached (then the loop below is skipped and expandAndAdd() is called).
final int[] cached = this.hash_cache;
int tmpKey;
int tmpAllocated;
// home slot of the key currently being carried (changes when a swap occurs)
int initial_slot = slot;
// number of probes done so far for the key currently being carried
int dist = 0;
int existing_distance = 0;
while ((keys[slot] != (0)))
{
if (strategy.equals(e, keys[slot]))
{
return false;
}
//re-shuffle keys to minimize variance
// distance of the resident key from its cached home slot, accounting for wrap-around
existing_distance = (slot < cached[slot] ? slot + cached.length - cached[slot] : slot - cached[slot]);
if (dist > existing_distance)
{
// the carried key has travelled further than the resident one: it takes the slot,
// and the evicted resident (with its home slot and distance) is carried on instead
tmpKey = keys[slot];
keys[slot] = e;
e = tmpKey;
tmpAllocated = cached[slot];
cached[slot] = initial_slot;
initial_slot = tmpAllocated;
dist = existing_distance;
}
slot = (slot + 1) & mask;
dist++;
}
// Check if we need to grow. If so, reallocate new data,
// fill in the last element and rehash.
if (this.assigned == this.resizeAt) {
// e may be a displaced key at this point; it goes into the free slot before the rehash
expandAndAdd(e, slot);
}
else {
this.assigned++;
cached[slot] = initial_slot;
keys[slot] = e;
}
return true;
}
/**
 * Adds two elements to the set, first e1 then e2.
 *
 * @param e1 first key to add
 * @param e2 second key to add
 * @return the number of keys (0, 1 or 2) that were not present and have been added
 */
public int add(final int e1, final int e2)
{
    final int firstAdded = add(e1) ? 1 : 0;
    final int secondAdded = add(e2) ? 1 : 0;
    return firstAdded + secondAdded;
}
/**
 * Vararg-signature method for adding elements to this set.
 *
 * This method is handy, but costly if used in tight loops (anonymous
 * array passing).
 *
 * @param elements the keys to add, in order
 * @return Returns the number of elements that were added to the set
 * (were not present in the set).
 */
public int add(final int... elements)
{
    int added = 0;
    for (int i = 0; i < elements.length; i++) {
        added += add(elements[i]) ? 1 : 0;
    }
    return added;
}
/**
* Adds all elements from a given container to this set.
*
* @return Returns the number of elements actually added as a result of this
* call (not previously present in the set).
*/
public int addAll(final IntContainer container)
{
return addAll((Iterable extends IntCursor>) container);
}
/**
* Adds all elements from a given iterable to this set.
*
* @return Returns the number of elements actually added as a result of this
* call (not previously present in the set).
*/
public int addAll(final Iterable extends IntCursor> iterable)
{
int count = 0;
for (final IntCursor cursor : iterable)
{
if (add(cursor.value)) {
count++;
}
}
return count;
}
/**
* Grows the internal storage buffers, stores the pending key, and re-inserts every key
* of the old buffer into the new ones.
*
* New buffers are obtained through {@link #allocateBuffers} with the capacity given by
* HashContainerUtils.nextCapacity(). The pending key is written into the old array at freeSlot,
* so that the single rehash loop below also moves it to the new buffers.
*
* @param pendingKey the key that triggered the expansion, never 0
* @param freeSlot a free slot of the old {@link #keys} array where pendingKey can be parked
*/
private void expandAndAdd(final int pendingKey, final int freeSlot)
{
assert this.assigned == this.resizeAt;
//default sentinel value is never in the keys[] array, so never trigger reallocs
assert (pendingKey != (0));
// Try to allocate new buffers first. If we OOM, it'll be now without
// leaving the data structure in an inconsistent state.
final int[] oldKeys = this.keys;
allocateBuffers(HashContainerUtils.nextCapacity(this.keys.length));
// We have succeeded at allocating new data so insert the pending key at
// the free slot in the old array before rehashing.
// Slots change with the rehash, so the slot remembered by contains() is invalidated.
this.lastSlot = -1;
this.assigned++;
oldKeys[freeSlot] = pendingKey;
//Variables for adding
// from here on this.keys / this.hash_cache are the NEW buffers
final int mask = this.keys.length - 1;
final IntHashingStrategy strategy = this.hashStrategy;
int e = (0);
//adding phase
int slot = -1;
final int[] keys = this.keys;
final int[] cached = this.hash_cache;
int tmpKey = (0);
int tmpAllocated = -1;
int initial_slot = -1;
int dist = -1;
int existing_distance = -1;
//iterate all the old arrays to add in the newly allocated buffers
//It is important to iterate backwards to minimize the conflict chain length !
for (int i = oldKeys.length; --i >= 0;)
{
if ((oldKeys[i] != (0)))
{
e = oldKeys[i];
// home slot of the key in the new buffers
slot = PhiMix.hash(strategy.computeHashCode(e)) & mask;
initial_slot = slot;
dist = 0;
// No equality test is done here: only the occupancy of the new slots is checked.
while ((keys[slot] != (0)))
{
//re-shuffle keys to minimize variance
// distance of the resident key from its cached home slot, accounting for wrap-around
existing_distance = (slot < cached[slot] ? slot + cached.length - cached[slot] : slot - cached[slot]);
if (dist > existing_distance)
{
//swap current (key, initial_slot) with slot places
tmpKey = keys[slot];
keys[slot] = e;
e = tmpKey;
tmpAllocated = cached[slot];
cached[slot] = initial_slot;
initial_slot = tmpAllocated;
dist = existing_distance;
} //endif
slot = (slot + 1) & mask;
dist++;
} //end while
//place it at that position
cached[slot] = initial_slot;
keys[slot] = e;
}
}
}
/**
 * Allocate internal buffers for a given capacity, and recompute the resize threshold.
 *
 * @param capacity New capacity (must be a power of two).
 */
private void allocateBuffers(final int capacity)
{
    // Both arrays are created before any field is touched, so a failed allocation
    // leaves the current buffers in place.
    final int[] freshKeys = (new int[capacity]);
    final int[] freshHashCache = new int[capacity];

    this.keys = freshKeys;
    this.hash_cache = freshHashCache;

    // Keep at least one slot free at all times:
    // the probing loops rely on meeting a free slot to terminate.
    final int loadLimit = (int) (capacity * this.loadFactor);
    this.resizeAt = Math.max(3, loadLimit) - 2;
}
/**
* {@inheritDoc}
*
* Delegates to {@link #remove(int)}, so the result is either 0 or 1.
*/
@Override
public int removeAllOccurrences(final int key)
{
return remove(key) ? 1 : 0;
}
/**
 * Removes the given key from the set if it is present.
 * An alias for the (preferred) {@link #removeAllOccurrences}.
 *
 * @param key the key to remove
 * @return true if the key was in the set and has been removed, false otherwise
 */
public boolean remove(final int key)
{
    // The 0 key only exists as a flag, it is never stored in the table.
    if (key == (0)) {
        final boolean wasPresent = this.allocatedDefaultKey;
        this.allocatedDefaultKey = false;
        return wasPresent;
    }

    final int wrap = this.keys.length - 1;
    final IntHashingStrategy comparator = this.hashStrategy;
    final int[] table = this.keys;

    // Fast path: look at the home slot of the key.
    int probe = PhiMix.hash(comparator.computeHashCode(key)) & wrap;
    final int resident = table[probe];

    if (resident == (0)) {
        // home slot is free: the key cannot be in the set
        return false;
    }

    if (comparator.equals(resident, key)) {
        this.assigned--;
        shiftConflictingKeys(probe);
        return true;
    }

    // Collision: walk the probe sequence until a free slot, or until the walk has gone
    // further than the resident key's own distance from its cached home slot.
    final int[] homeSlots = this.hash_cache;
    probe = (probe + 1) & wrap;

    for (int steps = 0; table[probe] != (0); steps++) {
        final int home = homeSlots[probe];
        final int residentDistance = (probe < home) ? probe + homeSlots.length - home : probe - home;

        if (steps > residentDistance) {
            break;
        }

        if (comparator.equals(key, table[probe])) {
            this.assigned--;
            shiftConflictingKeys(probe);
            return true;
        }

        probe = (probe + 1) & wrap;
    }

    return false;
}
/**
 * Frees the slot slotCurr, shifting back the following keys of the
 * probe sequence so that no gap is left between a key and its home slot.
 * The home slot of each key is read from {@link #hash_cache}, so no hash is recomputed.
 * The caller is responsible for updating {@link #assigned}.
 *
 * @param slotCurr the slot to free
 */
protected void shiftConflictingKeys(int slotCurr)
{
    // Copied nearly verbatim from fastutil's impl.
    final int mask = this.keys.length - 1;
    int slotPrev, slotOther;

    final int[] keys = this.keys;
    final int[] cached = this.hash_cache;

    while (true)
    {
        // slotPrev is the hole to fill, the scan for a movable key starts right after it
        slotCurr = ((slotPrev = slotCurr) + 1) & mask;

        while ((keys[slotCurr] != (0)))
        {
            //use the cached value, no need to recompute
            slotOther = cached[slotCurr];

            if (slotPrev <= slotCurr)
            {
                // We are on the right of the original slot.
                if (slotPrev >= slotOther || slotOther > slotCurr) {
                    break;
                }
            }
            else
            {
                // We have wrapped around.
                if (slotPrev >= slotOther && slotOther > slotCurr) {
                    break;
                }
            }
            slotCurr = (slotCurr + 1) & mask;
        }

        // reached a free slot: nothing left to move back
        if (!(keys[slotCurr] != (0)))
        {
            break;
        }

        // Shift key/cached home slot pair into the hole; slotCurr becomes the new hole.
        keys[slotPrev] = keys[slotCurr];
        cached[slotPrev] = cached[slotCurr];
    }

    //means not allocated
    keys[slotPrev] = (0);
}
/**
 * Returns the last key saved in a call to {@link #contains} if it returned <code>true</code>.
 * Precondition : {@link #contains} must have been called previously !
 *
 * @return the key stored at the slot recorded by {@link #contains}, or 0 if the last
 * successful lookup was for the 0 key
 * @see #contains
 */
public int lkey()
{
    // -2 is the marker contains() records after successfully finding the 0 key
    if (this.lastSlot == -2) {
        return (0);
    }

    assert this.lastSlot >= 0 : "Call contains() first.";
    assert (this.keys[this.lastSlot] != (0)) : "Last call to exists did not have any associated value.";

    return this.keys[this.lastSlot];
}
/**
* @return Returns the slot of the last key looked up in a call to {@link #contains} if
* it returned <code>true</code>,
* or else -2 if {@link #contains} was successful on key = 0/null.
* @see #contains
*/
public int lslot()
{
assert this.lastSlot >= 0 || this.lastSlot == -2 : "Call contains() first.";
return this.lastSlot;
}
/**
 * {@inheritDoc}
 *
 * As a side effect, records in {@link #lastSlot} the slot where the key was found,
 * -2 if the 0 key was found, or -1 if the lookup failed.
 */
@Override
public boolean contains(final int key)
{
    // The 0 key only exists as a flag, it is never stored in the table.
    if (key == (0)) {
        this.lastSlot = this.allocatedDefaultKey ? -2 : -1;
        return this.allocatedDefaultKey;
    }

    final int wrap = this.keys.length - 1;
    final IntHashingStrategy comparator = this.hashStrategy;
    final int[] table = this.keys;

    // Fast path: look at the home slot of the key.
    int probe = PhiMix.hash(comparator.computeHashCode(key)) & wrap;
    final int resident = table[probe];

    if (resident == (0)) {
        // home slot is free: the key cannot be in the set
        this.lastSlot = -1;
        return false;
    }

    if (comparator.equals(resident, key)) {
        this.lastSlot = probe;
        return true;
    }

    // Collision: walk the probe sequence until a free slot, or until the walk has gone
    // further than the resident key's own distance from its cached home slot.
    final int[] homeSlots = this.hash_cache;
    probe = (probe + 1) & wrap;

    for (int steps = 0; table[probe] != (0); steps++) {
        final int home = homeSlots[probe];
        final int residentDistance = (probe < home) ? probe + homeSlots.length - home : probe - home;

        if (steps > residentDistance) {
            break;
        }

        if (comparator.equals(key, table[probe])) {
            this.lastSlot = probe;
            return true;
        }

        probe = (probe + 1) & wrap;
    }

    //unsuccessful search
    this.lastSlot = -1;
    return false;
}
/**
 * {@inheritDoc}
 *
 * Does not release internal buffers.
 */
@Override
public void clear()
{
    // Blank the whole key buffer in place (presumably resets each slot to the free marker 0
    // - confirm against IntArrays.blankArray).
    IntArrays.blankArray(this.keys, 0, this.keys.length);

    // Forget the 0 key, the slot remembered by contains(), and the element count.
    this.allocatedDefaultKey = false;
    this.lastSlot = -1;
    this.assigned = 0;
}
/**
 * {@inheritDoc}
 *
 * Counts the keys stored in {@link #keys}, plus one if the 0 key is present.
 */
@Override
public int size()
{
    if (this.allocatedDefaultKey) {
        return this.assigned + 1;
    }
    return this.assigned;
}
/**
* {@inheritDoc}
*
* Computed as {@link #resizeAt} - 1.
*/
@Override
public int capacity() {
return this.resizeAt - 1;
}
/**
 * {@inheritDoc}
 *
 * The result is the sum of PhiMix.hash(strategy.computeHashCode(key)) over all keys stored
 * in {@link #keys}; the 0 key, when present, contributes nothing to the sum.
 */
@Override
public int hashCode()
{
    final IntHashingStrategy hasher = this.hashStrategy;
    final int[] table = this.keys;

    int sum = 0;
    int index = table.length;

    // walk the buffer from the last slot down to the first
    while (index-- > 0) {
        final int candidate = table[index];

        if (candidate != (0)) {
            sum += PhiMix.hash(hasher.computeHashCode(candidate));
        }
    }

    return sum;
}
/**
 * {@inheritDoc}
 *
 * Two sets are equal when the other object is also an IntOpenCustomHashSet, both hash strategies
 * are equal, both sizes match, and every element of this set is contained in the other one.
 */
@Override
public boolean equals(final Object obj)
{
    if (obj == this) {
        return true;
    }

    // also rejects null
    if (!(obj instanceof IntOpenCustomHashSet)) {
        return false;
    }

    final IntOpenCustomHashSet that = (IntOpenCustomHashSet) obj;

    if (!this.hashStrategy.equals(that.hashStrategy)) {
        return false;
    }

    if (that.size() != this.size()) {
        return false;
    }

    final EntryIterator it = this.iterator();

    while (it.hasNext())
    {
        if (!that.contains(it.next().value))
        {
            //iteration is abandoned early: hand the iterator back explicitly
            it.release();
            return false;
        }
    }

    return true;
}
/**
 * An iterator implementation for {@link #iterator}.
 *
 * The cursor index starts at keys.length + 1 (set by the pool when the iterator is handed out):
 * that first step yields the 0 key if it is present, then the {@link #keys} buffer
 * is walked from its last slot down to slot 0.
 */
public final class EntryIterator extends AbstractIterator<IntCursor>
{
    public final IntCursor cursor;

    public EntryIterator()
    {
        this.cursor = new IntCursor();
        this.cursor.index = -2;
    }

    /**
     * Iterate backwards w.r.t the buffer, to
     * minimize collision chains when filling another hash container (ex. with putAll())
     */
    @Override
    protected IntCursor fetch()
    {
        // First call after (re)initialization: deal with the 0 key, reported at index keys.length.
        if (this.cursor.index == IntOpenCustomHashSet.this.keys.length + 1) {

            if (IntOpenCustomHashSet.this.allocatedDefaultKey) {

                this.cursor.index = IntOpenCustomHashSet.this.keys.length;
                this.cursor.value = (0);

                return this.cursor;
            }

            //no 0 key in the set, continue iteration with the buffer...
            this.cursor.index = IntOpenCustomHashSet.this.keys.length;
        }

        // skip the free slots below the current position
        int i = this.cursor.index - 1;

        while (i >= 0 &&
                !(IntOpenCustomHashSet.this.keys[i] != (0)))
        {
            i--;
        }

        if (i == -1) {
            return done();
        }

        this.cursor.index = i;
        this.cursor.value = IntOpenCustomHashSet.this.keys[i];

        return this.cursor;
    }
}
/**
 * internal pool of EntryIterator: iterators are created once, and positioned
 * at keys.length + 1 (the start position expected by EntryIterator.fetch()) by initialize().
 */
protected final IteratorPool<IntCursor, EntryIterator> entryIteratorPool = new IteratorPool<IntCursor, EntryIterator>(
        new ObjectFactory<EntryIterator>() {

            @Override
            public EntryIterator create() {
                return new EntryIterator();
            }

            @Override
            public void initialize(final EntryIterator obj) {
                obj.cursor.index = IntOpenCustomHashSet.this.keys.length + 1;
            }

            @Override
            public void reset(final EntryIterator obj) {
                // nothing
            }
        });
/**
* {@inheritDoc}
* @return an {@link EntryIterator} borrowed from {@link #entryIteratorPool}
*/
@Override
public EntryIterator iterator()
{
// iterators come from the pool instead of being created on each call
return this.entryIteratorPool.borrow();
}
/**
 * {@inheritDoc}
 *
 * The 0 key, if present, is passed first; then the keys of the buffer, from last slot to first.
 *
 * @param procedure the procedure applied to every key of the set
 * @return the procedure instance that was passed in
 */
@Override
public <T extends IntProcedure> T forEach(final T procedure)
{
    if (this.allocatedDefaultKey) {
        procedure.apply((0));
    }

    final int[] keys = this.keys;

    //Iterate in reverse for side-stepping the longest conflict chain
    //in another hash, in case apply() is actually used to fill another hash container.
    for (int i = keys.length - 1; i >= 0; i--)
    {
        if ((keys[i] != (0))) {
            procedure.apply(keys[i]);
        }
    }

    return procedure;
}
/**
 * {@inheritDoc}
 *
 * The 0 key, if present, is written first, followed by the keys of the buffer in slot order.
 *
 * @param target the array receiving the keys, written from index 0
 * @return the target array
 */
@Override
public int[] toArray(final int[] target)
{
    int written = 0;

    if (this.allocatedDefaultKey) {
        target[written] = (0);
        written++;
    }

    for (final int candidate : this.keys)
    {
        // 0 marks a free slot
        if (candidate != (0)) {
            target[written] = candidate;
            written++;
        }
    }

    assert written == this.size();

    return target;
}
/**
* Clone this object.
*
* The copy is a new set sized for this.size(), with the same load factor and the same
* hash strategy instance; the elements are re-added with addAll() instead of copying the buffers.
*
* @return the new, independent set
*/
@Override
public IntOpenCustomHashSet clone()
{
final IntOpenCustomHashSet cloned = new IntOpenCustomHashSet(this.size(), this.loadFactor, this.hashStrategy);
cloned.addAll(this);
cloned.allocatedDefaultKey = this.allocatedDefaultKey;
// NOTE(review): defaultValue is not declared in the visible part of this class -
// presumably inherited from AbstractIntCollection; confirm.
cloned.defaultValue = this.defaultValue;
return cloned;
}
/**
 * {@inheritDoc}
 *
 * The 0 key, if present, is tested first; then the keys of the buffer, from last slot to first.
 * The iteration stops as soon as the predicate returns false.
 *
 * @param predicate the predicate applied to the keys of the set
 * @return the predicate instance that was passed in
 */
@Override
public <T extends IntPredicate> T forEach(final T predicate)
{
    if (this.allocatedDefaultKey) {

        if (!predicate.apply((0))) {
            return predicate;
        }
    }

    final int[] keys = this.keys;

    //Iterate in reverse for side-stepping the longest conflict chain
    //in another hash, in case apply() is actually used to fill another hash container.
    for (int i = keys.length - 1; i >= 0; i--)
    {
        if ((keys[i] != (0)))
        {
            if (!predicate.apply(keys[i])) {
                break;
            }
        }
    }

    return predicate;
}
/**
* {@inheritDoc}
* Important!
* If the predicate actually injects the removed keys in another hash container, you may experience performance losses.
*
* @param predicate keys for which apply() returns true are removed
* @return the number of removed keys, computed as the difference of size() before and after
*/
@Override
public int removeAll(final IntPredicate predicate)
{
final int before = this.size();
// the 0 key lives outside the buffer, it is tested separately
if (this.allocatedDefaultKey) {
if (predicate.apply((0)))
{
this.allocatedDefaultKey = false;
}
}
final int[] keys = this.keys;
// i is only advanced when slot i was kept or was free
for (int i = 0; i < keys.length;)
{
if ((keys[i] != (0)))
{
if (predicate.apply(keys[i]))
{
this.assigned--;
// removal may move another key into slot i
shiftConflictingKeys(i);
// Repeat the check for the same i.
continue;
}
}
i++;
}
return before - this.size();
}
/**
* Create a set from a variable number of arguments or an array of <code>int</code>.
* The set is pre-sized with elements.length.
*
* @param hashStrategy strategy used to hash and compare the keys
* @param elements the keys to add
* @return the new set
*/
public static IntOpenCustomHashSet from(final IntHashingStrategy hashStrategy, final int... elements)
{
final IntOpenCustomHashSet set = new IntOpenCustomHashSet(elements.length, hashStrategy);
set.add(elements);
return set;
}
/**
* Create a set from elements of another container.
* Shortcut for the (container, strategy) constructor.
*
* @param container source of the keys to add
* @param hashStrategy strategy used to hash and compare the keys
* @return the new set
*/
public static IntOpenCustomHashSet from(final IntContainer container, final IntHashingStrategy hashStrategy)
{
return new IntOpenCustomHashSet(container, hashStrategy);
}
/**
* Create a new hash set with default parameters (shortcut
* instead of using a constructor).
*
* @param hashStrategy strategy used to hash and compare the keys
* @return the new, empty set
*/
public static IntOpenCustomHashSet newInstance(final IntHashingStrategy hashStrategy)
{
return new IntOpenCustomHashSet(hashStrategy);
}
/**
* Returns a new hash set with the given capacity and load factor (shortcut
* instead of using a constructor).
*
* @param initialCapacity number of elements the set is expected to hold
* @param loadFactor load factor of the set
* @param hashStrategy strategy used to hash and compare the keys
* @return the new, empty set
*/
public static IntOpenCustomHashSet newInstanceWithCapacity(final int initialCapacity, final float loadFactor, final IntHashingStrategy hashStrategy)
{
return new IntOpenCustomHashSet(initialCapacity, loadFactor, hashStrategy);
}
/**
* Return the current {@link IntHashingStrategy} in use.
* @return the strategy instance given at construction time
*/
public IntHashingStrategy strategy()
{
return this.hashStrategy;
}
//Test for existence in template
}