All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.carrotsearch.hppcrt.sets.IntOpenCustomHashSet Maven / Gradle / Ivy

Go to download

High Performance Primitive Collections Realtime (a fork of Carrotsearch's HPPC). Fundamental data structures (maps, sets, lists, stacks, queues, heaps, sorts) generated for combinations of object and primitive types to conserve JVM memory and speed up execution. The Realtime fork intends to extend the collections while tweaking and optimizing them to remove any dynamic allocations at runtime and to achieve low-variance execution times.

There is a newer version: 0.7.5
Show newest version
package com.carrotsearch.hppcrt.sets;

import com.carrotsearch.hppcrt.*;
import com.carrotsearch.hppcrt.cursors.*;
import com.carrotsearch.hppcrt.predicates.*;
import com.carrotsearch.hppcrt.procedures.*;
import com.carrotsearch.hppcrt.strategies.*;
import com.carrotsearch.hppcrt.hash.*;

  
  
  
  
//If RH is defined, RobinHood Hashing is in effect :
  

/**
 * A hash set of ints, implemented using using open
 * addressing with linear probing for collision resolution.
 *
 * The difference with {@link IntOpenHashSet} is that it uses a
 * {@link IntHashingStrategy} to compare objects externally instead of using
 * the built-in hashCode() /  equals(). In particular, the management of null
 * keys is up to the {@link IntHashingStrategy} implementation.
 * 

* The internal buffers of this implementation ({@link #keys}, etc...) * are always allocated to the nearest size that is a power of two. When * the capacity exceeds the given load factor, the buffer size is doubled. *

*

Important note. The implementation uses power-of-two tables and linear * probing, which may cause poor performance (many collisions) if hash values are * not properly distributed. Therefore, it is up to the {@link IntHashingStrategy} to * assure good performance.

* * * * @author This code is inspired by the collaboration and implementation in the fastutil project. * *

Robin-Hood hashing algorithm is also used to minimize variance * in insertion and search-related operations, for an all-around smother operation at the cost * of smaller peak performance:

*

- Pedro Celis (1986) for the original Robin-Hood hashing paper,

*

- MoonPolySoft/Cliff Moon for the initial Robin-hood on HPPC implementation,

*

- Vincent Sonnier for the present implementation using cached hashes.

* */ @javax.annotation.Generated(date = "2015-02-27T19:21:17+0100", value = "HPPC-RT generated from: IntOpenCustomHashSet.java") public class IntOpenCustomHashSet extends AbstractIntCollection implements IntLookupContainer, IntSet, Cloneable { /** * Minimum capacity for the map. */ public final static int MIN_CAPACITY = HashContainerUtils.MIN_CAPACITY; /** * Default capacity. */ public final static int DEFAULT_CAPACITY = HashContainerUtils.DEFAULT_CAPACITY; /** * Default load factor. */ public final static float DEFAULT_LOAD_FACTOR = HashContainerUtils.DEFAULT_LOAD_FACTOR; /** * Hash-indexed array holding all set entries. *

* Direct set iteration: iterate {keys[i]} for i in [0; keys.length[ where keys[i] != 0/null, then also * {0/null} is in the set if {@link #allocatedDefaultKey} = true. *

* *

Direct iteration warning: * If the iteration goal is to fill another hash container, please iterate {@link #keys} in reverse to prevent performance losses. * @see #keys */ public int[] keys; /** * * Caches the hash value = HASH(keys[i]) & mask, if keys[i] != 0/null, * for every index i. * * @see #assigned */ protected int[] hash_cache; /** *True if key = 0/null is in the map. */ public boolean allocatedDefaultKey = false; /** * Cached number of assigned slots in {@link #keys}. */ protected int assigned; /** * The load factor for this map (fraction of allocated slots * before the buffers must be rehashed or reallocated). */ protected float loadFactor; /** * Resize buffers when {@link #keys} hits this value. */ protected int resizeAt; /** * The most recent slot accessed in {@link #contains}. * * @see #contains * @see #lkey */ protected int lastSlot; /** * Custom hashing strategy : * comparisons and hash codes of keys will be computed * with the strategy methods instead of the native Object equals() and hashCode() methods. 
*/ protected final IntHashingStrategy hashStrategy; /** * Creates a hash set with the default capacity of {@value #DEFAULT_CAPACITY}, * load factor of {@value #DEFAULT_LOAD_FACTOR}, using the hashStrategy as {@link IntHashingStrategy} */ public IntOpenCustomHashSet(final IntHashingStrategy hashStrategy) { this(IntOpenCustomHashSet.DEFAULT_CAPACITY, IntOpenCustomHashSet.DEFAULT_LOAD_FACTOR, hashStrategy); } /** * Creates a hash set with the given capacity, * load factor of {@value #DEFAULT_LOAD_FACTOR}, using the hashStrategy as {@link IntHashingStrategy} */ public IntOpenCustomHashSet(final int initialCapacity, final IntHashingStrategy hashStrategy) { this(initialCapacity, IntOpenCustomHashSet.DEFAULT_LOAD_FACTOR, hashStrategy); } /** * Creates a hash set with the given capacity and load factor, using the hashStrategy as {@link IntHashingStrategy} */ public IntOpenCustomHashSet(final int initialCapacity, final float loadFactor, final IntHashingStrategy hashStrategy) { //only accept not-null strategies. if (hashStrategy != null) { this.hashStrategy = hashStrategy; } else { throw new IllegalArgumentException("IntOpenCustomHashSet() cannot have a null hashStrategy !"); } assert loadFactor > 0 && loadFactor <= 1 : "Load factor must be between (0, 1]."; this.loadFactor = loadFactor; //take into account of the load factor to garantee no reallocations before reaching initialCapacity. int internalCapacity = (int) (initialCapacity / loadFactor) + IntOpenCustomHashSet.MIN_CAPACITY; //align on next power of two internalCapacity = HashContainerUtils.roundCapacity(internalCapacity); this.keys = (new int[internalCapacity]); this.hash_cache = new int[internalCapacity]; //Take advantage of the rounding so that the resize occur a bit later than expected. 
//allocate so that there is at least one slot that remains allocated = false //this is compulsory to guarantee proper stop in searching loops this.resizeAt = Math.max(3, (int) (internalCapacity * loadFactor)) - 2; } /** * Creates a hash set from elements of another container. Default load factor is used. */ public IntOpenCustomHashSet(final IntContainer container, final IntHashingStrategy hashStrategy) { this(container.size(), hashStrategy); addAll(container); } /** * {@inheritDoc} */ @Override public boolean add(int e) { if (e == (0)) { if (this.allocatedDefaultKey) { return false; } this.allocatedDefaultKey = true; return true; } final int mask = this.keys.length - 1; final IntHashingStrategy strategy = this.hashStrategy; final int[] keys = this.keys; //copied straight from fastutil "fast-path" int slot; int curr; //1.1 The rehashed key slot is occupied... if ((curr = keys[slot = PhiMix.hash(strategy.computeHashCode(e)) & mask]) != (0)) { //1.2 the occupied place is indeed key, return false if (strategy.equals(curr, e)) { return false; } //1.3 key is colliding, manage below : } else if (this.assigned < this.resizeAt) { //1.4 key is not colliding, without resize, so insert, return true. keys[slot] = e; this.assigned++; this.hash_cache[slot] = slot; return true; } final int[] cached = this.hash_cache; int tmpKey; int tmpAllocated; int initial_slot = slot; int dist = 0; int existing_distance = 0; while ((keys[slot] != (0))) { if (strategy.equals(e, keys[slot])) { return false; } //re-shuffle keys to minimize variance existing_distance = (slot < cached[slot] ? slot + cached.length - cached[slot] : slot - cached[slot]); if (dist > existing_distance) { //swap current (key, value, initial_slot) with slot places tmpKey = keys[slot]; keys[slot] = e; e = tmpKey; tmpAllocated = cached[slot]; cached[slot] = initial_slot; initial_slot = tmpAllocated; dist = existing_distance; } slot = (slot + 1) & mask; dist++; } // Check if we need to grow. 
If so, reallocate new data, // fill in the last element and rehash. if (this.assigned == this.resizeAt) { expandAndAdd(e, slot); } else { this.assigned++; cached[slot] = initial_slot; keys[slot] = e; } return true; } /** * Adds two elements to the set. */ public int add(final int e1, final int e2) { int count = 0; if (add(e1)) { count++; } if (add(e2)) { count++; } return count; } /** * Vararg-signature method for adding elements to this set. *

This method is handy, but costly if used in tight loops (anonymous * array passing)

* * @return Returns the number of elements that were added to the set * (were not present in the set). */ public int add(final int... elements) { int count = 0; for (final int e : elements) { if (add(e)) { count++; } } return count; } /** * Adds all elements from a given container to this set. * * @return Returns the number of elements actually added as a result of this * call (not previously present in the set). */ public int addAll(final IntContainer container) { return addAll((Iterable) container); } /** * Adds all elements from a given iterable to this set. * * @return Returns the number of elements actually added as a result of this * call (not previously present in the set). */ public int addAll(final Iterable iterable) { int count = 0; for (final IntCursor cursor : iterable) { if (add(cursor.value)) { count++; } } return count; } /** * Expand the internal storage buffers (capacity) or rehash current * keys and values if there are a lot of deleted slots. */ private void expandAndAdd(final int pendingKey, final int freeSlot) { assert this.assigned == this.resizeAt; //default sentinel value is never in the keys[] array, so never trigger reallocs assert (pendingKey != (0)); // Try to allocate new buffers first. If we OOM, it'll be now without // leaving the data structure in an inconsistent state. final int[] oldKeys = this.keys; allocateBuffers(HashContainerUtils.nextCapacity(this.keys.length)); // We have succeeded at allocating new data so insert the pending key/value at // the free slot in the old arrays before rehashing. 
this.lastSlot = -1; this.assigned++; oldKeys[freeSlot] = pendingKey; //Variables for adding final int mask = this.keys.length - 1; final IntHashingStrategy strategy = this.hashStrategy; int e = (0); //adding phase int slot = -1; final int[] keys = this.keys; final int[] cached = this.hash_cache; int tmpKey = (0); int tmpAllocated = -1; int initial_slot = -1; int dist = -1; int existing_distance = -1; //iterate all the old arrays to add in the newly allocated buffers //It is important to iterate backwards to minimize the conflict chain length ! for (int i = oldKeys.length; --i >= 0;) { if ((oldKeys[i] != (0))) { e = oldKeys[i]; slot = PhiMix.hash(strategy.computeHashCode(e)) & mask; initial_slot = slot; dist = 0; while ((keys[slot] != (0))) { //re-shuffle keys to minimize variance existing_distance = (slot < cached[slot] ? slot + cached.length - cached[slot] : slot - cached[slot]); if (dist > existing_distance) { //swap current (key, value, initial_slot) with slot places tmpKey = keys[slot]; keys[slot] = e; e = tmpKey; tmpAllocated = cached[slot]; cached[slot] = initial_slot; initial_slot = tmpAllocated; dist = existing_distance; } //endif slot = (slot + 1) & mask; dist++; } //end while //place it at that position cached[slot] = initial_slot; keys[slot] = e; } } } /** * Allocate internal buffers for a given capacity. * * @param capacity New capacity (must be a power of two). */ private void allocateBuffers(final int capacity) { final int[] keys = (new int[capacity]); final int[] allocated = new int[capacity]; this.keys = keys; this.hash_cache = allocated; //allocate so that there is at least one slot that remains allocated = false //this is compulsory to guarantee proper stop in searching loops this.resizeAt = Math.max(3, (int) (capacity * this.loadFactor)) - 2; } /** * {@inheritDoc} */ @Override public int removeAllOccurrences(final int key) { return remove(key) ? 1 : 0; } /** * An alias for the (preferred) {@link #removeAllOccurrences}. 
*/ public boolean remove(final int key) { if (key == (0)) { if (this.allocatedDefaultKey) { this.allocatedDefaultKey = false; return true; } return false; } final int mask = this.keys.length - 1; final IntHashingStrategy strategy = this.hashStrategy; final int[] keys = this.keys; //copied straight from fastutil "fast-path" int slot; int curr; //1.1 The rehashed slot is free, nothing to remove, return false if ((curr = keys[slot = PhiMix.hash(strategy.computeHashCode(key)) & mask]) == (0)) { return false; } //1.2) The rehashed entry is occupied by the key, remove it, return true if (strategy.equals(curr, key)) { this.assigned--; shiftConflictingKeys(slot); return true; } //2. Hash collision, search for the key along the path slot = (slot + 1) & mask; int dist = 0; final int[] cached = this.hash_cache; while ((keys[slot] != (0)) && dist <= (slot < cached[slot] ? slot + cached.length - cached[slot] : slot - cached[slot]) ) { if (strategy.equals(key, keys[slot])) { this.assigned--; shiftConflictingKeys(slot); return true; } slot = (slot + 1) & mask; dist++; } //end while true return false; } /** * Shift all the slot-conflicting keys allocated to (and including) slot. */ protected void shiftConflictingKeys(int slotCurr) { // Copied nearly verbatim from fastutil's impl. final int mask = this.keys.length - 1; int slotPrev, slotOther; final IntHashingStrategy strategy = this.hashStrategy; final int[] keys = this.keys; final int[] cached = this.hash_cache; while (true) { slotCurr = ((slotPrev = slotCurr) + 1) & mask; while ((keys[slotCurr] != (0))) { //use the cached value, no need to recompute slotOther = cached[slotCurr]; if (slotPrev <= slotCurr) { // We are on the right of the original slot. if (slotPrev >= slotOther || slotOther > slotCurr) { break; } } else { // We have wrapped around. if (slotPrev >= slotOther && slotOther > slotCurr) { break; } } slotCurr = (slotCurr + 1) & mask; } if (!(keys[slotCurr] != (0))) { break; } // Shift key/allocated pair. 
keys[slotPrev] = keys[slotCurr]; cached[slotPrev] = cached[slotCurr]; } //means not allocated keys[slotPrev] = (0); } /** * Returns the last key saved in a call to {@link #contains} if it returned true. * Precondition : {@link #contains} must have been called previously ! * @see #contains */ public int lkey() { if (this.lastSlot == -2) { return (0); } assert this.lastSlot >= 0 : "Call containsKey() first."; assert (this.keys[this.lastSlot] != (0)) : "Last call to exists did not have any associated value."; return this.keys[this.lastSlot]; } /** * @return Returns the slot of the last key looked up in a call to {@link #contains} if * it returned true. * or else -2 if {@link #contains} were succesfull on key = 0/null * @see #contains */ public int lslot() { assert this.lastSlot >= 0 || this.lastSlot == -2 : "Call contains() first."; return this.lastSlot; } /** * {@inheritDoc} * * */ @Override public boolean contains(final int key) { if (key == (0)) { if (this.allocatedDefaultKey) { this.lastSlot = -2; } else { this.lastSlot = -1; } return this.allocatedDefaultKey; } final int mask = this.keys.length - 1; final IntHashingStrategy strategy = this.hashStrategy; //copied straight from fastutil "fast-path" int slot; int curr; //1.1 The rehashed slot is free, return false if ((curr = keys[slot = PhiMix.hash(strategy.computeHashCode(key)) & mask]) == (0)) { this.lastSlot = -1; return false; } //1.2) The rehashed entry is occupied by the key, return true if (strategy.equals(curr, key)) { this.lastSlot = slot; return true; } //2. Hash collision, search for the key along the path slot = (slot + 1) & mask; final int[] cached = this.hash_cache; int dist = 0; while ((keys[slot] != (0)) && dist <= (slot < cached[slot] ? 
slot + cached.length - cached[slot] : slot - cached[slot]) ) { if (strategy.equals(key, keys[slot])) { this.lastSlot = slot; return true; } slot = (slot + 1) & mask; dist++; } //end while true //unsuccessful search this.lastSlot = -1; return false; } /** * {@inheritDoc} * *

Does not release internal buffers.

*/ @Override public void clear() { this.assigned = 0; this.lastSlot = -1; // States are always cleared. this.allocatedDefaultKey = false; //Faster than Arrays.fill(keys, null); // Help the GC. IntArrays.blankArray(this.keys, 0, this.keys.length); } /** * {@inheritDoc} */ @Override public int size() { return this.assigned + (this.allocatedDefaultKey ? 1 : 0); } /** * {@inheritDoc} */ @Override public int capacity() { return this.resizeAt - 1; } /** * {@inheritDoc} */ @Override public int hashCode() { final IntHashingStrategy strategy = this.hashStrategy; int h = 0; if (this.allocatedDefaultKey) { h += 0; } final int[] keys = this.keys; for (int i = keys.length; --i >= 0;) { if ((keys[i] != (0))) { h += PhiMix.hash(strategy.computeHashCode(keys[i])); } } return h; } /** * {@inheritDoc} */ @Override public boolean equals(final Object obj) { if (obj != null) { if (obj == this) { return true; } if (!(obj instanceof IntOpenCustomHashSet)) { return false; } if (!this.hashStrategy.equals(((IntOpenCustomHashSet) obj).hashStrategy)) { return false; } final IntOpenCustomHashSet other = (IntOpenCustomHashSet) obj; if (other.size() == this.size()) { final EntryIterator it = this.iterator(); while (it.hasNext()) { if (!other.contains(it.next().value)) { //recycle it.release(); return false; } } return true; } } return false; } /** * An iterator implementation for {@link #iterator}. */ public final class EntryIterator extends AbstractIterator { public final IntCursor cursor; public EntryIterator() { this.cursor = new IntCursor(); this.cursor.index = -2; } /** * Iterate backwards w.r.t the buffer, to * minimize collision chains when filling another hash container (ex. 
with putAll()) */ @Override protected IntCursor fetch() { if (this.cursor.index == IntOpenCustomHashSet.this.keys.length + 1) { if (IntOpenCustomHashSet.this.allocatedDefaultKey) { this.cursor.index = IntOpenCustomHashSet.this.keys.length; this.cursor.value = (0); return this.cursor; } //no value associated with the default key, continue iteration... this.cursor.index = IntOpenCustomHashSet.this.keys.length; } int i = this.cursor.index - 1; while (i >= 0 && !(IntOpenCustomHashSet.this.keys[i] != (0))) { i--; } if (i == -1) { return done(); } this.cursor.index = i; this.cursor.value = IntOpenCustomHashSet.this.keys[i]; return this.cursor; } } /** * internal pool of EntryIterator */ protected final IteratorPool entryIteratorPool = new IteratorPool( new ObjectFactory() { @Override public EntryIterator create() { return new EntryIterator(); } @Override public void initialize(final EntryIterator obj) { obj.cursor.index = IntOpenCustomHashSet.this.keys.length + 1; } @Override public void reset(final EntryIterator obj) { // nothing } }); /** * {@inheritDoc} * @return */ @Override public EntryIterator iterator() { //return new EntryIterator(); return this.entryIteratorPool.borrow(); } /** * {@inheritDoc} */ @Override public T forEach(final T procedure) { if (this.allocatedDefaultKey) { procedure.apply((0)); } final int[] keys = this.keys; //Iterate in reverse for side-stepping the longest conflict chain //in another hash, in case apply() is actually used to fill another hash container. for (int i = keys.length - 1; i >= 0; i--) { if ((keys[i] != (0))) { procedure.apply(keys[i]); } } return procedure; } /** * {@inheritDoc} */ @Override public int[] toArray(final int[] target) { int count = 0; if (this.allocatedDefaultKey) { target[count++] = (0); } final int[] keys = this.keys; for (int i = 0; i < keys.length; i++) { if ((keys[i] != (0))) { target[count++] = keys[i]; } } assert count == this.size(); return target; } /** * Clone this object. 
* */ @Override public IntOpenCustomHashSet clone() { final IntOpenCustomHashSet cloned = new IntOpenCustomHashSet(this.size(), this.loadFactor, this.hashStrategy); cloned.addAll(this); cloned.allocatedDefaultKey = this.allocatedDefaultKey; cloned.defaultValue = this.defaultValue; return cloned; } /** * {@inheritDoc} */ @Override public T forEach(final T predicate) { if (this.allocatedDefaultKey) { if (!predicate.apply((0))) { return predicate; } } final int[] keys = this.keys; //Iterate in reverse for side-stepping the longest conflict chain //in another hash, in case apply() is actually used to fill another hash container. for (int i = keys.length - 1; i >= 0; i--) { if ((keys[i] != (0))) { if (!predicate.apply(keys[i])) { break; } } } return predicate; } /** * {@inheritDoc} *

Important! * If the predicate actually injects the removed keys in another hash container, you may experience performance losses. */ @Override public int removeAll(final IntPredicate predicate) { final int before = this.size(); if (this.allocatedDefaultKey) { if (predicate.apply((0))) { this.allocatedDefaultKey = false; } } final int[] keys = this.keys; for (int i = 0; i < keys.length;) { if ((keys[i] != (0))) { if (predicate.apply(keys[i])) { this.assigned--; shiftConflictingKeys(i); // Repeat the check for the same i. continue; } } i++; } return before - this.size(); } /** * Create a set from a variable number of arguments or an array of int. */ public static IntOpenCustomHashSet from(final IntHashingStrategy hashStrategy, final int... elements) { final IntOpenCustomHashSet set = new IntOpenCustomHashSet(elements.length, hashStrategy); set.add(elements); return set; } /** * Create a set from elements of another container. */ public static IntOpenCustomHashSet from(final IntContainer container, final IntHashingStrategy hashStrategy) { return new IntOpenCustomHashSet(container, hashStrategy); } /** * Create a new hash set with default parameters (shortcut * instead of using a constructor). */ public static IntOpenCustomHashSet newInstance(final IntHashingStrategy hashStrategy) { return new IntOpenCustomHashSet(hashStrategy); } /** * Returns a new object of this class with no need to declare generic type (shortcut * instead of using a constructor). */ public static IntOpenCustomHashSet newInstanceWithCapacity(final int initialCapacity, final float loadFactor, final IntHashingStrategy hashStrategy) { return new IntOpenCustomHashSet(initialCapacity, loadFactor, hashStrategy); } /** * Return the current {@link HashingStrategy} in use. * @return */ public IntHashingStrategy strategy() { return this.hashStrategy; } //Test for existence in template }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy