All downloads are free. The search and download functionality uses the official Maven repository.

com.carrotsearch.hppcrt.sets.DoubleOpenCustomHashSet Maven / Gradle / Ivy

package com.carrotsearch.hppcrt.sets;

import java.util.*;

import com.carrotsearch.hppcrt.*;
import com.carrotsearch.hppcrt.cursors.*;
import com.carrotsearch.hppcrt.predicates.*;
import com.carrotsearch.hppcrt.procedures.*;
import com.carrotsearch.hppcrt.strategies.*;

  
  
  
  
// If RH is defined, RobinHood Hashing is in effect :
  
/**
 * A hash set of doubles, implemented using using open
 * addressing with linear probing for collision resolution.
 *
 * The difference with {@link DoubleOpenHashSet} is that it uses a
 * {@link DoubleHashingStrategy} to compare objects externally instead of using
 * the built-in hashCode() /  equals(). In particular, the management of null
 * keys is up to the {@link DoubleHashingStrategy} implementation.
 * 

* The internal buffers of this implementation ({@link #keys}, etc...) * are always allocated to the nearest size that is a power of two. When * the capacity exceeds the given load factor, the buffer size is doubled. *

*

Important note. The implementation uses power-of-two tables and linear * probing, which may cause poor performance (many collisions) if hash values are * not properly distributed. Therefore, it is up to the {@link DoubleHashingStrategy} to * assure good performance.

* * * * @author This code is inspired by the collaboration and implementation in the fastutil project. * *

Robin-Hood hashing algorithm is also used to minimize variance * in insertion and search-related operations, for an all-around smother operation at the cost * of smaller peak performance:

*

- Pedro Celis (1986) for the original Robin-Hood hashing paper,

*

- MoonPolySoft/Cliff Moon for the initial Robin-hood on HPPC implementation,

*

- Vincent Sonnier for the present implementation using cached hashes.

* */ @javax.annotation.Generated(date = "2014-10-25T20:54:12+0200", value = "HPPC-RT generated from: DoubleOpenCustomHashSet.java") public class DoubleOpenCustomHashSet extends AbstractDoubleCollection implements DoubleLookupContainer, DoubleSet, Cloneable { /** * Minimum capacity for the map. */ public final static int MIN_CAPACITY = HashContainerUtils.MIN_CAPACITY; /** * Default capacity. */ public final static int DEFAULT_CAPACITY = HashContainerUtils.DEFAULT_CAPACITY; /** * Default load factor. */ public final static float DEFAULT_LOAD_FACTOR = HashContainerUtils.DEFAULT_LOAD_FACTOR; /** * Hash-indexed array holding all set entries. *

* Direct set iteration: iterate keys[i] for i in [0; keys.length[ where this.allocated[i] is true. *

* *

Direct iteration warning: * If the iteration goal is to fill another hash container, please iterate {@link #keys} in reverse to prevent performance losses. * @see #allocated */ public double[] keys; /** * Information if an entry (slot) in the {@link #values} table is allocated * or empty. * * In addition it caches hash value : If = -1, it means not allocated, else = HASH(keys[i]) & mask * for every index i. * * @see #assigned */ public int[] allocated; /** * Cached number of assigned slots in {@link #allocated}. */ protected int assigned; /** * The load factor for this map (fraction of allocated slots * before the buffers must be rehashed or reallocated). */ protected float loadFactor; /** * Resize buffers when {@link #allocated} hits this value. */ protected int resizeAt; /** * The most recent slot accessed in {@link #contains}. * * @see #contains * @see #lkey */ protected int lastSlot; /** * Custom hashing strategy : * comparisons and hash codes of keys will be computed * with the strategy methods instead of the native Object equals() and hashCode() methods. 
*/ protected final DoubleHashingStrategy hashStrategy; /** * Creates a hash set with the default capacity of {@value #DEFAULT_CAPACITY}, * load factor of {@value #DEFAULT_LOAD_FACTOR}, using the hashStrategy as {@link DoubleHashingStrategy} */ public DoubleOpenCustomHashSet(final DoubleHashingStrategy hashStrategy) { this(DoubleOpenCustomHashSet.DEFAULT_CAPACITY, DoubleOpenCustomHashSet.DEFAULT_LOAD_FACTOR, hashStrategy); } /** * Creates a hash set with the given capacity, * load factor of {@value #DEFAULT_LOAD_FACTOR}, using the hashStrategy as {@link DoubleHashingStrategy} */ public DoubleOpenCustomHashSet(final int initialCapacity, final DoubleHashingStrategy hashStrategy) { this(initialCapacity, DoubleOpenCustomHashSet.DEFAULT_LOAD_FACTOR, hashStrategy); } /** * Creates a hash set with the given capacity and load factor, using the hashStrategy as {@link DoubleHashingStrategy} */ public DoubleOpenCustomHashSet(final int initialCapacity, final float loadFactor, final DoubleHashingStrategy hashStrategy) { //only accept not-null strategies. if (hashStrategy != null) { this.hashStrategy = hashStrategy; } else { throw new IllegalArgumentException("DoubleOpenCustomHashSet() cannot have a null hashStrategy !"); } assert loadFactor > 0 && loadFactor <= 1 : "Load factor must be between (0, 1]."; this.loadFactor = loadFactor; //take into account of the load factor to garantee no reallocations before reaching initialCapacity. int internalCapacity = (int) (initialCapacity / loadFactor) + DoubleOpenCustomHashSet.MIN_CAPACITY; //align on next power of two internalCapacity = HashContainerUtils.roundCapacity(internalCapacity); this.keys = new double [internalCapacity]; //fill with "not allocated" value this.allocated = new int[internalCapacity]; Internals.blankIntArrayMinusOne(this.allocated, 0, this.allocated.length); //Take advantage of the rounding so that the resize occur a bit later than expected. 
//allocate so that there is at least one slot that remains allocated = false //this is compulsory to guarantee proper stop in searching loops this.resizeAt = Math.max(3, (int) (internalCapacity * loadFactor)) - 2; } /** * Creates a hash set from elements of another container. Default load factor is used. */ public DoubleOpenCustomHashSet(final DoubleContainer container, final DoubleHashingStrategy hashStrategy) { this(container.size(), hashStrategy); addAll(container); } /** * {@inheritDoc} */ @Override public boolean add(double e) { assert this.assigned < this.allocated.length; final int mask = this.allocated.length - 1; final DoubleHashingStrategy strategy = this.hashStrategy; int slot = Internals.rehash(strategy.computeHashCode(e)) & mask; final double[] keys = this.keys; final int[] allocated = this.allocated; double tmpKey; int tmpAllocated; int initial_slot = slot; int dist = 0; int existing_distance = 0; while (allocated[slot] != -1 ) { if (strategy.equals(e, keys[slot])) { return false; } //re-shuffle keys to minimize variance existing_distance = (slot < allocated[slot] ? slot + allocated.length - allocated[slot] : slot - allocated[slot]); if (dist > existing_distance) { //swap current (key, value, initial_slot) with slot places tmpKey = keys[slot]; keys[slot] = e; e = tmpKey; tmpAllocated = allocated[slot]; allocated[slot] = initial_slot; initial_slot = tmpAllocated; dist = existing_distance; } slot = (slot + 1) & mask; dist++; } // Check if we need to grow. If so, reallocate new data, // fill in the last element and rehash. if (this.assigned == this.resizeAt) { expandAndAdd(e, slot); } else { this.assigned++; allocated[slot] = initial_slot; keys[slot] = e; } return true; } /** * Adds two elements to the set. */ public int add(final double e1, final double e2) { int count = 0; if (add(e1)) { count++; } if (add(e2)) { count++; } return count; } /** * Vararg-signature method for adding elements to this set. *

This method is handy, but costly if used in tight loops (anonymous * array passing)

* * @return Returns the number of elements that were added to the set * (were not present in the set). */ public int add(final double... elements) { int count = 0; for (final double e : elements) { if (add(e)) { count++; } } return count; } /** * Adds all elements from a given container to this set. * * @return Returns the number of elements actually added as a result of this * call (not previously present in the set). */ public int addAll(final DoubleContainer container) { return addAll((Iterable) container); } /** * Adds all elements from a given iterable to this set. * * @return Returns the number of elements actually added as a result of this * call (not previously present in the set). */ public int addAll(final Iterable iterable) { int count = 0; for (final DoubleCursor cursor : iterable) { if (add(cursor.value)) { count++; } } return count; } /** * Expand the internal storage buffers (capacity) or rehash current * keys and values if there are a lot of deleted slots. */ private void expandAndAdd(final double pendingKey, final int freeSlot) { assert this.assigned == this.resizeAt; assert this.allocated[freeSlot] == -1; // Try to allocate new buffers first. If we OOM, it'll be now without // leaving the data structure in an inconsistent state. final double[] oldKeys = this.keys; final int[] oldAllocated = this.allocated; allocateBuffers(HashContainerUtils.nextCapacity(this.keys.length)); // We have succeeded at allocating new data so insert the pending key/value at // the free slot in the old arrays before rehashing. this.lastSlot = -1; this.assigned++; //We don't care of the oldAllocated value, so long it means "allocated = true", since the whole set is rebuilt from scratch. 
oldAllocated[freeSlot] = 1; oldKeys[freeSlot] = pendingKey; //Variables for adding final int mask = this.allocated.length - 1; final DoubleHashingStrategy strategy = this.hashStrategy; double e = (0.0D); //adding phase int slot = -1; final double[] keys = this.keys; final int[] allocated = this.allocated; double tmpKey = (0.0D); int tmpAllocated = -1; int initial_slot = -1; int dist = -1; int existing_distance = -1; //iterate all the old arrays to add in the newly allocated buffers //It is important to iterate backwards to minimize the conflict chain length ! for (int i = oldAllocated.length; --i >= 0;) { if (oldAllocated[i] != -1 ) { e = oldKeys[i]; slot = Internals.rehash(strategy.computeHashCode(e)) & mask; initial_slot = slot; dist = 0; while (allocated[slot] != -1 ) { //re-shuffle keys to minimize variance existing_distance = (slot < allocated[slot] ? slot + allocated.length - allocated[slot] : slot - allocated[slot]); if (dist > existing_distance) { //swap current (key, value, initial_slot) with slot places tmpKey = keys[slot]; keys[slot] = e; e = tmpKey; tmpAllocated = allocated[slot]; allocated[slot] = initial_slot; initial_slot = tmpAllocated; dist = existing_distance; } //endif slot = (slot + 1) & mask; dist++; } //end while //place it at that position allocated[slot] = initial_slot; keys[slot] = e; } } } /** * Allocate internal buffers for a given capacity. * * @param capacity New capacity (must be a power of two). 
*/ private void allocateBuffers(final int capacity) { final double[] keys = new double [capacity]; final int[] allocated = new int[capacity]; Internals.blankIntArrayMinusOne(allocated, 0, allocated.length); this.keys = keys; this.allocated = allocated; //allocate so that there is at least one slot that remains allocated = false //this is compulsory to guarantee proper stop in searching loops this.resizeAt = Math.max(3, (int) (capacity * this.loadFactor)) - 2; } /** * {@inheritDoc} */ @Override public int removeAllOccurrences(final double key) { return remove(key) ? 1 : 0; } /** * An alias for the (preferred) {@link #removeAllOccurrences}. */ public boolean remove(final double key) { final int mask = this.allocated.length - 1; final DoubleHashingStrategy strategy = this.hashStrategy; int slot = Internals.rehash(strategy.computeHashCode(key)) & mask; int dist = 0; final double[] keys = this.keys; final int[] states = this.allocated; while (states[slot] != -1 && dist <= (slot < states[slot] ? slot + states.length - states[slot] : slot - states[slot]) ) { if (strategy.equals(key, keys[slot])) { this.assigned--; shiftConflictingKeys(slot); return true; } slot = (slot + 1) & mask; dist++; } //end while true return false; } /** * Shift all the slot-conflicting keys allocated to (and including) slot. */ protected void shiftConflictingKeys(int slotCurr) { // Copied nearly verbatim from fastutil's impl. final int mask = this.allocated.length - 1; int slotPrev, slotOther; final DoubleHashingStrategy strategy = this.hashStrategy; final double[] keys = this.keys; final int[] allocated = this.allocated; while (true) { slotCurr = ((slotPrev = slotCurr) + 1) & mask; while (allocated[slotCurr] != -1 ) { //use the cached value, no need to recompute slotOther = allocated[slotCurr]; if (slotPrev <= slotCurr) { // We are on the right of the original slot. if (slotPrev >= slotOther || slotOther > slotCurr) { break; } } else { // We have wrapped around. 
if (slotPrev >= slotOther && slotOther > slotCurr) { break; } } slotCurr = (slotCurr + 1) & mask; } if ( allocated[slotCurr] == -1 ) { break; } // Shift key/allocated pair. keys[slotPrev] = keys[slotCurr]; allocated[slotPrev] = allocated[slotCurr]; } //means not allocated allocated[slotPrev] = -1; /* */ } /** * Returns the last key saved in a call to {@link #contains} if it returned true. * Precondition : {@link #contains} must have been called previously ! * @see #contains */ public double lkey() { assert this.lastSlot >= 0 : "Call contains() first."; assert this.allocated[this.lastSlot] != -1 : "Last call to exists did not have any associated value."; return this.keys[this.lastSlot]; } /** * @return Returns the slot of the last key looked up in a call to {@link #contains} if * it returned true. * @see #contains */ public int lslot() { assert this.lastSlot >= 0 : "Call contains() first."; return this.lastSlot; } /** * {@inheritDoc} * * */ @Override public boolean contains(final double key) { final int mask = this.allocated.length - 1; final DoubleHashingStrategy strategy = this.hashStrategy; int slot = Internals.rehash(strategy.computeHashCode(key)) & mask; int dist = 0; final double[] keys = this.keys; final int[] states = this.allocated; while (states[slot] != -1 && dist <= (slot < states[slot] ? slot + states.length - states[slot] : slot - states[slot]) ) { if (strategy.equals(key, keys[slot])) { this.lastSlot = slot; return true; } slot = (slot + 1) & mask; dist++; } //end while true //unsuccessful search this.lastSlot = -1; return false; } /** * {@inheritDoc} * *

Does not release internal buffers.

*/ @Override public void clear() { this.assigned = 0; this.lastSlot = -1; // States are always cleared. Internals.blankIntArrayMinusOne(this.allocated, 0, this.allocated.length); } /** * {@inheritDoc} */ @Override public int size() { return this.assigned; } /** * {@inheritDoc} */ @Override public int capacity() { return this.resizeAt - 1; } /** * {@inheritDoc} */ @Override public int hashCode() { int h = 0; final double[] keys = this.keys; final int[] states = this.allocated; final DoubleHashingStrategy strategy = this.hashStrategy; for (int i = states.length; --i >= 0;) { if (states[i] != -1 ) { //This hash is an intrinsic property of the container contents, //consequently is independent from the DoubleHashStrategy, so do not use it ! h += Internals.rehash(strategy.computeHashCode(keys[i])); } } return h; } /** * this instance and obj can only be equal if :
     * (both are  DoubleOpenCustomHashSet)
     * and
     * (both have equal hash strategies defined by {@link #DoubleHashingStrategy}.equals(obj.hashStrategy))
* then, both sets are compared as follows, using their {@link #DoubleHashingStrategy}. */ @SuppressWarnings("unchecked") @Override public boolean equals(final Object obj) { if (obj != null) { if (obj == this) { return true; } if (!(obj instanceof DoubleOpenCustomHashSet)) { return false; } if (!this.hashStrategy.equals(((DoubleOpenCustomHashSet) obj).hashStrategy)) { return false; } final DoubleOpenCustomHashSet other = (DoubleOpenCustomHashSet) obj; if (other.size() == this.size()) { final EntryIterator it = this.iterator(); while (it.hasNext()) { if (!other.contains(it.next().value)) { //recycle it.release(); return false; } } return true; } } return false; } /** * An iterator implementation for {@link #iterator}. */ public final class EntryIterator extends AbstractIterator { public final DoubleCursor cursor; public EntryIterator() { this.cursor = new DoubleCursor(); this.cursor.index = -2; } /** * Iterate backwards w.r.t the buffer, to * minimize collision chains when filling another hash container (ex. 
with putAll()) */ @Override protected DoubleCursor fetch() { int i = this.cursor.index - 1; while (i >= 0 && DoubleOpenCustomHashSet.this.allocated[i] == -1 ) { i--; } if (i == -1) { return done(); } this.cursor.index = i; this.cursor.value = DoubleOpenCustomHashSet.this.keys[i]; return this.cursor; } } /** * internal pool of EntryIterator */ protected final IteratorPool entryIteratorPool = new IteratorPool( new ObjectFactory() { @Override public EntryIterator create() { return new EntryIterator(); } @Override public void initialize(final EntryIterator obj) { obj.cursor.index = DoubleOpenCustomHashSet.this.keys.length; } @Override public void reset(final EntryIterator obj) { // nothing } }); /** * {@inheritDoc} * @return */ @Override public EntryIterator iterator() { //return new EntryIterator(); return this.entryIteratorPool.borrow(); } /** * {@inheritDoc} */ @Override public T forEach(final T procedure) { final double[] keys = this.keys; final int[] states = this.allocated; //Iterate in reverse for side-stepping the longest conflict chain //in another hash, in case apply() is actually used to fill another hash container. for (int i = states.length - 1; i >= 0; i--) { if (states[i] != -1 ) { procedure.apply(keys[i]); } } return procedure; } /** * {@inheritDoc} */ @Override public double[] toArray(final double[] target) { final double[] keys = this.keys; final int[] states = this.allocated; for (int i = 0, j = 0; i < keys.length; i++) { if (states[i] != -1 ) { target[j++] = keys[i]; } } return target; } /** * Clone this object. 
* */ @Override public DoubleOpenCustomHashSet clone() { final DoubleOpenCustomHashSet cloned = new DoubleOpenCustomHashSet(this.size(), this.loadFactor, this.hashStrategy); cloned.addAll(this); cloned.defaultValue = this.defaultValue; return cloned; } /** * {@inheritDoc} */ @Override public T forEach(final T predicate) { final double[] keys = this.keys; final int[] states = this.allocated; //Iterate in reverse for side-stepping the longest conflict chain //in another hash, in case apply() is actually used to fill another hash container. for (int i = states.length - 1; i >= 0; i--) { if (states[i] != -1 ) { if (!predicate.apply(keys[i])) { break; } } } return predicate; } /** * {@inheritDoc} *

Important! * If the predicate actually injects the removed keys in another hash container, you may experience performance losses. */ @Override public int removeAll(final DoublePredicate predicate) { final double[] keys = this.keys; final int[] states = this.allocated; final int before = this.assigned; for (int i = 0; i < states.length;) { if (states[i] != -1 ) { if (predicate.apply(keys[i])) { this.assigned--; shiftConflictingKeys(i); // Repeat the check for the same i. continue; } } i++; } return before - this.assigned; } /** * Create a set from a variable number of arguments or an array of double. */ public static DoubleOpenCustomHashSet from(final DoubleHashingStrategy hashStrategy, final double... elements) { final DoubleOpenCustomHashSet set = new DoubleOpenCustomHashSet(elements.length, hashStrategy); set.add(elements); return set; } /** * Create a set from elements of another container. */ public static DoubleOpenCustomHashSet from(final DoubleContainer container, final DoubleHashingStrategy hashStrategy) { return new DoubleOpenCustomHashSet(container, hashStrategy); } /** * Create a new hash set with default parameters (shortcut * instead of using a constructor). */ public static DoubleOpenCustomHashSet newInstance(final DoubleHashingStrategy hashStrategy) { return new DoubleOpenCustomHashSet(hashStrategy); } /** * Returns a new object of this class with no need to declare generic type (shortcut * instead of using a constructor). */ public static DoubleOpenCustomHashSet newInstanceWithCapacity(final int initialCapacity, final float loadFactor, final DoubleHashingStrategy hashStrategy) { return new DoubleOpenCustomHashSet(initialCapacity, loadFactor, hashStrategy); } /** * Return the current {@link HashingStrategy} in use. * @return */ public DoubleHashingStrategy strategy() { return this.hashStrategy; } }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy