All Downloads are FREE. Search and download functionalities are using the official Maven repository.

drv.OpenHashBigSet.drv Maven / Gradle / Ivy

Go to download

fastutil extends the Java Collections Framework by providing type-specific maps, sets, lists, and queues with a small memory footprint and fast operations; it also provides big (64-bit) arrays, sets, and lists, sorting algorithms, fast, practical I/O classes for binary and text files, and facilities for memory mapping large files. This jar (fastutil-core.jar) contains only data structures based on integers, longs, doubles, and objects; fastutil.jar contains all classes. If you have both jars in your dependencies, this jar should be excluded.

The newest version!
/*
 * Copyright (C) 2002-2024 Sebastiano Vigna
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */


package PACKAGE;

import static it.unimi.dsi.fastutil.BigArrays.copy;
import static it.unimi.dsi.fastutil.BigArrays.fill;
import static it.unimi.dsi.fastutil.BigArrays.set;

import it.unimi.dsi.fastutil.BigArrays;
import it.unimi.dsi.fastutil.Hash;
import it.unimi.dsi.fastutil.Size64;
import it.unimi.dsi.fastutil.HashCommon;
import static it.unimi.dsi.fastutil.HashCommon.bigArraySize;
import static it.unimi.dsi.fastutil.HashCommon.maxFill;
#if KEYS_REFERENCE
import java.util.function.Consumer;
import java.util.stream.Collector;
#endif

import java.util.Collection;
import java.util.Iterator;
import java.util.NoSuchElementException;


/** A type-specific hash big set with a fast, small-footprint implementation.
 *
 * 

<p>Instances of this class use a hash table to represent a big set: the number
 * of elements in the set is limited only by the amount of core memory. The table
 * (backed by a {@linkplain it.unimi.dsi.fastutil.BigArrays big array}) is
 * filled up to a specified load factor, and then doubled in size to
 * accommodate new entries. If the table is emptied below one fourth
 * of the load factor, it is halved in size; however, the table is never reduced to a
 * size smaller than that at creation time: this approach makes it
 * possible to create sets with a large capacity in which insertions and
 * deletions do not immediately cause rehashing. Moreover, halving is
 * not performed when deleting entries from an iterator, as it would interfere
 * with the iteration process.
 *
 *

<p>Note that {@link #clear()} does not modify the hash table size.
 * Rather, a family of {@linkplain #trim() trimming
 * methods} lets you control the size of the table; this is particularly useful
 * if you reuse instances of this class.
 *
 *

The methods of this class are about 30% slower than those of the corresponding non-big set.
 *
 * @see Hash
 * @see HashCommon
 */

public class OPEN_HASH_BIG_SET KEY_GENERIC extends ABSTRACT_SET KEY_GENERIC implements java.io.Serializable, Cloneable, Hash, Size64 {

	private static final long serialVersionUID = 0L;
	/** Flag guarding the {@code checkTable()} calls made by the mutating methods of this class. */
	private static final boolean ASSERTS = ASSERTS_VALUE;

	/** The big array of keys. */
	protected transient KEY_GENERIC_TYPE[][] key;
	/** The mask for wrapping a position counter; set to {@code n - 1} by {@link #initMasks()}. */
	protected transient long mask;
	/** The mask for wrapping a segment counter (a displacement inside one segment); set to {@code key[0].length - 1} by {@link #initMasks()}. */
	protected transient int segmentMask;
	/** The mask for wrapping a base counter (a segment index); set to {@code key.length - 1} by {@link #initMasks()}. */
	protected transient int baseMask;
	/** Whether this set contains the null key. The null key is tracked by this flag only and is not stored in {@link #key}. */
	protected transient boolean containsNull;
	/** The current table size (always a power of 2). */
	protected transient long n;
	/** Threshold after which we rehash. It must be the table size times {@link #f}. */
	protected transient long maxFill;
	/** We never resize below this threshold, which is the construction-time {@link #n}. */
	protected final transient long minN;
	/** The acceptable load factor. */
	protected final float f;
	/** Number of entries in the set (the null key, when present, is counted too). */
	protected long size;

	/** Initialises the mask values from the current table size {@link #n} and from the shape of {@link #key}.
	 * Both must already have been assigned when this method is called. */
	private void initMasks() {
		mask = n - 1;
		/* Note that either we have more than one segment, and in this case all segments
		 * are BigArrays.SEGMENT_SIZE long, or we have exactly one segment whose length
		 * is a power of two. */
		segmentMask = key[0].length - 1;
		baseMask = key.length - 1;
	}

	/** Creates a new hash big set.
	 *
	 *

The actual table size will be the least power of two greater than {@code expected}/{@code f}. * * @param expected the expected number of elements in the set. * @param f the load factor. */ SUPPRESS_WARNINGS_KEY_UNCHECKED public OPEN_HASH_BIG_SET(final long expected, final float f) { if (f <= 0 || f > 1) throw new IllegalArgumentException("Load factor must be greater than 0 and smaller than or equal to 1"); if (n < 0) throw new IllegalArgumentException("The expected number of elements must be nonnegative"); this.f = f; minN = n = bigArraySize(expected, f); maxFill = maxFill(n, f); key = KEY_GENERIC_BIG_ARRAY_CAST BIG_ARRAYS.newBigArray(n); initMasks(); } /** Creates a new hash big set with {@link Hash#DEFAULT_LOAD_FACTOR} as load factor. * * @param expected the expected number of elements in the hash big set. */ public OPEN_HASH_BIG_SET(final long expected) { this(expected, DEFAULT_LOAD_FACTOR); } /** Creates a new hash big set with initial expected {@link Hash#DEFAULT_INITIAL_SIZE} elements * and {@link Hash#DEFAULT_LOAD_FACTOR} as load factor. */ public OPEN_HASH_BIG_SET() { this(DEFAULT_INITIAL_SIZE, DEFAULT_LOAD_FACTOR); } /** Creates a new hash big set copying a given collection. * * @param c a {@link Collection} to be copied into the new hash big set. * @param f the load factor. */ public OPEN_HASH_BIG_SET(final Collection c, final float f) { this(Size64.sizeOf(c), f); addAll(c); } /** Creates a new hash big set with {@link Hash#DEFAULT_LOAD_FACTOR} as load factor * copying a given collection. * * @param c a {@link Collection} to be copied into the new hash big set. */ public OPEN_HASH_BIG_SET(final Collection c) { this(c, DEFAULT_LOAD_FACTOR); } /** Creates a new hash big set copying a given type-specific collection. * * @param c a type-specific collection to be copied into the new hash big set. * @param f the load factor. 
*/
	public OPEN_HASH_BIG_SET(final COLLECTION KEY_EXTENDS_GENERIC c, final float f) {
		// Size the table for the whole collection first, then bulk-add its elements.
		this(Size64.sizeOf(c), f);
		addAll(c);
	}

	/** Creates a new hash big set with {@link Hash#DEFAULT_LOAD_FACTOR} as load factor
	 * copying a given type-specific collection.
	 *
	 * <p>Delegates to the two-argument constructor taking a type-specific collection and a load factor.
	 *
	 * @param c a type-specific collection to be copied into the new hash big set.
	 */
	public OPEN_HASH_BIG_SET(final COLLECTION KEY_EXTENDS_GENERIC c) {
		this(c, DEFAULT_LOAD_FACTOR);
	}

	/** Creates a new hash big set using elements provided by a type-specific iterator.
	 *
	 * <p>The number of elements is not known in advance, so the table is created for
	 * {@code DEFAULT_INITIAL_SIZE} expected elements and the iterator is then drained
	 * by adding its elements one at a time.
	 *
	 * @param i a type-specific iterator whose elements will fill the new hash big set.
	 * @param f the load factor.
	 */
	public OPEN_HASH_BIG_SET(final STD_KEY_ITERATOR KEY_EXTENDS_GENERIC i, final float f) {
		this(DEFAULT_INITIAL_SIZE, f);
		while(i.hasNext()) add(i.NEXT_KEY());
	}

	/** Creates a new hash big set with {@link Hash#DEFAULT_LOAD_FACTOR} as load factor using elements provided by a type-specific iterator.
	 *
	 * <p>Delegates to the constructor taking a type-specific iterator and a load factor.
	 *
	 * @param i a type-specific iterator whose elements will fill the new hash big set.
	 */
	public OPEN_HASH_BIG_SET(final STD_KEY_ITERATOR KEY_EXTENDS_GENERIC i) {
		this(i, DEFAULT_LOAD_FACTOR);
	}

#if KEYS_PRIMITIVE

	/** Creates a new hash big set using elements provided by an iterator.
	 *
	 * <p>The iterator is passed through {@code ITERATORS.AS_KEY_ITERATOR} and the result
	 * is handed to the constructor taking a type-specific iterator.
	 *
	 * @param i an iterator whose elements will fill the new hash big set.
	 * @param f the load factor.
	 */
	public OPEN_HASH_BIG_SET(final Iterator i, final float f) {
		this(ITERATORS.AS_KEY_ITERATOR(i), f);
	}

	/** Creates a new hash big set with {@link Hash#DEFAULT_LOAD_FACTOR} as load factor using elements provided by an iterator.
	 *
	 * <p>The iterator is passed through {@code ITERATORS.AS_KEY_ITERATOR} and the result
	 * is handed to the constructor taking a type-specific iterator.
	 *
	 * @param i an iterator whose elements will fill the new hash big set.
	 */
	public OPEN_HASH_BIG_SET(final Iterator i) {
		this(ITERATORS.AS_KEY_ITERATOR(i));
	}

#endif

	/** Creates a new hash big set and fills it with the elements of a given array.
	 *
	 * @param a an array whose elements will be used to fill the new hash big set.
	 * @param offset the first element to use.
	 * @param length the number of elements to use.
	 * @param f the load factor.
*/
	public OPEN_HASH_BIG_SET(final KEY_GENERIC_TYPE[] a, final int offset, final int length, final float f) {
		// A negative length is clamped to 0 for table sizing only; checking offset/length
		// against the array is left to ARRAYS.ensureOffsetLength (defined elsewhere).
		this(length < 0 ? 0 : length, f);
		ARRAYS.ensureOffsetLength(a, offset, length);
		for(int i = 0; i < length; i++) add(a[offset + i]);
	}

	/** Creates a new hash big set with {@link Hash#DEFAULT_LOAD_FACTOR} as load factor and fills it with the elements of a given array.
	 *
	 * <p>Delegates to the four-argument array constructor.
	 *
	 * @param a an array whose elements will be used to fill the new hash big set.
	 * @param offset the first element to use.
	 * @param length the number of elements to use.
	 */
	public OPEN_HASH_BIG_SET(final KEY_GENERIC_TYPE[] a, final int offset, final int length) {
		this(a, offset, length, DEFAULT_LOAD_FACTOR);
	}

	/** Creates a new hash big set copying the elements of an array.
	 *
	 * <p>The whole array is used, that is, offset {@code 0} and length {@code a.length}.
	 *
	 * @param a an array to be copied into the new hash big set.
	 * @param f the load factor.
	 */
	public OPEN_HASH_BIG_SET(final KEY_GENERIC_TYPE[] a, final float f) {
		this(a, 0, a.length, f);
	}

	/** Creates a new hash big set with {@link Hash#DEFAULT_LOAD_FACTOR} as load factor
	 * copying the elements of an array.
	 *
	 * <p>Delegates to the constructor taking an array and a load factor.
	 *
	 * @param a an array to be copied into the new hash big set.
	 */
	public OPEN_HASH_BIG_SET(final KEY_GENERIC_TYPE[] a) {
		this(a, DEFAULT_LOAD_FACTOR);
	}

#ifndef Custom
#if KEYS_INT_LONG_DOUBLE

	/** Collects the result of a primitive {@code Stream} into a new big hash set.
	 *
	 *

This method performs a terminal operation on the given {@code Stream} * * @apiNote Taking a primitive stream instead of returning something like a * {@link java.util.stream.Collector Collector} is necessary because there is no * primitive {@code Collector} equivalent in the Java API. */ public static KEY_GENERIC OPEN_HASH_BIG_SET KEY_GENERIC toBigSet(JDK_PRIMITIVE_STREAM stream) { return stream.collect( OPEN_HASH_BIG_SET::new, OPEN_HASH_BIG_SET::add, OPEN_HASH_BIG_SET::addAll); } /** Collects the result of a primitive {@code Stream} into a new big hash set. * *

This method performs a terminal operation on the given {@code Stream} * * @apiNote Taking a primitive stream instead returning something like a * {@link java.util.stream.Collector Collector} is necessary because there is no * primitive {@code Collector} equivalent in the Java API. */ public static KEY_GENERIC OPEN_HASH_BIG_SET KEY_GENERIC toBigSetWithExpectedSize(JDK_PRIMITIVE_STREAM stream, long expectedSize) { return stream.collect( () -> new OPEN_HASH_BIG_SET KEY_GENERIC(expectedSize), OPEN_HASH_BIG_SET::add, OPEN_HASH_BIG_SET::addAll); } #elif KEYS_REFERENCE // Collector wants a function that returns the collection being added to. private OPEN_HASH_BIG_SET KEY_GENERIC combine(OPEN_HASH_BIG_SET KEY_EXTENDS_GENERIC toAddFrom) { addAll(toAddFrom); return this; } private static final Collector> TO_SET_COLLECTOR = Collector.of( OPEN_HASH_BIG_SET::new, OPEN_HASH_BIG_SET::add, OPEN_HASH_BIG_SET::combine); /** Returns a {@link Collector} that collects a {@code Stream}'s elements into a new big hash set. */ SUPPRESS_WARNINGS_KEY_UNCHECKED_RAWTYPES public static KEY_GENERIC Collector toBigSet() { return (Collector) TO_SET_COLLECTOR; } /** Returns a {@link Collector} that collects a {@code Stream}'s elements into a new big hash set. */ public static KEY_GENERIC Collector toBigSetWithExpectedSize(long expectedSize) { return Collector.of( () -> new OPEN_HASH_BIG_SET KEY_GENERIC(expectedSize), OPEN_HASH_BIG_SET::add, OPEN_HASH_BIG_SET::combine); } #endif #endif private long realSize() { return containsNull ? size - 1 : size; } /** Ensures that this big set can hold a certain number of elements without rehashing. * * @param capacity a number of elements; there will be no rehashing unless * the set {@linkplain #size64() size} exceeds this number. 
*/ public void ensureCapacity(final long capacity) { final long needed = bigArraySize(capacity, f); if (needed > n) rehash(needed); } @Override public boolean addAll(Collection c) { final long size = Size64.sizeOf(c); // The resulting collection will be at least c.size() big if (f <= .5) ensureCapacity(size); // The resulting collection will be sized for c.size() elements else ensureCapacity(size64() + size); // The resulting collection will be sized for size() + c.size() elements return super.addAll(c); } #if KEYS_PRIMITIVE @Override public boolean addAll(COLLECTION c) { final long size = Size64.sizeOf(c); if (f <= .5) ensureCapacity(size); // The resulting collection will be size for c.size() elements else ensureCapacity(size64() + size); // The resulting collection will be sized for size() + c.size() elements return super.addAll(c); } #endif @Override public boolean add(final KEY_GENERIC_TYPE k) { int displ, base; if (KEY_IS_NULL(k)) { if (containsNull) return false; containsNull = true; } else { KEY_GENERIC_TYPE curr; final KEY_GENERIC_TYPE[][] key = this.key; final long h = KEY2LONGHASH(k); // The starting point. if (! KEY_IS_NULL(curr = key[base = (int)((h & mask) >>> BigArrays.SEGMENT_SHIFT)][displ = (int)(h & segmentMask)])) { if (KEY_EQUALS_NOT_NULL(curr, k)) return false; while(! KEY_IS_NULL(curr = key[base = (base + ((displ = (displ + 1) & segmentMask) == 0 ? 1 : 0)) & baseMask][displ])) if (KEY_EQUALS_NOT_NULL(curr, k)) return false; } key[base][displ] = k; } if (size++ >= maxFill) rehash(2 * n); if (ASSERTS) checkTable(); return true; } #if KEY_CLASS_Object /** Add a random element if not present, get the existing value if already present. * * This is equivalent to (but faster than) doing a: *

	 * K exist = set.get(k);
	 * if (exist == null) {
	 *   set.add(k);
	 *   exist = k;
	 * }
	 * 
*/ public KEY_GENERIC_TYPE addOrGet(final KEY_GENERIC_TYPE k) { int displ, base; if (KEY_IS_NULL(k)) { if (containsNull) return null; containsNull = true; } else { KEY_GENERIC_TYPE curr; final KEY_GENERIC_TYPE[][] key = this.key; final long h = KEY2LONGHASH(k); // The starting point. if (! KEY_IS_NULL(curr = key[base = (int)((h & mask) >>> BigArrays.SEGMENT_SHIFT)][displ = (int)(h & segmentMask)])) { if (KEY_EQUALS_NOT_NULL(curr, k)) return curr; while(! KEY_IS_NULL(curr = key[base = (base + ((displ = (displ + 1) & segmentMask) == 0 ? 1 : 0)) & baseMask][displ])) if (KEY_EQUALS_NOT_NULL(curr, k)) return curr; } key[base][displ] = k; } if (size++ >= maxFill) rehash(2 * n); if (ASSERTS) checkTable(); return k; } #endif /** Shifts left entries with the specified hash code, starting at the specified position, * and empties the resulting free entry. * * @param pos a starting position. */ protected final void shiftKeys(long pos) { // Shift entries with the same hash. long last, slot; final KEY_GENERIC_TYPE[][] key = this.key; for(;;) { pos = ((last = pos) + 1) & mask; for(;;) { if (KEY_IS_NULL(BigArrays.get(key, pos))) { set(key, last, KEY_NULL); return; } slot = KEY2LONGHASH(BigArrays.get(key, pos)) & mask; if (last <= pos ? 
last >= slot || slot > pos : last >= slot && slot > pos) break; pos = (pos + 1) & mask; } set(key, last, BigArrays.get(key, pos)); } } private boolean removeEntry(final int base, final int displ) { size--; shiftKeys(base * (long)BigArrays.SEGMENT_SIZE + displ); if (n > minN && size < maxFill / 4 && n > DEFAULT_INITIAL_SIZE) rehash(n / 2); return true; } private boolean removeNullEntry() { containsNull = false; size--; if (n > minN && size < maxFill / 4 && n > DEFAULT_INITIAL_SIZE) rehash(n / 2); return true; } @Override public boolean remove(final KEY_TYPE k) { if (KEY_IS_NULL(k)) { if (containsNull) return removeNullEntry(); return false; } KEY_GENERIC_TYPE curr; final KEY_GENERIC_TYPE[][] key = this.key; final long h = KEY2LONGHASH(k); int displ, base; // The starting point. if (KEY_IS_NULL(curr = key[base = (int)((h & mask) >>> BigArrays.SEGMENT_SHIFT)][displ = (int)(h & segmentMask)])) return false; if (KEY_EQUALS_NOT_NULL(curr, k)) return removeEntry(base, displ); while(true) { if (KEY_IS_NULL(curr = key[base = (base + ((displ = (displ + 1) & segmentMask) == 0 ? 1 : 0)) & baseMask][displ])) return false; if (KEY_EQUALS_NOT_NULL(curr, k)) return removeEntry(base, displ); } } @Override public boolean contains(final KEY_TYPE k) { if (KEY_IS_NULL(k)) return containsNull; KEY_GENERIC_TYPE curr; final KEY_GENERIC_TYPE[][] key = this.key; final long h = KEY2LONGHASH(k); int displ, base; // The starting point. if (KEY_IS_NULL(curr = key[base = (int)((h & mask) >>> BigArrays.SEGMENT_SHIFT)][displ = (int)(h & segmentMask)])) return false; if (KEY_EQUALS_NOT_NULL(curr, k)) return true; while(true) { if (KEY_IS_NULL(curr = key[base = (base + ((displ = (displ + 1) & segmentMask) == 0 ? 1 : 0)) & baseMask][displ])) return false; if (KEY_EQUALS_NOT_NULL(curr, k)) return true; } } #if KEY_CLASS_Object /** Returns the element of this set that is equal to the given key, or {@code null}. * @return the element of this set that is equal to the given key, or {@code null}. 
*/ public K get(final KEY_TYPE k) { if (k == null) return null; // This is correct independently of the value of containsNull KEY_GENERIC_TYPE curr; final KEY_GENERIC_TYPE[][] key = this.key; final long h = KEY2LONGHASH(k); int displ, base; // The starting point. if (KEY_IS_NULL(curr = key[base = (int)((h & mask) >>> BigArrays.SEGMENT_SHIFT)][displ = (int)(h & segmentMask)])) return null; if (KEY_EQUALS_NOT_NULL(curr, k)) return curr; while(true) { if (KEY_IS_NULL(curr = key[base = (base + ((displ = (displ + 1) & segmentMask) == 0 ? 1 : 0)) & baseMask][displ])) return null; if (KEY_EQUALS_NOT_NULL(curr, k)) return curr; } } #endif /* Removes all elements from this set. * */ /** {@inheritDoc} * *

To increase object reuse, this method does not change the table size. * If you want to reduce the table size, you must use {@link #trim(long)}. */ @Override public void clear() { if (size == 0) return; size = 0; containsNull = false; fill(key, KEY_NULL); } /** An iterator over a hash big set. */ private class SetIterator implements KEY_ITERATOR KEY_GENERIC { /** The base of the last entry returned, if positive or zero; initially, the number of components of the key array. If negative, the last element returned was that of index {@code - base - 1} from the {@link #wrapped} list. */ int base = key.length; /** The displacement of the last entry returned; initially, zero. */ int displ; /** The index of the last entry that has been returned (or {@link Long#MIN_VALUE} if {@link #base} is negative). It is -1 if either we did not return an entry yet, or the last returned entry has been removed. */ long last = -1; /** A downward counter measuring how many entries must still be returned. */ long c = size; /** A boolean telling us whether we should return the null key. */ boolean mustReturnNull = OPEN_HASH_BIG_SET.this.containsNull; /** A lazily allocated list containing elements that have wrapped around the table because of removals. */ ARRAY_LIST KEY_GENERIC wrapped; @Override public boolean hasNext() { return c != 0; } @Override public KEY_GENERIC_TYPE NEXT_KEY() { if (! hasNext()) throw new NoSuchElementException(); c--; if (mustReturnNull) { mustReturnNull = false; last = n; return KEY_NULL; } final KEY_GENERIC_TYPE[][] key = OPEN_HASH_BIG_SET.this.key; for(;;) { if (displ == 0 && base <= 0) { // We are just enumerating elements from the wrapped list. last = Long.MIN_VALUE; return wrapped.GET_KEY(- (--base) - 1); } if (displ-- == 0) displ = key[--base].length - 1; final KEY_GENERIC_TYPE k = key[base][displ]; if (! 
KEY_IS_NULL(k)) { last = base * (long)BigArrays.SEGMENT_SIZE + displ; return k; } } } /** Shifts left entries with the specified hash code, starting at the specified position, * and empties the resulting free entry. * * @param pos a starting position. */ private final void shiftKeys(long pos) { // Shift entries with the same hash. long last, slot; KEY_GENERIC_TYPE curr; final KEY_GENERIC_TYPE[][] key = OPEN_HASH_BIG_SET.this.key; for(;;) { pos = ((last = pos) + 1) & mask; for(;;) { if(KEY_IS_NULL(curr = BigArrays.get(key, pos))) { set(key, last, KEY_NULL); return; } slot = KEY2LONGHASH(curr) & mask; if (last <= pos ? last >= slot || slot > pos : last >= slot && slot > pos) break; pos = (pos + 1) & mask; } if (pos < last) { // Wrapped entry. if (wrapped == null) wrapped = new ARRAY_LIST KEY_GENERIC_DIAMOND(); wrapped.add(BigArrays.get(key, pos)); } set(key, last, curr); } } @Override public void remove() { if (last == -1) throw new IllegalStateException(); if (last == n) OPEN_HASH_BIG_SET.this.containsNull = false; else if (base >= 0) shiftKeys(last); else { // We're removing wrapped entries. #if KEYS_REFERENCE OPEN_HASH_BIG_SET.this.remove(wrapped.set(- base - 1, null)); #else OPEN_HASH_BIG_SET.this.remove(wrapped.GET_KEY(- base - 1)); #endif last = -1; // Note that we must not decrement size return; } size--; last = -1; // You can no longer remove this entry. if (ASSERTS) checkTable(); } } @Override public KEY_ITERATOR KEY_GENERIC iterator() { return new SetIterator(); } private class SetSpliterator implements KEY_SPLITERATOR KEY_GENERIC { /* For the sake of keeping things at least somewhat simple * (aka. my sanity), the spliterator will NOT handle the indexing * of the subarrays directly, like iterator does. Instead, it will * delegate to BigArrays and have only a single, unified index it * will fence on. This is probably less effecient, but it avoids having * to track what it means to split on two sets of indexes. 
* This may change in the future if the performance hit high. */ private static final int POST_SPLIT_CHARACTERISTICS = SPLITERATORS.SET_SPLITERATOR_CHARACTERISTICS & ~java.util.Spliterator.SIZED; /** The index (which bucket) of the next item to give to the action. */ long pos = 0; /** The maximum bucket (exclusive) to iterate to */ long max = n; /** An upwards counter counting how many we have given */ long c = 0; /** A boolean telling us whether we should return the null key. */ boolean mustReturnNull = OPEN_HASH_BIG_SET.this.containsNull; boolean hasSplit = false; SetSpliterator() {} SetSpliterator(long pos, long max, boolean mustReturnNull, boolean hasSplit) { this.pos = pos; this.max = max; this.mustReturnNull = mustReturnNull; this.hasSplit = hasSplit; } @Override public boolean tryAdvance(final METHOD_ARG_KEY_CONSUMER action) { if (mustReturnNull) { mustReturnNull = false; ++c; action.accept(KEY_NULL); return true; } final KEY_GENERIC_TYPE key[][] = OPEN_HASH_BIG_SET.this.key; while (pos < max) { KEY_GENERIC_TYPE gotten = BigArrays.get(key, pos); if (! KEY_IS_NULL(gotten)) { ++c; ++pos; action.accept(gotten); return true; } else { ++pos; } } return false; } @Override public void forEachRemaining(final METHOD_ARG_KEY_CONSUMER action) { if (mustReturnNull) { mustReturnNull = false; action.accept(KEY_NULL); ++c; } final KEY_GENERIC_TYPE key[][] = OPEN_HASH_BIG_SET.this.key; while (pos < max) { KEY_GENERIC_TYPE gotten = BigArrays.get(key, pos); if (! KEY_IS_NULL(gotten)) { action.accept(gotten); ++c; } ++pos; } } @Override public int characteristics() { return hasSplit ? POST_SPLIT_CHARACTERISTICS : SPLITERATORS.SET_SPLITERATOR_CHARACTERISTICS; } @Override public long estimateSize() { if (!hasSplit) { // Root spliterator; we know how many are remaining. return size - c; } else { // After we split, we can no longer know exactly how many we have (or at least not efficiently). 
// (size / n) * (max - pos) aka currentTableDensity * numberOfBucketsLeft seems like a good estimate. return Math.min(size - c, (long)(((double)realSize() / n) * (max - pos)) + (mustReturnNull ? 1 : 0)); } } @Override public SetSpliterator trySplit() { if (pos >= max - 1) return null; long retLen = (max - pos) >> 1; if (retLen <= 1) return null; long myNewPos = pos + retLen; // Align to an outer array boundary if possible // We add/subtract one to the bounds to ensure the new pos will always shrink the range myNewPos = BigArrays.nearestSegmentStart(myNewPos, pos + 1, max - 1); long retPos = pos; long retMax = myNewPos; // Since null is returned first, and the convention is that the returned split is the prefix of elements, // the split will take care of returning null (if needed), and we won't return it anymore. SetSpliterator split = new SetSpliterator(retPos, retMax, mustReturnNull, true); this.pos = myNewPos; this.mustReturnNull = false; this.hasSplit = true; return split; } @Override public long skip(long n) { if (n < 0) throw new IllegalArgumentException("Argument must be nonnegative: " + n); if (n == 0) return 0; long skipped = 0; if (mustReturnNull) { mustReturnNull = false; ++skipped; --n; } final KEY_GENERIC_TYPE key[][] = OPEN_HASH_BIG_SET.this.key; while (pos < max && n > 0) { if (! KEY_IS_NULL(BigArrays.get(key, pos++))) { ++skipped; --n; } } return skipped; } } @Override public KEY_SPLITERATOR KEY_GENERIC spliterator() { return new SetSpliterator(); } @Override public void forEach(final METHOD_ARG_KEY_CONSUMER action) { if (containsNull) { action.accept(KEY_NULL); } long pos = 0; final long max = n; final KEY_GENERIC_TYPE key[][] = this.key; while (pos < max) { KEY_GENERIC_TYPE gotten = BigArrays.get(key, pos++); if (! KEY_IS_NULL(gotten)) { action.accept(gotten); } } } /** Rehashes this set, making the table as small as possible. * *

* <p>This method rehashes the table to the smallest size satisfying the
	 * load factor. It can be used when the set will not be changed anymore, so
	 * to optimize access speed and size.
	 *
	 * <p>If the table size is already the minimum possible, this method
	 * does nothing.
	 *
	 * @return true if there was enough memory to trim the set.
	 * @see #trim(long)
	 */
	public boolean trim() {
		return trim(size);
	}

	/** Rehashes this set if the table is too large.
	 *
	 * <p>Let N be the smallest table size that can hold
	 * max(n,{@link #size64()}) entries, still satisfying the load factor. If the current
	 * table size is smaller than or equal to N, this method does
	 * nothing. Otherwise, it rehashes this set in a table of size
	 * N.
	 *
	 * <p>This method is useful when reusing sets. {@linkplain #clear() Clearing a
	 * set} leaves the table size untouched. If you are reusing a set
	 * many times, you can call this method with a typical
	 * size to avoid keeping around a very large table just
	 * because of a few large transient sets.
	 *
	 * @param n the threshold for the trimming.
	 * @return true if there was enough memory to trim the set.
	 * @see #trim()
	 */
	public boolean trim(final long n) {
		final long targetSize = bigArraySize(n, f);
		// Nothing to do: the table is not larger than the target, or the
		// current content would not fit in a table of the target size.
		if (targetSize >= this.n || size > maxFill(targetSize, f)) return true;
		try {
			rehash(targetSize);
			return true;
		}
		catch(OutOfMemoryError cantDoIt) {
			// Not enough memory for the new table; the set is left as it was.
			return false;
		}
	}

	/** Resizes the set.
	 *

This method implements the basic rehashing strategy, and may be * overriden by subclasses implementing different rehashing strategies (e.g., * disk-based rehashing). However, you should not override this method * unless you understand the internal workings of this class. * * @param newN the new size */ SUPPRESS_WARNINGS_KEY_UNCHECKED protected void rehash(final long newN) { final KEY_GENERIC_TYPE key[][] = this.key; final KEY_GENERIC_TYPE newKey[][] = KEY_GENERIC_BIG_ARRAY_CAST BIG_ARRAYS.newBigArray(newN); final long mask = newN - 1; // Note that this is used by the hashing macro final int newSegmentMask = newKey[0].length - 1; final int newBaseMask = newKey.length - 1; int base = 0, displ = 0, b, d; long h; KEY_GENERIC_TYPE k; for(long i = realSize(); i-- != 0;) { while(KEY_IS_NULL(key[base][displ])) base = (base + ((displ = (displ + 1) & segmentMask) == 0 ? 1 : 0)); k = key[base][displ]; h = KEY2LONGHASH(k); // The starting point. if (! KEY_IS_NULL(newKey[b = (int)((h & mask) >>> BigArrays.SEGMENT_SHIFT)][d = (int)(h & newSegmentMask)])) while(! KEY_IS_NULL(newKey[b = (b + ((d = (d + 1) & newSegmentMask) == 0 ? 1 : 0)) & newBaseMask][d])); newKey[b][d] = k; base = (base + ((displ = (displ + 1) & segmentMask) == 0 ? 1 : 0)); } this.n = newN; this.key = newKey; initMasks(); maxFill = maxFill(n, f); } @Deprecated @Override public int size() { return (int)Math.min(Integer.MAX_VALUE, size); } @Override public long size64() { return size; } @Override public boolean isEmpty() { return size == 0; } /** Returns a deep copy of this big set. * *

This method performs a deep copy of this big hash set; the data stored in the * set, however, is not cloned. Note that this makes a difference only for object keys. * * @return a deep copy of this big set. */ @Override SUPPRESS_WARNINGS_KEY_UNCHECKED public OPEN_HASH_BIG_SET KEY_GENERIC clone() { OPEN_HASH_BIG_SET KEY_GENERIC c; try { c = (OPEN_HASH_BIG_SET KEY_GENERIC)super.clone(); } catch(CloneNotSupportedException cantHappen) { throw new InternalError(); } c.key = copy(key); c.containsNull = containsNull; return c; } /** Returns a hash code for this set. * * This method overrides the generic method provided by the superclass. * Since {@code equals()} is not overriden, it is important * that the value returned by this method is the same value as * the one returned by the overriden method. * * @return a hash code for this set. */ @Override public int hashCode() { final KEY_GENERIC_TYPE key[][] = this.key; int h = 0, base = 0, displ = 0; for(long j = realSize(); j-- != 0;) { while(KEY_IS_NULL(key[base][displ])) base = (base + ((displ = (displ + 1) & segmentMask) == 0 ? 1 : 0)); #if KEYS_REFERENCE if (this != key[base][displ]) #endif h += KEY2JAVAHASH_NOT_NULL(key[base][displ]); base = (base + ((displ = (displ + 1) & segmentMask) == 0 ? 
1 : 0)); } return h; } private void writeObject(java.io.ObjectOutputStream s) throws java.io.IOException { final KEY_ITERATOR KEY_GENERIC i = iterator(); s.defaultWriteObject(); for(long j = size; j-- != 0;) s.WRITE_KEY(i.NEXT_KEY()); } SUPPRESS_WARNINGS_KEY_UNCHECKED private void readObject(java.io.ObjectInputStream s) throws java.io.IOException, ClassNotFoundException { s.defaultReadObject(); n = bigArraySize(size, f); maxFill = maxFill(n, f); final KEY_GENERIC_TYPE[][] key = this.key = KEY_GENERIC_BIG_ARRAY_CAST BIG_ARRAYS.newBigArray(n); initMasks(); long h; KEY_GENERIC_TYPE k; int base, displ; for(long i = size; i-- != 0;) { k = KEY_GENERIC_CAST s.READ_KEY(); if (KEY_IS_NULL(k)) containsNull = true; else { h = KEY2LONGHASH(k); if (! KEY_IS_NULL(key[base = (int)((h & mask) >>> BigArrays.SEGMENT_SHIFT)][displ = (int)(h & segmentMask)])) while(! KEY_IS_NULL(key[base = (base + ((displ = (displ + 1) & segmentMask) == 0 ? 1 : 0)) & baseMask][displ])); key[base][displ] = k; } } if (ASSERTS) checkTable(); } #ifdef ASSERTS_CODE private void checkTable() { assert (n & -n) == n : "Table length is not a power of two: " + n; assert n == BigArrays.length(key); long n = this.n; while(n-- != 0) if (! KEY_IS_NULL(BigArrays.get(key, n)) && ! contains(BigArrays.get(key, n))) throw new AssertionError("Hash table has key " + BigArrays.get(key, n) + " marked as occupied, but the key does not belong to the table"); #if KEYS_PRIMITIVE java.util.HashSet s = new java.util.HashSet (); #else java.util.HashSet s = new java.util.HashSet(); #endif for(long i = size(); i-- != 0;) if (! KEY_IS_NULL(BigArrays.get(key, i)) && ! 
s.add(BigArrays.get(key, i))) throw new AssertionError("Key " + BigArrays.get(key, i) + " appears twice"); } #else private void checkTable() {} #endif #ifdef TEST private static long seed = System.currentTimeMillis(); private static java.util.Random r = new java.util.Random(seed); private static KEY_TYPE genKey() { #if KEY_CLASS_Byte || KEY_CLASS_Short || KEY_CLASS_Character return (KEY_TYPE)(r.nextInt()); #elif KEYS_PRIMITIVE return r.NEXT_KEY(); #elif KEY_CLASS_Object return Integer.toBinaryString(r.nextInt()); #else return new java.io.Serializable() {}; #endif } private static final class ArrayComparator implements java.util.Comparator { public int compare(Object a, Object b) { byte[] aa = (byte[])a; byte[] bb = (byte[])b; int length = Math.min(aa.length, bb.length); for(int i = 0; i < length; i++) { if (aa[i] < bb[i]) return -1; if (aa[i] > bb[i]) return 1; } return aa.length == bb.length ? 0 : (aa.length < bb.length ? -1 : 1); } } private static final class MockSet extends java.util.TreeSet { private java.util.List list = new java.util.ArrayList(); public MockSet(java.util.Comparator c) { super(c); } public boolean add(Object k) { if (! 
contains(k)) list.add(k); return super.add(k); } public boolean addAll(Collection c) { java.util.Iterator i = c.iterator(); boolean result = false; while(i.hasNext()) result |= add(i.next()); return result; } public boolean removeAll(Collection c) { java.util.Iterator i = c.iterator(); boolean result = false; while(i.hasNext()) result |= remove(i.next()); return result; } public boolean remove(Object k) { if (contains(k)) { int i = list.size(); while(i-- != 0) if (comparator().compare(list.get(i), k) == 0) { list.remove(i); break; } } return super.remove(k); } private void justRemove(Object k) { super.remove(k); } public java.util.Iterator iterator() { return new java.util.Iterator() { final java.util.Iterator iterator = list.iterator(); Object curr; public Object next() { return curr = iterator.next(); } public boolean hasNext() { return iterator.hasNext(); } public void remove() { justRemove(curr); iterator.remove(); } }; } } private static java.text.NumberFormat format = new java.text.DecimalFormat("#,###.00"); private static java.text.FieldPosition fp = new java.text.FieldPosition(0); private static String format(double d) { StringBuffer s = new StringBuffer(); return format.format(d, s, fp).toString(); } // TODO Use a ASSERTS like preprocessor variable? private static final boolean PARALLEL_STREAMS = Boolean.getBoolean("useParallelStreams"); private static void speedTest(int n, float f, boolean comp) { int i, j; OPEN_HASH_BIG_SET m; java.util.HashSet t; KEY_TYPE k[] = new KEY_TYPE[n]; KEY_TYPE nk[] = new KEY_TYPE[n]; long ms; for(i = 0; i < n; i++) { k[i] = genKey(); nk[i] = genKey(); } double totAdd = 0, totYes = 0, totNo = 0, totIter = 0, totRemYes = 0, totRemNo = 0, d; if (comp) { for(j = 0; j < 20; j++) { t = new java.util.HashSet(16); /* We add pairs to t. 
*/ ms = System.currentTimeMillis(); for(i = 0; i < n; i++) t.add(KEY2OBJ(k[i])); d = 1.0 * n / (System.currentTimeMillis() - ms); if (j > 2) totAdd += d; System.out.print("Add: " + format(d) +" K/s "); /* We check for pairs in t. */ ms = System.currentTimeMillis(); for(i = 0; i < n; i++) t.contains(KEY2OBJ(k[i])); d = 1.0 * n / (System.currentTimeMillis() - ms); if (j > 2) totYes += d; System.out.print("Yes: " + format(d) +" K/s "); /* We check for pairs not in t. */ ms = System.currentTimeMillis(); for(i = 0; i < n; i++) t.contains(KEY2OBJ(nk[i])); d = 1.0 * n / (System.currentTimeMillis() - ms); if (j > 2) totNo += d; System.out.print("No: " + format(d) +" K/s "); /* We iterate on t. */ ms = System.currentTimeMillis(); for(java.util.Iterator it = t.iterator(); it.hasNext(); it.next()); d = 1.0 * n / (System.currentTimeMillis() - ms); if (j > 2) totIter += d; System.out.print("Iter: " + format(d) +" K/s "); /* We delete pairs not in t. */ ms = System.currentTimeMillis(); for(i = 0; i < n; i++) t.remove(KEY2OBJ(nk[i])); d = 1.0 * n / (System.currentTimeMillis() - ms); if (j > 2) totRemNo += d; System.out.print("RemNo: " + format(d) +" K/s "); /* We delete pairs in t. */ ms = System.currentTimeMillis(); for(i = 0; i < n; i++) t.remove(KEY2OBJ(k[i])); d = 1.0 * n / (System.currentTimeMillis() - ms); if (j > 2) totRemYes += d; System.out.print("RemYes: " + format(d) +" K/s "); System.out.println(); } System.out.println(); System.out.println("java.util Add: " + format(totAdd/(j-3)) + " K/s Yes: " + format(totYes/(j-3)) + " K/s No: " + format(totNo/(j-3)) + " K/s Iter: " + format(totIter/(j-3)) + " K/s RemNo: " + format(totRemNo/(j-3)) + " K/s RemYes: " + format(totRemYes/(j-3)) + "K/s"); System.out.println(); totAdd = totYes = totNo = totIter = totRemYes = totRemNo = 0; } for(j = 0; j < 20; j++) { m = new OPEN_HASH_BIG_SET(16, f); /* We add pairs to m. 
*/ ms = System.currentTimeMillis(); for(i = 0; i < n; i++) m.add(k[i]); d = 1.0 * n / (System.currentTimeMillis() - ms); if (j > 2) totAdd += d; System.out.print("Add: " + format(d) +" K/s "); /* We check for pairs in m. */ ms = System.currentTimeMillis(); for(i = 0; i < n; i++) m.contains(k[i]); d = 1.0 * n / (System.currentTimeMillis() - ms); if (j > 2) totYes += d; System.out.print("Yes: " + format(d) +" K/s "); /* We check for pairs not in m. */ ms = System.currentTimeMillis(); for(i = 0; i < n; i++) m.contains(nk[i]); d = 1.0 * n / (System.currentTimeMillis() - ms); if (j > 2) totNo += d; System.out.print("No: " + format(d) +" K/s "); /* We iterate on m. */ ms = System.currentTimeMillis(); for(KEY_ITERATOR it = (KEY_ITERATOR)m.iterator(); it.hasNext(); it.NEXT_KEY()); d = 1.0 * n / (System.currentTimeMillis() - ms); if (j > 2) totIter += d; System.out.print("Iter: " + format(d) +" K/s "); /* We delete pairs not in m. */ ms = System.currentTimeMillis(); for(i = 0; i < n; i++) m.remove(nk[i]); d = 1.0 * n / (System.currentTimeMillis() - ms); if (j > 2) totRemNo += d; System.out.print("RemNo: " + format(d) +" K/s "); /* We delete pairs in m. 
*/ ms = System.currentTimeMillis(); for(i = 0; i < n; i++) m.remove(k[i]); d = 1.0 * n / (System.currentTimeMillis() - ms); if (j > 2) totRemYes += d; System.out.print("RemYes: " + format(d) +" K/s "); System.out.println(); } System.out.println(); System.out.println("fastutil Add: " + format(totAdd/(j-3)) + " K/s Yes: " + format(totYes/(j-3)) + " K/s No: " + format(totNo/(j-3)) + " K/s Iter: " + format(totIter/(j-3)) + " K/s RemNo: " + format(totRemNo/(j-3)) + " K/s RemYes: " + format(totRemYes/(j-3)) + " K/s"); System.out.println(); } private static void fatal(String msg) { throw new AssertionError(msg); } private static void ensure(boolean cond, String msg) { if (cond) return; fatal(msg); } private static void printProbes(OPEN_HASH_BIG_SET m) { long totProbes = 0; double totSquareProbes = 0; int maxProbes = 0; final double f = (double)m.size / m.n; for(int i = 0, c = 0; i < m.n; i++) { if (! KEY_IS_NULL(BigArrays.get(m.key, i))) c++; else { if (c != 0) { final long p = (c + 1) * (c + 2) / 2; totProbes += p; totSquareProbes += (double)p * p; } maxProbes = Math.max(c, maxProbes); c = 0; totProbes++; totSquareProbes++; } } final double expected = (double)totProbes / m.n; System.err.println("Expected probes: " + ( 3 * Math.sqrt(3) * (f / ((1 - f) * (1 - f))) + 4 / (9 * f) - 1 ) + "; actual: " + expected + "; stddev: " + Math.sqrt(totSquareProbes / m.n - expected * expected) + "; max probes: " + maxProbes); } private static void runTest(int n, float f) throws Exception { int c; OPEN_HASH_BIG_SET m = new OPEN_HASH_BIG_SET(Hash.DEFAULT_INITIAL_SIZE, f); java.util.Set t = new java.util.HashSet(); /* First of all, we fill t with random data. 
*/
		// NOTE(review): the rest of this test harness appears to have been damaged by text
		// extraction: the loop headers below stop right after "i" (here and before "2) f = ..."),
		// the first "#elif" has no visible "#if", and braces do not balance. The tokens are kept
		// exactly as found; restore this part from the original source before enabling TEST.
		for(int i=0; i i = m.stream();
		java.util.stream.Stream j = t.stream();
#elif KEY_CLASS_Boolean
		java.util.stream.Stream i = m.stream();
		java.util.stream.Stream j = t.stream();
#else
		JDK_PRIMITIVE_STREAM i = m.KEY_WIDENED_STREAM_METHOD();
		java.util.stream.Stream j = t.stream();
#endif
		if (PARALLEL_STREAMS) {
			i = i.parallel();
			j = j.parallel();
		}
		i = i.sorted();
		j = j.sorted();
#if KEYS_REFERENCE || KEY_CLASS_Boolean
		Object[] iArray = i.toArray();
		Object[] jArray = j.toArray();
#elif KEY_CLASS_Character
		int[] iArray = i.toArray();
		int[] jArray = j.mapToInt(c -> (int)c.charValue()).toArray();
#else
		KEY_TYPE_WIDENED[] iArray = i.toArray();
		KEY_TYPE_WIDENED[] jArray = j.MAP_TO_KEY_WIDENED(Number::KEY_WIDENED_VALUE).toArray();
#endif
		ensure(java.util.Arrays.equals(iArray, jArray), "! sorted arrays equal");
		}

		/* Now we take out of m everything, and check that it is empty. */
		for(java.util.Iterator i=m.iterator(); i.hasNext();) { i.next(); i.remove();}

		ensure(m.isEmpty(), "Error (" + seed + "): m is not empty (as it should be)");

#if KEY_CLASS_Integer || KEY_CLASS_Long
		m = new OPEN_HASH_BIG_SET(n, f);
		t.clear();
		int x;

		/* Now we torture-test the hash table. This part is implemented only for integers and longs. */
		int p = m.key.length - 1;

		// NOTE(review): text appears to be missing between "i" and "2)" on the next line (see note above).
		for(int i=0; i2) f = Float.parseFloat(args[2]);
		if (args.length > 3) r = new java.util.Random(seed = Long.parseLong(args[3]));

		try {
			if ("speedTest".equals(args[0]) || "speedComp".equals(args[0])) speedTest(n, f, "speedComp".equals(args[0]));
			else if ("test".equals(args[0])) runTest(n, f);
		} catch(Throwable e) {
			// Report the random seed in use before rethrowing.
			e.printStackTrace(System.err);
			System.err.println("seed: " + seed);
			throw e;
		}
	}

#endif

}