All Downloads are FREE. Search and download functionalities are using the official Maven repository.

drv.OpenHashBigSet.drv Maven / Gradle / Ivy

Go to download

fastutil extends the Java Collections Framework by providing type-specific maps, sets, lists and priority queues with a small memory footprint and fast access and insertion; provides also big (64-bit) arrays, sets and lists, and fast, practical I/O classes for binary and text files.

There is a newer version: 8.5.15
Show newest version
/*		 
 * Copyright (C) 2002-2016 Sebastiano Vigna
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License. 
 */


package PACKAGE;

import it.unimi.dsi.fastutil.BigArrays;
import it.unimi.dsi.fastutil.Hash;
import it.unimi.dsi.fastutil.Size64;
import it.unimi.dsi.fastutil.HashCommon;
import static it.unimi.dsi.fastutil.HashCommon.bigArraySize;
import static it.unimi.dsi.fastutil.HashCommon.maxFill;

import java.util.Collection;
import java.util.Iterator;
import java.util.NoSuchElementException;


/**  A type-specific hash big set with a fast, small-footprint implementation.
 *
 * 

Instances of this class use a hash table to represent a big set: the number * of elements in the set is limited only by the amount of core memory. The table * (backed by a {@linkplain it.unimi.dsi.fastutil.BigArrays big array}) is * filled up to a specified load factor, and then doubled in size to * accommodate new entries. If the table is emptied below one fourth * of the load factor, it is halved in size. However, halving is * not performed when deleting entries from an iterator, as it would interfere * with the iteration process. * *

Note that {@link #clear()} does not modify the hash table size. * Rather, a family of {@linkplain #trim() trimming * methods} lets you control the size of the table; this is particularly useful * if you reuse instances of this class. * *

The methods of this class are about 30% slower than those of the corresponding non-big set. * * @see Hash * @see HashCommon */ public class OPEN_HASH_BIG_SET KEY_GENERIC extends ABSTRACT_SET KEY_GENERIC implements java.io.Serializable, Cloneable, Hash, Size64 { private static final long serialVersionUID = 0L; private static final boolean ASSERTS = ASSERTS_VALUE; /** The big array of keys. */ protected transient KEY_GENERIC_TYPE[][] key; /** The mask for wrapping a position counter. */ protected transient long mask; /** The mask for wrapping a segment counter. */ protected transient int segmentMask; /** The mask for wrapping a base counter. */ protected transient int baseMask; /** Whether this set contains the null key. */ protected transient boolean containsNull; /** The current table size (always a power of 2). */ protected transient long n; /** Threshold after which we rehash. It must be the table size times {@link #f}. */ protected transient long maxFill; /** The acceptable load factor. */ protected final float f; /** Number of entries in the set. */ protected long size; /** Initialises the mask values. */ private void initMasks() { mask = n - 1; /* Note that either we have more than one segment, and in this case all segments * are BigArrays.SEGMENT_SIZE long, or we have exactly one segment whose length * is a power of two. */ segmentMask = key[ 0 ].length - 1; baseMask = key.length - 1; } /** Creates a new hash big set. * *

The actual table size will be the least power of two greater than expected/f. * * @param expected the expected number of elements in the set. * @param f the load factor. */ SUPPRESS_WARNINGS_KEY_UNCHECKED public OPEN_HASH_BIG_SET( final long expected, final float f ) { if ( f <= 0 || f > 1 ) throw new IllegalArgumentException( "Load factor must be greater than 0 and smaller than or equal to 1" ); if ( n < 0 ) throw new IllegalArgumentException( "The expected number of elements must be nonnegative" ); this.f = f; n = bigArraySize( expected, f ); maxFill = maxFill( n, f ); key = KEY_GENERIC_BIG_ARRAY_CAST BIG_ARRAYS.newBigArray( n ); initMasks(); } /** Creates a new hash big set with {@link Hash#DEFAULT_LOAD_FACTOR} as load factor. * * @param expected the expected number of elements in the hash big set. */ public OPEN_HASH_BIG_SET( final long expected ) { this( expected, DEFAULT_LOAD_FACTOR ); } /** Creates a new hash big set with initial expected {@link Hash#DEFAULT_INITIAL_SIZE} elements * and {@link Hash#DEFAULT_LOAD_FACTOR} as load factor. */ public OPEN_HASH_BIG_SET() { this( DEFAULT_INITIAL_SIZE, DEFAULT_LOAD_FACTOR ); } /** Creates a new hash big set copying a given collection. * * @param c a {@link Collection} to be copied into the new hash big set. * @param f the load factor. */ public OPEN_HASH_BIG_SET( final Collection c, final float f ) { this( c.size(), f ); addAll( c ); } /** Creates a new hash big set with {@link Hash#DEFAULT_LOAD_FACTOR} as load factor * copying a given collection. * * @param c a {@link Collection} to be copied into the new hash big set. */ public OPEN_HASH_BIG_SET( final Collection c ) { this( c, DEFAULT_LOAD_FACTOR ); } /** Creates a new hash big set copying a given type-specific collection. * * @param c a type-specific collection to be copied into the new hash big set. * @param f the load factor. 
*/
	public OPEN_HASH_BIG_SET( final COLLECTION KEY_EXTENDS_GENERIC c, final float f ) {
		// Size the table for c.size() elements up front, then insert the elements.
		this( c.size(), f );
		addAll( c );
	}

	/** Creates a new hash big set with {@link Hash#DEFAULT_LOAD_FACTOR} as load factor
	 * copying a given type-specific collection.
	 *
	 * @param c a type-specific collection to be copied into the new hash big set.
	 */
	public OPEN_HASH_BIG_SET( final COLLECTION KEY_EXTENDS_GENERIC c ) {
		this( c, DEFAULT_LOAD_FACTOR );
	}

	/** Creates a new hash big set using elements provided by a type-specific iterator.
	 *
	 * <p>The number of elements is not known in advance, so the set starts with
	 * {@link Hash#DEFAULT_INITIAL_SIZE} expected elements and elements are added one by one.
	 *
	 * @param i a type-specific iterator whose elements will fill the new hash big set.
	 * @param f the load factor.
	 */
	public OPEN_HASH_BIG_SET( final STD_KEY_ITERATOR KEY_EXTENDS_GENERIC i, final float f ) {
		this( DEFAULT_INITIAL_SIZE, f );
		while( i.hasNext() ) add( i.NEXT_KEY() );
	}

	/** Creates a new hash big set with {@link Hash#DEFAULT_LOAD_FACTOR} as load factor using elements provided by a type-specific iterator.
	 *
	 * @param i a type-specific iterator whose elements will fill the new hash big set.
	 */
	public OPEN_HASH_BIG_SET( final STD_KEY_ITERATOR KEY_EXTENDS_GENERIC i ) {
		this( i, DEFAULT_LOAD_FACTOR );
	}

#if KEYS_PRIMITIVE

	/** Creates a new hash big set using elements provided by an iterator.
	 *
	 * <p>The iterator is wrapped by {@code ITERATORS.AS_KEY_ITERATOR} and passed to the
	 * constructor taking a type-specific iterator.
	 *
	 * @param i an iterator whose elements will fill the new hash big set.
	 * @param f the load factor.
	 */
	public OPEN_HASH_BIG_SET( final Iterator i, final float f ) {
		this( ITERATORS.AS_KEY_ITERATOR( i ), f );
	}

	/** Creates a new hash big set with {@link Hash#DEFAULT_LOAD_FACTOR} as load factor using elements provided by an iterator.
	 *
	 * @param i an iterator whose elements will fill the new hash big set.
	 */
	public OPEN_HASH_BIG_SET( final Iterator i ) {
		this( ITERATORS.AS_KEY_ITERATOR( i ) );
	}

#endif

	/** Creates a new hash big set and fills it with the elements of a given array.
	 *
	 * @param a an array whose elements will be used to fill the new hash big set.
	 * @param offset the first element to use.
	 * @param length the number of elements to use.
* @param f the load factor.
	 */
	public OPEN_HASH_BIG_SET( final KEY_GENERIC_TYPE[] a, final int offset, final int length, final float f ) {
		// A negative length is clamped to zero here so that the range check below, rather than
		// the sizing constructor, is the one that sees the invalid value.
		this( length < 0 ? 0 : length, f );
		ARRAYS.ensureOffsetLength( a, offset, length );
		for( int i = 0; i < length; i++ ) add( a[ offset + i ] );
	}

	/** Creates a new hash big set with {@link Hash#DEFAULT_LOAD_FACTOR} as load factor and fills it with the elements of a given array.
	 *
	 * @param a an array whose elements will be used to fill the new hash big set.
	 * @param offset the first element to use.
	 * @param length the number of elements to use.
	 */
	public OPEN_HASH_BIG_SET( final KEY_GENERIC_TYPE[] a, final int offset, final int length ) {
		this( a, offset, length, DEFAULT_LOAD_FACTOR );
	}

	/** Creates a new hash big set copying the elements of an array.
	 *
	 * @param a an array to be copied into the new hash big set.
	 * @param f the load factor.
	 */
	public OPEN_HASH_BIG_SET( final KEY_GENERIC_TYPE[] a, final float f ) {
		this( a, 0, a.length, f );
	}

	/** Creates a new hash big set with {@link Hash#DEFAULT_LOAD_FACTOR} as load factor
	 * copying the elements of an array.
	 *
	 * @param a an array to be copied into the new hash big set.
	 */
	public OPEN_HASH_BIG_SET( final KEY_GENERIC_TYPE[] a ) {
		this( a, DEFAULT_LOAD_FACTOR );
	}

	/** Returns the number of entries excluding the null key, which is tracked by
	 * {@link #containsNull} rather than by a slot of the key array.
	 *
	 * @return {@link #size} minus one if this set contains the null key, {@link #size} otherwise.
	 */
	private long realSize() {
		return containsNull ? size - 1 : size;
	}

	/** Grows the table, if necessary, so that it can hold the given number of entries.
	 *
	 * <p>The table is never shrunk by this method.
	 *
	 * @param capacity the number of entries the table should be able to hold.
	 */
	private void ensureCapacity( final long capacity ) {
		final long needed = bigArraySize( capacity, f );
		if ( needed > n ) rehash( needed );
	}

#if KEYS_PRIMITIVE
	/** {@inheritDoc} */
	public boolean addAll( COLLECTION c ) {
		// Use the 64-bit size when the argument provides one.
		final long size = c instanceof Size64 ? ((Size64)c).size64() : c.size();
		if ( f <= .5 ) ensureCapacity( size ); // The resulting collection will be sized for c.size() elements
		else ensureCapacity( size64() + size ); // The resulting collection will be sized for size() + c.size() elements
		return super.addAll( c );
	}
#endif

	/** {@inheritDoc} */
	public boolean addAll( Collection c ) {
		// Use the 64-bit size when the argument provides one.
		final long size = c instanceof Size64 ? ((Size64)c).size64() : c.size();
		// The resulting collection will be at least c.size() big
		if ( f <= .5 ) ensureCapacity( size ); // The resulting collection will be sized for c.size() elements
		else ensureCapacity( size64() + size ); // The resulting collection will be sized for size() + c.size() elements
		return super.addAll( c );
	}

	/** Adds a key to this set.
	 *
	 * <p>A key for which {@code KEY_IS_NULL} holds is recorded in {@link #containsNull}
	 * instead of in the table. Any other key is located by probing consecutive slots,
	 * starting from the slot selected by its hash, until an equal key or an empty slot is found.
	 *
	 * @param k the key to add.
	 * @return true if the key was added, false if it was already present.
	 */
	public boolean add( final KEY_GENERIC_TYPE k ) {
		// base is the index of a segment of the big array, displ the index inside that segment.
		int displ, base;

		if ( KEY_IS_NULL( k ) ) {
			if ( containsNull ) return false;
			containsNull = true;
		}
		else {
			KEY_GENERIC_TYPE curr;
			final KEY_GENERIC_TYPE[][] key = this.key;
			final long h = KEY2LONGHASH( k );

			// The starting point.
			if ( ! KEY_IS_NULL( curr = key[ base = (int)( ( h & mask ) >>> BigArrays.SEGMENT_SHIFT ) ][ displ = (int)( h & segmentMask ) ] ) ) {
				if ( KEY_EQUALS_NOT_NULL( curr, k ) ) return false;
				// Move to the next slot: displ advances modulo the segment length, and base moves
				// to the next segment (modulo the number of segments) whenever displ wraps to zero.
				while( ! KEY_IS_NULL( curr = key[ base = ( base + ( ( displ = ( displ + 1 ) & segmentMask ) == 0 ? 1 : 0 ) ) & baseMask ][ displ ] ) )
					if ( KEY_EQUALS_NOT_NULL( curr, k ) ) return false;
			}

			// The probe stopped on an empty slot: store the key there.
			key[ base ][ displ ] = k;
		}

		// The size before the insertion is compared with the threshold; the table doubles when it is reached.
		if ( size++ >= maxFill ) rehash( 2 * n );
		if ( ASSERTS ) checkTable();
		return true;
	}

#if KEY_CLASS_Object
	/** Adds an element if not present, or gets the existing element if already present.
	 *
	 * This is equivalent to (but faster than) doing a:
	 *

	 * K exist = set.get(k);
	 * if (exist == null) {
	 *   set.add(k);
	 *   exist = k;
	 * }
	 * 
*/
	public KEY_GENERIC_TYPE addOrGet( final KEY_GENERIC_TYPE k ) {
		// base is the index of a segment of the big array, displ the index inside that segment.
		int displ, base;

		if ( KEY_IS_NULL( k ) ) {
			// The null key is tracked by a flag: if it is already present it is returned as is.
			if ( containsNull ) return null;
			containsNull = true;
		}
		else {
			KEY_GENERIC_TYPE curr;
			final KEY_GENERIC_TYPE[][] key = this.key;
			final long h = KEY2LONGHASH( k );

			// The starting point.
			if ( ! KEY_IS_NULL( curr = key[ base = (int)( ( h & mask ) >>> BigArrays.SEGMENT_SHIFT ) ][ displ = (int)( h & segmentMask ) ] ) ) {
				// An equal key is already stored: return the stored instance, not the argument.
				if ( KEY_EQUALS_NOT_NULL( curr, k ) ) return curr;
				while( ! KEY_IS_NULL( curr = key[ base = ( base + ( ( displ = ( displ + 1 ) & segmentMask ) == 0 ? 1 : 0 ) ) & baseMask ][ displ ] ) )
					if ( KEY_EQUALS_NOT_NULL( curr, k ) ) return curr;
			}

			key[ base ][ displ ] = k;
		}

		if ( size++ >= maxFill ) rehash( 2 * n );
		if ( ASSERTS ) checkTable();
		return k;
	}
#endif

	/** Shifts left entries with the specified hash code, starting at the specified position,
	 * and empties the resulting free entry.
	 *
	 * <p>Starting from the freed position, the following slots are scanned up to the first
	 * empty one. An entry is moved back into the free position only if its home slot
	 * (its hash masked by {@link #mask}) does not lie cyclically between the free position
	 * (excluded) and the entry's current position (included).
	 *
	 * @param pos a starting position.
	 */
	protected final void shiftKeys( long pos ) {
		// Shift entries with the same hash.
		long last, slot;
		final KEY_GENERIC_TYPE[][] key = this.key;

		for(;;) {
			// last is the free position to fill; pos scans the slots after it.
			pos = ( ( last = pos ) + 1 ) & mask;

			for(;;) {
				if ( KEY_IS_NULL( BIG_ARRAYS.get( key, pos ) ) ) {
					// End of the run of occupied slots: empty the free position and stop.
					BIG_ARRAYS.set( key, last, KEY_NULL );
					return;
				}
				slot = KEY2LONGHASH( BIG_ARRAYS.get( key, pos ) ) & mask;
				// The two branches handle the case in which the scan has not / has wrapped around the table.
				if ( last <= pos ? last >= slot || slot > pos : last >= slot && slot > pos ) break;
				pos = ( pos + 1 ) & mask;
			}

			BIG_ARRAYS.set( key, last, BIG_ARRAYS.get( key, pos ) );
		}
	}

	/** Removes the entry stored at the given segment and displacement.
	 *
	 * <p>After the removal, the table is halved if the size drops below one fourth of
	 * {@link #maxFill} and the table is larger than {@link Hash#DEFAULT_INITIAL_SIZE}.
	 *
	 * @param base the segment index of the entry.
	 * @param displ the displacement of the entry inside the segment.
	 * @return true.
	 */
	private boolean removeEntry( final int base, final int displ ) {
		// The cast to long avoids an int overflow when computing the absolute position.
		shiftKeys( base * (long)BigArrays.SEGMENT_SIZE + displ );
		if ( --size < maxFill / 4 && n > DEFAULT_INITIAL_SIZE ) rehash( n / 2 );
		return true;
	}

	/** Removes the null key, applying the same shrinking rule as {@link #removeEntry(int, int)}.
	 *
	 * @return true.
	 */
	private boolean removeNullEntry() {
		containsNull = false;
		if ( --size < maxFill / 4 && n > DEFAULT_INITIAL_SIZE ) rehash( n / 2 );
		return true;
	}

	/** Removes a key from this set.
	 *
	 * @param k the key to remove.
	 * @return true if the key was present and has been removed.
	 */
	public boolean rem( final KEY_TYPE k ) {
		if ( KEY_IS_NULL( k ) ) {
			if ( containsNull ) return removeNullEntry();
			return false;
		}

		KEY_GENERIC_TYPE curr;
		final KEY_GENERIC_TYPE[][] key = this.key;
		final long h = KEY2LONGHASH( k );
		int displ, base;

		// The starting point.
		if ( KEY_IS_NULL( curr = key[ base = (int)( ( h & mask ) >>> BigArrays.SEGMENT_SHIFT ) ][ displ = (int)( h & segmentMask ) ] ) ) return false;
		if ( KEY_EQUALS_NOT_NULL( curr, k ) ) return removeEntry( base, displ );

		// Probe the following slots until the key or an empty slot is found.
		while( true ) {
			if ( KEY_IS_NULL( curr = key[ base = ( base + ( ( displ = ( displ + 1 ) & segmentMask ) == 0 ? 1 : 0 ) ) & baseMask ][ displ ] ) ) return false;
			if ( KEY_EQUALS_NOT_NULL( curr, k ) ) return removeEntry( base, displ );
		}
	}

	/** Checks whether this set contains a key.
	 *
	 * @param k the key to look up.
	 * @return true if the key belongs to this set.
	 */
	public boolean contains( final KEY_TYPE k ) {
		if ( KEY_IS_NULL( k ) ) return containsNull;

		KEY_GENERIC_TYPE curr;
		final KEY_GENERIC_TYPE[][] key = this.key;
		final long h = KEY2LONGHASH( k );
		int displ, base;

		// The starting point.
		if ( KEY_IS_NULL( curr = key[ base = (int)( ( h & mask ) >>> BigArrays.SEGMENT_SHIFT ) ][ displ = (int)( h & segmentMask ) ] ) ) return false;
		if ( KEY_EQUALS_NOT_NULL( curr, k ) ) return true;

		// Probe the following slots until the key or an empty slot is found.
		while( true ) {
			if ( KEY_IS_NULL( curr = key[ base = ( base + ( ( displ = ( displ + 1 ) & segmentMask ) == 0 ? 1 : 0 ) ) & baseMask ][ displ ] ) ) return false;
			if ( KEY_EQUALS_NOT_NULL( curr, k ) ) return true;
		}
	}

#if KEY_CLASS_Object
	/** Returns the element of this set that is equal to the given key, or null.
* @param k the key to look up.
	 * @return the element of this set that is equal to the given key, or null.
	 */
	public K get( final KEY_TYPE k ) {
		if ( k == null ) return null; // This is correct independently of the value of containsNull

		KEY_GENERIC_TYPE curr;
		final KEY_GENERIC_TYPE[][] key = this.key;
		final long h = KEY2LONGHASH( k );
		int displ, base;

		// The starting point.
		if ( KEY_IS_NULL( curr = key[ base = (int)( ( h & mask ) >>> BigArrays.SEGMENT_SHIFT ) ][ displ = (int)( h & segmentMask ) ] ) ) return null;
		// Return the instance stored in the table, which may be a different object than the argument.
		if ( KEY_EQUALS_NOT_NULL( curr, k ) ) return curr;

		// Probe the following slots until an equal key or an empty slot is found.
		while( true ) {
			if ( KEY_IS_NULL( curr = key[ base = ( base + ( ( displ = ( displ + 1 ) & segmentMask ) == 0 ? 1 : 0 ) ) & baseMask ][ displ ] ) ) return null;
			if ( KEY_EQUALS_NOT_NULL( curr, k ) ) return curr;
		}
	}
#endif

	/** Removes all elements from this set.
	 *

<p>To increase object reuse, this method does not change the table size.
	 * If you want to reduce the table size, you must use {@link #trim(long)}.
	 *
	 */
	public void clear() {
		// Nothing to do on an empty set: this avoids refilling the whole table.
		if ( size == 0 ) return;
		size = 0;
		containsNull = false;
		BIG_ARRAYS.fill( key, KEY_NULL );
	}

	/** An iterator over a hash big set.
	 *
	 * <p>The null key, if present, is returned first; the table is then scanned backwards,
	 * from the last slot of the last segment towards the first slot of the first segment.
	 * Entries that a removal moved across the end of the table are collected in
	 * {@link #wrapped} and returned after the scan of the table is over.
	 */
	private class SetIterator extends KEY_ABSTRACT_ITERATOR KEY_GENERIC {
		/** The base of the last entry returned, if positive or zero; initially, the number of components
			of the key array. If negative, the last element returned was
			that of index {@code - base - 1} from the {@link #wrapped} list. */
		int base = key.length;
		/** The displacement of the last entry returned; initially, zero. */
		int displ;
		/** The index of the last entry that has been returned (or {@link Long#MIN_VALUE} if {@link #base} is negative).
			It is -1 if either we did not return an entry yet, or the last returned entry has been removed. */
		long last = -1;
		/** A downward counter measuring how many entries must still be returned. */
		long c = size;
		/** A boolean telling us whether we should return the null key. */
		boolean mustReturnNull = OPEN_HASH_BIG_SET.this.containsNull;
		/** A lazily allocated list containing elements that have wrapped around the table because of removals. */
		ARRAY_LIST KEY_GENERIC wrapped;

		public boolean hasNext() {
			return c != 0;
		}

		public KEY_GENERIC_TYPE NEXT_KEY() {
			if ( ! hasNext() ) throw new NoSuchElementException();
			c--;

			if ( mustReturnNull ) {
				mustReturnNull = false;
				// The table size is used as a marker: no slot of the table has this index.
				last = n;
				return KEY_NULL;
			}

			final KEY_GENERIC_TYPE[][] key = OPEN_HASH_BIG_SET.this.key;

			for(;;) {
				if ( displ == 0 && base <= 0 ) {
					// We are just enumerating elements from the wrapped list.
					last = Long.MIN_VALUE;
					return wrapped.GET_KEY( - ( --base ) - 1 );
				}

				// Step backwards; when a segment is exhausted, move to the end of the previous one.
				if ( displ-- == 0 ) displ = key[ --base ].length - 1;
				final KEY_GENERIC_TYPE k = key[ base ][ displ ];
				if ( ! KEY_IS_NULL( k ) ) {
					last = base * (long)BigArrays.SEGMENT_SIZE + displ;
					return k;
				}
			}
		}

		/** Shifts left entries with the specified hash code, starting at the specified position,
		 * and empties the resulting free entry.
		 *
		 * <p>Unlike the method of the enclosing class with the same name, this version records in
		 * {@link #wrapped} every entry that is moved from a position smaller than the free position
		 * it fills (that is, an entry that wrapped around the end of the table).
		 *
		 * @param pos a starting position.
		 */
		private final void shiftKeys( long pos ) {
			// Shift entries with the same hash.
			long last, slot;
			KEY_GENERIC_TYPE curr;
			final KEY_GENERIC_TYPE[][] key = OPEN_HASH_BIG_SET.this.key;

			for(;;) {
				pos = ( ( last = pos ) + 1 ) & mask;

				for(;;) {
					if( KEY_IS_NULL( curr = BIG_ARRAYS.get( key, pos ) ) ) {
						BIG_ARRAYS.set( key, last, KEY_NULL );
						return;
					}
					slot = KEY2LONGHASH( curr ) & mask;
					if ( last <= pos ? last >= slot || slot > pos : last >= slot && slot > pos ) break;
					pos = ( pos + 1 ) & mask;
				}

				if ( pos < last ) { // Wrapped entry.
					if ( wrapped == null ) wrapped = new ARRAY_LIST KEY_GENERIC();
					wrapped.add( BIG_ARRAYS.get( key, pos ) );
				}

				BIG_ARRAYS.set( key, last, curr );
			}
		}

		public void remove() {
			if ( last == -1 ) throw new IllegalStateException();
			// last == n marks the null key (see NEXT_KEY()).
			if ( last == n ) OPEN_HASH_BIG_SET.this.containsNull = false;
			else if ( base >= 0 ) shiftKeys( last );
			else {
				// We're removing wrapped entries.
#if KEYS_REFERENCE
				OPEN_HASH_BIG_SET.this.remove( wrapped.set( - base - 1, null ) );
#else
				OPEN_HASH_BIG_SET.this.remove( wrapped.GET_KEY( - base - 1 ) );
#endif
				// NOTE(review): presumably the remove() call above already updates size - confirm in the superclass.
				last = -1; // Note that we must not decrement size
				return;
			}

			size--;
			last = -1; // You can no longer remove this entry.
			if ( ASSERTS ) checkTable();
		}
	}

	/** Returns a new iterator over the elements of this set.
	 *
	 * @return a new {@code SetIterator}.
	 */
	public KEY_ITERATOR KEY_GENERIC iterator() {
		return new SetIterator();
	}

	/** A no-op for backward compatibility. The kind of tables implemented by
	 * this class never need rehashing.
	 *

<p>If you need to reduce the table size to fit exactly
	 * this set, use {@link #trim()}.
	 *
	 * @return true.
	 * @see #trim()
	 * @deprecated A no-op.
	 */
	@Deprecated
	public boolean rehash() {
		return true;
	}

	/** Rehashes this set, making the table as small as possible.
	 *
	 * <p>This method rehashes the table to the smallest size satisfying the
	 * load factor. It can be used when the set will not be changed anymore, so
	 * to optimize access speed and size.
	 *
	 * <p>If the table size is already the minimum possible, this method
	 * does nothing.
	 *
	 * @return true if there was enough memory to trim the set.
	 * @see #trim(long)
	 */
	public boolean trim() {
		final long l = bigArraySize( size, f );
		// Nothing to do if the table would not shrink, or if the candidate table
		// could not hold the current entries within its fill threshold.
		if ( l >= n || size > maxFill( l, f ) ) return true;
		try {
			rehash( l );
		}
		catch(OutOfMemoryError cantDoIt) {
			// Not enough memory for the new table: the set is left as it was.
			return false;
		}
		return true;
	}

	/** Rehashes this set if the table is too large.
	 *
	 * <p>Let <var>N</var> be the smallest table size that can hold
	 * <code>max(n,{@link #size64()})</code> entries, still satisfying the load factor. If the current
	 * table size is smaller than or equal to <var>N</var>, this method does
	 * nothing. Otherwise, it rehashes this set in a table of size
	 * <var>N</var>.
	 *
	 * <p>This method is useful when reusing sets. {@linkplain #clear() Clearing a
	 * set} leaves the table size untouched. If you are reusing a set
	 * many times, you can call this method with a typical
	 * size to avoid keeping around a very large table just
	 * because of a few large transient sets.
	 *
	 * @param n the threshold for the trimming.
	 * @return true if there was enough memory to trim the set.
	 * @see #trim()
	 */
	public boolean trim( final long n ) {
		// Size the target table for max(n, size), as documented: sizing it for n alone would
		// rehash a populated set into a table with fewer slots than entries whenever n is
		// smaller than the current size, and the probing loop of rehash() would never find a free slot.
		final long l = bigArraySize( Math.max( n, size ), f );
		// Same safety guard as trim(): never move to a table whose fill threshold is below the current size.
		if ( this.n <= l || size > maxFill( l, f ) ) return true;
		try {
			rehash( l );
		}
		catch( OutOfMemoryError cantDoIt ) {
			// Not enough memory for the new table: the set is left as it was.
			return false;
		}
		return true;
	}

	/** Resizes the set.
	 *

<p>This method implements the basic rehashing strategy, and may be
	 * overridden by subclasses implementing different rehashing strategies (e.g.,
	 * disk-based rehashing). However, you should not override this method
	 * unless you understand the internal workings of this class.
	 *
	 * <p>The new table must have room for all the non-null keys currently in the set:
	 * every key is reinserted by probing for an empty slot, and the probing has no other exit.
	 *
	 * @param newN the new size
	 */
	SUPPRESS_WARNINGS_KEY_UNCHECKED
	protected void rehash( final long newN ) {
		final KEY_GENERIC_TYPE key[][] = this.key;
		final KEY_GENERIC_TYPE newKey[][] = KEY_GENERIC_BIG_ARRAY_CAST BIG_ARRAYS.newBigArray( newN );
		final long mask = newN - 1; // Note that this is used by the hashing macro
		final int newSegmentMask = newKey[ 0 ].length - 1;
		final int newBaseMask = newKey.length - 1;

		// (base, displ) scans the old table, using the old segmentMask field;
		// (b, d) is the probe position in the new table.
		int base = 0, displ = 0, b, d;
		long h;
		KEY_GENERIC_TYPE k;

		// Exactly realSize() keys are stored in the table (the null key is only a flag).
		for( long i = realSize(); i-- != 0; ) {

			// Skip empty slots of the old table.
			while( KEY_IS_NULL( key[ base ][ displ ] ) ) base = ( base + ( ( displ = ( displ + 1 ) & segmentMask ) == 0 ? 1 : 0 ) );

			k = key[ base ][ displ ];
			h = KEY2LONGHASH( k );

			// The starting point.
			if ( ! KEY_IS_NULL( newKey[ b = (int)( ( h & mask ) >>> BigArrays.SEGMENT_SHIFT ) ][ d = (int)( h & newSegmentMask ) ] ) )
				while( ! KEY_IS_NULL( newKey[ b = ( b + ( ( d = ( d + 1 ) & newSegmentMask ) == 0 ? 1 : 0 ) ) & newBaseMask ][ d ] ) );

			newKey[ b ][ d ] = k;

			// Advance past the slot just copied.
			base = ( base + ( ( displ = ( displ + 1 ) & segmentMask ) == 0 ? 1 : 0 ) );
		}

		// Install the new table and recompute the values derived from its size.
		this.n = newN;
		this.key = newKey;
		initMasks();
		maxFill = maxFill( n, f );
	}

	/** Returns the number of elements of this set, capped at {@link Integer#MAX_VALUE}.
	 *
	 * @return the size of this set, or {@link Integer#MAX_VALUE} if the size is larger.
	 * @deprecated Use {@link #size64()} to obtain the exact size.
	 */
	@Deprecated
	public int size() {
		return (int)Math.min( Integer.MAX_VALUE, size );
	}

	/** Returns the number of elements of this set as a long.
	 *
	 * @return the number of elements of this set.
	 */
	public long size64() {
		return size;
	}

	/** Checks whether this set is empty.
	 *
	 * @return true if this set contains no elements.
	 */
	public boolean isEmpty() {
		return size == 0;
	}

	/** Returns a deep copy of this big set.
	 *

This method performs a deep copy of this big hash set; the data stored in the * set, however, is not cloned. Note that this makes a difference only for object keys. * * @return a deep copy of this big set. */ SUPPRESS_WARNINGS_KEY_UNCHECKED public OPEN_HASH_BIG_SET KEY_GENERIC clone() { OPEN_HASH_BIG_SET KEY_GENERIC c; try { c = (OPEN_HASH_BIG_SET KEY_GENERIC)super.clone(); } catch(CloneNotSupportedException cantHappen) { throw new InternalError(); } c.key = BIG_ARRAYS.copy( key ); c.containsNull = containsNull; return c; } /** Returns a hash code for this set. * * This method overrides the generic method provided by the superclass. * Since equals() is not overriden, it is important * that the value returned by this method is the same value as * the one returned by the overriden method. * * @return a hash code for this set. */ public int hashCode() { final KEY_GENERIC_TYPE key[][] = this.key; int h = 0, base = 0, displ = 0; for( long j = realSize(); j-- != 0; ) { while( KEY_IS_NULL( key[ base ][ displ ] ) ) base = ( base + ( ( displ = ( displ + 1 ) & segmentMask ) == 0 ? 1 : 0 ) ); #if KEYS_REFERENCE if ( this != key[ base ][ displ ] ) #endif h += KEY2JAVAHASH_NOT_NULL( key[ base ][ displ ] ); base = ( base + ( ( displ = ( displ + 1 ) & segmentMask ) == 0 ? 
1 : 0 ) ); } return h; } private void writeObject(java.io.ObjectOutputStream s) throws java.io.IOException { final KEY_ITERATOR KEY_GENERIC i = iterator(); s.defaultWriteObject(); for( long j = size; j-- != 0; ) s.WRITE_KEY( i.NEXT_KEY() ); } SUPPRESS_WARNINGS_KEY_UNCHECKED private void readObject(java.io.ObjectInputStream s) throws java.io.IOException, ClassNotFoundException { s.defaultReadObject(); n = bigArraySize( size, f ); maxFill = maxFill( n, f ); final KEY_GENERIC_TYPE[][] key = this.key = KEY_GENERIC_BIG_ARRAY_CAST BIG_ARRAYS.newBigArray( n ); initMasks(); long h; KEY_GENERIC_TYPE k; int base, displ; for( long i = size; i-- != 0; ) { k = KEY_GENERIC_CAST s.READ_KEY(); if ( KEY_IS_NULL( k ) ) containsNull = true; else { h = KEY2LONGHASH( k ); if ( ! KEY_IS_NULL( key[ base = (int)( ( h & mask ) >>> BigArrays.SEGMENT_SHIFT ) ][ displ = (int)( h & segmentMask ) ] ) ) while( ! KEY_IS_NULL( key[ base = ( base + ( ( displ = ( displ + 1 ) & segmentMask ) == 0 ? 1 : 0 ) ) & baseMask ][ displ ] ) ); key[ base ][ displ ] = k; } } if ( ASSERTS ) checkTable(); } #ifdef ASSERTS_CODE private void checkTable() { assert ( n & -n ) == n : "Table length is not a power of two: " + n; assert n == BIG_ARRAYS.length( key ); long n = this.n; while( n-- != 0 ) if ( ! KEY_IS_NULL( BIG_ARRAYS.get( key, n ) ) && ! contains( BIG_ARRAYS.get( key, n ) ) ) throw new AssertionError( "Hash table has key " + BIG_ARRAYS.get( key, n ) + " marked as occupied, but the key does not belong to the table" ); #if KEYS_PRIMITIVE java.util.HashSet s = new java.util.HashSet (); #else java.util.HashSet s = new java.util.HashSet(); #endif for( long i = size(); i-- != 0; ) if ( ! KEY_IS_NULL( BIG_ARRAYS.get( key, i ) ) && ! 
s.add( BIG_ARRAYS.get( key, i ) ) ) throw new AssertionError( "Key " + BIG_ARRAYS.get( key, i ) + " appears twice" ); } #else private void checkTable() {} #endif #ifdef TEST private static long seed = System.currentTimeMillis(); private static java.util.Random r = new java.util.Random( seed ); private static KEY_TYPE genKey() { #if KEY_CLASS_Byte || KEY_CLASS_Short || KEY_CLASS_Character return (KEY_TYPE)(r.nextInt()); #elif KEYS_PRIMITIVE return r.NEXT_KEY(); #elif KEY_CLASS_Object return Integer.toBinaryString( r.nextInt() ); #else return new java.io.Serializable() {}; #endif } private static final class ArrayComparator implements java.util.Comparator { public int compare( Object a, Object b ) { byte[] aa = (byte[])a; byte[] bb = (byte[])b; int length = Math.min( aa.length, bb.length ); for( int i = 0; i < length; i++ ) { if ( aa[ i ] < bb[ i ] ) return -1; if ( aa[ i ] > bb[ i ] ) return 1; } return aa.length == bb.length ? 0 : ( aa.length < bb.length ? -1 : 1 ); } } private static final class MockSet extends java.util.TreeSet { private java.util.List list = new java.util.ArrayList(); public MockSet( java.util.Comparator c ) { super( c ); } public boolean add( Object k ) { if ( ! 
contains( k ) ) list.add( k ); return super.add( k ); } public boolean addAll( Collection c ) { java.util.Iterator i = c.iterator(); boolean result = false; while( i.hasNext() ) result |= add( i.next() ); return result; } public boolean removeAll( Collection c ) { java.util.Iterator i = c.iterator(); boolean result = false; while( i.hasNext() ) result |= remove( i.next() ); return result; } public boolean remove( Object k ) { if ( contains( k ) ) { int i = list.size(); while( i-- != 0 ) if ( comparator().compare( list.get( i ), k ) == 0 ) { list.remove( i ); break; } } return super.remove( k ); } private void justRemove( Object k ) { super.remove( k ); } public java.util.Iterator iterator() { return new java.util.Iterator() { final java.util.Iterator iterator = list.iterator(); Object curr; public Object next() { return curr = iterator.next(); } public boolean hasNext() { return iterator.hasNext(); } public void remove() { justRemove( curr ); iterator.remove(); } }; } } private static java.text.NumberFormat format = new java.text.DecimalFormat( "#,###.00" ); private static java.text.FieldPosition fp = new java.text.FieldPosition( 0 ); private static String format( double d ) { StringBuffer s = new StringBuffer(); return format.format( d, s, fp ).toString(); } private static void speedTest( int n, float f, boolean comp ) { int i, j; OPEN_HASH_BIG_SET m; java.util.HashSet t; KEY_TYPE k[] = new KEY_TYPE[n]; KEY_TYPE nk[] = new KEY_TYPE[n]; long ms; for( i = 0; i < n; i++ ) { k[i] = genKey(); nk[i] = genKey(); } double totAdd = 0, totYes = 0, totNo = 0, totIter = 0, totRemYes = 0, totRemNo = 0, d; if ( comp ) { for( j = 0; j < 20; j++ ) { t = new java.util.HashSet( 16 ); /* We add pairs to t. */ ms = System.currentTimeMillis(); for( i = 0; i < n; i++ ) t.add( KEY2OBJ( k[i] ) ); d = 1.0 * n / (System.currentTimeMillis() - ms ); if ( j > 2 ) totAdd += d; System.out.print("Add: " + format( d ) +" K/s " ); /* We check for pairs in t. 
*/ ms = System.currentTimeMillis(); for( i = 0; i < n; i++ ) t.contains( KEY2OBJ( k[i] ) ); d = 1.0 * n / (System.currentTimeMillis() - ms ); if ( j > 2 ) totYes += d; System.out.print("Yes: " + format( d ) +" K/s " ); /* We check for pairs not in t. */ ms = System.currentTimeMillis(); for( i = 0; i < n; i++ ) t.contains( KEY2OBJ( nk[i] ) ); d = 1.0 * n / (System.currentTimeMillis() - ms ); if ( j > 2 ) totNo += d; System.out.print("No: " + format( d ) +" K/s " ); /* We iterate on t. */ ms = System.currentTimeMillis(); for( java.util.Iterator it = t.iterator(); it.hasNext(); it.next() ); d = 1.0 * n / (System.currentTimeMillis() - ms ); if ( j > 2 ) totIter += d; System.out.print("Iter: " + format( d ) +" K/s " ); /* We delete pairs not in t. */ ms = System.currentTimeMillis(); for( i = 0; i < n; i++ ) t.remove( KEY2OBJ( nk[i] ) ); d = 1.0 * n / (System.currentTimeMillis() - ms ); if ( j > 2 ) totRemNo += d; System.out.print("RemNo: " + format( d ) +" K/s " ); /* We delete pairs in t. */ ms = System.currentTimeMillis(); for( i = 0; i < n; i++ ) t.remove( KEY2OBJ( k[i] ) ); d = 1.0 * n / (System.currentTimeMillis() - ms ); if ( j > 2 ) totRemYes += d; System.out.print("RemYes: " + format( d ) +" K/s " ); System.out.println(); } System.out.println(); System.out.println( "java.util Add: " + format( totAdd/(j-3) ) + " K/s Yes: " + format( totYes/(j-3) ) + " K/s No: " + format( totNo/(j-3) ) + " K/s Iter: " + format( totIter/(j-3) ) + " K/s RemNo: " + format( totRemNo/(j-3) ) + " K/s RemYes: " + format( totRemYes/(j-3) ) + "K/s" ); System.out.println(); totAdd = totYes = totNo = totIter = totRemYes = totRemNo = 0; } for( j = 0; j < 20; j++ ) { m = new OPEN_HASH_BIG_SET( 16, f ); /* We add pairs to m. */ ms = System.currentTimeMillis(); for( i = 0; i < n; i++ ) m.add( k[i] ); d = 1.0 * n / (System.currentTimeMillis() - ms ); if ( j > 2 ) totAdd += d; System.out.print("Add: " + format( d ) +" K/s " ); /* We check for pairs in m. 
*/ ms = System.currentTimeMillis(); for( i = 0; i < n; i++ ) m.contains( k[i] ); d = 1.0 * n / (System.currentTimeMillis() - ms ); if ( j > 2 ) totYes += d; System.out.print("Yes: " + format( d ) +" K/s " ); /* We check for pairs not in m. */ ms = System.currentTimeMillis(); for( i = 0; i < n; i++ ) m.contains( nk[i] ); d = 1.0 * n / (System.currentTimeMillis() - ms ); if ( j > 2 ) totNo += d; System.out.print("No: " + format( d ) +" K/s " ); /* We iterate on m. */ ms = System.currentTimeMillis(); for( KEY_ITERATOR it = (KEY_ITERATOR)m.iterator(); it.hasNext(); it.NEXT_KEY() ); d = 1.0 * n / (System.currentTimeMillis() - ms ); if ( j > 2 ) totIter += d; System.out.print("Iter: " + format( d ) +" K/s " ); /* We delete pairs not in m. */ ms = System.currentTimeMillis(); for( i = 0; i < n; i++ ) m.remove( nk[i] ); d = 1.0 * n / (System.currentTimeMillis() - ms ); if ( j > 2 ) totRemNo += d; System.out.print("RemNo: " + format( d ) +" K/s " ); /* We delete pairs in m. */ ms = System.currentTimeMillis(); for( i = 0; i < n; i++ ) m.remove( k[i] ); d = 1.0 * n / (System.currentTimeMillis() - ms ); if ( j > 2 ) totRemYes += d; System.out.print("RemYes: " + format( d ) +" K/s " ); System.out.println(); } System.out.println(); System.out.println( "fastutil Add: " + format( totAdd/(j-3) ) + " K/s Yes: " + format( totYes/(j-3) ) + " K/s No: " + format( totNo/(j-3) ) + " K/s Iter: " + format( totIter/(j-3) ) + " K/s RemNo: " + format( totRemNo/(j-3) ) + " K/s RemYes: " + format( totRemYes/(j-3) ) + " K/s" ); System.out.println(); } private static void fatal( String msg ) { System.out.println( msg ); System.exit( 1 ); } private static void ensure( boolean cond, String msg ) { if ( cond ) return; fatal( msg ); } private static void printProbes( OPEN_HASH_BIG_SET m ) { long totProbes = 0; double totSquareProbes = 0; int maxProbes = 0; final double f = (double)m.size / m.n; for( int i = 0, c = 0; i < m.n; i++ ) { if ( ! 
KEY_IS_NULL( BIG_ARRAYS.get( m.key, i ) ) ) c++; else { if ( c != 0 ) { final long p = ( c + 1 ) * ( c + 2 ) / 2; totProbes += p; totSquareProbes += (double)p * p; } maxProbes = Math.max( c, maxProbes ); c = 0; totProbes++; totSquareProbes++; } } final double expected = (double)totProbes / m.n; System.err.println( "Expected probes: " + ( 3 * Math.sqrt( 3 ) * ( f / ( ( 1 - f ) * ( 1 - f ) ) ) + 4 / ( 9 * f ) - 1 ) + "; actual: " + expected + "; stddev: " + Math.sqrt( totSquareProbes / m.n - expected * expected ) + "; max probes: " + maxProbes ); } private static void test( int n, float f ) { int c; OPEN_HASH_BIG_SET m = new OPEN_HASH_BIG_SET(Hash.DEFAULT_INITIAL_SIZE, f); java.util.Set t = new java.util.HashSet(); /* First of all, we fill t with random data. */ for(int i=0; i2) f = Float.parseFloat(args[2]); if ( args.length > 3 ) r = new java.util.Random( seed = Long.parseLong( args[ 3 ] ) ); try { if ("speedTest".equals(args[0]) || "speedComp".equals(args[0])) speedTest( n, f, "speedComp".equals(args[0]) ); else if ( "test".equals( args[0] ) ) test(n, f); } catch( Throwable e ) { e.printStackTrace( System.err ); System.err.println( "seed: " + seed ); } } #endif }