All downloads are free. Search and download functionality uses the official Maven repository.

org.apache.mahout.math.set.OpenLongHashSet Maven / Gradle / Ivy

The newest version!
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.apache.mahout.math.set;

import java.util.Arrays;

import org.apache.mahout.math.function.LongProcedure;
import org.apache.mahout.math.list.LongArrayList;
import org.apache.mahout.math.map.HashFunctions;
import org.apache.mahout.math.map.PrimeFinder;

/**
  * Open hash set of long items;
 **/
public class OpenLongHashSet extends AbstractLongSet {
  protected static final byte FREE = 0;
  protected static final byte FULL = 1;
  protected static final byte REMOVED = 2;
  protected static final long NO_KEY_VALUE = 0;

  /** The hash table keys. */
  private long[] table;

  /** The state of each hash table entry (FREE, FULL, REMOVED). */
  private byte[] state;

  /** The number of table entries in state==FREE. */
  private int freeEntries;


  /** Constructs an empty map with default capacity and default load factors. */
  public OpenLongHashSet() {
    this(defaultCapacity);
  }

  /**
   * Constructs an empty map with the specified initial capacity and default load factors.
   *
   * @param initialCapacity the initial capacity of the map.
   * @throws IllegalArgumentException if the initial capacity is less than zero.
   */
  public OpenLongHashSet(int initialCapacity) {
    this(initialCapacity, defaultMinLoadFactor, defaultMaxLoadFactor);
  }

  /**
   * Constructs an empty map with the specified initial capacity and the specified minimum and maximum load factor.
   *
   * @param initialCapacity the initial capacity.
   * @param minLoadFactor   the minimum load factor.
   * @param maxLoadFactor   the maximum load factor.
   * @throws IllegalArgumentException if initialCapacity < 0 || (minLoadFactor < 0.0 || minLoadFactor >= 1.0) ||
   *                                  (maxLoadFactor <= 0.0 || maxLoadFactor >= 1.0) || (minLoadFactor >=
   *                                  maxLoadFactor).
   */
  public OpenLongHashSet(int initialCapacity, double minLoadFactor, double maxLoadFactor) {
    setUp(initialCapacity, minLoadFactor, maxLoadFactor);
  }

  /** Removes all values associations from the receiver. Implicitly calls trimToSize(). */
  @Override
  public void clear() {
    Arrays.fill(this.state, 0, state.length - 1, FREE);
    distinct = 0;
    freeEntries = table.length; // delta
    trimToSize();
  }

  /**
   * Returns a deep copy of the receiver.
   *
   * @return a deep copy of the receiver.
   */
  @Override
  public Object clone() {
    OpenLongHashSet copy = (OpenLongHashSet) super.clone();
    copy.table = copy.table.clone();
    copy.state = copy.state.clone();
    return copy;
  }

  /**
   * Returns true if the receiver contains the specified key.
   *
   * @return true if the receiver contains the specified key.
   */
  @Override
  public boolean contains(long key) {
    return indexOfKey(key) >= 0;
  }

  /**
   * Ensures that the receiver can hold at least the specified number of associations without needing to allocate new
   * internal memory. If necessary, allocates new internal memory and increases the capacity of the receiver. 

This * method never need be called; it is for performance tuning only. Calling this method before add()ing a * large number of associations boosts performance, because the receiver will grow only once instead of potentially * many times and hash collisions get less probable. * * @param minCapacity the desired minimum capacity. */ @Override public void ensureCapacity(int minCapacity) { if (table.length < minCapacity) { int newCapacity = nextPrime(minCapacity); rehash(newCapacity); } } /** * Applies a procedure to each key of the receiver, if any. Note: Iterates over the keys in no particular order. * Subclasses can define a particular order, for example, "sorted by key". All methods which can be expressed * in terms of this method (most methods can) must guarantee to use the same order defined by this * method, even if it is no particular order. This is necessary so that, for example, methods keys and * values will yield association pairs, not two uncorrelated lists. * * @param procedure the procedure to be applied. Stops iteration if the procedure returns false, otherwise * continues. * @return false if the procedure stopped before all keys where iterated over, true otherwise. */ @Override public boolean forEachKey(LongProcedure procedure) { for (int i = table.length; i-- > 0;) { if (state[i] == FULL) { if (!procedure.apply(table[i])) { return false; } } } return true; } /** * @param key the key to be added to the receiver. * @return the index where the key would need to be inserted, if it is not already contained. Returns -index-1 if the * key is already contained at slot index. Therefore, if the returned index < 0, then it is already contained * at slot -index-1. If the returned index >= 0, then it is NOT already contained and should be inserted at * slot index. 
*/ protected int indexOfInsertion(long key) { final int length = table.length; final int hash = HashFunctions.hash(key) & 0x7FFFFFFF; int i = hash % length; int decrement = hash % (length - 2); // double hashing, see http://www.eece.unm.edu/faculty/heileman/hash/node4.html //int decrement = (hash / length) % length; if (decrement == 0) { decrement = 1; } // stop if we find a removed or free slot, or if we find the key itself // do NOT skip over removed slots (yes, open addressing is like that...) while (state[i] == FULL && table[i] != key) { i -= decrement; //hashCollisions++; if (i < 0) { i += length; } } if (state[i] == REMOVED) { // stop if we find a free slot, or if we find the key itself. // do skip over removed slots (yes, open addressing is like that...) // assertion: there is at least one FREE slot. final int j = i; while (state[i] != FREE && (state[i] == REMOVED || table[i] != key)) { i -= decrement; //hashCollisions++; if (i < 0) { i += length; } } if (state[i] == FREE) { i = j; } } if (state[i] == FULL) { // key already contained at slot i. // return a negative number identifying the slot. return -i - 1; } // not already contained, should be inserted at slot i. // return a number >= 0 identifying the slot. return i; } /** * @param key the key to be searched in the receiver. * @return the index where the key is contained in the receiver, returns -1 if the key was not found. */ protected int indexOfKey(long key) { final int length = table.length; final int hash = HashFunctions.hash(key) & 0x7FFFFFFF; int i = hash % length; int decrement = hash % (length - 2); // double hashing, see http://www.eece.unm.edu/faculty/heileman/hash/node4.html //int decrement = (hash / length) % length; if (decrement == 0) { decrement = 1; } // stop if we find a free slot, or if we find the key itself. // do skip over removed slots (yes, open addressing is like that...) 
while (state[i] != FREE && (state[i] == REMOVED || table[i] != key)) { i -= decrement; //hashCollisions++; if (i < 0) { i += length; } } if (state[i] == FREE) { return -1; } // not found return i; //found, return index where key is contained } /** * Fills all keys contained in the receiver into the specified list. Fills the list, starting at index 0. After this * call returns the specified list has a new size that equals this.size(). Iteration order is guaranteed to * be identical to the order used by method {@link #forEachKey(LongProcedure)}. *

This method can be used * to iterate over the keys of the receiver. * * @param list the list to be filled, can have any size. */ @Override public void keys(LongArrayList list) { list.setSize(distinct); long [] elements = list.elements(); int j = 0; for (int i = table.length; i-- > 0;) { if (state[i] == FULL) { elements[j++] = table[i]; } } } /** * Associates the given key with the given value. Replaces any old (key,someOtherValue) association, if * existing. * * @param key the key the value shall be associated with. * @return true if the receiver did not already contain such a key; false if the receiver did * already contain such a key - the new value has now replaced the formerly associated value. */ @Override public boolean add(long key) { int i = indexOfInsertion(key); if (i < 0) { //already contained //i = -i - 1; return false; } if (this.distinct > this.highWaterMark) { int newCapacity = chooseGrowCapacity(this.distinct + 1, this.minLoadFactor, this.maxLoadFactor); rehash(newCapacity); return add(key); } this.table[i] = key; if (this.state[i] == FREE) { this.freeEntries--; } this.state[i] = FULL; this.distinct++; if (this.freeEntries < 1) { //delta int newCapacity = chooseGrowCapacity(this.distinct + 1, this.minLoadFactor, this.maxLoadFactor); rehash(newCapacity); } return true; } /** * Rehashes the contents of the receiver into a new table with a smaller or larger capacity. This method is called * automatically when the number of keys in the receiver exceeds the high water mark or falls below the low water * mark. 
*/ protected void rehash(int newCapacity) { int oldCapacity = table.length; //if (oldCapacity == newCapacity) return; long[] oldTable = table; byte[] oldState = state; this.table = new long[newCapacity]; this.state = new byte[newCapacity]; this.lowWaterMark = chooseLowWaterMark(newCapacity, this.minLoadFactor); this.highWaterMark = chooseHighWaterMark(newCapacity, this.maxLoadFactor); this.freeEntries = newCapacity - this.distinct; // delta for (int i = oldCapacity; i-- > 0;) { if (oldState[i] == FULL) { long element = oldTable[i]; int index = indexOfInsertion(element); this.table[index] = element; this.state[index] = FULL; } } } /** * Removes the given key with its associated element from the receiver, if present. * * @param key the key to be removed from the receiver. * @return true if the receiver contained the specified key, false otherwise. */ @Override public boolean remove(long key) { int i = indexOfKey(key); if (i < 0) { return false; } // key not contained this.state[i] = REMOVED; this.distinct--; if (this.distinct < this.lowWaterMark) { int newCapacity = chooseShrinkCapacity(this.distinct, this.minLoadFactor, this.maxLoadFactor); rehash(newCapacity); } return true; } /** * Initializes the receiver. * * @param initialCapacity the initial capacity of the receiver. * @param minLoadFactor the minLoadFactor of the receiver. * @param maxLoadFactor the maxLoadFactor of the receiver. * @throws IllegalArgumentException if initialCapacity < 0 || (minLoadFactor < 0.0 || minLoadFactor >= 1.0) || * (maxLoadFactor <= 0.0 || maxLoadFactor >= 1.0) || (minLoadFactor >= * maxLoadFactor). */ @Override protected void setUp(int initialCapacity, double minLoadFactor, double maxLoadFactor) { int capacity = initialCapacity; super.setUp(capacity, minLoadFactor, maxLoadFactor); capacity = nextPrime(capacity); if (capacity == 0) { capacity = 1; } // open addressing needs at least one FREE slot at any time. 
this.table = new long[capacity]; this.state = new byte[capacity]; // memory will be exhausted long before this pathological case happens, anyway. this.minLoadFactor = minLoadFactor; if (capacity == PrimeFinder.largestPrime) { this.maxLoadFactor = 1.0; } else { this.maxLoadFactor = maxLoadFactor; } this.distinct = 0; this.freeEntries = capacity; // delta // lowWaterMark will be established upon first expansion. // establishing it now (upon instance construction) would immediately make the table shrink upon first put(...). // After all the idea of an "initialCapacity" implies violating lowWaterMarks when an object is young. // See ensureCapacity(...) this.lowWaterMark = 0; this.highWaterMark = chooseHighWaterMark(capacity, this.maxLoadFactor); } /** * Trims the capacity of the receiver to be the receiver's current size. Releases any superfluous internal memory. An * application can use this operation to minimize the storage of the receiver. */ @Override public void trimToSize() { // * 1.2 because open addressing's performance exponentially degrades beyond that point // so that even rehashing the table can take very long int newCapacity = nextPrime((int) (1 + 1.2 * size())); if (table.length > newCapacity) { rehash(newCapacity); } } /** * Access for unit tests. * @param capacity * @param minLoadFactor * @param maxLoadFactor */ protected void getInternalFactors(int[] capacity, double[] minLoadFactor, double[] maxLoadFactor) { capacity[0] = table.length; minLoadFactor[0] = this.minLoadFactor; maxLoadFactor[0] = this.maxLoadFactor; } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy