org.apache.hadoop.hdfs.util.LightWeightHashSet Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of hadoop-apache2 Show documentation
Show all versions of hadoop-apache2 Show documentation
Shaded version of Apache Hadoop for Presto
The newest version!
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hdfs.util;
import java.io.PrintStream;
import java.util.ArrayList;
import java.util.Collection;
import java.util.ConcurrentModificationException;
import java.util.Iterator;
import java.util.List;
import java.util.NoSuchElementException;
import com.facebook.presto.hadoop.$internal.org.slf4j.Logger;
import com.facebook.presto.hadoop.$internal.org.slf4j.LoggerFactory;
/**
* A low memory linked hash set implementation, which uses an array for storing
* the elements and linked lists for collision resolution. This class does not
* support null element.
*
* This class is not thread safe.
*
*/
public class LightWeightHashSet implements Collection {
/**
* Elements of {@link LightWeightLinkedSet}.
*/
static class LinkedElement {
protected final T element;
// reference to the next entry within a bucket linked list
protected LinkedElement next;
//hashCode of the element
protected final int hashCode;
public LinkedElement(T elem, int hash) {
this.element = elem;
this.next = null;
this.hashCode = hash;
}
@Override
public String toString() {
return element.toString();
}
}
protected static final float DEFAULT_MAX_LOAD_FACTOR = 0.75f;
protected static final float DEFAUT_MIN_LOAD_FACTOR = 0.2f;
protected static final int MINIMUM_CAPACITY = 16;
static final int MAXIMUM_CAPACITY = 1 << 30;
private static final Logger LOG =
LoggerFactory.getLogger(LightWeightHashSet.class);
/**
* An internal array of entries, which are the rows of the hash table. The
* size must be a power of two.
*/
protected LinkedElement[] entries;
/** Size of the entry table. */
private int capacity;
/** The size of the set (not the entry array). */
protected int size = 0;
/** Hashmask used for determining the bucket index **/
private int hash_mask;
/** Capacity at initialization time **/
private final int initialCapacity;
/**
* Modification version for fail-fast.
*
* @see ConcurrentModificationException
*/
protected int modification = 0;
private float maxLoadFactor;
private float minLoadFactor;
private final int expandMultiplier = 2;
private int expandThreshold;
private int shrinkThreshold;
/**
* @param initCapacity
* Recommended size of the internal array.
* @param maxLoadFactor
* used to determine when to expand the internal array
* @param minLoadFactor
* used to determine when to shrink the internal array
*/
@SuppressWarnings("unchecked")
public LightWeightHashSet(int initCapacity, float maxLoadFactor,
float minLoadFactor) {
if (maxLoadFactor <= 0 || maxLoadFactor > 1.0f)
throw new IllegalArgumentException("Illegal maxload factor: "
+ maxLoadFactor);
if (minLoadFactor <= 0 || minLoadFactor > maxLoadFactor)
throw new IllegalArgumentException("Illegal minload factor: "
+ minLoadFactor);
this.initialCapacity = computeCapacity(initCapacity);
this.capacity = this.initialCapacity;
this.hash_mask = capacity - 1;
this.maxLoadFactor = maxLoadFactor;
this.expandThreshold = (int) (capacity * maxLoadFactor);
this.minLoadFactor = minLoadFactor;
this.shrinkThreshold = (int) (capacity * minLoadFactor);
entries = new LinkedElement[capacity];
if (LOG.isDebugEnabled()) {
LOG.debug("initial capacity=" + initialCapacity + ", max load factor= "
+ maxLoadFactor + ", min load factor= " + minLoadFactor);
}
}
public LightWeightHashSet() {
this(MINIMUM_CAPACITY, DEFAULT_MAX_LOAD_FACTOR, DEFAUT_MIN_LOAD_FACTOR);
}
public LightWeightHashSet(int minCapacity) {
this(minCapacity, DEFAULT_MAX_LOAD_FACTOR, DEFAUT_MIN_LOAD_FACTOR);
}
/**
* Check if the set is empty.
*
* @return true is set empty, false otherwise
*/
@Override
public boolean isEmpty() {
return size == 0;
}
/**
* Return the current capacity (for testing).
*/
public int getCapacity() {
return capacity;
}
/**
* Return the number of stored elements.
*/
@Override
public int size() {
return size;
}
/**
* Get index in the internal table for a given hash.
*/
protected int getIndex(int hashCode) {
return hashCode & hash_mask;
}
/**
* Check if the set contains given element
*
* @return true if element present, false otherwise.
*/
@SuppressWarnings("unchecked")
@Override
public boolean contains(final Object key) {
return getElement((T)key) != null;
}
/**
* Return the element in this set which is equal to
* the given key, if such an element exists.
* Otherwise returns null.
*/
public T getElement(final T key) {
// validate key
if (key == null) {
throw new IllegalArgumentException("Null element is not supported.");
}
// find element
final int hashCode = key.hashCode();
final int index = getIndex(hashCode);
return getContainedElem(index, key, hashCode);
}
/**
* Check if the set contains given element at given index. If it
* does, return that element.
*
* @return the element, or null, if no element matches
*/
protected T getContainedElem(int index, final T key, int hashCode) {
for (LinkedElement e = entries[index]; e != null; e = e.next) {
// element found
if (hashCode == e.hashCode && e.element.equals(key)) {
return e.element;
}
}
// element not found
return null;
}
/**
* All all elements in the collection. Expand if necessary.
*
* @param toAdd - elements to add.
* @return true if the set has changed, false otherwise
*/
@Override
public boolean addAll(Collection extends T> toAdd) {
boolean changed = false;
for (T elem : toAdd) {
changed |= addElem(elem);
}
expandIfNecessary();
return changed;
}
/**
* Add given element to the hash table. Expand table if necessary.
*
* @return true if the element was not present in the table, false otherwise
*/
@Override
public boolean add(final T element) {
boolean added = addElem(element);
expandIfNecessary();
return added;
}
/**
* Add given element to the hash table
*
* @return true if the element was not present in the table, false otherwise
*/
protected boolean addElem(final T element) {
// validate element
if (element == null) {
throw new IllegalArgumentException("Null element is not supported.");
}
// find hashCode & index
final int hashCode = element.hashCode();
final int index = getIndex(hashCode);
// return false if already present
if (getContainedElem(index, element, hashCode) != null) {
return false;
}
modification++;
size++;
// update bucket linked list
LinkedElement le = new LinkedElement(element, hashCode);
le.next = entries[index];
entries[index] = le;
return true;
}
/**
* Remove the element corresponding to the key.
*
* @return If such element exists, return true. Otherwise, return false.
*/
@Override
@SuppressWarnings("unchecked")
public boolean remove(final Object key) {
// validate key
if (key == null) {
throw new IllegalArgumentException("Null element is not supported.");
}
LinkedElement removed = removeElem((T) key);
shrinkIfNecessary();
return removed == null ? false : true;
}
/**
* Remove the element corresponding to the key, given key.hashCode() == index.
*
* @return If such element exists, return true. Otherwise, return false.
*/
protected LinkedElement removeElem(final T key) {
LinkedElement found = null;
final int hashCode = key.hashCode();
final int index = getIndex(hashCode);
if (entries[index] == null) {
return null;
} else if (hashCode == entries[index].hashCode &&
entries[index].element.equals(key)) {
// remove the head of the bucket linked list
modification++;
size--;
found = entries[index];
entries[index] = found.next;
} else {
// head != null and key is not equal to head
// search the element
LinkedElement prev = entries[index];
for (found = prev.next; found != null;) {
if (hashCode == found.hashCode &&
found.element.equals(key)) {
// found the element, remove it
modification++;
size--;
prev.next = found.next;
found.next = null;
break;
} else {
prev = found;
found = found.next;
}
}
}
return found;
}
/**
* Remove and return n elements from the hashtable.
* The order in which entries are removed is unspecified, and
* and may not correspond to the order in which they were inserted.
*
* @return first element
*/
public List pollN(int n) {
if (n >= size) {
return pollAll();
}
List retList = new ArrayList(n);
if (n == 0) {
return retList;
}
boolean done = false;
int currentBucketIndex = 0;
while (!done) {
LinkedElement current = entries[currentBucketIndex];
while (current != null) {
retList.add(current.element);
current = current.next;
entries[currentBucketIndex] = current;
size--;
modification++;
if (--n == 0) {
done = true;
break;
}
}
currentBucketIndex++;
}
shrinkIfNecessary();
return retList;
}
/**
* Remove all elements from the set and return them. Clear the entries.
*/
public List pollAll() {
List retList = new ArrayList(size);
for (int i = 0; i < entries.length; i++) {
LinkedElement current = entries[i];
while (current != null) {
retList.add(current.element);
current = current.next;
}
}
this.clear();
return retList;
}
/**
* Get array.length elements from the set, and put them into the array.
*/
@SuppressWarnings("unchecked")
public T[] pollToArray(T[] array) {
int currentIndex = 0;
LinkedElement current = null;
if (array.length == 0) {
return array;
}
if (array.length > size) {
array = (T[]) java.lang.reflect.Array.newInstance(array.getClass()
.getComponentType(), size);
}
// do fast polling if the entire set needs to be fetched
if (array.length == size) {
for (int i = 0; i < entries.length; i++) {
current = entries[i];
while (current != null) {
array[currentIndex++] = current.element;
current = current.next;
}
}
this.clear();
return array;
}
boolean done = false;
int currentBucketIndex = 0;
while (!done) {
current = entries[currentBucketIndex];
while (current != null) {
array[currentIndex++] = current.element;
current = current.next;
entries[currentBucketIndex] = current;
size--;
modification++;
if (currentIndex == array.length) {
done = true;
break;
}
}
currentBucketIndex++;
}
shrinkIfNecessary();
return array;
}
/**
* Compute capacity given initial capacity.
*
* @return final capacity, either MIN_CAPACITY, MAX_CAPACITY, or power of 2
* closest to the requested capacity.
*/
private int computeCapacity(int initial) {
if (initial < MINIMUM_CAPACITY) {
return MINIMUM_CAPACITY;
}
if (initial > MAXIMUM_CAPACITY) {
return MAXIMUM_CAPACITY;
}
int capacity = 1;
while (capacity < initial) {
capacity <<= 1;
}
return capacity;
}
/**
* Resize the internal table to given capacity.
*/
@SuppressWarnings("unchecked")
private void resize(int cap) {
int newCapacity = computeCapacity(cap);
if (newCapacity == this.capacity) {
return;
}
this.capacity = newCapacity;
this.expandThreshold = (int) (capacity * maxLoadFactor);
this.shrinkThreshold = (int) (capacity * minLoadFactor);
this.hash_mask = capacity - 1;
LinkedElement[] temp = entries;
entries = new LinkedElement[capacity];
for (int i = 0; i < temp.length; i++) {
LinkedElement curr = temp[i];
while (curr != null) {
LinkedElement next = curr.next;
int index = getIndex(curr.hashCode);
curr.next = entries[index];
entries[index] = curr;
curr = next;
}
}
}
/**
* Checks if we need to shrink, and shrinks if necessary.
*/
protected void shrinkIfNecessary() {
if (size < this.shrinkThreshold && capacity > initialCapacity) {
resize(capacity / expandMultiplier);
}
}
/**
* Checks if we need to expand, and expands if necessary.
*/
protected void expandIfNecessary() {
if (size > this.expandThreshold && capacity < MAXIMUM_CAPACITY) {
resize(capacity * expandMultiplier);
}
}
@Override
public Iterator iterator() {
return new LinkedSetIterator();
}
@Override
public String toString() {
final StringBuilder b = new StringBuilder(getClass().getSimpleName());
b.append("(size=").append(size).append(", modification=")
.append(modification).append(", entries.length=")
.append(entries.length).append(")");
return b.toString();
}
/** Print detailed information of this object. */
public void printDetails(final PrintStream out) {
out.print(this + ", entries = [");
for (int i = 0; i < entries.length; i++) {
if (entries[i] != null) {
LinkedElement e = entries[i];
out.print("\n " + i + ": " + e);
for (e = e.next; e != null; e = e.next) {
out.print(" -> " + e);
}
}
}
out.println("\n]");
}
private class LinkedSetIterator implements Iterator {
/** The current modification epoch. */
private int expectedModification = modification;
/** The current index of the entry array. */
private int index = -1;
/** The next element to return. */
private LinkedElement next = nextNonemptyEntry();
private LinkedElement current;
private LinkedElement nextNonemptyEntry() {
for (index++; index < entries.length && entries[index] == null; index++);
return index < entries.length ? entries[index] : null;
}
@Override
public boolean hasNext() {
return next != null;
}
@Override
public T next() {
if (modification != expectedModification) {
throw new ConcurrentModificationException("modification="
+ modification + " != expectedModification = " + expectedModification);
}
if (next == null) {
throw new NoSuchElementException();
}
current = next;
final T e = next.element;
// find the next element
final LinkedElement n = next.next;
next = n != null ? n : nextNonemptyEntry();
return e;
}
@Override
public void remove() {
if (current == null) {
throw new NoSuchElementException();
}
if (modification != expectedModification) {
throw new ConcurrentModificationException("modification="
+ modification + " != expectedModification = " + expectedModification);
}
LightWeightHashSet.this.removeElem(current.element);
current = null;
expectedModification = modification;
}
}
/**
* Clear the set. Resize it to the original capacity.
*/
@Override
@SuppressWarnings("unchecked")
public void clear() {
this.capacity = this.initialCapacity;
this.hash_mask = capacity - 1;
this.expandThreshold = (int) (capacity * maxLoadFactor);
this.shrinkThreshold = (int) (capacity * minLoadFactor);
entries = new LinkedElement[capacity];
size = 0;
modification++;
}
@Override
public Object[] toArray() {
Object[] result = new Object[size];
return toArray(result);
}
@Override
@SuppressWarnings("unchecked")
public U[] toArray(U[] a) {
if (a == null) {
throw new NullPointerException("Input array can not be null");
}
if (a.length < size) {
a = (U[]) java.lang.reflect.Array.newInstance(a.getClass()
.getComponentType(), size);
}
int currentIndex = 0;
for (int i = 0; i < entries.length; i++) {
LinkedElement current = entries[i];
while (current != null) {
a[currentIndex++] = (U) current.element;
current = current.next;
}
}
return a;
}
@Override
public boolean containsAll(Collection> c) {
Iterator> iter = c.iterator();
while (iter.hasNext()) {
if (!contains(iter.next())) {
return false;
}
}
return true;
}
@Override
public boolean removeAll(Collection> c) {
boolean changed = false;
Iterator> iter = c.iterator();
while (iter.hasNext()) {
changed |= remove(iter.next());
}
return changed;
}
@Override
public boolean retainAll(Collection> c) {
throw new UnsupportedOperationException("retainAll is not supported.");
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy