com.mayabot.nlp.hppc.CharObjectHashMap Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of mynlp Show documentation
Maya Nlp subproject :mynlp
There is a newer version: 4.0.0
/*
 * Copyright 2018 mayabot.com authors. All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 *  you may not use this file except in compliance with the License.
 *  You may obtain a copy of the License at
 *
 *       http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/*
 * Copyright 2014 The Netty Project
 *
 * The Netty Project licenses this file to you under the Apache License, version 2.0 (the
 * "License"); you may not use this file except in compliance with the License. You may obtain a
 * copy of the License at:
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software distributed under the License
 * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
 * or implied. See the License for the specific language governing permissions and limitations under
 * the License.
 */

package com.mayabot.nlp.hppc;

import java.util.*;

/**
 * A hash map implementation of {@link CharObjectMap} that uses open addressing for keys.
 * To minimize the memory footprint, this class uses open addressing rather than chaining.
 * Collisions are resolved using linear probing. Deletions implement compaction, so cost of
 * remove can approach O(N) for full maps, which makes a small loadFactor recommended.
 *
 * @param  The value type stored in the map.
 */
public class CharObjectHashMap implements CharObjectMap {

    /**
     * Default initial capacity. Used if not specified in the constructor
     */
    public static final int DEFAULT_CAPACITY = 8;

    /**
     * Default load factor. Used if not specified in the constructor
     */
    public static final float DEFAULT_LOAD_FACTOR = 0.5f;

    /**
     * Placeholder for null values, so we can use the actual null to mean available.
     * (Better than using a placeholder for available: less references for GC processing.)
     */
    private static final Object NULL_VALUE = new Object();

    /**
     * The maximum number of elements allowed without allocating more space.
     */
    private int maxSize;

    /**
     * The load factor for the map. Used to calculate {@link #maxSize}.
     */
    private final float loadFactor;

    private char[] keys;
    private V[] values;
    private int size;
    private int mask;

    private final Set keySet = new KeySet();
    private final Set> entrySet = new EntrySet();
    private final Iterable> entries = new Iterable>() {
        @Override
        public Iterator> iterator() {
            return new PrimitiveIterator();
        }
    };

    public CharObjectHashMap() {
        this(DEFAULT_CAPACITY, DEFAULT_LOAD_FACTOR);
    }

    public CharObjectHashMap(int initialCapacity) {
        this(initialCapacity, DEFAULT_LOAD_FACTOR);
    }

    public CharObjectHashMap(int initialCapacity, float loadFactor) {
        if (loadFactor <= 0.0f || loadFactor > 1.0f) {
            // Cannot exceed 1 because we can never store more than capacity elements;
            // using a bigger loadFactor would trigger rehashing before the desired load is reached.
            throw new IllegalArgumentException("loadFactor must be > 0 and <= 1");
        }

        this.loadFactor = loadFactor;

        // Adjust the initial capacity if necessary.
        int capacity = safeFindNextPositivePowerOfTwo(initialCapacity);
        mask = capacity - 1;

        // Allocate the arrays.
        keys = new char[capacity];
        @SuppressWarnings({"unchecked", "SuspiciousArrayCast"})
        V[] temp = (V[]) new Object[capacity];
        values = temp;

        // Initialize the maximum size value.
        maxSize = calcMaxSize(capacity);
    }

    private static  T toExternal(T value) {
        assert value != null : "null is not a legitimate internal value. Concurrent Modification?";
        return value == NULL_VALUE ? null : value;
    }

    @SuppressWarnings("unchecked")
    private static  T toInternal(T value) {
        return value == null ? (T) NULL_VALUE : value;
    }

    @Override
    public V get(char key) {
        int index = indexOf(key);
        return index == -1 ? null : toExternal(values[index]);
    }

    @Override
    public V put(char key, V value) {
        int startIndex = hashIndex(key);
        int index = startIndex;

        for (; ; ) {
            if (values[index] == null) {
                // Found empty slot, use it.
                keys[index] = key;
                values[index] = toInternal(value);
                growSize();
                return null;
            }
            if (keys[index] == key) {
                // Found existing entry with this key, just replace the value.
                V previousValue = values[index];
                values[index] = toInternal(value);
                return toExternal(previousValue);
            }

            // Conflict, keep probing ...
            if ((index = probeNext(index)) == startIndex) {
                // Can only happen if the map was full at MAX_ARRAY_SIZE and couldn't grow.
                throw new IllegalStateException("Unable to insert");
            }
        }
    }

    @Override
    public void putAll(Map sourceMap) {
        if (sourceMap instanceof CharObjectHashMap) {
            // Optimization - iterate through the arrays.
            @SuppressWarnings("unchecked")
            CharObjectHashMap source = (CharObjectHashMap) sourceMap;
            for (int i = 0; i < source.values.length; ++i) {
                V sourceValue = source.values[i];
                if (sourceValue != null) {
                    put(source.keys[i], sourceValue);
                }
            }
            return;
        }

        // Otherwise, just add each entry.
        for (Entry entry : sourceMap.entrySet()) {
            put(entry.getKey(), entry.getValue());
        }
    }

    @Override
    public V remove(char key) {
        int index = indexOf(key);
        if (index == -1) {
            return null;
        }

        V prev = values[index];
        removeAt(index);
        return toExternal(prev);
    }

    @Override
    public int size() {
        return size;
    }

    @Override
    public boolean isEmpty() {
        return size == 0;
    }

    @Override
    public void clear() {
        Arrays.fill(keys, (char) 0);
        Arrays.fill(values, null);
        size = 0;
    }

    @Override
    public boolean containsKey(char key) {
        return indexOf(key) >= 0;
    }

    @Override
    public boolean containsValue(Object value) {
        @SuppressWarnings("unchecked")
        V v1 = toInternal((V) value);
        for (V v2 : values) {
            // The map supports null values; this will be matched as NULL_VALUE.equals(NULL_VALUE).
            if (v2 != null && v2.equals(v1)) {
                return true;
            }
        }
        return false;
    }

    @Override
    public Iterable> entries() {
        return entries;
    }

    @Override
    public Collection values() {
        return new AbstractCollection() {
            @Override
            public Iterator iterator() {
                return new Iterator() {
                    final PrimitiveIterator iter = new PrimitiveIterator();

                    @Override
                    public boolean hasNext() {
                        return iter.hasNext();
                    }

                    @Override
                    public V next() {
                        return iter.next().value();
                    }

                    @Override
                    public void remove() {
                        throw new UnsupportedOperationException();
                    }
                };
            }

            @Override
            public int size() {
                return size;
            }
        };
    }

    @Override
    public int hashCode() {
        // Hashcode is based on all non-zero, valid keys. We have to scan the whole keys
        // array, which may have different lengths for two maps of same size(), so the
        // capacity cannot be used as input for hashing but the size can.
        int hash = size;
        for (char key : keys) {
            // 0 can be a valid key or unused slot, but won't impact the hashcode in either case.
            // This way we can use a cheap loop without conditionals, or hard-to-unroll operations,
            // or the devastatingly bad memory locality of visiting value objects.
            // Also, it's important to use a hash function that does not depend on the ordering
            // of terms, only their values; since the map is an unordered collection and
            // entries can end up in different positions in different maps that have the same
            // elements, but with different history of puts/removes, due to conflicts.
            hash ^= hashCode(key);
        }
        return hash;
    }

    @Override
    public boolean equals(Object obj) {
        if (this == obj) {
            return true;
        }
        if (!(obj instanceof CharObjectMap)) {
            return false;
        }
        @SuppressWarnings("rawtypes")
        CharObjectMap other = (CharObjectMap) obj;
        if (size != other.size()) {
            return false;
        }
        for (int i = 0; i < values.length; ++i) {
            V value = values[i];
            if (value != null) {
                char key = keys[i];
                Object otherValue = other.get(key);
                if (value == NULL_VALUE) {
                    if (otherValue != null) {
                        return false;
                    }
                } else if (!value.equals(otherValue)) {
                    return false;
                }
            }
        }
        return true;
    }

    @Override
    public boolean containsKey(Object key) {
        return containsKey(objectToKey(key));
    }

    @Override
    public V get(Object key) {
        return get(objectToKey(key));
    }

    @Override
    public V put(Character key, V value) {
        return put(objectToKey(key), value);
    }

    @Override
    public V remove(Object key) {
        return remove(objectToKey(key));
    }

    @Override
    public Set keySet() {
        return keySet;
    }

    @Override
    public Set> entrySet() {
        return entrySet;
    }

    private char objectToKey(Object key) {
        return ((Character) key).charValue();
    }

    /**
     * Locates the index for the given key. This method probes using double hashing.
     *
     * @param key the key for an entry in the map.
     * @return the index where the key was found, or {@code -1} if no entry is found for that key.
     */
    private int indexOf(char key) {
        int startIndex = hashIndex(key);
        int index = startIndex;

        for (; ; ) {
            if (values[index] == null) {
                // It's available, so no chance that this value exists anywhere in the map.
                return -1;
            }
            if (key == keys[index]) {
                return index;
            }

            // Conflict, keep probing ...
            if ((index = probeNext(index)) == startIndex) {
                return -1;
            }
        }
    }

    /**
     * Returns the hashed index for the given key.
     */
    private int hashIndex(char key) {
        // The array lengths are always a power of two, so we can use a bitmask to stay inside the array bounds.
        return hashCode(key) & mask;
    }

    /**
     * Returns the hash code for the key.
     */
    private static int hashCode(char key) {
        return (int) key;
    }

    /**
     * Get the next sequential index after {@code index} and wraps if necessary.
     */
    private int probeNext(int index) {
        // The array lengths are always a power of two, so we can use a bitmask to stay inside the array bounds.
        return (index + 1) & mask;
    }

    /**
     * Grows the map size after an insertion. If necessary, performs a rehash of the map.
     */
    private void growSize() {
        size++;

        if (size > maxSize) {
            if (keys.length == Integer.MAX_VALUE) {
                throw new IllegalStateException("Max capacity reached at size=" + size);
            }

            // Double the capacity.
            rehash(keys.length << 1);
        }
    }

    /**
     * Removes entry at the given index position. Also performs opportunistic, incremental rehashing
     * if necessary to not break conflict chains.
     *
     * @param index the index position of the element to remove.
     * @return {@code true} if the next item was moved back. {@code false} otherwise.
     */
    private boolean removeAt(final int index) {
        --size;
        // Clearing the key is not strictly necessary (for GC like in a regular collection),
        // but recommended for security. The memory location is still fresh in the cache anyway.
        keys[index] = 0;
        values[index] = null;

        // In the interval from index to the next available entry, the arrays may have entries
        // that are displaced from their base position due to prior conflicts. Iterate these
        // entries and move them back if possible, optimizing future lookups.
        // Knuth Section 6.4 Algorithm R, also used by the JDK's IdentityHashMap.

        int nextFree = index;
        int i = probeNext(index);
        for (V value = values[i]; value != null; value = values[i = probeNext(i)]) {
            char key = keys[i];
            int bucket = hashIndex(key);
            if (i < bucket && (bucket <= nextFree || nextFree <= i) ||
                    bucket <= nextFree && nextFree <= i) {
                // Move the displaced entry "back" to the first available position.
                keys[nextFree] = key;
                values[nextFree] = value;
                // Put the first entry after the displaced entry
                keys[i] = 0;
                values[i] = null;
                nextFree = i;
            }
        }
        return nextFree != index;
    }

    /**
     * Calculates the maximum size allowed before rehashing.
     */
    private int calcMaxSize(int capacity) {
        // Clip the upper bound so that there will always be at least one available slot.
        int upperBound = capacity - 1;
        return Math.min(upperBound, (int) (capacity * loadFactor));
    }

    /**
     * Rehashes the map for the given capacity.
     *
     * @param newCapacity the new capacity for the map.
     */
    private void rehash(int newCapacity) {
        char[] oldKeys = keys;
        V[] oldVals = values;

        keys = new char[newCapacity];
        @SuppressWarnings({"unchecked", "SuspiciousArrayCast"})
        V[] temp = (V[]) new Object[newCapacity];
        values = temp;

        maxSize = calcMaxSize(newCapacity);
        mask = newCapacity - 1;

        // Insert to the new arrays.
        for (int i = 0; i < oldVals.length; ++i) {
            V oldVal = oldVals[i];
            if (oldVal != null) {
                // Inlined put(), but much simpler: we don't need to worry about
                // duplicated keys, growing/rehashing, or failing to insert.
                char oldKey = oldKeys[i];
                int index = hashIndex(oldKey);

                for (; ; ) {
                    if (values[index] == null) {
                        keys[index] = oldKey;
                        values[index] = oldVal;
                        break;
                    }

                    // Conflict, keep probing. Can wrap around, but never reaches startIndex again.
                    index = probeNext(index);
                }
            }
        }
    }

    @Override
    public String toString() {
        if (isEmpty()) {
            return "{}";
        }
        StringBuilder sb = new StringBuilder(4 * size);
        sb.append('{');
        boolean first = true;
        for (int i = 0; i < values.length; ++i) {
            V value = values[i];
            if (value != null) {
                if (!first) {
                    sb.append(", ");
                }
                sb.append(keyToString(keys[i])).append('=').append(value == this ? "(this Map)" :
                        toExternal(value));
                first = false;
            }
        }
        return sb.append('}').toString();
    }

    /**
     * Helper method called by {@link #toString()} in order to convert a single map key into a string.
     * This is protected to allow subclasses to override the appearance of a given key.
     */
    protected String keyToString(char key) {
        return Character.toString(key);
    }

    /**
     * Set implementation for iterating over the entries of the map.
     */
    private final class EntrySet extends AbstractSet> {
        @Override
        public Iterator> iterator() {
            return new MapIterator();
        }

        @Override
        public int size() {
            return CharObjectHashMap.this.size();
        }
    }

    /**
     * Set implementation for iterating over the keys.
     */
    private final class KeySet extends AbstractSet {
        @Override
        public int size() {
            return CharObjectHashMap.this.size();
        }

        @Override
        public boolean contains(Object o) {
            return CharObjectHashMap.this.containsKey(o);
        }

        @Override
        public boolean remove(Object o) {
            return CharObjectHashMap.this.remove(o) != null;
        }

        @Override
        public boolean retainAll(Collection retainedKeys) {
            boolean changed = false;
            for (Iterator> iter = entries().iterator(); iter.hasNext(); ) {
                PrimitiveEntry entry = iter.next();
                if (!retainedKeys.contains(entry.key())) {
                    changed = true;
                    iter.remove();
                }
            }
            return changed;
        }

        @Override
        public void clear() {
            CharObjectHashMap.this.clear();
        }

        @Override
        public Iterator iterator() {
            return new Iterator() {
                private final Iterator> iter = entrySet.iterator();

                @Override
                public boolean hasNext() {
                    return iter.hasNext();
                }

                @Override
                public Character next() {
                    return iter.next().getKey();
                }

                @Override
                public void remove() {
                    iter.remove();
                }
            };
        }
    }

    /**
     * Iterator over primitive entries. Entry key/values are overwritten by each call to {@link #next()}.
     */
    private final class PrimitiveIterator implements Iterator>, PrimitiveEntry {
        private int prevIndex = -1;
        private int nextIndex = -1;
        private int entryIndex = -1;

        private void scanNext() {
            while (++nextIndex != values.length && values[nextIndex] == null) {
            }
        }

        @Override
        public boolean hasNext() {
            if (nextIndex == -1) {
                scanNext();
            }
            return nextIndex != values.length;
        }

        @Override
        public PrimitiveEntry next() {
            if (!hasNext()) {
                throw new NoSuchElementException();
            }

            prevIndex = nextIndex;
            scanNext();

            // Always return the same Entry object, just change its index each time.
            entryIndex = prevIndex;
            return this;
        }

        @Override
        public void remove() {
            if (prevIndex == -1) {
                throw new IllegalStateException("next must be called before each remove.");
            }
            if (removeAt(prevIndex)) {
                // removeAt may move elements "back" in the array if they have been displaced because their spot in the
                // array was occupied when they were inserted. If this occurs then the nextIndex is now invalid and
                // should instead point to the prevIndex which now holds an element which was "moved back".
                nextIndex = prevIndex;
            }
            prevIndex = -1;
        }

        // Entry implementation. Since this implementation uses a single Entry, we coalesce that
        // into the Iterator object (potentially making loop optimization much easier).

        @Override
        public char key() {
            return keys[entryIndex];
        }

        @Override
        public V value() {
            return toExternal(values[entryIndex]);
        }

        @Override
        public void setValue(V value) {
            values[entryIndex] = toInternal(value);
        }
    }

    /**
     * Iterator used by the {@link Map} interface.
     */
    private final class MapIterator implements Iterator> {
        private final PrimitiveIterator iter = new PrimitiveIterator();

        @Override
        public boolean hasNext() {
            return iter.hasNext();
        }

        @Override
        public Entry next() {
            if (!hasNext()) {
                throw new NoSuchElementException();
            }

            iter.next();

            return new MapEntry(iter.entryIndex);
        }

        @Override
        public void remove() {
            iter.remove();
        }
    }

    /**
     * A single entry in the map.
     */
    final class MapEntry implements Entry {
        private final int entryIndex;

        MapEntry(int entryIndex) {
            this.entryIndex = entryIndex;
        }

        @Override
        public Character getKey() {
            verifyExists();
            return keys[entryIndex];
        }

        @Override
        public V getValue() {
            verifyExists();
            return toExternal(values[entryIndex]);
        }

        @Override
        public V setValue(V value) {
            verifyExists();
            V prevValue = toExternal(values[entryIndex]);
            values[entryIndex] = toInternal(value);
            return prevValue;
        }

        private void verifyExists() {
            if (values[entryIndex] == null) {
                throw new IllegalStateException("The map entry has been removed");
            }
        }
    }

    /**
     * Fast method of finding the next power of 2 greater than or equal to the supplied value.
     * This method will do runtime bounds checking and call {@link #findNextPositivePowerOfTwo(int)} if within a
     * valid range.
     *
     * @param value from which to search for next power of 2
     * @return The next power of 2 or the value itself if it is a power of 2.
     * 
Special cases for return values are as follows:
     * 

     * {@code <= 0} -> 1
     * {@code >= 2^30} -> 2^30
     * 
     */
    public static int safeFindNextPositivePowerOfTwo(final int value) {
        return value <= 0 ? 1 : value >= 0x40000000 ? 0x40000000 : findNextPositivePowerOfTwo(value);
    }


    /**
     * Fast method of finding the next power of 2 greater than or equal to the supplied value.
     * 
     * If the value is {@code <= 0} then 1 will be returned.
     * This method is not suitable for {@link Integer#MIN_VALUE} or numbers greater than 2^30.
     *
     * @param value from which to search for next power of 2
     * @return The next power of 2 or the value itself if it is a power of 2
     */
    public static int findNextPositivePowerOfTwo(final int value) {
        assert value > Integer.MIN_VALUE && value < 0x40000000;
        return 1 << (32 - Integer.numberOfLeadingZeros(value - 1));
    }
}