All downloads are free. The search and download features use the official Maven repository.

org.apache.spark.unsafe.map.BytesToBytesMap Maven / Gradle / Ivy

There is a newer version: 2.4.8
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.spark.unsafe.map;

import java.lang.Override;
import java.lang.UnsupportedOperationException;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;

import com.google.common.annotations.VisibleForTesting;

import org.apache.spark.unsafe.*;
import org.apache.spark.unsafe.array.ByteArrayMethods;
import org.apache.spark.unsafe.array.LongArray;
import org.apache.spark.unsafe.bitset.BitSet;
import org.apache.spark.unsafe.hash.Murmur3_x86_32;
import org.apache.spark.unsafe.memory.*;

/**
 * An append-only hash map where keys and values are contiguous regions of bytes.
 * 

* This is backed by a power-of-2-sized hash table, using quadratic probing with triangular numbers, * which is guaranteed to exhaust the space. *

* The map can support up to 2^29 keys. If the key cardinality is higher than this, you should * probably be using sorting instead of hashing for better cache locality. *

* This class is not thread safe. */ public final class BytesToBytesMap { private static final Murmur3_x86_32 HASHER = new Murmur3_x86_32(0); private static final HashMapGrowthStrategy growthStrategy = HashMapGrowthStrategy.DOUBLING; /** * Special record length that is placed after the last record in a data page. */ private static final int END_OF_PAGE_MARKER = -1; private final TaskMemoryManager memoryManager; /** * A linked list for tracking all allocated data pages so that we can free all of our memory. */ private final List dataPages = new LinkedList(); /** * The data page that will be used to store keys and values for new hashtable entries. When this * page becomes full, a new page will be allocated and this pointer will change to point to that * new page. */ private MemoryBlock currentDataPage = null; /** * Offset into `currentDataPage` that points to the location where new data can be inserted into * the page. This does not incorporate the page's base offset. */ private long pageCursor = 0; /** * The size of the data pages that hold key and value data. Map entries cannot span multiple * pages, so this limits the maximum entry size. */ private static final long PAGE_SIZE_BYTES = 1L << 26; // 64 megabytes /** * The maximum number of keys that BytesToBytesMap supports. The hash table has to be * power-of-2-sized and its backing Java array can contain at most (1 << 30) elements, since * that's the largest power-of-2 that's less than Integer.MAX_VALUE. We need two long array * entries per key, giving us a maximum capacity of (1 << 29). */ @VisibleForTesting static final int MAX_CAPACITY = (1 << 29); // This choice of page table size and page size means that we can address up to 500 gigabytes // of memory. /** * A single array to store the key and value. * * Position {@code 2 * i} in the array is used to track a pointer to the key at index {@code i}, * while position {@code 2 * i + 1} in the array holds key's full 32-bit hashcode. 
*/ private LongArray longArray; // TODO: we're wasting 32 bits of space here; we can probably store fewer bits of the hashcode // and exploit word-alignment to use fewer bits to hold the address. This might let us store // only one long per map entry, increasing the chance that this array will fit in cache at the // expense of maybe performing more lookups if we have hash collisions. Say that we stored only // 27 bits of the hashcode and 37 bits of the address. 37 bits is enough to address 1 terabyte // of RAM given word-alignment. If we use 13 bits of this for our page table, that gives us a // maximum page size of 2^24 * 8 = ~134 megabytes per page. This change will require us to store // full base addresses in the page table for off-heap mode so that we can reconstruct the full // absolute memory addresses. /** * A {@link BitSet} used to track location of the map where the key is set. * Size of the bitset should be half of the size of the long array. */ private BitSet bitset; private final double loadFactor; /** * Number of keys defined in the map. */ private int size; /** * The map will be expanded once the number of keys exceeds this threshold. */ private int growthThreshold; /** * Mask for truncating hashcodes so that they do not exceed the long array's size. * This is a strength reduction optimization; we're essentially performing a modulus operation, * but doing so with a bitmask because this is a power-of-2-sized hash map. */ private int mask; /** * Return value of {@link BytesToBytesMap#lookup(Object, long, int)}. 
*/ private final Location loc; private final boolean enablePerfMetrics; private long timeSpentResizingNs = 0; private long numProbes = 0; private long numKeyLookups = 0; private long numHashCollisions = 0; public BytesToBytesMap( TaskMemoryManager memoryManager, int initialCapacity, double loadFactor, boolean enablePerfMetrics) { this.memoryManager = memoryManager; this.loadFactor = loadFactor; this.loc = new Location(); this.enablePerfMetrics = enablePerfMetrics; if (initialCapacity <= 0) { throw new IllegalArgumentException("Initial capacity must be greater than 0"); } if (initialCapacity > MAX_CAPACITY) { throw new IllegalArgumentException( "Initial capacity " + initialCapacity + " exceeds maximum capacity of " + MAX_CAPACITY); } allocate(initialCapacity); } public BytesToBytesMap(TaskMemoryManager memoryManager, int initialCapacity) { this(memoryManager, initialCapacity, 0.70, false); } public BytesToBytesMap( TaskMemoryManager memoryManager, int initialCapacity, boolean enablePerfMetrics) { this(memoryManager, initialCapacity, 0.70, enablePerfMetrics); } /** * Returns the number of keys defined in the map. 
*/ public int size() { return size; } private static final class BytesToBytesMapIterator implements Iterator { private final int numRecords; private final Iterator dataPagesIterator; private final Location loc; private int currentRecordNumber = 0; private Object pageBaseObject; private long offsetInPage; BytesToBytesMapIterator(int numRecords, Iterator dataPagesIterator, Location loc) { this.numRecords = numRecords; this.dataPagesIterator = dataPagesIterator; this.loc = loc; if (dataPagesIterator.hasNext()) { advanceToNextPage(); } } private void advanceToNextPage() { final MemoryBlock currentPage = dataPagesIterator.next(); pageBaseObject = currentPage.getBaseObject(); offsetInPage = currentPage.getBaseOffset(); } @Override public boolean hasNext() { return currentRecordNumber != numRecords; } @Override public Location next() { int keyLength = (int) PlatformDependent.UNSAFE.getLong(pageBaseObject, offsetInPage); if (keyLength == END_OF_PAGE_MARKER) { advanceToNextPage(); keyLength = (int) PlatformDependent.UNSAFE.getLong(pageBaseObject, offsetInPage); } loc.with(pageBaseObject, offsetInPage); offsetInPage += 8 + 8 + keyLength + loc.getValueLength(); currentRecordNumber++; return loc; } @Override public void remove() { throw new UnsupportedOperationException(); } } /** * Returns an iterator for iterating over the entries of this map. * * For efficiency, all calls to `next()` will return the same {@link Location} object. * * If any other lookups or operations are performed on this map while iterating over it, including * `lookup()`, the behavior of the returned iterator is undefined. */ public Iterator iterator() { return new BytesToBytesMapIterator(size, dataPages.iterator(), loc); } /** * Looks up a key, and return a {@link Location} handle that can be used to test existence * and read/write values. * * This function always return the same {@link Location} instance to avoid object allocation. 
*/ public Location lookup( Object keyBaseObject, long keyBaseOffset, int keyRowLengthBytes) { if (enablePerfMetrics) { numKeyLookups++; } final int hashcode = HASHER.hashUnsafeWords(keyBaseObject, keyBaseOffset, keyRowLengthBytes); int pos = hashcode & mask; int step = 1; while (true) { if (enablePerfMetrics) { numProbes++; } if (!bitset.isSet(pos)) { // This is a new key. return loc.with(pos, hashcode, false); } else { long stored = longArray.get(pos * 2 + 1); if ((int) (stored) == hashcode) { // Full hash code matches. Let's compare the keys for equality. loc.with(pos, hashcode, true); if (loc.getKeyLength() == keyRowLengthBytes) { final MemoryLocation keyAddress = loc.getKeyAddress(); final Object storedKeyBaseObject = keyAddress.getBaseObject(); final long storedKeyBaseOffset = keyAddress.getBaseOffset(); final boolean areEqual = ByteArrayMethods.wordAlignedArrayEquals( keyBaseObject, keyBaseOffset, storedKeyBaseObject, storedKeyBaseOffset, keyRowLengthBytes ); if (areEqual) { return loc; } else { if (enablePerfMetrics) { numHashCollisions++; } } } } } pos = (pos + step) & mask; step++; } } /** * Handle returned by {@link BytesToBytesMap#lookup(Object, long, int)} function. */ public final class Location { /** An index into the hash map's Long array */ private int pos; /** True if this location points to a position where a key is defined, false otherwise */ private boolean isDefined; /** * The hashcode of the most recent key passed to * {@link BytesToBytesMap#lookup(Object, long, int)}. Caching this hashcode here allows us to * avoid re-hashing the key when storing a value for that key. 
*/ private int keyHashcode; private final MemoryLocation keyMemoryLocation = new MemoryLocation(); private final MemoryLocation valueMemoryLocation = new MemoryLocation(); private int keyLength; private int valueLength; private void updateAddressesAndSizes(long fullKeyAddress) { updateAddressesAndSizes( memoryManager.getPage(fullKeyAddress), memoryManager.getOffsetInPage(fullKeyAddress)); } private void updateAddressesAndSizes(Object page, long keyOffsetInPage) { long position = keyOffsetInPage; keyLength = (int) PlatformDependent.UNSAFE.getLong(page, position); position += 8; // word used to store the key size keyMemoryLocation.setObjAndOffset(page, position); position += keyLength; valueLength = (int) PlatformDependent.UNSAFE.getLong(page, position); position += 8; // word used to store the key size valueMemoryLocation.setObjAndOffset(page, position); } Location with(int pos, int keyHashcode, boolean isDefined) { this.pos = pos; this.isDefined = isDefined; this.keyHashcode = keyHashcode; if (isDefined) { final long fullKeyAddress = longArray.get(pos * 2); updateAddressesAndSizes(fullKeyAddress); } return this; } Location with(Object page, long keyOffsetInPage) { this.isDefined = true; updateAddressesAndSizes(page, keyOffsetInPage); return this; } /** * Returns true if the key is defined at this position, and false otherwise. */ public boolean isDefined() { return isDefined; } /** * Returns the address of the key defined at this position. * This points to the first byte of the key data. * Unspecified behavior if the key is not defined. * For efficiency reasons, calls to this method always returns the same MemoryLocation object. */ public MemoryLocation getKeyAddress() { assert (isDefined); return keyMemoryLocation; } /** * Returns the length of the key defined at this position. * Unspecified behavior if the key is not defined. */ public int getKeyLength() { assert (isDefined); return keyLength; } /** * Returns the address of the value defined at this position. 
* This points to the first byte of the value data. * Unspecified behavior if the key is not defined. * For efficiency reasons, calls to this method always returns the same MemoryLocation object. */ public MemoryLocation getValueAddress() { assert (isDefined); return valueMemoryLocation; } /** * Returns the length of the value defined at this position. * Unspecified behavior if the key is not defined. */ public int getValueLength() { assert (isDefined); return valueLength; } /** * Store a new key and value. This method may only be called once for a given key; if you want * to update the value associated with a key, then you can directly manipulate the bytes stored * at the value address. *

* It is only valid to call this method immediately after calling `lookup()` using the same key. *

* The key and value must be word-aligned (that is, their sizes must multiples of 8). *

* After calling this method, calls to `get[Key|Value]Address()` and `get[Key|Value]Length` * will return information on the data stored by this `putNewKey` call. *

* As an example usage, here's the proper way to store a new key: *

*

     *   Location loc = map.lookup(keyBaseObject, keyBaseOffset, keyLengthInBytes);
     *   if (!loc.isDefined()) {
     *     loc.putNewKey(keyBaseObject, keyBaseOffset, keyLengthInBytes, ...)
     *   }
     * 
*

* Unspecified behavior if the key is not defined. */ public void putNewKey( Object keyBaseObject, long keyBaseOffset, int keyLengthBytes, Object valueBaseObject, long valueBaseOffset, int valueLengthBytes) { assert (!isDefined) : "Can only set value once for a key"; assert (keyLengthBytes % 8 == 0); assert (valueLengthBytes % 8 == 0); if (size == MAX_CAPACITY) { throw new IllegalStateException("BytesToBytesMap has reached maximum capacity"); } // Here, we'll copy the data into our data pages. Because we only store a relative offset from // the key address instead of storing the absolute address of the value, the key and value // must be stored in the same memory page. // (8 byte key length) (key) (8 byte value length) (value) final long requiredSize = 8 + keyLengthBytes + 8 + valueLengthBytes; assert (requiredSize <= PAGE_SIZE_BYTES - 8); // Reserve 8 bytes for the end-of-page marker. size++; bitset.set(pos); // If there's not enough space in the current page, allocate a new page (8 bytes are reserved // for the end-of-page marker). 
if (currentDataPage == null || PAGE_SIZE_BYTES - 8 - pageCursor < requiredSize) { if (currentDataPage != null) { // There wasn't enough space in the current page, so write an end-of-page marker: final Object pageBaseObject = currentDataPage.getBaseObject(); final long lengthOffsetInPage = currentDataPage.getBaseOffset() + pageCursor; PlatformDependent.UNSAFE.putLong(pageBaseObject, lengthOffsetInPage, END_OF_PAGE_MARKER); } MemoryBlock newPage = memoryManager.allocatePage(PAGE_SIZE_BYTES); dataPages.add(newPage); pageCursor = 0; currentDataPage = newPage; } // Compute all of our offsets up-front: final Object pageBaseObject = currentDataPage.getBaseObject(); final long pageBaseOffset = currentDataPage.getBaseOffset(); final long keySizeOffsetInPage = pageBaseOffset + pageCursor; pageCursor += 8; // word used to store the key size final long keyDataOffsetInPage = pageBaseOffset + pageCursor; pageCursor += keyLengthBytes; final long valueSizeOffsetInPage = pageBaseOffset + pageCursor; pageCursor += 8; // word used to store the value size final long valueDataOffsetInPage = pageBaseOffset + pageCursor; pageCursor += valueLengthBytes; // Copy the key PlatformDependent.UNSAFE.putLong(pageBaseObject, keySizeOffsetInPage, keyLengthBytes); PlatformDependent.copyMemory( keyBaseObject, keyBaseOffset, pageBaseObject, keyDataOffsetInPage, keyLengthBytes); // Copy the value PlatformDependent.UNSAFE.putLong(pageBaseObject, valueSizeOffsetInPage, valueLengthBytes); PlatformDependent.copyMemory( valueBaseObject, valueBaseOffset, pageBaseObject, valueDataOffsetInPage, valueLengthBytes); final long storedKeyAddress = memoryManager.encodePageNumberAndOffset( currentDataPage, keySizeOffsetInPage); longArray.set(pos * 2, storedKeyAddress); longArray.set(pos * 2 + 1, keyHashcode); updateAddressesAndSizes(storedKeyAddress); isDefined = true; if (size > growthThreshold && longArray.size() < MAX_CAPACITY) { growAndRehash(); } } } /** * Allocate new data structures for this map. 
When calling this outside of the constructor, * make sure to keep references to the old data structures so that you can free them. * * @param capacity the new map capacity */ private void allocate(int capacity) { assert (capacity >= 0); // The capacity needs to be divisible by 64 so that our bit set can be sized properly capacity = Math.max((int) Math.min(MAX_CAPACITY, nextPowerOf2(capacity)), 64); assert (capacity <= MAX_CAPACITY); longArray = new LongArray(memoryManager.allocate(capacity * 8L * 2)); bitset = new BitSet(MemoryBlock.fromLongArray(new long[capacity / 64])); this.growthThreshold = (int) (capacity * loadFactor); this.mask = capacity - 1; } /** * Free all allocated memory associated with this map, including the storage for keys and values * as well as the hash map array itself. * * This method is idempotent. */ public void free() { if (longArray != null) { memoryManager.free(longArray.memoryBlock()); longArray = null; } if (bitset != null) { // The bitset's heap memory isn't managed by a memory manager, so no need to free it here. bitset = null; } Iterator dataPagesIterator = dataPages.iterator(); while (dataPagesIterator.hasNext()) { memoryManager.freePage(dataPagesIterator.next()); dataPagesIterator.remove(); } assert(dataPages.isEmpty()); } /** Returns the total amount of memory, in bytes, consumed by this map's managed structures. */ public long getTotalMemoryConsumption() { return ( dataPages.size() * PAGE_SIZE_BYTES + bitset.memoryBlock().size() + longArray.memoryBlock().size()); } /** * Returns the total amount of time spent resizing this map (in nanoseconds). */ public long getTimeSpentResizingNs() { if (!enablePerfMetrics) { throw new IllegalStateException(); } return timeSpentResizingNs; } /** * Returns the average number of probes per key lookup. 
*/ public double getAverageProbesPerLookup() { if (!enablePerfMetrics) { throw new IllegalStateException(); } return (1.0 * numProbes) / numKeyLookups; } public long getNumHashCollisions() { if (!enablePerfMetrics) { throw new IllegalStateException(); } return numHashCollisions; } @VisibleForTesting int getNumDataPages() { return dataPages.size(); } /** * Grows the size of the hash table and re-hash everything. */ @VisibleForTesting void growAndRehash() { long resizeStartTime = -1; if (enablePerfMetrics) { resizeStartTime = System.nanoTime(); } // Store references to the old data structures to be used when we re-hash final LongArray oldLongArray = longArray; final BitSet oldBitSet = bitset; final int oldCapacity = (int) oldBitSet.capacity(); // Allocate the new data structures allocate(Math.min(growthStrategy.nextCapacity(oldCapacity), MAX_CAPACITY)); // Re-mask (we don't recompute the hashcode because we stored all 32 bits of it) for (int pos = oldBitSet.nextSetBit(0); pos >= 0; pos = oldBitSet.nextSetBit(pos + 1)) { final long keyPointer = oldLongArray.get(pos * 2); final int hashcode = (int) oldLongArray.get(pos * 2 + 1); int newPos = hashcode & mask; int step = 1; boolean keepGoing = true; // No need to check for equality here when we insert so this has one less if branch than // the similar code path in addWithoutResize. while (keepGoing) { if (!bitset.isSet(newPos)) { bitset.set(newPos); longArray.set(newPos * 2, keyPointer); longArray.set(newPos * 2 + 1, hashcode); keepGoing = false; } else { newPos = (newPos + step) & mask; step++; } } } // Deallocate the old data structures. memoryManager.free(oldLongArray.memoryBlock()); if (enablePerfMetrics) { timeSpentResizingNs += System.nanoTime() - resizeStartTime; } } /** Returns the next number greater or equal num that is power of 2. */ private static long nextPowerOf2(long num) { final long highBit = Long.highestOneBit(num); return (highBit == num) ? num : highBit << 1; } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy