/*
 *
 *  Copyright 2013 Netflix, Inc.
 *
 *     Licensed under the Apache License, Version 2.0 (the "License");
 *     you may not use this file except in compliance with the License.
 *     You may obtain a copy of the License at
 *
 *         http://www.apache.org/licenses/LICENSE-2.0
 *
 *     Unless required by applicable law or agreed to in writing, software
 *     distributed under the License is distributed on an "AS IS" BASIS,
 *     WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *     See the License for the specific language governing permissions and
 *     limitations under the License.
 *
 */
package com.netflix.zeno.fastblob.state;

import com.netflix.zeno.fastblob.FastBlobImageUtils;
import com.netflix.zeno.fastblob.OrdinalMapping;
import com.netflix.zeno.fastblob.StateOrdinalMapping;
import com.netflix.zeno.fastblob.record.ByteDataBuffer;
import com.netflix.zeno.fastblob.record.FastBlobDeserializationRecord;
import com.netflix.zeno.fastblob.record.SegmentedByteArray;
import com.netflix.zeno.fastblob.record.SegmentedByteArrayHasher;
import com.netflix.zeno.fastblob.record.VarInt;
import com.netflix.zeno.util.SimultaneousExecutor;

import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.util.Arrays;
import java.util.concurrent.atomic.AtomicLongArray;

/**
 *
 * This data structure maps byte sequences to ordinals.  This is a hash table.  The pointersAndOrdinals AtomicLongArray contains
 * keys, and the ByteDataBuffer contains values.  Each key has two components.  The high 28 bits in the key represents the ordinal.
 * The low 36 bits represents the pointer to the start position of the byte sequence in the ByteDataBuffer.  Each byte sequence is preceded by
 * a variable-length integer (see {@link VarInt}), indicating the length of the sequence.

 * This implementation is extremely fast.  Even though it would be technically correct and clearer,
 * using a separate int[] array for the pointers, and an AtomicIntegerArray for the ordinals as keys
 * was measured as two orders of magnitude slower.
 *
 * @author dkoszewnik
 *
 */
public class ByteArrayOrdinalMap {

    private final static long EMPTY_BUCKET_VALUE = -1L;

    /// IMPORTANT: Thread safety:  We need volatile access semantics to the individual elements in the
    /// pointersAndOrdinals array.  This only works in JVMs 1.5 or later (JSR 133).

    /// Ordinal is the high 28 bits.  Pointer to byte data is the low 36 bits.
    private AtomicLongArray pointersAndOrdinals;
    private final ByteDataBuffer byteData;
    private final FreeOrdinalTracker freeOrdinalTracker;

    private int size;
    private int sizeBeforeGrow;

    private long pointersByOrdinal[];

    public ByteArrayOrdinalMap() {
        this(262144);
    }

    public ByteArrayOrdinalMap(int bufferSize) {
        this.freeOrdinalTracker = new FreeOrdinalTracker();
        this.byteData = new ByteDataBuffer(bufferSize);
        this.pointersAndOrdinals = emptyKeyArray(256);
        this.sizeBeforeGrow = 179; /// 70% load factor
        this.size = 0;
    }

    private ByteArrayOrdinalMap(long keys[], ByteDataBuffer byteData, FreeOrdinalTracker freeOrdinalTracker, int keyArraySize) {
        this.freeOrdinalTracker = freeOrdinalTracker;
        this.byteData = byteData;

        AtomicLongArray pointersAndOrdinals = emptyKeyArray(keyArraySize);
        populateNewHashArray(pointersAndOrdinals, keys);
        this.pointersAndOrdinals = pointersAndOrdinals;

        this.size = keys.length;
        this.sizeBeforeGrow = keyArraySize * 7 / 10; /// 70% load factor
    }

    /**
     * Add a sequence of bytes to this map.  If the sequence of bytes has already been added to this map, return the originally assigned ordinal.
     * If the sequence of bytes has not been added to this map, assign and return a new ordinal.  This operation is thread-safe.
     */
    public int getOrAssignOrdinal(ByteDataBuffer serializedRepresentation) {
        int hash = SegmentedByteArrayHasher.hashCode(serializedRepresentation);

        int modBitmask = pointersAndOrdinals.length() - 1;
        int bucket = hash & modBitmask;
        long key = pointersAndOrdinals.get(bucket);

        /// linear probing to resolve collisions.
        while(key != EMPTY_BUCKET_VALUE) {
            if(compare(serializedRepresentation, key)) {
                return (int)(key >> 36);
            }

            bucket = (bucket + 1) & modBitmask;
            key = pointersAndOrdinals.get(bucket);
        }

        return assignOrdinal(serializedRepresentation, hash);
    }

    /// acquire the lock before writing.
    private synchronized int assignOrdinal(ByteDataBuffer serializedRepresentation, int hash) {
        if(size > sizeBeforeGrow)
            growKeyArray();

        /// check to make sure that after acquiring the lock, the element still does not exist.
        /// this operation is akin to double-checked locking which is 'fixed' with the JSR 133 memory model in JVM >= 1.5.
        int modBitmask = pointersAndOrdinals.length() - 1;
        int bucket = hash & modBitmask;
        long key = pointersAndOrdinals.get(bucket);

        while(key != EMPTY_BUCKET_VALUE) {
            if(compare(serializedRepresentation, key)) {
                return (int)(key >> 36);
            }

            bucket = (bucket + 1) & modBitmask;
            key = pointersAndOrdinals.get(bucket);
        }

        /// the ordinal for this object still does not exist in the list, even after the lock has been acquired.
        /// it is up to this thread to add it at the current bucket position.
        int ordinal = freeOrdinalTracker.getFreeOrdinal();
        long pointer = byteData.length();

        VarInt.writeVInt(byteData, (int)serializedRepresentation.length());
        serializedRepresentation.copyTo(byteData);

        key = ((long)ordinal << 36) | pointer;

        size++;

        /// this set on the AtomicLongArray has volatile semantics (i.e. behaves like a monitor release).
        /// Any other thread reading this element in the AtomicLongArray will have visibility to all memory writes this thread has made up to this point.
        /// This means the entire byte sequence is guaranteed to be visible to any thread which reads the pointer to that data.
        pointersAndOrdinals.set(bucket, key);

        return ordinal;
    }

    /**
     * Assign a predefined ordinal to a serialized representation.
     *
     * WARNING: THIS OPERATION IS NOT THREAD-SAFE.
     *
     * This is intended for use in the client-side heap-safe double snapshot load.
     */
    public void put(ByteDataBuffer serializedRepresentation, int ordinal) {
        if(size > sizeBeforeGrow)
            growKeyArray();

        int hash = SegmentedByteArrayHasher.hashCode(serializedRepresentation);

        int modBitmask = pointersAndOrdinals.length() - 1;
        int bucket = hash & modBitmask;
        long key = pointersAndOrdinals.get(bucket);

        while(key != EMPTY_BUCKET_VALUE) {
            if(compare(serializedRepresentation, key))
                return;

            bucket = (bucket + 1) & modBitmask;
            key = pointersAndOrdinals.get(bucket);
        }

        long pointer = byteData.length();

        VarInt.writeVInt(byteData, (int)serializedRepresentation.length());
        serializedRepresentation.copyTo(byteData);

        key = ((long)ordinal << 36) | pointer;

        size++;

        pointersAndOrdinals.set(bucket, key);
    }

    /**
     * Returns the ordinal for a previously added byte sequence.  If this byte sequence has not been added to the map, then -1 is returned.
     *
     * This is intended for use in the client-side heap-safe double snapshot load.
     *
     * @param serializedRepresentation
     * @return The ordinal for this serialized representation, or -1.
     */
    public int get(ByteDataBuffer serializedRepresentation) {
        int hash = SegmentedByteArrayHasher.hashCode(serializedRepresentation);

        int modBitmask = pointersAndOrdinals.length() - 1;
        int bucket = hash & modBitmask;
        long key = pointersAndOrdinals.get(bucket);

        /// linear probing to resolve collisions.
        while(key != EMPTY_BUCKET_VALUE) {
            if(compare(serializedRepresentation, key)) {
                return (int)(key >> 36);
            }

            bucket = (bucket + 1) & modBitmask;
            key = pointersAndOrdinals.get(bucket);
        }

        return -1;
    }

    /**
     * Remove all entries from this map, but reuse the existing arrays when populating the map next time.
     *
     * This is intended for use in the client-side heap-safe double snapshot load.
     */
    public void clear() {
        for(int i=0;i<pointersAndOrdinals.length();i++) {
            pointersAndOrdinals.set(i, EMPTY_BUCKET_VALUE);
        }

        byteData.setPosition(0);
        size = 0;
    }

    /**
     * Prepare to write the data in this map to a blob stream:  build an index of byte sequence pointers by ordinal.
     *
     * @return the maximum length, in bytes, of any serialized representation contained in this map.
     */
    public int prepareForWrite() {
        int maxOrdinal = -1;
        int maxLength = 0;

        for(int i=0;i<pointersAndOrdinals.length();i++) {
            long key = pointersAndOrdinals.get(i);
            if(key != EMPTY_BUCKET_VALUE) {
                int ordinal = (int)(key >>> 36);
                if(ordinal > maxOrdinal)
                    maxOrdinal = ordinal;
            }
        }

        pointersByOrdinal = new long[maxOrdinal + 1];
        Arrays.fill(pointersByOrdinal, -1);

        for(int i=0;i<pointersAndOrdinals.length();i++) {
            long key = pointersAndOrdinals.get(i);
            if(key != EMPTY_BUCKET_VALUE) {
                int ordinal = (int)(key >>> 36);
                pointersByOrdinal[ordinal] = key & 0xFFFFFFFFFL;

                int dataLength = VarInt.readVInt(byteData.getUnderlyingArray(), pointersByOrdinal[ordinal]);
                if(dataLength > maxLength)
                    maxLength = dataLength;
            }
        }

        return maxLength;
    }

    /**
     * Reclaim space in the byte array used in the previous cycle, but not referenced in this cycle.
     *
     * This is achieved by shifting all used byte sequences down in the byte array, then updating
     * the key array to reflect the new pointers and exclude the removed entries.  This is also where ordinals
     * which are unused are returned to the pool.
     *
     * @param usedOrdinals a bit set representing the ordinals which are currently referenced by any image.
     */
    public void compact(ThreadSafeBitSet usedOrdinals) {
        long populatedReverseKeys[] = new long[size];

        int counter = 0;

        for(int i=0;i<pointersAndOrdinals.length();i++) {
            long key = pointersAndOrdinals.get(i);
            if(key != EMPTY_BUCKET_VALUE) {
                /// reverse the key:  pointer in the high 36 bits, ordinal in the low 28 bits, so sorting orders the entries by pointer.
                populatedReverseKeys[counter++] = key << 28 | key >>> 36;
            }
        }

        Arrays.sort(populatedReverseKeys);

        SegmentedByteArray arr = byteData.getUnderlyingArray();
        long currentCopyPointer = 0;
        int remainingEntries = 0;

        for(int i=0;i<populatedReverseKeys.length;i++) {
            int ordinal = (int)(populatedReverseKeys[i] & 0xFFFFFFFL);

            if(usedOrdinals.get(ordinal)) {
                long pointer = populatedReverseKeys[i] >>> 28;
                int length = VarInt.readVInt(arr, pointer);
                length += VarInt.sizeOfVInt(length);

                if(currentCopyPointer != pointer)
                    arr.copy(arr, pointer, currentCopyPointer, length);

                populatedReverseKeys[i] = populatedReverseKeys[i] << 36 | currentCopyPointer;

                currentCopyPointer += length;
                remainingEntries++;
            } else {
                freeOrdinalTracker.returnOrdinalToPool(ordinal);
                populatedReverseKeys[i] = EMPTY_BUCKET_VALUE;
            }
        }

        byteData.setPosition(currentCopyPointer);

        for(int i=0;i<pointersAndOrdinals.length();i++) {
            pointersAndOrdinals.set(i, EMPTY_BUCKET_VALUE);
        }

        populateNewHashArray(pointersAndOrdinals, populatedReverseKeys);
        size = remainingEntries;
    }

    /**
     * Fill the supplied FastBlobTypeDeserializationState directly from the serialized data contained in this map.
     *
     * This is intended for use in the client-side heap-safe double snapshot load.
     */
    public void fillDeserializationStateFromData(final FastBlobTypeDeserializationState<?> fill) {
        SimultaneousExecutor executor = new SimultaneousExecutor(1);
        final int numThreads = executor.getMaximumPoolSize();

        fill.ensureCapacity(maxOrdinal() + 1);

        for(int i=0;i<numThreads;i++) {
            final int threadNumber = i;
            executor.execute(new Runnable() {
                public void run() {
                    FastBlobDeserializationRecord rec = new FastBlobDeserializationRecord(fill.getSchema(), byteData.getUnderlyingArray());

                    for(int t=threadNumber;t<pointersAndOrdinals.length();t+=numThreads) {
                        long key = pointersAndOrdinals.get(t);
                        if(key != EMPTY_BUCKET_VALUE) {
                            long pointer = key & 0xFFFFFFFFFL;
                            int ordinal = (int)(key >>> 36);

                            int sizeOfData = VarInt.readVInt(byteData.getUnderlyingArray(), pointer);
                            pointer += VarInt.sizeOfVInt(sizeOfData);

                            rec.position(pointer);

                            fill.add(ordinal, rec);
                        }
                    }
                }
            });
        }

        executor.awaitUninterruptibly();
    }

    /**
     * Copy all of the data from this ByteArrayOrdinalMap to the provided FastBlobTypeSerializationState.
     *
     * Image memberships for each ordinal are determined via the provided array of ThreadSafeBitSets.
     *
     * @param destState
     * @param imageMemberships
     * @param ordinalMapping
     */
    void copySerializedObjectData(final FastBlobTypeSerializationState destState, final ThreadSafeBitSet imageMemberships[], final OrdinalMapping ordinalMapping) {
        final StateOrdinalMapping stateOrdinalMapping = ordinalMapping.createStateOrdinalMapping(destState.getName(), maxOrdinal());

        SimultaneousExecutor executor = new SimultaneousExecutor(8);
        final int numThreads = executor.getMaximumPoolSize();

        for(int i=0;i<numThreads;i++) {
            final int threadNumber = i;
            executor.execute(new Runnable() {
                public void run() {
                    ByteDataBuffer scratch = new ByteDataBuffer(1024);
                    boolean imageMembershipsFlags[] = new boolean[imageMemberships.length];
                    SegmentedByteArray underlyingData = byteData.getUnderlyingArray();

                    for(int t=threadNumber;t<pointersAndOrdinals.length();t+=numThreads) {
                        long key = pointersAndOrdinals.get(t);
                        if(key != EMPTY_BUCKET_VALUE) {
                            long pointer = key & 0xFFFFFFFFFL;
                            int ordinal = (int)(key >>> 36);

                            for(int imageIndex=0;imageIndex<imageMemberships.length;imageIndex++) {
                                imageMembershipsFlags[imageIndex] = imageMemberships[imageIndex].get(ordinal);
                            }

                            int sizeOfData = VarInt.readVInt(underlyingData, pointer);
                            pointer += VarInt.sizeOfVInt(sizeOfData);

                            /// copy the serialized representation into the scratch buffer, then hand it to the destination state.
                            scratch.setPosition(0);
                            for(int b=0;b<sizeOfData;b++) {
                                scratch.write(underlyingData.get(pointer + b));
                            }

                            int destOrdinal = destState.addData(scratch, imageMembershipsFlags);
                            stateOrdinalMapping.setMappedOrdinal(ordinal, destOrdinal);
                        }
                    }
                }
            });
        }

        executor.awaitUninterruptibly();
    }

    /**
     * @return the maximum ordinal currently assigned to any byte sequence in this map.
     */
    private int maxOrdinal() {
        int maxOrdinal = -1;
        for(int i=0;i<pointersAndOrdinals.length();i++) {
            long key = pointersAndOrdinals.get(i);
            if(key == EMPTY_BUCKET_VALUE)
                continue;
            int ordinal = (int)(key >>> 36);
            if(ordinal > maxOrdinal)
                maxOrdinal = ordinal;
        }
        return maxOrdinal;
    }

    /**
     * Compare the byte sequence contained in the supplied ByteDataBuffer with the
     * sequence contained in the map pointed to by the specified key, byte by byte.
     */
    private boolean compare(ByteDataBuffer serializedRepresentation, long key) {
        long position = key & 0xFFFFFFFFFL;

        int sizeOfData = VarInt.readVInt(byteData.getUnderlyingArray(), position);
        if(sizeOfData != serializedRepresentation.length())
            return false;

        position += VarInt.sizeOfVInt(sizeOfData);

        for(int i=0;i<sizeOfData;i++) {
            if(byteData.getUnderlyingArray().get(position++) != serializedRepresentation.get(i))
                return false;
        }

        return true;
    }

    /**
     * Grow the key array.  All of the existing keys must be re-hashed and added to the new array.
     */
    private void growKeyArray() {
        AtomicLongArray newKeys = emptyKeyArray(pointersAndOrdinals.length() * 2);

        long valuesToAdd[] = new long[size];
        int counter = 0;

        /// gather up the populated keys.
        for(int i=0;i<pointersAndOrdinals.length();i++) {
            long key = pointersAndOrdinals.get(i);
            if(key != EMPTY_BUCKET_VALUE) {
                valuesToAdd[counter++] = key;
            }
        }

        Arrays.sort(valuesToAdd);

        populateNewHashArray(newKeys, valuesToAdd);

        /// 70% load factor
        sizeBeforeGrow = newKeys.length() * 7 / 10;
        pointersAndOrdinals = newKeys;
    }

    /**
     * Hash each of the supplied keys into the supplied AtomicLongArray.  Collisions are resolved with linear probing.
     */
    private void populateNewHashArray(AtomicLongArray newKeys, long[] keys) {
        int modBitmask = newKeys.length() - 1;

        for(int i=0;i<keys.length;i++) {
            if(keys[i] != EMPTY_BUCKET_VALUE) {
                int hash = rehashPreviouslyAddedData(keys[i]);
                int bucket = hash & modBitmask;
                while(newKeys.get(bucket) != EMPTY_BUCKET_VALUE)
                    bucket = (bucket + 1) & modBitmask;
                newKeys.set(bucket, keys[i]);
            }
        }
    }

    /**
     * Get the hash code for the byte sequence pointed to by the specified key.
     */
    private int rehashPreviouslyAddedData(long key) {
        long position = key & 0xFFFFFFFFFL;

        int sizeOfData = VarInt.readVInt(byteData.getUnderlyingArray(), position);
        position += VarInt.sizeOfVInt(sizeOfData);

        return SegmentedByteArrayHasher.hashCode(byteData.getUnderlyingArray(), position, sizeOfData);
    }

    /**
     * Create an AtomicLongArray of the specified size, with each element initialized to EMPTY_BUCKET_VALUE.
     */
    private AtomicLongArray emptyKeyArray(int arraySize) {
        AtomicLongArray arr = new AtomicLongArray(arraySize);
        for(int i=0;i<arr.length();i++) {
            arr.set(i, EMPTY_BUCKET_VALUE);
        }
        return arr;
    }

    /**
     * Serialize this ByteArrayOrdinalMap to the supplied OutputStream:  first the assigned keys
     * (ordinal and pointer for each entry), then the underlying byte data.
     */
    public void serializeTo(OutputStream os) throws IOException {
        /// write the keys to the stream
        VarInt.writeVInt(os, size);

        long keys[] = new long[size];
        int counter = 0;

        for(int i=0;i<pointersAndOrdinals.length();i++) {
            long key = pointersAndOrdinals.get(i);
            if(key != EMPTY_BUCKET_VALUE)
                keys[counter++] = key;
        }

        for(int i=0;i<keys.length;i++) {
            VarInt.writeVInt(os, (int)(keys[i] >>> 36));
            VarInt.writeVLong(os, keys[i] & 0xFFFFFFFFFL);
        }

        /// write the byte data to the stream
        VarInt.writeVLong(os, byteData.length());
        for(long i=0;i<byteData.length();i++) {
            os.write(byteData.get(i));
        }
    }

}
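
Below is a minimal, self-contained sketch (not part of the original source) illustrating the key encoding described in the class javadoc: each long key packs the ordinal into the high 28 bits and the pointer into the ByteDataBuffer into the low 36 bits. The class name and sample values are hypothetical; only the bit arithmetic mirrors what ByteArrayOrdinalMap does when it builds and decodes keys.

public class OrdinalKeyLayoutSketch {
    public static void main(String[] args) {
        int ordinal = 12;            // in the real map this is handed out by FreeOrdinalTracker
        long pointer = 1000000L;     // byte offset of the VarInt-length-prefixed record in the ByteDataBuffer

        // pack: ordinal in the high 28 bits, pointer in the low 36 bits
        long key = ((long) ordinal << 36) | pointer;

        // unpack both components again
        int decodedOrdinal = (int) (key >>> 36);
        long decodedPointer = key & 0xFFFFFFFFFL;   // 0xFFFFFFFFF masks the low 36 bits

        System.out.println(decodedOrdinal + " @ " + decodedPointer);   // prints: 12 @ 1000000
    }
}

In the map itself, callers only ever see the int ordinal returned by getOrAssignOrdinal(ByteDataBuffer) or get(ByteDataBuffer); the pointer half of each key is used internally to locate the VarInt-length-prefixed byte sequence for comparison, compaction, and blob writing.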




