// Source: com.netflix.zeno.fastblob.state.ByteArrayOrdinalMap
/*
*
* Copyright 2013 Netflix, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
package com.netflix.zeno.fastblob.state;
import com.netflix.zeno.fastblob.FastBlobImageUtils;
import com.netflix.zeno.fastblob.OrdinalMapping;
import com.netflix.zeno.fastblob.StateOrdinalMapping;
import com.netflix.zeno.fastblob.record.ByteDataBuffer;
import com.netflix.zeno.fastblob.record.FastBlobDeserializationRecord;
import com.netflix.zeno.fastblob.record.SegmentedByteArray;
import com.netflix.zeno.fastblob.record.SegmentedByteArrayHasher;
import com.netflix.zeno.fastblob.record.VarInt;
import com.netflix.zeno.util.SimultaneousExecutor;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.util.Arrays;
import java.util.concurrent.atomic.AtomicLongArray;
/**
*
* This data structure maps byte sequences to ordinals. This is a hash table. The pointersAndOrdinals
* AtomicLongArray contains keys, and the ByteDataBuffer contains values. Each key has two components.
* The high 28 bits in the key represent the ordinal. The low 36 bits represent the pointer to the start
* position of the byte sequence in the ByteDataBuffer. Each byte sequence is preceded by
* a variable-length integer (see {@link VarInt}), indicating the length of the sequence.
*
* This implementation is extremely fast. Even though it would be technically correct and clearer,
* using a separate int[] array for the pointers, and an AtomicIntegerArray for the ordinals as keys
* was measured as two orders of magnitude slower.
*
* @author dkoszewnik
*
*/
public class ByteArrayOrdinalMap {
/// sentinel stored in an unoccupied hash bucket
private final static long EMPTY_BUCKET_VALUE = -1L;

/// IMPORTANT: Thread safety: We need volatile access semantics to the individual elements in the
/// pointersAndOrdinals array. This only works in JVMs 1.5 or later (JSR 133).
/// Ordinal is the high 28 bits. Pointer to byte data is the low 36 bits.
private AtomicLongArray pointersAndOrdinals;

/// backing store holding each length-prefixed serialized byte sequence the keys point into
private final ByteDataBuffer byteData;

/// hands out ordinals; unused ordinals are returned to it (see the compact() region below)
private final FreeOrdinalTracker freeOrdinalTracker;

/// number of populated buckets in pointersAndOrdinals
private int size;

/// resize threshold: 70% of the current table length
private int sizeBeforeGrow;

/// per-ordinal pointer lookup; built in the (corrupted) region below -- verify against upstream source
private long pointersByOrdinal[];
/**
 * Creates a map with the default initial byte buffer capacity of
 * 262144 (256KB); the hash table starts at 256 buckets.
 */
public ByteArrayOrdinalMap() {
    this(262144);
}
/**
 * Creates a map whose hash table starts at 256 buckets and whose byte buffer
 * starts at the supplied capacity.
 *
 * @param bufferSize initial capacity, in bytes, of the underlying ByteDataBuffer
 */
public ByteArrayOrdinalMap(int bufferSize) {
    this.freeOrdinalTracker = new FreeOrdinalTracker();
    this.byteData = new ByteDataBuffer(bufferSize);
    this.pointersAndOrdinals = emptyKeyArray(256);
    /// 70% load factor; derived from the table size instead of the previous
    /// hard-coded 179, for consistency with the private constructor below.
    this.sizeBeforeGrow = 256 * 7 / 10;
    this.size = 0;
}
/**
 * Rebuilds a map around previously serialized state: the supplied keys are
 * re-hashed into a fresh key table of the requested size.
 *
 * @param keys ordinal/pointer keys to populate into the new hash table
 * @param byteData buffer already containing the byte sequences the keys point into
 * @param freeOrdinalTracker carries over which ordinals are free
 * @param keyArraySize new table size; presumably a power of two so the mask-based
 *        probing in this class works -- TODO confirm at call sites
 */
private ByteArrayOrdinalMap(long keys[], ByteDataBuffer byteData, FreeOrdinalTracker freeOrdinalTracker, int keyArraySize) {
    this.freeOrdinalTracker = freeOrdinalTracker;
    this.byteData = byteData;
    AtomicLongArray pointersAndOrdinals = emptyKeyArray(keyArraySize);
    populateNewHashArray(pointersAndOrdinals, keys);
    this.pointersAndOrdinals = pointersAndOrdinals;
    this.size = keys.length;
    this.sizeBeforeGrow = keyArraySize * 7 / 10; /// 70% load factor
}
/**
 * Add a sequence of bytes to this map. If the sequence of bytes has already been added to this map, return the originally assigned ordinal.
 * If the sequence of bytes has not been added to this map, assign and return a new ordinal. This operation is thread-safe.
 *
 * @param serializedRepresentation the byte sequence to look up (and insert if absent)
 * @return the ordinal for this byte sequence
 */
public int getOrAssignOrdinal(ByteDataBuffer serializedRepresentation) {
    int hash = SegmentedByteArrayHasher.hashCode(serializedRepresentation);

    /// table length is a power of two, so (hash & mask) selects a bucket
    int modBitmask = pointersAndOrdinals.length() - 1;
    int bucket = hash & modBitmask;
    long key = pointersAndOrdinals.get(bucket); /// volatile read

    /// linear probing to resolve collisions.
    while(key != EMPTY_BUCKET_VALUE) {
        if(compare(serializedRepresentation, key)) {
            return (int)(key >> 36); /// ordinal is the high 28 bits of the key
        }

        bucket = (bucket + 1) & modBitmask;
        key = pointersAndOrdinals.get(bucket);
    }

    /// not found on this lock-free read path; fall through to the synchronized
    /// write path, which re-checks before inserting (double-checked locking).
    return assignOrdinal(serializedRepresentation, hash);
}
/// acquire the lock before writing.
/**
 * Locked slow path of getOrAssignOrdinal: re-checks for the byte sequence
 * under the monitor, then inserts it with a newly assigned ordinal if still absent.
 *
 * @param serializedRepresentation the byte sequence to insert
 * @param hash precomputed hash of the byte sequence
 * @return the existing ordinal if another thread won the race, otherwise the new one
 */
private synchronized int assignOrdinal(ByteDataBuffer serializedRepresentation, int hash) {
    /// resize before inserting once past the 70% load threshold
    if(size > sizeBeforeGrow)
        growKeyArray();

    /// check to make sure that after acquiring the lock, the element still does not exist.
    /// this operation is akin to double-checked locking which is 'fixed' with the JSR 133 memory model in JVM >= 1.5.
    int modBitmask = pointersAndOrdinals.length() - 1;
    int bucket = hash & modBitmask;
    long key = pointersAndOrdinals.get(bucket);

    while(key != EMPTY_BUCKET_VALUE) {
        if(compare(serializedRepresentation, key)) {
            return (int)(key >> 36);
        }

        bucket = (bucket + 1) & modBitmask;
        key = pointersAndOrdinals.get(bucket);
    }

    /// the ordinal for this object still does not exist in the list, even after the lock has been acquired.
    /// it is up to this thread to add it at the current bucket position.
    int ordinal = freeOrdinalTracker.getFreeOrdinal();
    long pointer = byteData.length();

    /// write the length-prefixed byte sequence BEFORE publishing the key below;
    /// the volatile set is what makes these writes visible to readers.
    VarInt.writeVInt(byteData, (int)serializedRepresentation.length());
    serializedRepresentation.copyTo(byteData);

    key = ((long)ordinal << 36) | pointer;

    size++;

    /// this set on the AtomicLongArray has volatile semantics (i.e. behaves like a monitor release).
    /// Any other thread reading this element in the AtomicLongArray will have visibility to all memory writes this thread has made up to this point.
    /// This means the entire byte sequence is guaranteed to be visible to any thread which reads the pointer to that data.
    pointersAndOrdinals.set(bucket, key);

    return ordinal;
}
/**
 * Assign a predefined ordinal to a serialized representation.
 *
 * WARNING: THIS OPERATION IS NOT THREAD-SAFE.
 *
 * This is intended for use in the client-side heap-safe double snapshot load.
 *
 * If the byte sequence is already present, the call is a no-op and the
 * originally recorded key (and its ordinal) is kept.
 */
public void put(ByteDataBuffer serializedRepresentation, int ordinal) {
    if(size > sizeBeforeGrow) {
        growKeyArray();
    }

    int hash = SegmentedByteArrayHasher.hashCode(serializedRepresentation);
    int mask = pointersAndOrdinals.length() - 1;

    /// linear probe until we hit either this sequence or an empty bucket
    int slot = hash & mask;
    for(long existing = pointersAndOrdinals.get(slot); existing != EMPTY_BUCKET_VALUE; existing = pointersAndOrdinals.get(slot)) {
        if(compare(serializedRepresentation, existing)) {
            return; /// already mapped; keep the original ordinal
        }
        slot = (slot + 1) & mask;
    }

    /// append the length-prefixed byte sequence, then record the key for it
    long dataStart = byteData.length();
    VarInt.writeVInt(byteData, (int)serializedRepresentation.length());
    serializedRepresentation.copyTo(byteData);

    size++;
    pointersAndOrdinals.set(slot, ((long)ordinal << 36) | dataStart);
}
/**
 * Returns the ordinal for a previously added byte sequence. If this byte sequence has not been added to the map, then -1 is returned.
 *
 * This is intended for use in the client-side heap-safe double snapshot load.
 *
 * @param serializedRepresentation the byte sequence to look up
 * @return The ordinal for this serialized representation, or -1.
 */
public int get(ByteDataBuffer serializedRepresentation) {
    int mask = pointersAndOrdinals.length() - 1;
    int slot = SegmentedByteArrayHasher.hashCode(serializedRepresentation) & mask;

    /// linear probing; an empty bucket proves the sequence is absent
    for(long candidate = pointersAndOrdinals.get(slot); candidate != EMPTY_BUCKET_VALUE; candidate = pointersAndOrdinals.get(slot)) {
        if(compare(serializedRepresentation, candidate)) {
            return (int)(candidate >> 36); /// ordinal is the high 28 bits
        }
        slot = (slot + 1) & mask;
    }

    return -1;
}
/**
 * Remove all entries from this map, but reuse the existing arrays when populating the map next time.
 *
 * This is intended for use in the client-side heap-safe double snapshot load.
 */
/// NOTE(review): the code below is CORRUPTED -- text between '<' and '>' characters
/// (loop conditions, loop bodies, generics) was stripped, apparently by an HTML
/// extraction step. What remains fuses the body of clear() with a later method that
/// builds pointersByOrdinal and returns a maximum data length. Restore this region
/// from the upstream Zeno source; do not attempt to repair it by hand here.
public void clear() {
for(int i=0;i> 36);
if(ordinal > maxOrdinal)
maxOrdinal = ordinal;
}
}
pointersByOrdinal = new long[maxOrdinal + 1];
Arrays.fill(pointersByOrdinal, -1);
for(int i=0;i> 36);
pointersByOrdinal[ordinal] = key & 0xFFFFFFFFFL;
int dataLength = VarInt.readVInt(byteData.getUnderlyingArray(), pointersByOrdinal[ordinal]);
if(dataLength > maxLength)
maxLength = dataLength;
}
}
return maxLength;
}
/**
 * Reclaim space in the byte array used in the previous cycle, but not referenced in this cycle.
 *
 * This is achieved by shifting all used byte sequences down in the byte array, then updating
 * the key array to reflect the new pointers and exclude the removed entries. This is also where ordinals
 * which are unused are returned to the pool.
 *
 * @param usedOrdinals a bit set representing the ordinals which are currently referenced by any image.
 */
/// NOTE(review): the code below is CORRUPTED -- text between '<' and '>' characters
/// was stripped during extraction. The remains fuse the body of compact() with what
/// appears to be a fillDeserializationStateFromData(...) method (note the orphaned
/// "fill)" parameter at the former method boundary and the SimultaneousExecutor
/// usage). Restore this region from the upstream Zeno source before editing.
public void compact(ThreadSafeBitSet usedOrdinals) {
long populatedReverseKeys[] = new long[size];
int counter = 0;
for(int i=0;i>> 36;
}
}
Arrays.sort(populatedReverseKeys);
SegmentedByteArray arr = byteData.getUnderlyingArray();
long currentCopyPointer = 0;
for(int i=0;i> 28;
int length = VarInt.readVInt(arr, pointer);
length += VarInt.sizeOfVInt(length);
if(currentCopyPointer != pointer)
arr.copy(arr, pointer, currentCopyPointer, length);
populatedReverseKeys[i] = populatedReverseKeys[i] << 36 | currentCopyPointer;
currentCopyPointer += length;
} else {
freeOrdinalTracker.returnOrdinalToPool(ordinal);
populatedReverseKeys[i] = EMPTY_BUCKET_VALUE;
}
}
byteData.setPosition(currentCopyPointer);
for(int i=0;i fill) {
SimultaneousExecutor executor = new SimultaneousExecutor(1);
final int numThreads = executor.getMaximumPoolSize();
fill.ensureCapacity(maxOrdinal() + 1);
for(int i=0;i> 36);
int sizeOfData = VarInt.readVInt(byteData.getUnderlyingArray(), pointer);
pointer += VarInt.sizeOfVInt(sizeOfData);
rec.position(pointer);
fill.add(ordinal, rec);
}
}
}
});
}
executor.awaitUninterruptibly();
}
/**
 * Copy all of the data from this ByteArrayOrdinalMap to the provided FastBlobTypeSerializationState.
 *
 * Image memberships for each ordinal are determined via the provided array of ThreadSafeBitSets.
 *
 * @param destState
 * @param imageMemberships
 * @param ordinalMapping maps ordinals in this state to ordinals in the destination state
 */
/// NOTE(review): the code below is CORRUPTED -- text between '<' and '>' characters
/// was stripped during extraction. The remains fuse the body of
/// copySerializedObjectData(...) with what appears to be the maxOrdinal() helper
/// (the trailing loop returning maxOrdinal). Restore from the upstream Zeno source.
void copySerializedObjectData(final FastBlobTypeSerializationState destState, final ThreadSafeBitSet imageMemberships[],
final OrdinalMapping ordinalMapping) {
final StateOrdinalMapping stateOrdinalMapping = ordinalMapping.createStateOrdinalMapping(destState.getName(), maxOrdinal());
SimultaneousExecutor executor = new SimultaneousExecutor(8);
final int numThreads = executor.getMaximumPoolSize();
for(int i=0;i> 36);
for(int imageIndex=0;imageIndex> 36);
if(ordinal > maxOrdinal)
maxOrdinal = ordinal;
}
return maxOrdinal;
}
/**
 * Compare the byte sequence contained in the supplied ByteDataBuffer with the
 * sequence contained in the map pointed to by the specified key, byte by byte.
 */
/// NOTE(review): the code below is CORRUPTED -- text between '<' and '>' characters
/// was stripped during extraction. The remains fuse the body of compare(...) with
/// what appears to be a serializeTo(OutputStream) method (the VarInt.writeVLong(os, ...)
/// calls writing keys and then the byte data). Restore from the upstream Zeno source.
private boolean compare(ByteDataBuffer serializedRepresentation, long key) {
/// pointer to the start of the stored sequence is the low 36 bits of the key
long position = key & 0xFFFFFFFFFL;
int sizeOfData = VarInt.readVInt(byteData.getUnderlyingArray(), position);
/// cheap length check before any byte-by-byte comparison
if(sizeOfData != serializedRepresentation.length())
return false;
position += VarInt.sizeOfVInt(sizeOfData);
for(int i=0;i> 36));
VarInt.writeVLong(os, keys[i] & 0xFFFFFFFFFL);
}
/// write the byte data to the stream
VarInt.writeVLong(os, byteData.length());
for(long i=0;i> 36);
}
}