// org.yamcs.parameterarchive.ObjectSegment Maven / Gradle / Ivy
package org.yamcs.parameterarchive;
import java.lang.reflect.Array;
import java.nio.BufferOverflowException;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import me.lemire.integercompression.FastPFOR128;
import me.lemire.integercompression.IntWrapper;
import org.yamcs.utils.DecodingException;
import org.yamcs.utils.IntArray;
import org.yamcs.utils.VarIntUtil;
/**
 * Segment for all non primitive types.
 * <p>
 * Each element is serialized to a byte array that is not itself compressed. The compression of the segment (if any)
 * is realized by not repeating equal elements.
 * <p>
 * When writing, the encoding is chosen among:
 * <ul>
 * <li>raw - list of values stored verbatim, each preceded by its size varint32 encoded</li>
 * <li>enum - the list of unique values is stored at the beginning of the segment. Each value has an implicit id (its
 * position in that list). The rest of the segment is the list of ids, encoded in one of the following formats:
 * <ul>
 * <li>VB: varint32 of each id</li>
 * <li>FPROF: coded with the FastPFOR codec + varint32 of the remaining ids</li>
 * <li>RLE: run length encoded</li>
 * </ul>
 * </li>
 * </ul>
 *
 * @param <E>
 *            type of the elements stored in the segment
 */
public abstract class ObjectSegment<E> extends BaseSegment {
    static final byte SUBFORMAT_ID_RAW = 0;
    static final byte SUBFORMAT_ID_ENUM_RLE = 1;
    static final byte SUBFORMAT_ID_ENUM_VB = 2;
    static final byte SUBFORMAT_ID_ENUM_FPROF = 3;

    // this is set only during deserialisation.
    boolean runLengthEncoded = false;

    // one of the lists below is used depending whether runLengthEncoded is true or false
    List<E> objectList;
    List<E> rleObjectList;
    IntArray rleCounts;

    int size = 0;
    final ObjectSerializer<E> objSerializer;

    // temporary fields used during the construction before serialisation - could be probably refactored into some
    // builder which returns another object in the consolidate method

    // serialized value -> value id (the index of the value in the unique list)
    Map<HashableByteArray, Integer> valuemap;
    IntArray rleValues;
    // for each element of the segment, the id of its value
    IntArray enumValues;
    List<HashableByteArray> unique;

    // sizes of the different encodings, computed by consolidate()
    int rawSize;
    int enumRawSize;
    int enumRleSize;

    boolean consolidated = false;
    boolean writable = false;

    /**
     * Creates an empty segment.
     *
     * @param objSerializer
     *            converts the elements to and from their binary form; it also provides the format id passed to the
     *            super class
     * @param buildForSerialisation
     *            - is set to true at the construction and false at deserialisation. When true the segment is
     *            writable from the start; when false the fields are populated by {@link #parse(ByteBuffer)}.
     */
    ObjectSegment(ObjectSerializer<E> objSerializer, boolean buildForSerialisation) {
        super(objSerializer.getFormatId());
        this.objSerializer = objSerializer;
        if (buildForSerialisation) {
            writable = true;
            objectList = new ArrayList<>();
            unique = new ArrayList<>();
            valuemap = new HashMap<>();
            enumValues = new IntArray();
        } // else the parse method will construct the necessary fields
    }

    /**
     * add element to the end of the segment
     *
     * @param e
     *            the element to append
     * @throws UnsupportedOperationException
     *             if the segment is not writable
     */
    public void add(E e) {
        insert(size, e);
    }

    /**
     * Inserts an element at the given position; {@code pos == size()} appends to the end of the segment.
     *
     * @param pos
     *            position at which the element is inserted
     * @param e
     *            the element to insert
     * @throws UnsupportedOperationException
     *             if the segment is not writable
     */
    public void add(int pos, E e) {
        insert(pos, e);
    }

    /**
     * Common implementation of the two add methods: registers the serialized value in the enumeration (if not already
     * known) and stores the element and its value id at position pos.
     */
    private void insert(int pos, E e) {
        if (!writable) {
            throw new UnsupportedOperationException("Segment is not writable");
        }
        HashableByteArray se = new HashableByteArray(objSerializer.serialize(e));
        Integer knownId = valuemap.get(se);
        E stored = e;
        int valueId;
        if (knownId != null) {
            valueId = knownId;
            // store the instance already held by the segment, so that the duplicate passed by the caller (and its
            // serialized form) can be garbage collected
            stored = objectList.get(enumValues.indexOf(valueId));
        } else {
            valueId = unique.size();
            valuemap.put(se, valueId);
            unique.add(se);
        }
        if (pos == size) {
            enumValues.add(valueId);
            objectList.add(stored);
        } else {
            enumValues.add(pos, valueId);
            objectList.add(pos, stored);
        }
        size++;
    }

    /**
     * Writes the segment using the RLE or FPROF/VB enum encoding when the sizes computed by {@link #consolidate()}
     * indicate they are not larger than the raw encoding, falling back to the raw encoding otherwise or when the
     * attempt overflows the buffer.
     *
     * @throws IllegalStateException
     *             if the segment has not been consolidated
     */
    @Override
    public void writeTo(ByteBuffer bb) {
        if (!consolidated) {
            throw new IllegalStateException("The segment has to be consolidated before serialization can take place");
        }
        boolean encoded = false;
        int position = bb.position();
        try { // first try to encode them as Rle or EnumFprof
            if (enumRleSize <= enumRawSize && enumRleSize <= rawSize) {
                encoded = writeEnumRle(bb);
            } else if (enumRawSize < enumRleSize && enumRawSize <= rawSize) {
                encoded = writeEnumFprof(bb);
            }
        } catch (IndexOutOfBoundsException | BufferOverflowException ignored) {
            // deliberately ignored: encoded stays false and the segment is rewritten raw below
        }

        // if the resulted size is bigger than raw encoding, then encode it raw
        if (!encoded) {
            bb.position(position);
            writeRaw(bb);
        }
    }

    /**
     * Writes the segment in the raw sub-format: the sub-format id, the number of elements and then each serialized
     * element preceded by its length.
     */
    public void writeRaw(ByteBuffer bb) {
        bb.put(SUBFORMAT_ID_RAW);

        // write the size
        VarIntUtil.writeVarInt32(bb, objectList.size());

        // then write the values
        for (int i = 0; i < size; i++) {
            byte[] b = unique.get(enumValues.get(i)).b;
            VarIntUtil.writeVarInt32(bb, b.length);
            bb.put(b);
        }
    }

    /**
     * Writes the number of unique values followed by each unique value preceded by its length. This is the common
     * header of all the enum sub-formats.
     */
    private void writeUniqueValues(ByteBuffer bb) {
        VarIntUtil.writeVarInt32(bb, unique.size());
        for (HashableByteArray se : unique) {
            byte[] b = se.b;
            VarIntUtil.writeVarInt32(bb, b.length);
            bb.put(b);
        }
    }

    /**
     * Writes the unique values followed by the value ids compressed with FastPFOR; the ids not consumed by the codec
     * are appended varint32 encoded. If the codec produces no output, the sub-format id already written is replaced
     * by {@link #SUBFORMAT_ID_ENUM_VB} and all the ids are written as varint32.
     *
     * @return always true
     */
    boolean writeEnumFprof(ByteBuffer bb) {
        int position = bb.position();
        bb.put(SUBFORMAT_ID_ENUM_FPROF);

        // first write the enum values
        writeUniqueValues(bb);

        // then writes the enum ids
        VarIntUtil.writeVarInt32(bb, size);
        FastPFOR128 fastpfor = FastPFORFactory.get();
        IntWrapper inputoffset = new IntWrapper(0);
        IntWrapper outputoffset = new IntWrapper(0);
        int[] out = new int[size];
        int[] in = enumValues.array();
        fastpfor.compress(in, inputoffset, size, out, outputoffset);

        int compressedLength = outputoffset.get();
        if (compressedLength == 0) {
            // fastpfor didn't compress anything, probably there were too few datapoints
            bb.put(position, SUBFORMAT_ID_ENUM_VB);
        } else {
            // write the fastpfor output
            for (int i = 0; i < compressedLength; i++) {
                bb.putInt(out[i]);
            }
        }

        // write the remaining ids varint compressed
        for (int i = inputoffset.get(); i < size; i++) {
            VarIntUtil.writeVarInt32(bb, in[i]);
        }
        return true;
    }

    /**
     * Writes the unique values followed by the number of runs, the length of each run and then the value id of each
     * run, as computed by {@link #consolidate()}.
     *
     * @return always true
     */
    boolean writeEnumRle(ByteBuffer bb) {
        bb.put(SUBFORMAT_ID_ENUM_RLE);

        // first write the enum values
        writeUniqueValues(bb);

        // then write the rleCounts
        int runs = rleCounts.size();
        VarIntUtil.writeVarInt32(bb, runs);
        for (int i = 0; i < runs; i++) {
            VarIntUtil.writeVarInt32(bb, rleCounts.get(i));
        }

        // and write the rleValues
        for (int i = 0; i < runs; i++) {
            VarIntUtil.writeVarInt32(bb, rleValues.get(i));
        }
        return true;
    }

    /**
     * Populates this segment from the buffer; the first byte read is the sub-format id which selects the decoder.
     *
     * @throws DecodingException
     *             if the sub-format id is unknown or any other exception is thrown while decoding
     */
    protected void parse(ByteBuffer bb) throws DecodingException {
        byte formatId = bb.get();
        try {
            switch (formatId) {
            case SUBFORMAT_ID_RAW:
                parseRaw(bb);
                break;
            case SUBFORMAT_ID_ENUM_VB: // intentional fall through
            case SUBFORMAT_ID_ENUM_FPROF: // intentional fall through
            case SUBFORMAT_ID_ENUM_RLE:
                parseEnum(formatId, bb);
                break;
            default:
                throw new DecodingException("Unknown subformatid: " + formatId);
            }
        } catch (DecodingException e) {
            throw e;
        } catch (Exception e) {
            // malformed input (e.g. buffer underflow, bad index) is reported as a decoding problem
            throw new DecodingException("Cannot decode object segment subformatId " + formatId, e);
        }
    }

    /** Reads the number of elements and then each element preceded by its length. */
    private void parseRaw(ByteBuffer bb) throws DecodingException {
        size = VarIntUtil.readVarInt32(bb);
        objectList = new ArrayList<>(size);
        for (int i = 0; i < size; i++) {
            objectList.add(readObject(bb));
        }
    }

    /** Reads one length-prefixed serialized element from the buffer and deserializes it. */
    private E readObject(ByteBuffer bb) throws DecodingException {
        int l = VarIntUtil.readVarInt32(bb);
        byte[] b = new byte[l];
        bb.get(b);
        return objSerializer.deserialize(b);
    }

    /** Reads the list of unique values and then the value ids in the encoding given by formatId. */
    void parseEnum(int formatId, ByteBuffer bb) throws DecodingException {
        int n = VarIntUtil.readVarInt32(bb);
        List<E> uniqueValues = new ArrayList<>();
        for (int i = 0; i < n; i++) {
            uniqueValues.add(readObject(bb));
        }
        if (formatId == SUBFORMAT_ID_ENUM_RLE) {
            parseEnumRle(uniqueValues, bb);
        } else {
            parseEnumNonRle(formatId, uniqueValues, bb);
        }
    }

    /**
     * Decodes the value ids written either as FastPFOR output followed by varint32 (FPROF) or as varint32 only (VB)
     * and expands them into objectList.
     */
    private void parseEnumNonRle(int formatId, List<E> uniqueValues, ByteBuffer bb) throws DecodingException {
        size = VarIntUtil.readVarInt32(bb);
        int position = bb.position();
        int[] ids = new int[size];
        IntWrapper outputoffset = new IntWrapper(0);

        if (formatId == SUBFORMAT_ID_ENUM_FPROF) {
            // the length of the compressed part is not stored: hand the rest of the buffer to the codec and then
            // reposition the buffer just after the ints the codec has consumed
            int[] x = new int[(bb.limit() - position) / 4];
            for (int i = 0; i < x.length; i++) {
                x[i] = bb.getInt();
            }
            IntWrapper inputoffset = new IntWrapper(0);
            FastPFOR128 fastpfor = FastPFORFactory.get();
            fastpfor.uncompress(x, inputoffset, x.length, ids, outputoffset);
            bb.position(position + inputoffset.get() * 4);
        }

        // the ids not produced by the codec (all of them for the VB format) are varint32 encoded
        for (int i = outputoffset.get(); i < size; i++) {
            ids[i] = VarIntUtil.readVarInt32(bb);
        }

        objectList = new ArrayList<>(size);
        for (int i = 0; i < size; i++) {
            objectList.add(uniqueValues.get(ids[i]));
        }
    }

    /**
     * Reads the run lengths and the value of each run. The runs are not expanded: the segment stays run length
     * encoded in memory.
     */
    private void parseEnumRle(List<E> uniqueValues, ByteBuffer bb) throws DecodingException {
        int countNum = VarIntUtil.readVarInt32(bb);
        rleCounts = new IntArray(countNum);
        size = 0;
        for (int i = 0; i < countNum; i++) {
            int c = VarIntUtil.readVarInt32(bb);
            rleCounts.add(c);
            size += c;
        }
        rleObjectList = new ArrayList<>(countNum);
        for (int i = 0; i < countNum; i++) {
            int c = VarIntUtil.readVarInt32(bb);
            rleObjectList.add(uniqueValues.get(c));
        }
        runLengthEncoded = true;
    }

    /**
     * @return the size of the raw encoding as computed by {@link #consolidate()}
     * @throws IllegalStateException
     *             if the segment has not been consolidated
     */
    @Override
    public int getMaxSerializedSize() {
        if (!consolidated) {
            throw new IllegalStateException("The segment has to be consolidated before serialization can take place");
        }
        return rawSize;
    }

    /**
     * Returns the elements between two positions as an array.
     * <p>
     * In ascending mode the elements at positions posStart (inclusive) to posStop (exclusive) are returned in
     * increasing order of position. In descending mode the elements at positions posStop (inclusive) down to posStart
     * (exclusive) are returned in decreasing order of position.
     *
     * @throws IllegalArgumentException
     *             if posStart is not smaller than posStop
     */
    public E[] getRangeArray(int posStart, int posStop, boolean ascending) {
        if (posStart >= posStop) {
            throw new IllegalArgumentException("posStart has to be smaller than posStop");
        }

        if (!runLengthEncoded) {
            return getNonRleRange(posStart, posStop, ascending);
        }
        return ascending ? getRleRangeAscending(posStart, posStop) : getRleRangeDescending(posStart, posStop);
    }

    /**
     * Range extraction when the elements are stored one by one in objectList. The component type of the returned
     * array is the runtime class of the first element of the segment.
     */
    E[] getNonRleRange(int posStart, int posStop, boolean ascending) {
        @SuppressWarnings("unchecked")
        E[] r = (E[]) Array.newInstance(objectList.get(0).getClass(), posStop - posStart);
        if (ascending) {
            for (int i = posStart; i < posStop; i++) {
                r[i - posStart] = objectList.get(i);
            }
        } else {
            // posStop is included and posStart is excluded
            for (int i = posStop; i > posStart; i--) {
                r[posStop - i] = objectList.get(i);
            }
        }
        return r;
    }

    /**
     * Ascending range extraction from the run length encoded representation: returns the elements at positions
     * posStart (inclusive) to posStop (exclusive).
     *
     * @throws IndexOutOfBoundsException
     *             if posStop is greater than the size of the segment
     */
    E[] getRleRangeAscending(int posStart, int posStop) {
        if (posStop > size) {
            throw new IndexOutOfBoundsException("Index: " + posStop + " size: " + size);
        }
        int n = posStop - posStart;
        @SuppressWarnings("unchecked")
        E[] r = (E[]) Array.newInstance(rleObjectList.get(0).getClass(), n);

        // find the run i containing posStart; k becomes the offset of posStart inside that run
        int k = posStart;
        int i = 0;
        while (k >= rleCounts.get(i)) {
            k -= rleCounts.get(i++);
        }

        int pos = 0;
        while (pos < n) {
            r[pos++] = rleObjectList.get(i);
            k++;
            if (k >= rleCounts.get(i)) { // run exhausted, move to the next one
                i++;
                k = 0;
            }
        }
        return r;
    }

    /**
     * Descending range extraction from the run length encoded representation: returns the elements at positions
     * posStop (inclusive) down to posStart (exclusive).
     *
     * @throws IndexOutOfBoundsException
     *             if posStop is not smaller than the size of the segment
     */
    public E[] getRleRangeDescending(int posStart, int posStop) {
        if (posStop >= size) {
            throw new IndexOutOfBoundsException("Index: " + posStop + " size: " + size);
        }

        int n = posStop - posStart;
        @SuppressWarnings("unchecked")
        E[] r = (E[]) Array.newInstance(rleObjectList.get(0).getClass(), n);

        // walk the runs backwards to find the run i containing posStop;
        // k starts as the number of elements from posStop to the end of the segment
        int k = size - posStop;
        int i = rleCounts.size() - 1;
        while (k > rleCounts.get(i)) {
            k -= rleCounts.get(i--);
        }
        // k becomes the offset of posStop inside run i
        k = rleCounts.get(i) - k;

        int pos = 0;
        while (true) {
            r[pos++] = rleObjectList.get(i);
            if (pos == n) {
                break;
            }
            k--;
            if (k < 0) { // start of the run passed, continue from the end of the previous run
                i--;
                k = rleCounts.get(i) - 1;
            }
        }
        return r;
    }

    /**
     * Returns the element at the given position.
     *
     * @throws IndexOutOfBoundsException
     *             if the index is negative or not smaller than the size of the segment
     */
    public E get(int index) {
        if (!runLengthEncoded) {
            return objectList.get(index);
        }
        if (index < 0 || index >= size) {
            throw new IndexOutOfBoundsException("Index: " + index + " size: " + size);
        }
        // accumulate the run lengths until the run containing the index has been passed
        int k = 0;
        int i = 0;
        while (k <= index) {
            k += rleCounts.get(i);
            i++;
        }
        return rleObjectList.get(i - 1);
    }

    /**
     * the number of elements in this segment (not taking into account any compression due to run-length encoding)
     *
     * @return the number of elements
     */
    @Override
    public int size() {
        return size;
    }

    /**
     * Computes the runs (rleCounts/rleValues) and the sizes of the raw, enum and enum-RLE encodings used by
     * {@link #writeTo(ByteBuffer)} and {@link #getMaxSerializedSize()}.
     * <p>
     * The add methods do not update the values computed here, so this has to be called (again) after the last
     * element has been added and before the segment is written.
     */
    public void consolidate() {
        rleCounts = new IntArray();
        rleValues = new IntArray();

        rawSize = enumRawSize = enumRleSize = 1; // subFormatId byte
        rawSize += VarIntUtil.getEncodedSize(size);
        enumRawSize += VarIntUtil.getEncodedSize(size) + VarIntUtil.getEncodedSize(unique.size());
        enumRleSize += VarIntUtil.getEncodedSize(unique.size());

        for (int i = 0; i < size; i++) {
            int valueId = enumValues.get(i);
            byte[] b = unique.get(valueId).b;
            rawSize += VarIntUtil.getEncodedSize(b.length) + b.length;
            enumRawSize += VarIntUtil.getEncodedSize(valueId);

            // extend the current run if the value repeats, otherwise start a new run
            int lastRun = rleValues.size() - 1;
            if (lastRun >= 0 && rleValues.get(lastRun) == valueId) {
                rleCounts.set(lastRun, rleCounts.get(lastRun) + 1);
            } else {
                rleCounts.add(1);
                rleValues.add(valueId);
            }
        }

        // both enum encodings start with the list of unique values
        for (HashableByteArray se : unique) {
            byte[] b = se.b;
            int s = VarIntUtil.getEncodedSize(b.length) + b.length;
            enumRawSize += s;
            enumRleSize += s;
        }

        enumRleSize += VarIntUtil.getEncodedSize(rleCounts.size());
        for (int i = 0; i < rleCounts.size(); i++) {
            enumRleSize += VarIntUtil.getEncodedSize(rleCounts.get(i)) + VarIntUtil.getEncodedSize(rleValues.get(i));
        }
        consolidated = true;
    }

    /**
     * Rebuilds the structures needed for adding elements (unique, valuemap, enumValues) from the deserialized
     * content, expanding the runs into objectList if the segment was run length encoded. Does nothing if the segment
     * is already writable.
     */
    @Override
    public void makeWritable() {
        if (writable) {
            return;
        }
        unique = new ArrayList<>();
        valuemap = new HashMap<>();
        enumValues = new IntArray();

        if (runLengthEncoded) {
            objectList = new ArrayList<>(size);
            for (int i = 0; i < rleObjectList.size(); i++) {
                E o = rleObjectList.get(i);
                int idx = valueIdOf(o);
                for (int k = 0; k < rleCounts.get(i); k++) {
                    objectList.add(o);
                    enumValues.add(idx);
                }
            }
            runLengthEncoded = false;
        } else {
            for (E o : objectList) {
                enumValues.add(valueIdOf(o));
            }
        }
        writable = true;
    }

    /** Returns the id of the serialized form of o, adding it to the list of unique values if not yet known. */
    private int valueIdOf(E o) {
        HashableByteArray se = new HashableByteArray(objSerializer.serialize(o));
        return valuemap.computeIfAbsent(se, k -> {
            int newIdx = unique.size();
            unique.add(k);
            return newIdx;
        });
    }

    /**
     * Consistent with {@link #equals(Object)}: derived only from the list of unique values, which is one of the two
     * fields compared there, so that it does not depend on how IntArray implements hashCode.
     */
    @Override
    public int hashCode() {
        return unique == null ? 0 : unique.hashCode();
    }

    /**
     * Two segments of the same class are equal if they have the same list of unique serialized values and the same
     * list of value ids. Both fields are only set on writable segments.
     */
    @Override
    public boolean equals(Object obj) {
        if (this == obj) {
            return true;
        }
        if (obj == null || getClass() != obj.getClass()) {
            return false;
        }
        ObjectSegment<?> other = (ObjectSegment<?>) obj;
        if (unique == null ? other.unique != null : !unique.equals(other.unique)) {
            return false;
        }
        if (enumValues == null) {
            return other.enumValues == null;
        }
        return enumValues.equals(other.enumValues);
    }
}
/**
* wrapper around byte[] to allow it to be used in HashMaps
*/
class HashableByteArray {
private int hash = 0;
final byte[] b;
public HashableByteArray(byte[] b) {
this.b = b;
}
@Override
public int hashCode() {
if (hash == 0) {
hash = Arrays.hashCode(b);
}
return hash;
}
@Override
public boolean equals(Object obj) {
if (this == obj)
return true;
if (obj == null)
return false;
if (getClass() != obj.getClass())
return false;
HashableByteArray other = (HashableByteArray) obj;
if (hashCode() != other.hashCode())
return false;
if (!Arrays.equals(b, other.b))
return false;
return true;
}
}
interface ObjectSerializer {
byte getFormatId();
E deserialize(byte[] b) throws DecodingException;
byte[] serialize(E e);
}
// © 2015 - 2024 Weber Informatics LLC | Privacy Policy