All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.yamcs.parameterarchive.ObjectSegment Maven / Gradle / Ivy

There is a newer version: 5.10.9
Show newest version
package org.yamcs.parameterarchive;

import java.lang.reflect.Array;
import java.nio.BufferOverflowException;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import me.lemire.integercompression.FastPFOR128;
import me.lemire.integercompression.IntWrapper;

import org.yamcs.utils.DecodingException;
import org.yamcs.utils.IntArray;
import org.yamcs.utils.VarIntUtil;

/**
 * Segment for all non primitive types.
 * 

* Each element is encoded to a binary that is not compressed. The compression of the segment (if any) is realized by * not repeating elements. *

* Finds best encoding among: *

    *
  • raw - list of values stored verbatim, each preceded by its size varint32 encoded
  • *
  • enum - the list of unique values are stored at the beginning of the segment. *

    * Each value has an implicit id (the order in the list). The rest of the segment is the list of ids and can be encoded * in one of the following formats *

      *
    • VB: varint32 of each id
    • *
    • FPROF: coded with the FPROF codec + varint32 of remaining
    • *
    • RLE: run length encoded
    • *
    *
* */ public abstract class ObjectSegment extends BaseSegment { final static byte SUBFORMAT_ID_RAW = 0; final static byte SUBFORMAT_ID_ENUM_RLE = 1; final static byte SUBFORMAT_ID_ENUM_VB = 2; final static byte SUBFORMAT_ID_ENUM_FPROF = 3; // this is set only during deserialisation. boolean runLengthEncoded = false; // one of the lists below is used depending whether runLengthEncoded is true or false List objectList; List rleObjectList; IntArray rleCounts; int size = 0; final ObjectSerializer objSerializer; // temporary fields used during the construction before serialisation - could be probably refactored into some // builder which returns another object in the consolidate method Map valuemap; IntArray rleValues; IntArray enumValues; List unique; int rawSize; int enumRawSize; int enumRleSize; boolean consolidated = false; boolean writable = false; /** * b * * @param objSerializer * @param buildForSerialisation * - is set to true at the construction and false at deserialisation */ ObjectSegment(ObjectSerializer objSerializer, boolean buildForSerialisation) { super(objSerializer.getFormatId()); this.objSerializer = objSerializer; if (buildForSerialisation) { writable = true; objectList = new ArrayList(); unique = new ArrayList(); valuemap = new HashMap<>(); enumValues = new IntArray(); } // else in the parseFrom will construct the necessary fields } /** * add element to the end of the segment * * @param e */ public void add(E e) { if (!writable) { throw new UnsupportedOperationException("Segment is not writable"); } byte[] b = objSerializer.serialize(e); HashableByteArray se = new HashableByteArray(b); int valueId; if (valuemap.containsKey(se)) { valueId = valuemap.get(se); se = unique.get(valueId); // release the old se object to garbage e = objectList.get(enumValues.indexOf(valueId));// release the old e object to garbage } else { valueId = unique.size(); valuemap.put(se, valueId); unique.add(se); } enumValues.add(valueId); objectList.add(e); size++; } public void add(int pos, E e) { if (!writable) { throw new UnsupportedOperationException("Segment is not writable"); } if (pos == size) { add(e); return; } byte[] b = objSerializer.serialize(e); HashableByteArray se = new HashableByteArray(b); int valueId; if (valuemap.containsKey(se)) { valueId = valuemap.get(se); se = unique.get(valueId); // release the old se object to garbage e = objectList.get(enumValues.indexOf(valueId));// release the old e object to garbage } else { valueId = unique.size(); valuemap.put(se, valueId); unique.add(se); } enumValues.add(pos, valueId); objectList.add(pos, e); size++; } @Override public void writeTo(ByteBuffer bb) { if (!consolidated) { throw new IllegalStateException("The segment has to be consolidated before serialization can take place"); } boolean encoded = false; int position = bb.position(); try { // first try to encode them as Rle or EnuFprof if (enumRleSize <= enumRawSize && enumRleSize <= rawSize) { encoded = writeEnumRle(bb); } else if (enumRawSize < enumRleSize && enumRawSize <= rawSize) { encoded = writeEnumFprof(bb); } } catch (IndexOutOfBoundsException | BufferOverflowException e) { // ignore -> encoded = false; } // if the resulted size is bigger than raw encoding, then encode it raw if (!encoded) { bb.position(position); writeRaw(bb); } } public void writeRaw(ByteBuffer bb) { bb.put(SUBFORMAT_ID_RAW); // write the size VarIntUtil.writeVarInt32(bb, objectList.size()); // then write the values for (int i = 0; i < size; i++) { byte[] b = unique.get(enumValues.get(i)).b; VarIntUtil.writeVarInt32(bb, b.length); bb.put(b); } } boolean writeEnumFprof(ByteBuffer bb) { int position = bb.position(); bb.put(SUBFORMAT_ID_ENUM_FPROF); // first write the enum values VarIntUtil.writeVarInt32(bb, unique.size()); for (int i = 0; i < unique.size(); i++) { byte[] b = unique.get(i).b; VarIntUtil.writeVarInt32(bb, b.length); bb.put(b); } // then writes the enum ids VarIntUtil.writeVarInt32(bb, size); FastPFOR128 fastpfor = FastPFORFactory.get(); IntWrapper inputoffset = new IntWrapper(0); IntWrapper outputoffset = new IntWrapper(0); int[] out = new int[size]; int[] in = enumValues.array(); fastpfor.compress(in, inputoffset, size, out, outputoffset); if (outputoffset.get() == 0) { // fastpfor didn't compress anything, probably there were too few datapoints bb.put(position, SUBFORMAT_ID_ENUM_VB); } else { // write the fastpfor output for (int i = 0; i < outputoffset.get(); i++) { bb.putInt(out[i]); } } // write the remaining bytes varint compressed for (int i = inputoffset.get(); i < size; i++) { VarIntUtil.writeVarInt32(bb, in[i]); } return true; } boolean writeEnumRle(ByteBuffer bb) { bb.put(SUBFORMAT_ID_ENUM_RLE); // first write the enum values VarIntUtil.writeVarInt32(bb, unique.size()); for (int i = 0; i < unique.size(); i++) { byte[] b = unique.get(i).b; VarIntUtil.writeVarInt32(bb, b.length); bb.put(b); } // then write the rleCounts VarIntUtil.writeVarInt32(bb, rleCounts.size()); for (int i = 0; i < rleCounts.size(); i++) { VarIntUtil.writeVarInt32(bb, rleCounts.get(i)); } // and write the rleValues for (int i = 0; i < rleCounts.size(); i++) { VarIntUtil.writeVarInt32(bb, rleValues.get(i)); } return true; } protected void parse(ByteBuffer bb) throws DecodingException { byte formatId = bb.get(); try { switch (formatId) { case SUBFORMAT_ID_RAW: parseRaw(bb); break; case SUBFORMAT_ID_ENUM_VB: // intentional fall trough case SUBFORMAT_ID_ENUM_FPROF:// intentional fall trough case SUBFORMAT_ID_ENUM_RLE: parseEnum(formatId, bb); break; default: throw new DecodingException("Unknown subformatid: " + formatId); } } catch (DecodingException e) { throw e; } catch (Exception e) { throw new DecodingException("Cannot decode object segment subformatId " + formatId, e); } } private void parseRaw(ByteBuffer bb) throws DecodingException { size = VarIntUtil.readVarInt32(bb); objectList = new ArrayList(size); for (int i = 0; i < size; i++) { int l = VarIntUtil.readVarInt32(bb); byte[] b = new byte[l]; bb.get(b); E e = objSerializer.deserialize(b); objectList.add(e); } } void parseEnum(int formatId, ByteBuffer bb) throws DecodingException { int n = VarIntUtil.readVarInt32(bb); List uniqueValues = new ArrayList(); for (int i = 0; i < n; i++) { int l = VarIntUtil.readVarInt32(bb); byte[] b = new byte[l]; bb.get(b); E e = objSerializer.deserialize(b); uniqueValues.add(e); } if (formatId == SUBFORMAT_ID_ENUM_RLE) { parseEnumRle(uniqueValues, bb); } else { parseEnumNonRle(formatId, uniqueValues, bb); } } private void parseEnumNonRle(int formatId, List uniqueValues, ByteBuffer bb) throws DecodingException { size = VarIntUtil.readVarInt32(bb); int position = bb.position(); int[] enumValues = new int[size]; IntWrapper outputoffset = new IntWrapper(0); if (formatId == SUBFORMAT_ID_ENUM_FPROF) { int[] x = new int[(bb.limit() - position) / 4]; for (int i = 0; i < x.length; i++) { x[i] = bb.getInt(); } IntWrapper inputoffset = new IntWrapper(0); FastPFOR128 fastpfor = FastPFORFactory.get(); fastpfor.uncompress(x, inputoffset, x.length, enumValues, outputoffset); bb.position(position + inputoffset.get() * 4); } for (int i = outputoffset.get(); i < size; i++) { enumValues[i] = VarIntUtil.readVarInt32(bb); } objectList = new ArrayList(size); for (int i = 0; i < size; i++) { objectList.add(uniqueValues.get(enumValues[i])); } } private void parseEnumRle(List uniqueValues, ByteBuffer bb) throws DecodingException { int countNum = VarIntUtil.readVarInt32(bb); rleCounts = new IntArray(countNum); size = 0; for (int i = 0; i < countNum; i++) { int c = VarIntUtil.readVarInt32(bb); rleCounts.add(c); size += c; } rleObjectList = new ArrayList<>(countNum); for (int i = 0; i < countNum; i++) { int c = VarIntUtil.readVarInt32(bb); rleObjectList.add(uniqueValues.get(c)); } runLengthEncoded = true; } @Override public int getMaxSerializedSize() { if (!consolidated) { throw new IllegalStateException("The segment has to be consolidated before serialization can take place"); } return rawSize; } public E[] getRangeArray(int posStart, int posStop, boolean ascending) { if (posStart >= posStop) throw new IllegalArgumentException("posStart has to be smaller than posStop"); if (runLengthEncoded) { if (ascending) { return getRleRangeAscending(posStart, posStop); } else { return getRleRangeDescending(posStart, posStop); } } else { return getNonRleRange(posStart, posStop, ascending); } } E[] getNonRleRange(int posStart, int posStop, boolean ascending) { @SuppressWarnings("unchecked") E[] r = (E[]) Array.newInstance(objectList.get(0).getClass(), posStop - posStart); if (ascending) { for (int i = posStart; i < posStop; i++) { r[i - posStart] = objectList.get(i); } } else { for (int i = posStop; i > posStart; i--) { r[posStop - i] = objectList.get(i); } } return r; } E[] getRleRangeAscending(int posStart, int posStop) { int n = posStop - posStart; @SuppressWarnings("unchecked") E[] r = (E[]) Array.newInstance(rleObjectList.get(0).getClass(), n); int k = posStart; int i = 0; while (k >= rleCounts.get(i)) { k -= rleCounts.get(i++); } int pos = 0; while (pos < n) { r[pos++] = rleObjectList.get(i); k++; if (k >= rleCounts.get(i)) { i++; k = 0; } } return r; } public E[] getRleRangeDescending(int posStart, int posStop) { if (posStop >= size) throw new IndexOutOfBoundsException("Index: " + posStop + " size: " + size); int n = posStop - posStart; @SuppressWarnings("unchecked") E[] r = (E[]) Array.newInstance(rleObjectList.get(0).getClass(), n); int k = size - posStop; int i = rleCounts.size() - 1; while (k > rleCounts.get(i)) { k -= rleCounts.get(i--); } k = rleCounts.get(i) - k; int pos = 0; while (true) { r[pos++] = rleObjectList.get(i); if (pos == n) break; k--; if (k < 0) { i--; k = rleCounts.get(i) - 1; } } return r; } public E get(int index) { if (runLengthEncoded) { int k = 0; int i = 0; while (k <= index) { k += rleCounts.get(i); i++; } return rleObjectList.get(i - 1); } else { return objectList.get(index); } } /** * the number of elements in this segment (not taking into account any compression due to run-length encoding) * * @return */ @Override public int size() { return size; } public void consolidate() { rleCounts = new IntArray(); rleValues = new IntArray(); rawSize = enumRawSize = enumRleSize = 1; // subFormatId byte rawSize += VarIntUtil.getEncodedSize(size); enumRawSize += VarIntUtil.getEncodedSize(size) + VarIntUtil.getEncodedSize(unique.size()); enumRleSize += VarIntUtil.getEncodedSize(unique.size()); for (int i = 0; i < size; i++) { int valueId = enumValues.get(i); byte[] b = unique.get(valueId).b; rawSize += VarIntUtil.getEncodedSize(b.length) + b.length; enumRawSize += VarIntUtil.getEncodedSize(valueId); boolean rleAdded = false; int rleId = rleValues.size() - 1; if (rleId >= 0) { int lastValueId = rleValues.get(rleId); if (valueId == lastValueId) { rleCounts.set(rleId, rleCounts.get(rleId) + 1); rleAdded = true; } } if (!rleAdded) { rleCounts.add(1); rleValues.add(valueId); } } for (int i = 0; i < unique.size(); i++) { HashableByteArray se = unique.get(i); byte[] b = se.b; int s = VarIntUtil.getEncodedSize(b.length) + b.length; enumRawSize += s; enumRleSize += s; } enumRleSize += VarIntUtil.getEncodedSize(rleCounts.size()); for (int i = 0; i < rleCounts.size(); i++) { enumRleSize += VarIntUtil.getEncodedSize(rleCounts.get(i)) + VarIntUtil.getEncodedSize(rleValues.get(i)); } consolidated = true; } @Override public void makeWritable() { if (writable) { return; } unique = new ArrayList(); valuemap = new HashMap<>(); enumValues = new IntArray(); if (runLengthEncoded) { objectList = new ArrayList(size); for (int i = 0; i < rleObjectList.size(); i++) { var o = rleObjectList.get(i); byte[] b = objSerializer.serialize(o); HashableByteArray se = new HashableByteArray(b); int idx = valuemap.computeIfAbsent(se, k -> { int newIdx = unique.size(); unique.add(k); return newIdx; }); for (int k = 0; k < rleCounts.get(i); k++) { objectList.add(o); enumValues.add(idx); } } runLengthEncoded = false; } else { for (var o : objectList) { byte[] b = objSerializer.serialize(o); HashableByteArray se = new HashableByteArray(b); int idx = valuemap.computeIfAbsent(se, k -> { int newIdx = unique.size(); unique.add(k); return newIdx; }); enumValues.add(idx); } } writable = true; } @SuppressWarnings("rawtypes") @Override public boolean equals(Object obj) { if (this == obj) return true; if (obj == null) return false; if (getClass() != obj.getClass()) return false; ObjectSegment other = (ObjectSegment) obj; if (unique == null) { if (other.unique != null) return false; } else if (!unique.equals(other.unique)) return false; if (enumValues == null) { if (other.enumValues != null) return false; } else if (!enumValues.equals(other.enumValues)) return false; return true; } } /** * wrapper around byte[] to allow it to be used in HashMaps */ class HashableByteArray { private int hash = 0; final byte[] b; public HashableByteArray(byte[] b) { this.b = b; } @Override public int hashCode() { if (hash == 0) { hash = Arrays.hashCode(b); } return hash; } @Override public boolean equals(Object obj) { if (this == obj) return true; if (obj == null) return false; if (getClass() != obj.getClass()) return false; HashableByteArray other = (HashableByteArray) obj; if (hashCode() != other.hashCode()) return false; if (!Arrays.equals(b, other.b)) return false; return true; } } interface ObjectSerializer { byte getFormatId(); E deserialize(byte[] b) throws DecodingException; byte[] serialize(E e); }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy