// com.yahoo.sketches.theta.PreambleUtil Maven / Gradle / Ivy
/*
* Copyright 2015-16, Yahoo! Inc.
* Licensed under the terms of the Apache License 2.0. See LICENSE file at the project root for terms.
*/
package com.yahoo.sketches.theta;
import static com.yahoo.sketches.Util.LS;
import static com.yahoo.sketches.Util.zeroPad;
import java.nio.ByteOrder;
import com.yahoo.memory.Memory;
import com.yahoo.memory.NativeMemory;
import com.yahoo.sketches.Family;
import com.yahoo.sketches.ResizeFactor;
import com.yahoo.sketches.SketchesArgumentException;
//@formatter:off
/**
* This class defines the preamble data structure and provides basic utilities for some of the key
* fields.
* The intent of the design of this class was to isolate the detailed knowledge of the bit and
* byte layout of the serialized form of the sketches derived from the Sketch class into one place.
* This allows the possibility of the introduction of different serialization
* schemes with minimal impact on the rest of the library.
*
*
* MAP: Low significance bytes of this long data structure are on the right. However, the
* multi-byte integers (int and long) are stored in native byte order. The
* byte values are treated as unsigned.
*
* An empty CompactSketch only requires 8 bytes. An exact (non-estimating) compact
* sketch requires 16 bytes of preamble. UpdateSketches require 24 bytes of preamble. Union objects
* require 32 bytes of preamble.
*
*
* <pre>
* Long || Start Byte Adr:
* Adr:
*      ||    7   |    6   |    5   |    4   |    3   |    2   |    1   |     0              |
*  0   ||    Seed Hash    | Flags  | LgArr  | lgNom  | FamID  | SerVer | RF, Preamble_Longs |
*
*      ||   15   |   14   |   13   |   12   |   11   |   10   |    9   |     8              |
*  1   ||-----------------p-----------------|----------Retained Entries Count---------------|
*
*      ||   23   |   22   |   21   |   20   |   19   |   18   |   17   |    16              |
*  2   ||------------------------------THETA_LONG-------------------------------------------|
*
*      ||   31   |   30   |   29   |   28   |   27   |   26   |   25   |    24              |
*  3   ||---------------------------Start of Long Array-------------------------------------|
* </pre>
*
*
* @author Lee Rhodes
*/
final class PreambleUtil {

  /** Static utility holder; never instantiated. */
  private PreambleUtil() {}

  // ###### DO NOT MESS WITH THIS FROM HERE ...
  // Preamble byte Addresses
  static final int PREAMBLE_LONGS_BYTE        = 0; //low 6 bits
  //upper 2 bits. BIT offset in byte. Not used by compact or direct.
  static final int LG_RESIZE_FACTOR_BITS      = 6;
  static final int SER_VER_BYTE               = 1;
  static final int FAMILY_BYTE                = 2; //SerVer1,2 was SKETCH_TYPE_BYTE
  static final int LG_NOM_LONGS_BYTE          = 3; //not used by compact
  static final int LG_ARR_LONGS_BYTE          = 4; //not used by compact
  static final int FLAGS_BYTE                 = 5;
  static final int SEED_HASH_SHORT            = 6; //byte 6,7
  static final int RETAINED_ENTRIES_INT       = 8; //8 byte aligned
  static final int P_FLOAT                    = 12; //4 byte aligned, not used by compact
  static final int THETA_LONG                 = 16; //8-byte aligned
  static final int UNION_THETA_LONG           = 24; //8-byte aligned, only used by Union

  // flag bit masks
  static final int BIG_ENDIAN_FLAG_MASK       = 1; //SerVer 1, 2, 3
  static final int READ_ONLY_FLAG_MASK        = 2; //Set but not read. Reserved. SerVer 1, 2, 3
  static final int EMPTY_FLAG_MASK            = 4; //SerVer 2, 3
  static final int COMPACT_FLAG_MASK          = 8; //SerVer 2 was NO_REBUILD_FLAG_MASK
  static final int ORDERED_FLAG_MASK          = 16;//SerVer 2 was UNORDERED_FLAG_MASK

  //Backward compatibility: SerVer1 preamble always 3 longs, SerVer2 preamble: 1, 2, 3 longs
  //               SKETCH_TYPE_BYTE             2  //SerVer1, SerVer2
  //  V1, V2 types:  Alpha = 1, QuickSelect = 2, SetSketch = 3; V3 only: Buffered QS = 4
  static final int LG_RESIZE_RATIO_BYTE_V1    = 5; //used by SerVer 1
  static final int FLAGS_BYTE_V1              = 6; //used by SerVer 1

  //Other constants
  static final int SER_VER                    = 3;

  static final boolean NATIVE_ORDER_IS_BIG_ENDIAN =
      (ByteOrder.nativeOrder() == ByteOrder.BIG_ENDIAN);

  static final double MAX_THETA_LONG_AS_DOUBLE = Long.MAX_VALUE;

  /**
   * Computes the number of bytes required for a non-full sized sketch in hash-table form.
   * This can be used to compute current storage size for heap sketches, or current off-heap memory
   * required for off-heap (direct) sketches. This does not apply for compact sketches.
   *
   * <p>The arithmetic is done in <i>int</i>: the result is
   * <i>8 * 2^lgArrLongs + 8 * preambleLongs</i> and overflows for <i>lgArrLongs</i> of 28 or
   * more.</p>
   *
   * @param lgArrLongs log2(current hash-table size)
   * @param preambleLongs current preamble size
   * @return the size in bytes
   */
  static final int getMemBytes(int lgArrLongs, int preambleLongs) {
    return (8 << lgArrLongs) + (preambleLongs << 3);
  }

  // STRINGS

  /**
   * Returns a human readable string summary of the preamble state of the given byte array.
   * Used primarily in testing.
   *
   * @param byteArr the given byte array.
   * @return the summary preamble string.
   */
  public static String preambleToString(byte[] byteArr) {
    final Memory mem = new NativeMemory(byteArr);
    return preambleToString(mem);
  }

  /**
   * Returns a human readable string summary of the preamble state of the given Memory.
   * Note: other than making sure that the given Memory size is large
   * enough for just the preamble, this does not do much value checking of the contents of the
   * preamble as this is primarily a tool for debugging the preamble visually.
   *
   * <p>Fields that live in preamble longs beyond the declared preamble size are not read from
   * the Memory; default values are reported instead (count = 0, p = 1.0, theta = maximum)
   * and are marked as "ABSENT, ASSUMED" in the output.</p>
   *
   * @param mem the given Memory.
   * @return the summary preamble string.
   * @throws SketchesArgumentException if the Memory capacity is smaller than 8 bytes or smaller
   * than the preamble size declared in its first byte.
   */
  public static String preambleToString(Memory mem) {
    final int preLongs = getAndCheckPreLongs(mem); //make sure we can get the assumed preamble
    final long pre0 = mem.getLong(0);

    final ResizeFactor rf = ResizeFactor.getRF(extractResizeFactor(pre0));
    final int serVer = extractSerVer(pre0);
    final Family family = Family.idToFamily(extractFamilyID(pre0));
    final int lgNomLongs = extractLgNomLongs(pre0);
    final int lgArrLongs = extractLgArrLongs(pre0);

    //Flags
    final int flags = extractFlags(pre0);
    final String flagsStr = zeroPad(Integer.toBinaryString(flags), 8) + ", " + (flags);
    final boolean bigEndian = (flags & BIG_ENDIAN_FLAG_MASK) > 0;
    final String nativeOrder = ByteOrder.nativeOrder().toString();
    final boolean compact = (flags & COMPACT_FLAG_MASK) > 0;
    final boolean ordered = (flags & ORDERED_FLAG_MASK) > 0;
    final boolean readOnly = (flags & READ_ONLY_FLAG_MASK) > 0;
    final boolean empty = (flags & EMPTY_FLAG_MASK) > 0;

    final int seedHash = extractSeedHash(pre0);

    //Defaults, used for whichever fields are absent from the declared preamble:
    // count and p are absent if preLongs == 1; theta if preLongs <= 2; union theta if <= 3.
    int curCount = 0;
    float p = (float)1.0;
    long thetaLong = (long)(p * MAX_THETA_LONG_AS_DOUBLE);
    long thetaULong = thetaLong;

    if ((preLongs >= 2) && (preLongs <= 4)) {
      final long pre1 = mem.getLong(RETAINED_ENTRIES_INT);
      curCount = extractCurCount(pre1);
      p = extractP(pre1);
      //With only 2 preamble longs theta is not stored, so it is derived from p.
      thetaLong = (preLongs == 2)
          ? (long)(p * MAX_THETA_LONG_AS_DOUBLE)
          : mem.getLong(THETA_LONG);
      //Only a 4-long preamble carries a separate union theta.
      thetaULong = (preLongs == 4) ? mem.getLong(UNION_THETA_LONG) : thetaLong;
    } //else: the same as preLongs == 1

    final double thetaDbl = thetaLong / MAX_THETA_LONG_AS_DOUBLE;
    final String thetaHex = zeroPad(Long.toHexString(thetaLong), 16);
    final double thetaUDbl = thetaULong / MAX_THETA_LONG_AS_DOUBLE;
    final String thetaUHex = zeroPad(Long.toHexString(thetaULong), 16);

    final StringBuilder sb = new StringBuilder();
    sb.append(LS)
      .append("### SKETCH PREAMBLE SUMMARY:").append(LS)
      .append("Byte 0: Preamble Longs : ").append(preLongs).append(LS)
      .append("Byte 0: ResizeFactor : ").append(rf.toString()).append(LS)
      .append("Byte 1: Serialization Version: ").append(serVer).append(LS)
      .append("Byte 2: Family : ").append(family.toString()).append(LS)
      .append("Byte 3: LgNomLongs : ").append(lgNomLongs).append(LS)
      .append("Byte 4: LgArrLongs : ").append(lgArrLongs).append(LS)
      .append("Byte 5: Flags Field : ").append(flagsStr).append(LS)
      .append(" BIG_ENDIAN_STORAGE : ").append(bigEndian).append(LS)
      .append(" (Native Byte Order) : ").append(nativeOrder).append(LS)
      .append(" READ_ONLY : ").append(readOnly).append(LS)
      .append(" EMPTY : ").append(empty).append(LS)
      .append(" COMPACT : ").append(compact).append(LS)
      .append(" ORDERED : ").append(ordered).append(LS)
      .append("Bytes 6-7 : Seed Hash : ").append(Integer.toHexString(seedHash)).append(LS);

    //Field details are only printed for the recognized preamble sizes 1 through 4.
    if ((preLongs >= 1) && (preLongs <= 4)) {
      if (preLongs == 1) { //everything past the first long is assumed
        sb.append(" --ABSENT, ASSUMED:").append(LS);
      }
      sb.append("Bytes 8-11 : CurrentCount : ").append(curCount).append(LS)
        .append("Bytes 12-15: P : ").append(p).append(LS);
      if (preLongs == 2) { //count and p are present, theta is assumed
        sb.append(" --ABSENT, ASSUMED:").append(LS);
      }
      sb.append("Bytes 16-23: Theta (double) : ").append(thetaDbl).append(LS)
        .append(" Theta (long) : ").append(thetaLong).append(LS)
        .append(" Theta (long,hex) : ").append(thetaHex).append(LS);
      if (preLongs == 4) {
        //The union theta is the 8-byte long starting at UNION_THETA_LONG (24): bytes 24-31.
        sb.append("Bytes 24-31: ThetaU (double) : ").append(thetaUDbl).append(LS)
          .append(" ThetaU (long) : ").append(thetaULong).append(LS)
          .append(" ThetaU (long,hex): ").append(thetaUHex).append(LS);
      }
    }

    sb.append( "Preamble Bytes : ").append(preLongs * 8).append(LS);
    sb.append( "Data Bytes : ").append(curCount * 8).append(LS);
    sb.append( "TOTAL Sketch Bytes : ").append(mem.getCapacity()).append(LS)
      .append("### END SKETCH PREAMBLE SUMMARY").append(LS);
    return sb.toString();
  }

  //@formatter:on

  //Extract from long and insert into long methods.
  //In all of these, "byte N" means bits [8N, 8N+7] of the long, with byte 0 being the
  //least-significant byte.

  /**
   * Extracts the preamble-longs field: the low 6 bits of the given long.
   * @param long0 the first preamble long
   * @return the preamble-longs value, 0 to 63
   */
  static int extractPreLongs(final long long0) {
    final long mask = 0X3FL;
    return (int) (long0 & mask);
  }

  /**
   * Extracts the 2-bit resize-factor field: bits 6 and 7 of the given long.
   * @param long0 the first preamble long
   * @return the resize-factor field value, 0 to 3
   */
  static int extractResizeFactor(final long long0) {
    final int shift = LG_RESIZE_FACTOR_BITS; // units in bits
    final long mask = 0X3L;
    return (int) ((long0 >>> shift) & mask);
  }

  /**
   * Extracts the serialization version: byte {@link #SER_VER_BYTE} of the given long.
   * @param long0 the first preamble long
   * @return the unsigned byte value, 0 to 255
   */
  static int extractSerVer(final long long0) {
    final int shift = SER_VER_BYTE << 3;
    final long mask = 0XFFL;
    return (int) ((long0 >>> shift) & mask);
  }

  /**
   * Extracts the family ID: byte {@link #FAMILY_BYTE} of the given long.
   * @param long0 the first preamble long
   * @return the unsigned byte value, 0 to 255
   */
  static int extractFamilyID(final long long0) {
    final int shift = FAMILY_BYTE << 3;
    final long mask = 0XFFL;
    return (int) ((long0 >>> shift) & mask);
  }

  /**
   * Extracts lgNomLongs: byte {@link #LG_NOM_LONGS_BYTE} of the given long.
   * @param long0 the first preamble long
   * @return the unsigned byte value, 0 to 255
   */
  static int extractLgNomLongs(final long long0) {
    final int shift = LG_NOM_LONGS_BYTE << 3;
    final long mask = 0XFFL;
    return (int) ((long0 >>> shift) & mask);
  }

  /**
   * Extracts lgArrLongs: byte {@link #LG_ARR_LONGS_BYTE} of the given long.
   * @param long0 the first preamble long
   * @return the unsigned byte value, 0 to 255
   */
  static int extractLgArrLongs(final long long0) {
    final int shift = LG_ARR_LONGS_BYTE << 3;
    final long mask = 0XFFL;
    return (int) ((long0 >>> shift) & mask);
  }

  /**
   * Extracts the flags: byte {@link #FLAGS_BYTE} of the given long.
   * @param long0 the first preamble long
   * @return the unsigned byte value, 0 to 255
   */
  static int extractFlags(final long long0) {
    final int shift = FLAGS_BYTE << 3;
    final long mask = 0XFFL;
    return (int) ((long0 >>> shift) & mask);
  }

  /**
   * Extracts the flags using the SerVer 1 position: byte {@link #FLAGS_BYTE_V1} of the given
   * long.
   * @param long0 the first preamble long
   * @return the unsigned byte value, 0 to 255
   */
  static int extractFlagsV1(final long long0) {
    final int shift = FLAGS_BYTE_V1 << 3;
    final long mask = 0XFFL;
    return (int) ((long0 >>> shift) & mask);
  }

  /**
   * Extracts the 16-bit seed hash: the two bytes starting at byte {@link #SEED_HASH_SHORT} of
   * the given long.
   * @param long0 the first preamble long
   * @return the unsigned 16-bit value, 0 to 65535
   */
  static int extractSeedHash(final long long0) {
    final int shift = SEED_HASH_SHORT << 3;
    final long mask = 0XFFFFL;
    return (int) ((long0 >>> shift) & mask);
  }

  /**
   * Extracts the retained-entries count: the low 32 bits of the given long.
   * @param long1 the second preamble long
   * @return the low 32 bits as an int
   */
  static int extractCurCount(final long long1) {
    final long mask = 0XFFFFFFFFL;
    return (int) (long1 & mask);
  }

  /**
   * Extracts p: the high 32 bits of the given long, reinterpreted as an IEEE 754 float.
   * @param long1 the second preamble long
   * @return the float whose bit pattern is the high 32 bits
   */
  static float extractP(final long long1) {
    final int shift = 32;
    return Float.intBitsToFloat((int)(long1 >>> shift));
  }

  /**
   * Returns the given long with its low 6 bits replaced by the low 6 bits of <i>preLongs</i>.
   * All other bits are unchanged.
   * @param preLongs the preamble-longs value to insert
   * @param long0 the first preamble long
   * @return the updated long
   */
  static long insertPreLongs(final int preLongs, final long long0) {
    final long mask = 0X3FL;
    return (preLongs & mask) | (~mask & long0);
  }

  /**
   * Returns the given long with bits 6 and 7 replaced by the low 2 bits of <i>rf</i>.
   * All other bits are unchanged.
   * @param rf the resize-factor field value to insert
   * @param long0 the first preamble long
   * @return the updated long
   */
  static long insertResizeFactor(final int rf, final long long0) {
    final int shift = LG_RESIZE_FACTOR_BITS; // units in bits
    final long mask = 3L;
    return ((rf & mask) << shift) | (~(mask << shift) & long0);
  }

  /**
   * Returns the given long with byte {@link #SER_VER_BYTE} replaced by the low 8 bits of
   * <i>serVer</i>. All other bits are unchanged.
   * @param serVer the serialization version to insert
   * @param long0 the first preamble long
   * @return the updated long
   */
  static long insertSerVer(final int serVer, final long long0) {
    final int shift = SER_VER_BYTE << 3;
    final long mask = 0XFFL;
    return ((serVer & mask) << shift) | (~(mask << shift) & long0);
  }

  /**
   * Returns the given long with byte {@link #FAMILY_BYTE} replaced by the low 8 bits of
   * <i>familyID</i>. All other bits are unchanged.
   * @param familyID the family ID to insert
   * @param long0 the first preamble long
   * @return the updated long
   */
  static long insertFamilyID(final int familyID, final long long0) {
    final int shift = FAMILY_BYTE << 3;
    final long mask = 0XFFL;
    return ((familyID & mask) << shift) | (~(mask << shift) & long0);
  }

  /**
   * Returns the given long with byte {@link #LG_NOM_LONGS_BYTE} replaced by the low 8 bits of
   * <i>lgNomLongs</i>. All other bits are unchanged.
   * @param lgNomLongs the value to insert
   * @param long0 the first preamble long
   * @return the updated long
   */
  static long insertLgNomLongs(final int lgNomLongs, final long long0) {
    final int shift = LG_NOM_LONGS_BYTE << 3;
    final long mask = 0XFFL;
    return ((lgNomLongs & mask) << shift) | (~(mask << shift) & long0);
  }

  /**
   * Returns the given long with byte {@link #LG_ARR_LONGS_BYTE} replaced by the low 8 bits of
   * <i>lgArrLongs</i>. All other bits are unchanged.
   * @param lgArrLongs the value to insert
   * @param long0 the first preamble long
   * @return the updated long
   */
  static long insertLgArrLongs(final int lgArrLongs, final long long0) {
    final int shift = LG_ARR_LONGS_BYTE << 3;
    final long mask = 0XFFL;
    return ((lgArrLongs & mask) << shift) | (~(mask << shift) & long0);
  }

  /**
   * Returns the given long with byte {@link #FLAGS_BYTE} replaced by the low 8 bits of
   * <i>flags</i>. All other bits are unchanged.
   * @param flags the flags value to insert
   * @param long0 the first preamble long
   * @return the updated long
   */
  static long insertFlags(final int flags, final long long0) {
    final int shift = FLAGS_BYTE << 3;
    final long mask = 0XFFL;
    return ((flags & mask) << shift) | (~(mask << shift) & long0);
  }

  /**
   * Returns the given long with the two bytes starting at byte {@link #SEED_HASH_SHORT}
   * replaced by the low 16 bits of <i>seedHash</i>. All other bits are unchanged.
   * @param seedHash the seed hash to insert
   * @param long0 the first preamble long
   * @return the updated long
   */
  static long insertSeedHash(final int seedHash, final long long0) {
    final int shift = SEED_HASH_SHORT << 3;
    final long mask = 0XFFFFL;
    return ((seedHash & mask) << shift) | (~(mask << shift) & long0);
  }

  /**
   * Returns the given long with its low 32 bits replaced by <i>curCount</i>.
   * The high 32 bits are unchanged.
   * @param curCount the retained-entries count to insert
   * @param long1 the second preamble long
   * @return the updated long
   */
  static long insertCurCount(final int curCount, final long long1) { //Retained Entries
    final long mask = 0XFFFFFFFFL;
    //The mask also strips the sign extension that widening a negative int would introduce.
    return (curCount & mask) | (~mask & long1);
  }

  /**
   * Returns the given long with its high 32 bits replaced by the raw IEEE 754 bit pattern of
   * <i>p</i>. The low 32 bits are unchanged.
   * @param p the float value to insert
   * @param long1 the second preamble long
   * @return the updated long
   */
  static long insertP(final float p, final long long1) {
    final int shift = 32;
    final long mask = 0XFFFFFFFFL;
    return ((Float.floatToRawIntBits(p) & mask) << shift) | (~(mask << shift) & long1);
  }

  /**
   * Checks Memory for capacity to hold the preamble and returns the extracted preLongs.
   * The capacity must be at least 8 bytes, and at least 8 bytes per declared preamble long.
   * @param mem the given Memory
   * @return the extracted prelongs value.
   * @throws SketchesArgumentException if the capacity is too small.
   */
  static int getAndCheckPreLongs(Memory mem) {
    final long cap = mem.getCapacity();
    //The first long must be readable before the declared preamble size can be extracted.
    if (cap < 8) { throwNotBigEnough(cap, 8); }
    final long pre0 = mem.getLong(0);
    final int preLongs = extractPreLongs(pre0);
    final int required = Math.max(preLongs << 3, 8);
    if (cap < required) { throwNotBigEnough(cap, required); }
    return preLongs;
  }

  /**
   * Always throws, reporting the actual and the required size.
   * @param cap the actual capacity in bytes
   * @param required the required capacity in bytes
   * @throws SketchesArgumentException always
   */
  private static void throwNotBigEnough(long cap, int required) {
    throw new SketchesArgumentException(
        "Possible Corruption: Size of byte array or Memory not large enough: Size: " + cap
        + ", Required: " + required);
  }

}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy