All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.yahoo.sketches.theta.PreambleUtil Maven / Gradle / Ivy

/*
 * Copyright 2015-16, Yahoo! Inc.
 * Licensed under the terms of the Apache License 2.0. See LICENSE file at the project root for terms.
 */

package com.yahoo.sketches.theta;

import static com.yahoo.memory.UnsafeUtil.unsafe;
import static com.yahoo.sketches.Util.LS;
import static com.yahoo.sketches.Util.zeroPad;

import java.nio.ByteOrder;

import com.yahoo.memory.Memory;
import com.yahoo.memory.NativeMemory;
import com.yahoo.sketches.Family;
import com.yahoo.sketches.ResizeFactor;
import com.yahoo.sketches.SketchesArgumentException;

//@formatter:off

/**
 * This class defines the preamble data structure and provides basic utilities for some of the key
 * fields.
 * 

The intent of the design of this class was to isolate the detailed knowledge of the bit and * byte layout of the serialized form of the sketches derived from the Sketch class into one place. * This allows the possibility of the introduction of different serialization * schemes with minimal impact on the rest of the library.

* *

* MAP: Low significance bytes of this long data structure are on the right. However, the * multi-byte integers (int and long) are stored in native byte order. The * byte values are treated as unsigned.

* *

An empty CompactSketch only requires 8 bytes. An exact (non-estimating) compact * sketch requires 16 bytes of preamble. UpdateSketches require 24 bytes of preamble. Union objects * require 32 bytes of preamble.

* *
 * Long || Start Byte Adr:
 * Adr:
 *      ||    7   |    6   |    5   |    4   |    3   |    2   |    1   |     0              |
 *  0   ||    Seed Hash    | Flags  |  LgArr |  lgNom | FamID  | SerVer | RF, Preamble_Longs |
 *
 *      ||   15   |   14   |   13   |   12   |   11   |   10   |    9   |     8              |
 *  1   ||-----------------p-----------------|----------Retained Entries Count---------------|
 *
 *      ||   23   |   22   |   21    |  20   |   19   |   18   |   17   |    16              |
 *  2   ||------------------------------THETA_LONG-------------------------------------------|
 *
 *      ||   31   |   30   |   29   |   28   |   27   |   26   |   25   |    24              |
 *  3   ||---------------------------Start of Long Array-------------------------------------|
 *  
* * @author Lee Rhodes */ final class PreambleUtil { private PreambleUtil() {} // ###### DO NOT MESS WITH THIS FROM HERE ... // Preamble byte Addresses static final int PREAMBLE_LONGS_BYTE = 0; //lower 6 bits in byte. static final int LG_RESIZE_FACTOR_BIT = 6; //upper 2 bits in byte. Not used by compact, direct static final int SER_VER_BYTE = 1; static final int FAMILY_BYTE = 2; //SerVer1,2 was SKETCH_TYPE_BYTE static final int LG_NOM_LONGS_BYTE = 3; //not used by compact static final int LG_ARR_LONGS_BYTE = 4; //not used by compact static final int FLAGS_BYTE = 5; static final int SEED_HASH_SHORT = 6; //byte 6,7 static final int RETAINED_ENTRIES_INT = 8; //8 byte aligned static final int P_FLOAT = 12; //4 byte aligned, not used by compact static final int THETA_LONG = 16; //8-byte aligned static final int UNION_THETA_LONG = 24; //8-byte aligned, only used by Union // flag bit masks static final int BIG_ENDIAN_FLAG_MASK = 1; //SerVer 1, 2, 3 static final int READ_ONLY_FLAG_MASK = 2; //Set but not read. Reserved. SerVer 1, 2, 3 static final int EMPTY_FLAG_MASK = 4; //SerVer 2, 3 static final int COMPACT_FLAG_MASK = 8; //SerVer 2 was NO_REBUILD_FLAG_MASK static final int ORDERED_FLAG_MASK = 16;//SerVer 2 was UNORDERED_FLAG_MASK //Backward compatibility: SerVer1 preamble always 3 longs, SerVer2 preamble: 1, 2, 3 longs // SKETCH_TYPE_BYTE 2 //SerVer1, SerVer2 // V1, V2 types: Alpha = 1, QuickSelect = 2, SetSketch = 3; V3 only: Buffered QS = 4 static final int LG_RESIZE_RATIO_BYTE_V1 = 5; //used by SerVer 1 static final int FLAGS_BYTE_V1 = 6; //used by SerVer 1 //Other constants static final int SER_VER = 3; static final boolean NATIVE_ORDER_IS_BIG_ENDIAN = (ByteOrder.nativeOrder() == ByteOrder.BIG_ENDIAN); static final double MAX_THETA_LONG_AS_DOUBLE = Long.MAX_VALUE; /** * Computes the number of bytes required for a non-full sized sketch in hash-table form. * This can be used to compute current storage size for heap sketches, or current off-heap memory * requred for off-heap (direct) sketches. This does not apply for compact sketches. * @param lgArrLongs log2(current hash-table size) * @param preambleLongs current preamble size * @return the size in bytes */ static final int getMemBytes(final int lgArrLongs, final int preambleLongs) { return (8 << lgArrLongs) + (preambleLongs << 3); } // STRINGS /** * Returns a human readable string summary of the preamble state of the given byte array. * Used primarily in testing. * * @param byteArr the given byte array. * @return the summary preamble string. */ public static String preambleToString(final byte[] byteArr) { final Memory mem = new NativeMemory(byteArr); return preambleToString(mem); } /** * Returns a human readable string summary of the preamble state of the given Memory. * Note: other than making sure that the given Memory size is large * enough for just the preamble, this does not do much value checking of the contents of the * preamble as this is primarily a tool for debugging the preamble visually. * * @param mem the given Memory. * @return the summary preamble string. */ public static String preambleToString(final Memory mem) { final Object memObj = mem.array(); //may be null final long memAdd = mem.getCumulativeOffset(0L); final int preLongs = getAndCheckPreLongs(memObj, memAdd, mem); final ResizeFactor rf = ResizeFactor.getRF(extractLgResizeFactor(memObj, memAdd)); final int serVer = extractSerVer(memObj, memAdd); final Family family = Family.idToFamily(extractFamilyID(memObj, memAdd)); final int lgNomLongs = extractLgNomLongs(memObj, memAdd); final int lgArrLongs = extractLgArrLongs(memObj, memAdd); //Flags final int flags = extractFlags(memObj, memAdd); final String flagsStr = zeroPad(Integer.toBinaryString(flags), 8) + ", " + (flags); final boolean bigEndian = (flags & BIG_ENDIAN_FLAG_MASK) > 0; final String nativeOrder = ByteOrder.nativeOrder().toString(); final boolean compact = (flags & COMPACT_FLAG_MASK) > 0; final boolean ordered = (flags & ORDERED_FLAG_MASK) > 0; final boolean readOnly = (flags & READ_ONLY_FLAG_MASK) > 0; final boolean empty = (flags & EMPTY_FLAG_MASK) > 0; final int seedHash = extractSeedHash(memObj, memAdd); //Assumed if preLongs == 1 int curCount = 0; float p = (float)1.0; //Assumed if preLongs == 1 or 2 long thetaLong = (long)(p * MAX_THETA_LONG_AS_DOUBLE); //Assumed if preLongs == 1 or 2 or 3 long thetaULong = thetaLong; if (preLongs == 2) { curCount = extractCurCount(memObj, memAdd); p = extractP(memObj, memAdd); thetaLong = (long)(p * MAX_THETA_LONG_AS_DOUBLE); thetaULong = thetaLong; } else if (preLongs == 3) { curCount = extractCurCount(memObj, memAdd); p = extractP(memObj, memAdd); thetaLong = extractThetaLong(memObj, memAdd); thetaULong = thetaLong; } else if (preLongs == 4) { curCount = extractCurCount(memObj, memAdd); p = extractP(memObj, memAdd); thetaLong = extractThetaLong(memObj, memAdd); thetaULong = extractUnionThetaLong(memObj, memAdd); } //else: the same as preLongs == 1 final double thetaDbl = thetaLong / MAX_THETA_LONG_AS_DOUBLE; final String thetaHex = zeroPad(Long.toHexString(thetaLong), 16); final double thetaUDbl = thetaULong / MAX_THETA_LONG_AS_DOUBLE; final String thetaUHex = zeroPad(Long.toHexString(thetaULong), 16); final StringBuilder sb = new StringBuilder(); sb.append(LS) .append("### SKETCH PREAMBLE SUMMARY:").append(LS) .append("Byte 0: Preamble Longs : ").append(preLongs).append(LS) .append("Byte 0: ResizeFactor : ").append(rf.toString()).append(LS) .append("Byte 1: Serialization Version: ").append(serVer).append(LS) .append("Byte 2: Family : ").append(family.toString()).append(LS) .append("Byte 3: LgNomLongs : ").append(lgNomLongs).append(LS) .append("Byte 4: LgArrLongs : ").append(lgArrLongs).append(LS) .append("Byte 5: Flags Field : ").append(flagsStr).append(LS) .append(" BIG_ENDIAN_STORAGE : ").append(bigEndian).append(LS) .append(" (Native Byte Order) : ").append(nativeOrder).append(LS) .append(" READ_ONLY : ").append(readOnly).append(LS) .append(" EMPTY : ").append(empty).append(LS) .append(" COMPACT : ").append(compact).append(LS) .append(" ORDERED : ").append(ordered).append(LS) .append("Bytes 6-7 : Seed Hash : ").append(Integer.toHexString(seedHash)).append(LS); if (preLongs == 1) { sb.append(" --ABSENT, ASSUMED:").append(LS); sb.append("Bytes 8-11 : CurrentCount : ").append(curCount).append(LS) .append("Bytes 12-15: P : ").append(p).append(LS); sb.append("Bytes 16-23: Theta (double) : ").append(thetaDbl).append(LS) .append(" Theta (long) : ").append(thetaLong).append(LS) .append(" Theta (long,hex) : ").append(thetaHex).append(LS); } if (preLongs == 2) { sb.append("Bytes 8-11 : CurrentCount : ").append(curCount).append(LS) .append("Bytes 12-15: P : ").append(p).append(LS); sb.append(" --ABSENT, ASSUMED:").append(LS); sb.append("Bytes 16-23: Theta (double) : ").append(thetaDbl).append(LS) .append(" Theta (long) : ").append(thetaLong).append(LS) .append(" Theta (long,hex) : ").append(thetaHex).append(LS); } if (preLongs == 3) { sb.append("Bytes 8-11 : CurrentCount : ").append(curCount).append(LS) .append("Bytes 12-15: P : ").append(p).append(LS); sb.append("Bytes 16-23: Theta (double) : ").append(thetaDbl).append(LS) .append(" Theta (long) : ").append(thetaLong).append(LS) .append(" Theta (long,hex) : ").append(thetaHex).append(LS); } if (preLongs == 4) { sb.append("Bytes 8-11 : CurrentCount : ").append(curCount).append(LS) .append("Bytes 12-15: P : ").append(p).append(LS); sb.append("Bytes 16-23: Theta (double) : ").append(thetaDbl).append(LS) .append(" Theta (long) : ").append(thetaLong).append(LS) .append(" Theta (long,hex) : ").append(thetaHex).append(LS); sb.append("Bytes 25-31: ThetaU (double) : ").append(thetaUDbl).append(LS) .append(" ThetaU (long) : ").append(thetaULong).append(LS) .append(" ThetaU (long,hex): ").append(thetaUHex).append(LS); } sb.append( "Preamble Bytes : ").append(preLongs * 8).append(LS); sb.append( "Data Bytes : ").append(curCount * 8).append(LS); sb.append( "TOTAL Sketch Bytes : ").append(mem.getCapacity()).append(LS) .append("### END SKETCH PREAMBLE SUMMARY").append(LS); return sb.toString(); } //@formatter:on static int extractPreLongs(final Object memObj, final long memAdd) { return unsafe.getByte(memObj, memAdd + PREAMBLE_LONGS_BYTE) & 0X3F; } static int extractLgResizeFactor(final Object memObj, final long memAdd) { return (unsafe.getByte(memObj, memAdd + PREAMBLE_LONGS_BYTE) >> LG_RESIZE_FACTOR_BIT) & 0X3; } static int extractLgResizeRatioV1(final Object memObj, final long memAdd) { return (unsafe.getByte(memObj, memAdd + LG_RESIZE_RATIO_BYTE_V1)) & 0X3; } static int extractSerVer(final Object memObj, final long memAdd) { return unsafe.getByte(memObj, memAdd + SER_VER_BYTE) & 0XFF; } static int extractFamilyID(final Object memObj, final long memAdd) { return unsafe.getByte(memObj, memAdd + FAMILY_BYTE) & 0XFF; } static int extractLgNomLongs(final Object memObj, final long memAdd) { return unsafe.getByte(memObj, memAdd + LG_NOM_LONGS_BYTE) & 0XFF; } static int extractLgArrLongs(final Object memObj, final long memAdd) { return unsafe.getByte(memObj, memAdd + LG_ARR_LONGS_BYTE) & 0XFF; } static int extractFlags(final Object memObj, final long memAdd) { return unsafe.getByte(memObj, memAdd + FLAGS_BYTE) & 0XFF; } static int extractFlagsV1(final Object memObj, final long memAdd) { return unsafe.getByte(memObj, memAdd + FLAGS_BYTE_V1) & 0XFF; } static int extractSeedHash(final Object memObj, final long memAdd) { return unsafe.getShort(memObj, memAdd + SEED_HASH_SHORT) & 0XFFFF; } static int extractCurCount(final Object memObj, final long memAdd) { return unsafe.getInt(memObj, memAdd + RETAINED_ENTRIES_INT); } static float extractP(final Object memObj, final long memAdd) { return unsafe.getFloat(memObj, memAdd + P_FLOAT); } static long extractThetaLong(final Object memObj, final long memAdd) { return unsafe.getLong(memObj, memAdd + THETA_LONG); } static long extractUnionThetaLong(final Object memObj, final long memAdd) { return unsafe.getLong(memObj, memAdd + UNION_THETA_LONG); } static void insertPreLongs(final Object memObj, final long memAdd, final int preLongs) { unsafe.putByte(memObj, memAdd + PREAMBLE_LONGS_BYTE, (byte) (preLongs & 0X3F)); } static void insertLgResizeFactor(final Object memObj, final long memAdd, final int rf) { final int curByte = unsafe.getByte(memObj, memAdd + PREAMBLE_LONGS_BYTE); final int shift = LG_RESIZE_FACTOR_BIT; // shift in bits final int mask = 3; final byte newByte = (byte) (((rf & mask) << shift) | (~(mask << shift) & curByte)); unsafe.putByte(memObj, memAdd + PREAMBLE_LONGS_BYTE, newByte); } static void insertSerVer(final Object memObj, final long memAdd, final int serVer) { unsafe.putByte(memObj, memAdd + SER_VER_BYTE, (byte) serVer); } static void insertFamilyID(final Object memObj, final long memAdd, final int famId) { unsafe.putByte(memObj, memAdd + FAMILY_BYTE, (byte) famId); } static void insertLgNomLongs(final Object memObj, final long memAdd, final int lgNomLongs) { unsafe.putByte(memObj, memAdd + LG_NOM_LONGS_BYTE, (byte) lgNomLongs); } static void insertLgArrLongs(final Object memObj, final long memAdd, final int lgArrLongs) { unsafe.putByte(memObj, memAdd + LG_ARR_LONGS_BYTE, (byte) lgArrLongs); } static void insertFlags(final Object memObj, final long memAdd, final int flags) { unsafe.putByte(memObj, memAdd + FLAGS_BYTE, (byte) flags); } static void insertSeedHash(final Object memObj, final long memAdd, final int seedHash) { unsafe.putShort(memObj, memAdd + SEED_HASH_SHORT, (short) seedHash); } static void insertCurCount(final Object memObj, final long memAdd, final int curCount) { unsafe.putInt(memObj, memAdd + RETAINED_ENTRIES_INT, curCount); } static void insertP(final Object memObj, final long memAdd, final float p) { unsafe.putFloat(memObj, memAdd + P_FLOAT, p); } static void insertThetaLong(final Object memObj, final long memAdd, final long thetaLong) { unsafe.putLong(memObj, memAdd + THETA_LONG, thetaLong); } static void insertUnionThetaLong(final Object memObj, final long memAdd, final long unionThetaLong) { unsafe.putLong(memObj, memAdd + UNION_THETA_LONG, unionThetaLong); } static void setEmpty(final Object memObj, final long memAdd) { int flags = unsafe.getByte(memObj, memAdd + FLAGS_BYTE); flags |= EMPTY_FLAG_MASK; unsafe.putByte(memObj, memAdd + FLAGS_BYTE, (byte) flags); } static void clearEmpty(final Object memObj, final long memAdd) { int flags = unsafe.getByte(memObj, memAdd + FLAGS_BYTE); flags &= ~EMPTY_FLAG_MASK; unsafe.putByte(memObj, memAdd + FLAGS_BYTE, (byte) flags); } static boolean isEmpty(final Object memObj, final long memAdd) { final int flags = unsafe.getByte(memObj, memAdd + FLAGS_BYTE); return (flags & EMPTY_FLAG_MASK) > 0; } /** * Checks Memory for capacity to hold the preamble and returns the extracted preLongs. * @param mem the given Memory * @return the extracted prelongs value. */ static int getAndCheckPreLongs(final Object memObj, final long memAdd, final Memory mem) { final long cap = mem.getCapacity(); if (cap < 8) { throwNotBigEnough(cap, 8); } final int preLongs = extractPreLongs(memObj, memAdd); final int required = Math.max(preLongs << 3, 8); if (cap < required) { throwNotBigEnough(cap, required); } return preLongs; } private static void throwNotBigEnough(final long cap, final int required) { throw new SketchesArgumentException( "Possible Corruption: Size of byte array or Memory not large enough: Size: " + cap + ", Required: " + required); } }




© 2015 - 2025 Weber Informatics LLC | Privacy Policy