All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.yahoo.sketches.sampling.PreambleUtil Maven / Gradle / Ivy

There is a newer version: 0.13.4
Show newest version
/*
 * Copyright 2015-16, Yahoo! Inc.
 * Licensed under the terms of the Apache License 2.0. See LICENSE file at the project root for terms.
 */

package com.yahoo.sketches.sampling;

import static com.yahoo.sketches.Util.LS;
import static com.yahoo.sketches.Util.TAB;
import static com.yahoo.sketches.Util.zeroPad;

import java.nio.ByteOrder;

import com.yahoo.memory.Memory;
import com.yahoo.memory.NativeMemory;
import com.yahoo.sketches.Family;
import com.yahoo.sketches.ResizeFactor;
import com.yahoo.sketches.SketchesArgumentException;

//@formatter:off

/**
 * This class defines the preamble data structure and provides basic utilities for some of the key
 * fields.
 *
 * 

* MAP: Low significance bytes of this long data structure are on the right. However, the * multi-byte integers (int and long) are stored in native byte order. The * byte values are treated as unsigned.

* *

Sketch: The count of items seen is limited to 48 bits (~256 trillion) even * though there are adjacent unused preamble bits. The acceptance probability for an item is a * double in the range [0,1), limiting us to 53 bits of randomness due to details of the IEEE * floating point format. To ensure meaningful probabilities as the items seen count approaches * capacity, we intentionally use slightly fewer bits.

* *

An empty sampling sketch only requires 8 bytes. A non-empty sampling sketch requires 16 * bytes of preamble.

* *
 * Long || Start Byte Adr:
 * Adr:
 *      ||    7   |    6   |    5   |    4   |    3   |    2   |    1   |     0              |
 *  0   ||--------Reservoir Size (K)---------|  Flags | FamID  | SerVer |   Preamble_Longs   |
 *
 *      ||   15   |   14   |   13   |   12   |   11   |   10   |    9   |     8              |
 *  1   ||-----(empty)-----|-------------------Items Seen Count------------------------------|
 *  
* *

Union: The union has fewer internal parameters to track and uses a slightly * different preamble structure. The maximum reservoir size intentionally occupies the same byte * range as the reservoir size in the sketch preamble, allowing the same methods to be used for * reading and writing the values.

* *

An empty union only requires 8 bytes. A non-empty union requires 8 bytes of preamble.

* *
 * Long || Start Byte Adr:
 * Adr:
 *      ||    7   |    6   |    5   |    4   |    3   |    2   |    1   |     0              |
 *  0   ||---------Max Res. Size (K)---------|  Flags | FamID  | SerVer |   Preamble_Longs   |
 *  
* * @author Jon Malkin * @author Lee Rhodes */ final class PreambleUtil { private PreambleUtil() {} // ###### DO NOT MESS WITH THIS FROM HERE ... // Preamble byte Addresses static final int PREAMBLE_LONGS_BYTE = 0; // Only low 6 bits used static final int LG_RESIZE_FACTOR_BITS = 6; // upper 2 bits. Not used by compact or direct. static final int SER_VER_BYTE = 1; static final int FAMILY_BYTE = 2; static final int FLAGS_BYTE = 3; static final int RESERVOIR_SIZE_SHORT = 4; // used in ser_ver 1 static final int RESERVOIR_SIZE_INT = 4; static final int SERDE_ID_SHORT = 6; // used in ser_ver 1 static final int ITEMS_SEEN_BYTE = 8; //static final int MAX_K_SHORT = 4; // used in Union only, ser_ver 1 //static final int MAX_K_INT = 4; // used in Union only // flag bit masks //static final int BIG_ENDIAN_FLAG_MASK = 1; //static final int READ_ONLY_FLAG_MASK = 2; static final int EMPTY_FLAG_MASK = 4; //static final int COMPACT_FLAG_MASK = 8; //static final int ORDERED_FLAG_MASK = 16; //Other constants static final int SER_VER = 2; static final boolean NATIVE_ORDER_IS_BIG_ENDIAN = (ByteOrder.nativeOrder() == ByteOrder.BIG_ENDIAN); // STRINGS /** * Returns a human readable string summary of the preamble state of the given byte array. * Used primarily in testing. * * @param byteArr the given byte array. * @return the summary preamble string. */ public static String preambleToString(byte[] byteArr) { Memory mem = new NativeMemory(byteArr); return preambleToString(mem); } /** * Returns a human readable string summary of the preamble state of the given Memory. * Note: other than making sure that the given Memory size is large * enough for just the preamble, this does not do much value checking of the contents of the * preamble as this is primarily a tool for debugging the preamble visually. * * @param mem the given Memory. * @return the summary preamble string. */ public static String preambleToString(Memory mem) { // TODO: different path for sketch vs union int preLongs = getAndCheckPreLongs(mem); //make sure we can get the assumed preamble long pre0 = mem.getLong(0); ResizeFactor rf = ResizeFactor.getRF(extractResizeFactor(pre0)); int serVer = extractSerVer(pre0); Family family = Family.idToFamily(extractFamilyID(pre0)); //Flags int flags = extractFlags(pre0); String flagsStr = zeroPad(Integer.toBinaryString(flags), 8) + ", " + (flags); //boolean bigEndian = (flags & BIG_ENDIAN_FLAG_MASK) > 0; String nativeOrder = ByteOrder.nativeOrder().toString(); //boolean compact = (flags & COMPACT_FLAG_MASK) > 0; //boolean ordered = (flags & ORDERED_FLAG_MASK) > 0; //boolean readOnly = (flags & READ_ONLY_FLAG_MASK) > 0; boolean isEmpty = (flags & EMPTY_FLAG_MASK) > 0; int resSize = extractReservoirSize(pre0); long itemsSeen = 0; if (!isEmpty) { long pre1 = mem.getLong(8); itemsSeen = extractItemsSeenCount(pre1); } StringBuilder sb = new StringBuilder(); sb.append(LS) .append("### SKETCH PREAMBLE SUMMARY:").append(LS) .append("Byte 0: Preamble Longs : ").append(preLongs).append(LS) .append("Byte 0: ResizeFactor : ").append(rf.toString()).append(LS) .append("Byte 1: Serialization Version: ").append(serVer).append(LS) .append("Byte 2: Family : ").append(family.toString()).append(LS) .append("Byte 3: Flags Field : ").append(flagsStr).append(LS) //.append(" BIG_ENDIAN_STORAGE : ").append(bigEndian).append(LS) .append(" (Native Byte Order) : ").append(nativeOrder).append(LS) //.append(" READ_ONLY : ").append(readOnly).append(LS) .append(" EMPTY : ").append(isEmpty).append(LS) .append("Bytes 4-7: Reservoir Size : ").append(resSize).append(TAB + "(").append(LS); if (!isEmpty) { sb.append("Bytes 8-13: Items Seen : ").append(itemsSeen).append(LS); } sb.append("Preamble Bytes : ").append(preLongs << 3).append(LS); //sb.append( "Data Bytes : ").append(curCount * 8).append(LS); //sb.append( "TOTAL Sketch Bytes : ").append(mem.getCapacity()).append(LS) sb.append("### END SKETCH PREAMBLE SUMMARY").append(LS); return sb.toString(); } //Extract from long and insert into long methods static int extractPreLongs(final long long0) { long mask = 0X3FL; return (int) (long0 & mask); } static int extractResizeFactor(final long long0) { int shift = LG_RESIZE_FACTOR_BITS; // units in bits long mask = 0X3L; return (int) ((long0 >>> shift) & mask); } static int extractSerVer(final long long0) { int shift = SER_VER_BYTE << 3; long mask = 0XFFL; return (int) ((long0 >>> shift) & mask); } static int extractFamilyID(final long long0) { int shift = FAMILY_BYTE << 3; long mask = 0XFFL; return (int) ((long0 >>> shift) & mask); } static int extractFlags(final long long0) { int shift = FLAGS_BYTE << 3; long mask = 0XFFL; return (int) ((long0 >>> shift) & mask); } static short extractEncodedReservoirSize(final long long0) { int shift = RESERVOIR_SIZE_SHORT << 3; long mask = 0XFFFFL; return (short) ((long0 >>> shift) & mask); } static int extractReservoirSize(final long long0) { int shift = RESERVOIR_SIZE_INT << 3; long mask = 0XFFFFFFFFL; return (int) ((long0 >>> shift) & mask); } static int extractMaxK(final long long0) { return extractReservoirSize(long0); } @Deprecated static short extractSerDeId(final long long0) { int shift = SERDE_ID_SHORT << 3; long mask = 0XFFFFL; return (short) ((long0 >>> shift) & mask); } static long extractItemsSeenCount(final long long1) { long mask = 0XFFFFFFFFFFFFL; return (long1 & mask); } static long insertPreLongs(final int preLongs, final long long0) { long mask = 0X3FL; return (preLongs & mask) | (~mask & long0); } static long insertResizeFactor(final int rf, final long long0) { int shift = LG_RESIZE_FACTOR_BITS; // units in bits long mask = 3L; return ((rf & mask) << shift) | (~(mask << shift) & long0); } static long insertSerVer(final int serVer, final long long0) { int shift = SER_VER_BYTE << 3; long mask = 0XFFL; return ((serVer & mask) << shift) | (~(mask << shift) & long0); } static long insertFamilyID(final int familyID, final long long0) { int shift = FAMILY_BYTE << 3; long mask = 0XFFL; return ((familyID & mask) << shift) | (~(mask << shift) & long0); } static long insertFlags(final int flags, final long long0) { int shift = FLAGS_BYTE << 3; long mask = 0XFFL; return ((flags & mask) << shift) | (~(mask << shift) & long0); } static long insertReservoirSize(final int reservoirSize, final long long0) { int shift = RESERVOIR_SIZE_INT << 3; long mask = 0XFFFFFFFFL; return ((reservoirSize & mask) << shift) | (~(mask << shift) & long0); } static long insertMaxK(final int reservoirSize, final long long0) { return insertReservoirSize(reservoirSize, long0); } @Deprecated static long insertSerDeId(final int serDeId, final long long0) { int shift = SERDE_ID_SHORT << 3; long mask = 0XFFFFL; return ((serDeId & mask) << shift) | (~(mask << shift) & long0); } static long insertItemsSeenCount(final long totalSeen, final long long1) { long mask = 0XFFFFFFFFFFFFL; return (totalSeen & mask) | (~mask & long1); } /** * Checks Memory for capacity to hold the preamble and returns the extracted preLongs. * @param mem the given Memory * @return the extracted prelongs value. */ static int getAndCheckPreLongs(Memory mem) { long cap = mem.getCapacity(); if (cap < 8) { throwNotBigEnough(cap, 8); } long pre0 = mem.getLong(0); int preLongs = extractPreLongs(pre0); int required = Math.max(preLongs << 3, 8); if (cap < required) { throwNotBigEnough(cap, required); } return preLongs; } private static void throwNotBigEnough(long cap, int required) { throw new SketchesArgumentException( "Possible Corruption: Size of byte array or Memory not large enough: Size: " + cap + ", Required: " + required); } }




© 2015 - 2025 Weber Informatics LLC | Privacy Policy