All downloads are free. Search and download functionality uses the official Maven repository.

org.apache.datasketches.hll.PreambleUtil Maven / Gradle / Ivy

Go to download

Core sketch algorithms used alone and by other Java repositories in the DataSketches library.

There is a newer version: 6.1.1
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.apache.datasketches.hll;

import static org.apache.datasketches.common.Util.ceilingIntPowerOf2;
import static org.apache.datasketches.common.Util.exactLog2OfLong;
import static org.apache.datasketches.common.Util.zeroPad;
import static org.apache.datasketches.hll.HllUtil.LG_AUX_ARR_INTS;
import static org.apache.datasketches.hll.HllUtil.LG_INIT_SET_SIZE;
import static org.apache.datasketches.hll.HllUtil.RESIZE_DENOM;
import static org.apache.datasketches.hll.HllUtil.RESIZE_NUMER;

import java.nio.ByteOrder;

import org.apache.datasketches.common.Family;
import org.apache.datasketches.memory.Memory;
import org.apache.datasketches.memory.WritableMemory;

//@formatter:off
/**
 * 
 * CouponList Layout
 * Long || Start Byte Adr, Big Endian Illustration
 * Adr:
 *      ||    7   |    6   |    5   |    4   |    3   |    2   |    1   |    0   |
 *  0   ||  Mode  | ListCnt| Flags  |  LgArr |   lgK  | FamID  | SerVer |  PI=2  |
 *
 *      ||   15   |   14   |   13   |   12   |   11   |   10   |    9   |    8   |
 *  1   ||                                   |------Coupon Int List Start--------|
 * 
* *
 * CouponHashSet Layout
 * Long || Start Byte Adr, Big Endian Illustration
 * Adr:
 *      ||    7   |    6   |    5   |    4   |    3   |    2   |    1   |    0   |
 *  0   ||  Mode  |        | Flags  |  LgArr |   lgK  | FamID  | SerVer |  PI=3  |
 *
 *      ||   15   |   14   |   13   |   12   |   11   |   10   |    9   |    8   |
 *  1   ||-----Coupon Int Hash Set Start-----|---------Hash Set Count------------|
 * 
* *
 * HllArray Layout
 * Long || Start Byte Adr, Big Endian Illustration
 * Adr:
 *      ||    7   |    6   |    5   |    4   |    3   |    2   |    1   |    0   |
 *  0   ||  Mode  | CurMin | Flags  |  LgArr |   lgK  | FamID  | SerVer | PI=10  |
 *
 *      ||   15   |   14   |   13   |   12   |   11   |   10   |    9   |    8   |
 *  1   ||-------------------------------HIP Accum-------------------------------|
 *
 *      ||   23   |   22   |   21   |   20   |   19   |   18   |   17   |   16   |
 *  2   ||----------------------------------KxQ0---------------------------------|
 *
 *      ||   31   |   30   |   29   |   28   |   27   |   26   |   25   |   24   |
 *  3   ||----------------------------------KxQ1---------------------------------|
 *
 *      ||   39   |   38   |   37   |   36   |   35   |   34   |   33   |   32   |
 *  4   ||-------------Aux Count-------------|----------Num At Cur Min-----------|
 *
 *      ||   47   |   46   |   45   |   44   |   43   |   42   |   41   |   40   |
 *  5   ||...................................|------Start of HLL_X Byte Array----|
 *
 *  N   ||----End of Byte Array for HLL_4----|...................................|
 *  N+1 ||...................................|-----Start of Aux Array for HLL_4--|
 * 
* If in compact form exceptions array will be compacted. * * @author Lee Rhodes */ final class PreambleUtil { private PreambleUtil() {} private static final String LS = System.getProperty("line.separator"); // ###### DO NOT MESS WITH THIS ... // Preamble byte start addresses // First 8 Bytes: static int PREAMBLE_INTS_BYTE = 0; static int SER_VER_BYTE = 1; static int FAMILY_BYTE = 2; static int LG_K_BYTE = 3; static int LG_ARR_BYTE = 4; //used for LIST, SET & HLL_4 static int FLAGS_BYTE = 5; static int LIST_COUNT_BYTE = 6; static int HLL_CUR_MIN_BYTE = 6; static int MODE_BYTE = 7; //lo2bits = curMode, next 2 bits = tgtHllType //mode encoding of combined CurMode and TgtHllType: // Dec Lo4Bits TgtHllType, CurMode // 0 0000 HLL_4, LIST // 1 0001 HLL_4, SET // 2 0010 HLL_4, HLL // 4 0100 HLL_6, LIST // 5 0101 HLL_6, SET // 6 0110 HLL_6, HLL // 8 1000 HLL_8, LIST // 9 1001 HLL_8, SET // 10 1010 HLL_8, HLL //Coupon List static int LIST_INT_ARR_START = 8; //Coupon Hash Set static int HASH_SET_COUNT_INT = 8; static int HASH_SET_INT_ARR_START = 12; //HLL static int HIP_ACCUM_DOUBLE = 8; static int KXQ0_DOUBLE = 16; static int KXQ1_DOUBLE = 24; static int CUR_MIN_COUNT_INT = 32; static int AUX_COUNT_INT = 36; static int HLL_BYTE_ARR_START = 40; //Flag bit masks static final int BIG_ENDIAN_FLAG_MASK = 1; //Set but not read. Reserved. static final int READ_ONLY_FLAG_MASK = 2; //Set but not read. Reserved. 
static final int EMPTY_FLAG_MASK = 4; static final int COMPACT_FLAG_MASK = 8; static final int OUT_OF_ORDER_FLAG_MASK = 16; static final int REBUILD_CURMIN_NUM_KXQ_MASK = 32; //used only by Union //Mode byte masks static final int CUR_MODE_MASK = 3; static final int TGT_HLL_TYPE_MASK = 12; //Other constants static final int SER_VER = 1; static final int FAMILY_ID = 7; static final int LIST_PREINTS = 2; static final int HASH_SET_PREINTS = 3; static final int HLL_PREINTS = 10; static final boolean NATIVE_ORDER_IS_BIG_ENDIAN = (ByteOrder.nativeOrder() == ByteOrder.BIG_ENDIAN); static String toString(final byte[] byteArr) { final Memory mem = Memory.wrap(byteArr); return toString(mem); } static String toString(final Memory mem) { //First 8 bytes final int preInts = mem.getByte(PREAMBLE_INTS_BYTE); final int serVer = mem.getByte(SER_VER_BYTE); final Family family = Family.idToFamily(mem.getByte(FAMILY_BYTE)); final int lgK = mem.getByte(LG_K_BYTE); final int lgArr = mem.getByte(LG_ARR_BYTE); final int flags = mem.getByte(FLAGS_BYTE); //Flags final String flagsStr = zeroPad(Integer.toBinaryString(flags), 8) + ", " + (flags); final boolean bigEndian = (flags & BIG_ENDIAN_FLAG_MASK) > 0; final String nativeOrder = ByteOrder.nativeOrder().toString(); final boolean compact = (flags & COMPACT_FLAG_MASK) > 0; final boolean oooFlag = (flags & OUT_OF_ORDER_FLAG_MASK) > 0; final boolean readOnly = (flags & READ_ONLY_FLAG_MASK) > 0; final boolean empty = (flags & EMPTY_FLAG_MASK) > 0; final boolean rebuildKxQ = (flags & REBUILD_CURMIN_NUM_KXQ_MASK) > 0; final int hllCurMin = mem.getByte(HLL_CUR_MIN_BYTE); final int listCount = hllCurMin; final int modeByte = mem.getByte(MODE_BYTE); final CurMode curMode = CurMode.fromOrdinal(modeByte & 3); final TgtHllType tgtHllType = TgtHllType.fromOrdinal((modeByte >>> 2) & 3); double hipAccum = 0; double kxq0 = 0; double kxq1 = 0; int hashSetCount = 0; int curMinCount = 0; int exceptionCount = 0; if (curMode == CurMode.SET) { hashSetCount = 
mem.getInt(HASH_SET_COUNT_INT); } else if (curMode == CurMode.HLL) { hipAccum = mem.getDouble(HIP_ACCUM_DOUBLE); kxq0 = mem.getDouble(KXQ0_DOUBLE); kxq1 = mem.getDouble(KXQ1_DOUBLE); curMinCount = mem.getInt(CUR_MIN_COUNT_INT); exceptionCount = mem.getInt(AUX_COUNT_INT); } final StringBuilder sb = new StringBuilder(); sb.append(LS); sb.append("### HLL SKETCH PREAMBLE:").append(LS); sb.append("Byte 0: Preamble Ints : ").append(preInts).append(LS); sb.append("Byte 1: SerVer : ").append(serVer).append(LS); sb.append("Byte 2: Family : ").append(family).append(LS); sb.append("Byte 3: lgK : ").append(lgK).append(LS); //expand byte 4: LgArr if (curMode == CurMode.LIST) { sb.append("Byte 4: LgArr: List Arr : ").append(lgArr).append(LS); } if (curMode == CurMode.SET) { sb.append("Byte 4: LgArr: Hash Set Arr : ").append(lgArr).append(LS); } if (curMode == CurMode.HLL) { sb.append("Byte 4: LgArr or Aux LgArr : ").append(lgArr).append(LS); } //expand byte 5: Flags sb.append("Byte 5: Flags: : ").append(flagsStr).append(LS); sb.append(" BIG_ENDIAN_STORAGE : ").append(bigEndian).append(LS); sb.append(" (Native Byte Order) : ").append(nativeOrder).append(LS); sb.append(" READ_ONLY : ").append(readOnly).append(LS); sb.append(" EMPTY : ").append(empty).append(LS); sb.append(" COMPACT : ").append(compact).append(LS); sb.append(" OUT_OF_ORDER : ").append(oooFlag).append(LS); sb.append(" REBUILD_KXQ : ").append(rebuildKxQ).append(LS); //expand byte 6: ListCount, CurMin if (curMode == CurMode.LIST) { sb.append("Byte 6: List Count/CurMin : ").append(listCount).append(LS); } if (curMode == CurMode.SET) { sb.append("Byte 6: (not used) : ").append(LS); } if (curMode == CurMode.HLL) { sb.append("Byte 6: Cur Min : ").append(hllCurMin).append(LS); } final String modes = curMode.toString() + ", " + tgtHllType.toString(); sb.append("Byte 7: Mode : ").append(modes).append(LS); if (curMode == CurMode.SET) { sb.append("Hash Set Count : ").append(hashSetCount).append(LS); } if (curMode == 
CurMode.HLL) { sb.append("HIP Accum : ").append(hipAccum).append(LS); sb.append("KxQ0 : ").append(kxq0).append(LS); sb.append("KxQ1 : ").append(kxq1).append(LS); sb.append("Num At Cur Min : ").append(curMinCount).append(LS); sb.append("Aux Count : ").append(exceptionCount).append(LS); } sb.append("### END HLL SKETCH PREAMBLE").append(LS); return sb.toString(); } //@formatter:on static int extractPreInts(final Memory mem) { return mem.getByte(PREAMBLE_INTS_BYTE) & 0X3F; } static void insertPreInts(final WritableMemory wmem, final int preInts) { wmem.putByte(PREAMBLE_INTS_BYTE, (byte) (preInts & 0X3F)); } static int extractSerVer(final Memory mem) { return mem.getByte(SER_VER_BYTE) & 0XFF; } static void insertSerVer(final WritableMemory wmem) { wmem.putByte(SER_VER_BYTE, (byte) SER_VER); } static int extractFamilyId(final Memory mem) { return mem.getByte(FAMILY_BYTE) & 0XFF; } static void insertFamilyId(final WritableMemory wmem) { wmem.putByte(FAMILY_BYTE, (byte) FAMILY_ID); } static int extractLgK(final Memory mem) { return mem.getByte(LG_K_BYTE) & 0XFF; } static void insertLgK(final WritableMemory wmem, final int lgK) { wmem.putByte(LG_K_BYTE, (byte) lgK); } static int extractLgArr(final Memory mem) { final int lgArr = mem.getByte(LG_ARR_BYTE) & 0XFF; return lgArr; } static void insertLgArr(final WritableMemory wmem, final int lgArr) { wmem.putByte(LG_ARR_BYTE, (byte) lgArr); } static int extractListCount(final Memory mem) { return mem.getByte(LIST_COUNT_BYTE) & 0XFF; } static void insertListCount(final WritableMemory wmem, final int listCnt) { wmem.putByte(LIST_COUNT_BYTE, (byte) listCnt); } static int extractCurMin(final Memory mem) { return mem.getByte(HLL_CUR_MIN_BYTE) & 0XFF; } static void insertCurMin(final WritableMemory wmem, final int curMin) { wmem.putByte(HLL_CUR_MIN_BYTE, (byte) curMin); } static double extractHipAccum(final Memory mem) { return mem.getDouble(HIP_ACCUM_DOUBLE); } static void insertHipAccum(final WritableMemory wmem, final double 
hipAccum) { wmem.putDouble(HIP_ACCUM_DOUBLE, hipAccum); } static double extractKxQ0(final Memory mem) { return mem.getDouble(KXQ0_DOUBLE); } static void insertKxQ0(final WritableMemory wmem, final double kxq0) { wmem.putDouble(KXQ0_DOUBLE, kxq0); } static double extractKxQ1(final Memory mem) { return mem.getDouble(KXQ1_DOUBLE); } static void insertKxQ1(final WritableMemory wmem, final double kxq1) { wmem.putDouble(KXQ1_DOUBLE, kxq1); } static int extractHashSetCount(final Memory mem) { return mem.getInt(HASH_SET_COUNT_INT); } static void insertHashSetCount(final WritableMemory wmem, final int hashSetCnt) { wmem.putInt(HASH_SET_COUNT_INT, hashSetCnt); } static int extractNumAtCurMin(final Memory mem) { return mem.getInt(CUR_MIN_COUNT_INT); } static void insertNumAtCurMin(final WritableMemory wmem, final int numAtCurMin) { wmem.putInt(CUR_MIN_COUNT_INT, numAtCurMin); } static int extractAuxCount(final Memory mem) { return mem.getInt(AUX_COUNT_INT); } static void insertAuxCount(final WritableMemory wmem, final int auxCount) { wmem.putInt(AUX_COUNT_INT, auxCount); } //Mode bits static void insertCurMode(final WritableMemory wmem, final CurMode curMode) { final int curModeId = curMode.ordinal(); int mode = wmem.getByte(MODE_BYTE) & ~CUR_MODE_MASK; //strip bits 0, 1 mode |= (curModeId & CUR_MODE_MASK); wmem.putByte(MODE_BYTE, (byte) mode); } static CurMode extractCurMode(final Memory mem) { final int curModeId = mem.getByte(MODE_BYTE) & CUR_MODE_MASK; return CurMode.fromOrdinal(curModeId); } static void insertTgtHllType(final WritableMemory wmem, final TgtHllType tgtHllType) { final int typeId = tgtHllType.ordinal(); int mode = wmem.getByte(MODE_BYTE) & ~TGT_HLL_TYPE_MASK; //strip bits 2, 3 mode |= (typeId << 2) & TGT_HLL_TYPE_MASK; wmem.putByte(MODE_BYTE, (byte) mode); } static TgtHllType extractTgtHllType(final Memory mem) { final int typeId = mem.getByte(MODE_BYTE) & TGT_HLL_TYPE_MASK; return TgtHllType.fromOrdinal(typeId >>> 2); } static void insertModes(final 
WritableMemory wmem, final TgtHllType tgtHllType, final CurMode curMode) { final int curModeId = curMode.ordinal() & 3; final int typeId = (tgtHllType.ordinal() & 3) << 2; final int mode = typeId | curModeId; wmem.putByte(MODE_BYTE, (byte) mode); } //Flags static void insertEmptyFlag(final WritableMemory wmem, final boolean empty) { int flags = wmem.getByte(FLAGS_BYTE); if (empty) { flags |= EMPTY_FLAG_MASK; } else { flags &= ~EMPTY_FLAG_MASK; } wmem.putByte(FLAGS_BYTE, (byte) flags); } static boolean extractEmptyFlag(final Memory mem) { final int flags = mem.getByte(FLAGS_BYTE); return (flags & EMPTY_FLAG_MASK) > 0; } static void insertCompactFlag(final WritableMemory wmem, final boolean compact) { int flags = wmem.getByte(FLAGS_BYTE); if (compact) { flags |= COMPACT_FLAG_MASK; } else { flags &= ~COMPACT_FLAG_MASK; } wmem.putByte(FLAGS_BYTE, (byte) flags); } static boolean extractCompactFlag(final Memory mem) { final int flags = mem.getByte(FLAGS_BYTE); return (flags & COMPACT_FLAG_MASK) > 0; } static void insertOooFlag(final WritableMemory wmem, final boolean oooFlag) { int flags = wmem.getByte(FLAGS_BYTE); if (oooFlag) { flags |= OUT_OF_ORDER_FLAG_MASK; } else { flags &= ~OUT_OF_ORDER_FLAG_MASK; } wmem.putByte(FLAGS_BYTE, (byte) flags); } static boolean extractOooFlag(final Memory mem) { final int flags = mem.getByte(FLAGS_BYTE); return (flags & OUT_OF_ORDER_FLAG_MASK) > 0; } static void insertRebuildCurMinNumKxQFlag(final WritableMemory wmem, final boolean rebuild) { int flags = wmem.getByte(FLAGS_BYTE); if (rebuild) { flags |= REBUILD_CURMIN_NUM_KXQ_MASK; } else { flags &= ~REBUILD_CURMIN_NUM_KXQ_MASK; } wmem.putByte(FLAGS_BYTE, (byte) flags); } static boolean extractRebuildCurMinNumKxQFlag(final Memory mem) { final int flags = mem.getByte(FLAGS_BYTE); return (flags & REBUILD_CURMIN_NUM_KXQ_MASK) > 0; } static void insertFlags(final WritableMemory wmem, final int flags) { wmem.putByte(FLAGS_BYTE, (byte) flags); } static int extractFlags(final Memory mem) { 
return mem.getByte(FLAGS_BYTE) & 0XFF; } //Other static int extractInt(final Memory mem, final long byteOffset) { return mem.getInt(byteOffset); } static void insertInt(final WritableMemory wmem, final long byteOffset, final int value) { wmem.putInt(byteOffset, value); } static int computeLgArr(final Memory mem, final int count, final int lgConfigK) { //value is missing, recompute final CurMode curMode = extractCurMode(mem); if (curMode == CurMode.LIST) { return HllUtil.LG_INIT_LIST_SIZE; } int ceilPwr2 = ceilingIntPowerOf2(count); if ((RESIZE_DENOM * count) > (RESIZE_NUMER * ceilPwr2)) { ceilPwr2 <<= 1; } if (curMode == CurMode.SET) { return Math.max(LG_INIT_SET_SIZE, exactLog2OfLong(ceilPwr2)); } //only used for HLL4 return Math.max(LG_AUX_ARR_INTS[lgConfigK], exactLog2OfLong(ceilPwr2)); } }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy