All downloads are FREE. The search and download functionality uses the official Maven repository.

com.yahoo.sketches.hll.Preamble Maven / Gradle / Ivy

There is a newer version: 0.13.4
Show newest version
/*
 * Copyright 2015-16, Yahoo! Inc.
 * Licensed under the terms of the Apache License 2.0. See LICENSE file at the project root for terms.
 */

package com.yahoo.sketches.hll;

import com.yahoo.memory.Memory;
import com.yahoo.memory.MemoryRegion;
import com.yahoo.memory.NativeMemory;
import com.yahoo.sketches.Family;
import com.yahoo.sketches.SketchesArgumentException;
import com.yahoo.sketches.Util;
import com.yahoo.sketches.hash.MurmurHash3;

/**
 * Immutable description of the 8-byte preamble that precedes a serialized HLL sketch.
 *
 * <p>Byte layout, as read by {@link #fromMemory(Memory)} and written by
 * {@code intoByteArray(byte[], int)}:
 * <pre>
 *   byte 0    : preamble size in longs
 *   byte 1    : serialization version
 *   byte 2    : family ID
 *   byte 3    : log_base2 of the configured K
 *   byte 4    : unused
 *   byte 5    : flags
 *   bytes 6-7 : seed hash (short)
 * </pre>
 *
 * <p>Instances are created through {@link #fromMemory(Memory)}, {@link #fromLogK(int)} or the
 * nested {@link Builder}.
 *
 * @author Kevin Lang
 */
public final class Preamble {
  static final byte PREAMBLE_LONGS = 1;
  static final byte PREAMBLE_VERSION = 8;
  static final byte HLL_PREAMBLE_FAMILY_ID = (byte) Family.HLL.getID();

  // Lookup table indexed by logConfigK; see getMaxAuxSize().
  static final int[] AUX_SIZE = new int[] {
      1, 4, 4, 4, 4, 4, 4, 8, 8, 8,
      16, 16, 32, 32, 64, 128, 256, 512, 1024, 2048,
      4096, 8192, 16384, 32768, 65536, 131072, 262144
  };

  // Size, in bytes, of the region written by intoByteArray().
  private static final int SERIALIZED_BYTES = 8;

  private final byte preambleLongs;
  private final byte version;
  private final byte familyId;
  private final byte logConfigK;
  private final byte flags;
  private final short seedHash;

  /**
   * Creates a Preamble after validating that logConfigK lies within the range supported by
   * {@code Interpolation}.
   *
   * @throws SketchesArgumentException if logConfigK is outside
   *     [INTERPOLATION_MIN_LOG_K, INTERPOLATION_MAX_LOG_K]
   */
  private Preamble(
      byte preambleLongs, byte version, byte familyId, byte logConfigK, byte flags, short seedHash) {
    // Validate before assigning so that no partially-initialized state is ever observable.
    if ((logConfigK < Interpolation.INTERPOLATION_MIN_LOG_K)
        || (logConfigK > Interpolation.INTERPOLATION_MAX_LOG_K)) {
      throw new SketchesArgumentException(
          String.format(
              "logConfigK[%s] is out of bounds, should be between [%s] and [%s]",
              logConfigK, Interpolation.INTERPOLATION_MIN_LOG_K, Interpolation.INTERPOLATION_MAX_LOG_K
          )
      );
    }

    this.preambleLongs = preambleLongs;
    this.version = version;
    this.familyId = familyId;
    this.logConfigK = logConfigK;
    this.flags = flags;
    this.seedHash = seedHash;
  }

  /**
   * Instantiates a new Preamble from the given Memory.
   * Reads bytes 0-3, byte 5 and the short at offset 6; byte 4 is not read.
   *
   * @param memory the given Memory
   * @return a new Preamble from the given Memory
   * @throws SketchesArgumentException if the stored logConfigK is out of bounds
   */
  public static Preamble fromMemory(Memory memory) {
    Builder builder = new Builder()
        .setPreambleLongs(memory.getByte(0))
        .setVersion(memory.getByte(1))
        .setFamilyId(memory.getByte(2))
        .setLogConfigK(memory.getByte(3))
        // Byte 4 is intentionally skipped.  Some bits are wasted instead of packing the
        // preamble so that the semantics of the various parts of the preamble can be
        // aligned across different types of sketches.
        .setFlags(memory.getByte(5));

    short seedHash = memory.getShort(6);
    return builder.setSeedHash(seedHash).build();
  }

  /**
   * Computes and checks the 16-bit seed hash from the given long seed.
   * The seed hash may not be zero in order to maintain compatibility with older serialized
   * versions that did not have this concept.
   *
   * @param seed the given seed.
   *
   * @return the seed hash.
   * @throws SketchesArgumentException if the computed seed hash is zero
   */
  private static short computeSeedHash(long seed) {
    long[] seedArr = {seed};
    // Keep only the low 16 bits of the first hash word.
    short seedHash = (short) ((MurmurHash3.hash(seedArr, 0L)[0]) & 0xFFFFL);
    if (seedHash == 0) {
      throw new SketchesArgumentException(
          "The given seed: " + seed + " produced a seedHash of zero. "
              + "You must choose a different seed."
      );
    }
    return seedHash;
  }

  /**
   * Instantiates a new Preamble with the parameter log_base2 of K.
   * All other fields take the {@link Builder} defaults, including the seed hash derived from
   * {@code Util.DEFAULT_UPDATE_SEED}.
   *
   * @param logK log_base2 of the desired K
   * @return a new Preamble with the parameter log_base2 of K.
   * @throws SketchesArgumentException if logK does not fit in a byte or is outside the
   *     bounds enforced by the constructor
   */
  public static Preamble fromLogK(int logK) {
    // Range-check against the signed byte range BEFORE narrowing: an int-to-byte cast keeps
    // only the low 8 bits, so e.g. -246 would otherwise wrap to 10 and be silently accepted.
    if (logK > Byte.MAX_VALUE) {
      throw new SketchesArgumentException("logK is greater than a byte, make it smaller");
    }
    if (logK < Byte.MIN_VALUE) {
      throw new SketchesArgumentException("logK is smaller than a byte can hold, make it larger");
    }

    byte flags = new PreambleFlags.Builder()
        .setBigEndian(false)
        .setReadOnly(true)
        .setEmpty(true)
        .setSharedPreambleMode(true)
        .setSparseMode(true)
        .setUnionMode(true)
        .setEightBytePadding(false)
        .build();

    // The Builder already defaults seedHash to the hash of Util.DEFAULT_UPDATE_SEED,
    // so it does not need to be computed and set a second time here.
    return new Builder()
        .setLogConfigK((byte) logK)
        .setFlags(flags)
        .build();
  }

  /**
   * Serializes this Preamble to a byte array of {@code getPreambleLongs() * 8} bytes.
   *
   * @return this Preamble as a byte array
   * @throws SketchesArgumentException if the resulting array is shorter than 8 bytes
   */
  public byte[] toByteArray() {
    byte[] retVal = new byte[getPreambleLongs() << 3];
    intoByteArray(retVal, 0);
    return retVal;
  }

  /**
   * Writes this Preamble into the given array starting at the given offset.
   * Byte 4 of the 8-byte region is not written.
   *
   * @param bytes the destination array
   * @param offset the index in {@code bytes} at which to start writing
   * @return the offset immediately after the written region, i.e. {@code offset + 8}
   * @throws SketchesArgumentException if offset is negative or fewer than 8 bytes are
   *     available from offset
   */
  int intoByteArray(byte[] bytes, int offset) {
    // A negative offset would only enlarge (bytes.length - offset) and slip past the size
    // check below, so reject it explicitly.
    if (offset < 0) {
      throw new SketchesArgumentException("offset must not be negative: " + offset);
    }
    if ((bytes.length - offset) < SERIALIZED_BYTES) {
      throw new SketchesArgumentException("bytes too small");
    }

    Memory mem = new MemoryRegion(new NativeMemory(bytes), offset, SERIALIZED_BYTES);
    mem.putByte(0, getPreambleLongs());
    mem.putByte(1, getVersion());
    mem.putByte(2, getFamilyId());
    mem.putByte(3, getLogConfigK());
    mem.putByte(5, getFlags());
    mem.putShort(6, getSeedHash());
    return offset + SERIALIZED_BYTES;
  }

  /**
   * Gets the size of the Preamble in longs
   * @return the size of the Preamble in longs
   */
  public byte getPreambleLongs() {
    return preambleLongs;
  }

  /**
   * Gets the serialization version of this Preamble
   * @return the serialization version of this Preamble
   */
  public byte getVersion() {
    return version;
  }

  /**
   * Gets the Family ID
   * @return the Family ID
   */
  public byte getFamilyId() {
    return familyId;
  }

  /**
   * Gets the log_base2 of the configured K
   * @return the log_base2 of the configured K
   */
  public byte getLogConfigK() {
    return logConfigK;
  }

  /**
   * Gets the configured K, computed as {@code 1 << logConfigK}
   * @return the configured K
   */
  public int getConfigK() {
    return 1 << logConfigK;
  }

  /**
   * Gets the maximum auxiliary size, computed as {@code AUX_SIZE[logConfigK] << 2}
   * @return the maximum auxiliary size
   */
  public int getMaxAuxSize() {
    // NOTE(review): AUX_SIZE has 27 entries (indices 0..26); this assumes the constructor's
    // bounds keep logConfigK within that range -- confirm against Interpolation's limits.
    return AUX_SIZE[logConfigK] << 2;
  }

  /**
   * Gets the flags byte
   * @return the flags byte
   */
  public byte getFlags() {
    return flags;
  }

  /**
   * Gets the seed hash
   * @return the seed hash
   */
  public short getSeedHash() {
    return seedHash;
  }

  /**
   * Two Preambles are equal when all six fields are equal.
   */
  @Override
  public boolean equals(Object o) {
    if (this == o) {
      return true;
    }
    if (o == null || getClass() != o.getClass()) {
      return false;
    }

    Preamble preamble = (Preamble) o;

    return familyId == preamble.familyId
       && flags == preamble.flags
       && logConfigK == preamble.logConfigK
       && preambleLongs == preamble.preambleLongs
       && seedHash == preamble.seedHash
       && version == preamble.version;
  }

  /**
   * Combines all six fields, consistent with {@link #equals(Object)}.
   */
  @Override
  public int hashCode() {
    // byte and short operands are promoted to int automatically; no casts are needed.
    int result = preambleLongs;
    result = 31 * result + version;
    result = 31 * result + familyId;
    result = 31 * result + logConfigK;
    result = 31 * result + flags;
    result = 31 * result + seedHash;
    return result;
  }

  /**
   * Builder for the Preamble.
   *
   * <p>Defaults: preambleLongs, version and familyId take the class constants; logConfigK is
   * derived from {@code Util.DEFAULT_NOMINAL_ENTRIES}; flags is zero; seedHash is derived
   * from {@code Util.DEFAULT_UPDATE_SEED}.
   */
  public static class Builder {
    private byte preambleLongs = Preamble.PREAMBLE_LONGS;
    private byte version = Preamble.PREAMBLE_VERSION;
    private byte familyId = Preamble.HLL_PREAMBLE_FAMILY_ID;
    private byte logConfigK = (byte) Integer.numberOfTrailingZeros(Util.DEFAULT_NOMINAL_ENTRIES);
    private byte flags; //needs defaults?
    private short seedHash = computeSeedHash(Util.DEFAULT_UPDATE_SEED);

    /**
     * Sets the preamble longs byte
     * @param preambleLongs the size of the preamble in longs
     * @return this Builder
     */
    public Builder setPreambleLongs(byte preambleLongs) {
      this.preambleLongs = preambleLongs;
      return this;
    }

    /**
     * Sets the serialization version of this Preamble
     * @param version the serialization version
     * @return this Builder
     */
    public Builder setVersion(byte version) {
      this.version = version;
      return this;
    }

    /**
     * Sets the Family ID for this Preamble
     * @param familyId the Family ID
     * @return this Builder
     */
    public Builder setFamilyId(byte familyId) {
      this.familyId = familyId;
      return this;
    }

    /**
     * Sets the value of k by using the log_base2 of K.
     * The value is range-checked when {@link #build()} is called, not here.
     * @param logConfigK the log_base2 of K
     * @return this Builder
     */
    public Builder setLogConfigK(byte logConfigK) {
      this.logConfigK = logConfigK;
      return this;
    }

    /**
     * Sets the flags byte for this Preamble
     * @param flags the flags byte
     * @return this Builder
     */
    public Builder setFlags(byte flags) {
      this.flags = flags;
      return this;
    }

    /**
     * Sets the seed hash from the given seed
     * @param seed the given seed
     * @return this Builder
     * @throws SketchesArgumentException if the given seed hashes to zero
     */
    public Builder setSeed(long seed) {
      return setSeedHash(computeSeedHash(seed));
    }

    /**
     * Sets the seed hash directly from the given seed hash
     * @param seedHash the given seed hash
     * @return this Builder
     */
    public Builder setSeedHash(short seedHash) {
      this.seedHash = seedHash;
      return this;
    }

    /**
     * Build this Preamble
     * @return a new Preamble
     * @throws SketchesArgumentException if logConfigK is out of bounds
     */
    public Preamble build() {
      return new Preamble(preambleLongs, version, familyId, logConfigK, flags, seedHash);
    }

  } //End Builder

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy