All downloads are free. The search and download features use the official Maven repository.

com.yahoo.sketches.Util Maven / Gradle / Ivy

There is a newer version: 0.13.4
Show newest version
/*
 * Copyright 2015-16, Yahoo! Inc.
 * Licensed under the terms of the Apache License 2.0. See LICENSE file at the project root for terms.
 */

package com.yahoo.sketches;

import static com.yahoo.sketches.hash.MurmurHash3.hash;

/**
 * Common utility functions.
 *
 * @author Lee Rhodes
 */
public final class Util {

  /**
   * The smallest Log2 cache size allowed: 32.
   */
  public static final int MIN_LG_ARR_LONGS = 5;

  /**
   * The smallest Log2 nom entries allowed: 16.
   */
  public static final int MIN_LG_NOM_LONGS = 4;

  /**
   * The hash table rebuild threshold = 15.0/16.0.
   */
  public static final double REBUILD_THRESHOLD = 15.0 / 16.0;

  /**
   * The resize threshold = 0.5; tuned for speed.
   */
  public static final double RESIZE_THRESHOLD = 0.5;

  private Util() {}

  /**
   * The default nominal entries is provided as a convenience for those cases where the
   * nominal sketch size in number of entries is not provided.
   * A sketch of 4096 entries has a Relative Standard Error (RSE) of +/- 1.56% at a confidence of
   * 68%; or equivalently, a Relative Error of +/- 3.1% at a confidence of 95.4%.
   * See Default Nominal Entries
   */
  public static final int DEFAULT_NOMINAL_ENTRIES = 4096;

  /**
   * The seed 9001 used in the sketch update methods is a prime number that
   * was chosen very early on in experimental testing. Choosing a seed is somewhat arbitrary, and
   * the author cannot prove that this particular seed is somehow superior to other seeds.  There
   * was some early internet disussion that a seed of 0 did not produce as clean avalanche diagrams
   * as non-zero seeds, but this may have been more related to the MurmurHash2 release, which did
   * have some issues. As far as the author can determine, MurmurHash3 does not have these problems.
   *
   * 

In order to perform set operations on two sketches it is critical that the same hash * function and seed are identical for both sketches, otherwise the assumed 1:1 relationship * between the original source key value and the hashed bit string would be violated. Once * you have developed a history of stored sketches you are stuck with it. * See Default Update Seed */ public static final long DEFAULT_UPDATE_SEED = 9001L; /** * The java line separator character as a String. */ public static final String LS = System.getProperty("line.separator"); /** * The tab character */ public static final char TAB = '\t'; /** * Returns a string of spaced hex bytes in Big-Endian order. * @param v the given long * @return string of spaced hex bytes in Big-Endian order. */ public static String longToHexBytes(long v) { long mask = 0XFFL; StringBuilder sb = new StringBuilder(); for (int i = 8; i-- > 0; ) { String s = Long.toHexString((v >>> i * 8) & mask); sb.append(zeroPad(s, 2)).append(" "); } return sb.toString(); } /** * Returns an int array of points that will be evenly spaced on a log axis. * This is designed for Log_base2 numbers. * @param lgStart the Log_base2 of the starting value. E.g., for 1 lgStart = 0. * @param lgEnd the Log_base2 of the ending value. E.g. for 1024 lgEnd = 10. * @param points the total number of points including the starting and ending values. * @return an int array of points that will be evenly spaced on a log axis. 
*/ public static int[] evenlyLgSpaced(int lgStart, int lgEnd, int points) { if (points <= 0) { throw new SketchesArgumentException("points must be > 0"); } if ((lgEnd < 0) || (lgStart < 0)) { throw new SketchesArgumentException("lgStart and lgEnd must be >= 0."); } int[] out = new int[points]; out[0] = 1 << lgStart; if (points == 1) { return out; } double delta = (lgEnd - lgStart) / (points - 1.0); for (int i = 1; i < points; i++) { double mXpY = delta * i + lgStart; out[i] = (int)Math.round(Math.pow(2, mXpY)); } return out; } /** * Returns an int extracted from a Little-Endian byte array. * @param arr the given byte array * @return an int extracted from a Little-Endian byte array. */ public static int bytesToInt(byte[] arr) { int v = 0; for (int i = 0; i < 4; i++) { v |= (arr[i] & 0XFF) << i * 8; } return v; } /** * Returns a long extracted from a Little-Endian byte array. * @param arr the given byte array * @return a long extracted from a Little-Endian byte array. */ public static long bytesToLong(byte[] arr) { long v = 0; for (int i = 0; i < 8; i++) { v |= (arr[i] & 0XFFL) << i * 8; } return v; } /** * Returns a string view of a byte array * @param arr the given byte array * @param signed set true if you want the byte values signed. * @param littleEndian set true if you want Little-Endian order * @param sep the separator string between bytes * @return a string view of a byte array */ public static String bytesToString( byte[] arr, boolean signed, boolean littleEndian, String sep) { StringBuilder sb = new StringBuilder(); int mask = (signed) ? 
0XFFFFFFFF : 0XFF; int arrLen = arr.length; if (littleEndian) { for (int i = 0; i < arrLen - 1; i++) { sb.append(arr[i] & mask).append(sep); } sb.append(arr[arrLen - 1] & mask); } else { for (int i = arrLen; i-- > 1; ) { sb.append(arr[i] & mask).append(sep); } sb.append(arr[0] & mask); } return sb.toString(); } /** * Returns the given time in nanoseconds formatted as Sec.mSec uSec nSec * @param nS the given nanoseconds * @return the given time in nanoseconds formatted as Sec.mSec uSec nSec */ public static String nanoSecToString(long nS) { long rem_nS = (long)(nS % 1000.0); long rem_uS = (long)((nS / 1000.0) % 1000.0); long rem_mS = (long)((nS / 1000000.0) % 1000.0); long sec = (long)(nS / 1000000000.0); String nSstr = zeroPad(Long.toString(rem_nS), 3); String uSstr = zeroPad(Long.toString(rem_uS), 3); String mSstr = zeroPad(Long.toString(rem_mS), 3); return String.format("%d.%3s %3s %3s", sec, mSstr, uSstr, nSstr); } /** * Returns the given time in milliseconds formatted as Hours:Min:Sec.mSec * @param mS the given nanoseconds * @return the given time in milliseconds formatted as Hours:Min:Sec.mSec */ public static String milliSecToString(long mS) { long rem_mS = (long)(mS % 1000.0); long rem_sec = (long)((mS / 1000.0) % 60.0); long rem_min = (long)((mS / 60000.0) % 60.0); long hr = (long)(mS / 3600000.0); String mSstr = zeroPad(Long.toString(rem_mS), 3); String secStr = zeroPad(Long.toString(rem_sec), 2); String minStr = zeroPad(Long.toString(rem_min), 2); return String.format("%d:%2s:%2s.%3s", hr, minStr, secStr, mSstr); } /** * Returns a Little-Endian byte array extracted from the given int. * @param v the given int * @param arr a given array of 4 bytes that will be returned with the data * @return a Little-Endian byte array extracted from the given int. */ public static byte[] intToBytes(int v, byte[] arr) { for (int i = 0; i < 4; i++) { arr[i] = (byte) (v & 0XFF); v >>>= 8; } return arr; } /** * Returns a Little-Endian byte array extracted from the given long. 
* @param v the given long * @param arr a given array of 8 bytes that will be returned with the data * @return a Little-Endian byte array extracted from the given long. */ public static byte[] longToBytes(long v, byte[] arr) { for (int i = 0; i < 8; i++) { arr[i] = (byte) (v & 0XFFL); v >>>= 8; } return arr; } /** * Check if the two seed hashes are equal. If not, throw an SketchesArgumentException. * @param seedHashA the seedHash A * @param seedHashB the seedHash B */ public static final void checkSeedHashes(short seedHashA, short seedHashB) { if (seedHashA != seedHashB) { throw new SketchesArgumentException( "Incompatible Seed Hashes. " + seedHashA + ", " + seedHashB); } } /** * Computes and checks the 16-bit seed hash from the given long seed. * The seed hash may not be zero in order to maintain compatibility with older serialized * versions that did not have this concept. * @param seed See Update Hash Seed * @return the seed hash. */ public static short computeSeedHash(long seed) { long[] seedArr = {seed}; short seedHash = (short)((hash(seedArr, 0L)[0]) & 0xFFFFL); if (seedHash == 0) { throw new SketchesArgumentException( "The given seed: " + seed + " produced a seedHash of zero. " + "You must choose a different seed."); } return seedHash; } /** * Checks if parameter v is a multiple of 8 and greater than zero. * @param v The parameter to check * @param argName This name will be part of the error message if the check fails. 
*/ public static void checkIfMultipleOf8AndGT0(long v, String argName) { if (((v & 0X7L) == 0L) && (v > 0L)) { return; } throw new SketchesArgumentException("The value of the parameter \"" + argName + "\" must be a positive multiple of 8 and greater than zero: " + v); } /** * Returns true if v is a multiple of 8 and greater than zero * @param v The parameter to check * @return true if v is a multiple of 8 and greater than zero */ public static boolean isMultipleOf8AndGT0(long v) { return (((v & 0X7L) == 0L) && (v > 0L)); } /** * Returns true if argument is exactly a positive power of 2 and greater than zero. * * @param v The input argument. * @return true if argument is exactly a positive power of 2 and greater than zero. */ public static boolean isPowerOf2(int v) { return (v > 0) && ((v & (v - 1)) == 0); //or (v > 0) && ((v & -v) == v) } /** * Checks the given parameter to make sure it is positive, an integer-power of 2 and greater than * zero. * * @param v The input argument. * @param argName Used in the thrown exception. */ public static void checkIfPowerOf2(int v, String argName) { if ((v > 0) && ((v & (v - 1)) == 0)) { return; } throw new SketchesArgumentException("The value of the parameter \"" + argName + "\" must be a positive integer-power of 2" + " and greater than 0: " + v); } /** * Checks the given value if it is a power of 2. If not, it throws an exception. * Otherwise, returns the log-base2 of the given value. * @param value must be a power of 2 and greater than zero. * @param argName the argument name used in the exception if thrown. * @return the log-base2 of the given value */ public static int toLog2(int value, String argName) { checkIfPowerOf2(value, argName); return Integer.numberOfTrailingZeros(value); } /** * Checks the given parameter to make sure it is positive and between 0.0 inclusive and 1.0 * inclusive. * * @param p * See Sampling Probability, p * @param argName Used in the thrown exception. 
*/ public static void checkProbability(double p, String argName) { if ((p >= 0.0) && (p <= 1.0)) { return; } throw new SketchesArgumentException("The value of the parameter \"" + argName + "\" must be between 0.0 inclusive and 1.0 inclusive: " + p); } /** * Computes the ceiling power of 2 within the range [1, 2^30]. This is the smallest positive power * of 2 that equal to or greater than the given n.
* For: *

    *
  • n ≤ 1: returns 1
  • *
  • 2^30 ≤ n ≤ 2^31 -1 : returns 2^30
  • *
  • n == a power of 2 : returns n
  • *
  • otherwise returns the smallest power of 2 greater than n
  • *
* * @param n The input argument. * @return the ceiling power of 2. */ public static int ceilingPowerOf2(int n) { if (n <= 1) { return 1; } int topPwrOf2 = 1 << 30; return (n >= topPwrOf2) ? topPwrOf2 : Integer.highestOneBit((n - 1) << 1); } /** * Computes the floor power of 2 within the range [1, 2^30]. This is the largest positive power of * 2 that equal to or less than the given n.
* For: *
    *
  • n ≤ 1: returns 1
  • *
  • 2^30 ≤ n ≤ 2^31 -1 : returns 2^30
  • *
  • n == a power of 2 : returns n
  • *
  • otherwise returns the largest power of 2 less than n
  • *
* * @param n The given argument. * @return the floor power of 2. */ public static int floorPowerOf2(int n) { if (n <= 1) { return 1; } return Integer.highestOneBit(n); } /** * Computes the inverse integer power of 2: 1/(2^e) = 2^(-e). * @param e a positive value between 0 and 1023 inclusive * @return the inverse integer power of 2: 1/(2^e) = 2^(-e) */ public static double invPow2(int e) { assert (e | (1024 - e - 1)) >= 0 : "e cannot be negative or greater than 1023: " + e; return Double.longBitsToDouble((1023L - e) << 52); } /** * Unsigned compare with longs. * @param n1 A long to be treated as if unsigned. * @param n2 A long to be treated as if unsigned. * @return true if n1 > n2. */ public static boolean isLessThanUnsigned(long n1, long n2) { return (n1 < n2) ^ ((n1 < 0) != (n2 < 0)); } /** * Gets the smallest allowed exponent of 2 that it is a sub-multiple of the target by zero, * one or more resize factors. * * @param lgTarget Log2 of the target size * @param rf See Resize Factor * @param lgMin Log2 of the minimum allowed starting size * @return The Log2 of the starting size */ public static final int startingSubMultiple(int lgTarget, ResizeFactor rf, int lgMin) { int lgRF = rf.lg(); return (lgTarget <= lgMin) ? lgMin : (lgRF == 0) ? lgTarget : (lgTarget - lgMin) % lgRF + lgMin; } /** * Prepend the given string with zeros. If the given string is equal or greater than the given * field length, it will be returned without modification. * @param s the given string * @param fieldLength desired total field length including the given string * @return the given string prepended with zeros. */ public static final String zeroPad(String s, int fieldLength) { return characterPad(s, fieldLength, '0', false); } /** * Prepend or postpend the given string with the given character to fill the given field length. * If the given string is equal or greater than the given field length, it will be returned * without modification. 
* @param s the given string * @param fieldLength the desired field length * @param padChar the desired pad character * @param postpend if true append the pacCharacters to the end of the string. * @return prepended or postpended given string with the given character to fill the given field * length. */ public static final String characterPad(String s, int fieldLength, char padChar, boolean postpend) { char[] chArr = s.toCharArray(); int sLen = chArr.length; if (sLen < fieldLength) { char[] out = new char[fieldLength]; int blanks = fieldLength - sLen; if (postpend) { for (int i = 0; i < sLen; i++) { out[i] = chArr[i]; } for (int i = sLen; i < fieldLength; i++) { out[i] = padChar; } } else { //prepend for (int i = 0; i < blanks; i++) { out[i] = padChar; } for (int i = blanks; i < fieldLength; i++) { out[i] = chArr[i - blanks]; } } return String.valueOf(out); } return s; } }




© 2015 - 2025 Weber Informatics LLC | Privacy Policy