// com.yahoo.sketches.Util Maven / Gradle / Ivy
/*
* Copyright 2015-16, Yahoo! Inc.
* Licensed under the terms of the Apache License 2.0. See LICENSE file at the project root for terms.
*/
package com.yahoo.sketches;
import static com.yahoo.sketches.hash.MurmurHash3.hash;
/**
* Common utility functions.
*
* @author Lee Rhodes
*/
public final class Util {
/**
 * The smallest Log2 cache size allowed: 32 (that is, 2^5).
 */
public static final int MIN_LG_ARR_LONGS = 5;
/**
 * The smallest Log2 nom entries allowed: 16 (that is, 2^4).
 */
public static final int MIN_LG_NOM_LONGS = 4;
/**
 * The hash table rebuild threshold = 15.0/16.0.
 */
public static final double REBUILD_THRESHOLD = 15.0 / 16.0;
/**
 * The resize threshold = 0.5; tuned for speed.
 */
public static final double RESIZE_THRESHOLD = 0.5;
// Static utility holder: the private constructor prevents instantiation.
private Util() {}
/**
 * The default nominal entries is provided as a convenience for those cases where the
 * nominal sketch size in number of entries is not provided.
 * A sketch of 4096 entries has a Relative Standard Error (RSE) of +/- 1.56% at a confidence of
 * 68%; or equivalently, a Relative Error of +/- 3.1% at a confidence of 95.4%.
 * See Default Nominal Entries
 */
public static final int DEFAULT_NOMINAL_ENTRIES = 4096;
/**
 * The seed 9001 used in the sketch update methods is a prime number that
 * was chosen very early on in experimental testing. Choosing a seed is somewhat arbitrary, and
 * the author cannot prove that this particular seed is somehow superior to other seeds. There
 * was some early internet discussion that a seed of 0 did not produce as clean avalanche
 * diagrams as non-zero seeds, but this may have been more related to the MurmurHash2 release,
 * which did have some issues. As far as the author can determine, MurmurHash3 does not have
 * these problems.
 *
 * <p>In order to perform set operations on two sketches it is critical that the hash
 * function and the seed are identical for both sketches, otherwise the assumed 1:1 relationship
 * between the original source key value and the hashed bit string would be violated. Once
 * you have developed a history of stored sketches you are stuck with it.
 * See Default Update Seed
 */
public static final long DEFAULT_UPDATE_SEED = 9001L;
/**
 * The java line separator character as a String, read from the "line.separator" system
 * property.
 */
public static final String LS = System.getProperty("line.separator");
/**
 * The tab character.
 */
public static final char TAB = '\t';
/**
 * Formats the eight bytes of a long as two-digit hexadecimal values in Big-Endian order
 * (most significant byte first). Every byte, including the last one, is followed by a
 * single space.
 * @param v the given long
 * @return string of spaced hex bytes in Big-Endian order.
 */
public static String longToHexBytes(long v) {
  final StringBuilder hex = new StringBuilder();
  // Walk from the most significant byte (shift 56) down to the least significant (shift 0).
  for (int shift = 56; shift >= 0; shift -= 8) {
    final long oneByte = (v >>> shift) & 0XFFL;
    hex.append(zeroPad(Long.toHexString(oneByte), 2));
    hex.append(" ");
  }
  return hex.toString();
}
/**
 * Builds an int array of values that are evenly spaced on a log axis.
 * This is designed for Log_base2 numbers. The first element is computed exactly as
 * {@code 1 << lgStart}; every later element is 2 raised to the interpolated exponent,
 * rounded to the nearest integer.
 * @param lgStart the Log_base2 of the starting value. E.g., for 1 lgStart = 0.
 * @param lgEnd the Log_base2 of the ending value. E.g. for 1024 lgEnd = 10.
 * @param points the total number of points including the starting and ending values.
 * @return an int array of points that will be evenly spaced on a log axis.
 */
public static int[] evenlyLgSpaced(int lgStart, int lgEnd, int points) {
  if (points < 1) {
    throw new SketchesArgumentException("points must be > 0");
  }
  if (Math.min(lgStart, lgEnd) < 0) {
    throw new SketchesArgumentException("lgStart and lgEnd must be >= 0.");
  }
  final int[] result = new int[points];
  result[0] = 1 << lgStart;
  if (points > 1) {
    // Exponent increment between neighboring points.
    final double step = (lgEnd - lgStart) / (points - 1.0);
    int idx = 1;
    while (idx < points) {
      final double exponent = step * idx + lgStart;
      result[idx] = (int) Math.round(Math.pow(2, exponent));
      idx++;
    }
  }
  return result;
}
/**
 * Assembles an int from the first four bytes of a Little-Endian byte array:
 * {@code arr[0]} supplies the least significant byte and {@code arr[3]} the most significant.
 * @param arr the given byte array
 * @return an int extracted from a Little-Endian byte array.
 */
public static int bytesToInt(byte[] arr) {
  int result = 0;
  int shift = 0;
  for (int idx = 0; idx < 4; idx++) {
    // Mask first so that a negative byte does not sign-extend into the higher bits.
    final int unsigned = arr[idx] & 0XFF;
    result |= unsigned << shift;
    shift += 8;
  }
  return result;
}
/**
 * Assembles a long from the first eight bytes of a Little-Endian byte array:
 * {@code arr[0]} supplies the least significant byte and {@code arr[7]} the most significant.
 * @param arr the given byte array
 * @return a long extracted from a Little-Endian byte array.
 */
public static long bytesToLong(byte[] arr) {
  long result = 0;
  int shift = 0;
  for (int idx = 0; idx < 8; idx++) {
    // The long mask widens the byte to 64 bits before shifting and strips sign extension.
    final long unsigned = arr[idx] & 0XFFL;
    result |= unsigned << shift;
    shift += 8;
  }
  return result;
}
/**
 * Returns a string view of a byte array: the decimal value of each byte, joined by the
 * given separator. An empty array yields an empty string.
 * @param arr the given byte array
 * @param signed set true if you want the byte values signed (-128 to 127); otherwise each
 * byte is shown as an unsigned value (0 to 255).
 * @param littleEndian set true if you want Little-Endian order (index 0 first); otherwise
 * the bytes are listed from the last index down to index 0.
 * @param sep the separator string between bytes
 * @return a string view of a byte array
 */
public static String bytesToString(
    byte[] arr, boolean signed, boolean littleEndian, String sep) {
  final int arrLen = arr.length;
  if (arrLen == 0) {
    // Nothing to print; previously this case indexed past the array bounds.
    return "";
  }
  // An all-ones mask keeps the sign-extended value; 0XFF keeps only the low 8 bits.
  final int mask = signed ? 0XFFFFFFFF : 0XFF;
  final int first = littleEndian ? 0 : arrLen - 1;
  final int step = littleEndian ? 1 : -1;
  final StringBuilder sb = new StringBuilder();
  sb.append(arr[first] & mask);
  for (int count = 1, i = first + step; count < arrLen; count++, i += step) {
    sb.append(sep).append(arr[i] & mask);
  }
  return sb.toString();
}
/**
 * Returns the given time in nanoseconds formatted as Sec.mSec uSec nSec, where the
 * millisecond, microsecond and nanosecond fields are each zero-padded to three digits.
 * @param nS the given nanoseconds
 * @return the given time in nanoseconds formatted as Sec.mSec uSec nSec
 */
public static String nanoSecToString(long nS) {
  // Pure long arithmetic: going through double would round inputs above 2^53 and
  // produce wrong low-order fields for large values.
  final long rem_nS = nS % 1000L;
  final long rem_uS = (nS / 1000L) % 1000L;
  final long rem_mS = (nS / 1000000L) % 1000L;
  final long sec = nS / 1000000000L;
  final String nSstr = zeroPad(Long.toString(rem_nS), 3);
  final String uSstr = zeroPad(Long.toString(rem_uS), 3);
  final String mSstr = zeroPad(Long.toString(rem_mS), 3);
  return String.format("%d.%3s %3s %3s", sec, mSstr, uSstr, nSstr);
}
/**
 * Returns the given time in milliseconds formatted as Hours:Min:Sec.mSec, where minutes and
 * seconds are zero-padded to two digits and milliseconds to three digits.
 * @param mS the given milliseconds
 * @return the given time in milliseconds formatted as Hours:Min:Sec.mSec
 */
public static String milliSecToString(long mS) {
  // Pure long arithmetic: going through double would round inputs above 2^53.
  final long rem_mS = mS % 1000L;
  final long rem_sec = (mS / 1000L) % 60L;
  final long rem_min = (mS / 60000L) % 60L;
  final long hr = mS / 3600000L;
  final String mSstr = zeroPad(Long.toString(rem_mS), 3);
  final String secStr = zeroPad(Long.toString(rem_sec), 2);
  final String minStr = zeroPad(Long.toString(rem_min), 2);
  return String.format("%d:%2s:%2s.%3s", hr, minStr, secStr, mSstr);
}
/**
 * Writes the four bytes of the given int into the first four positions of the supplied
 * array in Little-Endian order (least significant byte at index 0).
 * @param v the given int
 * @param arr a given array of 4 bytes that will be returned with the data
 * @return the same array instance that was passed in, now holding the Little-Endian bytes.
 */
public static byte[] intToBytes(int v, byte[] arr) {
  for (int idx = 0, shift = 0; idx < 4; idx++, shift += 8) {
    // The narrowing cast keeps only the low 8 bits of the shifted value.
    arr[idx] = (byte) (v >>> shift);
  }
  return arr;
}
/**
 * Writes the eight bytes of the given long into the first eight positions of the supplied
 * array in Little-Endian order (least significant byte at index 0).
 * @param v the given long
 * @param arr a given array of 8 bytes that will be returned with the data
 * @return the same array instance that was passed in, now holding the Little-Endian bytes.
 */
public static byte[] longToBytes(long v, byte[] arr) {
  for (int idx = 0, shift = 0; idx < 8; idx++, shift += 8) {
    // The narrowing cast keeps only the low 8 bits of the shifted value.
    arr[idx] = (byte) (v >>> shift);
  }
  return arr;
}
/**
 * Verifies that two seed hashes match. Returns normally when they are equal and throws a
 * SketchesArgumentException reporting both values when they differ.
 * @param seedHashA the seedHash A
 * @param seedHashB the seedHash B
 */
public static final void checkSeedHashes(short seedHashA, short seedHashB) {
  if (seedHashA == seedHashB) {
    return;
  }
  final String detail = "Incompatible Seed Hashes. " + seedHashA + ", " + seedHashB;
  throw new SketchesArgumentException(detail);
}
/**
 * Computes and checks the 16-bit seed hash from the given long seed. The seed is hashed as a
 * single-element array with a hash seed of zero, and the low 16 bits of the first hash word
 * are kept.
 * The seed hash may not be zero in order to maintain compatibility with older serialized
 * versions that did not have this concept; a zero result is rejected with a
 * SketchesArgumentException.
 * @param seed See Update Hash Seed
 * @return the seed hash.
 */
public static short computeSeedHash(long seed) {
  final long[] seedArr = {seed};
  final long lowBits = hash(seedArr, 0L)[0] & 0xFFFFL;
  final short seedHash = (short) lowBits;
  if (seedHash != 0) {
    return seedHash;
  }
  throw new SketchesArgumentException(
      "The given seed: " + seed + " produced a seedHash of zero. "
      + "You must choose a different seed.");
}
/**
 * Validates that parameter v is a multiple of 8 and greater than zero; throws a
 * SketchesArgumentException naming the argument otherwise.
 * @param v The parameter to check
 * @param argName This name will be part of the error message if the check fails.
 */
public static void checkIfMultipleOf8AndGT0(long v, String argName) {
  final boolean positive = v > 0L;
  // A multiple of 8 has its three lowest bits clear.
  final boolean multipleOf8 = (v & 0X7L) == 0L;
  if (!(positive && multipleOf8)) {
    throw new SketchesArgumentException("The value of the parameter \"" + argName
        + "\" must be a positive multiple of 8 and greater than zero: " + v);
  }
}
/**
 * Tests whether v is a multiple of 8 and greater than zero.
 * @param v The parameter to check
 * @return true if v is a multiple of 8 and greater than zero
 */
public static boolean isMultipleOf8AndGT0(long v) {
  if (v <= 0L) {
    return false;
  }
  return (v % 8L) == 0L;
}
/**
 * Tests whether the argument is greater than zero and exactly a power of 2
 * (1, 2, 4, 8, ...).
 *
 * @param v The input argument.
 * @return true if argument is exactly a positive power of 2 and greater than zero.
 */
public static boolean isPowerOf2(int v) {
  // A positive power of 2 has exactly one bit set.
  return (v > 0) && (Integer.bitCount(v) == 1);
}
/**
 * Validates that the given parameter is greater than zero and an integer-power of 2; throws
 * a SketchesArgumentException naming the argument otherwise.
 *
 * @param v The input argument.
 * @param argName Used in the thrown exception.
 */
public static void checkIfPowerOf2(int v, String argName) {
  // A positive power of 2 has exactly one bit set.
  final boolean valid = (v > 0) && (Integer.bitCount(v) == 1);
  if (!valid) {
    throw new SketchesArgumentException("The value of the parameter \"" + argName
        + "\" must be a positive integer-power of 2" + " and greater than 0: " + v);
  }
}
/**
 * Validates that the given value is a power of 2 (delegating to checkIfPowerOf2, which throws
 * if it is not) and then returns the log-base2 of the value.
 * @param value must be a power of 2 and greater than zero.
 * @param argName the argument name used in the exception if thrown.
 * @return the log-base2 of the given value
 */
public static int toLog2(int value, String argName) {
  checkIfPowerOf2(value, argName);
  // For a validated power of 2 the single set bit sits at index 31 - leadingZeros,
  // which is the exponent.
  final int lg = 31 - Integer.numberOfLeadingZeros(value);
  return lg;
}
/**
 * Validates that the given parameter lies between 0.0 inclusive and 1.0 inclusive; throws a
 * SketchesArgumentException naming the argument otherwise.
 *
 * @param p
 * See Sampling Probability, p
 * @param argName Used in the thrown exception.
 */
public static void checkProbability(double p, String argName) {
  // Expressed as a positive range test so that NaN, which fails both comparisons, is rejected.
  final boolean inRange = (p >= 0.0) && (p <= 1.0);
  if (!inRange) {
    throw new SketchesArgumentException("The value of the parameter \"" + argName
        + "\" must be between 0.0 inclusive and 1.0 inclusive: " + p);
  }
}
/**
 * Computes the ceiling power of 2 within the range [1, 2^30]. This is the smallest positive
 * power of 2 that is equal to or greater than the given n.
 * For:
 * <ul>
 * <li>{@code n <= 1}: returns 1</li>
 * <li>{@code 2^30 <= n <= 2^31 - 1}: returns 2^30</li>
 * <li>n == a power of 2: returns n</li>
 * <li>otherwise returns the smallest power of 2 greater than n</li>
 * </ul>
 *
 * @param n The input argument.
 * @return the ceiling power of 2.
 */
public static int ceilingPowerOf2(int n) {
  final int maxPwrOf2 = 1 << 30;
  if (n <= 1) {
    return 1;
  }
  if (n >= maxPwrOf2) {
    return maxPwrOf2;
  }
  // Here 1 <= n - 1 < 2^30, so the number of significant bits of (n - 1) is the exponent
  // of the smallest power of 2 that is >= n.
  final int significantBits = Integer.SIZE - Integer.numberOfLeadingZeros(n - 1);
  return 1 << significantBits;
}
/**
 * Computes the floor power of 2 within the range [1, 2^30]. This is the largest positive
 * power of 2 that is equal to or less than the given n.
 * For:
 * <ul>
 * <li>{@code n <= 1}: returns 1</li>
 * <li>{@code 2^30 <= n <= 2^31 - 1}: returns 2^30</li>
 * <li>n == a power of 2: returns n</li>
 * <li>otherwise returns the largest power of 2 less than n</li>
 * </ul>
 *
 * @param n The given argument.
 * @return the floor power of 2.
 */
public static int floorPowerOf2(int n) {
  if (n > 1) {
    // Index of the highest set bit; keeping only that bit gives the floor power of 2.
    final int topBitIndex = 31 - Integer.numberOfLeadingZeros(n);
    return 1 << topBitIndex;
  }
  return 1;
}
/**
 * Computes the inverse integer power of 2: 1/(2^e) = 2^(-e).
 * The result is assembled directly from the IEEE 754 bit pattern of a double rather than
 * computed with floating-point arithmetic.
 * @param e a positive value between 0 and 1023 inclusive. The range is only enforced by an
 * assert, so it is checked only when assertions are enabled.
 * @return the inverse integer power of 2: 1/(2^e) = 2^(-e)
 */
public static double invPow2(int e) {
  // The OR of e and (1023 - e) is negative exactly when e < 0 or e > 1023.
  assert (e | (1024 - e - 1)) >= 0 : "e cannot be negative or greater than 1023: " + e;
  if (e == 1023) {
    // 2^-1023 is subnormal: the exponent field is zero and only the top fraction bit is set.
    // The normal-number formula below would yield an all-zero pattern, i.e. 0.0.
    return Double.longBitsToDouble(1L << 51);
  }
  // Normal numbers: biased exponent (1023 - e) placed in the exponent field, zero fraction.
  return Double.longBitsToDouble((1023L - e) << 52);
}
/**
 * Unsigned less-than comparison of two longs.
 * @param n1 A long to be treated as if unsigned.
 * @param n2 A long to be treated as if unsigned.
 * @return true if n1 &lt; n2 when both are interpreted as unsigned 64-bit values.
 */
public static boolean isLessThanUnsigned(long n1, long n2) {
  // Flipping the sign bit maps unsigned order onto signed order.
  final long biased1 = n1 ^ Long.MIN_VALUE;
  final long biased2 = n2 ^ Long.MIN_VALUE;
  return biased1 < biased2;
}
/**
 * Gets the smallest allowed exponent of 2 that is a sub-multiple of the target by zero,
 * one or more resize factors.
 * <ul>
 * <li>If lgTarget is not greater than lgMin, lgMin is returned.</li>
 * <li>Otherwise, if the Log2 of the resize factor is zero, lgTarget is returned.</li>
 * <li>Otherwise the result is lgMin plus the remainder of (lgTarget - lgMin) divided by the
 * Log2 of the resize factor.</li>
 * </ul>
 *
 * @param lgTarget Log2 of the target size
 * @param rf See Resize Factor
 * @param lgMin Log2 of the minimum allowed starting size
 * @return The Log2 of the starting size
 */
public static final int startingSubMultiple(int lgTarget, ResizeFactor rf, int lgMin) {
  final int lgRF = rf.lg();
  if (lgTarget <= lgMin) {
    return lgMin;
  }
  if (lgRF == 0) {
    return lgTarget;
  }
  final int excess = lgTarget - lgMin;
  return excess % lgRF + lgMin;
}
/**
 * Left-pads the given string with '0' characters up to the given field length by delegating
 * to characterPad. If the given string is already equal to or longer than the field length,
 * it is returned without modification.
 * @param s the given string
 * @param fieldLength desired total field length including the given string
 * @return the given string prepended with zeros.
 */
public static final String zeroPad(String s, int fieldLength) {
  final char zero = '0';
  final boolean postpend = false; // zeros go in front of the string
  return characterPad(s, fieldLength, zero, postpend);
}
/**
 * Prepend or postpend the given string with the given character to fill the given field
 * length. If the given string is equal to or longer than the given field length, it is
 * returned without modification.
 * @param s the given string
 * @param fieldLength the desired field length
 * @param padChar the desired pad character
 * @param postpend if true append the pad characters to the end of the string; otherwise
 * insert them in front of it.
 * @return prepended or postpended given string with the given character to fill the given
 * field length.
 */
public static final String characterPad(String s, int fieldLength, char padChar,
    boolean postpend) {
  final int padCount = fieldLength - s.length();
  if (padCount <= 0) {
    // Already wide enough: hand back the caller's string untouched.
    return s;
  }
  final StringBuilder padded = new StringBuilder(fieldLength);
  if (postpend) {
    padded.append(s);
  }
  for (int k = 0; k < padCount; k++) {
    padded.append(padChar);
  }
  if (!postpend) {
    padded.append(s);
  }
  return padded.toString();
}
}