com.yahoo.sketches.performance.SketchPerformance Maven / Gradle / Ivy
Show all versions of sketches-misc Show documentation
/*
* Copyright 2016, Yahoo! Inc.
* Licensed under the terms of the Apache License 2.0. See LICENSE file at the project root for terms.
*/
package com.yahoo.sketches.performance;
import static java.lang.Math.floor;
import static java.lang.Math.pow;
import com.yahoo.sketches.Family;
import com.yahoo.sketches.ResizeFactor;
import com.yahoo.sketches.hll.HllSketch;
import com.yahoo.sketches.hll.HllSketchBuilder;
import com.yahoo.sketches.theta.UpdateSketch;
import com.yahoo.sketches.theta.UpdateSketchBuilder;
//CHECKSTYLE.OFF: JavadocMethod
//CHECKSTYLE.OFF: WhitespaceAround
/**
* Used to generate data for plotting the error distribution or speed performance of a sketch.
* The X-axis is assumed to be the number of uniques fed to the sketch and varies from 1 to whatever
* is specified in the lgMaxU parameter. "lg" is shorthand for Log_base_2, so if lgMaxU is 12 then
* the highest number of uniques on the X-axis would be 4096. An exponential series is used for the
* unique values per trial so that a wide range of unique values (over many octaves) can be tested
* using a constant number of points per octave. This dramatically reduces the number of plotting
* points required and produces nice plots when plotted against a log axis.
*
* See the main() method as an example of how to configure.
*
* @author Lee Rhodes
*/
public class SketchPerformance {
/**
* This method drives the whole process. An exponential series is used for the unique
* counts per trial so that a wide range of unique values (over many octaves) can be tested using
* a constant number of points per octave. This dramatically reduces the number of plotting points
* required and produces nice plots when plotted against a log axis. See the main() method as an
* example of how to configure this.
*
* @param trialMgr TrialManager to be used
*/
public static void start(TrialManager trialMgr) {
long testStartTime_mS = System.currentTimeMillis();
int lastGI = trialMgr.getMaximumGeneratingIndex();
int ppo = trialMgr.getPPO();
int lastU = 0;
println(ProcessStats.getHeader());
StringBuilder dataStr = new StringBuilder();
//Each generating index (gi) will generate a new row of data
// representing N trials at a specific number of unique values.
for (int gi = 0; gi <= lastGI; gi++) {
int u = (int)floor(pow(2.0, (double)gi/ppo));
if (u == lastU) { continue; }//at the low end skips over duplicate values of u
lastU = u;
int trials = trialMgr.getTrials(u);
int lgK = trialMgr.getLgK();
double p = trialMgr.getP();
Stats[] statsArr = processTrialSet(trialMgr, u, trials);
ProcessStats.process(statsArr, u, lgK, p, dataStr);
println(dataStr.toString());
}
int testTime_S = (int)((System.currentTimeMillis() - testStartTime_mS)/1000.0);
int min = testTime_S/60;
int sec = testTime_S%60;
println("TestTime: "+min+":"+sec);
}
/**
* A Trial Set is a number of trials at number of uniques per trial, uPerTrial.
* This is set up so that the number of trials may vary based on the number of uniques for the
* trial set.
* @param trialMgr manages the sketch and updating of a stats object
* @param uPerTrial uniques for every trial of a trial set
* @param trials number of trials per trial set
* @return the Stats array contains measurements for each trial of the trial set
*/
private static Stats[] processTrialSet(TrialManager trialMgr, int uPerTrial, int trials) {
Stats[] statsArr = new Stats[trials];
//System.gc();
for (int t=0; t < trials; t++) {
if (statsArr[t] == null) { statsArr[t] = new Stats(); }
trialMgr.doTrial(statsArr[t], uPerTrial);
}
return statsArr;
}
private static void println(String s) { System.out.println(s); }
/**
* This main method sets the configuration of the sketches, the TrialManager profile, and
* runs the test.
* @param args not used.
*/
public static void main(String[] args) {
//Common parameters
int lgK = 12; //4K
boolean udSketch = true; //set true if you want to use a theta UpdateSketch, false for HLL
//Theta UpdateSketch parameters
Family family = Family.QUICKSELECT;
ResizeFactor rf = ResizeFactor.X1;// See javadocs.
boolean direct = false; //See javadocs and the setSketchProfile code
float p = 1.0F;
boolean rebuild = false; //set true if rebuild is desired to reduce size down to k.
//HLL Parameters
boolean hip = true;
boolean dense = false;
//Trials Profile Parameters
// For speed trials use min=4,5, max= 13,14,15,16
// For accuracy trials use min=max= 10 or more
int lgMinTrials = 4;
int lgMaxTrials = 13;
int lgMaxU = 20;
int ppo = 16;
//INITIALIZE
TrialManager trialMgr = new TrialManager();
trialMgr.setTrialsProfile(lgMinTrials, lgMaxTrials, lgMaxU, ppo);
UpdateSketchBuilder udBldr = null;
HllSketchBuilder hllBldr = null;
if (udSketch) { //UpdateSketch Builder
udBldr = UpdateSketch.builder().setNominalEntries(1 << lgK).setFamily(family).setP(p)
.setResizeFactor(rf);
trialMgr.setUpdateSketchBuilder(udBldr, direct, rebuild);
}
else { //HLL Builder
hllBldr = HllSketch.builder().setLogBuckets(lgK).setHipEstimator(hip).setDenseMode(dense);
trialMgr.setHllSketchBuilder(hllBldr);
}
//START THE TESTS
SketchPerformance.start(trialMgr);
//PRINT SUMMARY
if (udBldr != null) { println(udBldr.toString()); }
if (hllBldr != null) { println(hllBldr.toString()); }
println(trialMgr.toString());
}
}