All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.yahoo.sketches.performance.SketchPerformance Maven / Gradle / Ivy

The newest version!
/*
 * Copyright 2016, Yahoo! Inc.
 * Licensed under the terms of the Apache License 2.0. See LICENSE file at the project root for terms.
 */

package com.yahoo.sketches.performance;

import static java.lang.Math.floor;
import static java.lang.Math.pow;

import com.yahoo.sketches.Family;
import com.yahoo.sketches.ResizeFactor;
import com.yahoo.sketches.hll.HllSketch;
import com.yahoo.sketches.hll.HllSketchBuilder;
import com.yahoo.sketches.theta.UpdateSketch;
import com.yahoo.sketches.theta.UpdateSketchBuilder;

//CHECKSTYLE.OFF: JavadocMethod
//CHECKSTYLE.OFF: WhitespaceAround
/**
 * Used to generate data for plotting the error distribution or speed performance of a sketch.
 * The X-axis is assumed to be the number of uniques fed to the sketch and varies from 1 to whatever
 * is specified in the lgMaxU parameter.  "lg" is shorthand for Log_base_2, so if lgMaxU is 12 then
 * the highest number of uniques on the X-axis would be 4096. An exponential series is used for the
 * unique values per trial so that a wide range of unique values (over many octaves) can be tested
 * using a constant number of points per octave. This dramatically reduces the number of plotting
 * points required and produces nice plots when plotted against a log axis.
 *
 * 

See the main() method as an example of how to configure. * * @author Lee Rhodes */ public class SketchPerformance { /** * This method drives the whole process. An exponential series is used for the unique * counts per trial so that a wide range of unique values (over many octaves) can be tested using * a constant number of points per octave. This dramatically reduces the number of plotting points * required and produces nice plots when plotted against a log axis. See the main() method as an * example of how to configure this. * * @param trialMgr TrialManager to be used */ public static void start(TrialManager trialMgr) { long testStartTime_mS = System.currentTimeMillis(); int lastGI = trialMgr.getMaximumGeneratingIndex(); int ppo = trialMgr.getPPO(); int lastU = 0; println(ProcessStats.getHeader()); StringBuilder dataStr = new StringBuilder(); //Each generating index (gi) will generate a new row of data // representing N trials at a specific number of unique values. for (int gi = 0; gi <= lastGI; gi++) { int u = (int)floor(pow(2.0, (double)gi/ppo)); if (u == lastU) { continue; }//at the low end skips over duplicate values of u lastU = u; int trials = trialMgr.getTrials(u); int lgK = trialMgr.getLgK(); double p = trialMgr.getP(); Stats[] statsArr = processTrialSet(trialMgr, u, trials); ProcessStats.process(statsArr, u, lgK, p, dataStr); println(dataStr.toString()); } int testTime_S = (int)((System.currentTimeMillis() - testStartTime_mS)/1000.0); int min = testTime_S/60; int sec = testTime_S%60; println("TestTime: "+min+":"+sec); } /** * A Trial Set is a number of trials at number of uniques per trial, uPerTrial. * This is set up so that the number of trials may vary based on the number of uniques for the * trial set. * @param trialMgr manages the sketch and updating of a stats object * @param uPerTrial uniques for every trial of a trial set * @param trials number of trials per trial set * @return the Stats array contains measurements for each trial of the trial set */ private static Stats[] processTrialSet(TrialManager trialMgr, int uPerTrial, int trials) { Stats[] statsArr = new Stats[trials]; //System.gc(); for (int t=0; t < trials; t++) { if (statsArr[t] == null) { statsArr[t] = new Stats(); } trialMgr.doTrial(statsArr[t], uPerTrial); } return statsArr; } private static void println(String s) { System.out.println(s); } /** * This main method sets the configuration of the sketches, the TrialManager profile, and * runs the test. * @param args not used. */ public static void main(String[] args) { //Common parameters int lgK = 12; //4K boolean udSketch = true; //set true if you want to use a theta UpdateSketch, false for HLL //Theta UpdateSketch parameters Family family = Family.QUICKSELECT; ResizeFactor rf = ResizeFactor.X1;// See javadocs. boolean direct = false; //See javadocs and the setSketchProfile code float p = 1.0F; boolean rebuild = false; //set true if rebuild is desired to reduce size down to k. //HLL Parameters boolean hip = true; boolean dense = false; //Trials Profile Parameters // For speed trials use min=4,5, max= 13,14,15,16 // For accuracy trials use min=max= 10 or more int lgMinTrials = 4; int lgMaxTrials = 13; int lgMaxU = 20; int ppo = 16; //INITIALIZE TrialManager trialMgr = new TrialManager(); trialMgr.setTrialsProfile(lgMinTrials, lgMaxTrials, lgMaxU, ppo); UpdateSketchBuilder udBldr = null; HllSketchBuilder hllBldr = null; if (udSketch) { //UpdateSketch Builder udBldr = UpdateSketch.builder().setNominalEntries(1 << lgK).setFamily(family).setP(p) .setResizeFactor(rf); trialMgr.setUpdateSketchBuilder(udBldr, direct, rebuild); } else { //HLL Builder hllBldr = HllSketch.builder().setLogBuckets(lgK).setHipEstimator(hip).setDenseMode(dense); trialMgr.setHllSketchBuilder(hllBldr); } //START THE TESTS SketchPerformance.start(trialMgr); //PRINT SUMMARY if (udBldr != null) { println(udBldr.toString()); } if (hllBldr != null) { println(hllBldr.toString()); } println(trialMgr.toString()); } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy