All downloads are free. The search and download functionality uses the official Maven repository.

com.yahoo.sketches.hll.ProcessDistributionStream Maven / Gradle / Ivy

The newest version!
/*
 * Copyright 2016, Yahoo! Inc. Licensed under the terms of the
 * Apache License 2.0. See LICENSE file at the project root for terms.
 */

package com.yahoo.sketches.hll;

import static com.yahoo.sketches.hll.ProcessIpStream.checkLen;
import static com.yahoo.sketches.hll.ProcessIpStream.printStats;
import static com.yahoo.sketches.hll.ProcessIpStream.printTaskTime;
import static java.nio.charset.StandardCharsets.UTF_8;

import java.io.BufferedReader;
import java.io.InputStreamReader;

import com.yahoo.sketches.Util;


/**
 * Processes an input stream of pairs of integers from Standard-In into the UniqueCountMap.
 * The input stream defines a distribution whereby each pair defines the number of keys with the
 * corresponding number of unique IDs. Each pair is of the form:
 *
 * 

<NumIDs><TAB><NumKeys><line-separator>.

* *

For each input pair, this model generates NumIDs unique identifiers for each of * NumKeys (also unique) and inputs them into the UniqueCountMap.

* *

The end of the stream is a null input line.

* *

At the end of the stream, UniqueCountMap.toString() is called and sent to Standard-Out.

* *

To run, create a jar of the test code for sketches-core. * A typical command line might be as follows:

* *

* cat NumIDsTabNumKeys.txt | java -cp sketches-misc.jar:sketches-core.jar:memory.jar \ * com.yahoo.sketches.hll.ProcessDistributionStream *

*/ public class ProcessDistributionStream { private static final int IP_BYTES = 4; private static final int INIT_ENTRIES = 1000; private ProcessDistributionStream() {} /** * Main entry point. * @param args Not used. * @throws RuntimeException Generally an IOException. */ public static void main(String[] args) throws RuntimeException { ProcessDistributionStream pds = new ProcessDistributionStream(); pds.processDistributionModel(); } private void processDistributionModel() { StringBuilder sb = new StringBuilder(); long start_mS = System.currentTimeMillis(); String line = ""; long lineCount = 0; long updateCount = 0; int ipCount = 0; byte[] ipBytes = new byte[IP_BYTES]; byte[] valBytes = new byte[Long.BYTES]; UniqueCountMap map = new UniqueCountMap(INIT_ENTRIES, IP_BYTES); long updateTime_nS = 0; try (BufferedReader br = new BufferedReader(new InputStreamReader(System.in, UTF_8))) { while ((line = br.readLine()) != null) { String[] tokens = line.split("\t"); checkLen(tokens); lineCount++; long numValues = Long.parseLong(tokens[0]); //Verify the token order! long numIps = Long.parseLong(tokens[1]); for (long nips = 0; nips < numIps; nips++) { ipCount++; Util.intToBytes(ipCount, ipBytes); for (long vals = 0; vals < numValues; vals++) { long start_nS = System.nanoTime(); updateCount++; // never repeated for any ip map.update(ipBytes, Util.longToBytes(updateCount, valBytes)); long end_nS = System.nanoTime(); updateTime_nS += end_nS - start_nS; } } } String className = this.getClass().getSimpleName(); printStats(sb, className, map, lineCount, ipCount, updateCount, updateTime_nS); } catch (Exception e) { throw new RuntimeException(e); } long total_mS = System.currentTimeMillis() - start_mS; printTaskTime(sb, total_mS, updateCount); } }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy