All downloads are free. The search and download features use the official Maven repository.

com.yahoo.sketches.cmd.CommandLine Maven / Gradle / Ivy

The newest version!
/*
 * Copyright 2016, Yahoo! Inc.
 * Licensed under the terms of the Apache License 2.0. See LICENSE file at the project root for terms.
 */

package com.yahoo.sketches.cmd;

import static com.yahoo.sketches.Util.LS;
import static com.yahoo.sketches.Util.TAB;
import static java.lang.Math.log10;
import static java.lang.Math.pow;
import static java.nio.charset.StandardCharsets.UTF_8;

import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStreamReader;

import com.yahoo.sketches.frequencies.ErrorType;
import com.yahoo.sketches.frequencies.ItemsSketch;
import com.yahoo.sketches.frequencies.LongsSketch.Row;
import com.yahoo.sketches.quantiles.DoublesSketch;
import com.yahoo.sketches.quantiles.DoublesSketchBuilder;
import com.yahoo.sketches.theta.Sketches;
import com.yahoo.sketches.theta.UpdateSketch;
import com.yahoo.sketches.theta.UpdateSketchBuilder;

//CHECKSTYLE.OFF: JavadocMethod
//CHECKSTYLE.OFF: WhitespaceAround
/**
 * Command line access to the basic sketch functions.  This is intentionally a very simple parser
 * with limited functionality that can be used for small experiments and for demos.
 * Although the sketching library can be used on a single machine, the more typical use case is on
 * large, highly distributed system architectures where a CLI is not of much use.
 */
public class CommandLine {
  /** ANSI escape sequence that switches the terminal to bold text. */
  private static final String BOLD = "\033[1m";
  /** ANSI escape sequence that resets the terminal text attributes. */
  private static final String OFF = "\033[0m";
  /** When true, normal System.out messages are disabled; System.err messages are not affected. */
  private boolean disablePrint = false;

  /** Creates an instance with normal printing enabled; this is the constructor used by main. */
  CommandLine() {}

  /**
   * Test-oriented constructor: records the print setting and then immediately parses and
   * executes the given arguments, exactly as main would.
   * @param disablePrint if true, disables normal System.out messages, but not System.err messages.
   * @param args the same args list as used for main.
   */
  CommandLine(boolean disablePrint, String[] args) {
    //the flag must be stored before parsing so that it is in effect while the command runs
    this.disablePrint = disablePrint;
    parseType(args);
  }

  /**
   * Command line entry point. Creates a CommandLine with printing enabled and hands it the
   * arguments for parsing.
   * @param args the command line arguments; the first one selects the sketch type.
   */
  public static void main(String[] args) {
    new CommandLine().parseType(args);
  }

  /**
   * Examines the first argument (the TYPE) and dispatches to the matching parser.
   * The comparison is case-insensitive. If there are no arguments, or the first argument is
   * null or empty, the general help is shown. An unknown TYPE is reported on the error stream
   * and followed by the general help.
   * @param args the command line arguments, may be null or empty.
   */
  private void parseType(String[] args) {
    if ((args == null) || (args.length == 0) || (args[0] == null) || args[0].isEmpty()) {
      help();
      return;
    }
    //Locale.ROOT keeps the lower-casing independent of the default locale. With a Turkish or
    //Azeri default locale "UNIQ".toLowerCase() yields a dotless 'i' and would never match.
    final String token1 = args[0].toLowerCase(java.util.Locale.ROOT);
    switch (token1) {
      case "uniq": parseUniq(args); break;
      case "rank": parseRank(args); break;
      case "hist": parseHist(args); break;
      case "loghist": parseLogHist(args); break;
      case "freq": parseFreq(args); break;
      case "help": help(); break;
      default: {
        printlnErr("Unrecognized TYPE: "+token1);
        help();
      }
    }
  }

  /**
   * Classifies the layout of the argument list. The caller has already validated args[0].
   * <ul>
   * <li>1: only the type was given; assume default k and System.in</li>
   * <li>2: the second argument is "help" (case-insensitive)</li>
   * <li>3: the second argument is not numeric, so it must be a file name</li>
   * <li>4: exactly two arguments and the second is numeric (k); assume System.in</li>
   * <li>5: three or more arguments and the second is numeric (k); the third must be a file name</li>
   * </ul>
   * @param args the command line arguments with a valid type in args[0].
   * @return the case number, 1 through 5, as listed above.
   */
  private static int parseArgsCase(String[] args) { //we already know type, args[0] is valid
    final int count = args.length;
    if (count == 1) { return 1; } //only type
    final String second = args[1]; //could be help, k (numeric) or a fileName
    if (second.equalsIgnoreCase("help")) { return 2; }
    if (!isNumeric(second)) { return 3; } //not numeric, must be a filename
    //second arg is numeric; with exactly 2 args there is no filename
    return (count == 2) ? 4 : 5;
  }

  /**
   * Handles the "uniq" type: based on the argument layout reported by parseArgsCase, either
   * shows the uniq help or builds an UpdateSketch (default or user-specified k) and feeds it
   * from a file or from the reader that getBR supplies for a null file name.
   * @param args the command line arguments with "uniq" in args[0].
   */
  private void parseUniq(String[] args) {
    final UpdateSketchBuilder builder = Sketches.updateSketchBuilder();
    final int layout = parseArgsCase(args);

    if (layout == 2) { //help
      uniqHelp();
      return;
    }
    if ((layout == 1) || (layout == 3)) { //default k; case 3 carries a filename in args[1]
      final BufferedReader reader = getBR((layout == 1) ? null : args[1]);
      doUniq(reader, builder.build());
      return;
    }
    if ((layout == 4) || (layout == 5)) { //args[1] is numeric = k; case 5 carries a filename in args[2]
      final UpdateSketch sized = builder.build(Integer.parseInt(args[1]));
      doUniq(getBR((layout == 4) ? null : args[2]), sized);
    }
  }

  /**
   * Feeds every line from the reader into the sketch as a String item and then prints the
   * sketch's toString() output.
   * @param br the source of items, one per line.
   * @param sketch the sketch to be updated.
   * @throws RuntimeException wrapping the IOException if reading fails; the last item that was
   * read successfully is reported on the error stream first.
   */
  private void doUniq(BufferedReader br, UpdateSketch sketch) {
    String lastItem = ""; //last line read successfully, for the error message
    try {
      String line = br.readLine();
      while (line != null) {
        lastItem = line;
        sketch.update(line);
        line = br.readLine();
      }
    } catch (IOException e) {
      printlnErr("Read Error: Item: "+lastItem +", "+br.toString());
      throw new RuntimeException(e);
    }
    println(sketch.toString());
  }

  /**
   * Handles the "rank" type: based on the argument layout reported by parseArgsCase, either
   * shows the rank help or builds a DoublesSketch (default or user-specified k) and feeds it
   * from a file or from the reader that getBR supplies for a null file name.
   * @param args the command line arguments with "rank" in args[0].
   */
  private void parseRank(String[] args) {
    final DoublesSketchBuilder builder = new DoublesSketchBuilder();
    final int layout = parseArgsCase(args);

    if (layout == 2) { //help
      rankHelp();
      return;
    }
    if ((layout == 1) || (layout == 3)) { //default k; case 3 carries a filename in args[1]
      final BufferedReader reader = getBR((layout == 1) ? null : args[1]);
      doRank(reader, builder.build());
      return;
    }
    if ((layout == 4) || (layout == 5)) { //args[1] is numeric = k; case 5 carries a filename in args[2]
      final DoublesSketch sized = builder.build(Integer.parseInt(args[1]));
      doRank(getBR((layout == 4) ? null : args[2]), sized);
    }
  }

  private void doRank(BufferedReader br, DoublesSketch sketch) {
    String itemStr = "";
    try {
      while ((itemStr = br.readLine()) != null) {
        double item = Double.parseDouble(itemStr);
        sketch.update(item);
      }
    } catch (IOException | NumberFormatException e ) {
      printlnErr("Read Error: Item: "+itemStr +", "+br.toString());
      throw new RuntimeException(e);
    }
    int ranks = 101;
    double[] valArr = sketch.getQuantiles(ranks);
    println("Rank"+TAB+ "Value");
    for (int i=0; i sketch;
    int defaultSize = 1 << 17; //128K
    int argsCase = parseArgsCase(args);
    switch (argsCase) {
      case 1:
        sketch = new ItemsSketch(defaultSize);
        doFreq(getBR(null), sketch); break; //[default k], [System.in]
      case 2:
        freqHelp(); break; //help
      case 3: //2nd arg not numeric, must be a filename
        sketch = new ItemsSketch(defaultSize);
        doFreq(getBR(args[1]), sketch); break; //[default k], file
      case 4: //2nd arg is numeric, no filename
        sketch = new ItemsSketch(Integer.parseInt(args[1])); //args[1] is numeric = k
        doFreq(getBR(null), sketch); //user k, [System.in]
        break;
      case 5: //3 valid args
        sketch = new ItemsSketch(Integer.parseInt(args[1])); //args[1] is numeric = k
        doFreq(getBR(args[2]), sketch);
    }
  }

  private void doFreq(BufferedReader br, ItemsSketch sketch) {
    String itemStr = "";
    try {
      while ((itemStr = br.readLine()) != null) {
        sketch.update(itemStr);
      }
    } catch (IOException e ) {
      printlnErr("Read Error: Item: "+itemStr +", "+br.toString());
      throw new RuntimeException(e);
    }
    //NFP is a subset of NFN
    ItemsSketch.Row[] rowArr = sketch.getFrequentItems(ErrorType.NO_FALSE_POSITIVES);
    int len = rowArr.length;
    println("Qualifying Rows: "+len);
    println(Row.getRowHeader());
    for (int i=0; i




© 2015 - 2024 Weber Informatics LLC | Privacy Policy