All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.yahoo.sketches.pig.theta.SketchToString Maven / Gradle / Ivy

/*
 * Copyright 2015, Yahoo! Inc.
 * Licensed under the terms of the Apache License 2.0. See LICENSE file at the project root for terms.
 */
package com.yahoo.sketches.pig.theta;

import static com.yahoo.sketches.pig.theta.PigUtil.tupleToSketch;

import java.io.IOException;

import org.apache.pig.EvalFunc;
import org.apache.pig.data.DataType;
import org.apache.pig.data.Tuple;
import org.apache.pig.impl.logicalLayer.schema.Schema;

import com.yahoo.sketches.Util;
import com.yahoo.sketches.theta.Sketch;

/**
 * This is a User Defined Function (UDF) for "pretty printing" the summary of a sketch
 * from a Sketch Tuple.
 *
 * <p>Refer to {@link DataToSketch#exec(Tuple)} for the definition of a Sketch Tuple.
 *
 * @author Lee Rhodes
 */
public class SketchToString extends EvalFunc<String> {
  /** When true, the printed output includes the bucket detail in addition to the summary. */
  private final boolean detailOut;
  /** Seed passed to {@code tupleToSketch} when reconstructing the sketch from the tuple. */
  private final long seed_;

  /**
   * Pretty prints only the sketch summary, using {@link Util#DEFAULT_UPDATE_SEED}.
   */
  public SketchToString() {
    this(false, Util.DEFAULT_UPDATE_SEED);
  }

  /**
   * Pretty prints all bucket detail plus the sketch summary based on outputDetailStr,
   * using {@link Util#DEFAULT_UPDATE_SEED}.
   *
   * @param outputDetailStr If the first character is a "T" or "t" the output will include the bucket
   * detail. Otherwise (including a null or empty string) only the sketch summary will be output.
   */
  public SketchToString(String outputDetailStr) {
    this(parseDetailFlag(outputDetailStr), Util.DEFAULT_UPDATE_SEED);
  }

  /**
   * Pretty prints all bucket detail plus the sketch summary based on outputDetailStr.
   *
   * @param outputDetailStr If the first character is a "T" or "t" the output will include the bucket
   * detail. Otherwise (including a null or empty string) only the sketch summary will be output.
   * @param seedStr the seed as a decimal string, parsed with {@link Long#parseLong(String)}
   * @throws NumberFormatException if seedStr is not a parsable long
   */
  public SketchToString(String outputDetailStr, String seedStr) {
    this(parseDetailFlag(outputDetailStr), Long.parseLong(seedStr));
  }

  /**
   * Pretty prints all bucket detail plus the sketch summary based on outputDetail.
   *
   * @param outputDetail If true the output will include the bucket detail.
   * Otherwise only the sketch summary will be output.
   * @param seed the seed used when reconstructing the sketch from the tuple
   */
  public SketchToString(boolean outputDetail, long seed) {
    super();
    detailOut = outputDetail;
    seed_ = seed;
  }

  /**
   * Interprets the textual detail flag accepted by the String constructors.
   *
   * @param outputDetailStr the flag text, may be null or empty
   * @return true only if the first character is "T" or "t"; false for null or empty input
   */
  private static boolean parseDetailFlag(String outputDetailStr) {
    // Guard first: substring(0, 1) would throw on an empty string (and on null).
    if ((outputDetailStr == null) || outputDetailStr.isEmpty()) {
      return false;
    }
    return outputDetailStr.substring(0, 1).equalsIgnoreCase("T");
  }

  /**
   * Converts the given Sketch Tuple to a sketch and returns its pretty-printed string.
   *
   * @param sketchTuple the Sketch Tuple to print
   * @return the pretty-printed sketch, or null if sketchTuple is null or has size zero
   * @throws IOException declared by the overridden API
   */
  @Override
  public String exec(Tuple sketchTuple) throws IOException { //throws is in API
    if ((sketchTuple == null) || (sketchTuple.size() == 0)) {
      return null;
    }
    Sketch sketch = tupleToSketch(sketchTuple, seed_);
    // The summary is always requested; detail is governed by detailOut. The trailing
    // 8 and true are formatting arguments of Sketch#toString (presumably column width
    // and hex mode -- confirm against the sketches library version in use).
    return sketch.toString(true, detailOut, 8, true);
  }

  /**
   * The output is a String Tuple.
   *
   * @param input the input schema
   * @return a schema holding a tuple with a single "PrettyString" chararray field, or null
   * if input is null or the schema could not be constructed
   */
  @Override
  public Schema outputSchema(Schema input) {
    if (input != null) {
      try {
        Schema tupleSchema = new Schema();
        tupleSchema.add(new Schema.FieldSchema("PrettyString", DataType.CHARARRAY));
        return new Schema(new Schema.FieldSchema(getSchemaName(this
            .getClass().getName().toLowerCase(), input), tupleSchema, DataType.TUPLE));
      }
      catch (Exception e) {
        // Deliberate best-effort: if the schema cannot be built, fall through and
        // return null rather than propagating the failure.
      }
    }
    return null;
  }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy