All downloads are FREE. The search and download functionality uses the official Maven repository.

com.yahoo.sketches.hive.tuple.DoubleSummarySketchToEstimatesUDF Maven / Gradle / Ivy

There is a newer version: 0.13.0
Show newest version
/*
 * Copyright 2016, Yahoo! Inc.
 * Licensed under the terms of the Apache License 2.0. See LICENSE file at the project root for terms.
 */

package com.yahoo.sketches.hive.tuple;

import java.util.Arrays;
import java.util.List;

import org.apache.hadoop.hive.ql.exec.Description;
import org.apache.hadoop.hive.ql.exec.UDF;
import org.apache.hadoop.io.BytesWritable;

import com.yahoo.memory.Memory;
import com.yahoo.sketches.tuple.DoubleSummary;
import com.yahoo.sketches.tuple.Sketch;
import com.yahoo.sketches.tuple.SketchIterator;
import com.yahoo.sketches.tuple.Sketches;

/**
 * Hive UDF that turns a serialized tuple sketch with {@code DoubleSummary} values into a
 * two-element list of estimates: the estimated number of unique keys, followed by the
 * estimated sum of the summary values scaled to the original population.
 */
@Description(
    name = "DoubleSummarySketchToEstimates",
    value = "_FUNC_(sketch)",
    extended = "Returns a list of estimates from a given Sketch."
    + " The result will be two double values."
    + " The first estimate is the estimate of the number of unique keys in the"
    + " original population. Next there is an estimate of the sum of the parameter in the"
    + " original population (sum of the values in the sketch scaled to the original population."
    + " This estimate assumes that the DoubleSummary was used in the Sum mode.)")
public class DoubleSummarySketchToEstimatesUDF extends UDF {

  /**
   * Get estimates from a given {@code Sketch<DoubleSummary>}.
   *
   * <p>The returned list has exactly two elements:
   * <ol>
   *   <li>the sketch's estimate of the number of unique keys;</li>
   *   <li>the sum of all summary values retained in the sketch divided by the sketch's theta.</li>
   * </ol>
   *
   * @param serializedSketch DoubleSummarySketch in a serialized binary form, may be null
   * @return list of two estimates, or null if {@code serializedSketch} is null
   */
  public List<Double> evaluate(final BytesWritable serializedSketch) {
    if (serializedSketch == null) { return null; }
    // NOTE(review): BytesWritable.getBytes() exposes the backing buffer, which may be longer
    // than getLength(); this presumably relies on heapify reading only the serialized
    // portion - confirm against the sketch library version in use.
    final Sketch<DoubleSummary> sketch =
        Sketches.heapifySketch(Memory.wrap(serializedSketch.getBytes()));
    // Accumulate the summary values of every entry retained in the sketch.
    double sum = 0;
    final SketchIterator<DoubleSummary> it = sketch.iterator();
    while (it.next()) {
      sum += it.getSummary().getValue();
    }
    // Dividing the retained sum by theta scales it up to the original population.
    return Arrays.asList(
      sketch.getEstimate(),
      sum / sketch.getTheta()
    );
  }

}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy